1 /** 2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 This module contains a parser of diffs in the Unified Format. 11 It is pretty limited because it only handles those that git normally output. 12 The result are which lines has changes in what files. 13 14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified 15 16 Example: 17 --- 18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 20 @@ -31,7 +31,6 @@ import std.algorithm : map; 21 import std.array : Appender, appender, array; 22 import std.datetime : SysTime; 23 import std.format : format; 24 -import std.typecons : Tuple; 25 26 import d2sqlite3 : sqlDatabase = Database; 27 28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 29 struct Database { 30 import std.conv : to; 31 import std.exception : collectException; 32 - import std.typecons : Nullable; 33 + import std.typecons : Nullable, Flag, No; 34 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 35 36 sqlDatabase db; 37 --- 38 */ 39 module dextool.plugin.mutate.backend.diff_parser; 40 41 import logger = std.experimental.logger; 42 import std.range : ElementType; 43 import std.traits : isSomeString; 44 45 import dextool.type : AbsolutePath, Path; 46 47 version (unittest) { 48 import unit_threaded : shouldEqual, shouldBeTrue, should; 49 } 50 51 Diff diffFromStdin() @trusted { 52 import std.stdio : stdin; 53 import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser; 54 55 return toDiff(stdin.byLine); 56 } 57 58 /** Parse a range of lines to a diff. 59 * 60 * Params: 61 * r = range of strings which is the diff 62 */ 63 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) { 64 UnifiedDiffParser parser; 65 foreach (l; r) { 66 debug logger.trace(l); 67 parser.process(l); 68 } 69 return parser.result; 70 } 71 72 @safe: 73 74 struct Diff { 75 import dextool.set; 76 77 static struct Line { 78 uint line; 79 string text; 80 } 81 82 /// The raw diff that where parsed. 83 Line[][Path] rawDiff; 84 85 alias ChangedLines = Set!uint; 86 87 ChangedLines[Path] changes; 88 alias changes this; 89 90 bool empty() @safe pure nothrow const @nogc { 91 return changes.length == 0; 92 } 93 94 /** A range over the changes by file. 95 * 96 * The paths are adjusted to be relative `workdir`. 97 */ 98 auto toRange(AbsolutePath workdir) @safe { 99 return DiffRange(this, workdir); 100 } 101 } 102 103 struct DiffRange { 104 import std.path : relativePath; 105 import std.array : array; 106 import dextool.set; 107 108 static struct KeyValue { 109 Path key; 110 Diff.ChangedLines value; 111 AbsolutePath absPath; 112 } 113 114 private { 115 Diff diff; 116 AbsolutePath workdir; 117 Path[] keys; 118 } 119 120 this(Diff d, AbsolutePath workdir) { 121 this.diff = d; 122 this.workdir = workdir; 123 this.keys = diff.byKey.array; 124 debug logger.trace(workdir); 125 } 126 127 KeyValue front() @safe { 128 assert(!empty, "Can't get front of an empty range"); 129 debug logger.trace(keys[0]); 130 return KeyValue(keys[0], diff[keys[0]], AbsolutePath(keys[0], workdir)); 131 } 132 133 void popFront() @safe pure nothrow { 134 assert(!empty, "Can't pop front of an empty range"); 135 keys = keys[1 .. $]; 136 } 137 138 bool empty() @safe pure nothrow const @nogc { 139 return keys.length == 0; 140 } 141 } 142 143 /** Parse a buffer in the Unified diff format and return the hunks of changes 144 * in the targets. 145 */ 146 struct UnifiedDiffParser { 147 import std.regex : regex, matchFirst, matchAll; 148 import std.typecons : Tuple; 149 150 Diff result; 151 152 private { 153 // diff --git a/usability/report.md b/usability/report.md 154 enum re_git_diff_hdr = regex(`^diff --git.*`); 155 // --- a/standalone.d 156 enum re_hdr_original = regex(`^--- (?P<hdr>.*)`); 157 // +++ a/standalone.d 158 enum re_hdr_new = regex(`^\+\+\+ (?P<hdr>.*)`); 159 // @@ -31,7 +31,6 @@ import std.algorithm : map; 160 enum re_hunk_start_multiline = regex(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`); 161 // @@ -31 +31 @@ import std.algorithm : map; 162 enum re_hunk_start_line = regex(`^@@ -\d* \+(?P<line>\d*) @@.*`); 163 164 alias FsmState = Tuple!(bool, State, Action[]); 165 166 FsmState st; 167 StateData data; 168 bool isGitDiff; 169 } 170 171 void process(T)(T line) { 172 import std.conv : to; 173 import std.meta; 174 import std.traits : EnumMembers; 175 import std.string : startsWith, split; 176 import dextool.type : Path, AbsolutePath; 177 import dextool.set; 178 179 auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty; 180 auto hdr_original = matchFirst(line, re_hdr_original); 181 auto hdr_new = matchFirst(line, re_hdr_new); 182 auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline); 183 auto hunk_start_line = matchFirst(line, re_hunk_start_line); 184 const first_char = line.length != 0 ? line[0] : typeof(line[0]).init; 185 186 FsmState nextState(const State st) { 187 auto next = FsmState(false, st, null); 188 189 final switch (st) { 190 case State.findHdr: 191 if (is_git_diff) { 192 next[1] = State.findHdrOriginal; 193 next[2] = [Action.setGitDiff]; 194 } else if (!hdr_original.empty) { 195 next[0] = true; 196 next[1] = State.findHdrOriginal; 197 } 198 break; 199 case State.findHdrOriginal: 200 if (!hdr_original.empty) { 201 next[1] = State.findHdrNew; 202 next[2] = [Action.resetStateData, Action.saveOriginal]; 203 } 204 break; 205 case State.findHdrNew: 206 if (!hdr_new.empty) { 207 next[1] = State.checkOrigNew; 208 next[2] = [Action.saveNew]; 209 next[0] = true; 210 } 211 break; 212 case State.checkOrigNew: 213 if (data.hdrOriginal.length == 0) { 214 next[1] = State.findHdrOriginal; 215 next[2] = [Action.warnOriginal]; 216 } else if (data.hdrNew.length == 0) { 217 next[1] = State.findHdrOriginal; 218 next[2] = [Action.warnNew]; 219 } else { 220 next[1] = State.findHunkStart; 221 } 222 break; 223 case State.findNext: 224 if (!hunk_start_multiline.empty || !hunk_start_line.empty) { 225 next[1] = State.findHunkStart; 226 next[0] = true; 227 } else if (!hdr_original.empty) { 228 next[1] = State.findHdrOriginal; 229 next[0] = true; 230 } 231 break; 232 case State.findHunkStart: 233 if (!hunk_start_multiline.empty) { 234 next[1] = State.insideHunk; 235 next[2] = [Action.multiLineHunk]; 236 } else if (!hunk_start_line.empty) { 237 next[1] = State.insideHunk; 238 next[2] = [Action.lineHunk]; 239 } 240 break; 241 case State.insideHunk: 242 next[2] = [Action.saveRawDiff]; 243 if (first_char == '+') 244 next[2] ~= [Action.plusLine]; 245 else if (first_char == ' ' || line.length == 0) 246 next[2] ~= [Action.blankLine]; 247 next[1] = State.checkHunkCounter; 248 next[0] = true; 249 break; 250 case State.checkHunkCounter: 251 next[1] = State.insideHunk; 252 if (data.count == data.maxCount) 253 next[1] = State.findNext; 254 break; 255 } 256 257 return next; 258 } 259 260 void resetStateDataAct() { 261 data = StateData.init; 262 } 263 264 void saveOriginalAct() { 265 auto a = hdr_original["hdr"]; 266 auto p = () { 267 if (isGitDiff && a.length > 2) 268 return a[2 .. $].split('\t'); 269 return a.split('\t'); 270 }(); 271 272 data.hdrOriginal = Path(p[0].idup); 273 } 274 275 void saveNewAct() { 276 auto a = hdr_new["hdr"]; 277 auto p = () { 278 if (isGitDiff && a.length > 2) 279 return a[2 .. $].split('\t'); 280 return a.split('\t'); 281 }(); 282 283 data.hdrNew = Path(p[0].idup); 284 } 285 286 void warnOriginalAct() { 287 logger.warning( 288 "Broken diff data. The original file, as specificed after ---, has length zero"); 289 } 290 291 void warnNewAct() { 292 logger.warning( 293 "Broken diff data. The new file, as specificed after +++, has length zero"); 294 } 295 296 void multiLineHunkAct() { 297 try { 298 data.startPos = hunk_start_multiline["line"].to!uint; 299 data.maxCount = hunk_start_multiline["count"].to!uint; 300 data.count = 0; 301 } catch (Exception e) { 302 logger.info(e.msg); 303 logger.info("Unable to parse diff line: ", line); 304 } 305 } 306 307 void lineHunkAct() { 308 try { 309 data.startPos = hunk_start_multiline["line"].to!uint; 310 data.maxCount = 1; 311 data.count = 0; 312 } catch (Exception e) { 313 logger.info(e.msg); 314 logger.info("Unable to parse diff line: ", line); 315 } 316 } 317 318 void plusLineAct() { 319 if (data.hdrNew !in result.changes) 320 result[data.hdrNew] = Diff.ChangedLines.init; 321 result[data.hdrNew].add(data.startPos + data.count); 322 data.count++; 323 } 324 325 void blankLineAct() { 326 data.count++; 327 } 328 329 void setGitDiffAct() { 330 isGitDiff = true; 331 } 332 333 void saveRawDiffAct() { 334 result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup); 335 } 336 337 st[0] = true; 338 while (st[0]) { 339 st = nextState(st[1]); 340 debug logger.tracef("%s | %s", line, st); 341 342 foreach (const act; st[2]) { 343 static foreach (Member; [EnumMembers!Action]) { 344 if (act == Member) 345 mixin(Member.to!string ~ "Act();"); 346 } 347 } 348 debug logger.tracef("%s | %s %s", result, data, isGitDiff); 349 } 350 } 351 } 352 353 private: 354 355 struct StateData { 356 Path hdrOriginal; 357 Path hdrNew; 358 uint startPos; 359 uint maxCount; 360 uint count; 361 } 362 363 enum State { 364 findHdr, 365 findHdrOriginal, 366 findHdrNew, 367 checkOrigNew, 368 findHunkStart, 369 insideHunk, 370 findNext, 371 checkHunkCounter, 372 } 373 374 enum Action { 375 setGitDiff, 376 resetStateData, 377 saveOriginal, 378 saveNew, 379 warnOriginal, 380 warnNew, 381 multiLineHunk, 382 lineHunk, 383 plusLine, 384 blankLine, 385 saveRawDiff, 386 } 387 388 version (unittest) { 389 import std.string : lineSplitter; 390 import dextool.set; 391 import dextool.type; 392 } 393 394 @("shall detect the changes lines (unified git diff)") 395 unittest { 396 immutable lines = `diff --git a/standalone2.d b/standalone2.d 397 index 0123..2345 100644 398 --- a/standalone.d 399 +++ b/standalone2.d 400 @@ -31,7 +31,6 @@ import std.algorithm : map; 401 import std.array : Appender, appender, array; 402 import std.datetime : SysTime; 403 +import std.format : format; 404 -import std.typecons : Tuple; 405 406 import d2sqlite3 : sqlDatabase = Database; 407 408 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 409 struct Database { 410 import std.conv : to; 411 import std.exception : collectException; 412 - import std.typecons : Nullable; 413 + import std.typecons : Nullable, Flag, No; 414 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 415 416 + sqlDatabase db;`; 417 418 UnifiedDiffParser p; 419 foreach (line; lines.lineSplitter) 420 p.process(line); 421 422 // assert 423 p.result[Path("standalone2.d")].contains(33).shouldBeTrue; 424 p.result[Path("standalone2.d")].contains(48).shouldBeTrue; 425 p.result[Path("standalone2.d")].contains(51).shouldBeTrue; 426 p.result.length.should == 1; 427 } 428 429 @("shall detect the changed lines (unified with date)") 430 unittest { 431 432 immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp 2018-11-18 21:25:46.631640690 +0100 433 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp 2018-11-18 21:26:17.003691847 +0100 434 @@ -3,7 +3,7 @@ 435 /// @author Joakim Brännström (joakim.brannstrom@gmx.com) 436 437 int fun(int x) { 438 - if (x > 3) { 439 + if (x != 3) { 440 return 0; 441 } 442 return 1;`; 443 444 UnifiedDiffParser p; 445 foreach (line; lines.lineSplitter) 446 p.process(line); 447 448 // assert 449 p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6) 450 .shouldBeTrue; 451 p.result.length.should == 1; 452 }