1 /**
2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains a parser of diffs in the Unified Format.
11 It is pretty limited because it only handles those that git normally output.
12 The result are which lines has changes in what files.
13 
14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified
15 
16 Example:
17 ---
18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
20 @@ -31,7 +31,6 @@ import std.algorithm : map;
21  import std.array : Appender, appender, array;
22  import std.datetime : SysTime;
23  import std.format : format;
24 -import std.typecons : Tuple;
25 
26  import d2sqlite3 : sqlDatabase = Database;
27 
28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
29  struct Database {
30      import std.conv : to;
31      import std.exception : collectException;
32 -    import std.typecons : Nullable;
33 +    import std.typecons : Nullable, Flag, No;
34      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
35 
36      sqlDatabase db;
37 ---
38 */
39 module dextool.plugin.mutate.backend.diff_parser;
40 
41 import logger = std.experimental.logger;
42 import std.range : ElementType;
43 import std.traits : isSomeString;
44 
45 import dextool.type : AbsolutePath, Path;
46 
47 version (unittest) {
48     import unit_threaded : shouldEqual, shouldBeTrue, should;
49 }
50 
51 Diff diffFromStdin() @trusted {
52     import std.stdio : stdin;
53     import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser;
54 
55     return toDiff(stdin.byLine);
56 }
57 
58 /** Parse a range of lines to a diff.
59  *
60  * Params:
61  *  r = range of strings which is the diff
62  */
63 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) {
64     UnifiedDiffParser parser;
65     foreach (l; r) {
66         debug logger.trace(l);
67         parser.process(l);
68     }
69     return parser.result;
70 }
71 
72 @safe:
73 
74 struct Diff {
75     import dextool.set;
76 
77     static struct Line {
78         uint line;
79         string text;
80     }
81 
82     /// The raw diff that where parsed.
83     Line[][Path] rawDiff;
84 
85     alias ChangedLines = Set!uint;
86 
87     ChangedLines[Path] changes;
88     alias changes this;
89 
90     bool empty() @safe pure nothrow const @nogc {
91         return changes.length == 0;
92     }
93 
94     /** A range over the changes by file.
95      *
96      * The paths are adjusted to be relative `workdir`.
97      */
98     auto toRange(AbsolutePath workdir) @safe {
99         return DiffRange(this, workdir);
100     }
101 }
102 
103 struct DiffRange {
104     import std.path : relativePath;
105     import std.array : array;
106     import dextool.set;
107 
108     static struct KeyValue {
109         Path key;
110         Diff.ChangedLines value;
111         AbsolutePath absPath;
112     }
113 
114     private {
115         Diff diff;
116         AbsolutePath workdir;
117         Path[] keys;
118     }
119 
120     this(Diff d, AbsolutePath workdir) {
121         this.diff = d;
122         this.workdir = workdir;
123         this.keys = diff.byKey.array;
124         debug logger.trace(workdir);
125     }
126 
127     KeyValue front() @safe {
128         assert(!empty, "Can't get front of an empty range");
129         debug logger.trace(keys[0]);
130         return KeyValue(keys[0], diff[keys[0]], AbsolutePath(keys[0], workdir));
131     }
132 
133     void popFront() @safe pure nothrow {
134         assert(!empty, "Can't pop front of an empty range");
135         keys = keys[1 .. $];
136     }
137 
138     bool empty() @safe pure nothrow const @nogc {
139         return keys.length == 0;
140     }
141 }
142 
143 /** Parse a buffer in the Unified diff format and return the hunks of changes
144  * in the targets.
145  */
146 struct UnifiedDiffParser {
147     import std.regex : regex, matchFirst, matchAll;
148     import std.typecons : Tuple;
149 
150     Diff result;
151 
152     private {
153         // diff --git a/usability/report.md b/usability/report.md
154         enum re_git_diff_hdr = regex(`^diff --git.*`);
155         // --- a/standalone.d
156         enum re_hdr_original = regex(`^--- (?P<hdr>.*)`);
157         // +++ a/standalone.d
158         enum re_hdr_new = regex(`^\+\+\+ (?P<hdr>.*)`);
159         // @@ -31,7 +31,6 @@ import std.algorithm : map;
160         enum re_hunk_start_multiline = regex(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`);
161         // @@ -31 +31 @@ import std.algorithm : map;
162         enum re_hunk_start_line = regex(`^@@ -\d* \+(?P<line>\d*) @@.*`);
163 
164         alias FsmState = Tuple!(bool, State, Action[]);
165 
166         FsmState st;
167         StateData data;
168         bool isGitDiff;
169     }
170 
171     void process(T)(T line) {
172         import std.conv : to;
173         import std.meta;
174         import std.traits : EnumMembers;
175         import std.string : startsWith, split;
176         import dextool.type : Path, AbsolutePath;
177         import dextool.set;
178 
179         auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty;
180         auto hdr_original = matchFirst(line, re_hdr_original);
181         auto hdr_new = matchFirst(line, re_hdr_new);
182         auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline);
183         auto hunk_start_line = matchFirst(line, re_hunk_start_line);
184         const first_char = line.length != 0 ? line[0] : typeof(line[0]).init;
185 
186         FsmState nextState(const State st) {
187             auto next = FsmState(false, st, null);
188 
189             final switch (st) {
190             case State.findHdr:
191                 if (is_git_diff) {
192                     next[1] = State.findHdrOriginal;
193                     next[2] = [Action.setGitDiff];
194                 } else if (!hdr_original.empty) {
195                     next[0] = true;
196                     next[1] = State.findHdrOriginal;
197                 }
198                 break;
199             case State.findHdrOriginal:
200                 if (!hdr_original.empty) {
201                     next[1] = State.findHdrNew;
202                     next[2] = [Action.resetStateData, Action.saveOriginal];
203                 }
204                 break;
205             case State.findHdrNew:
206                 if (!hdr_new.empty) {
207                     next[1] = State.checkOrigNew;
208                     next[2] = [Action.saveNew];
209                     next[0] = true;
210                 }
211                 break;
212             case State.checkOrigNew:
213                 if (data.hdrOriginal.length == 0) {
214                     next[1] = State.findHdrOriginal;
215                     next[2] = [Action.warnOriginal];
216                 } else if (data.hdrNew.length == 0) {
217                     next[1] = State.findHdrOriginal;
218                     next[2] = [Action.warnNew];
219                 } else {
220                     next[1] = State.findHunkStart;
221                 }
222                 break;
223             case State.findNext:
224                 if (!hunk_start_multiline.empty || !hunk_start_line.empty) {
225                     next[1] = State.findHunkStart;
226                     next[0] = true;
227                 } else if (!hdr_original.empty) {
228                     next[1] = State.findHdrOriginal;
229                     next[0] = true;
230                 }
231                 break;
232             case State.findHunkStart:
233                 if (!hunk_start_multiline.empty) {
234                     next[1] = State.insideHunk;
235                     next[2] = [Action.multiLineHunk];
236                 } else if (!hunk_start_line.empty) {
237                     next[1] = State.insideHunk;
238                     next[2] = [Action.lineHunk];
239                 }
240                 break;
241             case State.insideHunk:
242                 next[2] = [Action.saveRawDiff];
243                 if (first_char == '+')
244                     next[2] ~= [Action.plusLine];
245                 else if (first_char == ' ' || line.length == 0)
246                     next[2] ~= [Action.blankLine];
247                 next[1] = State.checkHunkCounter;
248                 next[0] = true;
249                 break;
250             case State.checkHunkCounter:
251                 next[1] = State.insideHunk;
252                 if (data.count == data.maxCount)
253                     next[1] = State.findNext;
254                 break;
255             }
256 
257             return next;
258         }
259 
260         void resetStateDataAct() {
261             data = StateData.init;
262         }
263 
264         void saveOriginalAct() {
265             auto a = hdr_original["hdr"];
266             auto p = () {
267                 if (isGitDiff && a.length > 2)
268                     return a[2 .. $].split('\t');
269                 return a.split('\t');
270             }();
271 
272             data.hdrOriginal = Path(p[0].idup);
273         }
274 
275         void saveNewAct() {
276             auto a = hdr_new["hdr"];
277             auto p = () {
278                 if (isGitDiff && a.length > 2)
279                     return a[2 .. $].split('\t');
280                 return a.split('\t');
281             }();
282 
283             data.hdrNew = Path(p[0].idup);
284         }
285 
286         void warnOriginalAct() {
287             logger.warning(
288                     "Broken diff data. The original file, as specificed after ---, has length zero");
289         }
290 
291         void warnNewAct() {
292             logger.warning(
293                     "Broken diff data. The new file, as specificed after +++, has length zero");
294         }
295 
296         void multiLineHunkAct() {
297             try {
298                 data.startPos = hunk_start_multiline["line"].to!uint;
299                 data.maxCount = hunk_start_multiline["count"].to!uint;
300                 data.count = 0;
301             } catch (Exception e) {
302                 logger.info(e.msg);
303                 logger.info("Unable to parse diff line: ", line);
304             }
305         }
306 
307         void lineHunkAct() {
308             try {
309                 data.startPos = hunk_start_multiline["line"].to!uint;
310                 data.maxCount = 1;
311                 data.count = 0;
312             } catch (Exception e) {
313                 logger.info(e.msg);
314                 logger.info("Unable to parse diff line: ", line);
315             }
316         }
317 
318         void plusLineAct() {
319             if (data.hdrNew !in result.changes)
320                 result[data.hdrNew] = Diff.ChangedLines.init;
321             result[data.hdrNew].add(data.startPos + data.count);
322             data.count++;
323         }
324 
325         void blankLineAct() {
326             data.count++;
327         }
328 
329         void setGitDiffAct() {
330             isGitDiff = true;
331         }
332 
333         void saveRawDiffAct() {
334             result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup);
335         }
336 
337         st[0] = true;
338         while (st[0]) {
339             st = nextState(st[1]);
340             debug logger.tracef("%s | %s", line, st);
341 
342             foreach (const act; st[2]) {
343                 static foreach (Member; [EnumMembers!Action]) {
344                     if (act == Member)
345                         mixin(Member.to!string ~ "Act();");
346                 }
347             }
348             debug logger.tracef("%s | %s %s", result, data, isGitDiff);
349         }
350     }
351 }
352 
353 private:
354 
355 struct StateData {
356     Path hdrOriginal;
357     Path hdrNew;
358     uint startPos;
359     uint maxCount;
360     uint count;
361 }
362 
363 enum State {
364     findHdr,
365     findHdrOriginal,
366     findHdrNew,
367     checkOrigNew,
368     findHunkStart,
369     insideHunk,
370     findNext,
371     checkHunkCounter,
372 }
373 
374 enum Action {
375     setGitDiff,
376     resetStateData,
377     saveOriginal,
378     saveNew,
379     warnOriginal,
380     warnNew,
381     multiLineHunk,
382     lineHunk,
383     plusLine,
384     blankLine,
385     saveRawDiff,
386 }
387 
388 version (unittest) {
389     import std.string : lineSplitter;
390     import dextool.set;
391     import dextool.type;
392 }
393 
394 @("shall detect the changes lines (unified git diff)")
395 unittest {
396     immutable lines = `diff --git a/standalone2.d b/standalone2.d
397 index 0123..2345 100644
398 --- a/standalone.d
399 +++ b/standalone2.d
400 @@ -31,7 +31,6 @@ import std.algorithm : map;
401  import std.array : Appender, appender, array;
402  import std.datetime : SysTime;
403 +import std.format : format;
404 -import std.typecons : Tuple;
405 
406  import d2sqlite3 : sqlDatabase = Database;
407 
408 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
409  struct Database {
410      import std.conv : to;
411      import std.exception : collectException;
412 -    import std.typecons : Nullable;
413 +    import std.typecons : Nullable, Flag, No;
414      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
415 
416 +    sqlDatabase db;`;
417 
418     UnifiedDiffParser p;
419     foreach (line; lines.lineSplitter)
420         p.process(line);
421 
422     // assert
423     p.result[Path("standalone2.d")].contains(33).shouldBeTrue;
424     p.result[Path("standalone2.d")].contains(48).shouldBeTrue;
425     p.result[Path("standalone2.d")].contains(51).shouldBeTrue;
426     p.result.length.should == 1;
427 }
428 
429 @("shall detect the changed lines (unified with date)")
430 unittest {
431 
432     immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp	2018-11-18 21:25:46.631640690 +0100
433 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp	2018-11-18 21:26:17.003691847 +0100
434 @@ -3,7 +3,7 @@
435  /// @author Joakim Brännström (joakim.brannstrom@gmx.com)
436 
437  int fun(int x) {
438 -    if (x > 3) {
439 +    if (x != 3) {
440          return 0;
441      }
442      return 1;`;
443 
444     UnifiedDiffParser p;
445     foreach (line; lines.lineSplitter)
446         p.process(line);
447 
448     // assert
449     p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6)
450         .shouldBeTrue;
451     p.result.length.should == 1;
452 }