1 /**
2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains a parser of diffs in the Unified Format.
11 It is pretty limited because it only handles those that git normally output.
12 The result are which lines has changes in what files.
13 
14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified
15 
16 Example:
17 ---
18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
20 @@ -31,7 +31,6 @@ import std.algorithm : map;
21  import std.array : Appender, appender, array;
22  import std.datetime : SysTime;
23  import std.format : format;
24 -import std.typecons : Tuple;
25 
26  import d2sqlite3 : sqlDatabase = Database;
27 
28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
29  struct Database {
30      import std.conv : to;
31      import std.exception : collectException;
32 -    import std.typecons : Nullable;
33 +    import std.typecons : Nullable, Flag, No;
34      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
35 
36      sqlDatabase db;
37 ---
38 */
39 module dextool.plugin.mutate.backend.diff_parser;
40 
41 import logger = std.experimental.logger;
42 import std.range : ElementType;
43 import std.traits : isSomeString;
44 
45 import dextool.type : AbsolutePath, Path, DirName;
46 
47 version (unittest) {
48     import unit_threaded : shouldEqual, shouldBeTrue, should;
49 }
50 
51 Diff diffFromStdin() @trusted {
52     import std.stdio : stdin;
53     import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser;
54 
55     return toDiff(stdin.byLine);
56 }
57 
58 /** Parse a range of lines to a diff.
59  *
60  * Params:
61  *  r = range of strings which is the diff
62  */
63 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) {
64     UnifiedDiffParser parser;
65     foreach (l; r) {
66         debug logger.trace(l);
67         parser.process(l);
68     }
69     return parser.result;
70 }
71 
72 @safe:
73 
74 struct Diff {
75     import dextool.set;
76 
77     static struct Line {
78         uint line;
79         string text;
80     }
81 
82     /// The raw diff that where parsed.
83     Line[][Path] rawDiff;
84 
85     alias ChangedLines = Set!uint;
86 
87     ChangedLines[Path] changes;
88     alias changes this;
89 
90     bool empty() @safe pure nothrow const @nogc {
91         return changes.length == 0;
92     }
93 
94     /** A range over the changes by file.
95      *
96      * The paths are adjusted to be relative `workdir`.
97      */
98     auto toRange(AbsolutePath workdir) @safe {
99         return DiffRange(this, workdir);
100     }
101 }
102 
103 struct DiffRange {
104     import std.path : relativePath;
105     import std.array : array;
106     import dextool.set;
107 
108     static struct KeyValue {
109         Path key;
110         Diff.ChangedLines value;
111         AbsolutePath absPath;
112     }
113 
114     private {
115         Diff diff;
116         AbsolutePath workdir;
117         Path[] keys;
118     }
119 
120     this(Diff d, AbsolutePath workdir) {
121         this.diff = d;
122         this.workdir = workdir;
123         this.keys = diff.byKey.array;
124         debug logger.trace(workdir);
125     }
126 
127     KeyValue front() @safe {
128         assert(!empty, "Can't get front of an empty range");
129         debug logger.trace(keys[0]);
130         return KeyValue(keys[0], diff[keys[0]], AbsolutePath(keys[0],
131                 DirName(cast(string) workdir)));
132     }
133 
134     void popFront() @safe pure nothrow {
135         assert(!empty, "Can't pop front of an empty range");
136         keys = keys[1 .. $];
137     }
138 
139     bool empty() @safe pure nothrow const @nogc {
140         return keys.length == 0;
141     }
142 }
143 
144 /** Parse a buffer in the Unified diff format and return the hunks of changes
145  * in the targets.
146  */
147 struct UnifiedDiffParser {
148     import std.regex : ctRegex, matchFirst, matchAll;
149     import std.typecons : Tuple;
150 
151     Diff result;
152 
153     private {
154         // diff --git a/usability/report.md b/usability/report.md
155         enum re_git_diff_hdr = ctRegex!(`^diff --git.*`);
156         // --- a/standalone.d
157         enum re_hdr_original = ctRegex!(`^--- (?P<hdr>.*)`);
158         // +++ a/standalone.d
159         enum re_hdr_new = ctRegex!(`^\+\+\+ (?P<hdr>.*)`);
160         // @@ -31,7 +31,6 @@ import std.algorithm : map;
161         enum re_hunk_start_multiline = ctRegex!(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`);
162         // @@ -31 +31 @@ import std.algorithm : map;
163         enum re_hunk_start_line = ctRegex!(`^@@ -\d* \+(?P<line>\d*) @@.*`);
164 
165         alias FsmState = Tuple!(bool, State, Action[]);
166 
167         FsmState st;
168         StateData data;
169         bool isGitDiff;
170     }
171 
172     void process(T)(T line) {
173         import std.conv : to;
174         import std.meta;
175         import std.traits : EnumMembers;
176         import std.string : startsWith, split;
177         import dextool.type : Path, AbsolutePath;
178         import dextool.set;
179 
180         auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty;
181         auto hdr_original = matchFirst(line, re_hdr_original);
182         auto hdr_new = matchFirst(line, re_hdr_new);
183         auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline);
184         auto hunk_start_line = matchFirst(line, re_hunk_start_line);
185         const first_char = line.length != 0 ? line[0] : typeof(line[0]).init;
186 
187         FsmState nextState(const State st) {
188             auto next = FsmState(false, st, null);
189 
190             final switch (st) {
191             case State.findHdr:
192                 if (is_git_diff) {
193                     next[1] = State.findHdrOriginal;
194                     next[2] = [Action.setGitDiff];
195                 } else if (!hdr_original.empty) {
196                     next[0] = true;
197                     next[1] = State.findHdrOriginal;
198                 }
199                 break;
200             case State.findHdrOriginal:
201                 if (!hdr_original.empty) {
202                     next[1] = State.findHdrNew;
203                     next[2] = [Action.resetStateData, Action.saveOriginal];
204                 }
205                 break;
206             case State.findHdrNew:
207                 if (!hdr_new.empty) {
208                     next[1] = State.checkOrigNew;
209                     next[2] = [Action.saveNew];
210                     next[0] = true;
211                 }
212                 break;
213             case State.checkOrigNew:
214                 if (data.hdrOriginal.length == 0) {
215                     next[1] = State.findHdrOriginal;
216                     next[2] = [Action.warnOriginal];
217                 } else if (data.hdrNew.length == 0) {
218                     next[1] = State.findHdrOriginal;
219                     next[2] = [Action.warnNew];
220                 } else {
221                     next[1] = State.findHunkStart;
222                 }
223                 break;
224             case State.findNext:
225                 if (!hunk_start_multiline.empty || !hunk_start_line.empty) {
226                     next[1] = State.findHunkStart;
227                     next[0] = true;
228                 } else if (!hdr_original.empty) {
229                     next[1] = State.findHdrOriginal;
230                     next[0] = true;
231                 }
232                 break;
233             case State.findHunkStart:
234                 if (!hunk_start_multiline.empty) {
235                     next[1] = State.insideHunk;
236                     next[2] = [Action.multiLineHunk];
237                 } else if (!hunk_start_line.empty) {
238                     next[1] = State.insideHunk;
239                     next[2] = [Action.lineHunk];
240                 }
241                 break;
242             case State.insideHunk:
243                 next[2] = [Action.saveRawDiff];
244                 if (first_char == '+')
245                     next[2] ~= [Action.plusLine];
246                 else if (first_char == ' ' || line.length == 0)
247                     next[2] ~= [Action.blankLine];
248                 next[1] = State.checkHunkCounter;
249                 next[0] = true;
250                 break;
251             case State.checkHunkCounter:
252                 next[1] = State.insideHunk;
253                 if (data.count == data.maxCount)
254                     next[1] = State.findNext;
255                 break;
256             }
257 
258             return next;
259         }
260 
261         void resetStateDataAct() {
262             data = StateData.init;
263         }
264 
265         void saveOriginalAct() {
266             auto a = hdr_original["hdr"];
267             auto p = () {
268                 if (isGitDiff && a.length > 2)
269                     return a[2 .. $].split('\t');
270                 return a.split('\t');
271             }();
272 
273             data.hdrOriginal = Path(p[0].idup);
274         }
275 
276         void saveNewAct() {
277             auto a = hdr_new["hdr"];
278             auto p = () {
279                 if (isGitDiff && a.length > 2)
280                     return a[2 .. $].split('\t');
281                 return a.split('\t');
282             }();
283 
284             data.hdrNew = Path(p[0].idup);
285         }
286 
287         void warnOriginalAct() {
288             logger.warning(
289                     "Broken diff data. The original file, as specificed after ---, has length zero");
290         }
291 
292         void warnNewAct() {
293             logger.warning(
294                     "Broken diff data. The new file, as specificed after +++, has length zero");
295         }
296 
297         void multiLineHunkAct() {
298             data.startPos = hunk_start_multiline["line"].to!uint;
299             data.maxCount = hunk_start_multiline["count"].to!uint;
300             data.count = 0;
301         }
302 
303         void lineHunkAct() {
304             data.startPos = hunk_start_multiline["line"].to!uint;
305             data.maxCount = 1;
306             data.count = 0;
307         }
308 
309         void plusLineAct() {
310             if (data.hdrNew !in result.changes)
311                 result[data.hdrNew] = Diff.ChangedLines.init;
312             result[data.hdrNew].add(data.startPos + data.count);
313             data.count++;
314         }
315 
316         void blankLineAct() {
317             data.count++;
318         }
319 
320         void setGitDiffAct() {
321             isGitDiff = true;
322         }
323 
324         void saveRawDiffAct() {
325             result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup);
326         }
327 
328         st[0] = true;
329         while (st[0]) {
330             st = nextState(st[1]);
331             debug logger.tracef("%s | %s", line, st);
332 
333             foreach (const act; st[2]) {
334                 static foreach (Member; [EnumMembers!Action]) {
335                     if (act == Member)
336                         mixin(Member.to!string ~ "Act();");
337                 }
338             }
339             debug logger.tracef("%s | %s %s", result, data, isGitDiff);
340         }
341     }
342 }
343 
344 private:
345 
346 struct StateData {
347     Path hdrOriginal;
348     Path hdrNew;
349     uint startPos;
350     uint maxCount;
351     uint count;
352 }
353 
354 enum State {
355     findHdr,
356     findHdrOriginal,
357     findHdrNew,
358     checkOrigNew,
359     findHunkStart,
360     insideHunk,
361     findNext,
362     checkHunkCounter,
363 }
364 
365 enum Action {
366     setGitDiff,
367     resetStateData,
368     saveOriginal,
369     saveNew,
370     warnOriginal,
371     warnNew,
372     multiLineHunk,
373     lineHunk,
374     plusLine,
375     blankLine,
376     saveRawDiff,
377 }
378 
379 version (unittest) {
380     import std.string : lineSplitter;
381     import dextool.set;
382     import dextool.type;
383 }
384 
385 @("shall detect the changes lines (unified git diff)")
386 unittest {
387     immutable lines = `diff --git a/standalone2.d b/standalone2.d
388 index 0123..2345 100644
389 --- a/standalone.d
390 +++ b/standalone2.d
391 @@ -31,7 +31,6 @@ import std.algorithm : map;
392  import std.array : Appender, appender, array;
393  import std.datetime : SysTime;
394 +import std.format : format;
395 -import std.typecons : Tuple;
396 
397  import d2sqlite3 : sqlDatabase = Database;
398 
399 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
400  struct Database {
401      import std.conv : to;
402      import std.exception : collectException;
403 -    import std.typecons : Nullable;
404 +    import std.typecons : Nullable, Flag, No;
405      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
406 
407 +    sqlDatabase db;`;
408 
409     UnifiedDiffParser p;
410     foreach (line; lines.lineSplitter)
411         p.process(line);
412 
413     // assert
414     p.result[Path("standalone2.d")].contains(33).shouldBeTrue;
415     p.result[Path("standalone2.d")].contains(48).shouldBeTrue;
416     p.result[Path("standalone2.d")].contains(51).shouldBeTrue;
417     p.result.length.should == 1;
418 }
419 
420 @("shall detect the changed lines (unified with date)")
421 unittest {
422 
423     immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp	2018-11-18 21:25:46.631640690 +0100
424 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp	2018-11-18 21:26:17.003691847 +0100
425 @@ -3,7 +3,7 @@
426  /// @author Joakim Brännström (joakim.brannstrom@gmx.com)
427 
428  int fun(int x) {
429 -    if (x > 3) {
430 +    if (x != 3) {
431          return 0;
432      }
433      return 1;`;
434 
435     UnifiedDiffParser p;
436     foreach (line; lines.lineSplitter)
437         p.process(line);
438 
439     // assert
440     p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6)
441         .shouldBeTrue;
442     p.result.length.should == 1;
443 }