1 /**
2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains a parser of diffs in the Unified Format.
11 It is pretty limited because it only handles those that git normally output.
12 The result are which lines has changes in what files.
13 
14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified
15 
16 Example:
17 ---
18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
20 @@ -31,7 +31,6 @@ import std.algorithm : map;
21  import std.array : Appender, appender, array;
22  import std.datetime : SysTime;
23  import std.format : format;
24 -import std.typecons : Tuple;
25 
26  import d2sqlite3 : sqlDatabase = Database;
27 
28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
29  struct Database {
30      import std.conv : to;
31      import std.exception : collectException;
32 -    import std.typecons : Nullable;
33 +    import std.typecons : Nullable, Flag, No;
34      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
35 
36      sqlDatabase db;
37 ---
38 */
39 module dextool.plugin.mutate.backend.diff_parser;
40 
41 import logger = std.experimental.logger;
42 import std.range : ElementType;
43 import std.traits : isSomeString;
44 
45 import dextool.type : AbsolutePath, Path, DirName;
46 
47 version (unittest) {
48     import unit_threaded : shouldEqual, shouldBeTrue, should;
49 }
50 
51 Diff diffFromStdin() @trusted {
52     import std.stdio : stdin;
53     import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser;
54 
55     return toDiff(stdin.byLine);
56 }
57 
58 /** Parse a range of lines to a diff.
59  *
60  * Params:
61  *  r = range of strings which is the diff
62  */
63 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) {
64     UnifiedDiffParser parser;
65     foreach (l; r) {
66         debug logger.trace(l);
67         parser.process(l);
68     }
69     return parser.result;
70 }
71 
72 @safe:
73 
74 struct Diff {
75     import dextool.set;
76 
77     static struct Line {
78         uint line;
79         string text;
80     }
81 
82     /// The raw diff that where parsed.
83     Line[][Path] rawDiff;
84 
85     alias ChangedLines = Set!uint;
86 
87     ChangedLines[Path] changes;
88     alias changes this;
89 
90     bool empty() @safe pure nothrow const @nogc {
91         return changes.length == 0;
92     }
93 
94     /** A range over the changes by file.
95      *
96      * The paths are adjusted to be relative `workdir`.
97      */
98     auto toRange(AbsolutePath workdir) @safe {
99         return DiffRange(this, workdir);
100     }
101 }
102 
103 struct DiffRange {
104     import std.path : relativePath;
105     import std.array : array;
106     import dextool.set;
107 
108     static struct KeyValue {
109         Path key;
110         Diff.ChangedLines value;
111         AbsolutePath absPath;
112     }
113 
114     private {
115         Diff diff;
116         AbsolutePath workdir;
117         Path[] keys;
118     }
119 
120     this(Diff d, AbsolutePath workdir) {
121         this.diff = d;
122         this.workdir = workdir;
123         this.keys = diff.byKey.array;
124         debug logger.trace(workdir);
125     }
126 
127     KeyValue front() @safe {
128         assert(!empty, "Can't get front of an empty range");
129         debug logger.trace(keys[0]);
130         return KeyValue(keys[0], diff[keys[0]], AbsolutePath(keys[0],
131                 DirName(cast(string) workdir)));
132     }
133 
134     void popFront() @safe pure nothrow {
135         assert(!empty, "Can't pop front of an empty range");
136         keys = keys[1 .. $];
137     }
138 
139     bool empty() @safe pure nothrow const @nogc {
140         return keys.length == 0;
141     }
142 }
143 
144 /** Parse a buffer in the Unified diff format and return the hunks of changes
145  * in the targets.
146  */
147 struct UnifiedDiffParser {
148     import std.regex : ctRegex, matchFirst, matchAll;
149     import std.typecons : Tuple;
150 
151     Diff result;
152 
153     private {
154         // diff --git a/usability/report.md b/usability/report.md
155         enum re_git_diff_hdr = ctRegex!(`^diff --git.*`);
156         // --- a/standalone.d
157         enum re_hdr_original = ctRegex!(`^--- (?P<hdr>.*)`);
158         // +++ a/standalone.d
159         enum re_hdr_new = ctRegex!(`^\+\+\+ (?P<hdr>.*)`);
160         // @@ -31,7 +31,6 @@ import std.algorithm : map;
161         enum re_hunk_start_multiline = ctRegex!(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`);
162         // @@ -31 +31 @@ import std.algorithm : map;
163         enum re_hunk_start_line = ctRegex!(`^@@ -\d* \+(?P<line>\d*) @@.*`);
164 
165         alias FsmState = Tuple!(bool, State, Action[]);
166 
167         FsmState st;
168         StateData data;
169         bool isGitDiff;
170     }
171 
172     void process(T)(T line) {
173         import std.conv : to;
174         import std.meta;
175         import std.traits : EnumMembers;
176         import std.string : startsWith, split;
177         import dextool.type : Path, AbsolutePath;
178         import dextool.set;
179 
180         auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty;
181         auto hdr_original = matchFirst(line, re_hdr_original);
182         auto hdr_new = matchFirst(line, re_hdr_new);
183         auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline);
184         auto hunk_start_line = matchFirst(line, re_hunk_start_line);
185         const first_char = line.length != 0 ? line[0] : typeof(line[0]).init;
186 
187         FsmState nextState(const State st) {
188             auto next = FsmState(false, st, null);
189 
190             final switch (st) {
191             case State.findHdr:
192                 if (is_git_diff) {
193                     next[1] = State.findHdrOriginal;
194                     next[2] = [Action.setGitDiff];
195                 } else if (!hdr_original.empty) {
196                     next[0] = true;
197                     next[1] = State.findHdrOriginal;
198                 }
199                 break;
200             case State.findHdrOriginal:
201                 if (!hdr_original.empty) {
202                     next[1] = State.findHdrNew;
203                     next[2] = [Action.resetStateData, Action.saveOriginal];
204                 }
205                 break;
206             case State.findHdrNew:
207                 if (!hdr_new.empty) {
208                     next[1] = State.checkOrigNew;
209                     next[2] = [Action.saveNew];
210                     next[0] = true;
211                 }
212                 break;
213             case State.checkOrigNew:
214                 if (data.hdrOriginal.length == 0) {
215                     next[1] = State.findHdrOriginal;
216                     next[2] = [Action.warnOriginal];
217                 } else if (data.hdrNew.length == 0) {
218                     next[1] = State.findHdrOriginal;
219                     next[2] = [Action.warnNew];
220                 } else {
221                     next[1] = State.findHunkStart;
222                 }
223                 break;
224             case State.findNext:
225                 if (!hunk_start_multiline.empty || !hunk_start_line.empty) {
226                     next[1] = State.findHunkStart;
227                     next[0] = true;
228                 } else if (!hdr_original.empty) {
229                     next[1] = State.findHdrOriginal;
230                     next[0] = true;
231                 }
232                 break;
233             case State.findHunkStart:
234                 if (!hunk_start_multiline.empty) {
235                     next[1] = State.insideHunk;
236                     next[2] = [Action.multiLineHunk];
237                 } else if (!hunk_start_line.empty) {
238                     next[1] = State.insideHunk;
239                     next[2] = [Action.lineHunk];
240                 }
241                 break;
242             case State.insideHunk:
243                 next[2] = [Action.saveRawDiff];
244                 if (first_char == '+')
245                     next[2] ~= [Action.plusLine];
246                 else if (first_char == ' ' || line.length == 0)
247                     next[2] ~= [Action.blankLine];
248                 next[1] = State.checkHunkCounter;
249                 next[0] = true;
250                 break;
251             case State.checkHunkCounter:
252                 next[1] = State.insideHunk;
253                 if (data.count == data.maxCount)
254                     next[1] = State.findNext;
255                 break;
256             }
257 
258             return next;
259         }
260 
261         void resetStateDataAct() {
262             data = StateData.init;
263         }
264 
265         void saveOriginalAct() {
266             auto a = hdr_original["hdr"];
267             auto p = () {
268                 if (isGitDiff && a.length > 2)
269                     return a[2 .. $].split('\t');
270                 return a.split('\t');
271             }();
272 
273             data.hdrOriginal = Path(p[0].idup);
274         }
275 
276         void saveNewAct() {
277             auto a = hdr_new["hdr"];
278             auto p = () {
279                 if (isGitDiff && a.length > 2)
280                     return a[2 .. $].split('\t');
281                 return a.split('\t');
282             }();
283 
284             data.hdrNew = Path(p[0].idup);
285         }
286 
287         void warnOriginalAct() {
288             logger.warning(
289                     "Broken diff data. The original file, as specificed after ---, has length zero");
290         }
291 
292         void warnNewAct() {
293             logger.warning(
294                     "Broken diff data. The new file, as specificed after +++, has length zero");
295         }
296 
297         void multiLineHunkAct() {
298             try {
299                 data.startPos = hunk_start_multiline["line"].to!uint;
300                 data.maxCount = hunk_start_multiline["count"].to!uint;
301                 data.count = 0;
302             } catch (Exception e) {
303                 logger.info(e.msg);
304                 logger.info("Unable to parse diff line: ", line);
305             }
306         }
307 
308         void lineHunkAct() {
309             try {
310                 data.startPos = hunk_start_multiline["line"].to!uint;
311                 data.maxCount = 1;
312                 data.count = 0;
313             } catch (Exception e) {
314                 logger.info(e.msg);
315                 logger.info("Unable to parse diff line: ", line);
316             }
317         }
318 
319         void plusLineAct() {
320             if (data.hdrNew !in result.changes)
321                 result[data.hdrNew] = Diff.ChangedLines.init;
322             result[data.hdrNew].add(data.startPos + data.count);
323             data.count++;
324         }
325 
326         void blankLineAct() {
327             data.count++;
328         }
329 
330         void setGitDiffAct() {
331             isGitDiff = true;
332         }
333 
334         void saveRawDiffAct() {
335             result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup);
336         }
337 
338         st[0] = true;
339         while (st[0]) {
340             st = nextState(st[1]);
341             debug logger.tracef("%s | %s", line, st);
342 
343             foreach (const act; st[2]) {
344                 static foreach (Member; [EnumMembers!Action]) {
345                     if (act == Member)
346                         mixin(Member.to!string ~ "Act();");
347                 }
348             }
349             debug logger.tracef("%s | %s %s", result, data, isGitDiff);
350         }
351     }
352 }
353 
354 private:
355 
356 struct StateData {
357     Path hdrOriginal;
358     Path hdrNew;
359     uint startPos;
360     uint maxCount;
361     uint count;
362 }
363 
364 enum State {
365     findHdr,
366     findHdrOriginal,
367     findHdrNew,
368     checkOrigNew,
369     findHunkStart,
370     insideHunk,
371     findNext,
372     checkHunkCounter,
373 }
374 
375 enum Action {
376     setGitDiff,
377     resetStateData,
378     saveOriginal,
379     saveNew,
380     warnOriginal,
381     warnNew,
382     multiLineHunk,
383     lineHunk,
384     plusLine,
385     blankLine,
386     saveRawDiff,
387 }
388 
389 version (unittest) {
390     import std.string : lineSplitter;
391     import dextool.set;
392     import dextool.type;
393 }
394 
395 @("shall detect the changes lines (unified git diff)")
396 unittest {
397     immutable lines = `diff --git a/standalone2.d b/standalone2.d
398 index 0123..2345 100644
399 --- a/standalone.d
400 +++ b/standalone2.d
401 @@ -31,7 +31,6 @@ import std.algorithm : map;
402  import std.array : Appender, appender, array;
403  import std.datetime : SysTime;
404 +import std.format : format;
405 -import std.typecons : Tuple;
406 
407  import d2sqlite3 : sqlDatabase = Database;
408 
409 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
410  struct Database {
411      import std.conv : to;
412      import std.exception : collectException;
413 -    import std.typecons : Nullable;
414 +    import std.typecons : Nullable, Flag, No;
415      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
416 
417 +    sqlDatabase db;`;
418 
419     UnifiedDiffParser p;
420     foreach (line; lines.lineSplitter)
421         p.process(line);
422 
423     // assert
424     p.result[Path("standalone2.d")].contains(33).shouldBeTrue;
425     p.result[Path("standalone2.d")].contains(48).shouldBeTrue;
426     p.result[Path("standalone2.d")].contains(51).shouldBeTrue;
427     p.result.length.should == 1;
428 }
429 
430 @("shall detect the changed lines (unified with date)")
431 unittest {
432 
433     immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp	2018-11-18 21:25:46.631640690 +0100
434 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp	2018-11-18 21:26:17.003691847 +0100
435 @@ -3,7 +3,7 @@
436  /// @author Joakim Brännström (joakim.brannstrom@gmx.com)
437 
438  int fun(int x) {
439 -    if (x > 3) {
440 +    if (x != 3) {
441          return 0;
442      }
443      return 1;`;
444 
445     UnifiedDiffParser p;
446     foreach (line; lines.lineSplitter)
447         p.process(line);
448 
449     // assert
450     p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6)
451         .shouldBeTrue;
452     p.result.length.should == 1;
453 }