1 /**
2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains a parser of diffs in the Unified Format.
11 It is pretty limited because it only handles those that git normally output.
12 The result are which lines has changes in what files.
13 
14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified
15 
16 Example:
17 ---
18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
20 @@ -31,7 +31,6 @@ import std.algorithm : map;
21  import std.array : Appender, appender, array;
22  import std.datetime : SysTime;
23  import std.format : format;
24 -import std.typecons : Tuple;
25 
26  import d2sqlite3 : sqlDatabase = Database;
27 
28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
29  struct Database {
30      import std.conv : to;
31      import std.exception : collectException;
32 -    import std.typecons : Nullable;
33 +    import std.typecons : Nullable, Flag, No;
34      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
35 
36      sqlDatabase db;
37 ---
38 */
39 module dextool.plugin.mutate.backend.diff_parser;
40 
41 import logger = std.experimental.logger;
42 import std.path : buildPath;
43 import std.range : ElementType;
44 import std.traits : isSomeString;
45 
46 import dextool.type : AbsolutePath, Path;
47 
48 version (unittest) {
49     import unit_threaded : shouldEqual, shouldBeTrue, should;
50 }
51 
52 Diff diffFromStdin() @trusted {
53     import std.stdio : stdin;
54     import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser;
55 
56     return toDiff(stdin.byLine);
57 }
58 
59 /** Parse a range of lines to a diff.
60  *
61  * Params:
62  *  r = range of strings which is the diff
63  */
64 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) {
65     UnifiedDiffParser parser;
66     foreach (l; r) {
67         debug logger.trace(l);
68         parser.process(l);
69     }
70     return parser.result;
71 }
72 
73 @safe:
74 
75 struct Diff {
76     import my.set;
77 
78     static struct Line {
79         uint line;
80         string text;
81     }
82 
83     /// The raw diff that where parsed.
84     Line[][Path] rawDiff;
85 
86     alias ChangedLines = Set!uint;
87 
88     ChangedLines[Path] changes;
89     alias changes this;
90 
91     bool empty() @safe pure nothrow const @nogc {
92         return changes.length == 0;
93     }
94 
95     /** A range over the changes by file.
96      *
97      * The paths are adjusted to be relative `workdir`.
98      */
99     auto toRange(AbsolutePath workdir) @safe {
100         return DiffRange(this, workdir);
101     }
102 }
103 
104 struct DiffRange {
105     import std.path : relativePath;
106     import std.array : array;
107     import my.set;
108 
109     static struct KeyValue {
110         Path key;
111         Diff.ChangedLines value;
112         AbsolutePath absPath;
113     }
114 
115     private {
116         Diff diff;
117         AbsolutePath workdir;
118         Path[] keys;
119     }
120 
121     this(Diff d, AbsolutePath workdir) {
122         this.diff = d;
123         this.workdir = workdir;
124         this.keys = diff.byKey.array;
125         debug logger.trace(workdir);
126     }
127 
128     KeyValue front() @safe {
129         assert(!empty, "Can't get front of an empty range");
130         debug logger.trace(keys[0]);
131         return KeyValue(keys[0], diff[keys[0]], AbsolutePath(buildPath(workdir, keys[0])));
132     }
133 
134     void popFront() @safe pure nothrow {
135         assert(!empty, "Can't pop front of an empty range");
136         keys = keys[1 .. $];
137     }
138 
139     bool empty() @safe pure nothrow const @nogc {
140         return keys.length == 0;
141     }
142 }
143 
144 /** Parse a buffer in the Unified diff format and return the hunks of changes
145  * in the targets.
146  */
147 struct UnifiedDiffParser {
148     import std.regex : Regex, regex, matchFirst, matchAll;
149     import std.typecons : Tuple;
150 
151     Diff result;
152 
153     private {
154         // diff --git a/usability/report.md b/usability/report.md
155         Regex!char re_git_diff_hdr = regex(`^diff --git.*`);
156         // --- a/standalone.d
157         Regex!char re_hdr_original = regex(`^--- (?P<hdr>.*)`);
158         // +++ a/standalone.d
159         Regex!char re_hdr_new = regex(`^\+\+\+ (?P<hdr>.*)`);
160         // @@ -31,7 +31,6 @@ import std.algorithm : map;
161         Regex!char re_hunk_start_multiline = regex(
162                 `^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`);
163         // @@ -31 +31 @@ import std.algorithm : map;
164         Regex!char re_hunk_start_line = regex(`^@@ -\d* \+(?P<line>\d*) @@.*`);
165 
166         alias FsmState = Tuple!(bool, State, Action[]);
167 
168         FsmState st;
169         StateData data;
170         bool isGitDiff;
171     }
172 
173     void process(T)(T line) {
174         import std.conv : to;
175         import std.meta;
176         import std.traits : EnumMembers;
177         import std..string : startsWith, split;
178         import dextool.type : Path, AbsolutePath;
179         import my.set;
180 
181         auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty;
182         auto hdr_original = matchFirst(line, re_hdr_original);
183         auto hdr_new = matchFirst(line, re_hdr_new);
184         auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline);
185         auto hunk_start_line = matchFirst(line, re_hunk_start_line);
186         const first_char = line.length != 0 ? line[0] : typeof(line[0]).init;
187 
188         FsmState nextState(const State st) {
189             auto next = FsmState(false, st, null);
190 
191             final switch (st) {
192             case State.findHdr:
193                 if (is_git_diff) {
194                     next[1] = State.findHdrOriginal;
195                     next[2] = [Action.setGitDiff];
196                 } else if (!hdr_original.empty) {
197                     next[0] = true;
198                     next[1] = State.findHdrOriginal;
199                 }
200                 break;
201             case State.findHdrOriginal:
202                 if (!hdr_original.empty) {
203                     next[1] = State.findHdrNew;
204                     next[2] = [Action.resetStateData, Action.saveOriginal];
205                 }
206                 break;
207             case State.findHdrNew:
208                 if (!hdr_new.empty) {
209                     next[1] = State.checkOrigNew;
210                     next[2] = [Action.saveNew];
211                     next[0] = true;
212                 }
213                 break;
214             case State.checkOrigNew:
215                 if (data.hdrOriginal.length == 0) {
216                     next[1] = State.findHdrOriginal;
217                     next[2] = [Action.warnOriginal];
218                 } else if (data.hdrNew.length == 0) {
219                     next[1] = State.findHdrOriginal;
220                     next[2] = [Action.warnNew];
221                 } else {
222                     next[1] = State.findHunkStart;
223                 }
224                 break;
225             case State.findNext:
226                 if (!hunk_start_multiline.empty || !hunk_start_line.empty) {
227                     next[1] = State.findHunkStart;
228                     next[0] = true;
229                 } else if (!hdr_original.empty) {
230                     next[1] = State.findHdrOriginal;
231                     next[0] = true;
232                 }
233                 break;
234             case State.findHunkStart:
235                 if (!hunk_start_multiline.empty) {
236                     next[1] = State.insideHunk;
237                     next[2] = [Action.multiLineHunk];
238                 } else if (!hunk_start_line.empty) {
239                     next[1] = State.insideHunk;
240                     next[2] = [Action.lineHunk];
241                 }
242                 break;
243             case State.insideHunk:
244                 next[2] = [Action.saveRawDiff];
245                 if (first_char == '+')
246                     next[2] ~= [Action.plusLine];
247                 else if (first_char == ' ' || line.length == 0)
248                     next[2] ~= [Action.blankLine];
249                 next[1] = State.checkHunkCounter;
250                 next[0] = true;
251                 break;
252             case State.checkHunkCounter:
253                 next[1] = State.insideHunk;
254                 if (data.count == data.maxCount)
255                     next[1] = State.findNext;
256                 break;
257             }
258 
259             return next;
260         }
261 
262         void resetStateDataAct() {
263             data = StateData.init;
264         }
265 
266         void saveOriginalAct() {
267             auto a = hdr_original["hdr"];
268             auto p = () {
269                 if (isGitDiff && a.length > 2)
270                     return a[2 .. $].split('\t');
271                 return a.split('\t');
272             }();
273 
274             data.hdrOriginal = Path(p[0].idup);
275         }
276 
277         void saveNewAct() {
278             auto a = hdr_new["hdr"];
279             auto p = () {
280                 if (isGitDiff && a.length > 2)
281                     return a[2 .. $].split('\t');
282                 return a.split('\t');
283             }();
284 
285             data.hdrNew = Path(p[0].idup);
286         }
287 
288         void warnOriginalAct() {
289             logger.warning(
290                     "Broken diff data. The original file, as specificed after ---, has length zero");
291         }
292 
293         void warnNewAct() {
294             logger.warning(
295                     "Broken diff data. The new file, as specificed after +++, has length zero");
296         }
297 
298         void multiLineHunkAct() {
299             try {
300                 data.startPos = hunk_start_multiline["line"].to!uint;
301                 data.maxCount = hunk_start_multiline["count"].to!uint;
302                 data.count = 0;
303             } catch (Exception e) {
304                 logger.info(e.msg);
305                 logger.info("Unable to parse diff line: ", line);
306             }
307         }
308 
309         void lineHunkAct() {
310             try {
311                 data.startPos = hunk_start_multiline["line"].to!uint;
312                 data.maxCount = 1;
313                 data.count = 0;
314             } catch (Exception e) {
315                 logger.info(e.msg);
316                 logger.info("Unable to parse diff line: ", line);
317             }
318         }
319 
320         void plusLineAct() {
321             if (data.hdrNew !in result.changes)
322                 result[data.hdrNew] = Diff.ChangedLines.init;
323             result[data.hdrNew].add(data.startPos + data.count);
324             data.count++;
325         }
326 
327         void blankLineAct() {
328             data.count++;
329         }
330 
331         void setGitDiffAct() {
332             isGitDiff = true;
333         }
334 
335         void saveRawDiffAct() {
336             result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup);
337         }
338 
339         st[0] = true;
340         while (st[0]) {
341             st = nextState(st[1]);
342             debug logger.tracef("%s | %s", line, st);
343 
344             foreach (const act; st[2]) {
345                 static foreach (Member; [EnumMembers!Action]) {
346                     if (act == Member)
347                         mixin(Member.to!string ~ "Act();");
348                 }
349             }
350             debug logger.tracef("%s | %s %s", result, data, isGitDiff);
351         }
352     }
353 }
354 
355 private:
356 
357 struct StateData {
358     Path hdrOriginal;
359     Path hdrNew;
360     uint startPos;
361     uint maxCount;
362     uint count;
363 }
364 
365 enum State {
366     findHdr,
367     findHdrOriginal,
368     findHdrNew,
369     checkOrigNew,
370     findHunkStart,
371     insideHunk,
372     findNext,
373     checkHunkCounter,
374 }
375 
376 enum Action {
377     setGitDiff,
378     resetStateData,
379     saveOriginal,
380     saveNew,
381     warnOriginal,
382     warnNew,
383     multiLineHunk,
384     lineHunk,
385     plusLine,
386     blankLine,
387     saveRawDiff,
388 }
389 
390 version (unittest) {
391     import std..string : lineSplitter;
392     import my.set;
393     import dextool.type;
394 }
395 
396 @("shall detect the changes lines (unified git diff)")
397 unittest {
398     immutable lines = `diff --git a/standalone2.d b/standalone2.d
399 index 0123..2345 100644
400 --- a/standalone.d
401 +++ b/standalone2.d
402 @@ -31,7 +31,6 @@ import std.algorithm : map;
403  import std.array : Appender, appender, array;
404  import std.datetime : SysTime;
405 +import std.format : format;
406 -import std.typecons : Tuple;
407 
408  import d2sqlite3 : sqlDatabase = Database;
409 
410 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
411  struct Database {
412      import std.conv : to;
413      import std.exception : collectException;
414 -    import std.typecons : Nullable;
415 +    import std.typecons : Nullable, Flag, No;
416      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
417 
418 +    sqlDatabase db;`;
419 
420     UnifiedDiffParser p;
421     foreach (line; lines.lineSplitter)
422         p.process(line);
423 
424     // assert
425     p.result[Path("standalone2.d")].contains(33).shouldBeTrue;
426     p.result[Path("standalone2.d")].contains(48).shouldBeTrue;
427     p.result[Path("standalone2.d")].contains(51).shouldBeTrue;
428     p.result.length.should == 1;
429 }
430 
431 @("shall detect the changed lines (unified with date)")
432 unittest {
433 
434     immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp	2018-11-18 21:25:46.631640690 +0100
435 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp	2018-11-18 21:26:17.003691847 +0100
436 @@ -3,7 +3,7 @@
437  /// @author Joakim Brännström (joakim.brannstrom@gmx.com)
438 
439  int fun(int x) {
440 -    if (x > 3) {
441 +    if (x != 3) {
442          return 0;
443      }
444      return 1;`;
445 
446     UnifiedDiffParser p;
447     foreach (line; lines.lineSplitter)
448         p.process(line);
449 
450     // assert
451     p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6)
452         .shouldBeTrue;
453     p.result.length.should == 1;
454 }