1 /**
2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains a parser of diffs in the Unified Format.
11 It is pretty limited because it only handles those that git normally output.
12 The result are which lines has changes in what files.
13 
14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified
15 
16 Example:
17 ---
18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d
20 @@ -31,7 +31,6 @@ import std.algorithm : map;
21  import std.array : Appender, appender, array;
22  import std.datetime : SysTime;
23  import std.format : format;
24 -import std.typecons : Tuple;
25 
26  import d2sqlite3 : sqlDatabase = Database;
27 
28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
29  struct Database {
30      import std.conv : to;
31      import std.exception : collectException;
32 -    import std.typecons : Nullable;
33 +    import std.typecons : Nullable, Flag, No;
34      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
35 
36      sqlDatabase db;
37 ---
38 */
39 module dextool.plugin.mutate.backend.diff_parser;
40 
41 import logger = std.experimental.logger;
42 
43 version (unittest) {
44     import unit_threaded : shouldEqual, shouldBeTrue, should;
45 }
46 
47 @safe:
48 
49 struct Diff {
50     import dextool.type : AbsolutePath;
51     import dextool.set;
52 
53     static struct Line {
54         uint line;
55         string text;
56     }
57 
58     /// The raw diff that where parsed.
59     Line[][AbsolutePath] rawDiff;
60 
61     alias ChangedLines = Set!uint;
62 
63     ChangedLines[AbsolutePath] changes;
64     alias changes this;
65 
66     bool empty() @safe pure nothrow const @nogc {
67         return changes.length == 0;
68     }
69 
70     /** A range over the changes by file.
71      *
72      * The paths are adjusted to be relative `workdir`.
73      */
74     auto toRange(AbsolutePath workdir) @safe {
75         return DiffRange(this, workdir);
76     }
77 }
78 
79 struct DiffRange {
80     import dextool.type : AbsolutePath, Path;
81     import std.path : relativePath;
82     import std.array : array;
83     import dextool.set;
84 
85     static struct KeyValue {
86         Path key;
87         Diff.ChangedLines value;
88         AbsolutePath absPath;
89     }
90 
91     private {
92         Diff diff;
93         AbsolutePath workdir;
94         AbsolutePath[] keys;
95     }
96 
97     this(Diff d, AbsolutePath workdir) {
98         this.diff = d;
99         this.workdir = workdir;
100         this.keys = diff.byKey.array;
101         debug logger.trace(workdir);
102     }
103 
104     KeyValue front() @safe pure {
105         assert(!empty, "Can't get front of an empty range");
106         debug logger.trace(keys[0]);
107         return KeyValue(keys[0].relativePath(workdir).Path, diff[keys[0]], keys[0]);
108     }
109 
110     void popFront() @safe pure nothrow {
111         assert(!empty, "Can't pop front of an empty range");
112         keys = keys[1 .. $];
113     }
114 
115     bool empty() @safe pure nothrow const @nogc {
116         return keys.length == 0;
117     }
118 }
119 
120 /** Parse a buffer in the Unified diff format and return the hunks of changes
121  * in the targets.
122  */
123 struct UnifiedDiffParser {
124     import std.regex : ctRegex, matchFirst, matchAll;
125     import std.typecons : Tuple;
126 
127     Diff result;
128 
129     private {
130         // diff --git a/usability/report.md b/usability/report.md
131         enum re_git_diff_hdr = ctRegex!(`^diff --git.*`);
132         // --- a/standalone.d
133         enum re_hdr_original = ctRegex!(`^--- (?P<hdr>.*)`);
134         // +++ a/standalone.d
135         enum re_hdr_new = ctRegex!(`^\+\+\+ (?P<hdr>.*)`);
136         // @@ -31,7 +31,6 @@ import std.algorithm : map;
137         enum re_hunk_start_multiline = ctRegex!(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`);
138         // @@ -31 +31 @@ import std.algorithm : map;
139         enum re_hunk_start_line = ctRegex!(`^@@ -\d* \+(?P<line>\d*) @@.*`);
140 
141         alias FsmState = Tuple!(bool, State, Action[]);
142 
143         FsmState st;
144         StateData data;
145         bool isGitDiff;
146     }
147 
148     void process(T)(T line) {
149         import std.conv : to;
150         import std.meta;
151         import std.traits : EnumMembers;
152         import std.string : startsWith, split;
153         import dextool.type : Path, AbsolutePath;
154         import dextool.set;
155 
156         auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty;
157         auto hdr_original = matchFirst(line, re_hdr_original);
158         auto hdr_new = matchFirst(line, re_hdr_new);
159         auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline);
160         auto hunk_start_line = matchFirst(line, re_hunk_start_line);
161         const first_char = line.length != 0 ? line[0] : typeof(line[0]).init;
162 
163         FsmState nextState(const State st) {
164             auto next = FsmState(false, st, null);
165 
166             final switch (st) {
167             case State.findHdr:
168                 if (is_git_diff) {
169                     next[1] = State.findHdrOriginal;
170                     next[2] = [Action.setGitDiff];
171                 } else if (!hdr_original.empty) {
172                     next[0] = true;
173                     next[1] = State.findHdrOriginal;
174                 }
175                 break;
176             case State.findHdrOriginal:
177                 if (!hdr_original.empty) {
178                     next[1] = State.findHdrNew;
179                     next[2] = [Action.resetStateData, Action.saveOriginal];
180                 }
181                 break;
182             case State.findHdrNew:
183                 if (!hdr_new.empty) {
184                     next[1] = State.checkOrigNew;
185                     next[2] = [Action.saveNew];
186                     next[0] = true;
187                 }
188                 break;
189             case State.checkOrigNew:
190                 if (data.hdrOriginal.length == 0) {
191                     next[1] = State.findHdrOriginal;
192                     next[2] = [Action.warnOriginal];
193                 } else if (data.hdrNew.length == 0) {
194                     next[1] = State.findHdrOriginal;
195                     next[2] = [Action.warnNew];
196                 } else {
197                     next[1] = State.findHunkStart;
198                 }
199                 break;
200             case State.findNext:
201                 if (!hunk_start_multiline.empty || !hunk_start_line.empty) {
202                     next[1] = State.findHunkStart;
203                     next[0] = true;
204                 } else if (!hdr_original.empty) {
205                     next[1] = State.findHdrOriginal;
206                     next[0] = true;
207                 }
208                 break;
209             case State.findHunkStart:
210                 if (!hunk_start_multiline.empty) {
211                     next[1] = State.insideHunk;
212                     next[2] = [Action.multiLineHunk];
213                 } else if (!hunk_start_line.empty) {
214                     next[1] = State.insideHunk;
215                     next[2] = [Action.lineHunk];
216                 }
217                 break;
218             case State.insideHunk:
219                 next[2] = [Action.saveRawDiff];
220                 if (first_char == '+')
221                     next[2] ~= [Action.plusLine];
222                 else if (first_char == ' ' || line.length == 0)
223                     next[2] ~= [Action.blankLine];
224                 next[1] = State.checkHunkCounter;
225                 next[0] = true;
226                 break;
227             case State.checkHunkCounter:
228                 next[1] = State.insideHunk;
229                 if (data.count == data.maxCount)
230                     next[1] = State.findNext;
231                 break;
232             }
233 
234             return next;
235         }
236 
237         void resetStateDataAct() {
238             data = StateData.init;
239         }
240 
241         void saveOriginalAct() {
242             auto a = hdr_original["hdr"];
243             auto p = () {
244                 if (isGitDiff && a.length > 2)
245                     return a[2 .. $].split('\t');
246                 return a.split('\t');
247             }();
248 
249             data.hdrOriginal = Path(p[0].idup).AbsolutePath;
250         }
251 
252         void saveNewAct() {
253             auto a = hdr_new["hdr"];
254             auto p = () {
255                 if (isGitDiff && a.length > 2)
256                     return a[2 .. $].split('\t');
257                 return a.split('\t');
258             }();
259 
260             data.hdrNew = Path(p[0].idup).AbsolutePath;
261         }
262 
263         void warnOriginalAct() {
264             logger.warning(
265                     "Broken diff data. The original file, as specificed after ---, has length zero");
266         }
267 
268         void warnNewAct() {
269             logger.warning(
270                     "Broken diff data. The new file, as specificed after +++, has length zero");
271         }
272 
273         void multiLineHunkAct() {
274             data.startPos = hunk_start_multiline["line"].to!uint;
275             data.maxCount = hunk_start_multiline["count"].to!uint;
276             data.count = 0;
277         }
278 
279         void lineHunkAct() {
280             data.startPos = hunk_start_multiline["line"].to!uint;
281             data.maxCount = 1;
282             data.count = 0;
283         }
284 
285         void plusLineAct() {
286             if (data.hdrNew !in result.changes)
287                 result[data.hdrNew] = Diff.ChangedLines.init;
288             result[data.hdrNew].add(data.startPos + data.count);
289             data.count++;
290         }
291 
292         void blankLineAct() {
293             data.count++;
294         }
295 
296         void setGitDiffAct() {
297             isGitDiff = true;
298         }
299 
300         void saveRawDiffAct() {
301             result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup);
302         }
303 
304         st[0] = true;
305         while (st[0]) {
306             st = nextState(st[1]);
307             debug logger.tracef("%s | %s", line, st);
308 
309             foreach (const act; st[2]) {
310                 static foreach (Member; [EnumMembers!Action]) {
311                     if (act == Member)
312                         mixin(Member.to!string ~ "Act();");
313                 }
314             }
315             debug logger.tracef("%s | %s %s", result, data, isGitDiff);
316         }
317     }
318 }
319 
320 private:
321 
322 struct StateData {
323     import dextool.type : AbsolutePath;
324 
325     AbsolutePath hdrOriginal;
326     AbsolutePath hdrNew;
327     uint startPos;
328     uint maxCount;
329     uint count;
330 }
331 
332 enum State {
333     findHdr,
334     findHdrOriginal,
335     findHdrNew,
336     checkOrigNew,
337     findHunkStart,
338     insideHunk,
339     findNext,
340     checkHunkCounter,
341 }
342 
343 enum Action {
344     setGitDiff,
345     resetStateData,
346     saveOriginal,
347     saveNew,
348     warnOriginal,
349     warnNew,
350     multiLineHunk,
351     lineHunk,
352     plusLine,
353     blankLine,
354     saveRawDiff,
355 }
356 
357 version (unittest) {
358     import std.string : lineSplitter;
359     import dextool.set;
360     import dextool.type;
361 }
362 
363 @("shall detect the changes lines (unified git diff)")
364 unittest {
365     immutable lines = `diff --git a/standalone2.d b/standalone2.d
366 index 0123..2345 100644
367 --- a/standalone.d
368 +++ b/standalone2.d
369 @@ -31,7 +31,6 @@ import std.algorithm : map;
370  import std.array : Appender, appender, array;
371  import std.datetime : SysTime;
372 +import std.format : format;
373 -import std.typecons : Tuple;
374 
375  import d2sqlite3 : sqlDatabase = Database;
376 
377 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
378  struct Database {
379      import std.conv : to;
380      import std.exception : collectException;
381 -    import std.typecons : Nullable;
382 +    import std.typecons : Nullable, Flag, No;
383      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
384 
385 +    sqlDatabase db;`;
386 
387     UnifiedDiffParser p;
388     foreach (line; lines.lineSplitter)
389         p.process(line);
390 
391     // assert
392     p.result[Path("standalone2.d").AbsolutePath].contains(33).shouldBeTrue;
393     p.result[Path("standalone2.d").AbsolutePath].contains(48).shouldBeTrue;
394     p.result[Path("standalone2.d").AbsolutePath].contains(51).shouldBeTrue;
395     p.result.length.should == 1;
396 }
397 
398 @("shall detect the changed lines (unified with date)")
399 unittest {
400 
401     immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp	2018-11-18 21:25:46.631640690 +0100
402 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp	2018-11-18 21:26:17.003691847 +0100
403 @@ -3,7 +3,7 @@
404  /// @author Joakim Brännström (joakim.brannstrom@gmx.com)
405 
406  int fun(int x) {
407 -    if (x > 3) {
408 +    if (x != 3) {
409          return 0;
410      }
411      return 1;`;
412 
413     UnifiedDiffParser p;
414     foreach (line; lines.lineSplitter)
415         p.process(line);
416 
417     // assert
418     p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")
419         .AbsolutePath].contains(6).shouldBeTrue;
420     p.result.length.should == 1;
421 }