1 /** 2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 This module contains a parser of diffs in the Unified Format. 11 It is pretty limited because it only handles those that git normally output. 12 The result are which lines has changes in what files. 13 14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified 15 16 Example: 17 --- 18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 20 @@ -31,7 +31,6 @@ import std.algorithm : map; 21 import std.array : Appender, appender, array; 22 import std.datetime : SysTime; 23 import std.format : format; 24 -import std.typecons : Tuple; 25 26 import d2sqlite3 : sqlDatabase = Database; 27 28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 29 struct Database { 30 import std.conv : to; 31 import std.exception : collectException; 32 - import std.typecons : Nullable; 33 + import std.typecons : Nullable, Flag, No; 34 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 35 36 sqlDatabase db; 37 --- 38 */ 39 module dextool.plugin.mutate.backend.diff_parser; 40 41 import logger = std.experimental.logger; 42 import std.range : ElementType; 43 import std.traits : isSomeString; 44 45 import dextool.type : AbsolutePath, Path, DirName; 46 47 version (unittest) { 48 import unit_threaded : shouldEqual, shouldBeTrue, should; 49 } 50 51 Diff diffFromStdin() @trusted { 52 import std.stdio : stdin; 53 import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser; 54 55 return toDiff(stdin.byLine); 56 } 57 58 /** Parse a range of lines to a diff. 59 * 60 * Params: 61 * r = range of strings which is the diff 62 */ 63 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) { 64 UnifiedDiffParser parser; 65 foreach (l; r) { 66 debug logger.trace(l); 67 parser.process(l); 68 } 69 return parser.result; 70 } 71 72 @safe: 73 74 struct Diff { 75 import dextool.set; 76 77 static struct Line { 78 uint line; 79 string text; 80 } 81 82 /// The raw diff that where parsed. 83 Line[][Path] rawDiff; 84 85 alias ChangedLines = Set!uint; 86 87 ChangedLines[Path] changes; 88 alias changes this; 89 90 bool empty() @safe pure nothrow const @nogc { 91 return changes.length == 0; 92 } 93 94 /** A range over the changes by file. 95 * 96 * The paths are adjusted to be relative `workdir`. 97 */ 98 auto toRange(AbsolutePath workdir) @safe { 99 return DiffRange(this, workdir); 100 } 101 } 102 103 struct DiffRange { 104 import std.path : relativePath; 105 import std.array : array; 106 import dextool.set; 107 108 static struct KeyValue { 109 Path key; 110 Diff.ChangedLines value; 111 AbsolutePath absPath; 112 } 113 114 private { 115 Diff diff; 116 AbsolutePath workdir; 117 Path[] keys; 118 } 119 120 this(Diff d, AbsolutePath workdir) { 121 this.diff = d; 122 this.workdir = workdir; 123 this.keys = diff.byKey.array; 124 debug logger.trace(workdir); 125 } 126 127 KeyValue front() @safe { 128 assert(!empty, "Can't get front of an empty range"); 129 debug logger.trace(keys[0]); 130 return KeyValue(keys[0], diff[keys[0]], AbsolutePath(keys[0], 131 DirName(cast(string) workdir))); 132 } 133 134 void popFront() @safe pure nothrow { 135 assert(!empty, "Can't pop front of an empty range"); 136 keys = keys[1 .. $]; 137 } 138 139 bool empty() @safe pure nothrow const @nogc { 140 return keys.length == 0; 141 } 142 } 143 144 /** Parse a buffer in the Unified diff format and return the hunks of changes 145 * in the targets. 146 */ 147 struct UnifiedDiffParser { 148 import std.regex : ctRegex, matchFirst, matchAll; 149 import std.typecons : Tuple; 150 151 Diff result; 152 153 private { 154 // diff --git a/usability/report.md b/usability/report.md 155 enum re_git_diff_hdr = ctRegex!(`^diff --git.*`); 156 // --- a/standalone.d 157 enum re_hdr_original = ctRegex!(`^--- (?P<hdr>.*)`); 158 // +++ a/standalone.d 159 enum re_hdr_new = ctRegex!(`^\+\+\+ (?P<hdr>.*)`); 160 // @@ -31,7 +31,6 @@ import std.algorithm : map; 161 enum re_hunk_start_multiline = ctRegex!(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`); 162 // @@ -31 +31 @@ import std.algorithm : map; 163 enum re_hunk_start_line = ctRegex!(`^@@ -\d* \+(?P<line>\d*) @@.*`); 164 165 alias FsmState = Tuple!(bool, State, Action[]); 166 167 FsmState st; 168 StateData data; 169 bool isGitDiff; 170 } 171 172 void process(T)(T line) { 173 import std.conv : to; 174 import std.meta; 175 import std.traits : EnumMembers; 176 import std.string : startsWith, split; 177 import dextool.type : Path, AbsolutePath; 178 import dextool.set; 179 180 auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty; 181 auto hdr_original = matchFirst(line, re_hdr_original); 182 auto hdr_new = matchFirst(line, re_hdr_new); 183 auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline); 184 auto hunk_start_line = matchFirst(line, re_hunk_start_line); 185 const first_char = line.length != 0 ? line[0] : typeof(line[0]).init; 186 187 FsmState nextState(const State st) { 188 auto next = FsmState(false, st, null); 189 190 final switch (st) { 191 case State.findHdr: 192 if (is_git_diff) { 193 next[1] = State.findHdrOriginal; 194 next[2] = [Action.setGitDiff]; 195 } else if (!hdr_original.empty) { 196 next[0] = true; 197 next[1] = State.findHdrOriginal; 198 } 199 break; 200 case State.findHdrOriginal: 201 if (!hdr_original.empty) { 202 next[1] = State.findHdrNew; 203 next[2] = [Action.resetStateData, Action.saveOriginal]; 204 } 205 break; 206 case State.findHdrNew: 207 if (!hdr_new.empty) { 208 next[1] = State.checkOrigNew; 209 next[2] = [Action.saveNew]; 210 next[0] = true; 211 } 212 break; 213 case State.checkOrigNew: 214 if (data.hdrOriginal.length == 0) { 215 next[1] = State.findHdrOriginal; 216 next[2] = [Action.warnOriginal]; 217 } else if (data.hdrNew.length == 0) { 218 next[1] = State.findHdrOriginal; 219 next[2] = [Action.warnNew]; 220 } else { 221 next[1] = State.findHunkStart; 222 } 223 break; 224 case State.findNext: 225 if (!hunk_start_multiline.empty || !hunk_start_line.empty) { 226 next[1] = State.findHunkStart; 227 next[0] = true; 228 } else if (!hdr_original.empty) { 229 next[1] = State.findHdrOriginal; 230 next[0] = true; 231 } 232 break; 233 case State.findHunkStart: 234 if (!hunk_start_multiline.empty) { 235 next[1] = State.insideHunk; 236 next[2] = [Action.multiLineHunk]; 237 } else if (!hunk_start_line.empty) { 238 next[1] = State.insideHunk; 239 next[2] = [Action.lineHunk]; 240 } 241 break; 242 case State.insideHunk: 243 next[2] = [Action.saveRawDiff]; 244 if (first_char == '+') 245 next[2] ~= [Action.plusLine]; 246 else if (first_char == ' ' || line.length == 0) 247 next[2] ~= [Action.blankLine]; 248 next[1] = State.checkHunkCounter; 249 next[0] = true; 250 break; 251 case State.checkHunkCounter: 252 next[1] = State.insideHunk; 253 if (data.count == data.maxCount) 254 next[1] = State.findNext; 255 break; 256 } 257 258 return next; 259 } 260 261 void resetStateDataAct() { 262 data = StateData.init; 263 } 264 265 void saveOriginalAct() { 266 auto a = hdr_original["hdr"]; 267 auto p = () { 268 if (isGitDiff && a.length > 2) 269 return a[2 .. $].split('\t'); 270 return a.split('\t'); 271 }(); 272 273 data.hdrOriginal = Path(p[0].idup); 274 } 275 276 void saveNewAct() { 277 auto a = hdr_new["hdr"]; 278 auto p = () { 279 if (isGitDiff && a.length > 2) 280 return a[2 .. $].split('\t'); 281 return a.split('\t'); 282 }(); 283 284 data.hdrNew = Path(p[0].idup); 285 } 286 287 void warnOriginalAct() { 288 logger.warning( 289 "Broken diff data. The original file, as specificed after ---, has length zero"); 290 } 291 292 void warnNewAct() { 293 logger.warning( 294 "Broken diff data. The new file, as specificed after +++, has length zero"); 295 } 296 297 void multiLineHunkAct() { 298 try { 299 data.startPos = hunk_start_multiline["line"].to!uint; 300 data.maxCount = hunk_start_multiline["count"].to!uint; 301 data.count = 0; 302 } catch (Exception e) { 303 logger.info(e.msg); 304 logger.info("Unable to parse diff line: ", line); 305 } 306 } 307 308 void lineHunkAct() { 309 try { 310 data.startPos = hunk_start_multiline["line"].to!uint; 311 data.maxCount = 1; 312 data.count = 0; 313 } catch (Exception e) { 314 logger.info(e.msg); 315 logger.info("Unable to parse diff line: ", line); 316 } 317 } 318 319 void plusLineAct() { 320 if (data.hdrNew !in result.changes) 321 result[data.hdrNew] = Diff.ChangedLines.init; 322 result[data.hdrNew].add(data.startPos + data.count); 323 data.count++; 324 } 325 326 void blankLineAct() { 327 data.count++; 328 } 329 330 void setGitDiffAct() { 331 isGitDiff = true; 332 } 333 334 void saveRawDiffAct() { 335 result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup); 336 } 337 338 st[0] = true; 339 while (st[0]) { 340 st = nextState(st[1]); 341 debug logger.tracef("%s | %s", line, st); 342 343 foreach (const act; st[2]) { 344 static foreach (Member; [EnumMembers!Action]) { 345 if (act == Member) 346 mixin(Member.to!string ~ "Act();"); 347 } 348 } 349 debug logger.tracef("%s | %s %s", result, data, isGitDiff); 350 } 351 } 352 } 353 354 private: 355 356 struct StateData { 357 Path hdrOriginal; 358 Path hdrNew; 359 uint startPos; 360 uint maxCount; 361 uint count; 362 } 363 364 enum State { 365 findHdr, 366 findHdrOriginal, 367 findHdrNew, 368 checkOrigNew, 369 findHunkStart, 370 insideHunk, 371 findNext, 372 checkHunkCounter, 373 } 374 375 enum Action { 376 setGitDiff, 377 resetStateData, 378 saveOriginal, 379 saveNew, 380 warnOriginal, 381 warnNew, 382 multiLineHunk, 383 lineHunk, 384 plusLine, 385 blankLine, 386 saveRawDiff, 387 } 388 389 version (unittest) { 390 import std.string : lineSplitter; 391 import dextool.set; 392 import dextool.type; 393 } 394 395 @("shall detect the changes lines (unified git diff)") 396 unittest { 397 immutable lines = `diff --git a/standalone2.d b/standalone2.d 398 index 0123..2345 100644 399 --- a/standalone.d 400 +++ b/standalone2.d 401 @@ -31,7 +31,6 @@ import std.algorithm : map; 402 import std.array : Appender, appender, array; 403 import std.datetime : SysTime; 404 +import std.format : format; 405 -import std.typecons : Tuple; 406 407 import d2sqlite3 : sqlDatabase = Database; 408 409 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 410 struct Database { 411 import std.conv : to; 412 import std.exception : collectException; 413 - import std.typecons : Nullable; 414 + import std.typecons : Nullable, Flag, No; 415 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 416 417 + sqlDatabase db;`; 418 419 UnifiedDiffParser p; 420 foreach (line; lines.lineSplitter) 421 p.process(line); 422 423 // assert 424 p.result[Path("standalone2.d")].contains(33).shouldBeTrue; 425 p.result[Path("standalone2.d")].contains(48).shouldBeTrue; 426 p.result[Path("standalone2.d")].contains(51).shouldBeTrue; 427 p.result.length.should == 1; 428 } 429 430 @("shall detect the changed lines (unified with date)") 431 unittest { 432 433 immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp 2018-11-18 21:25:46.631640690 +0100 434 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp 2018-11-18 21:26:17.003691847 +0100 435 @@ -3,7 +3,7 @@ 436 /// @author Joakim Brännström (joakim.brannstrom@gmx.com) 437 438 int fun(int x) { 439 - if (x > 3) { 440 + if (x != 3) { 441 return 0; 442 } 443 return 1;`; 444 445 UnifiedDiffParser p; 446 foreach (line; lines.lineSplitter) 447 p.process(line); 448 449 // assert 450 p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6) 451 .shouldBeTrue; 452 p.result.length.should == 1; 453 }