1 /** 2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 This module contains a parser of diffs in the Unified Format. 11 It is pretty limited because it only handles those that git normally output. 12 The result are which lines has changes in what files. 13 14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified 15 16 Example: 17 --- 18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 20 @@ -31,7 +31,6 @@ import std.algorithm : map; 21 import std.array : Appender, appender, array; 22 import std.datetime : SysTime; 23 import std.format : format; 24 -import std.typecons : Tuple; 25 26 import d2sqlite3 : sqlDatabase = Database; 27 28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 29 struct Database { 30 import std.conv : to; 31 import std.exception : collectException; 32 - import std.typecons : Nullable; 33 + import std.typecons : Nullable, Flag, No; 34 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 35 36 sqlDatabase db; 37 --- 38 */ 39 module dextool.plugin.mutate.backend.diff_parser; 40 41 import logger = std.experimental.logger; 42 import std.path : buildPath; 43 import std.range : ElementType; 44 import std.traits : isSomeString; 45 46 import dextool.type : AbsolutePath, Path; 47 48 version (unittest) { 49 import unit_threaded : shouldEqual, shouldBeTrue, should; 50 } 51 52 Diff diffFromStdin() @trusted { 53 import std.stdio : stdin; 54 import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser; 55 56 return toDiff(stdin.byLine); 57 } 58 59 /** Parse a range of lines to a diff. 60 * 61 * Params: 62 * r = range of strings which is the diff 63 */ 64 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) { 65 UnifiedDiffParser parser; 66 foreach (l; r) { 67 debug logger.trace(l); 68 parser.process(l); 69 } 70 return parser.result; 71 } 72 73 @safe: 74 75 struct Diff { 76 import my.set; 77 78 static struct Line { 79 uint line; 80 string text; 81 } 82 83 /// The raw diff that where parsed. 84 Line[][Path] rawDiff; 85 86 alias ChangedLines = Set!uint; 87 88 ChangedLines[Path] changes; 89 alias changes this; 90 91 bool empty() @safe pure nothrow const @nogc { 92 return changes.length == 0; 93 } 94 95 /** A range over the changes by file. 96 * 97 * The paths are adjusted to be relative `workdir`. 98 */ 99 auto toRange(AbsolutePath workdir) @safe { 100 return DiffRange(this, workdir); 101 } 102 } 103 104 struct DiffRange { 105 import std.path : relativePath; 106 import std.array : array; 107 import my.set; 108 109 static struct KeyValue { 110 Path key; 111 Diff.ChangedLines value; 112 AbsolutePath absPath; 113 } 114 115 private { 116 Diff diff; 117 AbsolutePath workdir; 118 Path[] keys; 119 } 120 121 this(Diff d, AbsolutePath workdir) { 122 this.diff = d; 123 this.workdir = workdir; 124 this.keys = diff.byKey.array; 125 debug logger.trace(workdir); 126 } 127 128 KeyValue front() @safe { 129 assert(!empty, "Can't get front of an empty range"); 130 debug logger.trace(keys[0]); 131 return KeyValue(keys[0], diff[keys[0]], AbsolutePath(buildPath(workdir, keys[0]))); 132 } 133 134 void popFront() @safe pure nothrow { 135 assert(!empty, "Can't pop front of an empty range"); 136 keys = keys[1 .. $]; 137 } 138 139 bool empty() @safe pure nothrow const @nogc { 140 return keys.length == 0; 141 } 142 } 143 144 /** Parse a buffer in the Unified diff format and return the hunks of changes 145 * in the targets. 146 */ 147 struct UnifiedDiffParser { 148 import std.regex : Regex, regex, matchFirst, matchAll; 149 import std.typecons : Tuple; 150 151 Diff result; 152 153 private { 154 // diff --git a/usability/report.md b/usability/report.md 155 Regex!char re_git_diff_hdr = regex(`^diff --git.*`); 156 // --- a/standalone.d 157 Regex!char re_hdr_original = regex(`^--- (?P<hdr>.*)`); 158 // +++ a/standalone.d 159 Regex!char re_hdr_new = regex(`^\+\+\+ (?P<hdr>.*)`); 160 // @@ -31,7 +31,6 @@ import std.algorithm : map; 161 Regex!char re_hunk_start_multiline = regex( 162 `^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`); 163 // @@ -31 +31 @@ import std.algorithm : map; 164 Regex!char re_hunk_start_line = regex(`^@@ -\d* \+(?P<line>\d*) @@.*`); 165 166 alias FsmState = Tuple!(bool, State, Action[]); 167 168 FsmState st; 169 StateData data; 170 bool isGitDiff; 171 } 172 173 void process(T)(T line) { 174 import std.conv : to; 175 import std.meta; 176 import std.traits : EnumMembers; 177 import std..string : startsWith, split; 178 import dextool.type : Path, AbsolutePath; 179 import my.set; 180 181 auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty; 182 auto hdr_original = matchFirst(line, re_hdr_original); 183 auto hdr_new = matchFirst(line, re_hdr_new); 184 auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline); 185 auto hunk_start_line = matchFirst(line, re_hunk_start_line); 186 const first_char = line.length != 0 ? line[0] : typeof(line[0]).init; 187 188 FsmState nextState(const State st) { 189 auto next = FsmState(false, st, null); 190 191 final switch (st) { 192 case State.findHdr: 193 if (is_git_diff) { 194 next[1] = State.findHdrOriginal; 195 next[2] = [Action.setGitDiff]; 196 } else if (!hdr_original.empty) { 197 next[0] = true; 198 next[1] = State.findHdrOriginal; 199 } 200 break; 201 case State.findHdrOriginal: 202 if (!hdr_original.empty) { 203 next[1] = State.findHdrNew; 204 next[2] = [Action.resetStateData, Action.saveOriginal]; 205 } 206 break; 207 case State.findHdrNew: 208 if (!hdr_new.empty) { 209 next[1] = State.checkOrigNew; 210 next[2] = [Action.saveNew]; 211 next[0] = true; 212 } 213 break; 214 case State.checkOrigNew: 215 if (data.hdrOriginal.length == 0) { 216 next[1] = State.findHdrOriginal; 217 next[2] = [Action.warnOriginal]; 218 } else if (data.hdrNew.length == 0) { 219 next[1] = State.findHdrOriginal; 220 next[2] = [Action.warnNew]; 221 } else { 222 next[1] = State.findHunkStart; 223 } 224 break; 225 case State.findNext: 226 if (!hunk_start_multiline.empty || !hunk_start_line.empty) { 227 next[1] = State.findHunkStart; 228 next[0] = true; 229 } else if (!hdr_original.empty) { 230 next[1] = State.findHdrOriginal; 231 next[0] = true; 232 } 233 break; 234 case State.findHunkStart: 235 if (!hunk_start_multiline.empty) { 236 next[1] = State.insideHunk; 237 next[2] = [Action.multiLineHunk]; 238 } else if (!hunk_start_line.empty) { 239 next[1] = State.insideHunk; 240 next[2] = [Action.lineHunk]; 241 } 242 break; 243 case State.insideHunk: 244 next[2] = [Action.saveRawDiff]; 245 if (first_char == '+') 246 next[2] ~= [Action.plusLine]; 247 else if (first_char == ' ' || line.length == 0) 248 next[2] ~= [Action.blankLine]; 249 next[1] = State.checkHunkCounter; 250 next[0] = true; 251 break; 252 case State.checkHunkCounter: 253 next[1] = State.insideHunk; 254 if (data.count == data.maxCount) 255 next[1] = State.findNext; 256 break; 257 } 258 259 return next; 260 } 261 262 void resetStateDataAct() { 263 data = StateData.init; 264 } 265 266 void saveOriginalAct() { 267 auto a = hdr_original["hdr"]; 268 auto p = () { 269 if (isGitDiff && a.length > 2) 270 return a[2 .. $].split('\t'); 271 return a.split('\t'); 272 }(); 273 274 data.hdrOriginal = Path(p[0].idup); 275 } 276 277 void saveNewAct() { 278 auto a = hdr_new["hdr"]; 279 auto p = () { 280 if (isGitDiff && a.length > 2) 281 return a[2 .. $].split('\t'); 282 return a.split('\t'); 283 }(); 284 285 data.hdrNew = Path(p[0].idup); 286 } 287 288 void warnOriginalAct() { 289 logger.warning( 290 "Broken diff data. The original file, as specificed after ---, has length zero"); 291 } 292 293 void warnNewAct() { 294 logger.warning( 295 "Broken diff data. The new file, as specificed after +++, has length zero"); 296 } 297 298 void multiLineHunkAct() { 299 try { 300 data.startPos = hunk_start_multiline["line"].to!uint; 301 data.maxCount = hunk_start_multiline["count"].to!uint; 302 data.count = 0; 303 } catch (Exception e) { 304 logger.info(e.msg); 305 logger.info("Unable to parse diff line: ", line); 306 } 307 } 308 309 void lineHunkAct() { 310 try { 311 data.startPos = hunk_start_multiline["line"].to!uint; 312 data.maxCount = 1; 313 data.count = 0; 314 } catch (Exception e) { 315 logger.info(e.msg); 316 logger.info("Unable to parse diff line: ", line); 317 } 318 } 319 320 void plusLineAct() { 321 if (data.hdrNew !in result.changes) 322 result[data.hdrNew] = Diff.ChangedLines.init; 323 result[data.hdrNew].add(data.startPos + data.count); 324 data.count++; 325 } 326 327 void blankLineAct() { 328 data.count++; 329 } 330 331 void setGitDiffAct() { 332 isGitDiff = true; 333 } 334 335 void saveRawDiffAct() { 336 result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup); 337 } 338 339 st[0] = true; 340 while (st[0]) { 341 st = nextState(st[1]); 342 debug logger.tracef("%s | %s", line, st); 343 344 foreach (const act; st[2]) { 345 static foreach (Member; [EnumMembers!Action]) { 346 if (act == Member) 347 mixin(Member.to!string ~ "Act();"); 348 } 349 } 350 debug logger.tracef("%s | %s %s", result, data, isGitDiff); 351 } 352 } 353 } 354 355 private: 356 357 struct StateData { 358 Path hdrOriginal; 359 Path hdrNew; 360 uint startPos; 361 uint maxCount; 362 uint count; 363 } 364 365 enum State { 366 findHdr, 367 findHdrOriginal, 368 findHdrNew, 369 checkOrigNew, 370 findHunkStart, 371 insideHunk, 372 findNext, 373 checkHunkCounter, 374 } 375 376 enum Action { 377 setGitDiff, 378 resetStateData, 379 saveOriginal, 380 saveNew, 381 warnOriginal, 382 warnNew, 383 multiLineHunk, 384 lineHunk, 385 plusLine, 386 blankLine, 387 saveRawDiff, 388 } 389 390 version (unittest) { 391 import std..string : lineSplitter; 392 import my.set; 393 import dextool.type; 394 } 395 396 @("shall detect the changes lines (unified git diff)") 397 unittest { 398 immutable lines = `diff --git a/standalone2.d b/standalone2.d 399 index 0123..2345 100644 400 --- a/standalone.d 401 +++ b/standalone2.d 402 @@ -31,7 +31,6 @@ import std.algorithm : map; 403 import std.array : Appender, appender, array; 404 import std.datetime : SysTime; 405 +import std.format : format; 406 -import std.typecons : Tuple; 407 408 import d2sqlite3 : sqlDatabase = Database; 409 410 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 411 struct Database { 412 import std.conv : to; 413 import std.exception : collectException; 414 - import std.typecons : Nullable; 415 + import std.typecons : Nullable, Flag, No; 416 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 417 418 + sqlDatabase db;`; 419 420 UnifiedDiffParser p; 421 foreach (line; lines.lineSplitter) 422 p.process(line); 423 424 // assert 425 p.result[Path("standalone2.d")].contains(33).shouldBeTrue; 426 p.result[Path("standalone2.d")].contains(48).shouldBeTrue; 427 p.result[Path("standalone2.d")].contains(51).shouldBeTrue; 428 p.result.length.should == 1; 429 } 430 431 @("shall detect the changed lines (unified with date)") 432 unittest { 433 434 immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp 2018-11-18 21:25:46.631640690 +0100 435 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp 2018-11-18 21:26:17.003691847 +0100 436 @@ -3,7 +3,7 @@ 437 /// @author Joakim Brännström (joakim.brannstrom@gmx.com) 438 439 int fun(int x) { 440 - if (x > 3) { 441 + if (x != 3) { 442 return 0; 443 } 444 return 1;`; 445 446 UnifiedDiffParser p; 447 foreach (line; lines.lineSplitter) 448 p.process(line); 449 450 // assert 451 p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6) 452 .shouldBeTrue; 453 p.result.length.should == 1; 454 }