1 /** 2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 This module contains a parser of diffs in the Unified Format. 11 It is pretty limited because it only handles those that git normally output. 12 The result are which lines has changes in what files. 13 14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified 15 16 Example: 17 --- 18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 20 @@ -31,7 +31,6 @@ import std.algorithm : map; 21 import std.array : Appender, appender, array; 22 import std.datetime : SysTime; 23 import std.format : format; 24 -import std.typecons : Tuple; 25 26 import d2sqlite3 : sqlDatabase = Database; 27 28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 29 struct Database { 30 import std.conv : to; 31 import std.exception : collectException; 32 - import std.typecons : Nullable; 33 + import std.typecons : Nullable, Flag, No; 34 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 35 36 sqlDatabase db; 37 --- 38 */ 39 module dextool.plugin.mutate.backend.diff_parser; 40 41 import logger = std.experimental.logger; 42 import std.range : ElementType; 43 import std.traits : isSomeString; 44 45 import dextool.type : AbsolutePath, Path, DirName; 46 47 version (unittest) { 48 import unit_threaded : shouldEqual, shouldBeTrue, should; 49 } 50 51 Diff diffFromStdin() @trusted { 52 import std.stdio : stdin; 53 import dextool.plugin.mutate.backend.diff_parser : UnifiedDiffParser; 54 55 return toDiff(stdin.byLine); 56 } 57 58 /** Parse a range of lines to a diff. 59 * 60 * Params: 61 * r = range of strings which is the diff 62 */ 63 Diff toDiff(Range)(Range r) if (isSomeString!(ElementType!Range)) { 64 UnifiedDiffParser parser; 65 foreach (l; r) { 66 debug logger.trace(l); 67 parser.process(l); 68 } 69 return parser.result; 70 } 71 72 @safe: 73 74 struct Diff { 75 import dextool.set; 76 77 static struct Line { 78 uint line; 79 string text; 80 } 81 82 /// The raw diff that where parsed. 83 Line[][Path] rawDiff; 84 85 alias ChangedLines = Set!uint; 86 87 ChangedLines[Path] changes; 88 alias changes this; 89 90 bool empty() @safe pure nothrow const @nogc { 91 return changes.length == 0; 92 } 93 94 /** A range over the changes by file. 95 * 96 * The paths are adjusted to be relative `workdir`. 97 */ 98 auto toRange(AbsolutePath workdir) @safe { 99 return DiffRange(this, workdir); 100 } 101 } 102 103 struct DiffRange { 104 import std.path : relativePath; 105 import std.array : array; 106 import dextool.set; 107 108 static struct KeyValue { 109 Path key; 110 Diff.ChangedLines value; 111 AbsolutePath absPath; 112 } 113 114 private { 115 Diff diff; 116 AbsolutePath workdir; 117 Path[] keys; 118 } 119 120 this(Diff d, AbsolutePath workdir) { 121 this.diff = d; 122 this.workdir = workdir; 123 this.keys = diff.byKey.array; 124 debug logger.trace(workdir); 125 } 126 127 KeyValue front() @safe { 128 assert(!empty, "Can't get front of an empty range"); 129 debug logger.trace(keys[0]); 130 return KeyValue(keys[0], diff[keys[0]], AbsolutePath(keys[0], 131 DirName(cast(string) workdir))); 132 } 133 134 void popFront() @safe pure nothrow { 135 assert(!empty, "Can't pop front of an empty range"); 136 keys = keys[1 .. $]; 137 } 138 139 bool empty() @safe pure nothrow const @nogc { 140 return keys.length == 0; 141 } 142 } 143 144 /** Parse a buffer in the Unified diff format and return the hunks of changes 145 * in the targets. 146 */ 147 struct UnifiedDiffParser { 148 import std.regex : ctRegex, matchFirst, matchAll; 149 import std.typecons : Tuple; 150 151 Diff result; 152 153 private { 154 // diff --git a/usability/report.md b/usability/report.md 155 enum re_git_diff_hdr = ctRegex!(`^diff --git.*`); 156 // --- a/standalone.d 157 enum re_hdr_original = ctRegex!(`^--- (?P<hdr>.*)`); 158 // +++ a/standalone.d 159 enum re_hdr_new = ctRegex!(`^\+\+\+ (?P<hdr>.*)`); 160 // @@ -31,7 +31,6 @@ import std.algorithm : map; 161 enum re_hunk_start_multiline = ctRegex!(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`); 162 // @@ -31 +31 @@ import std.algorithm : map; 163 enum re_hunk_start_line = ctRegex!(`^@@ -\d* \+(?P<line>\d*) @@.*`); 164 165 alias FsmState = Tuple!(bool, State, Action[]); 166 167 FsmState st; 168 StateData data; 169 bool isGitDiff; 170 } 171 172 void process(T)(T line) { 173 import std.conv : to; 174 import std.meta; 175 import std.traits : EnumMembers; 176 import std.string : startsWith, split; 177 import dextool.type : Path, AbsolutePath; 178 import dextool.set; 179 180 auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty; 181 auto hdr_original = matchFirst(line, re_hdr_original); 182 auto hdr_new = matchFirst(line, re_hdr_new); 183 auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline); 184 auto hunk_start_line = matchFirst(line, re_hunk_start_line); 185 const first_char = line.length != 0 ? line[0] : typeof(line[0]).init; 186 187 FsmState nextState(const State st) { 188 auto next = FsmState(false, st, null); 189 190 final switch (st) { 191 case State.findHdr: 192 if (is_git_diff) { 193 next[1] = State.findHdrOriginal; 194 next[2] = [Action.setGitDiff]; 195 } else if (!hdr_original.empty) { 196 next[0] = true; 197 next[1] = State.findHdrOriginal; 198 } 199 break; 200 case State.findHdrOriginal: 201 if (!hdr_original.empty) { 202 next[1] = State.findHdrNew; 203 next[2] = [Action.resetStateData, Action.saveOriginal]; 204 } 205 break; 206 case State.findHdrNew: 207 if (!hdr_new.empty) { 208 next[1] = State.checkOrigNew; 209 next[2] = [Action.saveNew]; 210 next[0] = true; 211 } 212 break; 213 case State.checkOrigNew: 214 if (data.hdrOriginal.length == 0) { 215 next[1] = State.findHdrOriginal; 216 next[2] = [Action.warnOriginal]; 217 } else if (data.hdrNew.length == 0) { 218 next[1] = State.findHdrOriginal; 219 next[2] = [Action.warnNew]; 220 } else { 221 next[1] = State.findHunkStart; 222 } 223 break; 224 case State.findNext: 225 if (!hunk_start_multiline.empty || !hunk_start_line.empty) { 226 next[1] = State.findHunkStart; 227 next[0] = true; 228 } else if (!hdr_original.empty) { 229 next[1] = State.findHdrOriginal; 230 next[0] = true; 231 } 232 break; 233 case State.findHunkStart: 234 if (!hunk_start_multiline.empty) { 235 next[1] = State.insideHunk; 236 next[2] = [Action.multiLineHunk]; 237 } else if (!hunk_start_line.empty) { 238 next[1] = State.insideHunk; 239 next[2] = [Action.lineHunk]; 240 } 241 break; 242 case State.insideHunk: 243 next[2] = [Action.saveRawDiff]; 244 if (first_char == '+') 245 next[2] ~= [Action.plusLine]; 246 else if (first_char == ' ' || line.length == 0) 247 next[2] ~= [Action.blankLine]; 248 next[1] = State.checkHunkCounter; 249 next[0] = true; 250 break; 251 case State.checkHunkCounter: 252 next[1] = State.insideHunk; 253 if (data.count == data.maxCount) 254 next[1] = State.findNext; 255 break; 256 } 257 258 return next; 259 } 260 261 void resetStateDataAct() { 262 data = StateData.init; 263 } 264 265 void saveOriginalAct() { 266 auto a = hdr_original["hdr"]; 267 auto p = () { 268 if (isGitDiff && a.length > 2) 269 return a[2 .. $].split('\t'); 270 return a.split('\t'); 271 }(); 272 273 data.hdrOriginal = Path(p[0].idup); 274 } 275 276 void saveNewAct() { 277 auto a = hdr_new["hdr"]; 278 auto p = () { 279 if (isGitDiff && a.length > 2) 280 return a[2 .. $].split('\t'); 281 return a.split('\t'); 282 }(); 283 284 data.hdrNew = Path(p[0].idup); 285 } 286 287 void warnOriginalAct() { 288 logger.warning( 289 "Broken diff data. The original file, as specificed after ---, has length zero"); 290 } 291 292 void warnNewAct() { 293 logger.warning( 294 "Broken diff data. The new file, as specificed after +++, has length zero"); 295 } 296 297 void multiLineHunkAct() { 298 data.startPos = hunk_start_multiline["line"].to!uint; 299 data.maxCount = hunk_start_multiline["count"].to!uint; 300 data.count = 0; 301 } 302 303 void lineHunkAct() { 304 data.startPos = hunk_start_multiline["line"].to!uint; 305 data.maxCount = 1; 306 data.count = 0; 307 } 308 309 void plusLineAct() { 310 if (data.hdrNew !in result.changes) 311 result[data.hdrNew] = Diff.ChangedLines.init; 312 result[data.hdrNew].add(data.startPos + data.count); 313 data.count++; 314 } 315 316 void blankLineAct() { 317 data.count++; 318 } 319 320 void setGitDiffAct() { 321 isGitDiff = true; 322 } 323 324 void saveRawDiffAct() { 325 result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup); 326 } 327 328 st[0] = true; 329 while (st[0]) { 330 st = nextState(st[1]); 331 debug logger.tracef("%s | %s", line, st); 332 333 foreach (const act; st[2]) { 334 static foreach (Member; [EnumMembers!Action]) { 335 if (act == Member) 336 mixin(Member.to!string ~ "Act();"); 337 } 338 } 339 debug logger.tracef("%s | %s %s", result, data, isGitDiff); 340 } 341 } 342 } 343 344 private: 345 346 struct StateData { 347 Path hdrOriginal; 348 Path hdrNew; 349 uint startPos; 350 uint maxCount; 351 uint count; 352 } 353 354 enum State { 355 findHdr, 356 findHdrOriginal, 357 findHdrNew, 358 checkOrigNew, 359 findHunkStart, 360 insideHunk, 361 findNext, 362 checkHunkCounter, 363 } 364 365 enum Action { 366 setGitDiff, 367 resetStateData, 368 saveOriginal, 369 saveNew, 370 warnOriginal, 371 warnNew, 372 multiLineHunk, 373 lineHunk, 374 plusLine, 375 blankLine, 376 saveRawDiff, 377 } 378 379 version (unittest) { 380 import std.string : lineSplitter; 381 import dextool.set; 382 import dextool.type; 383 } 384 385 @("shall detect the changes lines (unified git diff)") 386 unittest { 387 immutable lines = `diff --git a/standalone2.d b/standalone2.d 388 index 0123..2345 100644 389 --- a/standalone.d 390 +++ b/standalone2.d 391 @@ -31,7 +31,6 @@ import std.algorithm : map; 392 import std.array : Appender, appender, array; 393 import std.datetime : SysTime; 394 +import std.format : format; 395 -import std.typecons : Tuple; 396 397 import d2sqlite3 : sqlDatabase = Database; 398 399 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 400 struct Database { 401 import std.conv : to; 402 import std.exception : collectException; 403 - import std.typecons : Nullable; 404 + import std.typecons : Nullable, Flag, No; 405 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 406 407 + sqlDatabase db;`; 408 409 UnifiedDiffParser p; 410 foreach (line; lines.lineSplitter) 411 p.process(line); 412 413 // assert 414 p.result[Path("standalone2.d")].contains(33).shouldBeTrue; 415 p.result[Path("standalone2.d")].contains(48).shouldBeTrue; 416 p.result[Path("standalone2.d")].contains(51).shouldBeTrue; 417 p.result.length.should == 1; 418 } 419 420 @("shall detect the changed lines (unified with date)") 421 unittest { 422 423 immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp 2018-11-18 21:25:46.631640690 +0100 424 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp 2018-11-18 21:26:17.003691847 +0100 425 @@ -3,7 +3,7 @@ 426 /// @author Joakim Brännström (joakim.brannstrom@gmx.com) 427 428 int fun(int x) { 429 - if (x > 3) { 430 + if (x != 3) { 431 return 0; 432 } 433 return 1;`; 434 435 UnifiedDiffParser p; 436 foreach (line; lines.lineSplitter) 437 p.process(line); 438 439 // assert 440 p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp")].contains(6) 441 .shouldBeTrue; 442 p.result.length.should == 1; 443 }