1 /** 2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 This module contains a parser of diffs in the Unified Format. 11 It is pretty limited because it only handles those that git normally output. 12 The result are which lines has changes in what files. 13 14 https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified 15 16 Example: 17 --- 18 -- a/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 19 ++ b/plugin/mutate/source/dextool/plugin/mutate/backend/database/standalone.d 20 @@ -31,7 +31,6 @@ import std.algorithm : map; 21 import std.array : Appender, appender, array; 22 import std.datetime : SysTime; 23 import std.format : format; 24 -import std.typecons : Tuple; 25 26 import d2sqlite3 : sqlDatabase = Database; 27 28 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 29 struct Database { 30 import std.conv : to; 31 import std.exception : collectException; 32 - import std.typecons : Nullable; 33 + import std.typecons : Nullable, Flag, No; 34 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 35 36 sqlDatabase db; 37 --- 38 */ 39 module dextool.plugin.mutate.backend.diff_parser; 40 41 import logger = std.experimental.logger; 42 43 version (unittest) { 44 import unit_threaded : shouldEqual, shouldBeTrue, should; 45 } 46 47 @safe: 48 49 struct Diff { 50 import dextool.type : AbsolutePath; 51 import dextool.set; 52 53 static struct Line { 54 uint line; 55 string text; 56 } 57 58 /// The raw diff that where parsed. 59 Line[][AbsolutePath] rawDiff; 60 61 alias ChangedLines = Set!uint; 62 63 ChangedLines[AbsolutePath] changes; 64 alias changes this; 65 66 bool empty() @safe pure nothrow const @nogc { 67 return changes.length == 0; 68 } 69 70 /** A range over the changes by file. 71 * 72 * The paths are adjusted to be relative `workdir`. 73 */ 74 auto toRange(AbsolutePath workdir) @safe { 75 return DiffRange(this, workdir); 76 } 77 } 78 79 struct DiffRange { 80 import dextool.type : AbsolutePath, Path; 81 import std.path : relativePath; 82 import std.array : array; 83 import dextool.set; 84 85 static struct KeyValue { 86 Path key; 87 Diff.ChangedLines value; 88 AbsolutePath absPath; 89 } 90 91 private { 92 Diff diff; 93 AbsolutePath workdir; 94 AbsolutePath[] keys; 95 } 96 97 this(Diff d, AbsolutePath workdir) { 98 this.diff = d; 99 this.workdir = workdir; 100 this.keys = diff.byKey.array; 101 debug logger.trace(workdir); 102 } 103 104 KeyValue front() @safe pure { 105 assert(!empty, "Can't get front of an empty range"); 106 debug logger.trace(keys[0]); 107 return KeyValue(keys[0].relativePath(workdir).Path, diff[keys[0]], keys[0]); 108 } 109 110 void popFront() @safe pure nothrow { 111 assert(!empty, "Can't pop front of an empty range"); 112 keys = keys[1 .. $]; 113 } 114 115 bool empty() @safe pure nothrow const @nogc { 116 return keys.length == 0; 117 } 118 } 119 120 /** Parse a buffer in the Unified diff format and return the hunks of changes 121 * in the targets. 122 */ 123 struct UnifiedDiffParser { 124 import std.regex : ctRegex, matchFirst, matchAll; 125 import std.typecons : Tuple; 126 127 Diff result; 128 129 private { 130 // diff --git a/usability/report.md b/usability/report.md 131 enum re_git_diff_hdr = ctRegex!(`^diff --git.*`); 132 // --- a/standalone.d 133 enum re_hdr_original = ctRegex!(`^--- (?P<hdr>.*)`); 134 // +++ a/standalone.d 135 enum re_hdr_new = ctRegex!(`^\+\+\+ (?P<hdr>.*)`); 136 // @@ -31,7 +31,6 @@ import std.algorithm : map; 137 enum re_hunk_start_multiline = ctRegex!(`^@@ -\d*,\d* \+(?P<line>\d*),(?P<count>\d*) @@.*`); 138 // @@ -31 +31 @@ import std.algorithm : map; 139 enum re_hunk_start_line = ctRegex!(`^@@ -\d* \+(?P<line>\d*) @@.*`); 140 141 alias FsmState = Tuple!(bool, State, Action[]); 142 143 FsmState st; 144 StateData data; 145 bool isGitDiff; 146 } 147 148 void process(T)(T line) { 149 import std.conv : to; 150 import std.meta; 151 import std.traits : EnumMembers; 152 import std.string : startsWith, split; 153 import dextool.type : Path, AbsolutePath; 154 import dextool.set; 155 156 auto is_git_diff = !matchFirst(line, re_git_diff_hdr).empty; 157 auto hdr_original = matchFirst(line, re_hdr_original); 158 auto hdr_new = matchFirst(line, re_hdr_new); 159 auto hunk_start_multiline = matchFirst(line, re_hunk_start_multiline); 160 auto hunk_start_line = matchFirst(line, re_hunk_start_line); 161 const first_char = line.length != 0 ? line[0] : typeof(line[0]).init; 162 163 FsmState nextState(const State st) { 164 auto next = FsmState(false, st, null); 165 166 final switch (st) { 167 case State.findHdr: 168 if (is_git_diff) { 169 next[1] = State.findHdrOriginal; 170 next[2] = [Action.setGitDiff]; 171 } else if (!hdr_original.empty) { 172 next[0] = true; 173 next[1] = State.findHdrOriginal; 174 } 175 break; 176 case State.findHdrOriginal: 177 if (!hdr_original.empty) { 178 next[1] = State.findHdrNew; 179 next[2] = [Action.resetStateData, Action.saveOriginal]; 180 } 181 break; 182 case State.findHdrNew: 183 if (!hdr_new.empty) { 184 next[1] = State.checkOrigNew; 185 next[2] = [Action.saveNew]; 186 next[0] = true; 187 } 188 break; 189 case State.checkOrigNew: 190 if (data.hdrOriginal.length == 0) { 191 next[1] = State.findHdrOriginal; 192 next[2] = [Action.warnOriginal]; 193 } else if (data.hdrNew.length == 0) { 194 next[1] = State.findHdrOriginal; 195 next[2] = [Action.warnNew]; 196 } else { 197 next[1] = State.findHunkStart; 198 } 199 break; 200 case State.findNext: 201 if (!hunk_start_multiline.empty || !hunk_start_line.empty) { 202 next[1] = State.findHunkStart; 203 next[0] = true; 204 } else if (!hdr_original.empty) { 205 next[1] = State.findHdrOriginal; 206 next[0] = true; 207 } 208 break; 209 case State.findHunkStart: 210 if (!hunk_start_multiline.empty) { 211 next[1] = State.insideHunk; 212 next[2] = [Action.multiLineHunk]; 213 } else if (!hunk_start_line.empty) { 214 next[1] = State.insideHunk; 215 next[2] = [Action.lineHunk]; 216 } 217 break; 218 case State.insideHunk: 219 next[2] = [Action.saveRawDiff]; 220 if (first_char == '+') 221 next[2] ~= [Action.plusLine]; 222 else if (first_char == ' ' || line.length == 0) 223 next[2] ~= [Action.blankLine]; 224 next[1] = State.checkHunkCounter; 225 next[0] = true; 226 break; 227 case State.checkHunkCounter: 228 next[1] = State.insideHunk; 229 if (data.count == data.maxCount) 230 next[1] = State.findNext; 231 break; 232 } 233 234 return next; 235 } 236 237 void resetStateDataAct() { 238 data = StateData.init; 239 } 240 241 void saveOriginalAct() { 242 auto a = hdr_original["hdr"]; 243 auto p = () { 244 if (isGitDiff && a.length > 2) 245 return a[2 .. $].split('\t'); 246 return a.split('\t'); 247 }(); 248 249 data.hdrOriginal = Path(p[0].idup).AbsolutePath; 250 } 251 252 void saveNewAct() { 253 auto a = hdr_new["hdr"]; 254 auto p = () { 255 if (isGitDiff && a.length > 2) 256 return a[2 .. $].split('\t'); 257 return a.split('\t'); 258 }(); 259 260 data.hdrNew = Path(p[0].idup).AbsolutePath; 261 } 262 263 void warnOriginalAct() { 264 logger.warning( 265 "Broken diff data. The original file, as specificed after ---, has length zero"); 266 } 267 268 void warnNewAct() { 269 logger.warning( 270 "Broken diff data. The new file, as specificed after +++, has length zero"); 271 } 272 273 void multiLineHunkAct() { 274 data.startPos = hunk_start_multiline["line"].to!uint; 275 data.maxCount = hunk_start_multiline["count"].to!uint; 276 data.count = 0; 277 } 278 279 void lineHunkAct() { 280 data.startPos = hunk_start_multiline["line"].to!uint; 281 data.maxCount = 1; 282 data.count = 0; 283 } 284 285 void plusLineAct() { 286 if (data.hdrNew !in result.changes) 287 result[data.hdrNew] = Diff.ChangedLines.init; 288 result[data.hdrNew].add(data.startPos + data.count); 289 data.count++; 290 } 291 292 void blankLineAct() { 293 data.count++; 294 } 295 296 void setGitDiffAct() { 297 isGitDiff = true; 298 } 299 300 void saveRawDiffAct() { 301 result.rawDiff[data.hdrNew] ~= Diff.Line(data.startPos + data.count, line.idup); 302 } 303 304 st[0] = true; 305 while (st[0]) { 306 st = nextState(st[1]); 307 debug logger.tracef("%s | %s", line, st); 308 309 foreach (const act; st[2]) { 310 static foreach (Member; [EnumMembers!Action]) { 311 if (act == Member) 312 mixin(Member.to!string ~ "Act();"); 313 } 314 } 315 debug logger.tracef("%s | %s %s", result, data, isGitDiff); 316 } 317 } 318 } 319 320 private: 321 322 struct StateData { 323 import dextool.type : AbsolutePath; 324 325 AbsolutePath hdrOriginal; 326 AbsolutePath hdrNew; 327 uint startPos; 328 uint maxCount; 329 uint count; 330 } 331 332 enum State { 333 findHdr, 334 findHdrOriginal, 335 findHdrNew, 336 checkOrigNew, 337 findHunkStart, 338 insideHunk, 339 findNext, 340 checkHunkCounter, 341 } 342 343 enum Action { 344 setGitDiff, 345 resetStateData, 346 saveOriginal, 347 saveNew, 348 warnOriginal, 349 warnNew, 350 multiLineHunk, 351 lineHunk, 352 plusLine, 353 blankLine, 354 saveRawDiff, 355 } 356 357 version (unittest) { 358 import std.string : lineSplitter; 359 import dextool.set; 360 import dextool.type; 361 } 362 363 @("shall detect the changes lines (unified git diff)") 364 unittest { 365 immutable lines = `diff --git a/standalone2.d b/standalone2.d 366 index 0123..2345 100644 367 --- a/standalone.d 368 +++ b/standalone2.d 369 @@ -31,7 +31,6 @@ import std.algorithm : map; 370 import std.array : Appender, appender, array; 371 import std.datetime : SysTime; 372 +import std.format : format; 373 -import std.typecons : Tuple; 374 375 import d2sqlite3 : sqlDatabase = Database; 376 377 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 378 struct Database { 379 import std.conv : to; 380 import std.exception : collectException; 381 - import std.typecons : Nullable; 382 + import std.typecons : Nullable, Flag, No; 383 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 384 385 + sqlDatabase db;`; 386 387 UnifiedDiffParser p; 388 foreach (line; lines.lineSplitter) 389 p.process(line); 390 391 // assert 392 p.result[Path("standalone2.d").AbsolutePath].contains(33).shouldBeTrue; 393 p.result[Path("standalone2.d").AbsolutePath].contains(48).shouldBeTrue; 394 p.result[Path("standalone2.d").AbsolutePath].contains(51).shouldBeTrue; 395 p.result.length.should == 1; 396 } 397 398 @("shall detect the changed lines (unified with date)") 399 unittest { 400 401 immutable lines = `--- plugin/mutate/testdata/report_one_ror_mutation_point.cpp 2018-11-18 21:25:46.631640690 +0100 402 +++ plugin/mutate/testdata/report_one_ror_mutation_point2.cpp 2018-11-18 21:26:17.003691847 +0100 403 @@ -3,7 +3,7 @@ 404 /// @author Joakim Brännström (joakim.brannstrom@gmx.com) 405 406 int fun(int x) { 407 - if (x > 3) { 408 + if (x != 3) { 409 return 0; 410 } 411 return 1;`; 412 413 UnifiedDiffParser p; 414 foreach (line; lines.lineSplitter) 415 p.process(line); 416 417 // assert 418 p.result[Path("plugin/mutate/testdata/report_one_ror_mutation_point2.cpp") 419 .AbsolutePath].contains(6).shouldBeTrue; 420 p.result.length.should == 1; 421 }