1 /** 2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 #SPC-analyzer 11 12 TODO cache the checksums. They are *heavy*. 13 */ 14 module dextool.plugin.mutate.backend.analyze; 15 16 import logger = std.experimental.logger; 17 import std.algorithm : map, filter; 18 import std.array : array, appender; 19 import std.concurrency; 20 import std.datetime : dur; 21 import std.exception : collectException; 22 import std.parallelism; 23 import std.typecons; 24 25 import colorlog; 26 27 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, 28 CompileCommandDB, SearchResult; 29 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream; 30 import dextool.plugin.mutate.backend.analyze.visitor : makeRootVisitor; 31 import dextool.plugin.mutate.backend.database : Database, LineMetadata, MutationPointEntry2; 32 import dextool.plugin.mutate.backend.database.type : MarkedMutant; 33 import dextool.plugin.mutate.backend.diff_parser : Diff; 34 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO; 35 import dextool.plugin.mutate.backend.report.utility : statusToString, Table; 36 import dextool.plugin.mutate.backend.utility : checksum, trustedRelativePath, Checksum; 37 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze; 38 import dextool.set; 39 import dextool.type : ExitStatusType, AbsolutePath, Path, DirName; 40 import dextool.user_filerange; 41 42 version (unittest) { 43 import unit_threaded.assertions; 44 } 45 46 /** Analyze the files in `frange` for mutations. 47 */ 48 ExitStatusType runAnalyzer(ref Database db, ConfigAnalyze conf_analyze, 49 ConfigCompiler conf_compiler, UserFileRange frange, ValidateLoc val_loc, FilesysIO fio) @trusted { 50 import std.algorithm : filter, map; 51 import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff; 52 53 auto fileFilter = () { 54 try { 55 return FileFilter(fio.getOutputDir, conf_analyze.unifiedDiffFromStdin, 56 conf_analyze.unifiedDiffFromStdin ? diffFromStdin : Diff.init); 57 } catch (Exception e) { 58 logger.warning("Unable to parse diff"); 59 logger.info(e.msg); 60 } 61 return FileFilter.init; 62 }(); 63 64 auto pool = () { 65 if (conf_analyze.poolSize == 0) 66 return new TaskPool(); 67 return new TaskPool(conf_analyze.poolSize); 68 }(); 69 70 // will only be used by one thread at a time. 71 auto store = spawn(&storeActor, cast(shared)&db, cast(shared) fio.dup, 72 conf_analyze.prune, conf_analyze.fastDbStore); 73 74 int taskCnt; 75 foreach (f; frange.filter!(a => !a.isNull) 76 .map!(a => a.get) 77 .filter!(a => !isPathInsideAnyRoot(conf_analyze.exclude, a.absoluteFile)) 78 .filter!(a => fileFilter.shouldAnalyze(a.absoluteFile))) { 79 try { 80 pool.put(task!analyzeActor(f, val_loc.dup, fio.dup, conf_compiler, store)); 81 taskCnt++; 82 } catch (Exception e) { 83 logger.trace(e); 84 logger.warning(e.msg); 85 } 86 } 87 88 // inform the store actor of how many analyse results it should *try* to 89 // save. 90 send(store, AnalyzeCntMsg(taskCnt)); 91 // wait for all files to be analyzed 92 pool.finish(true); 93 // wait for the store actor to finish 94 receiveOnly!StoreDoneMsg; 95 96 return ExitStatusType.Ok; 97 } 98 99 @safe: 100 101 /** Filter function for files. Either all or those in stdin. 102 * 103 * The matching ignores the file extension in order to lessen the problem of a 104 * file that this approach skip headers because they do not exist in 105 * `compile_commands.json`. It means that e.g. "foo.hpp" would match `true` if 106 * `foo.cpp` is in `compile_commands.json`. 107 */ 108 struct FileFilter { 109 import std.path : stripExtension; 110 111 Set!string files; 112 bool useFileFilter; 113 AbsolutePath root; 114 115 this(AbsolutePath root, bool fromStdin, Diff diff) { 116 this.root = root; 117 this.useFileFilter = fromStdin; 118 foreach (a; diff.toRange(root)) { 119 files.add(a.key.stripExtension); 120 } 121 } 122 123 bool shouldAnalyze(AbsolutePath p) { 124 import std.path : relativePath; 125 126 if (!useFileFilter) { 127 return true; 128 } 129 130 return relativePath(p, root).stripExtension in files; 131 } 132 } 133 134 /// Number of analyze tasks that has been spawned that the `storeActor` should wait for. 135 struct AnalyzeCntMsg { 136 int value; 137 } 138 139 struct StoreDoneMsg { 140 } 141 142 /// Start an analyze of a file 143 void analyzeActor(SearchResult fileToAnalyze, ValidateLoc vloc, FilesysIO fio, 144 ConfigCompiler conf, Tid storeActor) @trusted nothrow { 145 try { 146 auto analyzer = Analyze(vloc, fio, conf.forceSystemIncludes); 147 analyzer.process(fileToAnalyze); 148 send(storeActor, cast(immutable) analyzer.result); 149 return; 150 } catch (Exception e) { 151 } 152 153 // send a dummy result 154 try { 155 send(storeActor, cast(immutable) new Analyze.Result); 156 } catch (Exception e) { 157 } 158 } 159 160 /// Store the result of the analyze. 161 void storeActor(scope shared Database* dbShared, scope shared FilesysIO fioShared, 162 const bool prune, const bool fastDbStore) @trusted nothrow { 163 import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut; 164 import cachetools : CacheLRU; 165 import dextool.cachetools : nullableCache; 166 167 Database* db = cast(Database*) dbShared; 168 FilesysIO fio = cast(FilesysIO) fioShared; 169 170 // A file is at most saved one time to the database. 171 Set!Path savedFiles; 172 173 auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256, 174 30.dur!"seconds"); 175 auto getFileDbChecksum = nullableCache!(string, Checksum, 176 (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds"); 177 auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) { 178 return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]); 179 })(256, 30.dur!"seconds"); 180 181 static struct Files { 182 Checksum[Path] value; 183 184 this(ref Database db) { 185 foreach (a; db.getDetailedFiles) { 186 value[a.file] = a.fileChecksum; 187 } 188 } 189 } 190 191 void save(immutable Analyze.Result result) { 192 // mark files that have an unchanged checksum as "already saved" 193 foreach (f; result.idFile 194 .byKey 195 .filter!(a => a !in savedFiles) 196 .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a))) { 197 logger.info("Unchanged ".color(Color.yellow), f); 198 savedFiles.add(f); 199 } 200 201 // only saves mutation points to a file one time. 202 { 203 auto app = appender!(MutationPointEntry2[])(); 204 foreach (mp; result.mutationPoints // remove those that has been globally saved 205 .filter!(a => a.file !in savedFiles)) { 206 app.put(mp); 207 } 208 foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) { 209 logger.info("Saving ".color(Color.green), f); 210 db.removeFile(fio.toRelativeRoot(f)); 211 const info = result.infoId[result.idFile[f]]; 212 db.put(fio.toRelativeRoot(f), info.checksum, info.language); 213 savedFiles.add(f); 214 } 215 db.put(app.data, fio.getOutputDir); 216 } 217 218 { 219 Set!long printed; 220 auto app = appender!(LineMetadata[])(); 221 foreach (md; result.metadata) { 222 // transform the ID from local to global. 223 const fid = getFileId(fio.toRelativeRoot(result.fileId[md.id])); 224 if (fid.isNull && !printed.contains(md.id)) { 225 printed.add(md.id); 226 logger.warningf("File with suppressed mutants (// NOMUT) not in the database: %s. Skipping...", 227 result.fileId[md.id]).collectException; 228 continue; 229 } 230 app.put(LineMetadata(fid.get, md.line, md.attr)); 231 } 232 db.put(app.data); 233 } 234 } 235 236 // listen for results from workers until the expected number is processed. 237 void recv() { 238 logger.info("Updating files"); 239 240 int resultCnt; 241 Nullable!int maxResults; 242 bool running = true; 243 244 while (running) { 245 try { 246 receive((AnalyzeCntMsg a) { maxResults = a.value; }, (immutable Analyze.Result a) { 247 resultCnt++; 248 save(a); 249 },); 250 } catch (Exception e) { 251 logger.trace(e).collectException; 252 logger.warning(e.msg).collectException; 253 } 254 255 if (!maxResults.isNull && resultCnt >= maxResults.get) { 256 running = false; 257 } 258 } 259 } 260 261 void pruneFiles() { 262 import std.path : buildPath; 263 264 logger.info("Pruning the database of dropped files"); 265 auto files = db.getFiles.map!(a => buildPath(fio.getOutputDir, a).Path).toSet; 266 267 foreach (f; files.setDifference(savedFiles).toRange) { 268 logger.info("Removing ".color(Color.red), f); 269 db.removeFile(fio.toRelativeRoot(f)); 270 } 271 } 272 273 void fastDbOn() { 274 if (!fastDbStore) 275 return; 276 logger.info( 277 "Turning OFF sqlite3 synchronization protection to improve the write performance"); 278 logger.warning("Do NOT interrupt dextool in any way because it may corrupt the database"); 279 db.run("PRAGMA synchronous = OFF"); 280 db.run("PRAGMA journal_mode = MEMORY"); 281 } 282 283 void fastDbOff() { 284 if (!fastDbStore) 285 return; 286 db.run("PRAGMA synchronous = ON"); 287 db.run("PRAGMA journal_mode = DELETE"); 288 } 289 290 try { 291 import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext; 292 293 setMaxMailboxSize(thisTid, 64, OnCrowding.block); 294 295 fastDbOn(); 296 297 auto trans = db.transaction; 298 299 // TODO: only remove those files that are modified. 300 logger.info("Removing metadata"); 301 db.clearMetadata; 302 303 recv(); 304 305 // TODO: print what files has been updated. 306 logger.info("Resetting timeout context"); 307 resetTimeoutContext(*db); 308 309 logger.info("Updating metadata"); 310 db.updateMetadata; 311 312 if (prune) { 313 pruneFiles(); 314 } 315 316 logger.info("Removing orphant mutants"); 317 db.removeOrphanedMutants; 318 319 logger.info("Updating manually marked mutants"); 320 updateMarkedMutants(*db); 321 printLostMarkings(db.getLostMarkings); 322 323 logger.info("Committing changes"); 324 trans.commit; 325 logger.info("Ok".color(Color.green)); 326 327 fastDbOff(); 328 } catch (Exception e) { 329 logger.error(e.msg).collectException; 330 } 331 332 try { 333 send(ownerTid, StoreDoneMsg.init); 334 } catch (Exception e) { 335 logger.errorf("Fatal error. Unable to send %s to the main thread", 336 StoreDoneMsg.init).collectException; 337 } 338 } 339 340 /// Analyze a file for mutants. 341 struct Analyze { 342 import std.regex : Regex, regex, matchFirst; 343 import std.typecons : NullableRef, Nullable, Yes; 344 import miniorm : Transaction; 345 import cpptooling.analyzer.clang.context : ClangContext; 346 import cpptooling.utility.virtualfilesystem; 347 import dextool.compilation_db : SearchResult; 348 import dextool.type : FileName, Exists, makeExists; 349 import dextool.utility : analyzeFile; 350 351 private { 352 static immutable raw_re_nomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`; 353 354 Regex!char re_nomut; 355 356 ValidateLoc val_loc; 357 FilesysIO fio; 358 bool forceSystemIncludes; 359 360 Cache cache; 361 362 Result result; 363 } 364 365 this(ValidateLoc val_loc, FilesysIO fio, bool forceSystemIncludes) @trusted { 366 this.val_loc = val_loc; 367 this.fio = fio; 368 this.cache = new Cache; 369 this.re_nomut = regex(raw_re_nomut); 370 this.forceSystemIncludes = forceSystemIncludes; 371 this.result = new Result; 372 } 373 374 void process(SearchResult in_file) @safe { 375 in_file.flags.forceSystemIncludes = forceSystemIncludes; 376 377 // find the file and flags to analyze 378 Exists!AbsolutePath checked_in_file; 379 try { 380 checked_in_file = makeExists(in_file.absoluteFile); 381 } catch (Exception e) { 382 logger.warning(e.msg); 383 return; 384 } 385 386 () @trusted { 387 auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly); 388 auto tstream = new TokenStreamImpl(ctx); 389 390 analyzeForMutants(in_file, checked_in_file, ctx, tstream); 391 // TODO: filter files so they are only analyzed once for comments 392 foreach (f; result.fileId.byValue) 393 analyzeForComments(f, tstream); 394 }(); 395 } 396 397 void analyzeForMutants(SearchResult in_file, 398 Exists!AbsolutePath checked_in_file, ref ClangContext ctx, TokenStream tstream) @safe { 399 auto root = makeRootVisitor(fio, val_loc, tstream, cache); 400 analyzeFile(checked_in_file, in_file.flags.completeFlags, root.visitor, ctx); 401 402 result.mutationPoints = root.mutationPoints; 403 foreach (f; root.mutationPointFiles) { 404 const id = result.idFile.length; 405 result.idFile[f.path] = id; 406 result.fileId[id] = f.path; 407 result.infoId[id] = Result.FileInfo(f.cs, f.lang); 408 } 409 } 410 411 /** 412 * Tokens are always from the same file. 413 */ 414 void analyzeForComments(Path file, TokenStream tstream) @trusted { 415 import std.algorithm : filter; 416 import clang.c.Index : CXTokenKind; 417 import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut; 418 419 const fid = result.idFile.require(file, result.fileId.length).FileId; 420 421 auto mdata = appender!(LineMetadata[])(); 422 foreach (t; cache.getTokens(AbsolutePath(file), tstream) 423 .filter!(a => a.kind == CXTokenKind.comment)) { 424 auto m = matchFirst(t.spelling, re_nomut); 425 if (m.whichPattern == 0) 426 continue; 427 428 mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"])))); 429 logger.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column); 430 } 431 432 result.metadata ~= mdata.data; 433 } 434 435 static class Result { 436 import dextool.plugin.mutate.backend.type : Language; 437 438 MutationPointEntry2[] mutationPoints; 439 440 static struct FileInfo { 441 Checksum checksum; 442 Language language; 443 } 444 445 /// The key is the ID from idFile. 446 FileInfo[ulong] infoId; 447 448 /// The IDs is unique for *this* analyze, not globally. 449 long[Path] idFile; 450 Path[long] fileId; 451 452 // The FileID used in the metadata is local to this analysis. It has to 453 // be remapped when added to the database. 454 LineMetadata[] metadata; 455 } 456 } 457 458 @( 459 "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment") 460 unittest { 461 import std.regex : regex, matchFirst; 462 import unit_threaded.runner.io : writelnUt; 463 464 auto re_nomut = regex(Analyze.raw_re_nomut); 465 // NOMUT in other type of comments should NOT match. 466 matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0); 467 matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0); 468 matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0); 469 matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0); 470 471 matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1); 472 matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual(""); 473 matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1); 474 matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1); 475 matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch"); 476 matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf"); 477 auto m = matchFirst("// NOMUT (arch) smurf", re_nomut); 478 m["tag"].shouldEqual("arch"); 479 m["comment"].shouldEqual("smurf"); 480 } 481 482 /// Stream of tokens excluding comment tokens. 483 class TokenStreamImpl : TokenStream { 484 import std.typecons : NullableRef, nullableRef; 485 import cpptooling.analyzer.clang.context : ClangContext; 486 import dextool.plugin.mutate.backend.type : Token; 487 488 NullableRef!ClangContext ctx; 489 490 /// The context must outlive any instance of this class. 491 this(ref ClangContext ctx) { 492 this.ctx = nullableRef(&ctx); 493 } 494 495 Token[] getTokens(Path p) { 496 import dextool.plugin.mutate.backend.utility : tokenize; 497 498 return tokenize(ctx, p); 499 } 500 501 Token[] getFilteredTokens(Path p) { 502 import std.array : array; 503 import std.algorithm : filter; 504 import clang.c.Index : CXTokenKind; 505 import dextool.plugin.mutate.backend.utility : tokenize; 506 507 // Filter a stream of tokens for those that should affect the checksum. 508 return tokenize(ctx, p).filter!(a => a.kind != CXTokenKind.comment).array; 509 } 510 } 511 512 /// Returns: true if `f` is inside any `roots`. 513 bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe { 514 import dextool.utility : isPathInsideRoot; 515 516 foreach (root; roots) { 517 if (isPathInsideRoot(root, f)) 518 return true; 519 } 520 521 return false; 522 } 523 524 /** Update the connection between the marked mutants and their mutation status 525 * id and mutation id. 526 */ 527 void updateMarkedMutants(ref Database db) { 528 import dextool.plugin.mutate.backend.database.type : MutationStatusId; 529 530 void update(MarkedMutant m) { 531 const stId = db.getMutationStatusId(m.statusChecksum); 532 if (stId.isNull) 533 return; 534 const mutId = db.getMutationId(stId.get); 535 if (mutId.isNull) 536 return; 537 db.removeMarkedMutant(m.statusChecksum); 538 db.markMutant(mutId.get, m.path, m.sloc, stId.get, m.statusChecksum, 539 m.toStatus, m.rationale, m.mutText); 540 db.updateMutationStatus(stId.get, m.toStatus); 541 } 542 543 // find those marked mutants that have a checksum that is different from 544 // the mutation status the marked mutant is related to. If possible change 545 // the relation to the correct mutation status id. 546 foreach (m; db.getMarkedMutants 547 .map!(a => tuple(a, db.getChecksum(a.statusId))) 548 .filter!(a => !a[1].isNull) 549 .filter!(a => a[0].statusChecksum != a[1].get)) { 550 update(m[0]); 551 } 552 } 553 554 /// Prints a marked mutant that has become lost due to rerun of analyze 555 void printLostMarkings(MarkedMutant[] lostMutants) { 556 import std.algorithm : sort; 557 import std.array : empty; 558 import std.conv : to; 559 import std.stdio : writeln; 560 561 if (lostMutants.empty) 562 return; 563 564 Table!6 tbl = Table!6([ 565 "ID", "File", "Line", "Column", "Status", "Rationale" 566 ]); 567 foreach (m; lostMutants) { 568 typeof(tbl).Row r = [ 569 m.mutationId.to!string, m.path, m.sloc.line.to!string, 570 m.sloc.column.to!string, m.toStatus.to!string, m.rationale 571 ]; 572 tbl.put(r); 573 } 574 logger.warning("Marked mutants was lost"); 575 writeln(tbl); 576 } 577 578 @("shall only let files in the diff through") 579 unittest { 580 import std.string : lineSplitter; 581 import dextool.plugin.mutate.backend.diff_parser; 582 583 immutable lines = `diff --git a/standalone2.d b/standalone2.d 584 index 0123..2345 100644 585 --- a/standalone.d 586 +++ b/standalone2.d 587 @@ -31,7 +31,6 @@ import std.algorithm : map; 588 import std.array : Appender, appender, array; 589 import std.datetime : SysTime; 590 +import std.format : format; 591 -import std.typecons : Tuple; 592 593 import d2sqlite3 : sqlDatabase = Database; 594 595 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language; 596 struct Database { 597 import std.conv : to; 598 import std.exception : collectException; 599 - import std.typecons : Nullable; 600 + import std.typecons : Nullable, Flag, No; 601 import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum; 602 603 + sqlDatabase db;`; 604 605 UnifiedDiffParser p; 606 foreach (line; lines.lineSplitter) 607 p.process(line); 608 auto diff = p.result; 609 610 auto files = FileFilter(".".Path.AbsolutePath, true, diff); 611 612 files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse; 613 files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue; 614 }