/**
Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
License: MPL-2
Author: Joakim Brännström (joakim.brannstrom@gmx.com)

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

#SPC-analyzer

TODO cache the checksums. They are *heavy*.
*/
module dextool.plugin.mutate.backend.analyze;

import logger = std.experimental.logger;
import std.algorithm : map, filter;
import std.array : array, appender;
import std.concurrency;
import std.datetime : dur;
import std.exception : collectException;
import std.parallelism;
import std.typecons;

import colorlog;

import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter,
    CompileCommandDB, SearchResult;
import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
import dextool.plugin.mutate.backend.analyze.visitor : makeRootVisitor;
import dextool.plugin.mutate.backend.database : Database, LineMetadata, MutationPointEntry2;
import dextool.plugin.mutate.backend.database.type : MarkedMutant;
import dextool.plugin.mutate.backend.diff_parser : Diff;
import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
import dextool.plugin.mutate.backend.utility : checksum, trustedRelativePath, Checksum;
import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze;
import dextool.set;
import dextool.type : ExitStatusType, AbsolutePath, Path;
import dextool.user_filerange;

version (unittest) {
    import unit_threaded.assertions;
}

/** Analyze the files in `frange` for mutations.
 */
ExitStatusType runAnalyzer(ref Database db, ConfigAnalyze conf_analyze,
        ConfigCompiler conf_compiler, UserFileRange frange, ValidateLoc val_loc, FilesysIO fio) @trusted {
    import std.algorithm : filter, map;
    import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;

    auto fileFilter = () {
        try {
            return FileFilter(fio.getOutputDir, conf_analyze.unifiedDiffFromStdin,
                    conf_analyze.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
        } catch (Exception e) {
            logger.warning("Unable to parse diff");
            logger.info(e.msg);
        }
        return FileFilter.init;
    }();

    auto pool = () {
        if (conf_analyze.poolSize == 0)
            return new TaskPool();
        return new TaskPool(conf_analyze.poolSize);
    }();

    // will only be used by one thread at a time.
    auto store = spawn(&storeActor, cast(shared)&db, cast(shared) fio.dup,
            conf_analyze.prune, conf_analyze.fastDbStore);

    int taskCnt;
    foreach (f; frange.filter!(a => !a.isNull)
            .map!(a => a.get)
            .filter!(a => !isPathInsideAnyRoot(conf_analyze.exclude, a.absoluteFile))
            .filter!(a => fileFilter.shouldAnalyze(a.absoluteFile))) {
        try {
            pool.put(task!analyzeActor(f, val_loc.dup, fio.dup, conf_compiler, store));
            taskCnt++;
        } catch (Exception e) {
            logger.trace(e);
            logger.warning(e.msg);
        }
    }

    // inform the store actor of how many analyze results it should *try* to
    // save.
    send(store, AnalyzeCntMsg(taskCnt));
    // wait for all files to be analyzed
    pool.finish(true);
    // wait for the store actor to finish
    receiveOnly!StoreDoneMsg;

    return ExitStatusType.Ok;
}

@safe:

/** Filter function for files. Either all or those in stdin.
 *
 * The matching ignores the file extension in order to lessen the problem that
 * headers are skipped because they do not exist in `compile_commands.json`.
 * It means that e.g. `foo.hpp` would match `true` if `foo.cpp` is in
 * `compile_commands.json`.
 */
struct FileFilter {
    import std.path : stripExtension;

    Set!string files;
    bool useFileFilter;
    AbsolutePath root;

    this(AbsolutePath root, bool fromStdin, Diff diff) {
        this.root = root;
        this.useFileFilter = fromStdin;
        foreach (a; diff.toRange(root)) {
            files.add(a.key.stripExtension);
        }
    }

    bool shouldAnalyze(AbsolutePath p) {
        import std.path : relativePath;

        if (!useFileFilter) {
            return true;
        }

        return relativePath(p, root).stripExtension in files;
    }
}

/// Number of analyze tasks that have been spawned that the `storeActor` should wait for.
struct AnalyzeCntMsg {
    int value;
}

struct StoreDoneMsg {
}

/// Start an analysis of a file
void analyzeActor(SearchResult fileToAnalyze, ValidateLoc vloc, FilesysIO fio,
        ConfigCompiler conf, Tid storeActor) @trusted nothrow {
    try {
        auto analyzer = Analyze(vloc, fio, conf.forceSystemIncludes);
        analyzer.process(fileToAnalyze);
        send(storeActor, cast(immutable) analyzer.result);
        return;
    } catch (Exception e) {
    }

    // send a dummy result
    try {
        send(storeActor, cast(immutable) new Analyze.Result);
    } catch (Exception e) {
    }
}

/// Store the result of the analysis.
void storeActor(scope shared Database* dbShared, scope shared FilesysIO fioShared,
        const bool prune, const bool fastDbStore) @trusted nothrow {
    import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
    import cachetools : CacheLRU;
    import dextool.cachetools : nullableCache;

    Database* db = cast(Database*) dbShared;
    FilesysIO fio = cast(FilesysIO) fioShared;

    // A file is saved to the database at most one time.
    Set!Path savedFiles;

    auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
            30.dur!"seconds");
    auto getFileDbChecksum = nullableCache!(string, Checksum,
            (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
    auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
        return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]);
    })(256, 30.dur!"seconds");

    static struct Files {
        Checksum[Path] value;

        this(ref Database db) {
            foreach (a; db.getDetailedFiles) {
                value[a.file] = a.fileChecksum;
            }
        }
    }

    void save(immutable Analyze.Result result) {
        // mark files that have an unchanged checksum as "already saved"
        foreach (f; result.idFile
                .byKey
                .filter!(a => a !in savedFiles)
                .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a))) {
            logger.info("Unchanged ".color(Color.yellow), f);
            savedFiles.add(f);
        }

        // only save the mutation points of a file one time.
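        // Files flagged as unchanged above are already in `savedFiles`, so the
        // loops below skip them and their existing rows in the database are
        // left untouched.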
        {
            auto app = appender!(MutationPointEntry2[])();
            foreach (mp; result.mutationPoints // remove those that have been globally saved
                    .filter!(a => a.file !in savedFiles)) {
                app.put(mp);
            }
            foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) {
                logger.info("Saving ".color(Color.green), f);
                db.removeFile(fio.toRelativeRoot(f));
                const info = result.infoId[result.idFile[f]];
                db.put(fio.toRelativeRoot(f), info.checksum, info.language);
                savedFiles.add(f);
            }
            db.put(app.data, fio.getOutputDir);
        }

        {
            Set!long printed;
            auto app = appender!(LineMetadata[])();
            foreach (md; result.metadata) {
                // transform the ID from local to global.
                const fid = getFileId(fio.toRelativeRoot(result.fileId[md.id]));
                if (fid.isNull && !printed.contains(md.id)) {
                    printed.add(md.id);
                    logger.warningf("File with suppressed mutants (// NOMUT) not in the database: %s. Skipping...",
                            result.fileId[md.id]).collectException;
                    continue;
                }
                app.put(LineMetadata(fid.get, md.line, md.attr));
            }
            db.put(app.data);
        }
    }

    // listen for results from workers until the expected number is processed.
    void recv() {
        logger.info("Updating files");

        int resultCnt;
        Nullable!int maxResults;
        bool running = true;

        while (running) {
            try {
                receive((AnalyzeCntMsg a) { maxResults = a.value; }, (immutable Analyze.Result a) {
                    resultCnt++;
                    save(a);
                },);
            } catch (Exception e) {
                logger.trace(e).collectException;
                logger.warning(e.msg).collectException;
            }

            if (!maxResults.isNull && resultCnt >= maxResults.get) {
                running = false;
            }
        }
    }

    void pruneFiles() {
        import std.path : buildPath;

        logger.info("Pruning the database of dropped files");
        auto files = db.getFiles.map!(a => buildPath(fio.getOutputDir, a).Path).toSet;

        foreach (f; files.setDifference(savedFiles).toRange) {
            logger.info("Removing ".color(Color.red), f);
            db.removeFile(fio.toRelativeRoot(f));
        }
    }

    void fastDbOn() {
        if (!fastDbStore)
            return;
        logger.info(
                "Turning OFF sqlite3 synchronization protection to improve the write performance");
        logger.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
        db.run("PRAGMA synchronous = OFF");
        db.run("PRAGMA journal_mode = MEMORY");
    }

    void fastDbOff() {
        if (!fastDbStore)
            return;
        db.run("PRAGMA synchronous = ON");
        db.run("PRAGMA journal_mode = DELETE");
    }

    try {
        import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

        setMaxMailboxSize(thisTid, 64, OnCrowding.block);

        fastDbOn();

        auto trans = db.transaction;

        // TODO: only remove those files that are modified.
        logger.info("Removing metadata");
        db.clearMetadata;

        recv();

        // TODO: print what files have been updated.
306 logger.info("Resetting timeout context"); 307 resetTimeoutContext(*db); 308 309 logger.info("Updating metadata"); 310 db.updateMetadata; 311 312 if (prune) { 313 pruneFiles(); 314 logger.info("Removing orphant mutants"); 315 db.removeOrphanedMutants; 316 } 317 318 logger.info("Updating manually marked mutants"); 319 updateMarkedMutants(*db); 320 printLostMarkings(db.getLostMarkings); 321 322 logger.info("Committing changes"); 323 trans.commit; 324 logger.info("Ok".color(Color.green)); 325 326 fastDbOff(); 327 } catch (Exception e) { 328 logger.error(e.msg).collectException; 329 } 330 331 try { 332 send(ownerTid, StoreDoneMsg.init); 333 } catch (Exception e) { 334 logger.errorf("Fatal error. Unable to send %s to the main thread", 335 StoreDoneMsg.init).collectException; 336 } 337 } 338 339 /// Analyze a file for mutants. 340 struct Analyze { 341 import std.regex : Regex, regex, matchFirst; 342 import std.typecons : NullableRef, Nullable, Yes; 343 import miniorm : Transaction; 344 import cpptooling.analyzer.clang.context : ClangContext; 345 import cpptooling.utility.virtualfilesystem; 346 import dextool.compilation_db : SearchResult; 347 import dextool.type : Exists, makeExists; 348 import dextool.utility : analyzeFile; 349 350 private { 351 static immutable raw_re_nomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`; 352 353 Regex!char re_nomut; 354 355 ValidateLoc val_loc; 356 FilesysIO fio; 357 bool forceSystemIncludes; 358 359 Cache cache; 360 361 Result result; 362 } 363 364 this(ValidateLoc val_loc, FilesysIO fio, bool forceSystemIncludes) @trusted { 365 this.val_loc = val_loc; 366 this.fio = fio; 367 this.cache = new Cache; 368 this.re_nomut = regex(raw_re_nomut); 369 this.forceSystemIncludes = forceSystemIncludes; 370 this.result = new Result; 371 } 372 373 void process(SearchResult in_file) @safe { 374 in_file.flags.forceSystemIncludes = forceSystemIncludes; 375 376 // find the file and flags to analyze 377 Exists!AbsolutePath checked_in_file; 378 try { 379 checked_in_file = makeExists(in_file.absoluteFile); 380 } catch (Exception e) { 381 logger.warning(e.msg); 382 return; 383 } 384 385 () @trusted { 386 auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly); 387 auto tstream = new TokenStreamImpl(ctx); 388 389 analyzeForMutants(in_file, checked_in_file, ctx, tstream); 390 // TODO: filter files so they are only analyzed once for comments 391 foreach (f; result.fileId.byValue) 392 analyzeForComments(f, tstream); 393 }(); 394 } 395 396 void analyzeForMutants(SearchResult in_file, 397 Exists!AbsolutePath checked_in_file, ref ClangContext ctx, TokenStream tstream) @safe { 398 auto root = makeRootVisitor(fio, val_loc, tstream, cache); 399 analyzeFile(checked_in_file, in_file.flags.completeFlags, root.visitor, ctx); 400 401 result.mutationPoints = root.mutationPoints; 402 foreach (f; root.mutationPointFiles) { 403 const id = result.idFile.length; 404 result.idFile[f.path] = id; 405 result.fileId[id] = f.path; 406 result.infoId[id] = Result.FileInfo(f.cs, f.lang); 407 } 408 } 409 410 /** 411 * Tokens are always from the same file. 
     */
    void analyzeForComments(Path file, TokenStream tstream) @trusted {
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

        const fid = result.idFile.require(file, result.fileId.length).FileId;

        auto mdata = appender!(LineMetadata[])();
        foreach (t; cache.getTokens(AbsolutePath(file), tstream)
                .filter!(a => a.kind == CXTokenKind.comment)) {
            auto m = matchFirst(t.spelling, re_nomut);
            if (m.whichPattern == 0)
                continue;

            mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
            logger.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
        }

        result.metadata ~= mdata.data;
    }

    static class Result {
        import dextool.plugin.mutate.backend.type : Language;

        MutationPointEntry2[] mutationPoints;

        static struct FileInfo {
            Checksum checksum;
            Language language;
        }

        /// The key is the ID from idFile.
        FileInfo[ulong] infoId;

        /// The IDs are unique to *this* analysis, not globally.
        long[Path] idFile;
        Path[long] fileId;

        // The FileID used in the metadata is local to this analysis. It has to
        // be remapped when added to the database.
        LineMetadata[] metadata;
    }
}

@(
        "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
unittest {
    import std.regex : regex, matchFirst;
    import unit_threaded.runner.io : writelnUt;

    auto re_nomut = regex(Analyze.raw_re_nomut);
    // NOMUT in other types of comments should NOT match.
    matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);

    matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
    matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
    matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
    auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
    m["tag"].shouldEqual("arch");
    m["comment"].shouldEqual("smurf");
}

/// Stream of tokens excluding comment tokens.
class TokenStreamImpl : TokenStream {
    import std.typecons : NullableRef, nullableRef;
    import cpptooling.analyzer.clang.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;

    NullableRef!ClangContext ctx;

    /// The context must outlive any instance of this class.
    this(ref ClangContext ctx) {
        this.ctx = nullableRef(&ctx);
    }

    Token[] getTokens(Path p) {
        import dextool.plugin.mutate.backend.utility : tokenize;

        return tokenize(ctx, p);
    }

    Token[] getFilteredTokens(Path p) {
        import std.array : array;
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.utility : tokenize;

        // Filter a stream of tokens for those that should affect the checksum.
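        // Comments have no effect on the generated code, so skipping them
        // should keep the checksum stable across comment-only edits.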
        return tokenize(ctx, p).filter!(a => a.kind != CXTokenKind.comment).array;
    }
}

/// Returns: true if `f` is inside any of `roots`.
bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
    import dextool.utility : isPathInsideRoot;

    foreach (root; roots) {
        if (isPathInsideRoot(root, f))
            return true;
    }

    return false;
}

/** Update the connection between the marked mutants and their mutation status
 * id and mutation id.
 */
void updateMarkedMutants(ref Database db) {
    import dextool.plugin.mutate.backend.database.type : MutationStatusId;

    void update(MarkedMutant m) {
        const stId = db.getMutationStatusId(m.statusChecksum);
        if (stId.isNull)
            return;
        const mutId = db.getMutationId(stId.get);
        if (mutId.isNull)
            return;
        db.removeMarkedMutant(m.statusChecksum);
        db.markMutant(mutId.get, m.path, m.sloc, stId.get, m.statusChecksum,
                m.toStatus, m.rationale, m.mutText);
        db.updateMutationStatus(stId.get, m.toStatus);
    }

    // find those marked mutants whose checksum differs from the checksum of
    // the mutation status they are related to. If possible, change the
    // relation to the correct mutation status id.
    foreach (m; db.getMarkedMutants
            .map!(a => tuple(a, db.getChecksum(a.statusId)))
            .filter!(a => !a[1].isNull)
            .filter!(a => a[0].statusChecksum != a[1].get)) {
        update(m[0]);
    }
}

/// Prints the marked mutants that have become lost due to a rerun of the analysis.
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.algorithm : sort;
    import std.array : empty;
    import std.conv : to;
    import std.stdio : writeln;

    if (lostMutants.empty)
        return;

    Table!6 tbl = Table!6([
            "ID", "File", "Line", "Column", "Status", "Rationale"
            ]);
    foreach (m; lostMutants) {
        typeof(tbl).Row r = [
            m.mutationId.to!string, m.path, m.sloc.line.to!string,
            m.sloc.column.to!string, m.toStatus.to!string, m.rationale
        ];
        tbl.put(r);
    }
    logger.warning("Marked mutants were lost");
    writeln(tbl);
}

@("shall only let files in the diff through")
unittest {
    import std.string : lineSplitter;
    import dextool.plugin.mutate.backend.diff_parser;

    immutable lines = `diff --git a/standalone2.d b/standalone2.d
index 0123..2345 100644
--- a/standalone.d
+++ b/standalone2.d
@@ -31,7 +31,6 @@ import std.algorithm : map;
 import std.array : Appender, appender, array;
 import std.datetime : SysTime;
+import std.format : format;
-import std.typecons : Tuple;

 import d2sqlite3 : sqlDatabase = Database;

@@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
 struct Database {
     import std.conv : to;
     import std.exception : collectException;
-    import std.typecons : Nullable;
+    import std.typecons : Nullable, Flag, No;
     import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;

+    sqlDatabase db;`;

    UnifiedDiffParser p;
    foreach (line; lines.lineSplitter)
        p.process(line);
    auto diff = p.result;

    auto files = FileFilter(".".Path.AbsolutePath, true, diff);

    files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
    files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
}
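
// A minimal sketch of the expected behavior of `isPathInsideAnyRoot`. The
// paths are hypothetical and the test assumes that `AbsolutePath` accepts
// paths that do not exist on disk and that `isPathInsideRoot` compares the
// normalized paths.
@("shall detect if a path is inside any of the roots")
unittest {
    auto roots = ["/project/src".Path.AbsolutePath];

    isPathInsideAnyRoot(roots, "/project/src/a.cpp".Path.AbsolutePath).shouldBeTrue;
    isPathInsideAnyRoot(roots, "/project/other/a.cpp".Path.AbsolutePath).shouldBeFalse;
    // no roots, so nothing can contain the path.
    isPathInsideAnyRoot(null, "/project/src/a.cpp".Path.AbsolutePath).shouldBeFalse;
}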