/**cpptooling.analyzer.clang
Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
License: MPL-2
Author: Joakim Brännström (joakim.brannstrom@gmx.com)

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

#SPC-analyzer

TODO cache the checksums. They are *heavy*.
*/
module dextool.plugin.mutate.backend.analyze;

import logger = std.experimental.logger;
import std.algorithm : map, filter, joiner, cache;
import std.array : array, appender, empty;
import std.concurrency;
import std.datetime : dur, Duration;
import std.exception : collectException;
import std.parallelism;
import std.range : tee, enumerate;
import std.typecons : tuple;

import colorlog;
import my.filter : GlobFilter;
import my.named_type;
import my.optional;
import my.set;

import dextool.utility : dextoolBinaryId;

import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB,
    ParsedCompileCommandRange, ParsedCompileCommand, ParseFlags, SystemIncludePath;
import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataResult;
import dextool.plugin.mutate.backend.database : Database, LineMetadata,
    MutationPointEntry2, DepFile;
import dextool.plugin.mutate.backend.database.type : MarkedMutant, TestFile,
    TestFilePath, TestFileChecksum, ToolVersion;
import dextool.plugin.mutate.backend.diff_parser : Diff;
import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
import dextool.plugin.mutate.backend.utility : checksum, Checksum, getProfileResult, Profile;
import dextool.plugin.mutate.backend.type : Mutation;
import dextool.plugin.mutate.type : MutationKind;
import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze;
import dextool.type : ExitStatusType, AbsolutePath, Path;

version (unittest) {
    import unit_threaded.assertions;
}

/** Analyze the files in `frange` for mutations.
 *
 * One `storeActor` thread is spawned to write results to the database. A
 * task pool runs one `testPathActor` plus one `analyzeActor` per file that
 * passes the filters. The function returns when the store actor has sent
 * `StoreDoneMsg`.
 *
 * Params:
 *  dbPath = database to store the result in.
 *  userKinds = mutation kinds, converted with `toInternal` before use.
 *  confAnalyze = analyze configuration (pool size, file matcher, diff, profile etc).
 *  conf_compiler = compiler configuration forwarded to each `analyzeActor`.
 *  frange = the compile commands to analyze.
 *  valLoc = forwarded (as a `dup`) to each `analyzeActor`.
 *  fio = file system access, `dup`:ed for every actor.
 *
 * Returns: always `ExitStatusType.Ok`.
 */
ExitStatusType runAnalyzer(const AbsolutePath dbPath, const MutationKind[] userKinds, ConfigAnalyze confAnalyze,
        ConfigCompiler conf_compiler, ParsedCompileCommandRange frange,
        ValidateLoc valLoc, FilesysIO fio) @trusted {
    import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
    import dextool.plugin.mutate.backend.mutation_type : toInternal;

    // A diff that can not be parsed degrades to `FileFilter.init`, which lets
    // every file through.
    auto fileFilter = () {
        try {
            return FileFilter(fio.getOutputDir, confAnalyze.unifiedDiffFromStdin,
                    confAnalyze.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
        } catch (Exception e) {
            logger.info(e.msg);
            logger.warning("Unable to parse diff");
        }
        return FileFilter.init;
    }();

    // both the user supplied matcher and the diff filter must accept the file.
    bool shouldAnalyze(AbsolutePath p) {
        if (!confAnalyze.fileMatcher.match(p.toString))
            return false;
        return fileFilter.shouldAnalyze(p);
    }

    // a pool size of zero means "use the default constructed pool".
    auto pool = confAnalyze.poolSize == 0 ? new TaskPool() : new TaskPool(confAnalyze.poolSize);

    // if a dependency of a root file has been changed.
    auto changedDeps = dependencyAnalyze(dbPath, fio);

    // the roots that are *not* flagged as changed.
    auto unchangedRoots = cast(immutable) changedDeps.byKeyValue
        .filter!(a => !a.value)
        .map!(a => a.key)
        .array;

    // will only be used by one thread at a time.
    auto store = spawn(&storeActor, dbPath, cast(shared) fio.dup,
            cast(shared) confAnalyze, unchangedRoots);

    try {
        pool.put(task!testPathActor(confAnalyze.testPaths,
                confAnalyze.testFileMatcher, fio.dup, store));
    } catch (Exception e) {
        logger.trace(e);
        logger.warning(e.msg);
    }

    auto kinds = toInternal(userKinds);
    int taskCnt;
    Set!AbsolutePath alreadyAnalyzed;
    // dfmt off
    foreach (f; frange
            // The tool only supports analyzing a file one time.
            // This optimize it in some cases where the same file occurs
            // multiple times in the compile commands database.
            .filter!(a => a.cmd.absoluteFile !in alreadyAnalyzed)
            .tee!(a => alreadyAnalyzed.add(a.cmd.absoluteFile))
            .cache
            .filter!(a => shouldAnalyze(a.cmd.absoluteFile))
            ) {
        try {
            // a known root is skipped unless it is flagged as changed or the
            // user forces a save.
            if (auto v = fio.toRelativeRoot(f.cmd.absoluteFile) in changedDeps) {
                if (!*v && !confAnalyze.forceSaveAnalyze)
                    continue;
            }

            //logger.infof("%s sending", f.cmd.absoluteFile);
            pool.put(task!analyzeActor(kinds, f, valLoc.dup, fio.dup, conf_compiler, confAnalyze, store));
            ++taskCnt;
        } catch (Exception e) {
            logger.trace(e);
            logger.warning(e.msg);
        }
    }
    // dfmt on

    changedDeps = typeof(changedDeps).init; // free the memory

    // inform the store actor of how many analyse results it should *try* to
    // save.
    send(store, AnalyzeCntMsg(taskCnt));
    // wait for all files to be analyzed
    pool.finish(true);
    // wait for the store actor to finish
    receiveOnly!StoreDoneMsg;

    if (confAnalyze.profile) {
        try {
            import std.stdio : writeln;

            writeln(getProfileResult.toString);
        } catch (Exception e) {
            logger.warning("Unable to print the profile data: ", e.msg).collectException;
        }
    }

    return ExitStatusType.Ok;
}

@safe:

/** Decides which files to analyze: every file, or only those named in a diff.
 *
 * The file extension is stripped from both the diff entries and the candidate
 * path before they are compared. A change to `foo.hpp` in the diff therefore
 * also lets `foo.cpp` through. This lessens the problem that headers would
 * otherwise be skipped because they do not exist in `compile_commands.json`.
 *
 * TODO: this may create problems for header only libraries because only the
 * unittest would include the header, which means that for this to work the
 * unittest would have to reside in the same directory as the header file.
 * They normally never do. A diff of a header only library then leads to
 * "no files analyzed".
 */
struct FileFilter {
    import std.path : stripExtension;

    /// Paths from the diff with the extension removed.
    Set!string files;
    /// When false every file passes the filter.
    bool useFileFilter;
    /// Candidate paths are made relative to this directory before the lookup.
    AbsolutePath root;

    this(AbsolutePath root, bool fromStdin, Diff diff) {
        this.root = root;
        this.useFileFilter = fromStdin;
        foreach (entry; diff.toRange(root)) {
            files.add(entry.key.stripExtension);
        }
    }

    /// Returns: true if `p` should be analyzed.
    bool shouldAnalyze(AbsolutePath p) {
        import std.path : relativePath;

        if (useFileFilter) {
            return relativePath(p, root).stripExtension in files;
        }

        return true;
    }
}

/// Number of analyze tasks that has been spawned that the `storeActor` should wait for.
struct AnalyzeCntMsg {
    int value;
}

/// The main thread is waiting for storeActor to send this message.
struct StoreDoneMsg {
}

/** Start an analyze of a file.
 *
 * Exactly one `Analyze.Result` is sent to `storeActor` per call: the real
 * result on success, otherwise an empty dummy so the receiver's count of
 * analyzed files still adds up.
 */
void analyzeActor(Mutation.Kind[] kinds, ParsedCompileCommand fileToAnalyze, ValidateLoc vloc,
        FilesysIO fio, ConfigCompiler compilerConf, ConfigAnalyze analyzeConf, Tid storeActor) @trusted nothrow {
    auto profile = Profile("analyze file " ~ fileToAnalyze.cmd.absoluteFile);

    try {
        //logger.infof("%s begin", fileToAnalyze.cmd.absoluteFile);
        auto analyzer = Analyze(kinds, vloc, fio, Analyze.Config(compilerConf.forceSystemIncludes,
                analyzeConf.saveCoverage.get, compilerConf.allowErrors.get));
        analyzer.process(fileToAnalyze);
        send(storeActor, cast(immutable) analyzer.result);
        //logger.infof("%s end", fileToAnalyze.cmd.absoluteFile);
        return;
    } catch (Exception e) {
        logger.error(e.msg).collectException;
    }

    // send a dummy result
    try {
        //logger.infof("%s failed", fileToAnalyze.cmd.absoluteFile);
        send(storeActor, cast(immutable) new Analyze.Result);
    } catch (Exception e) {
        logger.error(e.msg).collectException;
    }
}

/// The test files found by `testPathActor`.
class TestFileResult {
    /// How long it took to find and checksum the files.
    Duration time;
    /// The test files keyed by the checksum of their content.
    TestFile[Checksum] files;
}

/** Find the test files in `userPaths`, checksum them and send the result to
 * `storeActor` as an `immutable TestFileResult`.
 *
 * Directories are expanded one level at a time (`SpanMode.shallow`) by
 * pushing their entries back on the work queue. Regular files are kept only
 * if `matcher` accepts them. A path that triggers an exception is logged and
 * skipped.
 */
void testPathActor(const AbsolutePath[] userPaths, GlobFilter matcher, FilesysIO fio, Tid storeActor) @trusted nothrow {
    import std.datetime : Clock;
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.file : isDir, isFile, dirEntries, SpanMode;
    import my.container.vector;

    auto profile = Profile("checksum test files");

    auto sw = StopWatch(AutoStart.yes);

    TestFile makeTestFile(const AbsolutePath file) {
        auto cs = checksum(fio.makeInput(file).content[]);
        return TestFile(TestFilePath(fio.toRelativeRoot(file)),
                TestFileChecksum(cs), Clock.currTime);
    }

    auto paths = vector(userPaths.dup);

    auto tfiles = new TestFileResult;

    while (!paths.empty) {
        try {
            if (isDir(paths.front)) {
                logger.trace("  Test directory ", paths.front);
                foreach (a; dirEntries(paths.front, SpanMode.shallow).map!(
                        a => AbsolutePath(a.name))) {
                    paths.put(a);
                }
            } else if (isFile(paths.front) && matcher.match(paths.front)) {
                logger.trace("  Test saved ", paths.front);
                auto t = makeTestFile(paths.front);
                tfiles.files[t.checksum.get] = t;
            }
        } catch (Exception e) {
            logger.warning(e.msg).collectException;
        }

        paths.popFront;
    }

    logger.infof("Found %s test files", tfiles.files.length).collectException;

    // The elapsed time must be stored *before* the result is handed over. A
    // `scope (exit)` would run after `send`, i.e. it would write to an object
    // that the receiver already treats as immutable and may already have read.
    tfiles.time = sw.peek;

    try {
        send(storeActor, cast(immutable) tfiles);
    } catch (Exception e) {
        logger.error(e.msg).collectException;
    }
}

/** Store the result of the analyze.
 *
 * Runs as its own thread. It receives `Analyze.Result` and `TestFileResult`
 * messages from the workers, saves them to the database at `dbPath` and then
 * performs the post processing (schemas, pruning, marked mutants, tool
 * version). The owner thread is always sent a `StoreDoneMsg` at the end, also
 * when the database could not be opened.
 *
 * Params:
 *  dbPath = database to write to.
 *  fioShared = file system access, cast back to a thread local `FilesysIO`.
 *  confAnalyzeShared = analyze configuration.
 *  rootFiles = root files that were not sent for analysis because nothing changed.
 */
void storeActor(const AbsolutePath dbPath, scope shared FilesysIO fioShared,
        scope shared ConfigAnalyze confAnalyzeShared, immutable Path[] rootFiles) @trusted nothrow {
    import cachetools : CacheLRU;
    import dextool.cachetools : nullableCache;
    import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

    const confAnalyze = cast() confAnalyzeShared;

    // The conditions that the storeActor is waiting for receiving the results
    // from the workers.
    static struct RecvWaiter {
        // int.max until the owner has told how many results to expect.
        int analyzeFileWaitCnt = int.max;
        int analyzeFileCnt;

        bool isTestFilesDone;

        bool isWaiting() {
            return analyzeFileCnt < analyzeFileWaitCnt || !isTestFilesDone;
        }
    }

    static struct SchemataSaver {
        import sumtype;
        import my.optional;
        import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataBuilder;

        typeof(ConfigAnalyze.minMutantsPerSchema) minMutantsPerSchema;
        typeof(ConfigAnalyze.mutantsPerSchema) mutantsPerSchema;
        SchemataBuilder builder;

        void put(FilesysIO fio, SchemataResult.Schemata[AbsolutePath] a) {
            builder.put(fio, a);
        }

        /// Save the schema in `value`, if any, together with those of its
        /// mutants that have a mutation status id in the database.
        void process(ref Database db, Optional!(SchemataBuilder.ET) value) {
            value.match!((Some!(SchemataBuilder.ET) a) {
                try {
                    auto mutants = a.mutants
                        .map!(a => db.getMutationStatusId(a.id))
                        .filter!(a => !a.isNull)
                        .map!(a => a.get)
                        .array;
                    if (!mutants.empty) {
                        const id = db.putSchemata(a.checksum, a.fragments, mutants);
                        logger.tracef(!id.isNull, "Saving schema %s with %s mutants",
                            id.get.get, mutants.length);
                    }
                } catch (Exception e) {
                    logger.trace(e.msg);
                }
            }, (None a) {});
        }

        /// Consume fragments used by scheman containing >min mutants.
        void intermediate(ref Database db) {
            builder.discardMinScheman = false;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            builder.minMutantsPerSchema = mutantsPerSchema.get;

            while (!builder.isDone) {
                process(db, builder.next);
            }

            builder.restart;
        }

        /// Consume all fragments or discard.
        void finalize(ref Database db) {
            builder.discardMinScheman = true;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            builder.minMutantsPerSchema = minMutantsPerSchema.get;

            // two loops to pass over all mutants and retry new schema
            // compositions. Any schema that is less than the minimum will be
            // discarded so the number of mutants will shrink.
            while (!builder.isDone) {
                while (!builder.isDone) {
                    process(db, builder.next);
                }
                builder.restart;
            }
        }
    }

    auto schemas = SchemataSaver(confAnalyze.minMutantsPerSchema, confAnalyze.mutantsPerSchema);

    // Does all the work. It is `nothrow` and always ends by sending
    // `StoreDoneMsg` to the owner.
    void helper(FilesysIO fio, ref Database db) nothrow {
        // A file is at most saved one time to the database.
        Set!AbsolutePath savedFiles;

        // a failure to read the version is treated as "different".
        const isToolVersionDifferent = () nothrow{
            try {
                return db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
            } catch (Exception e) {
            }
            return true;
        }();

        auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
                30.dur!"seconds");
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
            return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]);
        })(256, 30.dur!"seconds");

        static struct Files {
            Checksum[Path] value;

            this(ref Database db) {
                foreach (a; db.getDetailedFiles) {
                    value[a.file] = a.fileChecksum;
                }
            }
        }

        // Save the result from the analysis of one root file.
        void save(immutable Analyze.Result result_) {
            import dextool.plugin.mutate.backend.type : Language;

            auto result = cast() result_;

            auto profile = Profile("save " ~ result.root);

            // mark files that have an unchanged checksum as "already saved"
            foreach (f; result.idFile
                    .byKey
                    .filter!(a => a !in savedFiles)
                    .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a)
                        && !confAnalyze.forceSaveAnalyze && !isToolVersionDifferent)) {
                logger.info("Unchanged ".color(Color.yellow), f);
                savedFiles.add(f);
            }

            // only saves mutation points to a file one time.
            {
                auto app = appender!(MutationPointEntry2[])();
                bool isChanged = isToolVersionDifferent;
                foreach (mp; result.mutationPoints
                        .map!(a => tuple!("data", "file")(a, fio.toAbsoluteRoot(a.file)))
                        .filter!(a => a.file !in savedFiles)) {
                    app.put(mp.data);
                }
                foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) {
                    isChanged = true;
                    logger.info("Saving ".color(Color.green), f);
                    const relp = fio.toRelativeRoot(f);

                    // this is critical in order to remove old data about a file.
                    db.removeFile(relp);

                    const info = result.infoId[result.idFile[f]];
                    db.put(relp, info.checksum, info.language, f == result.root);
                    savedFiles.add(f);
                }
                db.put(app.data, fio.getOutputDir);

                if (result.root !in savedFiles) {
                    // this occurs when the file is e.g. a unittest that uses a
                    // header only library. The unittests are not mutated thus
                    // no mutation points exists in them but we want dextool to
                    // still, if possible, track the unittests for changes.
                    isChanged = true;
                    const relp = fio.toRelativeRoot(result.root);
                    db.removeFile(relp);
                    // the language do not matter because it is a file without
                    // any mutants.
                    db.put(relp, result.rootCs, Language.init, true);
                    savedFiles.add(fio.toAbsoluteRoot(result.root));
                }

                // must always update dependencies because they may not contain
                // mutants. Only files that are changed and contain mutants
                // trigger isChanged to be true.
                db.dependencyApi.set(fio.toRelativeRoot(result.root), result.dependencies);

                if (isChanged) {
                    foreach (a; result.coverage.byKeyValue) {
                        const fid = getFileId(fio.toRelativeRoot(result.fileId[a.key]));
                        if (!fid.isNull) {
                            db.clearCoverageMap(fid.get);
                            db.putCoverageMap(fid.get, a.value);
                        }
                    }

                    // only save the schematas if mutation points where saved.
                    // This ensure that only schematas for changed/new files
                    // are saved.
                    schemas.put(fio, result.schematas);
                    schemas.intermediate(db);
                }
            }

            {
                Set!long printed;
                auto app = appender!(LineMetadata[])();
                foreach (md; result.metadata) {
                    const localId = Analyze.Result.LocalFileId(md.id.get);
                    // transform the ID from local to global.
                    const fid = getFileId(fio.toRelativeRoot(result.fileId[localId]));
                    if (fid.isNull && !printed.contains(md.id.get)) {
                        printed.add(md.id.get);
                        logger.info("File with suppressed mutants (// NOMUT) not in the database: ",
                                result.fileId[localId]).collectException;
                    } else if (!fid.isNull) {
                        app.put(LineMetadata(fid.get, md.line, md.attr));
                    }
                }
                db.put(app.data);
            }
        }

        // Synchronize the test files in the database with `result`: remove
        // those whose checksum is no longer present and add the new ones.
        void saveTestResult(immutable TestFileResult result) {
            auto profile = Profile("save test files");
            Set!Checksum old;

            foreach (a; db.getTestFiles) {
                old.add(a.checksum.get);
                if (a.checksum.get !in result.files) {
                    logger.info("Removed test file ", a.file.get.toString);
                    db.removeFile(a.file);
                }
            }

            foreach (a; result.files.byValue.filter!(a => a.checksum.get !in old)) {
                logger.info("Saving test file ", a.file.get.toString);
                db.put(a);
            }
        }

        // listen for results from workers until the expected number is processed.
        void recv() {
            logger.info("Updating files");
            RecvWaiter waiter;

            while (waiter.isWaiting) {
                try {
                    receive((AnalyzeCntMsg a) {
                        waiter.analyzeFileWaitCnt = a.value;
                    }, (immutable Analyze.Result a) {
                        auto trans = db.transaction;
                        waiter.analyzeFileCnt++;
                        save(a);
                        trans.commit;

                        logger.infof("Analyzed file %s/%s",
                            waiter.analyzeFileCnt, waiter.analyzeFileWaitCnt);
                    }, (immutable TestFileResult a) {
                        auto trans = db.transaction;
                        waiter.isTestFilesDone = true;
                        saveTestResult(a);
                        trans.commit;

                        logger.info("Done analyzing test files in ", a.time);
                    });
                } catch (Exception e) {
                    logger.trace(e).collectException;
                    logger.warning(e.msg).collectException;
                }
            }
        }

        // Remove the files from the database that are not in `savedFiles`.
        void pruneFiles() {
            auto profile = Profile("prune files");

            logger.info("Pruning the database of dropped files");
            auto files = db.getFiles.map!(a => fio.toAbsoluteRoot(a)).toSet;

            foreach (f; files.setDifference(savedFiles).toRange) {
                logger.info("Removing ".color(Color.red), f);
                db.removeFile(fio.toRelativeRoot(f));
            }
        }

        void addRoots() {
            if (confAnalyze.forceSaveAnalyze || isToolVersionDifferent)
                return;

            // add root files and their dependencies that has not been analyzed because nothing has changed.
            // By adding them they are not removed.

            auto profile = Profile("add roots and dependencies");
            foreach (a; rootFiles) {
                auto p = fio.toAbsoluteRoot(a);
                if (p !in savedFiles) {
                    savedFiles.add(p);
                    // fejk text for the user to tell them that yes, the files have
                    // been analyzed.
                    logger.info("Analyzing ", a);
                    logger.info("Unchanged ".color(Color.yellow), a);
                }
            }
            foreach (a; rootFiles.map!(a => db.dependencyApi.get(a)).joiner) {
                savedFiles.add(fio.toAbsoluteRoot(a));
            }
        }

        void fastDbOn() {
            if (!confAnalyze.fastDbStore)
                return;
            logger.info(
                    "Turning OFF sqlite3 synchronization protection to improve the write performance");
            logger.warning(
                    "Do NOT interrupt dextool in any way because it may corrupt the database");
            db.run("PRAGMA synchronous = OFF");
            db.run("PRAGMA journal_mode = MEMORY");
        }

        void fastDbOff() {
            if (!confAnalyze.fastDbStore)
                return;
            db.run("PRAGMA synchronous = ON");
            db.run("PRAGMA journal_mode = DELETE");
        }

        try {
            import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

            // by making the mailbox size follow the number of workers the overall
            // behavior will slow down if saving to the database is too slow. This
            // avoids excessive or even fatal memory usage.
            setMaxMailboxSize(thisTid, confAnalyze.poolSize + 2, OnCrowding.block);

            fastDbOn();

            {
                auto trans = db.transaction;
                auto profile = Profile("prune old schemas");
                if (isToolVersionDifferent) {
                    logger.info("Prune database of schematan created by the old version");
                    db.deleteAllSchemas;
                }
                trans.commit;
            }

            recv();
            {
                auto trans = db.transaction;
                schemas.finalize(db);
                trans.commit;
            }

            {
                auto trans = db.transaction;
                addRoots();

                logger.info("Resetting timeout context");
                resetTimeoutContext(db);

                logger.info("Updating metadata");
                db.updateMetadata;

                if (confAnalyze.prune) {
                    pruneFiles();
                    {
                        auto profile = Profile("remove orphaned mutants");
                        logger.info("Removing orphaned mutants");
                        db.removeOrphanedMutants;
                    }
                    {
                        auto profile = Profile("prune schemas");
                        logger.info("Prune the database of unused schemas");
                        db.pruneSchemas;
                    }
                    {
                        auto profile = Profile("prune dependencies");
                        logger.info("Prune dependencies");
                        db.dependencyApi.cleanup;
                    }
                }

                logger.info("Updating manually marked mutants");
                updateMarkedMutants(db);
                printLostMarkings(db.getLostMarkings);

                if (isToolVersionDifferent) {
                    logger.info("Updating tool version");
                    db.updateToolVersion(ToolVersion(dextoolBinaryId));
                }

                logger.info("Committing changes");
                trans.commit;
                logger.info("Ok".color(Color.green));
            }

            fastDbOff();

            if (isToolVersionDifferent) {
                auto profile = Profile("compact");
                logger.info("Compacting the database");
                db.vacuum;
            }
        } catch (Exception e) {
            logger.error(e.msg).collectException;
            logger.error("Failed to save the result of the analyze to the database")
                .collectException;
        }

        try {
            send(ownerTid, StoreDoneMsg.init);
        } catch (Exception e) {
            logger.errorf("Fatal error. Unable to send %s to the main thread",
                    StoreDoneMsg.init).collectException;
        }
    }

    try {
        FilesysIO fio = cast(FilesysIO) fioShared;
        auto db = Database.make(dbPath);
        helper(fio, db);
    } catch (Exception e) {
        logger.error(e.msg).collectException;

        // `helper` is nothrow, so this is only reached when it never ran
        // (e.g. the database could not be opened). The owner is blocked
        // waiting for `StoreDoneMsg`; it must be sent here or the owner hangs.
        try {
            send(ownerTid, StoreDoneMsg.init);
        } catch (Exception e2) {
            logger.errorf("Fatal error. Unable to send %s to the main thread",
                    StoreDoneMsg.init).collectException;
        }
    }
}

/// Analyze a file for mutants.
struct Analyze {
    import std.regex : Regex, regex, matchFirst;
    import std.typecons : Yes;
    import libclang_ast.context : ClangContext;

    static struct Config {
        bool forceSystemIncludes;
        bool saveCoverage;
        bool allowErrors;
    }

    private {
        static immutable rawReNomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;

        Regex!char re_nomut;

        ValidateLoc valLoc;
        FilesysIO fio;

        Cache cache;

        Result result;

        Config conf;

        Mutation.Kind[] kinds;
    }

    this(Mutation.Kind[] kinds, ValidateLoc valLoc, FilesysIO fio, Config conf) @trusted {
        this.kinds = kinds;
        this.valLoc = valLoc;
        this.fio = fio;
        this.cache = new Cache;
        this.re_nomut = regex(rawReNomut);
        this.result = new Result;
        this.conf = conf;
    }

    /** Analyze the file in `commandsForFileToAnalyze` and store the outcome in `result`.
     *
     * A file that does not exist is logged and skipped. An exception during
     * the analysis is logged; whatever was written to `result` before the
     * exception is kept.
     */
    void process(ParsedCompileCommand commandsForFileToAnalyze) @safe {
        import std.file : exists;

        commandsForFileToAnalyze.flags.forceSystemIncludes = conf.forceSystemIncludes;

        try {
            if (!exists(commandsForFileToAnalyze.cmd.absoluteFile)) {
                logger.warningf("Failed to analyze %s. Do not exist",
                        commandsForFileToAnalyze.cmd.absoluteFile);
                return;
            }
        } catch (Exception e) {
            logger.warning(e.msg);
            return;
        }

        result.root = commandsForFileToAnalyze.cmd.absoluteFile;

        try {
            result.rootCs = checksum(result.root);

            auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
            auto tstream = new TokenStreamImpl(ctx);

            analyzeForMutants(commandsForFileToAnalyze, result.root, ctx, tstream);
            foreach (f; result.fileId.byValue)
                analyzeForComments(f, tstream);
        } catch (Exception e) {
            () @trusted { logger.trace(e); }();
            logger.info(e.msg);
            logger.error("failed analyze of ",
                    commandsForFileToAnalyze.cmd.absoluteFile).collectException;
        }
    }

    /** Run the passes (clang AST, mutants, filter, schemata and optionally
     * coverage) on `fileToAnalyze` and store the outcome in `result`.
     *
     * A translation unit with parse errors is skipped unless
     * `conf.allowErrors` is set.
     */
    void analyzeForMutants(ParsedCompileCommand commandsForFileToAnalyze,
            AbsolutePath fileToAnalyze, ref ClangContext ctx, TokenStream tstream) @safe {
        import my.gc.refc : RefCounted;
        import dextool.plugin.mutate.backend.analyze.ast : Ast;
        import dextool.plugin.mutate.backend.analyze.pass_clang;
        import dextool.plugin.mutate.backend.analyze.pass_coverage;
        import dextool.plugin.mutate.backend.analyze.pass_filter;
        import dextool.plugin.mutate.backend.analyze.pass_mutant;
        import dextool.plugin.mutate.backend.analyze.pass_schemata;
        import libclang_ast.check_parse_result : hasParseErrors, logDiagnostic;

        logger.info("Analyzing ", fileToAnalyze);
        RefCounted!(Ast) ast;
        {
            auto tu = ctx.makeTranslationUnit(fileToAnalyze,
                    commandsForFileToAnalyze.flags.completeFlags);
            if (tu.hasParseErrors) {
                logDiagnostic(tu);
                logger.warningf("Compile error in %s", fileToAnalyze);
                if (!conf.allowErrors) {
                    logger.warning("Skipping");
                    return;
                }
            }

            auto res = toMutateAst(tu.cursor, fio);
            ast = res.ast;
            saveDependencies(commandsForFileToAnalyze.flags, result.root, res.dependencies);
            debug logger.trace(ast);
        }

        auto codeMutants = () {
            auto mutants = toMutants(ast, fio, valLoc, kinds);
            debug logger.trace(mutants);

            debug logger.trace("filter mutants");
            mutants = filterMutants(fio, mutants);
            debug logger.trace(mutants);

            return toCodeMutants(mutants, fio, tstream);
        }();
        debug logger.trace(codeMutants);

        {
            auto schemas = toSchemata(ast, fio, codeMutants);
            debug logger.trace(schemas);
            logger.tracef("path dedup count:%s length_acc:%s", ast.paths.count,
                    ast.paths.lengthAccum);

            result.schematas = schemas.getSchematas;
        }

        result.mutationPoints = codeMutants.points.byKeyValue.map!(
                a => a.value.map!(b => MutationPointEntry2(fio.toRelativeRoot(a.key),
                b.offset, b.sloc.begin, b.sloc.end, b.mutants))).joiner.array;
        // give every file with mutation points an ID that is local to this result.
        foreach (f; codeMutants.points.byKey) {
            const id = Result.LocalFileId(result.idFile.length);
            result.idFile[f] = id;
            result.fileId[id] = f;
            result.infoId[id] = Result.FileInfo(codeMutants.csFiles[f], codeMutants.lang);
        }

        if (conf.saveCoverage) {
            auto cov = toCoverage(ast, fio, valLoc);
            debug logger.trace(cov);

            // coverage is only kept for files that got a local ID above.
            foreach (a; cov.points.byKeyValue) {
                if (auto id = a.key in result.idFile) {
                    result.coverage[*id] = a.value;
                }
            }
        }
    }

    /** Tokens are always from the same file.
     *
     * Collects a `LineMetadata` entry for each comment token in `file` that
     * matches the NOMUT pattern. Files without a local ID are ignored.
     *
     * TODO: move this to pass_clang.
     */
    void analyzeForComments(AbsolutePath file, TokenStream tstream) @trusted {
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

        if (auto localId = file in result.idFile) {
            const fid = FileId(localId.get);

            auto mdata = appender!(LineMetadata[])();
            foreach (t; cache.getTokens(AbsolutePath(file), tstream)
                    .filter!(a => a.kind == CXTokenKind.comment)) {
                auto m = matchFirst(t.spelling, re_nomut);
                if (m.whichPattern == 0)
                    continue;

                mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
                logger.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
            }

            result.metadata ~= mdata.data;
        }
    }

    /** Resolve `dependencies` to absolute paths and record those that
     * `valLoc` reports as being inside the output directory, together with
     * their checksum, in `result.dependencies`.
     */
    void saveDependencies(ParseFlags flags, AbsolutePath root, Path[] dependencies) @trusted {
        import std.algorithm : cache;

        auto rootDir = root.dirName;

        foreach (p; dependencies.map!(a => toAbsolutePath(a, rootDir,
                flags.includes, flags.systemIncludes))
                .cache
                .filter!(a => a.hasValue)
                .map!(a => a.orElse(AbsolutePath.init))
                .filter!(a => valLoc.isInsideOutputDir(a))) {
            try {
                result.dependencies ~= DepFile(fio.toRelativeRoot(p), checksum(p));
            } catch (Exception e) {
                logger.trace(e.msg).collectException;
            }
        }

        debug logger.trace(result.dependencies);
    }

    static class Result {
        import dextool.plugin.mutate.backend.analyze.ast : Interval;
        import dextool.plugin.mutate.backend.database.type : SchemataFragment;
        import dextool.plugin.mutate.backend.type : Language, CodeChecksum, SchemataChecksum;

        alias LocalFileId = NamedType!(long, Tag!"LocalFileId", long.init,
                TagStringable, Hashable);
        alias LocalSchemaId = NamedType!(long, Tag!"LocalSchemaId", long.init,
                TagStringable, Hashable);

        MutationPointEntry2[] mutationPoints;

        static struct FileInfo {
            Checksum checksum;
            Language language;
        }

        /// The file that is analyzed, which is a root
        AbsolutePath root;
        Checksum rootCs;

        /// The dependencies the root has.
        DepFile[] dependencies;

        /// The key is the ID from idFile.
        FileInfo[LocalFileId] infoId;

        /// The IDs is unique for *this* analyze, not globally.
        LocalFileId[AbsolutePath] idFile;
        AbsolutePath[LocalFileId] fileId;

        // The FileID used in the metadata is local to this analysis. It has to
        // be remapped when added to the database.
        LineMetadata[] metadata;

        /// Mutant schematas that has been generated.
        SchemataResult.Schemata[AbsolutePath] schematas;

        /// Coverage intervals that can be instrumented.
        Interval[][LocalFileId] coverage;
    }
}

@(
        "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
unittest {
    import std.regex : regex, matchFirst;
    import unit_threaded.runner.io : writelnUt;

    auto re_nomut = regex(Analyze.rawReNomut);
    // NOMUT in other type of comments should NOT match.
    matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);

    matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
    matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
    matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
    auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
    m["tag"].shouldEqual("arch");
    m["comment"].shouldEqual("smurf");
}

/// Token source backed by a `ClangContext`, with and without comment tokens.
class TokenStreamImpl : TokenStream {
    import libclang_ast.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;
    import dextool.plugin.mutate.backend.utility : tokenize;

    ClangContext* ctx;

    /// The context must outlive any instance of this class.
    // TODO remove @trusted when upgrading to dmd-fe 2.091.0+ and activate dip25 + 1000
    this(ref ClangContext ctx) @trusted {
        this.ctx = &ctx;
    }

    /// Returns: all tokens in `p`.
    Token[] getTokens(Path p) {
        return tokenize(*ctx, p);
    }

    /// Returns: the tokens in `p` excluding comment tokens.
    Token[] getFilteredTokens(Path p) {
        import clang.c.Index : CXTokenKind;

        // Filter a stream of tokens for those that should affect the checksum.
        return tokenize(*ctx, p).filter!(a => a.kind != CXTokenKind.comment).array;
    }
}

/// Returns: true if `f` is inside any `roots`.
bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
    import std.algorithm : any;
    import dextool.utility : isPathInsideRoot;

    return roots.any!(root => isPathInsideRoot(root, f));
}

/** Update the connection between the marked mutants and their mutation status
 * id and mutation id.
 */
void updateMarkedMutants(ref Database db) {
    import dextool.plugin.mutate.backend.database.type : MutationStatusId;
    import dextool.plugin.mutate.backend.type : ExitStatus;

    // Re-create the marking against the status and mutation id that its
    // checksum resolves to. Nothing is done if either lookup fails.
    void relink(MarkedMutant marked) {
        const statusId = db.getMutationStatusId(marked.statusChecksum);
        if (statusId.isNull)
            return;

        const mutationId = db.getMutationId(statusId.get);
        if (mutationId.isNull)
            return;

        db.removeMarkedMutant(marked.statusChecksum);
        db.markMutant(mutationId.get, marked.path, marked.sloc, statusId.get,
                marked.statusChecksum, marked.toStatus, marked.rationale, marked.mutText);
        db.updateMutationStatus(statusId.get, marked.toStatus, ExitStatus(0));
    }

    // find those marked mutants that have a checksum that is different from
    // the mutation status the marked mutant is related to. If possible change
    // the relation to the correct mutation status id.
    foreach (marked; db.getMarkedMutants) {
        auto current = db.getChecksum(marked.statusId);
        if (current.isNull)
            continue;
        if (marked.statusChecksum != current.get)
            relink(marked);
    }
}

/// Prints a marked mutant that has become lost due to rerun of analyze
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.array : empty;
    import std.conv : to;
    import std.stdio : writeln;

    // nothing lost, nothing to report.
    if (lostMutants.empty)
        return;

    auto tbl = Table!6(["ID", "File", "Line", "Column", "Status", "Rationale"]);
    foreach (lost; lostMutants) {
        typeof(tbl).Row row = [
            lost.mutationId.get.to!string, lost.path, lost.sloc.line.to!string,
            lost.sloc.column.to!string, lost.toStatus.to!string, lost.rationale.get
        ];
        tbl.put(row);
    }
    logger.warning("Marked mutants was lost");
    writeln(tbl);
}

@("shall only let files in the diff through")
unittest {
    import std.string : lineSplitter;
    import dextool.plugin.mutate.backend.diff_parser;

    immutable lines = `diff --git a/standalone2.d b/standalone2.d
index 0123..2345 100644
--- a/standalone.d
+++ b/standalone2.d
@@ -31,7 +31,6 @@ import std.algorithm : map;
 import std.array : Appender, appender, array;
 import std.datetime : SysTime;
+import std.format : format;
-import std.typecons : Tuple;

 import d2sqlite3 : sqlDatabase = Database;

@@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
 struct Database {
     import std.conv : to;
     import std.exception : collectException;
-    import std.typecons : Nullable;
+    import std.typecons : Nullable, Flag, No;
     import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;

+    sqlDatabase db;`;

    UnifiedDiffParser p;
    foreach (line; lines.lineSplitter)
        p.process(line);
    auto diff = p.result;

    auto files = FileFilter(".".Path.AbsolutePath, true, diff);

    files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
    files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
}

/** Convert to an absolute path by finding the first match among the compiler flags.
 *
 * The directories are probed in order: `workDir`, then `includes`, then
 * `systemIncludes`. The first directory in which `file` exists wins.
 *
 * Returns: the path that was found, otherwise none.
 */
Optional!AbsolutePath toAbsolutePath(Path file, AbsolutePath workDir,
        ParseFlags.Include[] includes, SystemIncludePath[] systemIncludes) @trusted nothrow {
    import std.algorithm : map, filter;
    import std.file : exists;
    import std.path : buildPath;

    // probe one directory. An exception from the file system is treated as
    // "not found".
    Optional!AbsolutePath lookup(string dir) nothrow {
        const p = buildPath(dir, file);
        try {
            if (exists(p))
                return some(AbsolutePath(p));
        } catch (Exception e) {
        }
        return none!AbsolutePath;
    }

    auto fromWorkDir = lookup(workDir.toString);
    if (fromWorkDir.hasValue)
        return fromWorkDir;

    foreach (inc; includes) {
        auto hit = lookup(inc.payload);
        if (hit.hasValue)
            return hit;
    }

    foreach (inc; systemIncludes) {
        auto hit = lookup(inc.value);
        if (hit.hasValue)
            return hit;
    }

    return none!AbsolutePath;
}

/** Returns: the root files that need to be re-analyzed because either them or
 * their dependency has changed.
 */
bool[Path] dependencyAnalyze(const AbsolutePath dbPath, FilesysIO fio) @trusted {
    import dextool.cachetools : nullableCache;
    import dextool.plugin.mutate.backend.database : FileId;

    auto db = Database.make(dbPath);

    typeof(return) rval;

    // pessimistic. Add all as needing to be analyzed.
1144 foreach (a; db.getRootFiles.map!(a => db.getFile(a).get)) { 1145 rval[a] = false; 1146 } 1147 1148 try { 1149 auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256, 1150 30.dur!"seconds"); 1151 auto getFileName = nullableCache!(FileId, Path, (FileId id) => db.getFile(id))(256, 1152 30.dur!"seconds"); 1153 auto getFileDbChecksum = nullableCache!(string, Checksum, 1154 (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds"); 1155 auto getFileFsChecksum = nullableCache!(AbsolutePath, Checksum, (AbsolutePath p) { 1156 return checksum(p); 1157 })(256, 30.dur!"seconds"); 1158 1159 Checksum[Path] dbDeps; 1160 foreach (a; db.dependencyApi.getAll) 1161 dbDeps[a.file] = a.checksum; 1162 1163 const isToolVersionDifferent = db.isToolVersionDifferent(ToolVersion(dextoolBinaryId)); 1164 bool isChanged(T)(T f) { 1165 if (isToolVersionDifferent) { 1166 // because the tool version is updated then all files need to 1167 // be re-analyzed. an update can mean that scheman are 1168 // improved, mutants has been changed/removed etc. it is 1169 // unknown. the only way to be sure is to re-analyze all files. 1170 return true; 1171 } 1172 1173 if (f.rootCs != getFileFsChecksum(fio.toAbsoluteRoot(f.root))) 1174 return true; 1175 1176 foreach (a; f.deps.filter!(a => getFileFsChecksum(fio.toAbsoluteRoot(a)) != dbDeps[a])) { 1177 return true; 1178 } 1179 1180 return false; 1181 } 1182 1183 foreach (f; db.getRootFiles 1184 .map!(a => db.getFile(a).get) 1185 .map!(a => tuple!("root", "rootCs", "deps")(a, 1186 getFileDbChecksum(a), db.dependencyApi.get(a))) 1187 .cache 1188 .filter!(a => isChanged(a)) 1189 .map!(a => a.root)) { 1190 rval[f] = true; 1191 } 1192 } catch (Exception e) { 1193 logger.warning(e.msg); 1194 } 1195 1196 logger.trace("Dependency analyze: ", rval); 1197 1198 return rval; 1199 }