/**cpptooling.analyzer.clang
Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
License: MPL-2
Author: Joakim Brännström (joakim.brannstrom@gmx.com)

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

#SPC-analyzer

TODO cache the checksums. They are *heavy*.
*/
module dextool.plugin.mutate.backend.analyze;

import logger = std.experimental.logger;
import std.algorithm : map, filter, joiner, cache;
import std.array : array, appender, empty;
import std.concurrency;
import std.datetime : dur, Duration;
import std.exception : collectException;
import std.parallelism;
import std.range : tee, enumerate;
import std.typecons : tuple;

import colorlog;
import my.filter : GlobFilter;
import my.named_type;
import my.optional;
import my.set;

static import colorlog;

import dextool.utility : dextoolBinaryId;

import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB,
    ParsedCompileCommandRange, ParsedCompileCommand, ParseFlags, SystemIncludePath;
import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataResult;
import dextool.plugin.mutate.backend.database : Database, LineMetadata,
    MutationPointEntry2, DepFile;
import dextool.plugin.mutate.backend.database.type : MarkedMutant, TestFile,
    TestFilePath, TestFileChecksum, ToolVersion;
import dextool.plugin.mutate.backend.diff_parser : Diff;
import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
import dextool.plugin.mutate.backend.utility : checksum, Checksum, getProfileResult, Profile;
import dextool.plugin.mutate.backend.type : Mutation;
import dextool.plugin.mutate.type : MutationKind;
import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze, ConfigSchema, ConfigCoverage;
import dextool.type : ExitStatusType, AbsolutePath, Path;

version (unittest) {
    import unit_threaded.assertions;
}

alias log = colorlog.log!"analyze";

/** Analyze the files in `frange` for mutations.
 *
 * One `storeActor` thread is spawned to write to the database. One
 * `testPathActor` task and one `analyzeActor` task per file to analyze are put
 * on a task pool. The function blocks until the pool has finished and the
 * store actor has answered with a `StoreDoneMsg`.
 *
 * Params:
 *  dbPath = database that the store actor writes the result to.
 *  userKinds = mutation kinds, converted with `toInternal` before use.
 *  analyzeConf = pool size, file matchers, test paths, diff and profile settings.
 *  compilerConf = forwarded to each `analyzeActor`.
 *  schemaConf = forwarded to the `storeActor`.
 *  covConf = forwarded to both the `analyzeActor`s and the `storeActor`.
 *  frange = compile commands; a file that occurs more than once is only analyzed once.
 *  valLoc = duplicated for each `analyzeActor`.
 *  fio = duplicated for each actor.
 *
 * Returns: always `ExitStatusType.Ok`; failures are logged.
 */
ExitStatusType runAnalyzer(const AbsolutePath dbPath, const MutationKind[] userKinds,
        ConfigAnalyze analyzeConf, ConfigCompiler compilerConf,
        ConfigSchema schemaConf, ConfigCoverage covConf,
        ParsedCompileCommandRange frange, ValidateLoc valLoc, FilesysIO fio) @trusted {
    import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
    import dextool.plugin.mutate.backend.mutation_type : toInternal;

    // Falls back to `FileFilter.init`, which accepts every file, when the diff
    // can not be read.
    auto fileFilter = () {
        try {
            return FileFilter(fio.getOutputDir, analyzeConf.unifiedDiffFromStdin,
                    analyzeConf.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
        } catch (Exception e) {
            log.info(e.msg);
            log.warning("Unable to parse diff");
        }
        return FileFilter.init;
    }();

    bool shouldAnalyze(AbsolutePath p) {
        return analyzeConf.fileMatcher.match(p.toString) && fileFilter.shouldAnalyze(p);
    }

    // a pool size of zero means "let TaskPool choose".
    auto pool = () {
        if (analyzeConf.poolSize == 0)
            return new TaskPool();
        return new TaskPool(analyzeConf.poolSize);
    }();

    // if a dependency of a root file has been changed.
    auto changedDeps = dependencyAnalyze(dbPath, fio);

    // will only be used by one thread at a time.
    // The last argument is the root files whose entry in `changedDeps` is false.
    auto store = spawn(&storeActor, dbPath, cast(shared) fio.dup,
            cast(shared) StoreConfig(analyzeConf, schemaConf, covConf),
            cast(immutable) changedDeps.byKeyValue
            .filter!(a => !a.value)
            .map!(a => a.key)
            .array);

    try {
        pool.put(task!testPathActor(analyzeConf.testPaths,
                analyzeConf.testFileMatcher, fio.dup, store));
    } catch (Exception e) {
        log.trace(e);
        log.warning(e.msg);

        // The store actor keeps waiting until it has received a
        // TestFileResult. When the task could not be scheduled nobody will
        // send one and the `receiveOnly!StoreDoneMsg` below would block
        // forever. Send an empty result so the store actor can finish.
        // NOTE(review): the store actor compares the stored test files against
        // the received result, so an empty result removes the stored test
        // files until the next successful analyze.
        try {
            send(store, cast(immutable) new TestFileResult);
        } catch (Exception e2) {
            log.warning(e2.msg);
        }
    }

    auto kinds = toInternal(userKinds);
    int taskCnt;
    Set!AbsolutePath alreadyAnalyzed;
    // dfmt off
    foreach (f; frange
            // The tool only supports analyzing a file one time.
            // This optimize it in some cases where the same file occurs
            // multiple times in the compile commands database.
            .filter!(a => a.cmd.absoluteFile !in alreadyAnalyzed)
            .tee!(a => alreadyAnalyzed.add(a.cmd.absoluteFile))
            .cache
            .filter!(a => shouldAnalyze(a.cmd.absoluteFile))
            ) {
        try {
            // a root file that is known and unchanged is skipped unless the
            // user forces the analyze.
            if (auto v = fio.toRelativeRoot(f.cmd.absoluteFile) in changedDeps) {
                if (!(*v || analyzeConf.forceSaveAnalyze))
                    continue;
            }

            //log.infof("%s sending", f.cmd.absoluteFile);
            pool.put(task!analyzeActor(kinds, f, valLoc.dup, fio.dup, AnalyzeConfig(compilerConf, analyzeConf, covConf), store));
            taskCnt++;
        } catch (Exception e) {
            log.trace(e);
            log.warning(e.msg);
        }
    }
    // dfmt on

    changedDeps = typeof(changedDeps).init; // free the memory

    // inform the store actor of how many analyse results it should *try* to
    // save.
    send(store, AnalyzeCntMsg(taskCnt));
    // wait for all files to be analyzed
    pool.finish(true);
    // wait for the store actor to finish
    receiveOnly!StoreDoneMsg;

    if (analyzeConf.profile)
        try {
            import std.stdio : writeln;

            writeln(getProfileResult.toString);
        } catch (Exception e) {
            log.warning("Unable to print the profile data: ", e.msg).collectException;
        }

    return ExitStatusType.Ok;
}

@safe:

/** Filter for which files to analyze. Either all files or those in a diff.
 *
 * The file extension is ignored when matching. A file is analyzed when its
 * path relative to `root`, without the extension, is equal to the path of a
 * file in the diff without its extension. The purpose is to lessen the
 * problem that headers would otherwise be skipped because they do not exist
 * in `compile_commands.json`. E.g. a diff that contains "foo.hpp" leads to
 * "foo.cpp" being analyzed.
 *
 * TODO: this may create problems for header only libraries. Only the unittest
 * would include the header, which means that for this to work the unittest
 * would have to reside in the same directory as the header file. They
 * normally never do. A diff of a header only lib then leads to "no files
 * analyzed".
 */
struct FileFilter {
    import std.path : stripExtension;

    /// Paths from the diff with the extension removed.
    Set!string files;
    /// When false every file passes the filter.
    bool useFileFilter;
    /// Paths given to `shouldAnalyze` are made relative to this directory.
    AbsolutePath root;

    this(AbsolutePath root, bool fromStdin, Diff diff) {
        this.root = root;
        this.useFileFilter = fromStdin;
        foreach (a; diff.toRange(root)) {
            files.add(a.key.stripExtension);
        }
    }

    /// Returns: true if `p` should be analyzed.
    bool shouldAnalyze(AbsolutePath p) {
        import std.path : relativePath;

        if (!useFileFilter) {
            return true;
        }

        return relativePath(p, root).stripExtension in files;
    }
}

/// Number of analyze tasks that has been spawned that the `storeActor` should wait for.
struct AnalyzeCntMsg {
    int value;
}

/// The main thread is waiting for storeActor to send this message.
struct StoreDoneMsg {
}

/// The configuration that an `analyzeActor` needs.
struct AnalyzeConfig {
    ConfigCompiler compiler;
    ConfigAnalyze analyze;
    ConfigCoverage coverage;
}

/** Start an analyze of a file.
 *
 * Exactly one `Analyze.Result` is sent to `storeActor` unless the send itself
 * fails. An empty result is sent when the analyze throws or when any of the
 * analyzed files is not supported.
 *
 * Params:
 *  kinds = mutation kinds to generate.
 *  fileToAnalyze = the compile command for the file.
 *  vloc = forwarded to `Analyze`.
 *  fio = file system access.
 *  conf = compiler and coverage settings.
 *  storeActor = receiver of the result.
 */
void analyzeActor(Mutation.Kind[] kinds, ParsedCompileCommand fileToAnalyze,
        ValidateLoc vloc, FilesysIO fio, AnalyzeConfig conf, Tid storeActor) @trusted nothrow {
    auto profile = Profile("analyze file " ~ fileToAnalyze.cmd.absoluteFile);

    try {
        log.tracef("%s begin", fileToAnalyze.cmd.absoluteFile);
        auto analyzer = Analyze(kinds, vloc, fio, Analyze.Config(conf.compiler.forceSystemIncludes,
                conf.coverage.use, conf.compiler.allowErrors.get));
        analyzer.process(fileToAnalyze);

        bool onlyValidFiles = true;
        foreach (a; analyzer.result.idFile.byKey) {
            if (!isFileSupported(fio, a)) {
                // the file name is the argument for %s. Without it the
                // formatting fails and the user is never told which file it is.
                log.warningf(
                        "%s: file not supported. It must be in utf-8 format without a BOM marker", a);
                onlyValidFiles = false;
                break;
            }
        }

        if (onlyValidFiles)
            send(storeActor, cast(immutable) analyzer.result);
        else // Result is a class thus `Result.init` is null which the receiver dereferences.
            send(storeActor, cast(immutable) new Analyze.Result);
        log.tracef("%s end", fileToAnalyze.cmd.absoluteFile);
        return;
    } catch (Exception e) {
        log.error(e.msg).collectException;
    }

    // send a dummy result
    try {
        log.tracef("%s failed", fileToAnalyze.cmd.absoluteFile);
        send(storeActor, cast(immutable) new Analyze.Result);
    } catch (Exception e) {
        log.error(e.msg).collectException;
    }
}

/// The test files that `testPathActor` found.
class TestFileResult {
    /// How long it took to find and checksum the files.
    Duration time;
    /// The key is the checksum of the content of the file.
    TestFile[Checksum] files;
}

/** Find the test files in `userPaths` and checksum them.
 *
 * Directories are expanded one level at a time (`SpanMode.shallow`); the
 * entries are appended to the work list. A file is part of the result if it
 * matches `matcher`.
 *
 * Params:
 *  userPaths = files and directories to inspect.
 *  matcher = filter for which files to include.
 *  fio = used to read the files and to make the paths relative.
 *  storeActor = receiver of the `TestFileResult`.
 */
void testPathActor(const AbsolutePath[] userPaths, GlobFilter matcher, FilesysIO fio, Tid storeActor) @trusted nothrow {
    import std.datetime : Clock;
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.file : isDir, isFile, dirEntries, SpanMode;
    import my.container.vector;

    auto profile = Profile("checksum test files");

    auto sw = StopWatch(AutoStart.yes);

    TestFile makeTestFile(const AbsolutePath file) {
        auto cs = checksum(fio.makeInput(file).content[]);
        return TestFile(TestFilePath(fio.toRelativeRoot(file)),
                TestFileChecksum(cs), Clock.currTime);
    }

    auto paths = vector(userPaths.dup);

    auto tfiles = new TestFileResult;

    while (!paths.empty) {
        try {
            if (isDir(paths.front)) {
                log.trace("  Test directory ", paths.front);
                foreach (a; dirEntries(paths.front, SpanMode.shallow).map!(
                        a => AbsolutePath(a.name))) {
                    paths.put(a);
                }
            } else if (isFile(paths.front) && matcher.match(paths.front)) {
                log.trace("  Test saved ", paths.front);
                auto t = makeTestFile(paths.front);
                tfiles.files[t.checksum.get] = t;
            }
        } catch (Exception e) {
            log.warning(e.msg).collectException;
        }

        paths.popFront;
    }

    // must be set before the result is sent. The receiver reads it as soon as
    // the message arrives and the object is handed over as immutable.
    tfiles.time = sw.peek;

    log.infof("Found %s test files", tfiles.files.length).collectException;

    try {
        send(storeActor, cast(immutable) tfiles);
    } catch (Exception e) {
        // the store actor waits for this message thus make the failure visible.
        log.error(e.msg).collectException;
    }
}

/// The configuration that the `storeActor` needs.
struct StoreConfig {
    ConfigAnalyze analyze;
    ConfigSchema schema;
    ConfigCoverage coverage;
}

/** Store the result of the analyze.
 *
 * Receives `AnalyzeCntMsg`, `Analyze.Result` and `TestFileResult` messages and
 * saves them to the database. When done, or on failure, a `StoreDoneMsg` is
 * sent to the owner thread.
 *
 * Params:
 *  dbPath = the database to open.
 *  fioShared = file system access, cast back to `FilesysIO` before use.
 *  confShared = configuration, cast back to `StoreConfig` before use.
 *  rootFiles = root files that are kept, together with their dependencies,
 *      when the database is pruned even though no result was received for them.
 */
void storeActor(const AbsolutePath dbPath, scope shared FilesysIO fioShared,
        scope shared StoreConfig confShared, immutable Path[] rootFiles) @trusted nothrow {
    import cachetools : CacheLRU;
    import dextool.cachetools : nullableCache;
    import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

    const conf = cast() confShared;

    // The conditions that the storeActor is waiting for receiving the results
    // from the workers.
    static struct RecvWaiter {
        // unknown until an AnalyzeCntMsg is received.
        int analyzeFileWaitCnt = int.max;
        int analyzeFileCnt;

        bool isTestFilesDone;

        bool isWaiting() {
            return analyzeFileCnt < analyzeFileWaitCnt || !isTestFilesDone;
        }
    }

    static struct SchemataSaver {
        import sumtype;
        import my.optional;
        import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataBuilder;

        typeof(ConfigSchema.minMutantsPerSchema) minMutantsPerSchema;
        typeof(ConfigSchema.mutantsPerSchema) mutantsPerSchema;
        SchemataBuilder builder;

        void put(FilesysIO fio, SchemataResult.Schemata[AbsolutePath] a) {
            builder.put(fio, a);
        }

        /// Save the schema in `value`, if any, for the mutants that exist in the database.
        void process(ref Database db, Optional!(SchemataBuilder.ET) value) {
            value.match!((Some!(SchemataBuilder.ET) a) {
                try {
                    auto mutants = a.mutants
                        .map!(a => db.getMutationStatusId(a.id))
                        .filter!(a => !a.isNull)
                        .map!(a => a.get)
                        .array;
                    if (!mutants.empty) {
                        const id = db.putSchemata(a.checksum, a.fragments, mutants);
                        log.tracef(!id.isNull, "Saving schema %s with %s mutants",
                            id.get.get, mutants.length);
                    }
                } catch (Exception e) {
                    log.trace(e.msg);
                }
            }, (None a) {});
        }

        /// Consume fragments used by scheman containing >min mutants.
        void intermediate(ref Database db) {
            builder.discardMinScheman = false;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            // NOTE(review): the minimum is set to `mutantsPerSchema`, not
            // `minMutantsPerSchema`. Presumably intended so only full schemas
            // are produced here and the rest is left for `finalize` - confirm.
            builder.minMutantsPerSchema = mutantsPerSchema.get;

            while (!builder.isDone) {
                process(db, builder.next);
            }

            builder.restart;
        }

        /// Consume all fragments or discard.
        void finalize(ref Database db) {
            builder.discardMinScheman = true;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            builder.minMutantsPerSchema = minMutantsPerSchema.get;

            // two loops to pass over all mutants and retry new schema
            // compositions. Any schema that is less than the minimum will be
            // discarded so the number of mutants will shrink.
            while (!builder.isDone) {
                while (!builder.isDone) {
                    process(db, builder.next);
                }
                builder.restart;
            }
        }
    }

    auto schemas = SchemataSaver(conf.schema.minMutantsPerSchema, conf.schema.mutantsPerSchema);

    // Does all the work. Always finishes by sending a StoreDoneMsg to the owner.
    void helper(FilesysIO fio, ref Database db) nothrow {
        // A file is at most saved one time to the database.
        Set!AbsolutePath savedFiles;

        // treated as different if the version can not be read.
        const isToolVersionDifferent = () nothrow{
            try {
                return db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
            } catch (Exception e) {
            }
            return true;
        }();

        auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
                30.dur!"seconds");
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
            return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]);
        })(256, 30.dur!"seconds");

        static struct Files {
            Checksum[Path] value;

            this(ref Database db) {
                foreach (a; db.getDetailedFiles) {
                    value[a.file] = a.fileChecksum;
                }
            }
        }

        // Save the result from the analyze of one root file.
        void save(immutable Analyze.Result result_) {
            import dextool.plugin.mutate.backend.type : Language;

            auto result = cast() result_;

            auto profile = Profile("save " ~ result.root);

            // mark files that have an unchanged checksum as "already saved"
            foreach (f; result.idFile
                    .byKey
                    .filter!(a => a !in savedFiles)
                    .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a)
                        && !conf.analyze.forceSaveAnalyze && !isToolVersionDifferent)) {
                log.info("Unchanged ".color(Color.yellow), f);
                savedFiles.add(f);
            }

            // only saves mutation points to a file one time.
            {
                auto app = appender!(MutationPointEntry2[])();
                bool isChanged = isToolVersionDifferent;
                foreach (mp; result.mutationPoints
                        .map!(a => tuple!("data", "file")(a, fio.toAbsoluteRoot(a.file)))
                        .filter!(a => a.file !in savedFiles)) {
                    app.put(mp.data);
                }
                foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) {
                    isChanged = true;
                    log.info("Saving ".color(Color.green), f);
                    const relp = fio.toRelativeRoot(f);

                    // this is critical in order to remove old data about a file.
                    db.removeFile(relp);

                    const info = result.infoId[result.idFile[f]];
                    db.put(relp, info.checksum, info.language, f == result.root);
                    savedFiles.add(f);
                }
                db.put(app.data, fio.getOutputDir);

                if (result.root !in savedFiles) {
                    // this occurs when the file is e.g. a unittest that uses a
                    // header only library. The unittests are not mutated thus
                    // no mutation points exists in them but we want dextool to
                    // still, if possible, track the unittests for changes.
                    isChanged = true;
                    const relp = fio.toRelativeRoot(result.root);
                    db.removeFile(relp);
                    // the language do not matter because it is a file without
                    // any mutants.
                    db.put(relp, result.rootCs, Language.init, true);
                    savedFiles.add(fio.toAbsoluteRoot(result.root));
                }

                // must always update dependencies because they may not contain
                // mutants. Only files that are changed and contain mutants
                // trigger isChanged to be true.
                db.dependencyApi.set(fio.toRelativeRoot(result.root), result.dependencies);

                if (isChanged) {
                    foreach (a; result.coverage.byKeyValue) {
                        const fid = getFileId(fio.toRelativeRoot(result.fileId[a.key]));
                        if (!fid.isNull) {
                            db.clearCoverageMap(fid.get);
                            db.putCoverageMap(fid.get, a.value);
                        }
                    }

                    // only save the schematas if mutation points where saved.
                    // This ensure that only schematas for changed/new files
                    // are saved.
                    schemas.put(fio, result.schematas);
                    schemas.intermediate(db);
                }
            }

            {
                // a missing file is only reported one time.
                Set!long printed;
                auto app = appender!(LineMetadata[])();
                foreach (md; result.metadata) {
                    const localId = Analyze.Result.LocalFileId(md.id.get);
                    // transform the ID from local to global.
                    const fid = getFileId(fio.toRelativeRoot(result.fileId[localId]));
                    if (fid.isNull && !printed.contains(md.id.get)) {
                        printed.add(md.id.get);
                        log.info("File with suppressed mutants (// NOMUT) not in the database: ",
                                result.fileId[localId]).collectException;
                    } else if (!fid.isNull) {
                        app.put(LineMetadata(fid.get, md.line, md.attr));
                    }
                }
                db.put(app.data);
            }
        }

        // Remove the test files from the database that are not in `result`
        // and add those that are new. The checksum is the identity.
        void saveTestResult(immutable TestFileResult result) {
            auto profile = Profile("save test files");
            Set!Checksum old;

            foreach (a; db.getTestFiles) {
                old.add(a.checksum.get);
                if (a.checksum.get !in result.files) {
                    log.info("Removed test file ", a.file.get.toString);
                    db.removeFile(a.file);
                }
            }

            foreach (a; result.files.byValue.filter!(a => a.checksum.get !in old)) {
                log.info("Saving test file ", a.file.get.toString);
                db.put(a);
            }
        }

        // listen for results from workers until the expected number is processed.
        void recv() {
            log.info("Updating files");
            RecvWaiter waiter;

            while (waiter.isWaiting) {
                try {
                    receive((AnalyzeCntMsg a) {
                        waiter.analyzeFileWaitCnt = a.value;
                    }, (immutable Analyze.Result a) {
                        auto trans = db.transaction;
                        // counted before the save such that a failing save
                        // do not make the actor wait forever.
                        waiter.analyzeFileCnt++;
                        save(a);
                        trans.commit;

                        log.infof("Analyzed file %s/%s", waiter.analyzeFileCnt,
                            waiter.analyzeFileWaitCnt);
                    }, (immutable TestFileResult a) {
                        auto trans = db.transaction;
                        waiter.isTestFilesDone = true;
                        saveTestResult(a);
                        trans.commit;

                        log.info("Done analyzing test files in ", a.time);
                    });
                } catch (Exception e) {
                    log.trace(e).collectException;
                    log.warning(e.msg).collectException;
                }
            }
        }

        // Remove the files from the database that are not in `savedFiles`.
        void pruneFiles() {
            import std.path : buildPath;

            auto profile = Profile("prune files");

            log.info("Pruning the database of dropped files");
            auto files = db.getFiles.map!(a => fio.toAbsoluteRoot(a)).toSet;

            foreach (f; files.setDifference(savedFiles).toRange) {
                log.info("Removing ".color(Color.red), f);
                db.removeFile(fio.toRelativeRoot(f));
            }
        }

        void addRoots() {
            if (conf.analyze.forceSaveAnalyze || isToolVersionDifferent)
                return;

            // add root files and their dependencies that has not been analyzed because nothing has changed.
            // By adding them they are not removed.

            auto profile = Profile("add roots and dependencies");
            foreach (a; rootFiles) {
                auto p = fio.toAbsoluteRoot(a);
                if (p !in savedFiles) {
                    savedFiles.add(p);
                    // fake text for the user to tell them that yes, the files have
                    // been analyzed.
                    log.info("Analyzing ", a);
                    log.info("Unchanged ".color(Color.yellow), a);
                }
            }
            foreach (a; rootFiles.map!(a => db.dependencyApi.get(a)).joiner) {
                savedFiles.add(fio.toAbsoluteRoot(a));
            }
        }

        void fastDbOn() {
            if (!conf.analyze.fastDbStore)
                return;
            log.info(
                    "Turning OFF sqlite3 synchronization protection to improve the write performance");
            log.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
            db.run("PRAGMA synchronous = OFF");
            db.run("PRAGMA journal_mode = MEMORY");
        }

        void fastDbOff() {
            if (!conf.analyze.fastDbStore)
                return;
            db.run("PRAGMA synchronous = ON");
            db.run("PRAGMA journal_mode = DELETE");
        }

        try {
            import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

            // by making the mailbox size follow the number of workers the overall
            // behavior will slow down if saving to the database is too slow. This
            // avoids excessive or even fatal memory usage.
            setMaxMailboxSize(thisTid, 2, OnCrowding.block);

            fastDbOn();

            {
                auto trans = db.transaction;
                auto profile = Profile("prune old schemas");
                if (isToolVersionDifferent) {
                    log.info("Prune database of schematan created by the old version");
                    db.deleteAllSchemas;
                }
                trans.commit;
            }

            recv();
            {
                auto trans = db.transaction;
                schemas.finalize(db);
                trans.commit;
            }

            {
                auto trans = db.transaction;
                addRoots();

                log.info("Resetting timeout context");
                resetTimeoutContext(db);

                log.info("Updating metadata");
                db.updateMetadata;

                if (conf.analyze.prune) {
                    pruneFiles();
                    {
                        auto profile = Profile("remove orphaned mutants");
                        log.info("Removing orphaned mutants");
                        db.removeOrphanedMutants;
                    }
                    {
                        auto profile = Profile("prune schemas");
                        log.info("Prune the database of unused schemas");
                        db.pruneSchemas;
                    }
                    {
                        auto profile = Profile("prune dependencies");
                        log.info("Prune dependencies");
                        db.dependencyApi.cleanup;
                    }
                }

                log.info("Updating manually marked mutants");
                updateMarkedMutants(db);
                printLostMarkings(db.getLostMarkings);

                if (isToolVersionDifferent) {
                    log.info("Updating tool version");
                    db.updateToolVersion(ToolVersion(dextoolBinaryId));
                }

                log.info("Committing changes");
                trans.commit;
                log.info("Ok".color(Color.green));
            }

            fastDbOff();

            if (isToolVersionDifferent) {
                auto profile = Profile("compact");
                log.info("Compacting the database");
                db.vacuum;
            }
        } catch (Exception e) {
            log.error(e.msg).collectException;
            log.error("Failed to save the result of the analyze to the database").collectException;
        }

        try {
            send(ownerTid, StoreDoneMsg.init);
        } catch (Exception e) {
            log.errorf("Fatal error. Unable to send %s to the main thread",
                    StoreDoneMsg.init).collectException;
        }
    }

    // `helper` is the one that tells the owner that the store is done. If the
    // database can not be opened it never runs, which would leave the owner
    // waiting forever for a StoreDoneMsg.
    bool helperStarted;

    try {
        FilesysIO fio = cast(FilesysIO) fioShared;
        auto db = Database.make(dbPath);
        helperStarted = true;
        helper(fio, db);
    } catch (Exception e) {
        log.error(e.msg).collectException;
    }

    if (!helperStarted) {
        try {
            send(ownerTid, StoreDoneMsg.init);
        } catch (Exception e) {
            log.errorf("Fatal error. Unable to send %s to the main thread",
                    StoreDoneMsg.init).collectException;
        }
    }
}

/// Analyze a file for mutants.
struct Analyze {
    import std.regex : Regex, regex, matchFirst;
    import std.typecons : Yes;
    import libclang_ast.context : ClangContext;

    static struct Config {
        bool forceSystemIncludes;
        bool saveCoverage;
        bool allowErrors;
    }

    private {
        // matches a `//` or `/*` comment that starts with NOMUT, optionally
        // followed by a tag in parentheses and a free text comment.
        static immutable rawReNomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;

        Regex!char re_nomut;

        ValidateLoc valLoc;
        FilesysIO fio;

        Cache cache;

        Result result;

        Config conf;

        Mutation.Kind[] kinds;
    }

    this(Mutation.Kind[] kinds, ValidateLoc valLoc, FilesysIO fio, Config conf) @trusted {
        this.kinds = kinds;
        this.valLoc = valLoc;
        this.fio = fio;
        this.cache = new Cache;
        this.re_nomut = regex(rawReNomut);
        this.result = new Result;
        this.conf = conf;
    }

    /** Analyze the file in `commandsForFileToAnalyze` and populate `result`.
     *
     * Nothing is done if the file does not exist. A failure during the
     * analyze is logged and leaves `result` partially populated.
     */
    void process(ParsedCompileCommand commandsForFileToAnalyze) @safe {
        import std.file : exists;

        commandsForFileToAnalyze.flags.forceSystemIncludes = conf.forceSystemIncludes;

        try {
            if (!exists(commandsForFileToAnalyze.cmd.absoluteFile)) {
                log.warningf("Failed to analyze %s. Do not exist",
                        commandsForFileToAnalyze.cmd.absoluteFile);
                return;
            }
        } catch (Exception e) {
            log.warning(e.msg);
            return;
        }

        result.root = commandsForFileToAnalyze.cmd.absoluteFile;

        try {
            result.rootCs = checksum(result.root);

            auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
            auto tstream = new TokenStreamImpl(ctx);

            analyzeForMutants(commandsForFileToAnalyze, result.root, ctx, tstream);
            foreach (f; result.fileId.byValue)
                analyzeForComments(f, tstream);
        } catch (Exception e) {
            () @trusted { log.trace(e); }();
            log.info(e.msg);
            log.error("failed analyze of ",
                    commandsForFileToAnalyze.cmd.absoluteFile).collectException;
        }
    }

    /** Parse `fileToAnalyze` and save the mutation points, schematas,
     * dependencies and, if configured, the coverage in `result`.
     *
     * Returns early, with nothing saved, if the file has parse errors and the
     * configuration does not allow errors.
     */
    void analyzeForMutants(ParsedCompileCommand commandsForFileToAnalyze,
            AbsolutePath fileToAnalyze, ref ClangContext ctx, TokenStream tstream) @safe {
        import my.gc.refc : RefCounted;
        import dextool.plugin.mutate.backend.analyze.ast : Ast;
        import dextool.plugin.mutate.backend.analyze.pass_clang;
        import dextool.plugin.mutate.backend.analyze.pass_coverage;
        import dextool.plugin.mutate.backend.analyze.pass_filter;
        import dextool.plugin.mutate.backend.analyze.pass_mutant;
        import dextool.plugin.mutate.backend.analyze.pass_schemata;
        import libclang_ast.check_parse_result : hasParseErrors, logDiagnostic;

        log.info("Analyzing ", fileToAnalyze);
        RefCounted!(Ast) ast;
        {
            auto tu = ctx.makeTranslationUnit(fileToAnalyze,
                    commandsForFileToAnalyze.flags.completeFlags);
            if (tu.hasParseErrors) {
                logDiagnostic(tu);
                log.warningf("Compile error in %s", fileToAnalyze);
                if (!conf.allowErrors) {
                    log.warning("Skipping");
                    return;
                }
            }

            auto res = toMutateAst(tu.cursor, fio);
            ast = res.ast;
            saveDependencies(commandsForFileToAnalyze.flags, result.root, res.dependencies);
            log!"analyze.pass_clang".trace(ast);
        }

        auto codeMutants = () {
            auto mutants = toMutants(ast, fio, valLoc, kinds);
            log!"analyze.pass_mutant".trace(mutants);

            log!"analyze.pass_filter".trace("filter mutants");
            mutants = filterMutants(fio, mutants);
            log!"analyze.pass_filter".trace(mutants);

            return toCodeMutants(mutants, fio, tstream);
        }();
        debug logger.trace(codeMutants);

        {
            auto schemas = toSchemata(ast, fio, codeMutants);
            log!"analyze.pass_schema".trace(schemas);
            log.tracef("path dedup count:%s length_acc:%s", ast.paths.count,
                    ast.paths.lengthAccum);

            result.schematas = schemas.getSchematas;
        }

        result.mutationPoints = codeMutants.points.byKeyValue.map!(
                a => a.value.map!(b => MutationPointEntry2(fio.toRelativeRoot(a.key),
                b.offset, b.sloc.begin, b.sloc.end, b.mutants))).joiner.array;
        // give each file with mutation points an ID that is local to this result.
        foreach (f; codeMutants.points.byKey) {
            const id = Result.LocalFileId(result.idFile.length);
            result.idFile[f] = id;
            result.fileId[id] = f;
            result.infoId[id] = Result.FileInfo(codeMutants.csFiles[f], codeMutants.lang);
        }

        if (conf.saveCoverage) {
            auto cov = toCoverage(ast, fio, valLoc);
            debug logger.trace(cov);

            // coverage is only saved for files that have a local ID.
            foreach (a; cov.points.byKeyValue) {
                if (auto id = a.key in result.idFile) {
                    result.coverage[*id] = a.value;
                }
            }
        }
    }

    /** Tokens are always from the same file.
     *
     * Saves a `LineMetadata` in `result` for each comment token in `file`
     * that matches the NOMUT pattern. Files without a local ID are ignored.
     *
     * TODO: move this to pass_clang.
     */
    void analyzeForComments(AbsolutePath file, TokenStream tstream) @trusted {
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

        if (auto localId = file in result.idFile) {
            const fid = FileId(localId.get);

            auto mdata = appender!(LineMetadata[])();
            foreach (t; cache.getTokens(AbsolutePath(file), tstream)
                    .filter!(a => a.kind == CXTokenKind.comment)) {
                auto m = matchFirst(t.spelling, re_nomut);
                if (m.whichPattern == 0)
                    continue;

                mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
                log.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
            }

            result.metadata ~= mdata.data;
        }
    }

    /** Save the `dependencies` that can be resolved to an absolute path and
     * are inside the output directory, together with their checksum, in
     * `result`. A dependency that fails to be checksummed is skipped.
     */
    void saveDependencies(ParseFlags flags, AbsolutePath root, Path[] dependencies) @trusted {
        import std.algorithm : cache;
        import std.mmfile;

        auto rootDir = root.dirName;

        foreach (p; dependencies.map!(a => toAbsolutePath(a, rootDir,
                flags.includes, flags.systemIncludes))
                .cache
                .filter!(a => a.hasValue)
                .map!(a => a.orElse(AbsolutePath.init))
                .filter!(a => valLoc.isInsideOutputDir(a))) {
            try {
                result.dependencies ~= DepFile(fio.toRelativeRoot(p), checksum(p));
            } catch (Exception e) {
                log.trace(e.msg).collectException;
            }
        }

        log.trace(result.dependencies);
    }

    static class Result {
        import dextool.plugin.mutate.backend.analyze.ast : Interval;
        import dextool.plugin.mutate.backend.database.type : SchemataFragment;
        import dextool.plugin.mutate.backend.type : Language, CodeChecksum, SchemataChecksum;

        alias LocalFileId = NamedType!(long, Tag!"LocalFileId", long.init,
                TagStringable, Hashable);
        alias LocalSchemaId = NamedType!(long, Tag!"LocalSchemaId", long.init,
                TagStringable, Hashable);

        MutationPointEntry2[] mutationPoints;

        static struct FileInfo {
            Checksum checksum;
            Language language;
        }

        /// The file that is analyzed, which is a root
        AbsolutePath root;
        Checksum rootCs;

        /// The dependencies the root has.
        DepFile[] dependencies;

        /// The key is the ID from idFile.
        FileInfo[LocalFileId] infoId;

        /// The IDs is unique for *this* analyze, not globally.
        LocalFileId[AbsolutePath] idFile;
        AbsolutePath[LocalFileId] fileId;

        // The FileID used in the metadata is local to this analysis. It has to
        // be remapped when added to the database.
        LineMetadata[] metadata;

        /// Mutant schematas that has been generated.
        SchemataResult.Schemata[AbsolutePath] schematas;

        /// Coverage intervals that can be instrumented.
        Interval[][LocalFileId] coverage;
    }
}

@(
        "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
unittest {
    import std.regex : regex, matchFirst;
    import unit_threaded.runner.io : writelnUt;

    auto re_nomut = regex(Analyze.rawReNomut);
    // NOMUT in other type of comments should NOT match.
    matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);

    matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
    matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
    matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
    auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
    m["tag"].shouldEqual("arch");
    m["comment"].shouldEqual("smurf");
}

/// Token stream backed by a `ClangContext`. `getFilteredTokens` excludes comment tokens.
class TokenStreamImpl : TokenStream {
    import libclang_ast.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;
    import dextool.plugin.mutate.backend.utility : tokenize;

    ClangContext* ctx;

    /// The context must outlive any instance of this class.
    // TODO remove @trusted when upgrading to dmd-fe 2.091.0+ and activate dip25 + 1000
    this(ref ClangContext ctx) @trusted {
        this.ctx = &ctx;
    }

    /// Returns: all tokens in `p`.
    Token[] getTokens(Path p) {
        return tokenize(*ctx, p);
    }

    /// Returns: the tokens in `p` except the comments.
    Token[] getFilteredTokens(Path p) {
        import clang.c.Index : CXTokenKind;

        // Filter a stream of tokens for those that should affect the checksum.
        return tokenize(*ctx, p).filter!(a => a.kind != CXTokenKind.comment).array;
    }
}

/// Returns: true if `f` is inside any `roots`.
bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
    import dextool.utility : isPathInsideRoot;

    // The first root that contains `f` decides the answer. An empty `roots`
    // yields false.
    foreach (root; roots) {
        if (isPathInsideRoot(root, f))
            return true;
    }

    return false;
}

/** Update the connection between the marked mutants and their mutation status
 * id and mutation id.
 *
 * A marked mutant is re-linked when the checksum stored for its current status
 * id no longer equals the checksum saved in the marking. Marked mutants whose
 * checksum has no mutation status, or no mutation id, are left untouched.
 *
 * Params:
 *  db = the database that is read and updated.
 */
void updateMarkedMutants(ref Database db) {
    import dextool.plugin.mutate.backend.database.type : MutationStatusId;
    import dextool.plugin.mutate.backend.type : ExitStatus;

    // Replace the marking of `m` with one that points at the status and
    // mutation IDs that currently belong to its checksum.
    void update(MarkedMutant m) {
        const stId = db.getMutationStatusId(m.statusChecksum);
        if (stId.isNull)
            return;
        const mutId = db.getMutationId(stId.get);
        if (mutId.isNull)
            return;
        db.removeMarkedMutant(m.statusChecksum);
        db.markMutant(mutId.get, m.path, m.sloc, stId.get, m.statusChecksum,
                m.toStatus, m.rationale, m.mutText);
        db.updateMutationStatus(stId.get, m.toStatus, ExitStatus(0));
    }

    // find those marked mutants that have a checksum that is different from
    // the mutation status the marked mutant is related to. If possible change
    // the relation to the correct mutation status id.
    foreach (m; db.getMarkedMutants
            .map!(a => tuple(a, db.getChecksum(a.statusId)))
            .filter!(a => !a[1].isNull)
            .filter!(a => a[0].statusChecksum != a[1].get)) {
        update(m[0]);
    }
}

/** Prints the marked mutants that have become lost due to a rerun of analyze.
 *
 * Nothing is printed when `lostMutants` is empty. Otherwise a warning is
 * logged and a table with one row per mutant is written to stdout.
 *
 * Params:
 *  lostMutants = the markings that could not be kept.
 */
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.algorithm : sort;
    import std.array : empty;
    import std.conv : to;
    import std.stdio : writeln;

    if (lostMutants.empty)
        return;

    Table!6 tbl = Table!6([
        "ID", "File", "Line", "Column", "Status", "Rationale"
    ]);
    foreach (m; lostMutants) {
        typeof(tbl).Row r = [
            m.mutationId.get.to!string, m.path, m.sloc.line.to!string,
            m.sloc.column.to!string, m.toStatus.to!string, m.rationale.get
        ];
        tbl.put(r);
    }
    log.warning("Marked mutants was lost");
    writeln(tbl);
}

@("shall only let files in the diff through")
unittest {
    // Fixed: the module is `std.string`; `std..string` is a syntax error.
    import std.string : lineSplitter;
    import dextool.plugin.mutate.backend.diff_parser;

    // The literal is fed line by line to the parser, thus the line breaks and
    // the leading ' ', '+' and '-' markers are part of the test data.
    immutable lines = `diff --git a/standalone2.d b/standalone2.d
index 0123..2345 100644
--- a/standalone.d
+++ b/standalone2.d
@@ -31,7 +31,6 @@ import std.algorithm : map;
 import std.array : Appender, appender, array;
 import std.datetime : SysTime;
+import std.format : format;
-import std.typecons : Tuple;

 import d2sqlite3 : sqlDatabase = Database;

@@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
 struct Database {
     import std.conv : to;
     import std.exception : collectException;
-    import std.typecons : Nullable;
+    import std.typecons : Nullable, Flag, No;
     import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;

+    sqlDatabase db;`;

    UnifiedDiffParser p;
    foreach (line; lines.lineSplitter)
        p.process(line);
    auto diff = p.result;

    auto files = FileFilter(".".Path.AbsolutePath, true, diff);

    // Only the file on the "+++" side of the diff shall be analyzed.
    files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
    files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
}

/** Convert to an absolute path by finding the first match among the compiler flags
 *
 * The lookup order is `workDir`, then `includes`, then `systemIncludes`. The
 * first location where the file exists is used.
 *
 * Params:
 *  file = the path to look for.
 *  workDir = the directory that is tried first.
 *  includes = include directories from the compiler flags.
 *  systemIncludes = system include directories.
 *
 * Returns: the path that was found, or none if the file does not exist in any
 * of the locations.
 */
Optional!AbsolutePath toAbsolutePath(Path file, AbsolutePath workDir,
        ParseFlags.Include[] includes, SystemIncludePath[] systemIncludes) @trusted nothrow {
    import std.algorithm : map, filter;
    import std.file : exists;
    import std.path : buildPath;

    // Returns: `dir`/`file` if it exists.
    Optional!AbsolutePath lookup(string dir) nothrow {
        const p = buildPath(dir, file);
        try {
            if (exists(p))
                return some(AbsolutePath(p));
        } catch (Exception e) {
            // The function is nothrow. A location that cannot be checked is
            // deliberately treated the same as a location without the file.
        }
        return none!AbsolutePath;
    }

    {
        auto a = lookup(workDir.toString);
        if (a.hasValue)
            return a;
    }

    // The ranges are lazy, the lookups stop at the first hit.
    foreach (a; includes.map!(a => lookup(a.payload))
            .filter!(a => a.hasValue)) {
        return a;
    }

    foreach (a; systemIncludes.map!(a => lookup(a.value))
            .filter!(a => a.hasValue)) {
        return a;
    }

    return none!AbsolutePath;
}

/** Returns: the root files that need to be re-analyzed because either them or
 * their dependency has changed.
 *
 * The returned map contains every root file in the database. The value is
 * true for a root that is considered changed and false otherwise. If an
 * exception is thrown while the files are compared it is logged as a warning
 * and the map is returned as far as it was filled in.
 *
 * Params:
 *  dbPath = the database to open.
 *  fio = used to convert the stored paths to absolute paths.
 */
bool[Path] dependencyAnalyze(const AbsolutePath dbPath, FilesysIO fio) @trusted {
    import dextool.cachetools : nullableCache;
    import dextool.plugin.mutate.backend.database : FileId;

    auto db = Database.make(dbPath);

    typeof(return) rval;

    // Register all root files as unchanged. The ones that are found to be
    // changed further down are flipped to true.
    foreach (a; db.getRootFiles.map!(a => db.getFile(a).get)) {
        rval[a] = false;
    }

    try {
        // NOTE(review): the arguments (256, 30 seconds) are presumably the
        // size and lifetime of the cache - confirm in dextool.cachetools.
        // getFileId and getFileName are not referenced in this function.
        auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
                30.dur!"seconds");
        auto getFileName = nullableCache!(FileId, Path, (FileId id) => db.getFile(id))(256,
                30.dur!"seconds");
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        auto getFileFsChecksum = nullableCache!(AbsolutePath, Checksum, (AbsolutePath p) {
            return checksum(p);
        })(256, 30.dur!"seconds");

        // The checksum that is stored in the database for each dependency.
        Checksum[Path] dbDeps;
        foreach (a; db.dependencyApi.getAll)
            dbDeps[a.file] = a.checksum;

        const isToolVersionDifferent = db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
        // `f` is expected to have the members root, rootCs and deps.
        bool isChanged(T)(T f) {
            if (isToolVersionDifferent) {
                // because the tool version is updated then all files need to
                // be re-analyzed. an update can mean that the schemata are
                // improved, mutants have been changed/removed etc. it is
                // unknown. the only way to be sure is to re-analyze all files.
                return true;
            }

            // the root itself differs from what is stored in the database.
            if (f.rootCs != getFileFsChecksum(fio.toAbsoluteRoot(f.root)))
                return true;

            // the first dependency that differs is enough.
            foreach (a; f.deps.filter!(a => getFileFsChecksum(fio.toAbsoluteRoot(a)) != dbDeps[a])) {
                return true;
            }

            return false;
        }

        // `cache` avoids that the tuple, and thus the database lookups, is
        // evaluated more than once per root.
        foreach (f; db.getRootFiles
                .map!(a => db.getFile(a).get)
                .map!(a => tuple!("root", "rootCs", "deps")(a,
                    getFileDbChecksum(a), db.dependencyApi.get(a)))
                .cache
                .filter!(a => isChanged(a))
                .map!(a => a.root)) {
            rval[f] = true;
        }
    } catch (Exception e) {
        log.warning(e.msg);
    }

    log.trace("Dependency analyze: ", rval);

    return rval;
}

/** Only utf-8 files are supported
 *
 * Params:
 *  fio = used to read the content of the file.
 *  p = the file to inspect.
 *
 * Returns: true if the content of `p` has a utf-8 BOM or no BOM at all. A
 * warning is logged for a file with a utf-8 BOM.
 */
bool isFileSupported(FilesysIO fio, AbsolutePath p) @safe {
    import std.algorithm : among;
    import std.encoding : getBOM, BOM;

    auto entry = fio.makeInput(p).content.getBOM();
    // `among` returns the 1-based position of the match: 1 for BOM.utf8, 2 for
    // BOM.none and 0 when the schema is neither of them.
    const res = entry.schema.among(BOM.utf8, BOM.none);

    if (res == 1)
        log.warningf("%s has a utf-8 BOM marker. It will make all coverage and scheman fail to compile",
                p);

    return res != 0;
}