/**
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import logger = std.experimental.logger;
17 import std.algorithm : map, filter, joiner, cache;
18 import std.array : array, appender, empty;
19 import std.concurrency;
20 import std.datetime : dur, Duration;
21 import std.exception : collectException;
22 import std.parallelism;
23 import std.range : tee, enumerate;
24 import std.typecons : tuple;
25 
26 import colorlog;
27 import my.filter : GlobFilter;
28 import my.named_type;
29 import my.optional;
30 import my.set;
31 
32 import dextool.utility : dextoolBinaryId;
33 
34 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB,
35     ParsedCompileCommandRange, ParsedCompileCommand, ParseFlags, SystemIncludePath;
36 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
37 import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataResult;
38 import dextool.plugin.mutate.backend.database : Database, LineMetadata,
39     MutationPointEntry2, DepFile;
40 import dextool.plugin.mutate.backend.database.type : MarkedMutant, TestFile,
41     TestFilePath, TestFileChecksum, ToolVersion;
42 import dextool.plugin.mutate.backend.diff_parser : Diff;
43 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
44 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
45 import dextool.plugin.mutate.backend.utility : checksum, Checksum, getProfileResult, Profile;
46 import dextool.plugin.mutate.backend.type : Mutation;
47 import dextool.plugin.mutate.type : MutationKind;
48 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze;
49 import dextool.type : ExitStatusType, AbsolutePath, Path;
50 
51 version (unittest) {
52     import unit_threaded.assertions;
53 }
54 
55 /** Analyze the files in `frange` for mutations.
56  */
57 ExitStatusType runAnalyzer(const AbsolutePath dbPath, const MutationKind[] userKinds, ConfigAnalyze confAnalyze,
58         ConfigCompiler conf_compiler, ParsedCompileCommandRange frange,
59         ValidateLoc valLoc, FilesysIO fio) @trusted {
60     import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
61     import dextool.plugin.mutate.backend.mutation_type : toInternal;
62 
63     auto fileFilter = () {
64         try {
65             return FileFilter(fio.getOutputDir, confAnalyze.unifiedDiffFromStdin,
66                     confAnalyze.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
67         } catch (Exception e) {
68             logger.info(e.msg);
69             logger.warning("Unable to parse diff");
70         }
71         return FileFilter.init;
72     }();
73 
74     bool shouldAnalyze(AbsolutePath p) {
75         return confAnalyze.fileMatcher.match(p.toString) && fileFilter.shouldAnalyze(p);
76     }
77 
78     auto pool = () {
79         if (confAnalyze.poolSize == 0)
80             return new TaskPool();
81         return new TaskPool(confAnalyze.poolSize);
82     }();
83 
    // determine which root files need to be re-analyzed because they or one of their dependencies changed.
85     auto changedDeps = dependencyAnalyze(dbPath, fio);
86 
87     // will only be used by one thread at a time.
88     auto store = spawn(&storeActor, dbPath, cast(shared) fio.dup,
89             cast(shared) confAnalyze, cast(immutable) changedDeps.byKeyValue
90             .filter!(a => !a.value)
91             .map!(a => a.key)
92             .array);
93 
94     try {
95         pool.put(task!testPathActor(confAnalyze.testPaths,
96                 confAnalyze.testFileMatcher, fio.dup, store));
97     } catch (Exception e) {
98         logger.trace(e);
99         logger.warning(e.msg);
100     }
101 
102     auto kinds = toInternal(userKinds);
103     int taskCnt;
104     Set!AbsolutePath alreadyAnalyzed;
105     // dfmt off
106     foreach (f; frange
            // The tool only supports analyzing a file once. This optimizes
            // the common case where the same file occurs multiple times in
            // the compile commands database.
110             .filter!(a => a.cmd.absoluteFile !in alreadyAnalyzed)
111             .tee!(a => alreadyAnalyzed.add(a.cmd.absoluteFile))
112             .cache
113             .filter!(a => shouldAnalyze(a.cmd.absoluteFile))
114             ) {
115         try {
116             if (auto v = fio.toRelativeRoot(f.cmd.absoluteFile) in changedDeps) {
117                 if (!(*v || confAnalyze.forceSaveAnalyze))
118                     continue;
119             }
120 
121             //logger.infof("%s sending", f.cmd.absoluteFile);
122             pool.put(task!analyzeActor(kinds, f, valLoc.dup, fio.dup, conf_compiler, confAnalyze, store));
123             taskCnt++;
124         } catch (Exception e) {
125             logger.trace(e);
126             logger.warning(e.msg);
127         }
128     }
129     // dfmt on
130 
131     changedDeps = typeof(changedDeps).init; // free the memory
132 
    // inform the store actor of how many analyze results it should *try* to
    // save.
135     send(store, AnalyzeCntMsg(taskCnt));
136     // wait for all files to be analyzed
137     pool.finish(true);
138     // wait for the store actor to finish
139     receiveOnly!StoreDoneMsg;
140 
141     if (confAnalyze.profile)
142         try {
143             import std.stdio : writeln;
144 
145             writeln(getProfileResult.toString);
146         } catch (Exception e) {
147             logger.warning("Unable to print the profile data: ", e.msg).collectException;
148         }
149 
150     return ExitStatusType.Ok;
151 }
152 
153 @safe:
154 
/** Filter for which files to analyze: either all files or only those present
 * in the unified diff read from stdin.
 *
 * The matching ignores the file extension to lessen the problem that headers
 * are skipped because they do not exist in `compile_commands.json`. It means
 * that e.g. `foo.hpp` matches if `foo.cpp` is in `compile_commands.json`.
 *
 * TODO: this may create problems for header-only libraries because only the
 * unittest would include the header, which means that for this to work the
 * unittest would have to reside in the same directory as the header file,
 * which they normally never do. A diff of a header-only library then leads to
 * "no files analyzed".
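 *
 * Example (illustrative sketch; `root` and `diff` are assumed to be an
 * `AbsolutePath` and a parsed `Diff` touching `foo.cpp`):
 * ---
 * auto filter = FileFilter(root, true, diff);
 * filter.shouldAnalyze(AbsolutePath(buildPath(root, "foo.cpp"))); // true
 * filter.shouldAnalyze(AbsolutePath(buildPath(root, "foo.hpp"))); // also true, the extension is ignored
 * ---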
167  */
168 struct FileFilter {
169     import std.path : stripExtension;
170 
171     Set!string files;
172     bool useFileFilter;
173     AbsolutePath root;
174 
175     this(AbsolutePath root, bool fromStdin, Diff diff) {
176         this.root = root;
177         this.useFileFilter = fromStdin;
178         foreach (a; diff.toRange(root)) {
179             files.add(a.key.stripExtension);
180         }
181     }
182 
183     bool shouldAnalyze(AbsolutePath p) {
184         import std.path : relativePath;
185 
186         if (!useFileFilter) {
187             return true;
188         }
189 
190         return relativePath(p, root).stripExtension in files;
191     }
192 }
193 
/// Number of analyze tasks that have been spawned and that the `storeActor` should wait for.
195 struct AnalyzeCntMsg {
196     int value;
197 }
198 
199 /// The main thread is waiting for storeActor to send this message.
200 struct StoreDoneMsg {
201 }
202 
/// Start an analysis of a file.
204 void analyzeActor(Mutation.Kind[] kinds, ParsedCompileCommand fileToAnalyze, ValidateLoc vloc,
205         FilesysIO fio, ConfigCompiler compilerConf, ConfigAnalyze analyzeConf, Tid storeActor) @trusted nothrow {
206     auto profile = Profile("analyze file " ~ fileToAnalyze.cmd.absoluteFile);
207 
208     try {
209         //logger.infof("%s begin", fileToAnalyze.cmd.absoluteFile);
210         auto analyzer = Analyze(kinds, vloc, fio, Analyze.Config(compilerConf.forceSystemIncludes,
211                 analyzeConf.saveCoverage.get, compilerConf.allowErrors.get));
212         analyzer.process(fileToAnalyze);
213         send(storeActor, cast(immutable) analyzer.result);
214         //logger.infof("%s end", fileToAnalyze.cmd.absoluteFile);
215         return;
216     } catch (Exception e) {
217         logger.error(e.msg).collectException;
218     }
219 
220     // send a dummy result
221     try {
222         //logger.infof("%s failed", fileToAnalyze.cmd.absoluteFile);
223         send(storeActor, cast(immutable) new Analyze.Result);
224     } catch (Exception e) {
225         logger.error(e.msg).collectException;
226     }
227 }
228 
229 class TestFileResult {
230     Duration time;
231     TestFile[Checksum] files;
232 }
233 
234 void testPathActor(const AbsolutePath[] userPaths, GlobFilter matcher, FilesysIO fio, Tid storeActor) @trusted nothrow {
235     import std.datetime : Clock;
236     import std.datetime.stopwatch : StopWatch, AutoStart;
237     import std.file : isDir, isFile, dirEntries, SpanMode;
238     import my.container.vector;
239 
240     auto profile = Profile("checksum test files");
241 
242     auto sw = StopWatch(AutoStart.yes);
243 
244     TestFile makeTestFile(const AbsolutePath file) {
245         auto cs = checksum(fio.makeInput(file).content[]);
246         return TestFile(TestFilePath(fio.toRelativeRoot(file)),
247                 TestFileChecksum(cs), Clock.currTime);
248     }
249 
250     auto paths = vector(userPaths.dup);
251 
252     auto tfiles = new TestFileResult;
253     scope (exit)
254         tfiles.time = sw.peek;
255 
256     while (!paths.empty) {
257         try {
258             if (isDir(paths.front)) {
259                 logger.trace("  Test directory ", paths.front);
260                 foreach (a; dirEntries(paths.front, SpanMode.shallow).map!(
261                         a => AbsolutePath(a.name))) {
262                     paths.put(a);
263                 }
264             } else if (isFile(paths.front) && matcher.match(paths.front)) {
265                 logger.trace("  Test saved ", paths.front);
266                 auto t = makeTestFile(paths.front);
267                 tfiles.files[t.checksum.get] = t;
268             }
269         } catch (Exception e) {
270             logger.warning(e.msg).collectException;
271         }
272 
273         paths.popFront;
274     }
275 
276     logger.infof("Found %s test files", tfiles.files.length).collectException;
277 
278     try {
279         send(storeActor, cast(immutable) tfiles);
280     } catch (Exception e) {
281     }
282 }
283 
/// Store the results of the analysis.
285 void storeActor(const AbsolutePath dbPath, scope shared FilesysIO fioShared,
286         scope shared ConfigAnalyze confAnalyzeShared, immutable Path[] rootFiles) @trusted nothrow {
287     import cachetools : CacheLRU;
288     import dextool.cachetools : nullableCache;
289     import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
290 
291     const confAnalyze = cast() confAnalyzeShared;
292 
    // The conditions the storeActor waits on while receiving the results
    // from the workers.
295     static struct RecvWaiter {
296         int analyzeFileWaitCnt = int.max;
297         int analyzeFileCnt;
298 
299         bool isTestFilesDone;
300 
301         bool isWaiting() {
302             return analyzeFileCnt < analyzeFileWaitCnt || !isTestFilesDone;
303         }
304     }
305 
306     static struct SchemataSaver {
307         import sumtype;
308         import my.optional;
309         import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataBuilder;
310 
311         typeof(ConfigAnalyze.minMutantsPerSchema) minMutantsPerSchema;
312         typeof(ConfigAnalyze.mutantsPerSchema) mutantsPerSchema;
313         SchemataBuilder builder;
314 
315         void put(FilesysIO fio, SchemataResult.Schemata[AbsolutePath] a) {
316             builder.put(fio, a);
317         }
318 
319         void process(ref Database db, Optional!(SchemataBuilder.ET) value) {
320             value.match!((Some!(SchemataBuilder.ET) a) {
321                 try {
322                     auto mutants = a.mutants
323                         .map!(a => db.getMutationStatusId(a.id))
324                         .filter!(a => !a.isNull)
325                         .map!(a => a.get)
326                         .array;
327                     if (!mutants.empty) {
328                         const id = db.putSchemata(a.checksum, a.fragments, mutants);
329                         logger.tracef(!id.isNull, "Saving schema %s with %s mutants",
330                             id.get.get, mutants.length);
331                     }
332                 } catch (Exception e) {
333                     logger.trace(e.msg);
334                 }
335             }, (None a) {});
336         }
337 
338         /// Consume fragments used by scheman containing >min mutants.
339         void intermediate(ref Database db) {
340             builder.discardMinScheman = false;
341             builder.mutantsPerSchema = mutantsPerSchema.get;
342             builder.minMutantsPerSchema = mutantsPerSchema.get;
343 
344             while (!builder.isDone) {
345                 process(db, builder.next);
346             }
347 
348             builder.restart;
349         }
350 
351         /// Consume all fragments or discard.
352         void finalize(ref Database db) {
353             builder.discardMinScheman = true;
354             builder.mutantsPerSchema = mutantsPerSchema.get;
355             builder.minMutantsPerSchema = minMutantsPerSchema.get;
356 
357             // two loops to pass over all mutants and retry new schema
358             // compositions. Any schema that is less than the minimum will be
359             // discarded so the number of mutants will shrink.
360             while (!builder.isDone) {
361                 while (!builder.isDone) {
362                     process(db, builder.next);
363                 }
364                 builder.restart;
365             }
366         }
367     }
368 
369     auto schemas = SchemataSaver(confAnalyze.minMutantsPerSchema, confAnalyze.mutantsPerSchema);
370 
371     void helper(FilesysIO fio, ref Database db) nothrow {
        // A file is saved to the database at most once.
373         Set!AbsolutePath savedFiles;
374 
375         const isToolVersionDifferent = () nothrow{
376             try {
377                 return db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
378             } catch (Exception e) {
379             }
380             return true;
381         }();
382 
383         auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
384                 30.dur!"seconds");
385         auto getFileDbChecksum = nullableCache!(string, Checksum,
386                 (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
387         auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
388             return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]);
389         })(256, 30.dur!"seconds");
390 
391         static struct Files {
392             Checksum[Path] value;
393 
394             this(ref Database db) {
395                 foreach (a; db.getDetailedFiles) {
396                     value[a.file] = a.fileChecksum;
397                 }
398             }
399         }
400 
401         void save(immutable Analyze.Result result_) {
402             import dextool.plugin.mutate.backend.type : Language;
403 
404             auto result = cast() result_;
405 
406             auto profile = Profile("save " ~ result.root);
407 
408             // mark files that have an unchanged checksum as "already saved"
409             foreach (f; result.idFile
410                     .byKey
411                     .filter!(a => a !in savedFiles)
412                     .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a)
413                         && !confAnalyze.forceSaveAnalyze && !isToolVersionDifferent)) {
414                 logger.info("Unchanged ".color(Color.yellow), f);
415                 savedFiles.add(f);
416             }
417 
            // only save the mutation points of a file once.
419             {
420                 auto app = appender!(MutationPointEntry2[])();
421                 bool isChanged = isToolVersionDifferent;
422                 foreach (mp; result.mutationPoints
423                         .map!(a => tuple!("data", "file")(a, fio.toAbsoluteRoot(a.file)))
424                         .filter!(a => a.file !in savedFiles)) {
425                     app.put(mp.data);
426                 }
427                 foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) {
428                     isChanged = true;
429                     logger.info("Saving ".color(Color.green), f);
430                     const relp = fio.toRelativeRoot(f);
431 
432                     // this is critical in order to remove old data about a file.
433                     db.removeFile(relp);
434 
435                     const info = result.infoId[result.idFile[f]];
436                     db.put(relp, info.checksum, info.language, f == result.root);
437                     savedFiles.add(f);
438                 }
439                 db.put(app.data, fio.getOutputDir);
440 
441                 if (result.root !in savedFiles) {
                    // this occurs when the file is e.g. a unittest that uses a
                    // header only library. The unittests are not mutated thus
                    // no mutation points exist in them but we want dextool to
                    // still, if possible, track the unittests for changes.
446                     isChanged = true;
447                     const relp = fio.toRelativeRoot(result.root);
448                     db.removeFile(relp);
                    // the language does not matter because it is a file
                    // without any mutants.
451                     db.put(relp, result.rootCs, Language.init, true);
452                     savedFiles.add(fio.toAbsoluteRoot(result.root));
453                 }
454 
455                 // must always update dependencies because they may not contain
456                 // mutants. Only files that are changed and contain mutants
457                 // trigger isChanged to be true.
458                 db.dependencyApi.set(fio.toRelativeRoot(result.root), result.dependencies);
459 
460                 if (isChanged) {
461                     foreach (a; result.coverage.byKeyValue) {
462                         const fid = getFileId(fio.toRelativeRoot(result.fileId[a.key]));
463                         if (!fid.isNull) {
464                             db.clearCoverageMap(fid.get);
465                             db.putCoverageMap(fid.get, a.value);
466                         }
467                     }
468 
                    // only save the schematas if mutation points were saved.
                    // This ensures that only schematas for changed/new files
                    // are saved.
472                     schemas.put(fio, result.schematas);
473                     schemas.intermediate(db);
474                 }
475             }
476 
477             {
478                 Set!long printed;
479                 auto app = appender!(LineMetadata[])();
480                 foreach (md; result.metadata) {
481                     const localId = Analyze.Result.LocalFileId(md.id.get);
482                     // transform the ID from local to global.
483                     const fid = getFileId(fio.toRelativeRoot(result.fileId[localId]));
484                     if (fid.isNull && !printed.contains(md.id.get)) {
485                         printed.add(md.id.get);
486                         logger.info("File with suppressed mutants (// NOMUT) not in the database: ",
487                                 result.fileId[localId]).collectException;
488                     } else if (!fid.isNull) {
489                         app.put(LineMetadata(fid.get, md.line, md.attr));
490                     }
491                 }
492                 db.put(app.data);
493             }
494         }
495 
496         void saveTestResult(immutable TestFileResult result) {
497             auto profile = Profile("save test files");
498             Set!Checksum old;
499 
500             foreach (a; db.getTestFiles) {
501                 old.add(a.checksum.get);
502                 if (a.checksum.get !in result.files) {
503                     logger.info("Removed test file ", a.file.get.toString);
504                     db.removeFile(a.file);
505                 }
506             }
507 
508             foreach (a; result.files.byValue.filter!(a => a.checksum.get !in old)) {
509                 logger.info("Saving test file ", a.file.get.toString);
510                 db.put(a);
511             }
512         }
513 
514         // listen for results from workers until the expected number is processed.
515         void recv() {
516             logger.info("Updating files");
517             RecvWaiter waiter;
518 
519             while (waiter.isWaiting) {
520                 try {
521                     receive((AnalyzeCntMsg a) {
522                         waiter.analyzeFileWaitCnt = a.value;
523                     }, (immutable Analyze.Result a) {
524                         auto trans = db.transaction;
525                         waiter.analyzeFileCnt++;
526                         save(a);
527                         trans.commit;
528 
529                         logger.infof("Analyzed file %s/%s",
530                             waiter.analyzeFileCnt, waiter.analyzeFileWaitCnt);
531                     }, (immutable TestFileResult a) {
532                         auto trans = db.transaction;
533                         waiter.isTestFilesDone = true;
534                         saveTestResult(a);
535                         trans.commit;
536 
537                         logger.info("Done analyzing test files in ", a.time);
538                     });
539                 } catch (Exception e) {
540                     logger.trace(e).collectException;
541                     logger.warning(e.msg).collectException;
542                 }
543             }
544         }
545 
546         void pruneFiles() {
547             import std.path : buildPath;
548 
549             auto profile = Profile("prune files");
550 
551             logger.info("Pruning the database of dropped files");
552             auto files = db.getFiles.map!(a => fio.toAbsoluteRoot(a)).toSet;
553 
554             foreach (f; files.setDifference(savedFiles).toRange) {
555                 logger.info("Removing ".color(Color.red), f);
556                 db.removeFile(fio.toRelativeRoot(f));
557             }
558         }
559 
560         void addRoots() {
561             if (confAnalyze.forceSaveAnalyze || isToolVersionDifferent)
562                 return;
563 
            // add root files and their dependencies that have not been analyzed
            // because nothing has changed. By adding them they are not removed.
566 
567             auto profile = Profile("add roots and dependencies");
568             foreach (a; rootFiles) {
569                 auto p = fio.toAbsoluteRoot(a);
570                 if (p !in savedFiles) {
571                     savedFiles.add(p);
                    // fake output for the user to tell them that yes, the files
                    // have been analyzed.
574                     logger.info("Analyzing ", a);
575                     logger.info("Unchanged ".color(Color.yellow), a);
576                 }
577             }
578             foreach (a; rootFiles.map!(a => db.dependencyApi.get(a)).joiner) {
579                 savedFiles.add(fio.toAbsoluteRoot(a));
580             }
581         }
582 
583         void fastDbOn() {
584             if (!confAnalyze.fastDbStore)
585                 return;
586             logger.info(
587                     "Turning OFF sqlite3 synchronization protection to improve the write performance");
588             logger.warning(
589                     "Do NOT interrupt dextool in any way because it may corrupt the database");
590             db.run("PRAGMA synchronous = OFF");
591             db.run("PRAGMA journal_mode = MEMORY");
592         }
593 
594         void fastDbOff() {
595             if (!confAnalyze.fastDbStore)
596                 return;
597             db.run("PRAGMA synchronous = ON");
598             db.run("PRAGMA journal_mode = DELETE");
599         }
600 
601         try {
602             import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;
603 
            // By making the mailbox size follow the number of workers, the
            // whole pipeline slows down (the workers block) if saving to the
            // database is too slow. This avoids excessive or even fatal memory usage.
607             setMaxMailboxSize(thisTid, confAnalyze.poolSize + 2, OnCrowding.block);
608 
609             fastDbOn();
610 
611             {
612                 auto trans = db.transaction;
613                 auto profile = Profile("prune old schemas");
614                 if (isToolVersionDifferent) {
                    logger.info("Pruning the database of schemas created by an old version");
616                     db.deleteAllSchemas;
617                 }
618                 trans.commit;
619             }
620 
621             recv();
622             {
623                 auto trans = db.transaction;
624                 schemas.finalize(db);
625                 trans.commit;
626             }
627 
628             {
629                 auto trans = db.transaction;
630                 addRoots();
631 
632                 logger.info("Resetting timeout context");
633                 resetTimeoutContext(db);
634 
635                 logger.info("Updating metadata");
636                 db.updateMetadata;
637 
638                 if (confAnalyze.prune) {
639                     pruneFiles();
640                     {
641                         auto profile = Profile("remove orphaned mutants");
642                         logger.info("Removing orphaned mutants");
643                         db.removeOrphanedMutants;
644                     }
645                     {
646                         auto profile = Profile("prune schemas");
647                         logger.info("Prune the database of unused schemas");
648                         db.pruneSchemas;
649                     }
650                     {
651                         auto profile = Profile("prune dependencies");
652                         logger.info("Prune dependencies");
653                         db.dependencyApi.cleanup;
654                     }
655                 }
656 
657                 logger.info("Updating manually marked mutants");
658                 updateMarkedMutants(db);
659                 printLostMarkings(db.getLostMarkings);
660 
661                 if (isToolVersionDifferent) {
662                     logger.info("Updating tool version");
663                     db.updateToolVersion(ToolVersion(dextoolBinaryId));
664                 }
665 
666                 logger.info("Committing changes");
667                 trans.commit;
668                 logger.info("Ok".color(Color.green));
669             }
670 
671             fastDbOff();
672 
673             if (isToolVersionDifferent) {
674                 auto profile = Profile("compact");
675                 logger.info("Compacting the database");
676                 db.vacuum;
677             }
678         } catch (Exception e) {
679             logger.error(e.msg).collectException;
680             logger.error("Failed to save the result of the analyze to the database")
681                 .collectException;
682         }
683 
684         try {
685             send(ownerTid, StoreDoneMsg.init);
686         } catch (Exception e) {
687             logger.errorf("Fatal error. Unable to send %s to the main thread",
688                     StoreDoneMsg.init).collectException;
689         }
690     }
691 
692     try {
693         FilesysIO fio = cast(FilesysIO) fioShared;
694         auto db = Database.make(dbPath);
695         helper(fio, db);
696     } catch (Exception e) {
697         logger.error(e.msg).collectException;
698     }
699 }
700 
701 /// Analyze a file for mutants.
702 struct Analyze {
703     import std.regex : Regex, regex, matchFirst;
704     import std.typecons : Yes;
705     import libclang_ast.context : ClangContext;
706 
707     static struct Config {
708         bool forceSystemIncludes;
709         bool saveCoverage;
710         bool allowErrors;
711     }
712 
713     private {
714         static immutable rawReNomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;
715 
716         Regex!char re_nomut;
717 
718         ValidateLoc valLoc;
719         FilesysIO fio;
720 
721         Cache cache;
722 
723         Result result;
724 
725         Config conf;
726 
727         Mutation.Kind[] kinds;
728     }
729 
730     this(Mutation.Kind[] kinds, ValidateLoc valLoc, FilesysIO fio, Config conf) @trusted {
731         this.kinds = kinds;
732         this.valLoc = valLoc;
733         this.fio = fio;
734         this.cache = new Cache;
735         this.re_nomut = regex(rawReNomut);
736         this.result = new Result;
737         this.conf = conf;
738     }
739 
740     void process(ParsedCompileCommand commandsForFileToAnalyze) @safe {
741         import std.file : exists;
742 
743         commandsForFileToAnalyze.flags.forceSystemIncludes = conf.forceSystemIncludes;
744 
745         try {
746             if (!exists(commandsForFileToAnalyze.cmd.absoluteFile)) {
                logger.warningf("Failed to analyze %s. The file does not exist",
748                         commandsForFileToAnalyze.cmd.absoluteFile);
749                 return;
750             }
751         } catch (Exception e) {
752             logger.warning(e.msg);
753             return;
754         }
755 
756         result.root = commandsForFileToAnalyze.cmd.absoluteFile;
757 
758         try {
759             result.rootCs = checksum(result.root);
760 
761             auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
762             auto tstream = new TokenStreamImpl(ctx);
763 
764             analyzeForMutants(commandsForFileToAnalyze, result.root, ctx, tstream);
765             foreach (f; result.fileId.byValue)
766                 analyzeForComments(f, tstream);
767         } catch (Exception e) {
768             () @trusted { logger.trace(e); }();
769             logger.info(e.msg);
            logger.error("Failed to analyze ",
771                     commandsForFileToAnalyze.cmd.absoluteFile).collectException;
772         }
773     }
774 
775     void analyzeForMutants(ParsedCompileCommand commandsForFileToAnalyze,
776             AbsolutePath fileToAnalyze, ref ClangContext ctx, TokenStream tstream) @safe {
777         import my.gc.refc : RefCounted;
778         import dextool.plugin.mutate.backend.analyze.ast : Ast;
779         import dextool.plugin.mutate.backend.analyze.pass_clang;
780         import dextool.plugin.mutate.backend.analyze.pass_coverage;
781         import dextool.plugin.mutate.backend.analyze.pass_filter;
782         import dextool.plugin.mutate.backend.analyze.pass_mutant;
783         import dextool.plugin.mutate.backend.analyze.pass_schemata;
784         import libclang_ast.check_parse_result : hasParseErrors, logDiagnostic;
785 
786         logger.info("Analyzing ", fileToAnalyze);
787         RefCounted!(Ast) ast;
788         {
789             auto tu = ctx.makeTranslationUnit(fileToAnalyze,
790                     commandsForFileToAnalyze.flags.completeFlags);
791             if (tu.hasParseErrors) {
792                 logDiagnostic(tu);
793                 logger.warningf("Compile error in %s", fileToAnalyze);
794                 if (!conf.allowErrors) {
795                     logger.warning("Skipping");
796                     return;
797                 }
798             }
799 
800             auto res = toMutateAst(tu.cursor, fio);
801             ast = res.ast;
802             saveDependencies(commandsForFileToAnalyze.flags, result.root, res.dependencies);
803             debug logger.trace(ast);
804         }
805 
806         auto codeMutants = () {
807             auto mutants = toMutants(ast, fio, valLoc, kinds);
808             debug logger.trace(mutants);
809 
810             debug logger.trace("filter mutants");
811             mutants = filterMutants(fio, mutants);
812             debug logger.trace(mutants);
813 
814             return toCodeMutants(mutants, fio, tstream);
815         }();
816         debug logger.trace(codeMutants);
817 
818         {
819             auto schemas = toSchemata(ast, fio, codeMutants);
820             debug logger.trace(schemas);
821             logger.tracef("path dedup count:%s length_acc:%s", ast.paths.count,
822                     ast.paths.lengthAccum);
823 
824             result.schematas = schemas.getSchematas;
825         }
826 
827         result.mutationPoints = codeMutants.points.byKeyValue.map!(
828                 a => a.value.map!(b => MutationPointEntry2(fio.toRelativeRoot(a.key),
829                 b.offset, b.sloc.begin, b.sloc.end, b.mutants))).joiner.array;
830         foreach (f; codeMutants.points.byKey) {
831             const id = Result.LocalFileId(result.idFile.length);
832             result.idFile[f] = id;
833             result.fileId[id] = f;
834             result.infoId[id] = Result.FileInfo(codeMutants.csFiles[f], codeMutants.lang);
835         }
836 
837         if (conf.saveCoverage) {
838             auto cov = toCoverage(ast, fio, valLoc);
839             debug logger.trace(cov);
840 
841             foreach (a; cov.points.byKeyValue) {
842                 if (auto id = a.key in result.idFile) {
843                     result.coverage[*id] = a.value;
844                 }
845             }
846         }
847     }
848 
849     /** Tokens are always from the same file.
850      *
851      * TODO: move this to pass_clang.
852      */
853     void analyzeForComments(AbsolutePath file, TokenStream tstream) @trusted {
854         import std.algorithm : filter;
855         import clang.c.Index : CXTokenKind;
856         import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
857 
858         if (auto localId = file in result.idFile) {
859             const fid = FileId(localId.get);
860 
861             auto mdata = appender!(LineMetadata[])();
862             foreach (t; cache.getTokens(AbsolutePath(file), tstream)
863                     .filter!(a => a.kind == CXTokenKind.comment)) {
864                 auto m = matchFirst(t.spelling, re_nomut);
865                 if (m.whichPattern == 0)
866                     continue;
867 
868                 mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
869                 logger.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
870             }
871 
872             result.metadata ~= mdata.data;
873         }
874     }
875 
876     void saveDependencies(ParseFlags flags, AbsolutePath root, Path[] dependencies) @trusted {
877         import std.algorithm : cache;
878         import std.mmfile;
879 
880         auto rootDir = root.dirName;
881 
882         foreach (p; dependencies.map!(a => toAbsolutePath(a, rootDir,
883                 flags.includes, flags.systemIncludes))
884                 .cache
885                 .filter!(a => a.hasValue)
886                 .map!(a => a.orElse(AbsolutePath.init))
887                 .filter!(a => valLoc.isInsideOutputDir(a))) {
888             try {
889                 result.dependencies ~= DepFile(fio.toRelativeRoot(p), checksum(p));
890             } catch (Exception e) {
891                 logger.trace(e.msg).collectException;
892             }
893         }
894 
895         debug logger.trace(result.dependencies);
896     }
897 
898     static class Result {
899         import dextool.plugin.mutate.backend.analyze.ast : Interval;
900         import dextool.plugin.mutate.backend.database.type : SchemataFragment;
901         import dextool.plugin.mutate.backend.type : Language, CodeChecksum, SchemataChecksum;
902 
903         alias LocalFileId = NamedType!(long, Tag!"LocalFileId", long.init,
904                 TagStringable, Hashable);
905         alias LocalSchemaId = NamedType!(long, Tag!"LocalSchemaId", long.init,
906                 TagStringable, Hashable);
907 
908         MutationPointEntry2[] mutationPoints;
909 
910         static struct FileInfo {
911             Checksum checksum;
912             Language language;
913         }
914 
915         /// The file that is analyzed, which is a root
916         AbsolutePath root;
917         Checksum rootCs;
918 
919         /// The dependencies the root has.
920         DepFile[] dependencies;
921 
922         /// The key is the ID from idFile.
923         FileInfo[LocalFileId] infoId;
924 
        /// The IDs are unique for *this* analysis, not globally.
926         LocalFileId[AbsolutePath] idFile;
927         AbsolutePath[LocalFileId] fileId;
928 
929         // The FileID used in the metadata is local to this analysis. It has to
930         // be remapped when added to the database.
931         LineMetadata[] metadata;
932 
        /// Mutant schematas that have been generated.
934         SchemataResult.Schemata[AbsolutePath] schematas;
935 
936         /// Coverage intervals that can be instrumented.
937         Interval[][LocalFileId] coverage;
938     }
939 }
940 
941 @(
942         "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
943 unittest {
944     import std.regex : regex, matchFirst;
945     import unit_threaded.runner.io : writelnUt;
946 
947     auto re_nomut = regex(Analyze.rawReNomut);
    // NOMUT in other types of comments should NOT match.
949     matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
950     matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
951     matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
952     matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);
953 
954     matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
955     matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
956     matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
957     matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
958     matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
959     matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
960     auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
961     m["tag"].shouldEqual("arch");
962     m["comment"].shouldEqual("smurf");
963 }
964 
/// Stream of tokens from a file. `getFilteredTokens` excludes comment tokens, `getTokens` returns all of them.
966 class TokenStreamImpl : TokenStream {
967     import libclang_ast.context : ClangContext;
968     import dextool.plugin.mutate.backend.type : Token;
969     import dextool.plugin.mutate.backend.utility : tokenize;
970 
971     ClangContext* ctx;
972 
973     /// The context must outlive any instance of this class.
974     // TODO remove @trusted when upgrading to dmd-fe 2.091.0+ and activate dip25 + 1000
975     this(ref ClangContext ctx) @trusted {
976         this.ctx = &ctx;
977     }
978 
979     Token[] getTokens(Path p) {
980         return tokenize(*ctx, p);
981     }
982 
983     Token[] getFilteredTokens(Path p) {
984         import clang.c.Index : CXTokenKind;
985 
986         // Filter a stream of tokens for those that should affect the checksum.
987         return tokenize(*ctx, p).filter!(a => a.kind != CXTokenKind.comment).array;
988     }
989 }
990 
991 /// Returns: true if `f` is inside any `roots`.
992 bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
993     import dextool.utility : isPathInsideRoot;
994 
995     foreach (root; roots) {
996         if (isPathInsideRoot(root, f))
997             return true;
998     }
999 
1000     return false;
1001 }
1002 
1003 /** Update the connection between the marked mutants and their mutation status
1004  * id and mutation id.
1005  */
1006 void updateMarkedMutants(ref Database db) {
1007     import dextool.plugin.mutate.backend.database.type : MutationStatusId;
1008     import dextool.plugin.mutate.backend.type : ExitStatus;
1009 
1010     void update(MarkedMutant m) {
1011         const stId = db.getMutationStatusId(m.statusChecksum);
1012         if (stId.isNull)
1013             return;
1014         const mutId = db.getMutationId(stId.get);
1015         if (mutId.isNull)
1016             return;
1017         db.removeMarkedMutant(m.statusChecksum);
1018         db.markMutant(mutId.get, m.path, m.sloc, stId.get, m.statusChecksum,
1019                 m.toStatus, m.rationale, m.mutText);
1020         db.updateMutationStatus(stId.get, m.toStatus, ExitStatus(0));
1021     }
1022 
1023     // find those marked mutants that have a checksum that is different from
1024     // the mutation status the marked mutant is related to. If possible change
1025     // the relation to the correct mutation status id.
1026     foreach (m; db.getMarkedMutants
1027             .map!(a => tuple(a, db.getChecksum(a.statusId)))
1028             .filter!(a => !a[1].isNull)
1029             .filter!(a => a[0].statusChecksum != a[1].get)) {
1030         update(m[0]);
1031     }
1032 }
1033 
/// Print marked mutants that have been lost due to a re-run of the analysis.
1035 void printLostMarkings(MarkedMutant[] lostMutants) {
1036     import std.algorithm : sort;
1037     import std.array : empty;
1038     import std.conv : to;
1039     import std.stdio : writeln;
1040 
1041     if (lostMutants.empty)
1042         return;
1043 
1044     Table!6 tbl = Table!6([
1045             "ID", "File", "Line", "Column", "Status", "Rationale"
1046             ]);
1047     foreach (m; lostMutants) {
1048         typeof(tbl).Row r = [
1049             m.mutationId.get.to!string, m.path, m.sloc.line.to!string,
1050             m.sloc.column.to!string, m.toStatus.to!string, m.rationale.get
1051         ];
1052         tbl.put(r);
1053     }
    logger.warning("Marked mutants were lost");
1055     writeln(tbl);
1056 }
1057 
1058 @("shall only let files in the diff through")
1059 unittest {
1060     import std.string : lineSplitter;
1061     import dextool.plugin.mutate.backend.diff_parser;
1062 
1063     immutable lines = `diff --git a/standalone2.d b/standalone2.d
1064 index 0123..2345 100644
1065 --- a/standalone.d
1066 +++ b/standalone2.d
1067 @@ -31,7 +31,6 @@ import std.algorithm : map;
1068  import std.array : Appender, appender, array;
1069  import std.datetime : SysTime;
1070 +import std.format : format;
1071 -import std.typecons : Tuple;
1072 
1073  import d2sqlite3 : sqlDatabase = Database;
1074 
1075 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
1076  struct Database {
1077      import std.conv : to;
1078      import std.exception : collectException;
1079 -    import std.typecons : Nullable;
1080 +    import std.typecons : Nullable, Flag, No;
1081      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
1082 
1083 +    sqlDatabase db;`;
1084 
1085     UnifiedDiffParser p;
1086     foreach (line; lines.lineSplitter)
1087         p.process(line);
1088     auto diff = p.result;
1089 
1090     auto files = FileFilter(".".Path.AbsolutePath, true, diff);
1091 
1092     files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
1093     files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
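    // Sketch of the extension-stripping behavior documented on FileFilter (an
    // assertion added here as an assumption, not part of the original test):
    // a header with the same basename as a file in the diff is also analyzed.
    files.shouldAnalyze("standalone2.hpp".Path.AbsolutePath).shouldBeTrue;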
1094 }
1095 
1096 /// Convert to an absolute path by finding the first match among the compiler flags
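///
/// Example (illustrative sketch; `flags` is assumed to be a `ParseFlags` as in
/// `saveDependencies` above). The lookup order is the working directory, then
/// the `-I` includes, then the system includes:
/// ---
/// auto p = toAbsolutePath("config.hpp".Path, rootDir, flags.includes, flags.systemIncludes);
/// if (p.hasValue) { /* found; p.orElse(AbsolutePath.init) is the resolved path */ }
/// ---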
1097 Optional!AbsolutePath toAbsolutePath(Path file, AbsolutePath workDir,
1098         ParseFlags.Include[] includes, SystemIncludePath[] systemIncludes) @trusted nothrow {
1099     import std.algorithm : map, filter;
1100     import std.file : exists;
1101     import std.path : buildPath;
1102 
1103     Optional!AbsolutePath lookup(string dir) nothrow {
1104         const p = buildPath(dir, file);
1105         try {
1106             if (exists(p))
1107                 return some(AbsolutePath(p));
1108         } catch (Exception e) {
1109         }
1110         return none!AbsolutePath;
1111     }
1112 
1113     {
1114         auto a = lookup(workDir.toString);
1115         if (a.hasValue)
1116             return a;
1117     }
1118 
1119     foreach (a; includes.map!(a => lookup(a.payload))
1120             .filter!(a => a.hasValue)) {
1121         return a;
1122     }
1123 
1124     foreach (a; systemIncludes.map!(a => lookup(a.value))
1125             .filter!(a => a.hasValue)) {
1126         return a;
1127     }
1128 
1129     return none!AbsolutePath;
1130 }
1131 
/** Returns: a map from root file to whether it needs to be re-analyzed (true)
 * because either it or one of its dependencies has changed.
 */
1135 bool[Path] dependencyAnalyze(const AbsolutePath dbPath, FilesysIO fio) @trusted {
1136     import dextool.cachetools : nullableCache;
1137     import dextool.plugin.mutate.backend.database : FileId;
1138 
1139     auto db = Database.make(dbPath);
1140 
1141     typeof(return) rval;
1142 
    // add all root files; they are assumed unchanged (false) until detected as changed below.
1144     foreach (a; db.getRootFiles.map!(a => db.getFile(a).get)) {
1145         rval[a] = false;
1146     }
1147 
1148     try {
1149         auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
1150                 30.dur!"seconds");
1151         auto getFileName = nullableCache!(FileId, Path, (FileId id) => db.getFile(id))(256,
1152                 30.dur!"seconds");
1153         auto getFileDbChecksum = nullableCache!(string, Checksum,
1154                 (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
1155         auto getFileFsChecksum = nullableCache!(AbsolutePath, Checksum, (AbsolutePath p) {
1156             return checksum(p);
1157         })(256, 30.dur!"seconds");
1158 
1159         Checksum[Path] dbDeps;
1160         foreach (a; db.dependencyApi.getAll)
1161             dbDeps[a.file] = a.checksum;
1162 
1163         const isToolVersionDifferent = db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
1164         bool isChanged(T)(T f) {
1165             if (isToolVersionDifferent) {
                // because the tool version has changed all files need to be
                // re-analyzed. An update can mean that scheman are improved,
                // mutants have been changed/removed etc. It is unknown; the
                // only way to be sure is to re-analyze all files.
1170                 return true;
1171             }
1172 
1173             if (f.rootCs != getFileFsChecksum(fio.toAbsoluteRoot(f.root)))
1174                 return true;
1175 
1176             foreach (a; f.deps.filter!(a => getFileFsChecksum(fio.toAbsoluteRoot(a)) != dbDeps[a])) {
1177                 return true;
1178             }
1179 
1180             return false;
1181         }
1182 
1183         foreach (f; db.getRootFiles
1184                 .map!(a => db.getFile(a).get)
1185                 .map!(a => tuple!("root", "rootCs", "deps")(a,
1186                     getFileDbChecksum(a), db.dependencyApi.get(a)))
1187                 .cache
1188                 .filter!(a => isChanged(a))
1189                 .map!(a => a.root)) {
1190             rval[f] = true;
1191         }
1192     } catch (Exception e) {
1193         logger.warning(e.msg);
1194     }
1195 
1196     logger.trace("Dependency analyze: ", rval);
1197 
1198     return rval;
1199 }