/**
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import core.thread : Thread;
17 import logger = std.experimental.logger;
18 import std.algorithm : map, filter, joiner, cache;
19 import std.array : array, appender, empty;
20 import std.concurrency;
21 import std.datetime : dur, Duration;
22 import std.exception : collectException;
23 import std.functional : toDelegate;
24 import std.parallelism : TaskPool, totalCPUs;
25 import std.range : tee, enumerate;
26 import std.typecons : tuple;
27 
28 import colorlog;
29 import my.actor.utility.limiter;
30 import my.actor;
31 import my.filter : GlobFilter;
32 import my.gc.refc;
33 import my.named_type;
34 import my.optional;
35 import my.set;
36 
37 static import colorlog;
38 
39 import dextool.utility : dextoolBinaryId;
40 
41 import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
42 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB,
43     ParsedCompileCommandRange, ParsedCompileCommand, ParseFlags, SystemIncludePath;
44 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
45 import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataResult;
46 import dextool.plugin.mutate.backend.database : Database, LineMetadata,
47     MutationPointEntry2, DepFile;
48 import dextool.plugin.mutate.backend.database.type : MarkedMutant, TestFile,
49     TestFilePath, TestFileChecksum, ToolVersion;
50 import dextool.plugin.mutate.backend.diff_parser : Diff;
51 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
52 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
53 import dextool.plugin.mutate.backend.utility : checksum, Checksum, getProfileResult, Profile;
54 import dextool.plugin.mutate.backend.type : Mutation;
55 import dextool.plugin.mutate.type : MutationKind;
56 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze, ConfigSchema, ConfigCoverage;
57 import dextool.type : ExitStatusType, AbsolutePath, Path;
58 
59 version (unittest) {
60     import unit_threaded.assertions;
61 }
62 
63 alias log = colorlog.log!"analyze";
64 
65 /** Analyze the files in `frange` for mutations.
66  */
67 ExitStatusType runAnalyzer(const AbsolutePath dbPath, const MutationKind[] userKinds,
68         ConfigAnalyze analyzeConf, ConfigCompiler compilerConf,
69         ConfigSchema schemaConf, ConfigCoverage covConf,
70         ParsedCompileCommandRange frange, ValidateLoc valLoc, FilesysIO fio) @trusted {
71     import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
72     import dextool.plugin.mutate.backend.mutation_type : toInternal;
73 
74     auto fileFilter = () {
75         try {
76             return FileFilter(fio.getOutputDir, analyzeConf.unifiedDiffFromStdin,
77                     analyzeConf.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
78         } catch (Exception e) {
79             log.info(e.msg);
80             log.warning("Unable to parse diff");
81         }
82         return FileFilter.init;
83     }();
84 
85     bool shouldAnalyze(AbsolutePath p) {
86         return analyzeConf.fileMatcher.match(p.toString) && fileFilter.shouldAnalyze(p);
87     }
88 
89     auto sys = makeSystem;
90 
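    // The flow control actor hands out a limited number of tokens. An analyzer
    // only runs while it holds one and the store actor returns the token when
    // it has received the result. Default to one token per CPU plus one when
    // no pool size is configured.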
91     auto flowCtrl = sys.spawn(&spawnFlowControl, analyzeConf.poolSize == 0
92             ? (totalCPUs + 1) : analyzeConf.poolSize);
93 
94     auto db = refCounted(Database.make(dbPath));
95 
    // maps each root file to true if it or one of its dependencies has changed
    // and it thus needs to be re-analyzed.
97     auto changedDeps = dependencyAnalyze(db.get, fio);
98     auto schemaQ = SchemaQ(db.get.schemaApi.getMutantProbability);
99 
100     auto store = sys.spawn(&spawnStoreActor, flowCtrl, db,
101             StoreConfig(analyzeConf, schemaConf, covConf), fio, changedDeps.byKeyValue
102             .filter!(a => !a.value)
103             .map!(a => a.key)
104             .array);
105     db.release;
    // it crashes if the store actor tries to call dextoolBinaryId. I don't know
    // why... TLS storage trashed? But it works, somehow, if I put some writeln
    // inside dextoolBinaryId.
109     send(store, Start.init, ToolVersion(dextoolBinaryId));
110 
111     sys.spawn(&spawnTestPathActor, store, analyzeConf.testPaths, analyzeConf.testFileMatcher, fio);
112 
113     auto kinds = toInternal(userKinds);
114     //int taskCnt;
115     Set!AbsolutePath alreadyAnalyzed;
116     // dfmt off
117     foreach (f; frange
118             // The tool only supports analyzing a file one time.
            // This optimizes the cases where the same file occurs
120             // multiple times in the compile commands database.
121             .filter!(a => a.cmd.absoluteFile !in alreadyAnalyzed)
122             .tee!(a => alreadyAnalyzed.add(a.cmd.absoluteFile))
123             .cache
124             .filter!(a => shouldAnalyze(a.cmd.absoluteFile))
125             ) {
126         try {
127             if (auto v = fio.toRelativeRoot(f.cmd.absoluteFile) in changedDeps) {
128                 if (!(*v || analyzeConf.forceSaveAnalyze))
129                     continue;
130             }
131 
            // TODO: how to "slow down" if the store actor is too slow.
133 
            // must dup schemaQ or we run into multithreaded bugs because a
            // SchemaQ has mutable caches internally. It must also be allocated
            // on the GC because otherwise they share the same associative array.
            // Don't ask me how that happens because `.dup` should have created
            // a unique one. If you print the address of `.state` here and at the
            // receiving end you will see that they are re-used between actors!
140             auto sq = new SchemaQ(schemaQ.dup.state);
141             auto a = sys.spawn(&spawnAnalyzer, flowCtrl, store, kinds, f, valLoc.dup, fio.dup, AnalyzeConfig(compilerConf, analyzeConf, covConf, sq));
142             send(store, StartedAnalyzer.init);
143         } catch (Exception e) {
144             log.trace(e);
145             log.warning(e.msg);
146         }
147     }
148     // dfmt on
149 
150     send(store, DoneStartingAnalyzers.init);
151 
152     changedDeps = typeof(changedDeps).init; // free the memory
153 
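    // Poll the store actor until it reports that all results have been saved
    // and the post processing is done.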
154     auto self = scopedActor;
155     bool waiting = true;
156     while (waiting) {
157         try {
158             self.request(store, infTimeout).send(IsDone.init).then((bool x) {
159                 waiting = !x;
160             });
161         } catch (ScopedActorException e) {
162             logger.warning(e.error);
163             return ExitStatusType.Errors;
164         }
165         () @trusted { Thread.sleep(100.dur!"msecs"); }();
166     }
167 
168     if (analyzeConf.profile)
169         try {
170             import std.stdio : writeln;
171 
172             writeln(getProfileResult.toString);
173         } catch (Exception e) {
174             log.warning("Unable to print the profile data: ", e.msg).collectException;
175         }
176 
177     return ExitStatusType.Ok;
178 }
179 
180 @safe:
181 
/** Filter function for files. Either all files pass or only those in the diff
 * read from stdin.
 *
 * The matching ignores the file extension in order to lessen the problem that
 * headers are skipped because they do not exist in `compile_commands.json`.
 * It means that e.g. "foo.hpp" would match `true` if `foo.cpp` is in
 * `compile_commands.json`.
 *
 * TODO: this may create problems for header-only libraries because only the
 * unittest would include the header, which means that for this to work the
 * unittest would have to reside in the same directory as the header file,
 * which it normally never does. A diff of a header-only library thus leads to
 * "no files analyzed".
 */
195 struct FileFilter {
196     import std.path : stripExtension;
197 
198     Set!string files;
199     bool useFileFilter;
200     AbsolutePath root;
201 
202     this(AbsolutePath root, bool fromStdin, Diff diff) {
203         this.root = root;
204         this.useFileFilter = fromStdin;
205         foreach (a; diff.toRange(root)) {
206             files.add(a.key.stripExtension);
207         }
208     }
209 
210     bool shouldAnalyze(AbsolutePath p) {
211         import std.path : relativePath;
212 
213         if (!useFileFilter) {
214             return true;
215         }
216 
217         return relativePath(p, root).stripExtension in files;
218     }
219 }
220 
221 struct StartedAnalyzer {
222 }
223 
224 struct DoneStartingAnalyzers {
225 }
226 
/// Number of analyze tasks that have been spawned that the `storeActor` should wait for.
228 struct AnalyzeCntMsg {
229     int value;
230 }
231 
232 /// The main thread is waiting for storeActor to send this message.
233 struct StoreDoneMsg {
234 }
235 
236 struct AnalyzeConfig {
237     import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
238 
239     ConfigCompiler compiler;
240     ConfigAnalyze analyze;
241     ConfigCoverage coverage;
242     SchemaQ* sq;
243 }
244 
245 struct WaitForToken {
246 }
247 
248 struct RunAnalyze {
249 }
250 
251 alias AnalyzeActor = typedActor!(void function(WaitForToken), void function(RunAnalyze));
252 
/// Start an analysis of a file
254 auto spawnAnalyzer(AnalyzeActor.Impl self, FlowControlActor.Address flowCtrl, StoreActor.Address storeAddr,
255         Mutation.Kind[] kinds, ParsedCompileCommand fileToAnalyze,
256         ValidateLoc vloc, FilesysIO fio, AnalyzeConfig conf) {
257     auto st = tuple!("self", "flowCtrl", "storeAddr", "kinds", "fileToAnalyze",
258             "vloc", "fio", "conf")(self, flowCtrl, storeAddr, kinds,
259             fileToAnalyze, vloc, fio.dup, conf);
260     alias Ctx = typeof(st);
261 
262     static void wait(ref Ctx ctx, WaitForToken) {
263         ctx.self.request(ctx.flowCtrl, infTimeout).send(TakeTokenMsg.init)
264             .capture(ctx).then((ref Ctx ctx, Token _) => send(ctx.self, RunAnalyze.init));
265     }
266 
267     static void run(ref Ctx ctx, RunAnalyze) @safe {
268         auto profile = Profile("analyze file " ~ ctx.fileToAnalyze.cmd.absoluteFile);
269 
270         bool onlyValidFiles = true;
271 
272         try {
273             log.tracef("%s begin", ctx.fileToAnalyze.cmd.absoluteFile);
274             auto analyzer = Analyze(ctx.kinds, ctx.vloc, ctx.fio,
275                     Analyze.Config(ctx.conf.compiler.forceSystemIncludes,
276                         ctx.conf.coverage.use, ctx.conf.compiler.allowErrors.get, *ctx.conf.sq));
277             analyzer.process(ctx.fileToAnalyze);
278 
279             foreach (a; analyzer.result.idFile.byKey) {
280                 if (!isFileSupported(ctx.fio, a)) {
                    log.warningf(
                            "%s: file not supported. It must be in utf-8 format without a BOM marker",
                            a);
283                     onlyValidFiles = false;
284                     break;
285                 }
286             }
287 
288             if (onlyValidFiles)
289                 send(ctx.storeAddr, analyzer.result, Token.init);
290             log.tracef("%s end", ctx.fileToAnalyze.cmd.absoluteFile);
291         } catch (Exception e) {
292             onlyValidFiles = false;
293             log.error(e.msg).collectException;
294         }
295 
296         if (!onlyValidFiles) {
297             log.tracef("%s failed", ctx.fileToAnalyze.cmd.absoluteFile).collectException;
298             send(ctx.storeAddr, Token.init);
299         }
300 
301         ctx.self.shutdown;
302     }
303 
304     self.name = "analyze";
305     send(self, WaitForToken.init);
306     return impl(self, &run, capture(st), &wait, capture(st));
307 }
308 
309 class TestFileResult {
310     Duration time;
311     TestFile[Checksum] files;
312 }
313 
314 alias TestPathActor = typedActor!(void function(Start, StoreActor.Address));
315 
316 auto spawnTestPathActor(TestPathActor.Impl self, StoreActor.Address store,
317         AbsolutePath[] userPaths, GlobFilter matcher, FilesysIO fio) {
318     import std.datetime : Clock;
319     import std.datetime.stopwatch : StopWatch, AutoStart;
320     import std.file : isDir, isFile, dirEntries, SpanMode;
321     import my.container.vector;
322 
323     auto st = tuple!("self", "matcher", "fio", "userPaths")(self, matcher, fio.dup, userPaths);
324     alias Ctx = typeof(st);
325 
326     static void start(ref Ctx ctx, Start, StoreActor.Address store) {
327         auto profile = Profile("checksum test files");
328 
329         auto sw = StopWatch(AutoStart.yes);
330 
331         TestFile makeTestFile(const AbsolutePath file) {
332             auto cs = checksum(ctx.fio.makeInput(file).content[]);
333             return TestFile(TestFilePath(ctx.fio.toRelativeRoot(file)),
334                     TestFileChecksum(cs), Clock.currTime);
335         }
336 
337         auto paths = vector(ctx.userPaths);
338 
339         auto tfiles = new TestFileResult;
340         scope (exit)
341             tfiles.time = sw.peek;
342 
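        // breadth first walk of the user supplied paths. Directories are
        // expanded and files matching the test file glob are checksummed.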
343         while (!paths.empty) {
344             try {
345                 if (isDir(paths.front)) {
346                     log.trace("  Test directory ", paths.front);
347                     foreach (a; dirEntries(paths.front, SpanMode.shallow).map!(
348                             a => AbsolutePath(a.name))) {
349                         paths.put(a);
350                     }
351                 } else if (isFile(paths.front) && ctx.matcher.match(paths.front)) {
352                     log.trace("  Test saved ", paths.front);
353                     auto t = makeTestFile(paths.front);
354                     tfiles.files[t.checksum.get] = t;
355                 }
356             } catch (Exception e) {
357                 log.warning(e.msg).collectException;
358             }
359 
360             paths.popFront;
361         }
362 
363         log.infof("Found %s test files", tfiles.files.length).collectException;
364         send(store, tfiles);
365         ctx.self.shutdown;
366     }
367 
368     self.name = "test path";
369     send(self, Start.init, store);
370     return impl(self, &start, capture(st));
371 }
372 
373 struct Start {
374 }
375 
376 struct IsDone {
377 }
378 
379 struct SetDone {
380 }
381 
382 // Check if it is time to post process
383 struct CheckPostProcess {
384 }
// Run the post processing.
386 struct PostProcess {
387 }
388 
389 struct StoreConfig {
390     ConfigAnalyze analyze;
391     ConfigSchema schema;
392     ConfigCoverage coverage;
393 }
394 
395 alias StoreActor = typedActor!(void function(Start, ToolVersion), bool function(IsDone),
        void function(StartedAnalyzer), void function(Analyze.Result, Token),
        // failed to analyze the file, but still returning the token.
        void function(Token),
398         void function(DoneStartingAnalyzers), void function(TestFileResult),
399         void function(CheckPostProcess), void function(PostProcess),);
400 
/// Store the result of the analysis.
402 auto spawnStoreActor(StoreActor.Impl self, FlowControlActor.Address flowCtrl,
403         RefCounted!(Database) db, StoreConfig conf, FilesysIO fio, Path[] rootFiles) @trusted {
404     static struct SchemataSaver {
405         import sumtype;
406         import my.optional;
407         import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataBuilder;
408 
409         typeof(ConfigSchema.minMutantsPerSchema) minMutantsPerSchema;
410         typeof(ConfigSchema.mutantsPerSchema) mutantsPerSchema;
411         SchemataBuilder builder;
412 
413         void put(FilesysIO fio, SchemataResult.Schemata[AbsolutePath] a) {
414             builder.put(fio, a);
415         }
416 
417         void process(ref Database db, Optional!(SchemataBuilder.ET) value) {
418             value.match!((Some!(SchemataBuilder.ET) a) {
419                 try {
420                     auto mutants = a.mutants
421                         .map!(a => db.mutantApi.getMutationStatusId(a.id))
422                         .filter!(a => !a.isNull)
423                         .map!(a => a.get)
424                         .array;
425                     if (!mutants.empty) {
426                         const id = db.schemaApi.putSchemata(a.checksum, a.fragments, mutants);
427                         log.tracef(!id.isNull, "Saving schema %s with %s mutants",
428                             id.get.get, mutants.length);
429                     }
430                 } catch (Exception e) {
431                     log.trace(e.msg);
432                 }
433             }, (None a) {});
434         }
435 
436         /// Consume fragments used by scheman containing >min mutants.
437         void setIntermediate() {
438             log.trace("schema generator phase: intermediate");
439             builder.discardMinScheman = false;
440             builder.useProbability = true;
441             builder.useProbablitySmallSize = false;
442             builder.mutantsPerSchema = mutantsPerSchema.get;
443             builder.minMutantsPerSchema = mutantsPerSchema.get;
444             builder.thresholdStartValue = 1.0;
445         }
446 
447         void setReducedIntermediate(long sizeDiv, long threshold) {
448             import std.algorithm : max;
449 
450             log.tracef("schema generator phase: reduced size:%s threshold:%s", sizeDiv, threshold);
451             builder.discardMinScheman = false;
452             builder.useProbability = true;
453             builder.useProbablitySmallSize = false;
454             builder.mutantsPerSchema = mutantsPerSchema.get;
455             builder.minMutantsPerSchema = max(minMutantsPerSchema.get,
456                     mutantsPerSchema.get / sizeDiv);
            // TODO: interesting effect. This needs to be studied. I think this
            // is the behavior that is "best".
459             builder.thresholdStartValue = 1.0 - (cast(double) threshold / 100.0);
460         }
461 
462         void run(ref Database db) {
            // Sorting the fragments by file should allow those with high
            // probability to result in larger scheman while those with lower
            // probability end up in small scheman. The small ones are thus
            // those that are highly likely to fail to compile.
            // 2021-09-03: sorting the fragments was a bad idea. It led to
            // very large schemas in one and the same file which failed
            // compilation because the computer ran out of memory.
            // Therefore a strategy of shuffling is tested instead.
471             builder.shuffle;
472 
473             while (!builder.isDone) {
474                 process(db, builder.next);
475             }
476 
477             builder.restart;
478         }
479 
480         /// Consume all fragments or discard.
481         void finalize(ref Database db) {
482             log.trace("schema generator phase: finalize");
483             builder.discardMinScheman = true;
484             builder.useProbability = false;
485             builder.useProbablitySmallSize = true;
486             builder.mutantsPerSchema = mutantsPerSchema.get;
487             builder.minMutantsPerSchema = minMutantsPerSchema.get;
488             builder.thresholdStartValue = 0;
489 
490             // two loops to pass over all mutants and retry new schema
491             // compositions. Any schema that is less than the minimum will be
492             // discarded so the number of mutants will shrink.
493             while (!builder.isDone) {
494                 while (!builder.isDone) {
495                     process(db, builder.next);
496                 }
497                 builder.restart;
498             }
499         }
500     }
501 
502     static struct State {
        // analysis of source files
504         int startedAnalyzers;
505         int savedResult;
506         bool doneStarting;
507 
508         /// Set when the whole process is done.
509         bool isDone;
510 
511         bool savedTestFileResult;
512 
513         bool isToolVersionDifferent;
514 
        // A file is saved at most one time to the database.
516         Set!AbsolutePath savedFiles;
517 
518         SchemataSaver schemas;
519     }
520 
521     auto st = tuple!("self", "db", "state", "fio", "conf", "rootFiles", "flowCtrl")(self,
522             db, refCounted(State.init), fio.dup, conf, rootFiles, flowCtrl);
523     alias Ctx = typeof(st);
524 
525     static void start(ref Ctx ctx, Start, ToolVersion toolVersion) {
526         import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
527         import dextool.plugin.mutate.backend.database : SchemaStatus;
528 
529         log.trace("starting store actor");
530 
531         ctx.state.get.isToolVersionDifferent = ctx.db.get.isToolVersionDifferent(toolVersion);
532         ctx.state.get.schemas = SchemataSaver(ctx.conf.schema.minMutantsPerSchema,
533                 ctx.conf.schema.mutantsPerSchema);
534 
535         if (ctx.conf.analyze.fastDbStore) {
536             log.info(
537                     "Turning OFF sqlite3 synchronization protection to improve the write performance");
538             log.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
539             ctx.db.get.run("PRAGMA synchronous = OFF");
540             ctx.db.get.run("PRAGMA journal_mode = MEMORY");
541         }
542 
543         {
544             auto trans = ctx.db.get.transaction;
545             auto profile = Profile("update schema probability");
546             log.info("Update schema probability");
547 
548             ctx.state.get.schemas.builder.schemaQ = updateSchemaQ(ctx.db.get);
549             ctx.state.get.schemas.builder.mutantsPerSchema = updateSchemaSizeQ(ctx.db.get,
550                     ctx.conf.schema.mutantsPerSchema.get, ctx.conf.schema.minMutantsPerSchema.get)
551                 .currentSize;
552 
553             trans.commit;
554         }
555         {
556             auto trans = ctx.db.get.transaction;
557             auto profile = Profile("prune old schemas");
558             if (ctx.state.get.isToolVersionDifferent) {
559                 log.info("Prune database of scheman created by the old version");
560                 ctx.db.get.schemaApi.deleteAllSchemas;
561             }
562             trans.commit;
563         }
564         {
565             import std.traits : EnumMembers;
566 
567             auto trans = ctx.db.get.transaction;
568             auto profile = Profile("prune used schemas");
569             log.info("Prune the database of used schemas");
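            // when a full re-analysis is forced, schemas of all statuses are
            // pruned. Otherwise only those that are fully killed or broken.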
570             const removed = () {
571                 if (ctx.conf.analyze.forceSaveAnalyze)
572                     return ctx.db.get.schemaApi.pruneUsedSchemas([
573                             EnumMembers!SchemaStatus
574                             ]);
575                 return ctx.db.get.schemaApi.pruneUsedSchemas([
576                         SchemaStatus.allKilled, SchemaStatus.broken
577                         ]);
578             }();
579             trans.commit;
580             if (removed != 0) {
581                 logger.infof("Removed %s schemas", removed);
582                 ctx.db.get.vacuum;
583             }
584         }
585 
586         send(ctx.self, CheckPostProcess.init);
587         log.trace("store actor active");
588     }
589 
590     static bool isDone(ref Ctx ctx, IsDone) {
591         return ctx.state.get.isDone;
592     }
593 
594     static void startedAnalyzers(ref Ctx ctx, StartedAnalyzer) {
595         ctx.state.get.startedAnalyzers++;
596     }
597 
598     static void doneStartAnalyzers(ref Ctx ctx, DoneStartingAnalyzers) {
599         ctx.state.get.doneStarting = true;
600     }
601 
602     static void failedFileAnalyze(ref Ctx ctx, Token) {
603         send(ctx.flowCtrl, ReturnTokenMsg.init);
604         // a failed file has to count as well.
605         ctx.state.get.savedResult++;
606     }
607 
608     static void checkPostProcess(ref Ctx ctx, CheckPostProcess) {
609         if (ctx.state.get.doneStarting && ctx.state.get.savedTestFileResult
610                 && (ctx.state.get.startedAnalyzers == ctx.state.get.savedResult))
611             send(ctx.self, PostProcess.init);
612         else
613             delayedSend(ctx.self, delay(500.dur!"msecs"), CheckPostProcess.init);
614     }
615 
616     static void savedTestFileResult(ref Ctx ctx, TestFileResult result) {
617         auto profile = Profile("save test files");
618 
619         ctx.state.get.savedTestFileResult = true;
620 
621         Set!Checksum old;
622 
623         auto t = ctx.db.get.transaction;
624 
625         foreach (a; ctx.db.get.testFileApi.getTestFiles) {
626             old.add(a.checksum.get);
627             if (a.checksum.get !in result.files) {
628                 log.info("Removed test file ", a.file.get.toString);
629                 ctx.db.get.testFileApi.removeFile(a.file);
630             }
631         }
632 
633         foreach (a; result.files.byValue.filter!(a => a.checksum.get !in old)) {
634             log.info("Saving test file ", a.file.get.toString);
635             ctx.db.get.testFileApi.put(a);
636         }
637 
638         t.commit;
639 
640         send(ctx.self, CheckPostProcess.init);
641     }
642 
643     static void save(ref Ctx ctx, Analyze.Result result, Token) {
644         import dextool.cachetools : nullableCache;
645         import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
646         import dextool.plugin.mutate.backend.type : Language;
647 
648         auto profile = Profile("save " ~ result.root);
649 
        // by returning the token now another file analysis can start while we
        // are saving the current one.
652         send(ctx.flowCtrl, ReturnTokenMsg.init);
653 
654         ctx.state.get.savedResult++;
655         log.infof("Analyzed file %s/%s", ctx.state.get.savedResult,
656                 ctx.state.get.startedAnalyzers);
657 
658         auto getFileId = nullableCache!(string, FileId, (string p) => ctx.db.get.getFileId(p.Path))(256,
659                 30.dur!"seconds");
660         auto getFileDbChecksum = nullableCache!(string, Checksum,
661                 (string p) => ctx.db.get.getFileChecksum(p.Path))(256, 30.dur!"seconds");
662         auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
663             return checksum(ctx.fio.makeInput(AbsolutePath(Path(p))).content[]);
664         })(256, 30.dur!"seconds");
665 
666         static struct Files {
667             Checksum[Path] value;
668 
669             this(ref Database db) {
670                 foreach (a; db.getDetailedFiles) {
671                     value[a.file] = a.fileChecksum;
672                 }
673             }
674         }
675 
676         auto trans = ctx.db.get.transaction;
677 
678         // mark files that have an unchanged checksum as "already saved"
679         foreach (f; result.idFile
680                 .byKey
681                 .filter!(a => a !in ctx.state.get.savedFiles)
682                 .filter!(a => getFileDbChecksum(ctx.fio.toRelativeRoot(a)) == getFileFsChecksum(a)
683                     && !ctx.conf.analyze.forceSaveAnalyze && !ctx.state.get.isToolVersionDifferent)) {
684             log.info("Unchanged ".color(Color.yellow), f);
685             ctx.state.get.savedFiles.add(f);
686         }
687 
        // mutation points for a file are only saved one time.
689         {
690             auto app = appender!(MutationPointEntry2[])();
691             bool isChanged = ctx.state.get.isToolVersionDifferent;
692             foreach (mp; result.mutationPoints
693                     .map!(a => tuple!("data", "file")(a, ctx.fio.toAbsoluteRoot(a.file)))
694                     .filter!(a => a.file !in ctx.state.get.savedFiles)) {
695                 app.put(mp.data);
696             }
697             foreach (f; result.idFile.byKey.filter!(a => a !in ctx.state.get.savedFiles)) {
698                 isChanged = true;
699                 log.info("Saving ".color(Color.green), f);
700                 const relp = ctx.fio.toRelativeRoot(f);
701 
702                 // this is critical in order to remove old data about a file.
703                 ctx.db.get.removeFile(relp);
704 
705                 const info = result.infoId[result.idFile[f]];
706                 ctx.db.get.put(relp, info.checksum, info.language, f == result.root);
707                 ctx.state.get.savedFiles.add(f);
708             }
709             ctx.db.get.mutantApi.put(app.data, ctx.fio.getOutputDir);
710 
711             if (result.root !in ctx.state.get.savedFiles) {
                // this occurs when the file is e.g. a unittest that uses a
                // header only library. The unittests are not mutated thus
                // no mutation points exist in them but we want dextool to
                // still, if possible, track the unittests for changes.
716                 isChanged = true;
717                 const relp = ctx.fio.toRelativeRoot(result.root);
718                 ctx.db.get.removeFile(relp);
                // the language does not matter because it is a file without
                // any mutants.
721                 ctx.db.get.put(relp, result.rootCs, Language.init, true);
722                 ctx.state.get.savedFiles.add(ctx.fio.toAbsoluteRoot(result.root));
723             }
724 
725             // must always update dependencies because they may not contain
726             // mutants. Only files that are changed and contain mutants
727             // trigger isChanged to be true.
728             try {
729                 // not all files are tracked thus this may throw an exception.
730                 ctx.db.get.dependencyApi.set(ctx.fio.toRelativeRoot(result.root),
731                         result.dependencies);
732             } catch (Exception e) {
733             }
734 
735             if (isChanged) {
736                 foreach (a; result.coverage.byKeyValue) {
737                     const fid = getFileId(ctx.fio.toRelativeRoot(result.fileId[a.key]));
738                     if (!fid.isNull) {
739                         ctx.db.get.coverageApi.clearCoverageMap(fid.get);
740                         ctx.db.get.coverageApi.putCoverageMap(fid.get, a.value);
741                     }
742                 }
743 
                // only save the schematas if mutation points were saved.
                // This ensures that only schematas for changed/new files
                // are saved.
747                 ctx.state.get.schemas.put(ctx.fio, result.schematas);
748                 ctx.state.get.schemas.setIntermediate;
749                 ctx.state.get.schemas.run(ctx.db.get);
750             }
751         }
752 
753         {
754             Set!long printed;
755             auto app = appender!(LineMetadata[])();
756             foreach (md; result.metadata) {
757                 const localId = Analyze.Result.LocalFileId(md.id.get);
758                 // transform the ID from local to global.
759                 const fid = getFileId(ctx.fio.toRelativeRoot(result.fileId[localId]));
760                 if (fid.isNull && !printed.contains(md.id.get)) {
761                     printed.add(md.id.get);
762                     log.info("File with suppressed mutants (// NOMUT) not in the database: ",
763                             result.fileId[localId]).collectException;
764                 } else if (!fid.isNull) {
765                     app.put(LineMetadata(fid.get, md.line, md.attr));
766                 }
767             }
768             ctx.db.get.metaDataApi.put(app.data);
769         }
770 
771         trans.commit;
772 
773         send(ctx.self, CheckPostProcess.init);
774     }
775 
776     static void postProcess(ref Ctx ctx, PostProcess) {
777         import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;
778 
779         if (ctx.state.get.isDone)
780             return;
781 
782         ctx.state.get.isDone = true;
783 
784         void fastDbOff() {
785             if (!ctx.conf.analyze.fastDbStore)
786                 return;
787             ctx.db.get.run("PRAGMA synchronous = ON");
788             ctx.db.get.run("PRAGMA journal_mode = DELETE");
789         }
790 
791         void pruneFiles() {
792             import std.path : buildPath;
793 
794             auto profile = Profile("prune files");
795 
796             log.info("Pruning the database of dropped files");
797             auto files = ctx.db.get.getFiles.map!(a => ctx.fio.toAbsoluteRoot(a)).toSet;
798 
799             foreach (f; files.setDifference(ctx.state.get.savedFiles).toRange) {
800                 log.info("Removing ".color(Color.red), f);
801                 ctx.db.get.removeFile(ctx.fio.toRelativeRoot(f));
802             }
803         }
804 
805         void addRoots() {
806             if (ctx.conf.analyze.forceSaveAnalyze || ctx.state.get.isToolVersionDifferent)
807                 return;
808 
            // add root files and their dependencies that have not been analyzed
            // because nothing has changed. By adding them they are not removed.
811 
812             auto profile = Profile("add roots and dependencies");
813             foreach (a; ctx.rootFiles) {
814                 auto p = ctx.fio.toAbsoluteRoot(a);
815                 if (p !in ctx.state.get.savedFiles) {
816                     ctx.state.get.savedFiles.add(p);
                    // fake output for the user to tell them that yes, the files
                    // have been analyzed.
819                     log.info("Analyzing ", a);
820                     log.info("Unchanged ".color(Color.yellow), a);
821                 }
822             }
823             foreach (a; ctx.rootFiles.map!(a => ctx.db.get.dependencyApi.get(a)).joiner) {
824                 ctx.state.get.savedFiles.add(ctx.fio.toAbsoluteRoot(a));
825             }
826         }
827 
828         void finalizeSchema() {
829             auto trans = ctx.db.get.transaction;
830 
831             immutable magic = 10; // reduce the size until it is 1/10 of the original
832             immutable magic2 = 5; // if it goes <95% then it is too high probability to fail
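            // progressively lower the minimum schema size and the probability
            // threshold, then consume or discard whatever fragments remain.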
833             foreach (sizeDiv; 1 .. magic) {
834                 foreach (threshold; 0 .. magic2) {
835                     ctx.state.get.schemas.setReducedIntermediate(sizeDiv, threshold);
836                     ctx.state.get.schemas.run(ctx.db.get);
837                 }
838             }
839 
840             ctx.state.get.schemas.finalize(ctx.db.get);
841 
842             trans.commit;
843             ctx.state.get.schemas = SchemataSaver.init;
844         }
845 
846         finalizeSchema;
847 
848         auto trans = ctx.db.get.transaction;
849 
850         addRoots;
851 
852         log.info("Resetting timeout context");
853         resetTimeoutContext(ctx.db.get);
854 
855         log.info("Updating metadata");
856         ctx.db.get.metaDataApi.updateMetadata;
857 
858         if (ctx.conf.analyze.prune) {
859             pruneFiles();
860             {
861                 auto profile = Profile("prune mangled schemas");
862                 log.info("Prune the database of mangled schemas");
863                 ctx.db.get.schemaApi.pruneSchemas;
864             }
865             {
866                 auto profile = Profile("prune dependencies");
867                 log.info("Prune dependencies");
868                 ctx.db.get.dependencyApi.cleanup;
869             }
870             {
871                 auto profile = Profile("remove orphaned mutants");
872                 log.info("Removing orphaned mutants");
873                 ctx.db.get.mutantApi.removeOrphanedMutants;
874             }
875         }
876 
877         log.info("Updating manually marked mutants");
878         updateMarkedMutants(ctx.db.get);
879         printLostMarkings(ctx.db.get.markMutantApi.getLostMarkings);
880 
881         if (ctx.state.get.isToolVersionDifferent) {
882             log.info("Updating tool version");
883             ctx.db.get.updateToolVersion(ToolVersion(dextoolBinaryId));
884         }
885 
886         log.info("Committing changes");
887         trans.commit;
888         log.info("Ok".color(Color.green));
889 
890         fastDbOff();
891 
892         if (ctx.state.get.isToolVersionDifferent) {
893             auto profile = Profile("compact");
894             log.info("Compacting the database");
895             ctx.db.get.vacuum;
896         }
897     }
898 
899     self.name = "store";
900 
901     auto s = impl(self, &start, capture(st), &isDone, capture(st),
902             &startedAnalyzers, capture(st), &save, capture(st), &doneStartAnalyzers,
903             capture(st), &savedTestFileResult, capture(st), &checkPostProcess,
904             capture(st), &postProcess, capture(st), &failedFileAnalyze, capture(st));
905     s.exceptionHandler = toDelegate(&logExceptionHandler);
906     return s;
907 }
908 
909 /// Analyze a file for mutants.
910 struct Analyze {
911     import std.regex : Regex, regex, matchFirst;
912     import std.typecons : Yes;
913     import libclang_ast.context : ClangContext;
914 
915     static struct Config {
916         bool forceSystemIncludes;
917         bool saveCoverage;
918         bool allowErrors;
919         SchemaQ sq;
920     }
921 
922     private {
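        // Matches comments that begin with `//` or `/*` immediately followed
        // by NOMUT, an optional `(tag)` and an optional trailing comment.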
923         static immutable rawReNomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;
924 
925         Regex!char re_nomut;
926 
927         ValidateLoc valLoc;
928         FilesysIO fio;
929 
930         Cache cache;
931 
932         Result result;
933 
934         Config conf;
935 
936         Mutation.Kind[] kinds;
937     }
938 
939     this(Mutation.Kind[] kinds, ValidateLoc valLoc, FilesysIO fio, Config conf) @trusted {
940         this.kinds = kinds;
941         this.valLoc = valLoc;
942         this.fio = fio;
943         this.cache = new Cache;
944         this.re_nomut = regex(rawReNomut);
945         this.result = new Result;
946         this.conf = conf;
947     }
948 
949     void process(ParsedCompileCommand commandsForFileToAnalyze) @safe {
950         import std.file : exists;
951 
952         commandsForFileToAnalyze.flags.forceSystemIncludes = conf.forceSystemIncludes;
953 
954         try {
955             if (!exists(commandsForFileToAnalyze.cmd.absoluteFile)) {
                log.warningf("Failed to analyze %s. The file does not exist",
957                         commandsForFileToAnalyze.cmd.absoluteFile);
958                 return;
959             }
960         } catch (Exception e) {
961             log.warning(e.msg);
962             return;
963         }
964 
965         result.root = commandsForFileToAnalyze.cmd.absoluteFile;
966 
967         try {
968             result.rootCs = checksum(result.root);
969 
970             auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
971             auto tstream = new TokenStreamImpl(ctx);
972 
973             analyzeForMutants(commandsForFileToAnalyze, result.root, ctx, tstream);
974             foreach (f; result.fileId.byValue)
975                 analyzeForComments(f, tstream);
976         } catch (Exception e) {
977             () @trusted { log.trace(e); }();
978             log.info(e.msg);
979             log.error("failed analyze of ",
980                     commandsForFileToAnalyze.cmd.absoluteFile).collectException;
981         }
982     }
983 
984     void analyzeForMutants(ParsedCompileCommand commandsForFileToAnalyze,
985             AbsolutePath fileToAnalyze, ref ClangContext ctx, TokenStream tstream) @safe {
986         import my.gc.refc : RefCounted;
987         import dextool.plugin.mutate.backend.analyze.ast : Ast;
988         import dextool.plugin.mutate.backend.analyze.pass_clang;
989         import dextool.plugin.mutate.backend.analyze.pass_coverage;
990         import dextool.plugin.mutate.backend.analyze.pass_filter;
991         import dextool.plugin.mutate.backend.analyze.pass_mutant;
992         import dextool.plugin.mutate.backend.analyze.pass_schemata;
993         import libclang_ast.check_parse_result : hasParseErrors, logDiagnostic;
994 
995         log.info("Analyzing ", fileToAnalyze);
996         RefCounted!(Ast) ast;
997         {
998             auto tu = ctx.makeTranslationUnit(fileToAnalyze,
999                     commandsForFileToAnalyze.flags.completeFlags);
1000             if (tu.hasParseErrors) {
1001                 logDiagnostic(tu);
1002                 log.warningf("Compile error in %s", fileToAnalyze);
1003                 if (!conf.allowErrors) {
1004                     log.warning("Skipping");
1005                     return;
1006                 }
1007             }
1008 
1009             auto res = toMutateAst(tu.cursor, fio);
1010             ast = res.ast;
1011             saveDependencies(commandsForFileToAnalyze.flags, result.root, res.dependencies);
1012             log!"analyze.pass_clang".trace(ast.get.toString);
1013         }
1014 
1015         auto codeMutants = () {
1016             auto mutants = toMutants(ast.ptr, fio, valLoc, kinds);
1017             log!"analyze.pass_mutant".trace(mutants);
1018 
1019             log!"analyze.pass_filter".trace("filter mutants");
1020             mutants = filterMutants(fio, mutants);
1021             log!"analyze.pass_filter".trace(mutants);
1022 
1023             return toCodeMutants(mutants, fio, tstream);
1024         }();
1025         debug logger.trace(codeMutants);
1026 
1027         {
1028             auto schemas = toSchemata(ast.ptr, fio, codeMutants, conf.sq);
1029             log!"analyze.pass_schema".trace(schemas);
1030             log.tracef("path dedup count:%s length_acc:%s",
1031                     ast.get.paths.count, ast.get.paths.lengthAccum);
1032 
1033             result.schematas = schemas.getSchematas;
1034         }
1035 
1036         result.mutationPoints = codeMutants.points.byKeyValue.map!(
1037                 a => a.value.map!(b => MutationPointEntry2(fio.toRelativeRoot(a.key),
1038                 b.offset, b.sloc.begin, b.sloc.end, b.mutants))).joiner.array;
1039         foreach (f; codeMutants.points.byKey) {
1040             const id = Result.LocalFileId(result.idFile.length);
1041             result.idFile[f] = id;
1042             result.fileId[id] = f;
1043             result.infoId[id] = Result.FileInfo(codeMutants.csFiles[f], codeMutants.lang);
1044         }
1045 
1046         if (conf.saveCoverage) {
1047             auto cov = toCoverage(ast.ptr, fio, valLoc);
1048             debug logger.trace(cov);
1049 
1050             foreach (a; cov.points.byKeyValue) {
1051                 if (auto id = a.key in result.idFile) {
1052                     result.coverage[*id] = a.value;
1053                 }
1054             }
1055         }
1056     }
1057 
1058     /** Tokens are always from the same file.
1059      *
1060      * TODO: move this to pass_clang.
1061      */
1062     void analyzeForComments(AbsolutePath file, TokenStream tstream) @trusted {
1063         import std.algorithm : filter;
1064         import clang.c.Index : CXTokenKind;
1065         import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
1066 
1067         if (auto localId = file in result.idFile) {
1068             const fid = FileId(localId.get);
1069 
1070             auto mdata = appender!(LineMetadata[])();
1071             foreach (t; cache.getTokens(AbsolutePath(file), tstream)
1072                     .filter!(a => a.kind == CXTokenKind.comment)) {
1073                 auto m = matchFirst(t.spelling, re_nomut);
1074                 if (m.whichPattern == 0)
1075                     continue;
1076 
1077                 mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
1078                 log.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
1079             }
1080 
1081             result.metadata ~= mdata.data;
1082         }
1083     }
1084 
1085     void saveDependencies(ParseFlags flags, AbsolutePath root, Path[] dependencies) @trusted {
1086         import std.algorithm : cache;
1087         import std.mmfile;
1088 
1089         auto rootDir = root.dirName;
1090 
1091         foreach (p; dependencies.map!(a => toAbsolutePath(a, rootDir,
1092                 flags.includes, flags.systemIncludes))
1093                 .cache
1094                 .filter!(a => a.hasValue)
1095                 .map!(a => a.orElse(AbsolutePath.init))
1096                 .filter!(a => valLoc.isInsideOutputDir(a))) {
1097             try {
1098                 result.dependencies ~= DepFile(fio.toRelativeRoot(p), checksum(p));
1099             } catch (Exception e) {
1100                 log.trace(e.msg).collectException;
1101             }
1102         }
1103 
1104         log.trace(result.dependencies);
1105     }
1106 
1107     static class Result {
1108         import dextool.plugin.mutate.backend.analyze.ast : Interval;
1109         import dextool.plugin.mutate.backend.database.type : SchemataFragment;
1110         import dextool.plugin.mutate.backend.type : Language, CodeChecksum, SchemataChecksum;
1111 
1112         alias LocalFileId = NamedType!(long, Tag!"LocalFileId", long.init,
1113                 TagStringable, Hashable);
1114         alias LocalSchemaId = NamedType!(long, Tag!"LocalSchemaId", long.init,
1115                 TagStringable, Hashable);
1116 
1117         MutationPointEntry2[] mutationPoints;
1118 
1119         static struct FileInfo {
1120             Checksum checksum;
1121             Language language;
1122         }
1123 
1124         /// The file that is analyzed, which is a root
1125         AbsolutePath root;
1126         Checksum rootCs;
1127 
1128         /// The dependencies the root has.
1129         DepFile[] dependencies;
1130 
1131         /// The key is the ID from idFile.
1132         FileInfo[LocalFileId] infoId;
1133 
        /// The IDs are unique for *this* analysis, not globally.
1135         LocalFileId[AbsolutePath] idFile;
1136         AbsolutePath[LocalFileId] fileId;
1137 
1138         // The FileID used in the metadata is local to this analysis. It has to
1139         // be remapped when added to the database.
1140         LineMetadata[] metadata;
1141 
        /// Mutant schematas that have been generated.
1143         SchemataResult.Schemata[AbsolutePath] schematas;
1144 
1145         /// Coverage intervals that can be instrumented.
1146         Interval[][LocalFileId] coverage;
1147     }
1148 }
1149 
1150 @(
1151         "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
1152 unittest {
1153     import std.regex : regex, matchFirst;
1154     import unit_threaded.runner.io : writelnUt;
1155 
1156     auto re_nomut = regex(Analyze.rawReNomut);
    // NOMUT in other types of comments should NOT match.
1158     matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
1159     matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
1160     matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
1161     matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);
1162 
1163     matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
1164     matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
1165     matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
1166     matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
1167     matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
1168     matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
1169     auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
1170     m["tag"].shouldEqual("arch");
1171     m["comment"].shouldEqual("smurf");
1172 }
1173 
/// Stream of tokens from a file with support for filtering out comment tokens.
1175 class TokenStreamImpl : TokenStream {
1176     import libclang_ast.context : ClangContext;
1177     import dextool.plugin.mutate.backend.type : Token;
1178     import dextool.plugin.mutate.backend.utility : tokenize;
1179 
1180     ClangContext* ctx;
1181 
1182     /// The context must outlive any instance of this class.
    // TODO remove @trusted when upgrading to dmd-fe 2.091.0+ and activate dip25 + dip1000
1184     this(ref ClangContext ctx) @trusted {
1185         this.ctx = &ctx;
1186     }
1187 
1188     Token[] getTokens(Path p) {
1189         return tokenize(*ctx, p);
1190     }
1191 
1192     Token[] getFilteredTokens(Path p) {
1193         import clang.c.Index : CXTokenKind;
1194 
1195         // Filter a stream of tokens for those that should affect the checksum.
1196         return tokenize(*ctx, p).filter!(a => a.kind != CXTokenKind.comment).array;
1197     }
1198 }
1199 
1200 /// Returns: true if `f` is inside any `roots`.
1201 bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
1202     import dextool.utility : isPathInsideRoot;
1203 
1204     foreach (root; roots) {
1205         if (isPathInsideRoot(root, f))
1206             return true;
1207     }
1208 
1209     return false;
1210 }
1211 
1212 /** Update the connection between the marked mutants and their mutation status
1213  * id and mutation id.
1214  */
1215 void updateMarkedMutants(ref Database db) {
1216     import dextool.plugin.mutate.backend.database.type : MutationStatusId;
1217     import dextool.plugin.mutate.backend.type : ExitStatus;
1218 
1219     void update(MarkedMutant m) {
1220         const stId = db.mutantApi.getMutationStatusId(m.statusChecksum);
1221         if (stId.isNull)
1222             return;
1223         const mutId = db.mutantApi.getMutationId(stId.get);
1224         if (mutId.isNull)
1225             return;
1226         db.markMutantApi.removeMarkedMutant(m.statusChecksum);
1227         db.markMutantApi.markMutant(mutId.get, m.path, m.sloc, stId.get,
1228                 m.statusChecksum, m.toStatus, m.rationale, m.mutText);
1229         db.mutantApi.updateMutationStatus(stId.get, m.toStatus, ExitStatus(0));
1230     }
1231 
1232     // find those marked mutants that have a checksum that is different from
1233     // the mutation status the marked mutant is related to. If possible change
1234     // the relation to the correct mutation status id.
1235     foreach (m; db.markMutantApi
1236             .getMarkedMutants
1237             .map!(a => tuple(a, db.mutantApi.getChecksum(a.statusId)))
1238             .filter!(a => !a[1].isNull)
1239             .filter!(a => a[0].statusChecksum != a[1].get)) {
1240         update(m[0]);
1241     }
1242 }
1243 
/// Prints marked mutants that have become lost due to a rerun of the analysis
1245 void printLostMarkings(MarkedMutant[] lostMutants) {
1246     import std.algorithm : sort;
1247     import std.array : empty;
1248     import std.conv : to;
1249     import std.stdio : writeln;
1250 
1251     if (lostMutants.empty)
1252         return;
1253 
1254     Table!6 tbl = Table!6([
1255             "ID", "File", "Line", "Column", "Status", "Rationale"
1256             ]);
1257     foreach (m; lostMutants) {
1258         typeof(tbl).Row r = [
1259             m.mutationId.get.to!string, m.path, m.sloc.line.to!string,
1260             m.sloc.column.to!string, m.toStatus.to!string, m.rationale.get
1261         ];
1262         tbl.put(r);
1263     }
    log.warning("Marked mutants were lost");
1265     writeln(tbl);
1266 }
1267 
1268 @("shall only let files in the diff through")
1269 unittest {
    import std.string : lineSplitter;
1271     import dextool.plugin.mutate.backend.diff_parser;
1272 
1273     immutable lines = `diff --git a/standalone2.d b/standalone2.d
1274 index 0123..2345 100644
1275 --- a/standalone.d
1276 +++ b/standalone2.d
1277 @@ -31,7 +31,6 @@ import std.algorithm : map;
1278  import std.array : Appender, appender, array;
1279  import std.datetime : SysTime;
1280 +import std.format : format;
1281 -import std.typecons : Tuple;
1282 
1283  import d2sqlite3 : sqlDatabase = Database;
1284 
1285 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
1286  struct Database {
1287      import std.conv : to;
1288      import std.exception : collectException;
1289 -    import std.typecons : Nullable;
1290 +    import std.typecons : Nullable, Flag, No;
1291      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
1292 
1293 +    sqlDatabase db;`;
1294 
1295     UnifiedDiffParser p;
1296     foreach (line; lines.lineSplitter)
1297         p.process(line);
1298     auto diff = p.result;
1299 
1300     auto files = FileFilter(".".Path.AbsolutePath, true, diff);
1301 
1302     files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
1303     files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
1304 }
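
// A minimal complement to the diff test: with no diff read from stdin the
// filter should be inactive and let every file through.
@("shall let all files through when the diff filter is not active")
unittest {
    auto files = FileFilter(".".Path.AbsolutePath, false, Diff.init);

    files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeTrue;
    files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
}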
1305 
1306 /// Convert to an absolute path by finding the first match among the compiler flags
1307 Optional!AbsolutePath toAbsolutePath(Path file, AbsolutePath workDir,
1308         ParseFlags.Include[] includes, SystemIncludePath[] systemIncludes) @trusted nothrow {
1309     import std.algorithm : map, filter;
1310     import std.file : exists;
1311     import std.path : buildPath;
1312 
1313     Optional!AbsolutePath lookup(string dir) nothrow {
1314         const p = buildPath(dir, file);
1315         try {
1316             if (exists(p))
1317                 return some(AbsolutePath(p));
1318         } catch (Exception e) {
1319         }
1320         return none!AbsolutePath;
1321     }
1322 
1323     {
1324         auto a = lookup(workDir.toString);
1325         if (a.hasValue)
1326             return a;
1327     }
1328 
1329     foreach (a; includes.map!(a => lookup(a.payload))
1330             .filter!(a => a.hasValue)) {
1331         return a;
1332     }
1333 
1334     foreach (a; systemIncludes.map!(a => lookup(a.value))
1335             .filter!(a => a.hasValue)) {
1336         return a;
1337     }
1338 
1339     return none!AbsolutePath;
1340 }
1341 
/** Returns: the root files that need to be re-analyzed because either they or
 * one of their dependencies have changed.
1344  */
1345 bool[Path] dependencyAnalyze(ref Database db, FilesysIO fio) @trusted {
1346     import dextool.cachetools : nullableCache;
1347     import dextool.plugin.mutate.backend.database : FileId;
1348 
1349     typeof(return) rval;
1350 
1351     // pessimistic. Add all as needing to be analyzed.
1352     foreach (a; db.getRootFiles.map!(a => db.getFile(a).get)) {
1353         rval[a] = false;
1354     }
1355 
1356     try {
1357         auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
1358                 30.dur!"seconds");
1359         auto getFileName = nullableCache!(FileId, Path, (FileId id) => db.getFile(id))(256,
1360                 30.dur!"seconds");
1361         auto getFileDbChecksum = nullableCache!(string, Checksum,
1362                 (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
1363         auto getFileFsChecksum = nullableCache!(AbsolutePath, Checksum, (AbsolutePath p) {
1364             return checksum(p);
1365         })(256, 30.dur!"seconds");
1366 
1367         Checksum[Path] dbDeps;
1368         foreach (a; db.dependencyApi.getAll)
1369             dbDeps[a.file] = a.checksum;
1370 
1371         const isToolVersionDifferent = db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
1372         bool isChanged(T)(T f) {
1373             if (isToolVersionDifferent) {
                // because the tool version has changed, all files need to
                // be re-analyzed. An update can mean that scheman are
                // improved, mutants have been changed/removed etc. It is
                // unknown. The only way to be sure is to re-analyze all files.
1378                 return true;
1379             }
1380 
1381             if (f.rootCs != getFileFsChecksum(fio.toAbsoluteRoot(f.root)))
1382                 return true;
1383 
1384             foreach (a; f.deps.filter!(a => getFileFsChecksum(fio.toAbsoluteRoot(a)) != dbDeps[a])) {
1385                 return true;
1386             }
1387 
1388             return false;
1389         }
1390 
1391         foreach (f; db.getRootFiles
1392                 .map!(a => db.getFile(a).get)
1393                 .map!(a => tuple!("root", "rootCs", "deps")(a,
1394                     getFileDbChecksum(a), db.dependencyApi.get(a)))
1395                 .cache
1396                 .filter!(a => isChanged(a))
1397                 .map!(a => a.root)) {
1398             rval[f] = true;
1399         }
1400     } catch (Exception e) {
1401         log.warning(e.msg);
1402     }
1403 
1404     log.trace("Dependency analyze: ", rval);
1405 
1406     return rval;
1407 }
1408 
1409 /// Only utf-8 files are supported
1410 bool isFileSupported(FilesysIO fio, AbsolutePath p) @safe {
1411     import std.algorithm : among;
1412     import std.encoding : getBOM, BOM;
1413 
1414     auto entry = fio.makeInput(p).content.getBOM();
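    // `among` returns the 1-based index of the match: 1 means a utf-8 BOM is
    // present, 2 means no BOM at all and 0 means some other, unsupported, BOM.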
1415     const res = entry.schema.among(BOM.utf8, BOM.none);
1416 
1417     if (res == 1)
1418         log.warningf("%s has a utf-8 BOM marker. It will make all coverage and scheman fail to compile",
1419                 p);
1420 
1421     return res != 0;
1422 }
1423 
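/// Update the schema mutant probabilities (SchemaQ) from the status of
/// previously used schemas, prune entries for files that no longer exist and
/// persist the new state to the database.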
1424 auto updateSchemaQ(ref Database db) {
1425     import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
1426     import dextool.plugin.mutate.backend.database : SchemaStatus;
1427     import my.hash : Checksum64;
1428     import my.set;
1429 
1430     auto sq = SchemaQ.make;
1431     sq.state = db.schemaApi.getMutantProbability;
1432 
1433     auto paths = db.getFiles;
1434     Set!Checksum64 latestFiles;
1435 
1436     foreach (path; paths) {
1437         scope getPath = (SchemaStatus s) => db.schemaApi.getSchemaUsedKinds(path, s);
1438         sq.update(path, getPath);
1439         latestFiles.add(sq.pathCache[path]);
1440         debug logger.tracef("updating %s %s", path, sq.pathCache[path]);
1441     }
1442 
1443     foreach (p; sq.state.byKey.toSet.setDifference(latestFiles).toRange) {
1444         db.schemaApi.removeMutantProbability(p);
1445         sq.state.remove(p);
1446         debug logger.trace("removing ", p);
1447     }
1448 
1449     sq.scatterTick;
1450 
1451     foreach (p; sq.state.byKeyValue) {
1452         db.schemaApi.saveMutantProbability(p.key, p.value, SchemaQ.MaxState);
1453         debug logger.tracef("saving %s with %s values", p.key, p.value.length);
1454     }
1455 
1456     return sq;
1457 }
1458 
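/// Update the adaptive number of mutants per schema from the status of
/// previously used schemas and persist it to the database.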
1459 auto updateSchemaSizeQ(ref Database db, const long userInit, const long minSize) {
1460     import std.traits : EnumMembers;
1461     import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaSizeQ;
1462     import dextool.plugin.mutate.backend.database : SchemaStatus;
1463 
1464     // *3 is a magic number. it feels good.
1465     auto sq = SchemaSizeQ.make(minSize, userInit * 3);
1466     sq.currentSize = db.schemaApi.getSchemaSize(userInit);
1467     scope getStatusCnt = (SchemaStatus s) => db.schemaApi.schemaMutantCount(s);
1468     const kinds = [EnumMembers!(Mutation.Kind)];
1469     sq.update(getStatusCnt, db.mutantApi.totalSrcMutants(kinds)
1470             .count + db.mutantApi.unknownSrcMutants(kinds).count);
1471     db.schemaApi.saveSchemaSize(sq.currentSize);
1472     return sq;
1473 }