1 /**cpptooling.analyzer.clang
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import core.thread : Thread;
17 import logger = std.experimental.logger;
18 import std.algorithm : map, filter, joiner, cache;
19 import std.array : array, appender, empty;
20 import std.concurrency;
21 import std.datetime : dur, Duration;
22 import std.exception : collectException;
23 import std.functional : toDelegate;
24 import std.parallelism : TaskPool, totalCPUs;
25 import std.range : tee, enumerate;
26 import std.typecons : tuple;
27 
28 import colorlog;
29 import my.actor.utility.limiter;
30 import my.actor;
31 import my.filter : GlobFilter;
32 import my.gc.refc;
33 import my.named_type;
34 import my.optional;
35 import my.set;
36 
37 static import colorlog;
38 
39 import dextool.utility : dextoolBinaryId;
40 
41 import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
42 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB,
43     ParsedCompileCommandRange, ParsedCompileCommand, ParseFlags, SystemIncludePath;
44 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
45 import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataResult;
46 import dextool.plugin.mutate.backend.database : Database, LineMetadata,
47     MutationPointEntry2, DepFile;
48 import dextool.plugin.mutate.backend.database.type : MarkedMutant, TestFile,
49     TestFilePath, TestFileChecksum, ToolVersion;
50 import dextool.plugin.mutate.backend.diff_parser : Diff;
51 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
52 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
53 import dextool.plugin.mutate.backend.utility : checksum, Checksum, getProfileResult, Profile;
54 import dextool.plugin.mutate.backend.type : Mutation;
55 import dextool.plugin.mutate.type : MutationKind;
56 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze, ConfigSchema, ConfigCoverage;
57 import dextool.type : ExitStatusType, AbsolutePath, Path;
58 
59 version (unittest) {
60     import unit_threaded.assertions;
61 }
62 
63 alias log = colorlog.log!"analyze";
64 
/** Analyze the files in `frange` for mutations.
 *
 * Spawns a flow control actor, a store actor and a test path actor, then one
 * analyzer actor per file in `frange` that passes the filters. Afterwards the
 * calling thread polls the store actor until it answers that it is done.
 *
 * Params:
 *  dbPath = path handed to `Database.make`.
 *  userKinds = mutation kinds; converted with `toInternal` before they are
 *      handed to the analyzers.
 *  analyzeConf = analyze configuration (file matcher, pool size, test paths,
 *      diff from stdin, force save, profiling).
 *  compilerConf = forwarded to each analyzer via `AnalyzeConfig`.
 *  schemaConf = forwarded to the store actor via `StoreConfig`.
 *  covConf = forwarded to both the analyzers and the store actor.
 *  frange = the compile commands to analyze.
 *  valLoc = duplicated (`.dup`) for each analyzer.
 *  fio = filesystem access; duplicated (`.dup`) for each analyzer.
 *
 * Returns: `ExitStatusType.Errors` if a request to the store actor throws a
 * `ScopedActorException`, otherwise `ExitStatusType.Ok`.
 */
ExitStatusType runAnalyzer(const AbsolutePath dbPath, const MutationKind[] userKinds,
        ConfigAnalyze analyzeConf, ConfigCompiler compilerConf,
        ConfigSchema schemaConf, ConfigCoverage covConf,
        ParsedCompileCommandRange frange, ValidateLoc valLoc, FilesysIO fio) @trusted {
    import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
    import dextool.plugin.mutate.backend.mutation_type : toInternal;

    // Build the diff based filter. If constructing it throws, the problem is
    // logged and `FileFilter.init` is used, which has `useFileFilter == false`
    // and therefore lets every file through.
    auto fileFilter = () {
        try {
            return FileFilter(fio.getOutputDir, analyzeConf.unifiedDiffFromStdin,
                    analyzeConf.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
        } catch (Exception e) {
            log.info(e.msg);
            log.warning("Unable to parse diff");
        }
        return FileFilter.init;
    }();

    // A file is analyzed only if both the user configured matcher and the
    // diff based filter accept it.
    bool shouldAnalyze(AbsolutePath p) {
        return analyzeConf.fileMatcher.match(p.toString) && fileFilter.shouldAnalyze(p);
    }

    auto sys = makeSystem;

    // A pool size of zero means "not configured": fall back to CPUs + 1.
    // NOTE(review): presumably this is the number of tokens, i.e. the number
    // of analyzers that may run at the same time - confirm against
    // `spawnFlowControl`.
    auto flowCtrl = sys.spawn(&spawnFlowControl, analyzeConf.poolSize == 0
            ? (totalCPUs + 1) : analyzeConf.poolSize);

    auto db = refCounted(Database.make(dbPath));

    // if a dependency of a root file has been changed.
    auto changedDeps = dependencyAnalyze(db.get, fio);
    auto schemaQ = SchemaQ(db.get.schemaApi.getMutantProbability);

    // The store actor receives the keys of `changedDeps` whose value is
    // false as its `rootFiles`.
    auto store = sys.spawn(&spawnStoreActor, flowCtrl, db,
            StoreConfig(analyzeConf, schemaConf, covConf), fio, changedDeps.byKeyValue
            .filter!(a => !a.value)
            .map!(a => a.key)
            .array);
    // this function no longer uses the database after this point; the store
    // actor holds its own reference.
    db.release;
    // it crashes if the store actor try to call dextoolBinaryId. I don't know
    // why... TLS store trashed? But it works, somehow, if I put some writeln
    // inside dextoolBinaryId.
    send(store, Start.init, ToolVersion(dextoolBinaryId));

    sys.spawn(&spawnTestPathActor, store, analyzeConf.testPaths, analyzeConf.testFileMatcher, fio);

    auto kinds = toInternal(userKinds);
    //int taskCnt;
    Set!AbsolutePath alreadyAnalyzed;
    // dfmt off
    foreach (f; frange
            // The tool only supports analyzing a file one time.
            // This optimize it in some cases where the same file occurs
            // multiple times in the compile commands database.
            .filter!(a => a.cmd.absoluteFile !in alreadyAnalyzed)
            .tee!(a => alreadyAnalyzed.add(a.cmd.absoluteFile))
            .cache
            .filter!(a => shouldAnalyze(a.cmd.absoluteFile))
            ) {
        try {
            // A file that is present in `changedDeps` with the value false is
            // skipped unless the user forces a save.
            if (auto v = fio.toRelativeRoot(f.cmd.absoluteFile) in changedDeps) {
                if (!(*v || analyzeConf.forceSaveAnalyze))
                    continue;
            }

            // TODO: how to "slow down" if store is working too slow.

            // must dup schemaQ or we run into multithreaded bugs because a
            // SchemaQ have mutable caches internally.  also must allocate on
            // the GC because otherwise they share the same associative array.
            // Don't ask me how that happens because `.dup` should have created
            // a unique one. If you print the address here of `.state` and the
            // receiving end you will see that they are re-used between actors!
            auto sq = new SchemaQ(schemaQ.dup.state);
            auto a = sys.spawn(&spawnAnalyzer, flowCtrl, store, kinds, f, valLoc.dup, fio.dup, AnalyzeConfig(compilerConf, analyzeConf, covConf, sq));
            // the store actor counts the started analyzers to know how many
            // results it has to wait for.
            send(store, StartedAnalyzer.init);
        } catch (Exception e) {
            log.trace(e);
            log.warning(e.msg);
        }
    }
    // dfmt on

    // no more `StartedAnalyzer` messages will follow.
    send(store, DoneStartingAnalyzers.init);

    changedDeps = typeof(changedDeps).init; // free the memory

    // Poll the store actor every 100 ms until it answers `true` to `IsDone`.
    auto self = scopedActor;
    bool waiting = true;
    while (waiting) {
        try {
            self.request(store, infTimeout).send(IsDone.init).then((bool x) {
                waiting = !x;
            });
        } catch (ScopedActorException e) {
            logger.warning(e.error);
            return ExitStatusType.Errors;
        }
        () @trusted { Thread.sleep(100.dur!"msecs"); }();
    }

    // Printing the profile is best effort; a failure is only logged.
    if (analyzeConf.profile)
        try {
            import std.stdio : writeln;

            writeln(getProfileResult.toString);
        } catch (Exception e) {
            log.warning("Unable to print the profile data: ", e.msg).collectException;
        }

    return ExitStatusType.Ok;
}
179 
180 @safe:
181 
/** Filter function for files. Either all or those in stdin.
 *
 * The matching ignores the file extension in order to lessen the problem that
 * this approach skips headers because they do not exist in
 * `compile_commands.json`. It means that e.g. "foo.hpp" would match `true` if
 * `foo.cpp` is in `compile_commands.json`.
 *
 * TODO: this may create problems for header only libraries because only the
 * unittest would include the header, which means that for this to work the
 * unittest would have to reside in the same directory as the header file,
 * which they normally never do. As a result, a diff of a header only lib
 * leads to "no files analyzed".
 */
struct FileFilter {
    import std.path : relativePath, stripExtension;

    /// Paths from the diff with the file extension removed.
    Set!string files;
    /// When false every file is accepted by `shouldAnalyze`.
    bool useFileFilter;
    /// Directory that the paths in `files` are relative to.
    AbsolutePath root;

    /** Collect the extension-less paths of all files in `diff`.
     *
     * Params:
     *  root = directory used both to resolve the diff and to relativize paths.
     *  fromStdin = true if the filter should be active.
     *  diff = the diff whose files are accepted by the filter.
     */
    this(AbsolutePath root, bool fromStdin, Diff diff) {
        this.useFileFilter = fromStdin;
        this.root = root;
        foreach (entry; diff.toRange(root)) {
            files.add(entry.key.stripExtension);
        }
    }

    /// Returns: true if the filter is inactive or if `p`, relative to `root`
    /// and without its extension, is one of the files from the diff.
    bool shouldAnalyze(AbsolutePath p) {
        if (useFileFilter) {
            auto candidate = relativePath(p, root).stripExtension;
            return candidate in files;
        }

        return true;
    }
}
220 
/// Message sent to the store actor each time an analyzer has been spawned.
/// The store actor counts them to know how many results it has to wait for.
struct StartedAnalyzer {
}
223 
/// Message sent to the store actor when all analyzers have been spawned, i.e.
/// no further `StartedAnalyzer` messages will follow.
struct DoneStartingAnalyzers {
}
226 
/// Number of analyze tasks that has been spawned that the `storeActor` should wait for.
/// NOTE(review): not referenced in the visible part of this module - confirm
/// that it is still in use.
struct AnalyzeCntMsg {
    /// The number of spawned analyze tasks.
    int value;
}
231 
/// The main thread is waiting for storeActor to send this message.
/// NOTE(review): the visible code polls the store actor with `IsDone` and
/// never references this type - confirm that it is still in use.
struct StoreDoneMsg {
}
235 
/// The configuration that is handed to each analyzer actor.
struct AnalyzeConfig {
    import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;

    /// Compiler related settings (forced system includes, allowed errors).
    ConfigCompiler compiler;
    /// Settings for the analyze phase.
    ConfigAnalyze analyze;
    /// Coverage settings; `use` is forwarded to the analyzer.
    ConfigCoverage coverage;
    /// Pointer to the `SchemaQ` of the analyzer. `runAnalyzer` allocates a
    /// separate instance with `new` for every analyzer that it spawns.
    SchemaQ* sq;
}
244 
/// Message that an analyzer actor sends to itself when it is spawned. Its
/// handler requests a token from the flow control actor.
struct WaitForToken {
}
247 
/// Message that an analyzer actor sends to itself when it has received a
/// token. Its handler performs the analyze of the file.
struct RunAnalyze {
}
250 
/// Actor that analyzes one file. It handles `WaitForToken` followed by `RunAnalyze`.
alias AnalyzeActor = typedActor!(void function(WaitForToken), void function(RunAnalyze));
252 
/** Start an analyze of a file.
 *
 * The actor sends `WaitForToken` to itself, requests a token from `flowCtrl`
 * and, once the token arrives, analyzes `fileToAnalyze`. On success the result
 * is sent to the store actor together with the token. On failure only the
 * token is sent so that the store actor still counts the file. The actor shuts
 * itself down after the analyze.
 *
 * Params:
 *  self = the actor to configure.
 *  flowCtrl = actor that hands out the tokens.
 *  storeAddr = actor that receives the result and/or the token.
 *  kinds = mutation kinds handed to `Analyze`.
 *  fileToAnalyze = compile command of the file to analyze.
 *  vloc = handed to `Analyze`.
 *  fio = filesystem access; a `.dup` of it is stored in the actor's context.
 *  conf = configuration of the analyze.
 *
 * Returns: the configured actor implementation.
 */
auto spawnAnalyzer(AnalyzeActor.Impl self, FlowControlActor.Address flowCtrl, StoreActor.Address storeAddr,
        Mutation.Kind[] kinds, ParsedCompileCommand fileToAnalyze,
        ValidateLoc vloc, FilesysIO fio, AnalyzeConfig conf) {
    auto st = tuple!("self", "flowCtrl", "storeAddr", "kinds", "fileToAnalyze",
            "vloc", "fio", "conf")(self, flowCtrl, storeAddr, kinds,
            fileToAnalyze, vloc, fio.dup, conf);
    alias Ctx = typeof(st);

    // Ask the flow control actor for a token. The analyze is triggered when
    // the token is received.
    static void wait(ref Ctx ctx, WaitForToken) {
        ctx.self.request(ctx.flowCtrl, infTimeout).send(TakeTokenMsg.init)
            .capture(ctx).then((ref Ctx ctx, Token _) => send(ctx.self, RunAnalyze.init));
    }

    static void run(ref Ctx ctx, RunAnalyze) @safe {
        auto profile = Profile("analyze file " ~ ctx.fileToAnalyze.cmd.absoluteFile);

        bool onlyValidFiles = true;

        try {
            log.tracef("%s begin", ctx.fileToAnalyze.cmd.absoluteFile);
            auto analyzer = Analyze(ctx.kinds, ctx.vloc, ctx.fio,
                    Analyze.Config(ctx.conf.compiler.forceSystemIncludes,
                        ctx.conf.coverage.use, ctx.conf.compiler.allowErrors.get, *ctx.conf.sq));
            analyzer.process(ctx.fileToAnalyze);

            // the result is discarded if any of the involved files is unsupported.
            foreach (a; analyzer.result.idFile.byKey) {
                if (!isFileSupported(ctx.fio, a)) {
                    // the format string has one specifier, thus the file must
                    // be passed as an argument. Without it the formatting
                    // fails and the warning is never printed.
                    log.warningf(
                            "%s: file not supported. It must be in utf-8 format without a BOM marker",
                            a);
                    onlyValidFiles = false;
                    break;
                }
            }

            if (onlyValidFiles)
                send(ctx.storeAddr, analyzer.result, Token.init);
            // a failure to log must not end up in the failure path below
            // because the result may already have been sent to the store
            // actor, which would then count this file twice.
            log.tracef("%s end", ctx.fileToAnalyze.cmd.absoluteFile).collectException;
        } catch (Exception e) {
            onlyValidFiles = false;
            log.error(e.msg).collectException;
        }

        if (!onlyValidFiles) {
            log.tracef("%s failed", ctx.fileToAnalyze.cmd.absoluteFile).collectException;
            // the token has to be returned even though the analyze failed.
            send(ctx.storeAddr, Token.init);
        }

        ctx.self.shutdown;
    }

    self.name = "analyze";
    send(self, WaitForToken.init);
    return impl(self, &run, capture(st), &wait, capture(st));
}
308 
/// The result of checksumming the test files. It is sent from the test path
/// actor to the store actor.
class TestFileResult {
    /// Elapsed time as measured by the stopwatch in the test path actor.
    Duration time;
    /// The found test files, keyed by the checksum of their content.
    TestFile[Checksum] files;
}
313 
/// Actor that checksums the test files. It handles a `Start` message that
/// carries the address of the store actor.
alias TestPathActor = typedActor!(void function(Start, StoreActor.Address));
315 
/** Spawn an actor that checksums the test files and sends them to the store actor.
 *
 * The actor walks `userPaths`. Directories are expanded one level at a time
 * and their entries are appended to the work queue. Files that `matcher`
 * accepts are checksummed. The collected `TestFileResult` is sent to `store`
 * and the actor then shuts itself down.
 *
 * Params:
 *  self = the actor to configure.
 *  store = actor that receives the `TestFileResult`.
 *  userPaths = files and directories to search for test files.
 *  matcher = only files that match are saved.
 *  fio = filesystem access; a `.dup` of it is stored in the actor's context.
 *
 * Returns: the configured actor implementation.
 */
auto spawnTestPathActor(TestPathActor.Impl self, StoreActor.Address store,
        AbsolutePath[] userPaths, GlobFilter matcher, FilesysIO fio) {
    import std.datetime : Clock;
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.file : isDir, isFile, dirEntries, SpanMode;
    import my.container.vector;

    auto st = tuple!("self", "matcher", "fio", "userPaths")(self, matcher, fio.dup, userPaths);
    alias Ctx = typeof(st);

    static void start(ref Ctx ctx, Start, StoreActor.Address store) {
        auto profile = Profile("checksum test files");

        auto sw = StopWatch(AutoStart.yes);

        // checksum the content of `file` and stamp it with the current time.
        TestFile makeTestFile(const AbsolutePath file) {
            auto cs = checksum(ctx.fio.makeInput(file).content[]);
            return TestFile(TestFilePath(ctx.fio.toRelativeRoot(file)),
                    TestFileChecksum(cs), Clock.currTime);
        }

        // work queue; directories append their entries to it.
        auto paths = vector(ctx.userPaths);

        auto tfiles = new TestFileResult;

        while (!paths.empty) {
            try {
                if (isDir(paths.front)) {
                    log.trace("  Test directory ", paths.front);
                    foreach (a; dirEntries(paths.front, SpanMode.shallow).map!(
                            a => AbsolutePath(a.name))) {
                        paths.put(a);
                    }
                } else if (isFile(paths.front) && ctx.matcher.match(paths.front)) {
                    log.trace("  Test saved ", paths.front);
                    auto t = makeTestFile(paths.front);
                    tfiles.files[t.checksum.get] = t;
                }
            } catch (Exception e) {
                // a path that can't be processed is skipped.
                log.warning(e.msg).collectException;
            }

            paths.popFront;
        }

        // the time must be stored before the result is sent. `tfiles` is a
        // class, thus the receiver gets a reference to this very object and
        // it must not be modified after it has been handed over.
        tfiles.time = sw.peek;

        log.infof("Found %s test files", tfiles.files.length).collectException;
        send(store, tfiles);
        ctx.self.shutdown;
    }

    self.name = "test path";
    send(self, Start.init, store);
    return impl(self, &start, capture(st));
}
372 
/// Message that starts an actor. The store actor receives it together with a
/// `ToolVersion` and the test path actor together with the store's address.
struct Start {
}
375 
/// Request to the store actor which answers with a `bool` that is true when
/// the whole process is done.
struct IsDone {
}
378 
/// NOTE(review): not referenced in the visible part of this module - confirm
/// that it is still in use.
struct SetDone {
}
381 
// Check if it is time to post process. The store actor sends this to itself
// and re-sends it with a delay for as long as the answer is "not yet".
struct CheckPostProcess {
}
// Run the post processing.
struct PostProcess {
}
388 
/// The configuration that is handed to the store actor.
struct StoreConfig {
    /// Settings for the analyze phase (e.g. `fastDbStore`, `forceSaveAnalyze`).
    ConfigAnalyze analyze;
    /// Schema settings (e.g. `mutantsPerSchema`, `minMutantsPerSchema`).
    ConfigSchema schema;
    /// Coverage settings.
    ConfigCoverage coverage;
}
394 
/// Actor that saves the analyze results to the database. The message
/// `(Analyze.Result, Token)` is a successful analyze. A lone `Token` means
/// that the analyze of a file failed, but the token is still returned.
alias StoreActor = typedActor!(void function(Start, ToolVersion), bool function(IsDone),
        void function(StartedAnalyzer), void function(Analyze.Result, Token),
        // failed to analyze the file, but still returning the token.
        void function(Token),
        void function(DoneStartingAnalyzers), void function(TestFileResult),
        void function(CheckPostProcess), void function(PostProcess),);
400 
401 /// Store the result of the analyze.
402 auto spawnStoreActor(StoreActor.Impl self, FlowControlActor.Address flowCtrl,
403         RefCounted!(Database) db, StoreConfig conf, FilesysIO fio, Path[] rootFiles) @trusted {
    /// Feeds schema fragments to a `SchemataBuilder` and saves the schemas
    /// that it produces to the database. The `set*` functions and `finalize`
    /// configure the builder for the different phases.
    static struct SchemataSaver {
        import sumtype;
        import my.optional;
        import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataBuilder;

        // user configured limits, see `ConfigSchema`.
        typeof(ConfigSchema.minMutantsPerSchema) minMutantsPerSchema;
        typeof(ConfigSchema.mutantsPerSchema) mutantsPerSchema;
        SchemataBuilder builder;

        /// Returns: the cache size as reported by the builder.
        size_t cacheSize() @safe pure nothrow const @nogc {
            return builder.cacheSize;
        }

        /// Add the fragments in `a` to the builder.
        void put(FilesysIO fio, SchemataResult.Schemata[AbsolutePath] a) {
            builder.put(fio, a);
        }

        /// Save the schema in `value`, if any, to the database. Only mutants
        /// that have a mutation status ID in the database are included and a
        /// schema without such mutants is not saved. Exceptions are logged at
        /// trace level and otherwise ignored.
        void process(ref Database db, Optional!(SchemataBuilder.ET) value) {
            value.match!((Some!(SchemataBuilder.ET) a) {
                try {
                    auto mutants = a.mutants
                        .map!(a => db.mutantApi.getMutationStatusId(a.id))
                        .filter!(a => !a.isNull)
                        .map!(a => a.get)
                        .array;
                    if (!mutants.empty) {
                        const id = db.schemaApi.putSchemata(a.checksum, a.fragments, mutants);
                        // only logged when the database returned an ID.
                        log.infof(!id.isNull, "Saving schema with %s mutants (cache %0.2f Mbyte)",
                            mutants.length, cast(double) cacheSize / (1024 * 1024));
                    }
                } catch (Exception e) {
                    log.trace(e.msg);
                }
            }, (None a) {});
        }

        /// Consume fragments used by scheman containing >min mutants.
        // NOTE(review): `builder.minMutantsPerSchema` is set from
        // `mutantsPerSchema`, not from the `minMutantsPerSchema` field -
        // confirm that this is intended.
        void setIntermediate() {
            log.trace("schema generator phase: intermediate");
            builder.discardMinScheman = false;
            builder.useProbability = true;
            builder.useProbablitySmallSize = false;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            builder.minMutantsPerSchema = mutantsPerSchema.get;
            builder.thresholdStartValue = 1.0;
        }

        /// Like `setIntermediate` but the minimum schema size is the largest
        /// of the configured minimum and `mutantsPerSchema / sizeDiv`, and
        /// the threshold start value is `1.0 - threshold / 100.0`.
        // NOTE(review): `sizeDiv` is used as a divisor - callers presumably
        // never pass zero; confirm.
        void setReducedIntermediate(long sizeDiv, long threshold) {
            import std.algorithm : max;

            log.tracef("schema generator phase: reduced size:%s threshold:%s", sizeDiv, threshold);
            builder.discardMinScheman = false;
            builder.useProbability = true;
            builder.useProbablitySmallSize = false;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            builder.minMutantsPerSchema = max(minMutantsPerSchema.get,
                    mutantsPerSchema.get / sizeDiv);
            // TODO: interesting effect. this needs to be studied. I think this
            // is the behavior that is "best".
            builder.thresholdStartValue = 1.0 - (cast(double) threshold / 100.0);
        }

        /// Make one pass over the builder with its current settings, save
        /// what it produces and then restart the builder.
        void run(ref Database db) {
            // sort the fragments by file which should allow those with high
            // probability to result in larger scheman while those with smaller
            // end up with small scheman. Smaller are thus those that highly
            // likely fail to compile.
            // 2021-09-03: sorting the fragments was a bad idea. It led to
            // very large schemas in one and the same file which failed
            // compilation because the computer ran out of memory.
            // Therefore testing a strategy of shuffling instead.
            builder.shuffle;

            while (!builder.isDone) {
                process(db, builder.next);
            }

            builder.restart;
        }

        /// Consume all fragments or discard.
        void finalize(ref Database db) {
            log.trace("schema generator phase: finalize");
            builder.discardMinScheman = true;
            builder.useProbability = false;
            builder.useProbablitySmallSize = true;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            builder.minMutantsPerSchema = minMutantsPerSchema.get;
            builder.thresholdStartValue = 0;

            // two loops to pass over all mutants and retry new schema
            // compositions. Any schema that is less than the minimum will be
            // discarded so the number of mutants will shrink.
            while (!builder.isDone) {
                while (!builder.isDone) {
                    process(db, builder.next);
                }
                builder.restart;
            }
        }
    }
505 
    /// The mutable state of the store actor.
    static struct State {
        // analyze of src
        // number of `StartedAnalyzer` messages that have been received.
        int startedAnalyzers;
        // number of handled results, both saved and failed.
        int savedResult;
        // set when `DoneStartingAnalyzers` has been received.
        bool doneStarting;

        // if a file is modified then the timeout context need to be reset
        bool resetTimeoutCtx;

        /// Set when the whole process is done.
        bool isDone;

        // set when the `TestFileResult` has been received.
        bool savedTestFileResult;

        // the answer from the database when asked if the tool version is
        // different from the one that is running.
        bool isToolVersionDifferent;

        // A file is at most saved one time to the database.
        Set!AbsolutePath savedFiles;

        SchemataSaver schemas;
    }
527 
528     auto st = tuple!("self", "db", "state", "fio", "conf", "rootFiles", "flowCtrl")(self,
529             db, refCounted(State.init), fio.dup, conf, rootFiles, flowCtrl);
530     alias Ctx = typeof(st);
531 
    /// Prepare the store actor: record if the tool version has changed,
    /// optionally relax the sqlite3 durability settings, update the schema
    /// probabilities and prune schemas from the database. Ends by scheduling
    /// the first `CheckPostProcess`.
    static void start(ref Ctx ctx, Start, ToolVersion toolVersion) {
        import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
        import dextool.plugin.mutate.backend.database : SchemaStatus;

        log.trace("starting store actor");

        ctx.state.get.isToolVersionDifferent = ctx.db.get.isToolVersionDifferent(toolVersion);
        ctx.state.get.schemas = SchemataSaver(ctx.conf.schema.minMutantsPerSchema,
                ctx.conf.schema.mutantsPerSchema);

        if (ctx.conf.analyze.fastDbStore) {
            log.info(
                    "Turning OFF sqlite3 synchronization protection to improve the write performance");
            log.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
            ctx.db.get.run("PRAGMA synchronous = OFF");
            ctx.db.get.run("PRAGMA journal_mode = MEMORY");
        }

        // each step below runs in its own transaction.
        {
            auto trans = ctx.db.get.transaction;
            auto profile = Profile("update schema probability");
            log.info("Update schema probability");

            ctx.state.get.schemas.builder.schemaQ = updateSchemaQ(ctx.db.get);
            ctx.state.get.schemas.builder.mutantsPerSchema = updateSchemaSizeQ(ctx.db.get,
                    ctx.conf.schema.mutantsPerSchema.get, ctx.conf.schema.minMutantsPerSchema.get)
                .currentSize;

            trans.commit;
        }
        {
            auto trans = ctx.db.get.transaction;
            auto profile = Profile("prune old schemas");
            if (ctx.state.get.isToolVersionDifferent) {
                log.info("Prune database of scheman created by the old version");
                ctx.db.get.schemaApi.deleteAllSchemas;
            }
            trans.commit;
        }
        {
            import std.traits : EnumMembers;

            auto trans = ctx.db.get.transaction;
            auto profile = Profile("prune used schemas");
            log.info("Prune the database of used schemas");
            // a forced save prunes schemas of every status, otherwise only
            // those that are all killed or broken.
            const removed = () {
                if (ctx.conf.analyze.forceSaveAnalyze)
                    return ctx.db.get.schemaApi.pruneUsedSchemas([
                            EnumMembers!SchemaStatus
                            ]);
                return ctx.db.get.schemaApi.pruneUsedSchemas([
                        SchemaStatus.allKilled, SchemaStatus.broken
                        ]);
            }();
            trans.commit;
            // the vacuum is done after the commit and only if something was removed.
            if (removed != 0) {
                logger.infof("Removed %s schemas", removed);
                ctx.db.get.vacuum;
            }
        }

        send(ctx.self, CheckPostProcess.init);
        log.trace("store actor active");
    }
596 
    /// Returns: true when the whole process is done, see `State.isDone`.
    static bool isDone(ref Ctx ctx, IsDone) {
        return ctx.state.get.isDone;
    }
600 
    /// Count one more started analyzer.
    static void startedAnalyzers(ref Ctx ctx, StartedAnalyzer) {
        ctx.state.get.startedAnalyzers++;
    }
604 
    /// Record that no more analyzers will be started.
    static void doneStartAnalyzers(ref Ctx ctx, DoneStartingAnalyzers) {
        ctx.state.get.doneStarting = true;
    }
608 
    /// An analyzer failed and only returned its token. The token is handed
    /// back to the flow control actor and the file is counted as handled.
    static void failedFileAnalyze(ref Ctx ctx, Token) {
        send(ctx.flowCtrl, ReturnTokenMsg.init);
        // a failed file has to count as well.
        ctx.state.get.savedResult++;
    }
614 
615     static void checkPostProcess(ref Ctx ctx, CheckPostProcess) {
616         if (ctx.state.get.doneStarting && ctx.state.get.savedTestFileResult
617                 && (ctx.state.get.startedAnalyzers == ctx.state.get.savedResult))
618             send(ctx.self, PostProcess.init);
619         else
620             delayedSend(ctx.self, delay(500.dur!"msecs"), CheckPostProcess.init);
621     }
622 
623     static void savedTestFileResult(ref Ctx ctx, TestFileResult result) {
624         auto profile = Profile("save test files");
625 
626         ctx.state.get.savedTestFileResult = true;
627 
628         Set!Checksum old;
629 
630         auto t = ctx.db.get.transaction;
631 
632         foreach (a; ctx.db.get.testFileApi.getTestFiles) {
633             old.add(a.checksum.get);
634             if (a.checksum.get !in result.files) {
635                 log.info("Removed test file ", a.file.get.toString);
636                 ctx.db.get.testFileApi.removeFile(a.file);
637             }
638         }
639 
640         foreach (a; result.files.byValue.filter!(a => a.checksum.get !in old)) {
641             log.info("Saving test file ", a.file.get.toString);
642             ctx.db.get.testFileApi.put(a);
643         }
644 
645         t.commit;
646 
647         send(ctx.self, CheckPostProcess.init);
648     }
649 
650     static void save(ref Ctx ctx, Analyze.Result result, Token) {
651         import dextool.cachetools : nullableCache;
652         import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
653         import dextool.plugin.mutate.backend.type : Language;
654 
655         auto profile = Profile("save " ~ result.root);
656 
657         // by returning the token now another file analyze can start while we
658         // are saving the current one.
659         send(ctx.flowCtrl, ReturnTokenMsg.init);
660 
661         ctx.state.get.savedResult++;
662         log.infof("Analyzed file %s/%s", ctx.state.get.savedResult,
663                 ctx.state.get.startedAnalyzers);
664 
665         auto getFileId = nullableCache!(string, FileId, (string p) => ctx.db.get.getFileId(p.Path))(256,
666                 30.dur!"seconds");
667         auto getFileDbChecksum = nullableCache!(string, Checksum,
668                 (string p) => ctx.db.get.getFileChecksum(p.Path))(256, 30.dur!"seconds");
669         auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
670             return checksum(ctx.fio.makeInput(AbsolutePath(Path(p))).content[]);
671         })(256, 30.dur!"seconds");
672 
673         static struct Files {
674             Checksum[Path] value;
675 
676             this(ref Database db) {
677                 foreach (a; db.getDetailedFiles) {
678                     value[a.file] = a.fileChecksum;
679                 }
680             }
681         }
682 
683         auto trans = ctx.db.get.transaction;
684 
685         // mark files that have an unchanged checksum as "already saved"
686         foreach (f; result.idFile
687                 .byKey
688                 .filter!(a => a !in ctx.state.get.savedFiles)
689                 .filter!(a => getFileDbChecksum(ctx.fio.toRelativeRoot(a)) == getFileFsChecksum(a)
690                     && !ctx.conf.analyze.forceSaveAnalyze && !ctx.state.get.isToolVersionDifferent)) {
691             log.info("Unchanged ".color(Color.yellow), f);
692             ctx.state.get.savedFiles.add(f);
693         }
694 
695         // only saves mutation points to a file one time.
696         {
697             auto app = appender!(MutationPointEntry2[])();
698             bool isChanged = ctx.state.get.isToolVersionDifferent;
699             foreach (mp; result.mutationPoints
700                     .map!(a => tuple!("data", "file")(a, ctx.fio.toAbsoluteRoot(a.file)))
701                     .filter!(a => a.file !in ctx.state.get.savedFiles)) {
702                 app.put(mp.data);
703             }
704             foreach (f; result.idFile.byKey.filter!(a => a !in ctx.state.get.savedFiles)) {
705                 isChanged = true;
706                 log.info("Saving ".color(Color.green), f);
707                 const relp = ctx.fio.toRelativeRoot(f);
708 
709                 // this is critical in order to remove old data about a file.
710                 ctx.db.get.removeFile(relp);
711 
712                 const info = result.infoId[result.idFile[f]];
713                 ctx.db.get.put(relp, info.checksum, info.language, f == result.root);
714                 ctx.state.get.savedFiles.add(f);
715             }
716             ctx.db.get.mutantApi.put(app.data, ctx.fio.getOutputDir);
717 
718             if (result.root !in ctx.state.get.savedFiles) {
719                 // this occurs when the file is e.g. a unittest that uses a
720                 // header only library. The unittests are not mutated thus
721                 // no mutation points exists in them but we want dextool to
722                 // still, if possible, track the unittests for changes.
723                 isChanged = true;
724                 const relp = ctx.fio.toRelativeRoot(result.root);
725                 ctx.db.get.removeFile(relp);
726                 // the language do not matter because it is a file without
727                 // any mutants.
728                 ctx.db.get.put(relp, result.rootCs, Language.init, true);
729                 ctx.state.get.savedFiles.add(ctx.fio.toAbsoluteRoot(result.root));
730             }
731 
732             // must always update dependencies because they may not contain
733             // mutants. Only files that are changed and contain mutants
734             // trigger isChanged to be true.
735             try {
736                 // not all files are tracked thus this may throw an exception.
737                 ctx.db.get.dependencyApi.set(ctx.fio.toRelativeRoot(result.root),
738                         result.dependencies);
739             } catch (Exception e) {
740             }
741 
742             ctx.state.get.resetTimeoutCtx = ctx.state.get.resetTimeoutCtx || isChanged;
743 
744             if (isChanged) {
745                 foreach (a; result.coverage.byKeyValue) {
746                     const fid = getFileId(ctx.fio.toRelativeRoot(result.fileId[a.key]));
747                     if (!fid.isNull) {
748                         ctx.db.get.coverageApi.clearCoverageMap(fid.get);
749                         ctx.db.get.coverageApi.putCoverageMap(fid.get, a.value);
750                     }
751                 }
752 
753                 // only save the schematas if mutation points where saved.
754                 // This ensure that only schematas for changed/new files
755                 // are saved.
756                 ctx.state.get.schemas.put(ctx.fio, result.schematas);
757                 ctx.state.get.schemas.setIntermediate;
758                 ctx.state.get.schemas.run(ctx.db.get);
759 
760                 // seems like 200 Mbyte is large enough to generate scheman
761                 // with >1000 mutants easily when analyzing LLVM.
762                 enum MaxCache = 200 * 1024 * 1024;
763                 if (ctx.state.get.schemas.cacheSize > MaxCache) {
764                     // panic mode, just empty it as fast as possible.
765                     logger.infof("Schema cache is %s bytes (limit %s). Producing as many schemas as possible to flush the cache.",
766                             ctx.state.get.schemas.cacheSize, MaxCache);
767                     ctx.state.get.schemas.finalize(ctx.db.get);
768                     ctx.state.get.schemas.setIntermediate;
769                 }
770             }
771         }
772 
773         {
774             Set!long printed;
775             auto app = appender!(LineMetadata[])();
776             foreach (md; result.metadata) {
777                 const localId = Analyze.Result.LocalFileId(md.id.get);
778                 // transform the ID from local to global.
779                 const fid = getFileId(ctx.fio.toRelativeRoot(result.fileId[localId]));
780                 if (fid.isNull && !printed.contains(md.id.get)) {
781                     printed.add(md.id.get);
782                     log.info("File with suppressed mutants (// NOMUT) not in the database: ",
783                             result.fileId[localId]).collectException;
784                 } else if (!fid.isNull) {
785                     app.put(LineMetadata(fid.get, md.line, md.attr));
786                 }
787             }
788             ctx.db.get.metaDataApi.put(app.data);
789         }
790 
791         trans.commit;
792 
793         send(ctx.self, CheckPostProcess.init);
794     }
795 
    /** Finish the analysis run for the store actor.
     *
     * The body runs at most once: the first call sets `isDone` and any later
     * `PostProcess` message returns immediately.
     *
     * The steps are, in order: flush the remaining schemas (in a transaction
     * of its own), register unchanged root files, reset the timeout context
     * if any saved file was changed, update the metadata, prune the database
     * if configured, re-link manually marked mutants, record a changed tool
     * version, commit, restore the database pragmas and finally compact the
     * database if the tool version was different.
     *
     * Params:
     *  ctx = state shared by the message handlers of this actor.
     */
    static void postProcess(ref Ctx ctx, PostProcess) {
        import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

        // guard against being executed more than once.
        if (ctx.state.get.isDone)
            return;

        ctx.state.get.isDone = true;

        /// Set the `synchronous` and `journal_mode` pragmas when the fast
        /// database store is configured.
        /// NOTE(review): presumably this undoes settings applied when the
        /// analysis started; that code is not visible here - confirm.
        void fastDbOff() {
            if (!ctx.conf.analyze.fastDbStore)
                return;
            ctx.db.get.run("PRAGMA synchronous = ON");
            ctx.db.get.run("PRAGMA journal_mode = DELETE");
        }

        /// Remove every file from the database that is not part of
        /// `savedFiles`, i.e. was neither saved nor registered by `addRoots`.
        void pruneFiles() {
            import std.path : buildPath;

            auto profile = Profile("prune files");

            log.info("Pruning the database of dropped files");
            // compare on absolute paths because that is what savedFiles contain.
            auto files = ctx.db.get.getFiles.map!(a => ctx.fio.toAbsoluteRoot(a)).toSet;

            foreach (f; files.setDifference(ctx.state.get.savedFiles).toRange) {
                log.info("Removing ".color(Color.red), f);
                ctx.db.get.removeFile(ctx.fio.toRelativeRoot(f));
            }
        }

        /// Register root files, and the dependencies stored for them in the
        /// database, as saved so that `pruneFiles` keeps them.
        void addRoots() {
            // skipped when a save of all analyze results is forced or the
            // tool version differs.
            if (ctx.conf.analyze.forceSaveAnalyze || ctx.state.get.isToolVersionDifferent)
                return;

            // add root files and their dependencies that have not been
            // analyzed because nothing has changed.
            // By adding them they are not removed.

            auto profile = Profile("add roots and dependencies");
            foreach (a; ctx.rootFiles) {
                auto p = ctx.fio.toAbsoluteRoot(a);
                if (p !in ctx.state.get.savedFiles) {
                    ctx.state.get.savedFiles.add(p);
                    // fake text for the user to tell them that yes, the files have
                    // been analyzed.
                    log.info("Analyzing ", a);
                    log.info("Unchanged ".color(Color.yellow), a);
                }
            }
            foreach (a; ctx.rootFiles.map!(a => ctx.db.get.dependencyApi.get(a)).joiner) {
                ctx.state.get.savedFiles.add(ctx.fio.toAbsoluteRoot(a));
            }
        }

        /// Flush the schema saver to the database in its own transaction and
        /// reset it to its initial state.
        void finalizeSchema() {
            auto trans = ctx.db.get.transaction;

            immutable magic = 10; // reduce the size until it is 1/10 of the original
            immutable magic2 = 5; // if it goes <95% then it is too high probability to fail
            // run the saver for every combination of size divisor and threshold.
            foreach (sizeDiv; 1 .. magic) {
                foreach (threshold; 0 .. magic2) {
                    ctx.state.get.schemas.setReducedIntermediate(sizeDiv, threshold);
                    ctx.state.get.schemas.run(ctx.db.get);
                }
            }

            ctx.state.get.schemas.finalize(ctx.db.get);

            trans.commit;
            ctx.state.get.schemas = SchemataSaver.init;
        }

        finalizeSchema;

        // everything below, up to the commit, is part of one transaction.
        auto trans = ctx.db.get.transaction;

        addRoots;

        if (ctx.state.get.resetTimeoutCtx) {
            log.info("Resetting timeout context");
            resetTimeoutContext(ctx.db.get);
        }

        log.info("Updating metadata");
        ctx.db.get.metaDataApi.updateMetadata;

        if (ctx.conf.analyze.prune) {
            pruneFiles();
            // each step has its own scope so the Profile objects are
            // destroyed when their step ends.
            {
                auto profile = Profile("prune mangled schemas");
                log.info("Prune the database of mangled schemas");
                ctx.db.get.schemaApi.pruneSchemas;
            }
            {
                auto profile = Profile("prune dependencies");
                log.info("Prune dependencies");
                ctx.db.get.dependencyApi.cleanup;
            }
            {
                auto profile = Profile("remove orphaned mutants");
                log.info("Removing orphaned mutants");
                ctx.db.get.mutantApi.removeOrphanedMutants;
            }
        }

        log.info("Updating manually marked mutants");
        updateMarkedMutants(ctx.db.get);
        printLostMarkings(ctx.db.get.markMutantApi.getLostMarkings);

        if (ctx.state.get.isToolVersionDifferent) {
            log.info("Updating tool version");
            ctx.db.get.updateToolVersion(ToolVersion(dextoolBinaryId));
        }

        log.info("Committing changes");
        trans.commit;
        log.info("Ok".color(Color.green));

        // the pragmas are changed after the commit, outside the transaction.
        fastDbOff();

        // the database is only compacted when the tool version was different.
        if (ctx.state.get.isToolVersionDifferent) {
            auto profile = Profile("compact");
            log.info("Compacting the database");
            ctx.db.get.vacuum;
        }
    }
920 
921     self.name = "store";
922 
923     auto s = impl(self, &start, capture(st), &isDone, capture(st),
924             &startedAnalyzers, capture(st), &save, capture(st), &doneStartAnalyzers,
925             capture(st), &savedTestFileResult, capture(st), &checkPostProcess,
926             capture(st), &postProcess, capture(st), &failedFileAnalyze, capture(st));
927     s.exceptionHandler = toDelegate(&logExceptionHandler);
928     return s;
929 }
930 
/** Analyze a file for mutants.
 *
 * One instance accumulates its findings in a single `Result` object which is
 * created by the constructor and filled in by `process`.
 */
struct Analyze {
    import std.regex : Regex, regex, matchFirst;
    import std.typecons : Yes;
    import libclang_ast.context : ClangContext;

    /// Settings that control how a file is analyzed.
    static struct Config {
        /// Copied to the compile flags of the analyzed file by `process`.
        bool forceSystemIncludes;
        /// When true `analyzeForMutants` also computes coverage intervals.
        bool saveCoverage;
        /// When true the analysis continues even if the translation unit
        /// has parse errors.
        bool allowErrors;
        /// Passed on to the schemata pass (`toSchemata`).
        SchemaQ sq;
    }

    private {
        // Matches a `//` or `/*` comment that starts with NOMUT, optionally
        // followed by a tag in parentheses and a free text comment.
        static immutable rawReNomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;

        // compiled version of rawReNomut.
        Regex!char re_nomut;

        ValidateLoc valLoc;
        FilesysIO fio;

        // used by analyzeForComments to retrieve the tokens of a file.
        Cache cache;

        // everything found by the analysis is stored here.
        Result result;

        Config conf;

        // the mutation kinds that the mutant pass is asked to produce.
        Mutation.Kind[] kinds;
    }

    /** Create an analyzer with a fresh token cache and an empty result.
     *
     * Params:
     *  kinds = mutation kinds passed on to the mutant pass.
     *  valLoc = passed on to the analysis passes; also used to filter dependencies.
     *  fio = used for path conversions and passed on to the analysis passes.
     *  conf = settings for the analysis.
     */
    this(Mutation.Kind[] kinds, ValidateLoc valLoc, FilesysIO fio, Config conf) @trusted {
        this.kinds = kinds;
        this.valLoc = valLoc;
        this.fio = fio;
        this.cache = new Cache;
        this.re_nomut = regex(rawReNomut);
        this.result = new Result;
        this.conf = conf;
    }

    /** Analyze the file described by the compile command.
     *
     * Nothing is analyzed if the file does not exist. An exception thrown
     * by the analysis is logged and not propagated to the caller.
     *
     * Params:
     *  commandsForFileToAnalyze = the file to analyze and its compiler flags.
     */
    void process(ParsedCompileCommand commandsForFileToAnalyze) @safe {
        import std.file : exists;

        commandsForFileToAnalyze.flags.forceSystemIncludes = conf.forceSystemIncludes;

        try {
            if (!exists(commandsForFileToAnalyze.cmd.absoluteFile)) {
                log.warningf("Failed to analyze %s. Do not exist",
                        commandsForFileToAnalyze.cmd.absoluteFile);
                return;
            }
        } catch (Exception e) {
            log.warning(e.msg);
            return;
        }

        // the root is set before the analysis starts and is thus available
        // even if the analysis fails.
        result.root = commandsForFileToAnalyze.cmd.absoluteFile;

        try {
            result.rootCs = checksum(result.root);

            // ctx is a local that tstream keeps a pointer to. Both are only
            // used inside this scope.
            auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
            auto tstream = new TokenStreamImpl(ctx);

            analyzeForMutants(commandsForFileToAnalyze, result.root, ctx, tstream);
            // scan the comments of every file that got a local ID.
            foreach (f; result.fileId.byValue)
                analyzeForComments(f, tstream);
        } catch (Exception e) {
            () @trusted { log.trace(e); }();
            log.info(e.msg);
            log.error("failed analyze of ",
                    commandsForFileToAnalyze.cmd.absoluteFile).collectException;
        }
    }

    /** Run the analysis passes on a file and store the outcome in `result`.
     *
     * The passes are executed in this order: clang AST to mutation AST,
     * mutants, filter, code mutants, schemata and optionally coverage.
     *
     * Params:
     *  commandsForFileToAnalyze = provides the compiler flags.
     *  fileToAnalyze = the file to create a translation unit from.
     *  ctx = the clang context the translation unit is created in.
     *  tstream = token source used when creating the code mutants.
     */
    void analyzeForMutants(ParsedCompileCommand commandsForFileToAnalyze,
            AbsolutePath fileToAnalyze, ref ClangContext ctx, TokenStream tstream) @safe {
        import my.gc.refc : RefCounted;
        import dextool.plugin.mutate.backend.analyze.ast : Ast;
        import dextool.plugin.mutate.backend.analyze.pass_clang;
        import dextool.plugin.mutate.backend.analyze.pass_coverage;
        import dextool.plugin.mutate.backend.analyze.pass_filter;
        import dextool.plugin.mutate.backend.analyze.pass_mutant;
        import dextool.plugin.mutate.backend.analyze.pass_schemata;
        import libclang_ast.check_parse_result : hasParseErrors, logDiagnostic;

        log.info("Analyzing ", fileToAnalyze);
        RefCounted!(Ast) ast;
        {
            // the translation unit is limited to this scope; only the AST
            // and the dependencies are kept.
            auto tu = ctx.makeTranslationUnit(fileToAnalyze,
                    commandsForFileToAnalyze.flags.completeFlags);
            if (tu.hasParseErrors) {
                logDiagnostic(tu);
                log.warningf("Compile error in %s", fileToAnalyze);
                // a file with compile errors is only analyzed if the user
                // has allowed it.
                if (!conf.allowErrors) {
                    log.warning("Skipping");
                    return;
                }
            }

            auto res = toMutateAst(tu.cursor, fio, valLoc);
            ast = res.ast;
            saveDependencies(commandsForFileToAnalyze.flags, result.root, res.dependencies);
            log!"analyze.pass_clang".trace(ast.get.toString);
        }

        // mutants -> filtered mutants -> code mutants.
        auto codeMutants = () {
            auto mutants = toMutants(ast.ptr, fio, valLoc, kinds);
            log!"analyze.pass_mutant".trace(mutants);

            log!"analyze.pass_filter".trace("filter mutants");
            mutants = filterMutants(fio, mutants);
            log!"analyze.pass_filter".trace(mutants);

            return toCodeMutants(mutants, fio, tstream);
        }();
        debug logger.trace(codeMutants);

        {
            auto schemas = toSchemata(ast.ptr, fio, codeMutants, conf.sq);
            log!"analyze.pass_schema".trace(schemas);
            log.tracef("path dedup count:%s length_acc:%s",
                    ast.get.paths.count, ast.get.paths.lengthAccum);

            result.schematas = schemas.getSchematas;
        }

        // flatten the per file mutation points to one array. The paths are
        // stored relative to the root.
        result.mutationPoints = codeMutants.points.byKeyValue.map!(
                a => a.value.map!(b => MutationPointEntry2(fio.toRelativeRoot(a.key),
                b.offset, b.sloc.begin, b.sloc.end, b.mutants))).joiner.array;
        // give each file with mutation points an ID that is local to this
        // result. The next free ID is the current number of registered files.
        foreach (f; codeMutants.points.byKey) {
            const id = Result.LocalFileId(result.idFile.length);
            result.idFile[f] = id;
            result.fileId[id] = f;
            result.infoId[id] = Result.FileInfo(codeMutants.csFiles[f], codeMutants.lang);
        }

        if (conf.saveCoverage) {
            auto cov = toCoverage(ast.ptr, fio, valLoc);
            debug logger.trace(cov);

            // coverage is only kept for files that have a local ID, i.e.
            // files with mutation points.
            foreach (a; cov.points.byKeyValue) {
                if (auto id = a.key in result.idFile) {
                    result.coverage[*id] = a.value;
                }
            }
        }
    }

    /** Find the NOMUT comments in a file and store them as line metadata.
     *
     * Files without a local ID in `result.idFile` are ignored. The stored
     * metadata uses the local ID as its file ID.
     *
     * Tokens are always from the same file.
     *
     * TODO: move this to pass_clang.
     *
     * Params:
     *  file = the file to scan for comments.
     *  tstream = token source handed to the token cache.
     */
    void analyzeForComments(AbsolutePath file, TokenStream tstream) @trusted {
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

        if (auto localId = file in result.idFile) {
            const fid = FileId(localId.get);

            auto mdata = appender!(LineMetadata[])();
            foreach (t; cache.getTokens(AbsolutePath(file), tstream)
                    .filter!(a => a.kind == CXTokenKind.comment)) {
                auto m = matchFirst(t.spelling, re_nomut);
                // zero means that the comment did not match the NOMUT pattern.
                if (m.whichPattern == 0)
                    continue;

                mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
                log.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
            }

            result.metadata ~= mdata.data;
        }
    }

    /** Store the dependencies of the root file in `result.dependencies`.
     *
     * A dependency is kept if it can be resolved to an existing file via
     * the directory of the root or the include paths, and `valLoc` reports
     * it as being inside the output directory.
     *
     * Params:
     *  flags = provides the include paths used to resolve a dependency.
     *  root = the analyzed file; its directory is searched first.
     *  dependencies = the paths to resolve.
     */
    void saveDependencies(ParseFlags flags, AbsolutePath root, Path[] dependencies) @trusted {
        import std.algorithm : cache;
        import std.mmfile;

        auto rootDir = root.dirName;

        // cache the result of the map so the lookup of a dependency is not
        // repeated by the filters that follow.
        foreach (p; dependencies.map!(a => toAbsolutePath(a, rootDir,
                flags.includes, flags.systemIncludes))
                .cache
                .filter!(a => a.hasValue)
                .map!(a => a.orElse(AbsolutePath.init))
                .filter!(a => valLoc.isInsideOutputDir(a))) {
            try {
                result.dependencies ~= DepFile(fio.toRelativeRoot(p), checksum(p));
            } catch (Exception e) {
                // a dependency that fails here is left out.
                log.trace(e.msg).collectException;
            }
        }

        log.trace(result.dependencies);
    }

    /// The outcome of analyzing one root file.
    static class Result {
        import dextool.plugin.mutate.backend.analyze.ast : Interval;
        import dextool.plugin.mutate.backend.database.type : SchemataFragment;
        import dextool.plugin.mutate.backend.type : Language, CodeChecksum, SchemataChecksum;

        alias LocalFileId = NamedType!(long, Tag!"LocalFileId", long.init,
                TagStringable, Hashable);
        alias LocalSchemaId = NamedType!(long, Tag!"LocalSchemaId", long.init,
                TagStringable, Hashable);

        /// The mutation points of all files, with paths relative to the root.
        MutationPointEntry2[] mutationPoints;

        /// Checksum and language of a file with mutation points.
        static struct FileInfo {
            Checksum checksum;
            Language language;
        }

        /// The file that is analyzed, which is a root
        AbsolutePath root;
        /// Checksum of the root file.
        Checksum rootCs;

        /// The dependencies the root has.
        DepFile[] dependencies;

        /// The key is the ID from idFile.
        FileInfo[LocalFileId] infoId;

        /// The IDs is unique for *this* analyze, not globally.
        LocalFileId[AbsolutePath] idFile;
        /// The reverse mapping of idFile.
        AbsolutePath[LocalFileId] fileId;

        // The FileID used in the metadata is local to this analysis. It has to
        // be remapped when added to the database.
        LineMetadata[] metadata;

        /// Mutant schematas that has been generated.
        SchemataResult.Schemata[AbsolutePath] schematas;

        /// Coverage intervals that can be instrumented.
        Interval[][LocalFileId] coverage;
    }
}
1171 
1172 @(
1173         "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
1174 unittest {
1175     import std.regex : regex, matchFirst;
1176     import unit_threaded.runner.io : writelnUt;
1177 
1178     auto re_nomut = regex(Analyze.rawReNomut);
1179     // NOMUT in other type of comments should NOT match.
1180     matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
1181     matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
1182     matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
1183     matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);
1184 
1185     matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
1186     matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
1187     matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
1188     matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
1189     matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
1190     matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
1191     auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
1192     m["tag"].shouldEqual("arch");
1193     m["comment"].shouldEqual("smurf");
1194 }
1195 
/** Token stream backed by a clang context.
 *
 * The tokens of a file can be retrieved either as they are or with the
 * comment tokens removed.
 */
class TokenStreamImpl : TokenStream {
    import libclang_ast.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;
    import dextool.plugin.mutate.backend.utility : tokenize;

    /// Only a pointer is stored; the context is owned by the creator.
    ClangContext* ctx;

    /// The context must outlive any instance of this class.
    // TODO remove @trusted when upgrading to dmd-fe 2.091.0+ and activate dip25 + 1000
    this(ref ClangContext ctx) @trusted {
        this.ctx = &ctx;
    }

    /// Returns: all tokens of `p`, comments included.
    Token[] getTokens(Path p) {
        return tokenize(*ctx, p);
    }

    /// Returns: the tokens of `p` that should affect the checksum, which
    /// are all tokens except the comments.
    Token[] getFilteredTokens(Path p) {
        import clang.c.Index : CXTokenKind;

        auto kept = appender!(Token[])();
        foreach (tok; tokenize(*ctx, p)) {
            if (tok.kind != CXTokenKind.comment)
                kept.put(tok);
        }
        return kept.data;
    }
}
1221 
/// Returns: true if `f` is inside at least one of `roots`, false otherwise
/// (which includes the case that `roots` is empty).
bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
    import dextool.utility : isPathInsideRoot;

    // the filter is lazy, thus the search stops at the first root that contains `f`.
    auto containing = roots.filter!(candidate => isPathInsideRoot(candidate, f));
    return !containing.empty;
}
1233 
/** Update the connection between the marked mutants and their mutation status
 * id and mutation id.
 *
 * A marked mutant is re-linked when the checksum stored with the marking
 * differs from the checksum of the mutation status it currently refers to.
 * Markings whose status has no checksum in the database, or whose checksum
 * can not be resolved to a mutation status and mutation id, are left as is.
 *
 * Params:
 *  db = the database to read the markings from and write the updates to.
 */
void updateMarkedMutants(ref Database db) {
    import dextool.plugin.mutate.backend.database.type : MutationStatusId;
    import dextool.plugin.mutate.backend.type : ExitStatus;

    // Replace the marking with one that refers to the mutation status that
    // has the checksum of the marking, and set that status to the marked one.
    void update(MarkedMutant m) {
        const stId = db.mutantApi.getMutationStatusId(m.statusChecksum);
        if (stId.isNull)
            return;
        const mutId = db.mutantApi.getMutationId(stId.get);
        if (mutId.isNull)
            return;
        db.markMutantApi.removeMarkedMutant(m.statusChecksum);
        db.markMutantApi.markMutant(mutId.get, m.path, m.sloc, stId.get,
                m.statusChecksum, m.toStatus, m.rationale, m.mutText);
        db.mutantApi.updateMutationStatus(stId.get, m.toStatus, ExitStatus(0));
    }

    // find those marked mutants that have a checksum that is different from
    // the mutation status the marked mutant is related to. If possible change
    // the relation to the correct mutation status id.
    //
    // An explicit loop is used instead of a lazy map/filter chain. `map`
    // re-evaluates its function on every access of the front element, which
    // repeated the checksum query for each marked mutant.
    foreach (m; db.markMutantApi.getMarkedMutants) {
        auto current = db.mutantApi.getChecksum(m.statusId);
        if (current.isNull)
            continue;
        if (m.statusChecksum != current.get)
            update(m);
    }
}
1265 
/** Print a table of the marked mutants that have been lost because analyze
 * was run again.
 *
 * Nothing is printed when there are no lost markings.
 */
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.algorithm : sort;
    import std.array : empty;
    import std.conv : to;
    import std.stdio : writeln;

    if (!lostMutants.empty) {
        auto table = Table!6([
                "ID", "File", "Line", "Column", "Status", "Rationale"
                ]);

        // one row per lost marking.
        foreach (lost; lostMutants) {
            typeof(table).Row row = [
                lost.mutationId.get.to!string, lost.path, lost.sloc.line.to!string,
                lost.sloc.column.to!string, lost.toStatus.to!string, lost.rationale.get
            ];
            table.put(row);
        }

        log.warning("Marked mutants was lost");
        writeln(table);
    }
}
1289 
1290 @("shall only let files in the diff through")
1291 unittest {
1292     import std.string : lineSplitter;
1293     import dextool.plugin.mutate.backend.diff_parser;
1294 
1295     immutable lines = `diff --git a/standalone2.d b/standalone2.d
1296 index 0123..2345 100644
1297 --- a/standalone.d
1298 +++ b/standalone2.d
1299 @@ -31,7 +31,6 @@ import std.algorithm : map;
1300  import std.array : Appender, appender, array;
1301  import std.datetime : SysTime;
1302 +import std.format : format;
1303 -import std.typecons : Tuple;
1304 
1305  import d2sqlite3 : sqlDatabase = Database;
1306 
1307 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
1308  struct Database {
1309      import std.conv : to;
1310      import std.exception : collectException;
1311 -    import std.typecons : Nullable;
1312 +    import std.typecons : Nullable, Flag, No;
1313      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
1314 
1315 +    sqlDatabase db;`;
1316 
1317     UnifiedDiffParser p;
1318     foreach (line; lines.lineSplitter)
1319         p.process(line);
1320     auto diff = p.result;
1321 
1322     auto files = FileFilter(".".Path.AbsolutePath, true, diff);
1323 
1324     files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
1325     files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
1326 }
1327 
/** Convert to an absolute path by finding the first match among the compiler flags.
 *
 * The directories are searched in this order: `workDir`, `includes` and
 * last `systemIncludes`. The first directory in which `file` exists wins.
 *
 * Params:
 *  file = the path to resolve.
 *  workDir = the directory that is searched first.
 *  includes = include directories, searched in the given order.
 *  systemIncludes = system include directories, searched in the given order.
 *
 * Returns: the path of the first existing file, or none if the file is not
 * found in any of the directories.
 */
Optional!AbsolutePath toAbsolutePath(Path file, AbsolutePath workDir,
        ParseFlags.Include[] includes, SystemIncludePath[] systemIncludes) @trusted nothrow {
    import std.file : exists;
    import std.path : buildPath;

    // Returns: `dir/file` if it exists, otherwise none.
    Optional!AbsolutePath lookup(string dir) nothrow {
        const p = buildPath(dir, file);
        try {
            if (exists(p))
                return some(AbsolutePath(p));
        } catch (Exception e) {
            // best effort: a directory that can not be probed is treated as
            // not containing the file.
        }
        return none!AbsolutePath;
    }

    {
        auto found = lookup(workDir.toString);
        if (found.hasValue)
            return found;
    }

    // Explicit loops are used instead of a lazy map/filter chain. `map`
    // re-evaluates its function on every access of the front element, which
    // made the matching directory be probed twice and the returned value
    // come from the second probe.
    foreach (ref inc; includes) {
        auto found = lookup(inc.payload);
        if (found.hasValue)
            return found;
    }

    foreach (ref inc; systemIncludes) {
        auto found = lookup(inc.value);
        if (found.hasValue)
            return found;
    }

    return none!AbsolutePath;
}
1363 
/** Determine which root files need to be re-analyzed because either they
 * or one of their dependencies have changed.
 *
 * A root file is reported as changed if the tool version differs from the
 * one in the database, if the checksum of the root on the filesystem differs
 * from the one in the database or if the filesystem checksum of any of its
 * dependencies differs from the checksum stored for the dependency.
 *
 * An exception during the analysis is logged and the result gathered so far
 * is returned.
 *
 * Params:
 *  db = the database to read root files, checksums and dependencies from.
 *  fio = used to convert the stored paths to absolute paths.
 *
 * Returns: a map with one entry per root file in the database. The value is
 * true if the root file was found to be changed.
 */
bool[Path] dependencyAnalyze(ref Database db, FilesysIO fio) @trusted {
    import dextool.cachetools : nullableCache;
    import dextool.plugin.mutate.backend.database : FileId;

    typeof(return) rval;

    // Start with every root file registered as not changed (`false`).
    // NOTE(review): this comment used to read "pessimistic. Add all as
    // needing to be analyzed", which does not agree with the value `false`.
    // Confirm which of the two is the intended behavior.
    foreach (a; db.getRootFiles.map!(a => db.getFile(a).get)) {
        rval[a] = false;
    }

    try {
        // NOTE(review): getFileId and getFileName are not used in this function.
        auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
                30.dur!"seconds");
        auto getFileName = nullableCache!(FileId, Path, (FileId id) => db.getFile(id))(256,
                30.dur!"seconds");
        // checksum of a file as stored in the database.
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        // checksum of a file calculated from the filesystem.
        auto getFileFsChecksum = nullableCache!(AbsolutePath, Checksum, (AbsolutePath p) {
            return checksum(p);
        })(256, 30.dur!"seconds");

        // the checksum that the database has stored for each dependency.
        Checksum[Path] dbDeps;
        foreach (a; db.dependencyApi.getAll)
            dbDeps[a.file] = a.checksum;

        const isToolVersionDifferent = db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
        // `f` is expected to have the members `root`, `rootCs` and `deps`.
        bool isChanged(T)(T f) {
            if (isToolVersionDifferent) {
                // because the tool version is updated then all files need to
                // be re-analyzed. an update can mean that schemas are
                // improved, mutants have been changed/removed etc. it is
                // unknown. the only way to be sure is to re-analyze all files.
                return true;
            }

            // the root itself has changed.
            if (f.rootCs != getFileFsChecksum(fio.toAbsoluteRoot(f.root)))
                return true;

            // at least one dependency has changed. The loop body is only
            // reached for a dependency that passed the filter.
            foreach (a; f.deps.filter!(a => getFileFsChecksum(fio.toAbsoluteRoot(a)) != dbDeps[a])) {
                return true;
            }

            return false;
        }

        // the tuple is cached so the database lookups in the map are not
        // repeated by the filter and the map that follow.
        foreach (f; db.getRootFiles
                .map!(a => db.getFile(a).get)
                .map!(a => tuple!("root", "rootCs", "deps")(a,
                    getFileDbChecksum(a), db.dependencyApi.get(a)))
                .cache
                .filter!(a => isChanged(a))
                .map!(a => a.root)) {
            rval[f] = true;
        }
    } catch (Exception e) {
        log.warning(e.msg);
    }

    log.trace("Dependency analyze: ", rval);

    return rval;
}
1430 
/** Only utf-8 files are supported.
 *
 * Returns: true if the content of `p` starts with a utf-8 BOM or has no BOM
 * at all. A warning is logged for a file with a utf-8 BOM.
 */
bool isFileSupported(FilesysIO fio, AbsolutePath p) @safe {
    import std.encoding : getBOM, BOM;

    const schema = fio.makeInput(p).content.getBOM().schema;

    if (schema == BOM.utf8) {
        // supported, but the user should know about the consequences.
        log.warningf("%s has a utf-8 BOM marker. It will make all coverage and scheman fail to compile",
                p);
        return true;
    }

    return schema == BOM.none;
}
1445 
/** Bring the stored mutant probabilities up to date with the files in the database.
 *
 * The state is loaded from the database, updated for every file in the
 * database, stripped of the entries that do not belong to any of those
 * files and then saved back.
 *
 * Returns: the updated `SchemaQ`.
 */
auto updateSchemaQ(ref Database db) {
    import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
    import dextool.plugin.mutate.backend.database : SchemaStatus;
    import my.hash : Checksum64;
    import my.set;

    auto sq = SchemaQ.make;
    sq.state = db.schemaApi.getMutantProbability;

    // the checksums of the files that are currently in the database.
    Set!Checksum64 latestFiles;

    foreach (file; db.getFiles) {
        scope usedKinds = (SchemaStatus s) => db.schemaApi.getSchemaUsedKinds(file, s);
        sq.update(file, usedKinds);
        latestFiles.add(sq.pathCache[file]);
        debug logger.tracef("updating %s %s", file, sq.pathCache[file]);
    }

    // drop the state of the files that are no longer in the database.
    auto stale = sq.state.byKey.toSet.setDifference(latestFiles);
    foreach (cs; stale.toRange) {
        db.schemaApi.removeMutantProbability(cs);
        sq.state.remove(cs);
        debug logger.trace("removing ", cs);
    }

    sq.scatterTick;

    foreach (entry; sq.state.byKeyValue) {
        db.schemaApi.saveMutantProbability(entry.key, entry.value, SchemaQ.MaxState);
        debug logger.tracef("saving %s with %s values", entry.key, entry.value.length);
    }

    return sq;
}
1480 
/** Update the schema size stored in the database.
 *
 * Params:
 *  db = the database to read the statistics from and save the new size to.
 *  userInit = passed to `getSchemaSize` to retrieve the current size; three
 *      times this value is the second argument to `SchemaSizeQ.make`.
 *  minSize = the first argument to `SchemaSizeQ.make`.
 *
 * Returns: the updated `SchemaSizeQ`.
 */
auto updateSchemaSizeQ(ref Database db, const long userInit, const long minSize) {
    import std.traits : EnumMembers;
    import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaSizeQ;
    import dextool.plugin.mutate.backend.database : SchemaStatus;

    // *3 is a magic number. it feels good.
    auto sq = SchemaSizeQ.make(minSize, userInit * 3);
    sq.currentSize = db.schemaApi.getSchemaSize(userInit);

    // the mutants of all kinds are counted.
    const kinds = [EnumMembers!(Mutation.Kind)];
    const mutantCount = db.mutantApi.totalSrcMutants(kinds)
        .count + db.mutantApi.unknownSrcMutants(kinds).count;

    scope statusCount = (SchemaStatus s) => db.schemaApi.schemaMutantCount(s);
    sq.update(statusCount, mutantCount);

    db.schemaApi.saveSchemaSize(sq.currentSize);
    return sq;
}