dextool.plugin.mutate.backend.analyze source code

1 /**
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import core.thread : Thread;
17 import logger = std.experimental.logger;
18 import std.algorithm : map, filter, joiner, cache, max;
19 import std.array : array, appender, empty;
20 import std.concurrency;
21 import std.datetime : dur, Duration;
22 import std.exception : collectException;
23 import std.functional : toDelegate;
24 import std.parallelism : TaskPool, totalCPUs;
25 import std.range : tee, enumerate;
26 import std.typecons : tuple;
27 
28 import colorlog;
29 import my.actor.utility.limiter;
30 import my.actor;
31 import my.filter : GlobFilter;
32 import my.gc.refc;
33 import my.named_type;
34 import my.optional;
35 import my.set;
36 
37 static import colorlog;
38 
39 import dextool.utility : dextoolBinaryId;
40 
41 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB,
42     ParsedCompileCommandRange, ParsedCompileCommand, ParseFlags, SystemIncludePath;
43 import dextool.plugin.mutate.backend.analyze.schema_ml : SchemaQ;
44 import dextool.plugin.mutate.backend.analyze.internal : TokenStream;
45 import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataResult;
46 import dextool.plugin.mutate.backend.database : Database, LineMetadata,
47     MutationPointEntry2, DepFile;
48 import dextool.plugin.mutate.backend.database.type : MarkedMutant, TestFile,
49     TestFilePath, TestFileChecksum, ToolVersion;
50 import dextool.plugin.mutate.backend.diff_parser : Diff;
51 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
52 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
53 import dextool.plugin.mutate.backend.utility : checksum, Checksum, getProfileResult, Profile;
54 import dextool.plugin.mutate.backend.type : Mutation;
55 import dextool.plugin.mutate.type : MutationKind, MutantIdGeneratorConfig;
56 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze, ConfigSchema, ConfigCoverage;
57 import dextool.type : ExitStatusType, AbsolutePath, Path;
58 
59 version (unittest) {
60     import unit_threaded.assertions;
61 }
62 
63 alias log = colorlog.log!"analyze";
64 
/** Analyze the files in `frange` for mutations and store the result in the database.
 *
 * The work is distributed over actors:
 *  - a flow control actor limits how many analyzers run at the same time,
 *  - a store actor owns the database and saves the results,
 *  - a test path actor checksums the test files,
 *  - one analyze actor is spawned per file that passes the file filters.
 *
 * The calling thread blocks until the store actor answers `IsDone` with `true`.
 *
 * Params:
 *  dbPath = path to the database to open.
 *  confFile = passed on to `needFullAnalyze`.
 *  userKinds = mutation kinds, converted with `toInternal` before use.
 *  analyzeConf = analyze configuration (pool size, file matcher, diff from stdin, profiling, ...).
 *  compilerConf = compiler configuration forwarded to each analyzer.
 *  schemaConf = schema configuration forwarded to the store actor.
 *  covConf = coverage configuration forwarded to the analyzers and the store actor.
 *  frange = the compile commands, one per candidate file.
 *  valLoc = location validator, duplicated for each analyzer.
 *  fio = file system access, duplicated for each analyzer.
 *
 * Returns: `ExitStatusType.Errors` if the request to the store actor fails
 * with a `ScopedActorException`, otherwise `ExitStatusType.Ok`.
 */
ExitStatusType runAnalyzer(const AbsolutePath dbPath, const AbsolutePath confFile,
        const MutationKind[] userKinds, ConfigAnalyze analyzeConf,
        ConfigCompiler compilerConf, ConfigSchema schemaConf,
        ConfigCoverage covConf, ParsedCompileCommandRange frange, ValidateLoc valLoc, FilesysIO fio) @trusted {
    import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
    import dextool.plugin.mutate.backend.mutation_type : toInternal;

    // a diff that can't be parsed falls back to a filter that accepts all files.
    auto fileFilter = () {
        try {
            return FileFilter(fio.getOutputDir, analyzeConf.unifiedDiffFromStdin,
                    analyzeConf.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
        } catch (Exception e) {
            log.info(e.msg);
            log.warning("Unable to parse diff");
        }
        return FileFilter.init;
    }();

    // a file must pass both the user configured matcher and the diff filter.
    bool shouldAnalyze(AbsolutePath p) {
        return analyzeConf.fileMatcher.match(p.toString) && fileFilter.shouldAnalyze(p);
    }

    auto sys = makeSystem;

    auto flowCtrl = sys.spawn(&spawnFlowControl, () {
        const x = analyzeConf.poolSize == 0 ? (totalCPUs + 1) : analyzeConf.poolSize;
        // TODO: investigate further why <4 lead to a livelock of the analyzer.
        return max(x, 4);
    }());

    auto db = refCounted(Database.make(dbPath));

    auto needFullAnalyzeRes = needFullAnalyze(db.get, confFile);

    // if a dependency of a root file has been changed.
    auto changedDeps = dependencyAnalyze(db.get, needFullAnalyzeRes.status, fio);
    auto schemaQ = SchemaQ(db.get.schemaApi.getMutantProbability);

    // the store actor is given the root files whose entry in changedDeps is
    // `false`, i.e. those that are skipped by the loop further down.
    auto store = sys.spawn(&spawnStoreActor, flowCtrl, db,
            StoreConfig(analyzeConf, schemaConf, covConf), fio, changedDeps.byKeyValue
            .filter!(a => !a.value)
            .map!(a => a.key)
            .array, needFullAnalyzeRes);
    db.release;
    // it crashes if the store actor try to call dextoolBinaryId. I don't know
    // why... TLS store trashed? But it works, somehow, if I put some writeln
    // inside dextoolBinaryId.
    send(store, Start.init, ToolVersion(dextoolBinaryId));

    sys.spawn(&spawnTestPathActor, store, analyzeConf.testPaths, analyzeConf.testFileMatcher, fio);

    auto kinds = toInternal(userKinds);

    foreach (f; frange.filter!(a => shouldAnalyze(a.cmd.absoluteFile))) {
        try {
            // skip a file with a `false` entry in changedDeps unless the user
            // force the analyze.
            if (auto v = fio.toRelativeRoot(f.cmd.absoluteFile) in changedDeps) {
                if (!(*v || analyzeConf.forceSaveAnalyze))
                    continue;
            }

            // TODO: how to "slow down" if store is working too slow.

            // must dup schemaQ or we run into multithreaded bugs because a
            // SchemaQ have mutable caches internally.  also must allocate on
            // the GC because otherwise they share the same associative array.
            // Don't ask me how that happens because `.dup` should have created
            // a unique one. If you print the address here of `.state` and the
            // receiving end you will see that they are re-used between actors!
            auto sq = new SchemaQ(schemaQ.dup.state);
            auto a = sys.spawn(&spawnAnalyzer, flowCtrl, store, kinds, f, valLoc.dup,
                    fio.dup, AnalyzeConfig(compilerConf, analyzeConf, covConf, sq));
            // the store counts started analyzers to know when all results are in.
            send(store, StartedAnalyzer.init);
        } catch (Exception e) {
            log.trace(e);
            log.warning(e.msg);
        }
    }

    send(store, DoneStartingAnalyzers.init);

    changedDeps = typeof(changedDeps).init; // free the memory

    // poll the store actor until it reports that everything is saved.
    auto self = scopedActor;
    bool waiting = true;
    while (waiting) {
        try {
            self.request(store, infTimeout).send(IsDone.init).then((bool x) {
                waiting = !x;
            });
        } catch (ScopedActorException e) {
            logger.warning(e.error);
            return ExitStatusType.Errors;
        }
        // only pause between polls. When the store has reported that it is
        // done there is nothing left to wait for.
        if (waiting) {
            () @trusted { Thread.sleep(100.dur!"msecs"); }();
        }
    }

    if (analyzeConf.profile)
        try {
            import std.stdio : writeln;

            writeln(getProfileResult.toString);
        } catch (Exception e) {
            log.warning("Unable to print the profile data: ", e.msg).collectException;
        }

    return ExitStatusType.Ok;
}
174 
175 @safe:
176 
/** Decide which files to analyze: either all of them or only those touched by a diff.
 *
 * Paths are compared with the file extension stripped. A diff normally
 * contains headers which are not listed in `compile_commands.json` and would
 * thus never match a file to analyze. By ignoring the extension a change in
 * e.g. `foo.hpp` lets `foo.cpp` pass the filter.
 *
 * TODO: this may create problems for header only libraries because only the
 * unittest would include the header which mean that for this to work the
 * unittest would have to reside in the same directory as the header file.
 * Which they normally never do. The consequence is that a diff of a header
 * only lib lead to "no files analyzed".
 */
struct FileFilter {
    import std.path : stripExtension;

    /// Paths from the diff, without file extension.
    Set!string files;
    /// If `false` then every file pass the filter.
    bool useFileFilter;
    /// Directory that paths are made relative to before they are looked up in `files`.
    AbsolutePath root;

    /** Params:
     *  root = directory that the paths are relative to.
     *  fromStdin = `true` activates the filtering.
     *  diff = the diff whose files should be analyzed.
     */
    this(AbsolutePath root, bool fromStdin, Diff diff) {
        this.root = root;
        this.useFileFilter = fromStdin;
        foreach (entry; diff.toRange(root)) {
            files.add(stripExtension(entry.key));
        }
    }

    /// Returns: `true` if `p` should be analyzed.
    bool shouldAnalyze(AbsolutePath p) {
        import std.path : relativePath;

        if (useFileFilter) {
            return stripExtension(relativePath(p, root)) in files;
        }

        return true;
    }
}
215 
/// Sent to the store actor for each analyzer that has been spawned. The store
/// actor counts them.
struct StartedAnalyzer {
}

/// Sent to the store actor when no more analyzers will be spawned.
struct DoneStartingAnalyzers {
}
221 
/// Number of analyze tasks that has been spawned that the `storeActor` should wait for.
/// NOTE(review): not referenced in the visible part of this module - confirm if it is still used.
struct AnalyzeCntMsg {
    int value;
}

/// The main thread is waiting for storeActor to send this message.
/// NOTE(review): not referenced in the visible part of this module - confirm if it is still used.
struct StoreDoneMsg {
}
230 
/// Configuration handed to each analyze actor.
struct AnalyzeConfig {
    ConfigCompiler compiler;
    ConfigAnalyze analyze;
    ConfigCoverage coverage;
    // a pointer because each analyzer is given its own heap allocated SchemaQ.
    SchemaQ* sq;
}
237 
/// Tell an analyze actor to request a token from the flow control actor.
struct WaitForToken {
}

/// Tell an analyze actor to analyze its file. Sent when a token has been received.
struct RunAnalyze {
}

/// Messages that an analyze actor handles.
alias AnalyzeActor = typedActor!(void function(WaitForToken), void function(RunAnalyze));
245 
/** Start an analyze of a file.
 *
 * The actor first requests a token from `flowCtrl`. When the token arrives it
 * analyzes `fileToAnalyze`, sends the outcome to `storeAddr` and shuts down.
 *
 * Exactly one message carrying a `Token` is sent to the store in the normal
 * flow: the result together with a token on success, or only a token if the
 * analyze failed or a file is unsupported.
 *
 * Params:
 *  self = the actor implementation to configure.
 *  flowCtrl = limits the number of concurrent analyzers.
 *  storeAddr = receiver of the result.
 *  kinds = mutation kinds to analyze for.
 *  fileToAnalyze = compile command of the file to analyze.
 *  vloc = location validator.
 *  fio = file system access. A duplicate is stored in the actor.
 *  conf = configuration of the analyze.
 */
auto spawnAnalyzer(AnalyzeActor.Impl self, FlowControlActor.Address flowCtrl, StoreActor.Address storeAddr,
        Mutation.Kind[] kinds, ParsedCompileCommand fileToAnalyze,
        ValidateLoc vloc, FilesysIO fio, AnalyzeConfig conf) {
    auto st = tuple!("self", "flowCtrl", "storeAddr", "kinds", "fileToAnalyze",
            "vloc", "fio", "conf")(self, flowCtrl, storeAddr, kinds,
            fileToAnalyze, vloc, fio.dup, conf);
    alias Ctx = typeof(st);

    // ask for a token and trigger the analyze when it is received.
    static void wait(ref Ctx ctx, WaitForToken) {
        ctx.self.request(ctx.flowCtrl, infTimeout).send(TakeTokenMsg.init)
            .capture(ctx).then((ref Ctx ctx, Token _) => send(ctx.self, RunAnalyze.init));
    }

    static void run(ref Ctx ctx, RunAnalyze) @safe {
        auto profile = Profile("analyze file " ~ ctx.fileToAnalyze.cmd.absoluteFile);

        bool onlyValidFiles = true;

        try {
            log.tracef("%s begin", ctx.fileToAnalyze.cmd.absoluteFile);
            auto analyzer = Analyze(ctx.kinds, ctx.vloc, ctx.fio,
                    Analyze.Config(ctx.conf.compiler.forceSystemIncludes,
                        ctx.conf.coverage.use, ctx.conf.compiler.allowErrors.get, *ctx.conf.sq));
            analyzer.process(ctx.fileToAnalyze, ctx.conf.analyze.idGenConfig);

            // one unsupported file invalidates the whole result.
            foreach (a; analyzer.result.idFile.byKey) {
                if (!isFileSupported(ctx.fio, a)) {
                    // the format string has a `%s` thus the file must be
                    // passed as an argument.
                    log.warningf(
                            "%s: file not supported. It must be in utf-8 format without a BOM marker",
                            a);
                    onlyValidFiles = false;
                    break;
                }
            }

            if (onlyValidFiles)
                send(ctx.storeAddr, analyzer.result, Token.init);
            log.tracef("%s end", ctx.fileToAnalyze.cmd.absoluteFile);
        } catch (Exception e) {
            onlyValidFiles = false;
            log.error(e.msg).collectException;
        }

        // the token must be handed to the store even when the analyze failed.
        if (!onlyValidFiles) {
            log.tracef("%s failed", ctx.fileToAnalyze.cmd.absoluteFile).collectException;
            send(ctx.storeAddr, Token.init);
        }

        ctx.self.shutdown;
    }

    self.name = "analyze";
    send(self, WaitForToken.init);
    return impl(self, &run, capture(st), &wait, capture(st));
}
301 
/// The result of checksumming the test files. Sent from the test path actor
/// to the store actor.
class TestFileResult {
    /// Time it took to produce the result.
    Duration time;
    /// The found test files, keyed by their checksum.
    TestFile[Checksum] files;
}

/// Messages that the test path actor handles.
alias TestPathActor = typedActor!(void function(Start, StoreActor.Address));
308 
/** Spawn an actor that checksums the test files and sends the result to `store`.
 *
 * Directories in `userPaths` are traversed. All regular files that are found
 * and match `matcher` are checksummed. The actor shuts down when the result
 * has been sent.
 *
 * Params:
 *  self = the actor implementation to configure.
 *  store = receiver of the `TestFileResult`.
 *  userPaths = files and directories to inspect.
 *  matcher = only files that match are saved.
 *  fio = file system access. A duplicate is stored in the actor.
 */
auto spawnTestPathActor(TestPathActor.Impl self, StoreActor.Address store,
        AbsolutePath[] userPaths, GlobFilter matcher, FilesysIO fio) {
    import std.datetime : Clock;
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.file : isDir, isFile, dirEntries, SpanMode;
    import my.container.vector;

    auto st = tuple!("self", "matcher", "fio", "userPaths")(self, matcher, fio.dup, userPaths);
    alias Ctx = typeof(st);

    static void start(ref Ctx ctx, Start, StoreActor.Address store) {
        auto profile = Profile("checksum test files");

        auto sw = StopWatch(AutoStart.yes);

        TestFile makeTestFile(const AbsolutePath file) {
            auto cs = checksum(ctx.fio.makeInput(file).content[]);
            return TestFile(TestFilePath(ctx.fio.toRelativeRoot(file)),
                    TestFileChecksum(cs), Clock.currTime);
        }

        // work queue. The content of a directory is appended to it.
        auto paths = vector(ctx.userPaths);

        auto tfiles = new TestFileResult;

        while (!paths.empty) {
            try {
                if (isDir(paths.front)) {
                    log.trace("  Test directory ", paths.front);
                    foreach (a; dirEntries(paths.front, SpanMode.shallow).map!(
                            a => AbsolutePath(a.name))) {
                        paths.put(a);
                    }
                } else if (isFile(paths.front) && ctx.matcher.match(paths.front)) {
                    log.trace("  Test saved ", paths.front);
                    auto t = makeTestFile(paths.front);
                    tfiles.files[t.checksum.get] = t;
                }
            } catch (Exception e) {
                log.warning(e.msg).collectException;
            }

            paths.popFront;
        }

        // the result is a class instance that is shared with the receiver.
        // It must therefore be complete before it is sent.
        tfiles.time = sw.peek;

        log.infof("Found %s test files", tfiles.files.length).collectException;
        send(store, tfiles);
        ctx.self.shutdown;
    }

    self.name = "test path";
    send(self, Start.init, store);
    return impl(self, &start, capture(st));
}
365 
/// Start message. It is used to activate the store actor and the test path actor.
struct Start {
}

/// Request to the store actor. The answer is `true` when the whole analyze
/// is done.
struct IsDone {
}

/// NOTE(review): not referenced in the visible part of this module - confirm if it is still used.
struct SetDone {
}

// Check if it is time to post process.
struct CheckPostProcess {
}
// Run the post processing.
struct PostProcess {
}
381 
/// Configuration handed to the store actor.
struct StoreConfig {
    ConfigAnalyze analyze;
    ConfigSchema schema;
    ConfigCoverage coverage;
}
387 
/// Messages that the store actor handles, one signature per message.
alias StoreActor = typedActor!(
        void function(Start, ToolVersion),
        bool function(IsDone),
        void function(StartedAnalyzer),
        void function(Analyze.Result, Token),
        // failed to analyze the file, but still returning the token.
        void function(Token),
        void function(DoneStartingAnalyzers),
        void function(TestFileResult),
        void function(CheckPostProcess),
        void function(PostProcess),
);
393 
/** Store the result of the analyze.
 *
 * The actor owns the database while the analyze is running. It saves the
 * result from each analyzer and the checksums of the test files. When all
 * started analyzers have reported back, and the test files are saved, it runs
 * the post processing, after which `IsDone` is answered with `true`.
 *
 * Params:
 *  self = the actor implementation to configure.
 *  flowCtrl = tokens are returned to it when an analyzer has reported back.
 *  db = the database to store the results in.
 *  conf = configuration of the store.
 *  fio = file system access. A duplicate is stored in the actor.
 *  rootFiles = root files to register as saved, together with their
 *      dependencies, during post processing so they aren't pruned.
 *  needFullAnalyze = if everything should be re-analyzed and saved.
 */
auto spawnStoreActor(StoreActor.Impl self, FlowControlActor.Address flowCtrl, RefCounted!(Database) db,
        StoreConfig conf, FilesysIO fio, Path[] rootFiles, NeedFullAnalyzeResult needFullAnalyze) @trusted {
    static struct State {
        import dextool.plugin.mutate.backend.type : CodeMutant;

        NeedFullAnalyzeResult needFullAnalyze;

        // conditions governing when the analyze is done
        // if all analyze workers have been started and thus it is time to
        // start checking if startedAnalyzers == savedResult.
        bool doneStarting;
        // number of analyze workers that have been started.
        int startedAnalyzers;
        // number of saved results.
        int savedResult;
        // if checksums of all test files have been saved to disk
        bool savedTestFileResult;

        // if a file is modified then the timeout context need to be reset
        bool resetTimeoutCtx;

        /// Set when the whole analyze process is done and all results are saved to the database.
        bool isDone;

        // only save new mutants. assuming that it is faster to check if the
        // mutants have been saved before than to go through multiple sql
        // queries.
        Set!CodeMutant saved;

        // files that have been saved to the database.
        Set!AbsolutePath savedFiles;
        // clearing a file should only happen once.
        Set!AbsolutePath clearedFiles;
    }

    auto st = tuple!("self", "db", "state", "fio", "conf", "rootFiles", "flowCtrl")(self,
            db, refCounted(State(needFullAnalyze)), fio.dup, conf, rootFiles, flowCtrl);
    alias Ctx = typeof(st);

    // configure the database and start the periodic check for completion.
    static void start(ref Ctx ctx, Start, ToolVersion toolVersion) {
        log.trace("starting store actor");

        if (ctx.conf.analyze.fastDbStore) {
            log.info(
                    "Turning OFF sqlite3 synchronization protection to improve the write performance");
            log.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
            ctx.db.get.run("PRAGMA synchronous = OFF");
            ctx.db.get.run("PRAGMA journal_mode = MEMORY");
        }

        send(ctx.self, CheckPostProcess.init);
        log.trace("store actor active");
    }

    static bool isDone(ref Ctx ctx, IsDone) {
        return ctx.state.get.isDone;
    }

    static void startedAnalyzers(ref Ctx ctx, StartedAnalyzer) {
        ctx.state.get.startedAnalyzers++;
    }

    static void doneStartAnalyzers(ref Ctx ctx, DoneStartingAnalyzers) {
        ctx.state.get.doneStarting = true;
    }

    // an analyzer that failed only sends its token.
    static void failedFileAnalyze(ref Ctx ctx, Token) {
        send(ctx.flowCtrl, ReturnTokenMsg.init);
        // a failed file has to count as well.
        ctx.state.get.savedResult++;
    }

    // trigger the post processing when everything is saved, otherwise check
    // again after a delay.
    static void checkPostProcess(ref Ctx ctx, CheckPostProcess) {
        if (ctx.state.get.doneStarting && ctx.state.get.savedTestFileResult
                && (ctx.state.get.startedAnalyzers == ctx.state.get.savedResult))
            send(ctx.self, PostProcess.init);
        else
            delayedSend(ctx.self, delay(500.dur!"msecs"), CheckPostProcess.init);
    }

    // synchronize the test files in the database with those in `result`.
    static void savedTestFileResult(ref Ctx ctx, TestFileResult result) {
        auto profile = Profile("save test files");

        ctx.state.get.savedTestFileResult = true;

        Set!Checksum old;

        auto t = ctx.db.get.transaction;

        // remove those in the database that are no longer among the test files.
        foreach (a; ctx.db.get.testFileApi.getTestFiles) {
            old.add(a.checksum.get);
            if (a.checksum.get !in result.files) {
                log.info("Removed test file ", a.file.get.toString);
                ctx.db.get.testFileApi.removeFile(a.file);
            }
        }

        // only add those that aren't already in the database.
        foreach (a; result.files.byValue.filter!(a => a.checksum.get !in old)) {
            log.info("Saving test file ", a.file.get.toString);
            ctx.db.get.testFileApi.put(a);
        }

        t.commit;

        send(ctx.self, CheckPostProcess.init);
    }

    // save the result from one analyzer in a single transaction.
    static void save(ref Ctx ctx, Analyze.Result result, Token) {
        import dextool.cachetools : nullableCache;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
        import dextool.plugin.mutate.backend.type : Language;

        auto profile = Profile("save " ~ result.root);

        // by returning the token now another file analyze can start while we
        // are saving the current one.
        send(ctx.flowCtrl, ReturnTokenMsg.init);

        ctx.state.get.savedResult++;
        log.infof("Analyzed %s/%s %s", ctx.state.get.savedResult,
                ctx.state.get.startedAnalyzers, result.root);

        auto getFileId = nullableCache!(string, FileId, (string p) => ctx.db.get.getFileId(p.Path))(256,
                10.dur!"seconds");
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => ctx.db.get.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
            return checksum(ctx.fio.makeInput(AbsolutePath(Path(p))).content[]);
        })(256, 10.dur!"seconds");

        auto trans = ctx.db.get.transaction;

        // keeps both absolute and relative because then less transformations
        // are needed. mutation points use relative...
        Set!Path skipFile;

        // mark files that have an unchanged checksum as "already saved"
        foreach (f; result.idFile.byKey.filter!(a => a !in ctx.state.get.clearedFiles)) {
            const relp = ctx.fio.toRelativeRoot(f);

            if (getFileDbChecksum(relp) != getFileFsChecksum(f)
                    || ctx.conf.analyze.forceSaveAnalyze || ctx.state.get.needFullAnalyze.status) {
                // this is critical in order to remove old data about a file.
                if (f !in ctx.state.get.clearedFiles) {
                    ctx.db.get.removeFile(relp);
                    ctx.state.get.clearedFiles.add(f);
                }
            } else {
                log.info("Unchanged ".color(Color.yellow), f);
                ctx.state.get.savedFiles.add(f);
                skipFile.add(f);
                skipFile.add(relp);
            }
        }

        {
            bool isChanged = ctx.state.get.needFullAnalyze.status;

            foreach (f; result.idFile.byKey.filter!(a => a !in skipFile
                    && a !in ctx.state.get.savedFiles)) {
                isChanged = true;
                log.info("Saving ".color(Color.green), f);

                const relp = ctx.fio.toRelativeRoot(f);
                const info = result.infoId[result.idFile[f]];
                ctx.db.get.fileApi.put(relp, info.checksum, info.language, f == result.root);

                ctx.state.get.savedFiles.add(f);
            }

            if (result.root !in ctx.state.get.savedFiles) {
                // this occurs when the file is e.g. a unittest that uses a
                // header only library. The unittests are not mutated thus
                // no mutation points exists in them but we want dextool to
                // still, if possible, track the unittests for changes.
                isChanged = true;
                const relp = ctx.fio.toRelativeRoot(result.root);
                ctx.db.get.removeFile(relp);
                // the language do not matter because it is a file without
                // any mutants.
                ctx.db.get.fileApi.put(relp, result.rootCs, Language.init, true);
                ctx.state.get.savedFiles.add(ctx.fio.toAbsoluteRoot(result.root));
            }

            {
                auto app = appender!(MutationPointEntry2[])();
                foreach (mp; result.mutationPoints.filter!(a => a.file !in skipFile
                        && a.cm !in ctx.state.get.saved)) {
                    app.put(mp);
                }
                // only block new mutants of the same source code change after
                // a whole "pass" because the same mutant kind can result in
                // the same CodeChecksum.
                ctx.state.get.saved.add(app.data.map!(a => a.cm));
                ctx.db.get.mutantApi.put(app.data, ctx.fio.getOutputDir);
            }

            // must always update dependencies because they may not contain
            // mutants. Only files that are changed and contain mutants
            // trigger isChanged to be true.
            try {
                // not all files are tracked thus this may throw an exception.
                ctx.db.get.dependencyApi.set(ctx.fio.toRelativeRoot(result.root),
                        result.dependencies);
            } catch (Exception e) {
                // expected for untracked files thus only visible when tracing.
                log.trace(e.msg).collectException;
            }

            ctx.state.get.resetTimeoutCtx = ctx.state.get.resetTimeoutCtx || isChanged;

            if (isChanged) {
                foreach (a; result.coverage.byKeyValue) {
                    const fid = getFileId(ctx.fio.toRelativeRoot(result.fileId[a.key]));
                    if (!fid.isNull) {
                        ctx.db.get.coverageApi.clearCoverageMap(fid.get);
                        ctx.db.get.coverageApi.putCoverageMap(fid.get, a.value);
                    }
                }

                saveSchemaFragments(ctx.db.get, ctx.fio, result.schematas);
            }
        }

        {
            // files that the user has already been informed about.
            Set!long printed;
            auto app = appender!(LineMetadata[])();
            foreach (md; result.metadata) {
                const localId = Analyze.Result.LocalFileId(md.id.get);
                // transform the ID from local to global.
                const fid = getFileId(ctx.fio.toRelativeRoot(result.fileId[localId]));
                if (fid.isNull && !printed.contains(md.id.get)) {
                    printed.add(md.id.get);
                    log.info("File with suppressed mutants (// NOMUT) not in the database: ",
                            result.fileId[localId]).collectException;
                } else if (!fid.isNull) {
                    app.put(LineMetadata(fid.get, md.line, md.attr));
                }
            }
            ctx.db.get.metaDataApi.put(app.data);
        }

        trans.commit;

        send(ctx.self, CheckPostProcess.init);
    }

    // finalize the database when all results are saved. It runs at most once.
    static void postProcess(ref Ctx ctx, PostProcess) {
        import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

        // more than one PostProcess may have been sent.
        if (ctx.state.get.isDone)
            return;

        ctx.state.get.isDone = true;

        // revert the pragmas that `start` changed.
        void fastDbOff() {
            if (!ctx.conf.analyze.fastDbStore)
                return;
            ctx.db.get.run("PRAGMA synchronous = ON");
            ctx.db.get.run("PRAGMA journal_mode = DELETE");
        }

        // remove files from the database that aren't in savedFiles.
        void pruneFiles() {
            import std.path : buildPath;

            auto profile = Profile("prune files");

            log.info("Pruning the database of dropped files");
            auto files = ctx.db.get.getFiles.map!(a => ctx.fio.toAbsoluteRoot(a)).toSet;

            foreach (f; files.setDifference(ctx.state.get.savedFiles).toRange) {
                log.info("Removing ".color(Color.red), f);
                ctx.db.get.removeFile(ctx.fio.toRelativeRoot(f));
            }
        }

        void addRoots() {
            if (ctx.conf.analyze.forceSaveAnalyze || ctx.state.get.needFullAnalyze.status)
                return;

            // add root files and their dependencies that has not been analyzed because nothing has changed.
            // By adding them they are not removed.

            auto profile = Profile("add roots and dependencies");
            foreach (a; ctx.rootFiles) {
                auto p = ctx.fio.toAbsoluteRoot(a);
                if (p !in ctx.state.get.savedFiles) {
                    ctx.state.get.savedFiles.add(p);
                    // fejk text for the user to tell them that yes, the files have
                    // been analyzed.
                    log.info("Analyzing ", a);
                    log.info("Unchanged ".color(Color.yellow), a);
                }
            }
            foreach (a; ctx.rootFiles.map!(a => ctx.db.get.dependencyApi.get(a)).joiner) {
                ctx.state.get.savedFiles.add(ctx.fio.toAbsoluteRoot(a));
            }
        }

        // drop the probabilities whose key isn't the checksum of the path of
        // a file in the database.
        void pruneSchemaMl() {
            auto profile = Profile("prune schema_ml model");
            log.info("Prune schema ML model");

            Set!Checksum files;
            foreach (a; ctx.db.get.getFiles)
                files.add(checksum(cast(const(ubyte)[]) a.toString));

            foreach (a; ctx.db.get.schemaApi.getMutantProbability.byKey.filter!(a => a !in files)) {
                logger.trace("schema model. Dropping ", a);
                ctx.db.get.schemaApi.removeMutantProbability(a);
            }
        }

        auto trans = ctx.db.get.transaction;

        // must run before pruneFiles because it extends savedFiles.
        addRoots;

        if (ctx.state.get.resetTimeoutCtx) {
            log.info("Resetting timeout context");
            resetTimeoutContext(ctx.db.get);
        }

        log.info("Updating metadata");
        ctx.db.get.metaDataApi.updateMetadata;

        if (ctx.conf.analyze.prune) {
            pruneFiles();
            {
                auto profile = Profile("prune dependencies");
                log.info("Prune dependencies");
                ctx.db.get.dependencyApi.cleanup;
            }
            {
                auto profile = Profile("remove orphaned mutants");
                log.info("Removing orphaned mutants");
                auto progress = (size_t i, size_t total, const Duration avgRemoveTime,
                        const Duration timeLeft, SysTime predDoneAt) {
                    logger.infof("%s/%s removed (average %s) (%s) (%s)", i,
                            total, avgRemoveTime, timeLeft, predDoneAt.toSimpleString);
                };
                auto done = (size_t total) {
                    logger.infof(total > 0, "%1$s/%1$s removed", total);
                };
                ctx.db.get.mutantApi.removeOrphanedMutants(progress.toDelegate, done.toDelegate);
            }
            // a failure to prune the model is not fatal.
            try {
                pruneSchemaMl;
            } catch (Exception e) {
                logger.warning(e.msg);
                logger.warning("Unable to prune schema ML model");
            }
        }

        log.info("Updating manually marked mutants");
        updateMarkedMutants(ctx.db.get);
        printLostMarkings(ctx.db.get.markMutantApi.getLostMarkings);

        if (ctx.state.get.needFullAnalyze.status) {
            log.info("Updating tool version");
            ctx.db.get.miscApi.setToolVersion(ToolVersion(dextoolBinaryId));
            log.info("Update config version");
            ctx.db.get.miscApi.setConfigVersion(ctx.state.get.needFullAnalyze.cs);
        }

        log.info("Committing changes");
        trans.commit;
        log.info("Ok".color(Color.green));

        fastDbOff();

        if (ctx.state.get.needFullAnalyze.status) {
            auto profile = Profile("compact");
            log.info("Compacting the database");
            ctx.db.get.vacuum;
        }
    }

    self.name = "store";

    auto s = impl(self, &start, capture(st), &isDone, capture(st),
            &startedAnalyzers, capture(st), &save, capture(st), &doneStartAnalyzers,
            capture(st), &savedTestFileResult, capture(st), &checkPostProcess,
            capture(st), &postProcess, capture(st), &failedFileAnalyze, capture(st));
    s.exceptionHandler = toDelegate(&logExceptionHandler);
    return s;
}
789 
/** Analyze a file for mutants.
 *
 * `process` is the entry point. Everything that is found while analyzing the
 * root file and the files it pulls in is accumulated in `result`.
 */
struct Analyze {
    import std.regex : Regex, regex, matchFirst;
    import std.typecons : Yes;
    import libclang_ast.context : ClangContext;

    /// Settings that control how a file is analyzed.
    static struct Config {
        /// Copied to the flags of the compile command before the file is parsed.
        bool forceSystemIncludes;
        /// Also collect coverage intervals for the analyzed files.
        bool saveCoverage;
        /// Continue the analysis even though the translation unit has parse errors.
        bool allowErrors;
        /// Passed on to the schemata pass.
        SchemaQ sq;
    }

    private {
        // Matches a `//` or `/*` comment that starts with NOMUT. Captures the
        // type suffix (BEGIN, END, NEXT or nothing), an optional tag inside
        // parentheses and an optional trailing comment.
        static immutable rawReNomut = `^((//)|(/\*+))\s*NOMUT(?P<type>\w*)\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;

        Regex!char re_nomut;
        ValidateLoc valLoc;
        FilesysIO fio;

        Result result;

        Config conf;

        Mutation.Kind[] kinds;
    }

    /// Params:
    ///  kinds = mutation kinds to generate mutants for
    ///  valLoc = used by the passes to decide which locations are of interest
    ///  fio = file system access and conversion of paths
    ///  conf = see `Config`
    this(Mutation.Kind[] kinds, ValidateLoc valLoc, FilesysIO fio, Config conf) @trusted {
        this.kinds = kinds;
        this.valLoc = valLoc;
        this.fio = fio;
        this.re_nomut = regex(rawReNomut);
        this.result = new Result;
        this.conf = conf;
    }

    /** Analyze the file that the compile command refers to.
     *
     * A file that do not exist is skipped with a warning. Exceptions that
     * occur during the analysis are logged and not propagated to the caller.
     *
     * Params:
     *  commandsForFileToAnalyze = the root file together with its compiler flags
     *  idGenConf = forwarded to the pass that produce the code mutants
     */
    void process(ParsedCompileCommand commandsForFileToAnalyze, MutantIdGeneratorConfig idGenConf) @safe {
        import std.file : exists;

        commandsForFileToAnalyze.flags.forceSystemIncludes = conf.forceSystemIncludes;

        try {
            if (!exists(commandsForFileToAnalyze.cmd.absoluteFile)) {
                log.warningf("Failed to analyze %s. Do not exist",
                        commandsForFileToAnalyze.cmd.absoluteFile);
                return;
            }
        } catch (Exception e) {
            log.warning(e.msg);
            return;
        }

        result.root = commandsForFileToAnalyze.cmd.absoluteFile;

        try {
            result.rootCs = checksum(result.root);

            auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
            scope tstream = new TokenStreamImpl(ctx);

            analyzeForMutants(commandsForFileToAnalyze, result.root, ctx, tstream, idGenConf);
            // the comments are scanned for every file that got a local ID by
            // the mutant analysis.
            foreach (f; result.fileId.byValue)
                analyzeForComments(f, tstream);
        } catch (Exception e) {
            () @trusted { log.trace(e); }();
            log.info(e.msg);
            log.error("failed analyze of ",
                    commandsForFileToAnalyze.cmd.absoluteFile).collectException;
        }
    }

    /** Parse the file and run the passes that produce mutants, schemata and
     * coverage. The outcome is stored in `result`.
     *
     * Nothing is stored if the translation unit has parse errors and
     * `conf.allowErrors` is false.
     */
    void analyzeForMutants(ParsedCompileCommand commandsForFileToAnalyze, AbsolutePath fileToAnalyze,
            ref ClangContext ctx, scope TokenStream tstream, MutantIdGeneratorConfig idGenConf) @safe {
        import my.gc.refc : RefCounted;
        import dextool.plugin.mutate.backend.analyze.ast : Ast;
        import dextool.plugin.mutate.backend.analyze.pass_clang;
        import dextool.plugin.mutate.backend.analyze.pass_coverage;
        import dextool.plugin.mutate.backend.analyze.pass_filter;
        import dextool.plugin.mutate.backend.analyze.pass_mutant;
        import dextool.plugin.mutate.backend.analyze.pass_schemata;
        import libclang_ast.check_parse_result : hasParseErrors, logDiagnostic;

        log.info("Analyzing ", fileToAnalyze);
        RefCounted!(Ast) ast;
        {
            // the translation unit is only needed to build the AST and the
            // list of dependencies thus it is limited to this scope.
            auto tu = ctx.makeTranslationUnit(fileToAnalyze,
                    commandsForFileToAnalyze.flags.completeFlags);
            if (tu.hasParseErrors) {
                logDiagnostic(tu);
                log.warningf("Compile error in %s", fileToAnalyze);
                if (!conf.allowErrors) {
                    log.warning("Skipping");
                    return;
                }
            }

            auto res = toMutateAst(tu.cursor, fio, valLoc);
            ast = res.ast;
            saveDependencies(commandsForFileToAnalyze.flags, result.root, res.dependencies);
            log!"analyze.pass_clang".trace(ast.get.toString);
        }

        // AST -> mutants -> filtered mutants -> code mutants
        auto codeMutants = () {
            auto mutants = toMutants(ast.ptr, fio, valLoc, kinds);
            log!"analyze.pass_mutant".trace(mutants);

            log!"analyze.pass_filter".trace("filter mutants");
            mutants = filterMutants(fio, mutants);
            log!"analyze.pass_filter".trace(mutants);

            return toCodeMutants(mutants, fio, tstream, idGenConf);
        }();
        debug logger.trace(codeMutants);

        {
            auto schemas = toSchemata(ast.ptr, fio, codeMutants, conf.sq);
            log!"analyze.pass_schema".trace(schemas);
            log.tracef("path dedup count:%s length_acc:%s",
                    ast.get.paths.count, ast.get.paths.lengthAccum);

            result.schematas = schemas.getFragments;
        }

        {
            // flatten the per file mutation points to one list.
            auto app = appender!(MutationPointEntry2[])();
            foreach (a; codeMutants.points.byKeyValue) {
                foreach (b; a.value) {
                    app.put(MutationPointEntry2(fio.toRelativeRoot(a.key),
                            b.offset, b.sloc.begin, b.sloc.end, b.mutant));
                }
            }
            result.mutationPoints = app.data;
        }
        // give each file with mutation points an ID that is local to this
        // analysis. The running length of idFile is used as the ID.
        foreach (f; codeMutants.points.byKey) {
            const id = Result.LocalFileId(result.idFile.length);
            result.idFile[f] = id;
            result.fileId[id] = f;
            result.infoId[id] = Result.FileInfo(codeMutants.csFiles[f], codeMutants.lang);
        }

        if (conf.saveCoverage) {
            auto cov = toCoverage(ast.ptr, fio, valLoc);
            debug logger.trace(cov);

            // coverage is only kept for files that have a local ID.
            foreach (a; cov.points.byKeyValue) {
                if (auto id = a.key in result.idFile) {
                    result.coverage[*id] = a.value;
                }
            }
        }
    }

    /** Find the NOMUT comments in `file` and store them as line metadata.
     *
     * Tokens are always from the same file.
     *
     * A plain NOMUT affects the line of the comment, NOMUTNEXT the line after
     * the comment and NOMUTBEGIN/NOMUTEND the lines from the begin comment up
     * to but not including the end comment.
     *
     * TODO: move this to pass_clang.
     */
    void analyzeForComments(AbsolutePath file, scope TokenStream tstream) @safe {
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

        // idFile is only populated for files that have mutation points. There
        // is nothing to attach the metadata to for other files.
        auto localId = file in result.idFile;
        if (localId is null)
            return;

        const fid = FileId(localId.get);

        auto mdata = appender!(LineMetadata[])();

        // line of the NOMUTBEGIN that is open, -1 when there is none.
        int sectionStart = -1;
        LineMetadata sectionData;

        foreach (t; tstream.getTokens(file).filter!(a => a.kind == CXTokenKind.comment)) {
            auto m = matchFirst(t.spelling, re_nomut);

            if (m.whichPattern == 0)
                continue;

            switch (m["type"]) {
            case "BEGIN":
                if (sectionStart == -1) {
                    sectionStart = t.loc.line;
                    // the line is overwritten for each line in the section
                    // when the matching NOMUTEND is found.
                    sectionData = LineMetadata(fid, t.loc.line + 1,
                            LineAttr(NoMut(m["tag"], m["comment"])));
                } else {
                    logger.warningf("NOMUT: Found multiple NOMUTBEGIN in a row! Will use the first one on line %s",
                            sectionStart);
                }
                break;
            case "END":
                if (sectionStart == -1) {
                    logger.warningf("NOMUT: Found a NOMUTEND without a NOMUTBEGIN on line %s! Ignoring",
                            t.loc.line);
                } else {
                    foreach (const i; sectionStart .. t.loc.line) {
                        sectionData.line = i;
                        () @trusted { mdata.put(sectionData); }();
                        // report the line that is suppressed. The column is
                        // the one of the NOMUTEND token.
                        log.tracef("NOMUT found at %s:%s:%s", file, i, t.loc.column);
                    }

                    sectionStart = -1;
                    sectionData = LineMetadata.init;
                }
                break;
            case "NEXT":
                () @trusted {
                    mdata.put(LineMetadata(fid, t.loc.line + 1,
                            LineAttr(NoMut(m["tag"], m["comment"]))));
                }();
                log.tracef("NOMUT ON NEXT LINE found at %s:%s:%s", file,
                        t.loc.line, t.loc.column);
                break;
            default:
                () @trusted {
                    mdata.put(LineMetadata(fid, t.loc.line,
                            LineAttr(NoMut(m["tag"], m["comment"]))));
                }();
                log.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
                break;
            }
        }

        // a section that is never closed do not produce any metadata. Tell the
        // user instead of silently dropping the suppression.
        if (sectionStart != -1) {
            logger.warningf("NOMUT: Found a NOMUTBEGIN on line %s without a NOMUTEND! Ignoring",
                    sectionStart);
        }

        result.metadata ~= mdata.data;
    }

    /** Store the dependencies of `root` that can be resolved to an existing
     * file and that `valLoc` reports as being inside the output directory.
     *
     * A dependency for which the path conversion or the checksum calculation
     * throws is skipped.
     */
    void saveDependencies(ParseFlags flags, AbsolutePath root, Path[] dependencies) @trusted {
        import std.algorithm : cache;

        auto rootDir = root.dirName;

        foreach (p; dependencies.map!(a => toAbsolutePath(a, rootDir,
                flags.includes, flags.systemIncludes))
                .cache
                .filter!(a => a.hasValue)
                .map!(a => a.orElse(AbsolutePath.init))
                .filter!(a => valLoc.isInsideOutputDir(a))) {
            try {
                result.dependencies ~= DepFile(fio.toRelativeRoot(p), checksum(p));
            } catch (Exception e) {
                log.trace(e.msg).collectException;
            }
        }

        log.trace(result.dependencies);
    }

    /// Everything that has been found when analyzing one root file.
    static class Result {
        import dextool.plugin.mutate.backend.analyze.ast : Interval;
        import dextool.plugin.mutate.backend.database.type : SchemataFragment;
        import dextool.plugin.mutate.backend.type : Language, CodeChecksum, SchemataChecksum;

        alias LocalFileId = NamedType!(long, Tag!"LocalFileId", long.init,
                TagStringable, Hashable);
        alias LocalSchemaId = NamedType!(long, Tag!"LocalSchemaId", long.init,
                TagStringable, Hashable);

        /// The mutation points of all files, with paths relative to the root directory.
        MutationPointEntry2[] mutationPoints;

        /// Checksum and language of a file that has mutation points.
        static struct FileInfo {
            Checksum checksum;
            Language language;
        }

        /// The file that is analyzed, which is a root
        AbsolutePath root;
        /// Checksum of `root`.
        Checksum rootCs;

        /// The dependencies the root has.
        DepFile[] dependencies;

        /// The key is the ID from idFile.
        FileInfo[LocalFileId] infoId;

        /// The IDs is unique for *this* analyze, not globally.
        LocalFileId[AbsolutePath] idFile;
        /// The reverse of `idFile`.
        AbsolutePath[LocalFileId] fileId;

        // The FileID used in the metadata is local to this analysis. It has to
        // be remapped when added to the database.
        LineMetadata[] metadata;

        /// Mutant schematas that has been generated.
        SchemataResult.Fragments[AbsolutePath] schematas;

        /// Coverage intervals that can be instrumented.
        Interval[][LocalFileId] coverage;
    }
}
1076 
@(
        "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
unittest {
    import std.algorithm : canFind;
    import std.format : format;
    import std.regex : regex, matchFirst;

    auto reNomut = regex(Analyze.rawReNomut);
    const types = ["NOMUT", "NOMUTBEGIN", "NOMUTEND", "NOMUTNEXT"];
    auto okParseTypes = ["", "BEGIN", "END", "NEXT"];

    // Expand a comment template with one of the NOMUT variants and match it.
    auto parse(string tmpl, string type) {
        return matchFirst(format(tmpl, type), reNomut);
    }

    // No template/variant combination is allowed to match.
    void expectNoMatch(string[] templates) {
        foreach (tmpl; templates) {
            foreach (type; types) {
                parse(tmpl, type).whichPattern.shouldEqual(0);
            }
        }
    }

    // Every template/variant combination must match and capture the expected
    // tag and comment. The captured type is only verified when requested.
    void expectMatch(string[] templates, string tag, string comment, bool verifyType) {
        foreach (tmpl; templates) {
            foreach (type; types) {
                auto m = parse(tmpl, type);
                m.whichPattern.shouldEqual(1);
                if (verifyType)
                    okParseTypes.canFind(m["type"]).shouldBeGreaterThan(0);
                m["comment"].shouldEqual(comment);
                m["tag"].shouldEqual(tag);
            }
        }
    }

    // NOMUT in other type of comments should NOT match.
    expectNoMatch([
        "/// %s", "// stuff with %s in it", "/* stuff with %s in it */"
    ]);

    // neither tag nor comment
    expectMatch(["//%s", "// %s", "/*%s*/", "/* %s */", "/**%s*/"], "", "", false);

    // only a tag
    expectMatch(["//%s (my tag)", "// %s (my tag)", "/* %s (my tag) */",],
            "my tag", "", false);

    // only a comment
    // TODO: should work but doesn't.... : "/* %s my comment */"
    expectMatch(["//%s my comment", "// %s my comment"], "", "my comment", true);

    // both a tag and a comment
    expectMatch(["//%s (my tag) my comment", "// %s (my tag) my comment"],
            "my tag", "my comment", true);
}
1136 
/** Token stream that is backed by a clang context.
 *
 * `getTokens` hands out the result of `tokenize` as is while
 * `getFilteredTokens` removes the comment tokens from it.
 */
class TokenStreamImpl : TokenStream {
    import libclang_ast.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;
    import dextool.plugin.mutate.backend.utility : tokenize;

    /// Borrowed, not owned. See the constructor.
    ClangContext* ctx;

    /// The context must outlive any instance of this class.
    // TODO remove @trusted when upgrading to dmd-fe 2.091.0+ and activate dip25 + 1000
    this(ref ClangContext ctx) @trusted {
        this.ctx = &ctx;
    }

    /// Returns: the tokens `tokenize` produce for `p`.
    Token[] getTokens(Path p) scope {
        return tokenize(*ctx, p);
    }

    /// Returns: the tokens of `p` that should affect the checksum, i.e. all
    /// but the comments.
    Token[] getFilteredTokens(Path p) scope {
        import clang.c.Index : CXTokenKind;

        auto allTokens = tokenize(*ctx, p);
        return allTokens.filter!(tok => tok.kind != CXTokenKind.comment).array;
    }
}
1162 
/** Re-attach the marked mutants to the mutation status they belong to.
 *
 * A marked mutant is out of sync when its stored status checksum differs from
 * the checksum derived from its status ID. Such a marking is removed and
 * written anew with the status ID that is derived from the stored checksum.
 * The status of the mutant is then set to the marked status.
 */
void updateMarkedMutants(ref Database db) @trusted {
    import dextool.plugin.mutate.backend.database.type : MutationStatusId,
        toMutationStatusId, toChecksum;
    import dextool.plugin.mutate.backend.type : ExitStatus;

    foreach (marked; db.markMutantApi.getMarkedMutants) {
        // the marking already points to the right mutation status.
        if (marked.statusChecksum == toChecksum(marked.statusId))
            continue;

        const newStatusId = toMutationStatusId(marked.statusChecksum);
        db.markMutantApi.remove(marked.statusChecksum);
        db.markMutantApi.mark(marked.path, marked.sloc, newStatusId,
                marked.statusChecksum, marked.toStatus, marked.rationale, marked.mutText);
        db.mutantApi.update(newStatusId, marked.toStatus, ExitStatus(0));
    }
}
1189 
/** Print the marked mutants that have become lost due to a rerun of analyze.
 *
 * Nothing is printed when there are no lost markings. Otherwise a warning is
 * logged and a table with one row per lost marking is written to stdout.
 */
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.conv : to;
    import std.stdio : writeln;

    if (lostMutants.length == 0)
        return;

    auto table = Table!6([
        "ID", "File", "Line", "Column", "Status", "Rationale"
    ]);
    foreach (lost; lostMutants) {
        Table!6.Row row = [
            lost.statusId.get.to!string, lost.path, lost.sloc.line.to!string,
            lost.sloc.column.to!string, lost.toStatus.to!string,
            lost.rationale.get
        ];
        table.put(row);
    }

    log.warning("Marked mutants was lost");
    writeln(table);
}
1213 
@("shall only let files in the diff through")
unittest {
    import std.string : lineSplitter;
    import dextool.plugin.mutate.backend.diff_parser;

    // A unified diff where the old file is standalone.d and the new file is
    // standalone2.d.
    immutable lines = `diff --git a/standalone2.d b/standalone2.d
index 0123..2345 100644
--- a/standalone.d
+++ b/standalone2.d
@@ -31,7 +31,6 @@ import std.algorithm : map;
 import std.array : Appender, appender, array;
 import std.datetime : SysTime;
+import std.format : format;
-import std.typecons : Tuple;

 import d2sqlite3 : sqlDatabase = Database;

@@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
 struct Database {
     import std.conv : to;
     import std.exception : collectException;
-    import std.typecons : Nullable;
+    import std.typecons : Nullable, Flag, No;
     import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;

+    sqlDatabase db;`;

    // feed the diff to the parser one line at a time.
    UnifiedDiffParser p;
    foreach (line; lines.lineSplitter)
        p.process(line);
    auto diff = p.result;

    auto files = FileFilter(".".Path.AbsolutePath, true, diff);

    // only the new file of the diff is expected to pass the filter.
    files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
    files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
}
1251 
/** Convert to an absolute path by finding the first directory in which `file`
 * exists.
 *
 * The directories are tried in the order `workDir`, `includes` and then
 * `systemIncludes`.
 *
 * Returns: the first path that exists, otherwise none.
 */
Optional!AbsolutePath toAbsolutePath(Path file, AbsolutePath workDir,
        ParseFlags.Include[] includes, SystemIncludePath[] systemIncludes) @trusted nothrow {
    import std.file : exists;
    import std.path : buildPath;

    // Returns: `file` joined with `dir` if the result exists on disk. A
    // failure to check it is treated as the file not being there.
    Optional!AbsolutePath lookup(string dir) nothrow {
        const candidate = buildPath(dir, file);
        try {
            if (exists(candidate))
                return some(AbsolutePath(candidate));
        } catch (Exception e) {
        }
        return none!AbsolutePath;
    }

    auto inWorkDir = lookup(workDir.toString);
    if (inWorkDir.hasValue)
        return inWorkDir;

    foreach (inc; includes) {
        auto hit = lookup(inc.payload);
        if (hit.hasValue)
            return hit;
    }

    foreach (sysInc; systemIncludes) {
        auto hit = lookup(sysInc.value);
        if (hit.hasValue)
            return hit;
    }

    return none!AbsolutePath;
}
1287 
/** Find the root files that have changed since they were saved to the database.
 *
 * A root is reported as changed when `needFullAnalyze` is true, when its
 * checksum on disk differs from the one in the database or when that is the
 * case for any of its dependencies. A dependency that has no checksum stored
 * in the database is also counted as changed.
 *
 * Params:
 *  db = database to read the root files, dependencies and checksums from
 *  needFullAnalyze = if true all root files are reported as changed
 *  fio = used to convert the paths from the database to absolute paths
 *
 * Returns: all root files in the database mapped to true if the root need to
 * be re-analyzed because either it or one of its dependencies has changed.
 */
bool[Path] dependencyAnalyze(ref Database db, const bool needFullAnalyze, FilesysIO fio) @trusted {
    import dextool.cachetools : nullableCache;

    typeof(return) rval;

    // Start with all roots registered as unchanged. Those that are found to
    // be changed are flipped to true further down.
    // NOTE(review): an exception in the check below leave the remaining roots
    // as unchanged. Confirm that this is the intended fallback.
    foreach (a; db.getRootFiles.map!(a => db.getFile(a).get)) {
        rval[a] = false;
    }

    try {
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        // a file can be a dependency of many roots thus the checksum from the
        // file system is cached.
        auto getFileFsChecksum = nullableCache!(AbsolutePath, Checksum, (AbsolutePath p) {
            return checksum(p);
        })(256, 30.dur!"seconds");

        Checksum[Path] dbDeps;
        foreach (a; db.dependencyApi.getAll)
            dbDeps[a.file] = a.checksum;

        bool isChanged(T)(T f) {
            if (needFullAnalyze) {
                // because the tool version is updated then all files need to
                // be re-analyzed. an update can mean that scheman are
                // improved, mutants has been changed/removed etc. it is
                // unknown. the only way to be sure is to re-analyze all files.
                return true;
            }

            if (f.rootCs != getFileFsChecksum(fio.toAbsoluteRoot(f.root)))
                return true;

            foreach (dep; f.deps) {
                // indexing with a missing key is a RangeError which isn't
                // caught by the handler below. Without a stored checksum
                // there is nothing to compare with thus assume a change.
                auto stored = dep in dbDeps;
                if (stored is null)
                    return true;
                if (getFileFsChecksum(fio.toAbsoluteRoot(dep)) != *stored)
                    return true;
            }

            return false;
        }

        foreach (f; db.getRootFiles
                .map!(a => db.getFile(a).get)
                .map!(a => tuple!("root", "rootCs", "deps")(a,
                    getFileDbChecksum(a), db.dependencyApi.get(a)))
                .cache
                .filter!(a => isChanged(a))
                .map!(a => a.root)) {
            rval[f] = true;
        }
    } catch (Exception e) {
        log.warning(e.msg);
    }

    log.trace("Dependency analyze: ", rval);

    return rval;
}
1353 
/** Only utf-8 files are supported.
 *
 * Returns: true if the content of `p` has either a utf-8 BOM or no BOM at
 * all. A warning is logged for a file with a utf-8 BOM.
 */
bool isFileSupported(FilesysIO fio, AbsolutePath p) @safe {
    import std.encoding : getBOM, BOM;

    const bomKind = fio.makeInput(p).content.getBOM().schema;

    switch (bomKind) {
    case BOM.utf8:
        log.warningf("%s has a utf-8 BOM marker. It will make all coverage and scheman fail to compile",
                p);
        return true;
    case BOM.none:
        return true;
    default:
        return false;
    }
}
1368 
/** Save the schema fragments of the files that are known to the database.
 *
 * The fragments of a file for which the database has no file ID are skipped.
 */
void saveSchemaFragments(ref Database db, FilesysIO fio,
        ref SchemataResult.Fragments[AbsolutePath] fragments) {
    import dextool.plugin.mutate.backend.database.type : SchemaFragmentV2, toMutationStatusId;

    foreach (entry; fragments.byKeyValue) {
        auto fileId = db.getFileId(fio.toRelativeRoot(entry.key));
        if (fileId.isNull)
            continue;

        // TODO: SchemaFragmentV2 and SchemataResult.Fragment are pretty
        // similare to each other. Only CodeMutant is different.
        auto converted = entry.value.fragments.map!(frag => SchemaFragmentV2(frag.offset,
                frag.text, frag.mutants.map!(m => m.id.toMutationStatusId).array)).array;
        db.schemaApi.putFragments(fileId.get, converted);
    }
}
1385 
/// The outcome of `needFullAnalyze`.
struct NeedFullAnalyzeResult {
    /// Checksum of the configuration file. It is `Checksum.init` when the
    /// check failed.
    Checksum cs;
    /// True if the configuration or the tool version differs from what the
    /// database has stored, or if that could not be determined.
    bool status;
}
1390 
/** Determine if all files have to be re-analyzed.
 *
 * That is the case when the checksum of `config` differs from the config
 * version in the database or when the database reports that the tool version
 * is different. A failure to determine this is logged and reported as a full
 * analyze being needed, together with an empty checksum.
 */
NeedFullAnalyzeResult needFullAnalyze(ref Database db, AbsolutePath config) @safe nothrow {
    // the answer if anything below throws.
    auto rval = NeedFullAnalyzeResult(Checksum.init, true);

    try {
        const current = checksum(config);
        const previous = db.miscApi.getConfigVersion;

        // the tool version is only checked if the config is unchanged.
        bool differs = current != previous;
        if (!differs)
            differs = db.miscApi.isToolVersionDifferent(ToolVersion(dextoolBinaryId));

        logger.tracef("Config prev:%s curr:%s status:%s", previous.c0, current.c0, differs);
        rval = NeedFullAnalyzeResult(current, differs);
    } catch (Exception e) {
        logger.trace(e.msg).collectException;
    }

    return rval;
}