1 /**cpptooling.analyzer.clang
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import logger = std.experimental.logger;
17 import std.algorithm : map, filter, joiner, cache;
18 import std.array : array, appender, empty;
19 import std.concurrency;
20 import std.datetime : dur, Duration;
21 import std.exception : collectException;
22 import std.parallelism;
23 import std.range : tee, enumerate;
24 import std.typecons : tuple;
25 
26 import colorlog;
27 import my.filter : GlobFilter;
28 import my.named_type;
29 import my.optional;
30 import my.set;
31 
32 static import colorlog;
33 
34 import dextool.utility : dextoolBinaryId;
35 
36 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB,
37     ParsedCompileCommandRange, ParsedCompileCommand, ParseFlags, SystemIncludePath;
38 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
39 import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataResult;
40 import dextool.plugin.mutate.backend.database : Database, LineMetadata,
41     MutationPointEntry2, DepFile;
42 import dextool.plugin.mutate.backend.database.type : MarkedMutant, TestFile,
43     TestFilePath, TestFileChecksum, ToolVersion;
44 import dextool.plugin.mutate.backend.diff_parser : Diff;
45 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
46 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
47 import dextool.plugin.mutate.backend.utility : checksum, Checksum, getProfileResult, Profile;
48 import dextool.plugin.mutate.backend.type : Mutation;
49 import dextool.plugin.mutate.type : MutationKind;
50 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze, ConfigSchema, ConfigCoverage;
51 import dextool.type : ExitStatusType, AbsolutePath, Path;
52 
53 version (unittest) {
54     import unit_threaded.assertions;
55 }
56 
57 alias log = colorlog.log!"analyze";
58 
/** Analyze the files in `frange` for mutations.
 *
 * The work is split over three kinds of actors:
 *  - one store actor (own thread) that is the only one writing to the
 *    database at `dbPath`,
 *  - one task that checksums the test files,
 *  - one task per unique file in `frange` that passes the file filters.
 *
 * The function blocks until all tasks have finished and the store actor has
 * sent a `StoreDoneMsg`.
 *
 * Params:
 *  dbPath = database that the store actor opens.
 *  userKinds = mutation kinds from the user, converted with `toInternal`.
 *  analyzeConf = analyze configuration (pool size, filters, profiling, ...).
 *  compilerConf = forwarded to each analyze task.
 *  schemaConf = forwarded to the store actor.
 *  covConf = forwarded to both the analyze tasks and the store actor.
 *  frange = the compile commands to analyze.
 *  valLoc = duplicated for each analyze task.
 *  fio = duplicated for each actor/task.
 *
 * Returns: `ExitStatusType.Ok`.
 */
ExitStatusType runAnalyzer(const AbsolutePath dbPath, const MutationKind[] userKinds,
        ConfigAnalyze analyzeConf, ConfigCompiler compilerConf,
        ConfigSchema schemaConf, ConfigCoverage covConf,
        ParsedCompileCommandRange frange, ValidateLoc valLoc, FilesysIO fio) @trusted {
    import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
    import dextool.plugin.mutate.backend.mutation_type : toInternal;

    // stays `FileFilter.init` (accept everything) if the diff can't be parsed.
    FileFilter diffFilter;
    try {
        diffFilter = FileFilter(fio.getOutputDir, analyzeConf.unifiedDiffFromStdin,
                analyzeConf.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
    } catch (Exception e) {
        log.info(e.msg);
        log.warning("Unable to parse diff");
    }

    // a file must pass both the user configured matcher and the diff filter.
    bool passesFilters(AbsolutePath p) {
        if (!analyzeConf.fileMatcher.match(p.toString))
            return false;
        return diffFilter.shouldAnalyze(p);
    }

    // a pool size of zero means "let TaskPool choose its default".
    auto pool = analyzeConf.poolSize == 0 ? new TaskPool() : new TaskPool(analyzeConf.poolSize);

    // if a dependency of a root file has been changed.
    auto changedDeps = dependencyAnalyze(dbPath, fio);

    // the root files for which no dependency has changed.
    auto unchangedRoots = changedDeps.byKeyValue
        .filter!(kv => !kv.value)
        .map!(kv => kv.key)
        .array;

    // will only be used by one thread at a time.
    auto store = spawn(&storeActor, dbPath, cast(shared) fio.dup,
            cast(shared) StoreConfig(analyzeConf, schemaConf, covConf),
            cast(immutable) unchangedRoots);

    try {
        pool.put(task!testPathActor(analyzeConf.testPaths,
                analyzeConf.testFileMatcher, fio.dup, store));
    } catch (Exception e) {
        log.trace(e);
        log.warning(e.msg);
    }

    auto kinds = toInternal(userKinds);
    int taskCnt;
    Set!AbsolutePath alreadyAnalyzed;
    // dfmt off
    foreach (f; frange
            // The tool only supports analyzing a file one time.
            // This optimize it in some cases where the same file occurs
            // multiple times in the compile commands database.
            .filter!(cmd => cmd.cmd.absoluteFile !in alreadyAnalyzed)
            .tee!(cmd => alreadyAnalyzed.add(cmd.cmd.absoluteFile))
            .cache
            .filter!(cmd => passesFilters(cmd.cmd.absoluteFile))
            ) {
        try {
            // a known root file is skipped when nothing it depends on has
            // changed, unless the user force a save.
            auto changed = fio.toRelativeRoot(f.cmd.absoluteFile) in changedDeps;
            if (changed !is null && !*changed && !analyzeConf.forceSaveAnalyze)
                continue;

            //log.infof("%s sending", f.cmd.absoluteFile);
            pool.put(task!analyzeActor(kinds, f, valLoc.dup, fio.dup, AnalyzeConfig(compilerConf, analyzeConf, covConf), store));
            ++taskCnt;
        } catch (Exception e) {
            log.trace(e);
            log.warning(e.msg);
        }
    }
    // dfmt on

    changedDeps = typeof(changedDeps).init; // free the memory

    // inform the store actor of how many analyse results it should *try* to
    // save.
    send(store, AnalyzeCntMsg(taskCnt));
    // wait for all files to be analyzed
    pool.finish(true);
    // wait for the store actor to finish
    receiveOnly!StoreDoneMsg;

    if (analyzeConf.profile) {
        try {
            import std.stdio : writeln;

            writeln(getProfileResult.toString);
        } catch (Exception e) {
            log.warning("Unable to print the profile data: ", e.msg).collectException;
        }
    }

    return ExitStatusType.Ok;
}
158 
159 @safe:
160 
/** Decide which files to analyze: either all of them or only those that are
 * part of a diff.
 *
 * The comparison is done on the path relative to `root` with the file
 * extension removed. Headers are normally not listed in
 * `compile_commands.json` so an exact match would skip them. By ignoring the
 * extension a change to e.g. `foo.hpp` let `foo.cpp` through the filter.
 *
 * TODO: this may create problems for header only libraries. Only the unittest
 * include the header which mean that, for this to work, the unittest would
 * have to reside in the same directory as the header file, which they normally
 * never do. A diff of a header only library then lead to "no files analyzed".
 */
struct FileFilter {
    import std.path : stripExtension;

    /// Paths from the diff, without file extension.
    Set!string files;
    /// If false then every file is accepted.
    bool useFileFilter;
    /// The directory that paths are made relative to when they are matched.
    AbsolutePath root;

    /**
     * Params:
     *  root = directory used to resolve the paths in the diff and to match against.
     *  fromStdin = activate the filter.
     *  diff = the diff whose files are the ones to accept.
     */
    this(AbsolutePath root, bool fromStdin, Diff diff) {
        this.root = root;
        this.useFileFilter = fromStdin;
        foreach (entry; diff.toRange(root))
            files.add(entry.key.stripExtension);
    }

    /// Returns: true if `p` should be analyzed.
    bool shouldAnalyze(AbsolutePath p) {
        import std.path : relativePath;

        if (useFileFilter) {
            auto candidate = relativePath(p, root).stripExtension;
            return candidate in files;
        }

        return true;
    }
}
199 
/** Sent by the main thread to the `storeActor` when all analyze tasks have
 * been scheduled.
 */
struct AnalyzeCntMsg {
    /// Number of spawned analyze tasks, i.e. results that the store actor should wait for.
    int value;
}
204 
/// Sent by the `storeActor` to its owner, the main thread, which blocks until it arrives.
struct StoreDoneMsg {
}
208 
/// The parts of the user configuration that are handed to each `analyzeActor`.
struct AnalyzeConfig {
    /// Compiler related settings.
    ConfigCompiler compiler;
    /// Analyze related settings.
    ConfigAnalyze analyze;
    /// Coverage related settings.
    ConfigCoverage coverage;
}
214 
/** Analyze one file and send the outcome to the store actor.
 *
 * Exactly one `immutable Analyze.Result` is sent to `storeActor` for each
 * call because the store actor counts the received results. If the analyze
 * fails, or any of the analyzed files is in an unsupported format, an empty
 * result is sent instead.
 *
 * Params:
 *  kinds = the kind of mutants to generate.
 *  fileToAnalyze = the compile command for the file to analyze.
 *  vloc = passed on to `Analyze`.
 *  fio = file system access, passed on to `Analyze`.
 *  conf = configuration of the analyze.
 *  storeActor = receiver of the result.
 */
void analyzeActor(Mutation.Kind[] kinds, ParsedCompileCommand fileToAnalyze,
        ValidateLoc vloc, FilesysIO fio, AnalyzeConfig conf, Tid storeActor) @trusted nothrow {
    auto profile = Profile("analyze file " ~ fileToAnalyze.cmd.absoluteFile);

    try {
        log.tracef("%s begin", fileToAnalyze.cmd.absoluteFile);
        auto analyzer = Analyze(kinds, vloc, fio, Analyze.Config(conf.compiler.forceSystemIncludes,
                conf.coverage.use, conf.compiler.allowErrors.get));
        analyzer.process(fileToAnalyze);

        bool onlyValidFiles = true;
        foreach (a; analyzer.result.idFile.byKey) {
            if (!isFileSupported(fio, a)) {
                // the file name must be passed as an argument. A format
                // specifier without an argument throws a FormatException.
                log.warningf("%s: file not supported. It must be in utf-8 format without a BOM marker",
                        a);
                onlyValidFiles = false;
                break;
            }
        }

        if (onlyValidFiles) {
            send(storeActor, cast(immutable) analyzer.result);
        } else {
            // `Analyze.Result` is a class thus `.init` is null which the
            // receiver would dereference. Send an empty instance instead.
            send(storeActor, cast(immutable) new Analyze.Result);
        }

        // the result has been sent. A failure to log must not reach the
        // handler below because it would send a second result.
        log.tracef("%s end", fileToAnalyze.cmd.absoluteFile).collectException;
        return;
    } catch (Exception e) {
        log.error(e.msg).collectException;
    }

    // send a dummy result
    try {
        log.tracef("%s failed", fileToAnalyze.cmd.absoluteFile);
        send(storeActor, cast(immutable) new Analyze.Result);
    } catch (Exception e) {
        log.error(e.msg).collectException;
    }
}
254 
/// The test files found by `testPathActor`, sent to the `storeActor`.
class TestFileResult {
    /// How long it took to find and checksum the test files.
    Duration time;
    /// The test files, indexed by the checksum of their content.
    TestFile[Checksum] files;
}
259 
/** Find and checksum the test files and send them to the store actor.
 *
 * Each path in `userPaths` is visited. The entries of a directory are added
 * to the work list, which makes the traversal recursive. A file is saved if
 * it matches `matcher`. A path that can't be processed is logged and skipped.
 *
 * The result is sent as one `immutable TestFileResult` to `storeActor`.
 *
 * Params:
 *  userPaths = files and directories to search for test files.
 *  matcher = only files that match are saved.
 *  fio = used to read the files and to make their paths relative.
 *  storeActor = receiver of the result.
 */
void testPathActor(const AbsolutePath[] userPaths, GlobFilter matcher, FilesysIO fio, Tid storeActor) @trusted nothrow {
    import std.datetime : Clock;
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.file : isDir, isFile, dirEntries, SpanMode;
    import my.container.vector;

    auto profile = Profile("checksum test files");

    auto sw = StopWatch(AutoStart.yes);

    TestFile makeTestFile(const AbsolutePath file) {
        auto cs = checksum(fio.makeInput(file).content[]);
        return TestFile(TestFilePath(fio.toRelativeRoot(file)),
                TestFileChecksum(cs), Clock.currTime);
    }

    // work list, grows when a directory is found.
    auto paths = vector(userPaths.dup);

    auto tfiles = new TestFileResult;

    while (!paths.empty) {
        try {
            if (isDir(paths.front)) {
                log.trace("  Test directory ", paths.front);
                foreach (a; dirEntries(paths.front, SpanMode.shallow).map!(
                        a => AbsolutePath(a.name))) {
                    paths.put(a);
                }
            } else if (isFile(paths.front) && matcher.match(paths.front)) {
                log.trace("  Test saved ", paths.front);
                auto t = makeTestFile(paths.front);
                tfiles.files[t.checksum.get] = t;
            }
        } catch (Exception e) {
            log.warning(e.msg).collectException;
        }

        paths.popFront;
    }

    // must be written before the result is sent. After the send the object is
    // immutable and owned by the receiver which reads `time`.
    tfiles.time = sw.peek;

    log.infof("Found %s test files", tfiles.files.length).collectException;

    try {
        send(storeActor, cast(immutable) tfiles);
    } catch (Exception e) {
        // the store actor is waiting for this message thus make the failure visible.
        log.error(e.msg).collectException;
        log.error("Unable to send the test files to the store actor").collectException;
    }
}
309 
/// The parts of the user configuration that are handed to the `storeActor`.
struct StoreConfig {
    /// Analyze related settings.
    ConfigAnalyze analyze;
    /// Schema related settings.
    ConfigSchema schema;
    /// Coverage related settings.
    ConfigCoverage coverage;
}
315 
/** Store the result of the analyze in the database.
 *
 * The actor opens the database at `dbPath` and then receives messages until
 * it has got:
 *  - an `AnalyzeCntMsg` and that many `immutable Analyze.Result`,
 *  - one `immutable TestFileResult`.
 *
 * After that the schemas are finalized, the database is pruned/updated and a
 * `StoreDoneMsg` is sent to the owner thread. The `StoreDoneMsg` is sent even
 * if the database can't be opened or the save fails, because the owner blocks
 * until it receives it.
 *
 * Params:
 *  dbPath = the database to open.
 *  fioShared = file system access. Cast back to an unshared `FilesysIO`.
 *  confShared = configuration. Cast back to an unshared `StoreConfig`.
 *  rootFiles = root files that are to be kept in the database together with
 *      their dependencies even though they were not analyzed (see `addRoots`).
 */
void storeActor(const AbsolutePath dbPath, scope shared FilesysIO fioShared,
        scope shared StoreConfig confShared, immutable Path[] rootFiles) @trusted nothrow {
    import cachetools : CacheLRU;
    import dextool.cachetools : nullableCache;
    import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

    const conf = cast() confShared;

    // The conditions that the storeActor is waiting for receiving the results
    // from the workers.
    static struct RecvWaiter {
        // int.max until the AnalyzeCntMsg has been received.
        int analyzeFileWaitCnt = int.max;
        int analyzeFileCnt;

        bool isTestFilesDone;

        bool isWaiting() {
            return analyzeFileCnt < analyzeFileWaitCnt || !isTestFilesDone;
        }
    }

    // Accumulates schema fragments and saves the composed schemas to the database.
    static struct SchemataSaver {
        import sumtype;
        import my.optional;
        import dextool.plugin.mutate.backend.analyze.pass_schemata : SchemataBuilder;

        typeof(ConfigSchema.minMutantsPerSchema) minMutantsPerSchema;
        typeof(ConfigSchema.mutantsPerSchema) mutantsPerSchema;
        SchemataBuilder builder;

        void put(FilesysIO fio, SchemataResult.Schemata[AbsolutePath] a) {
            builder.put(fio, a);
        }

        // Save one schema, if there is one, for those of its mutants that
        // exist in the database. A failure is only traced.
        void process(ref Database db, Optional!(SchemataBuilder.ET) value) {
            value.match!((Some!(SchemataBuilder.ET) a) {
                try {
                    auto mutants = a.mutants
                        .map!(a => db.getMutationStatusId(a.id))
                        .filter!(a => !a.isNull)
                        .map!(a => a.get)
                        .array;
                    if (!mutants.empty) {
                        const id = db.putSchemata(a.checksum, a.fragments, mutants);
                        log.tracef(!id.isNull, "Saving schema %s with %s mutants",
                            id.get.get, mutants.length);
                    }
                } catch (Exception e) {
                    log.trace(e.msg);
                }
            }, (None a) {});
        }

        /// Consume fragments used by scheman containing >min mutants.
        void intermediate(ref Database db) {
            builder.discardMinScheman = false;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            // the minimum is set to the same value as the maximum.
            builder.minMutantsPerSchema = mutantsPerSchema.get;

            while (!builder.isDone) {
                process(db, builder.next);
            }

            builder.restart;
        }

        /// Consume all fragments or discard.
        void finalize(ref Database db) {
            builder.discardMinScheman = true;
            builder.mutantsPerSchema = mutantsPerSchema.get;
            builder.minMutantsPerSchema = minMutantsPerSchema.get;

            // two loops to pass over all mutants and retry new schema
            // compositions. Any schema that is less than the minimum will be
            // discarded so the number of mutants will shrink.
            while (!builder.isDone) {
                while (!builder.isDone) {
                    process(db, builder.next);
                }
                builder.restart;
            }
        }
    }

    auto schemas = SchemataSaver(conf.schema.minMutantsPerSchema, conf.schema.mutantsPerSchema);

    // Tell the owner (main thread) that the store actor is finished. The
    // owner blocks until it receives the message thus it must be sent on
    // every path out of the actor.
    void notifyOwnerDone() nothrow {
        try {
            send(ownerTid, StoreDoneMsg.init);
        } catch (Exception e) {
            log.errorf("Fatal error. Unable to send %s to the main thread",
                    StoreDoneMsg.init).collectException;
        }
    }

    // Receive and save all results, update the database and, as the last
    // step, notify the owner.
    void helper(FilesysIO fio, ref Database db) nothrow {
        // A file is at most saved one time to the database.
        Set!AbsolutePath savedFiles;

        // if the check fails the version is assumed to be different.
        const isToolVersionDifferent = () nothrow{
            try {
                return db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
            } catch (Exception e) {
            }
            return true;
        }();

        auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
                30.dur!"seconds");
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
            return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]);
        })(256, 30.dur!"seconds");

        static struct Files {
            Checksum[Path] value;

            this(ref Database db) {
                foreach (a; db.getDetailedFiles) {
                    value[a.file] = a.fileChecksum;
                }
            }
        }

        // Save the result of the analyze of one root file.
        void save(immutable Analyze.Result result_) {
            import dextool.plugin.mutate.backend.type : Language;

            auto result = cast() result_;

            auto profile = Profile("save " ~ result.root);

            // mark files that have an unchanged checksum as "already saved"
            foreach (f; result.idFile
                    .byKey
                    .filter!(a => a !in savedFiles)
                    .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a)
                        && !conf.analyze.forceSaveAnalyze && !isToolVersionDifferent)) {
                log.info("Unchanged ".color(Color.yellow), f);
                savedFiles.add(f);
            }

            // only saves mutation points to a file one time.
            {
                auto app = appender!(MutationPointEntry2[])();
                bool isChanged = isToolVersionDifferent;
                foreach (mp; result.mutationPoints
                        .map!(a => tuple!("data", "file")(a, fio.toAbsoluteRoot(a.file)))
                        .filter!(a => a.file !in savedFiles)) {
                    app.put(mp.data);
                }
                foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) {
                    isChanged = true;
                    log.info("Saving ".color(Color.green), f);
                    const relp = fio.toRelativeRoot(f);

                    // this is critical in order to remove old data about a file.
                    db.removeFile(relp);

                    const info = result.infoId[result.idFile[f]];
                    db.put(relp, info.checksum, info.language, f == result.root);
                    savedFiles.add(f);
                }
                db.put(app.data, fio.getOutputDir);

                if (result.root !in savedFiles) {
                    // this occurs when the file is e.g. a unittest that uses a
                    // header only library. The unittests are not mutated thus
                    // no mutation points exists in them but we want dextool to
                    // still, if possible, track the unittests for changes.
                    isChanged = true;
                    const relp = fio.toRelativeRoot(result.root);
                    db.removeFile(relp);
                    // the language do not matter because it is a file without
                    // any mutants.
                    db.put(relp, result.rootCs, Language.init, true);
                    savedFiles.add(fio.toAbsoluteRoot(result.root));
                }

                // must always update dependencies because they may not contain
                // mutants. Only files that are changed and contain mutants
                // trigger isChanged to be true.
                db.dependencyApi.set(fio.toRelativeRoot(result.root), result.dependencies);

                if (isChanged) {
                    foreach (a; result.coverage.byKeyValue) {
                        const fid = getFileId(fio.toRelativeRoot(result.fileId[a.key]));
                        if (!fid.isNull) {
                            db.clearCoverageMap(fid.get);
                            db.putCoverageMap(fid.get, a.value);
                        }
                    }

                    // only save the schematas if mutation points where saved.
                    // This ensure that only schematas for changed/new files
                    // are saved.
                    schemas.put(fio, result.schematas);
                    schemas.intermediate(db);
                }
            }

            {
                // file IDs that the user has already been informed about.
                Set!long printed;
                auto app = appender!(LineMetadata[])();
                foreach (md; result.metadata) {
                    const localId = Analyze.Result.LocalFileId(md.id.get);
                    // transform the ID from local to global.
                    const fid = getFileId(fio.toRelativeRoot(result.fileId[localId]));
                    if (fid.isNull && !printed.contains(md.id.get)) {
                        printed.add(md.id.get);
                        log.info("File with suppressed mutants (// NOMUT) not in the database: ",
                                result.fileId[localId]).collectException;
                    } else if (!fid.isNull) {
                        app.put(LineMetadata(fid.get, md.line, md.attr));
                    }
                }
                db.put(app.data);
            }
        }

        // Synchronize the test files in the database with `result`. Those
        // whose checksum is not in `result` are removed and those whose
        // checksum is not in the database are added.
        void saveTestResult(immutable TestFileResult result) {
            auto profile = Profile("save test files");
            Set!Checksum old;

            foreach (a; db.getTestFiles) {
                old.add(a.checksum.get);
                if (a.checksum.get !in result.files) {
                    log.info("Removed test file ", a.file.get.toString);
                    db.removeFile(a.file);
                }
            }

            foreach (a; result.files.byValue.filter!(a => a.checksum.get !in old)) {
                log.info("Saving test file ", a.file.get.toString);
                db.put(a);
            }
        }

        // listen for results from workers until the expected number is processed.
        void recv() {
            log.info("Updating files");
            RecvWaiter waiter;

            while (waiter.isWaiting) {
                try {
                    receive((AnalyzeCntMsg a) {
                        waiter.analyzeFileWaitCnt = a.value;
                    }, (immutable Analyze.Result a) {
                        auto trans = db.transaction;
                        // counted before the save such that a failed save
                        // still count as a received result.
                        waiter.analyzeFileCnt++;
                        save(a);
                        trans.commit;

                        log.infof("Analyzed file %s/%s", waiter.analyzeFileCnt,
                            waiter.analyzeFileWaitCnt);
                    }, (immutable TestFileResult a) {
                        auto trans = db.transaction;
                        waiter.isTestFilesDone = true;
                        saveTestResult(a);
                        trans.commit;

                        log.info("Done analyzing test files in ", a.time);
                    });
                } catch (Exception e) {
                    log.trace(e).collectException;
                    log.warning(e.msg).collectException;
                }
            }
        }

        // Remove the files from the database that are not in `savedFiles`.
        void pruneFiles() {
            import std.path : buildPath;

            auto profile = Profile("prune files");

            log.info("Pruning the database of dropped files");
            auto files = db.getFiles.map!(a => fio.toAbsoluteRoot(a)).toSet;

            foreach (f; files.setDifference(savedFiles).toRange) {
                log.info("Removing ".color(Color.red), f);
                db.removeFile(fio.toRelativeRoot(f));
            }
        }

        void addRoots() {
            if (conf.analyze.forceSaveAnalyze || isToolVersionDifferent)
                return;

            // add root files and their dependencies that has not been analyzed because nothing has changed.
            // By adding them they are not removed.

            auto profile = Profile("add roots and dependencies");
            foreach (a; rootFiles) {
                auto p = fio.toAbsoluteRoot(a);
                if (p !in savedFiles) {
                    savedFiles.add(p);
                    // fejk text for the user to tell them that yes, the files have
                    // been analyzed.
                    log.info("Analyzing ", a);
                    log.info("Unchanged ".color(Color.yellow), a);
                }
            }
            foreach (a; rootFiles.map!(a => db.dependencyApi.get(a)).joiner) {
                savedFiles.add(fio.toAbsoluteRoot(a));
            }
        }

        void fastDbOn() {
            if (!conf.analyze.fastDbStore)
                return;
            log.info(
                    "Turning OFF sqlite3 synchronization protection to improve the write performance");
            log.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
            db.run("PRAGMA synchronous = OFF");
            db.run("PRAGMA journal_mode = MEMORY");
        }

        void fastDbOff() {
            if (!conf.analyze.fastDbStore)
                return;
            db.run("PRAGMA synchronous = ON");
            db.run("PRAGMA journal_mode = DELETE");
        }

        try {
            import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

            // a small mailbox that blocks the senders when it is full. The
            // workers are slowed down if saving to the database is too slow.
            // This avoids excessive or even fatal memory usage.
            setMaxMailboxSize(thisTid, 2, OnCrowding.block);

            fastDbOn();

            {
                auto trans = db.transaction;
                auto profile = Profile("prune old schemas");
                if (isToolVersionDifferent) {
                    log.info("Prune database of schematan created by the old version");
                    db.deleteAllSchemas;
                }
                trans.commit;
            }

            recv();
            {
                auto trans = db.transaction;
                schemas.finalize(db);
                trans.commit;
            }

            {
                auto trans = db.transaction;
                addRoots();

                log.info("Resetting timeout context");
                resetTimeoutContext(db);

                log.info("Updating metadata");
                db.updateMetadata;

                if (conf.analyze.prune) {
                    pruneFiles();
                    {
                        auto profile = Profile("remove orphaned mutants");
                        log.info("Removing orphaned mutants");
                        db.removeOrphanedMutants;
                    }
                    {
                        auto profile = Profile("prune schemas");
                        log.info("Prune the database of unused schemas");
                        db.pruneSchemas;
                    }
                    {
                        auto profile = Profile("prune dependencies");
                        log.info("Prune dependencies");
                        db.dependencyApi.cleanup;
                    }
                }

                log.info("Updating manually marked mutants");
                updateMarkedMutants(db);
                printLostMarkings(db.getLostMarkings);

                if (isToolVersionDifferent) {
                    log.info("Updating tool version");
                    db.updateToolVersion(ToolVersion(dextoolBinaryId));
                }

                log.info("Committing changes");
                trans.commit;
                log.info("Ok".color(Color.green));
            }

            fastDbOff();

            if (isToolVersionDifferent) {
                auto profile = Profile("compact");
                log.info("Compacting the database");
                db.vacuum;
            }
        } catch (Exception e) {
            log.error(e.msg).collectException;
            log.error("Failed to save the result of the analyze to the database").collectException;
        }

        notifyOwnerDone();
    }

    try {
        FilesysIO fio = cast(FilesysIO) fioShared;
        auto db = Database.make(dbPath);
        helper(fio, db);
    } catch (Exception e) {
        log.error(e.msg).collectException;
        // `helper` is nothrow and always notifies the owner itself. Thus this
        // is only reached when the setup failed, e.g. the database couldn't
        // be opened. The owner must still be released from its wait.
        notifyOwnerDone();
    }
}
730 
731 /// Analyze a file for mutants.
732 struct Analyze {
733     import std.regex : Regex, regex, matchFirst;
734     import std.typecons : Yes;
735     import libclang_ast.context : ClangContext;
736 
737     static struct Config {
738         bool forceSystemIncludes;
739         bool saveCoverage;
740         bool allowErrors;
741     }
742 
743     private {
744         static immutable rawReNomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;
745 
746         Regex!char re_nomut;
747 
748         ValidateLoc valLoc;
749         FilesysIO fio;
750 
751         Cache cache;
752 
753         Result result;
754 
755         Config conf;
756 
757         Mutation.Kind[] kinds;
758     }
759 
760     this(Mutation.Kind[] kinds, ValidateLoc valLoc, FilesysIO fio, Config conf) @trusted {
761         this.kinds = kinds;
762         this.valLoc = valLoc;
763         this.fio = fio;
764         this.cache = new Cache;
765         this.re_nomut = regex(rawReNomut);
766         this.result = new Result;
767         this.conf = conf;
768     }
769 
770     void process(ParsedCompileCommand commandsForFileToAnalyze) @safe {
771         import std.file : exists;
772 
773         commandsForFileToAnalyze.flags.forceSystemIncludes = conf.forceSystemIncludes;
774 
775         try {
776             if (!exists(commandsForFileToAnalyze.cmd.absoluteFile)) {
777                 log.warningf("Failed to analyze %s. Do not exist",
778                         commandsForFileToAnalyze.cmd.absoluteFile);
779                 return;
780             }
781         } catch (Exception e) {
782             log.warning(e.msg);
783             return;
784         }
785 
786         result.root = commandsForFileToAnalyze.cmd.absoluteFile;
787 
788         try {
789             result.rootCs = checksum(result.root);
790 
791             auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
792             auto tstream = new TokenStreamImpl(ctx);
793 
794             analyzeForMutants(commandsForFileToAnalyze, result.root, ctx, tstream);
795             foreach (f; result.fileId.byValue)
796                 analyzeForComments(f, tstream);
797         } catch (Exception e) {
798             () @trusted { log.trace(e); }();
799             log.info(e.msg);
800             log.error("failed analyze of ",
801                     commandsForFileToAnalyze.cmd.absoluteFile).collectException;
802         }
803     }
804 
805     void analyzeForMutants(ParsedCompileCommand commandsForFileToAnalyze,
806             AbsolutePath fileToAnalyze, ref ClangContext ctx, TokenStream tstream) @safe {
807         import my.gc.refc : RefCounted;
808         import dextool.plugin.mutate.backend.analyze.ast : Ast;
809         import dextool.plugin.mutate.backend.analyze.pass_clang;
810         import dextool.plugin.mutate.backend.analyze.pass_coverage;
811         import dextool.plugin.mutate.backend.analyze.pass_filter;
812         import dextool.plugin.mutate.backend.analyze.pass_mutant;
813         import dextool.plugin.mutate.backend.analyze.pass_schemata;
814         import libclang_ast.check_parse_result : hasParseErrors, logDiagnostic;
815 
816         log.info("Analyzing ", fileToAnalyze);
817         RefCounted!(Ast) ast;
818         {
819             auto tu = ctx.makeTranslationUnit(fileToAnalyze,
820                     commandsForFileToAnalyze.flags.completeFlags);
821             if (tu.hasParseErrors) {
822                 logDiagnostic(tu);
823                 log.warningf("Compile error in %s", fileToAnalyze);
824                 if (!conf.allowErrors) {
825                     log.warning("Skipping");
826                     return;
827                 }
828             }
829 
830             auto res = toMutateAst(tu.cursor, fio);
831             ast = res.ast;
832             saveDependencies(commandsForFileToAnalyze.flags, result.root, res.dependencies);
833             log!"analyze.pass_clang".trace(ast);
834         }
835 
836         auto codeMutants = () {
837             auto mutants = toMutants(ast, fio, valLoc, kinds);
838             log!"analyze.pass_mutant".trace(mutants);
839 
840             log!"analyze.pass_filter".trace("filter mutants");
841             mutants = filterMutants(fio, mutants);
842             log!"analyze.pass_filter".trace(mutants);
843 
844             return toCodeMutants(mutants, fio, tstream);
845         }();
846         debug logger.trace(codeMutants);
847 
848         {
849             auto schemas = toSchemata(ast, fio, codeMutants);
850             log!"analyze.pass_schema".trace(schemas);
851             log.tracef("path dedup count:%s length_acc:%s", ast.paths.count,
852                     ast.paths.lengthAccum);
853 
854             result.schematas = schemas.getSchematas;
855         }
856 
857         result.mutationPoints = codeMutants.points.byKeyValue.map!(
858                 a => a.value.map!(b => MutationPointEntry2(fio.toRelativeRoot(a.key),
859                 b.offset, b.sloc.begin, b.sloc.end, b.mutants))).joiner.array;
860         foreach (f; codeMutants.points.byKey) {
861             const id = Result.LocalFileId(result.idFile.length);
862             result.idFile[f] = id;
863             result.fileId[id] = f;
864             result.infoId[id] = Result.FileInfo(codeMutants.csFiles[f], codeMutants.lang);
865         }
866 
867         if (conf.saveCoverage) {
868             auto cov = toCoverage(ast, fio, valLoc);
869             debug logger.trace(cov);
870 
871             foreach (a; cov.points.byKeyValue) {
872                 if (auto id = a.key in result.idFile) {
873                     result.coverage[*id] = a.value;
874                 }
875             }
876         }
877     }
878 
879     /** Tokens are always from the same file.
880      *
881      * TODO: move this to pass_clang.
882      */
883     void analyzeForComments(AbsolutePath file, TokenStream tstream) @trusted {
884         import std.algorithm : filter;
885         import clang.c.Index : CXTokenKind;
886         import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
887 
888         if (auto localId = file in result.idFile) {
889             const fid = FileId(localId.get);
890 
891             auto mdata = appender!(LineMetadata[])();
892             foreach (t; cache.getTokens(AbsolutePath(file), tstream)
893                     .filter!(a => a.kind == CXTokenKind.comment)) {
894                 auto m = matchFirst(t.spelling, re_nomut);
895                 if (m.whichPattern == 0)
896                     continue;
897 
898                 mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
899                 log.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
900             }
901 
902             result.metadata ~= mdata.data;
903         }
904     }
905 
906     void saveDependencies(ParseFlags flags, AbsolutePath root, Path[] dependencies) @trusted {
907         import std.algorithm : cache;
908         import std.mmfile;
909 
910         auto rootDir = root.dirName;
911 
912         foreach (p; dependencies.map!(a => toAbsolutePath(a, rootDir,
913                 flags.includes, flags.systemIncludes))
914                 .cache
915                 .filter!(a => a.hasValue)
916                 .map!(a => a.orElse(AbsolutePath.init))
917                 .filter!(a => valLoc.isInsideOutputDir(a))) {
918             try {
919                 result.dependencies ~= DepFile(fio.toRelativeRoot(p), checksum(p));
920             } catch (Exception e) {
921                 log.trace(e.msg).collectException;
922             }
923         }
924 
925         log.trace(result.dependencies);
926     }
927 
    /// The outcome of analyzing one root file. All IDs are local to the
    /// analyze that produced the result.
    static class Result {
        import dextool.plugin.mutate.backend.analyze.ast : Interval;
        import dextool.plugin.mutate.backend.database.type : SchemataFragment;
        import dextool.plugin.mutate.backend.type : Language, CodeChecksum, SchemataChecksum;

        /// ID of a file. It is assigned from the number of files that already
        /// have an ID when the file is added.
        alias LocalFileId = NamedType!(long, Tag!"LocalFileId", long.init,
                TagStringable, Hashable);
        // NOTE(review): not used in the visible part of the file. Presumably
        // an ID of a schema that is local to this analyze - confirm.
        alias LocalSchemaId = NamedType!(long, Tag!"LocalSchemaId", long.init,
                TagStringable, Hashable);

        /// The mutation points of all files, flattened to one list.
        MutationPointEntry2[] mutationPoints;

        /// Checksum and language of a file that has mutation points.
        static struct FileInfo {
            Checksum checksum;
            Language language;
        }

        /// The file that is analyzed, which is a root
        AbsolutePath root;
        /// Checksum of `root`.
        Checksum rootCs;

        /// The dependencies the root has.
        DepFile[] dependencies;

        /// The key is the ID from idFile.
        FileInfo[LocalFileId] infoId;

        /// The IDs is unique for *this* analyze, not globally.
        LocalFileId[AbsolutePath] idFile;
        /// The reverse mapping of `idFile`.
        AbsolutePath[LocalFileId] fileId;

        // The FileID used in the metadata is local to this analysis. It has to
        // be remapped when added to the database.
        LineMetadata[] metadata;

        /// Mutant schematas that has been generated.
        SchemataResult.Schemata[AbsolutePath] schematas;

        /// Coverage intervals that can be instrumented.
        Interval[][LocalFileId] coverage;
    }
969 }
970 
@(
        "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
unittest {
    import std.regex : regex, matchFirst;

    auto re_nomut = regex(Analyze.rawReNomut);

    // NOMUT in other type of comments should NOT match.
    foreach (rejected; [
            "/// NOMUT", "// stuff with NOMUT in it", "/** NOMUT*/",
            "/* stuff with NOMUT in it */"
        ]) {
        matchFirst(rejected, re_nomut).whichPattern.shouldEqual(0);
    }

    // plain C and C++ comments that start with NOMUT match.
    foreach (accepted; ["/*NOMUT*/", "//NOMUT", "// NOMUT"]) {
        matchFirst(accepted, re_nomut).whichPattern.shouldEqual(1);
    }

    // the end of a C comment is not part of the extracted comment.
    matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");

    // tag and comment are optional and independent of each other.
    matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
    matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");

    auto both = matchFirst("// NOMUT (arch) smurf", re_nomut);
    both["tag"].shouldEqual("arch");
    both["comment"].shouldEqual("smurf");
}
994 
/** Token provider that tokenizes files via a clang context.
 *
 * `getTokens` returns all tokens of a file while `getFilteredTokens` excludes
 * the comment tokens.
 */
class TokenStreamImpl : TokenStream {
    import libclang_ast.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;
    import dextool.plugin.mutate.backend.utility : tokenize;

    /// Not owned. Only the address of the context is stored.
    ClangContext* ctx;

    /// The context must outlive any instance of this class.
    // TODO remove @trusted when upgrading to dmd-fe 2.091.0+ and activate dip25 + 1000
    this(ref ClangContext ctx) @trusted {
        this.ctx = &ctx;
    }

    /// Returns: all tokens of `p`.
    Token[] getTokens(Path p) {
        return tokenize(*ctx, p);
    }

    /// Returns: the tokens of `p` without the comment tokens.
    Token[] getFilteredTokens(Path p) {
        import clang.c.Index : CXTokenKind;

        // Keep those tokens that should affect the checksum.
        auto kept = appender!(Token[])();
        foreach (tok; tokenize(*ctx, p)) {
            if (tok.kind != CXTokenKind.comment)
                kept.put(tok);
        }
        return kept.data;
    }
}
1020 
/** Check if a path is located inside at least one of the roots.
 *
 * Params:
 *  roots = the roots to test against
 *  f = the path to test
 *
 * Returns: true if `f` is inside any `roots`, false otherwise and when
 * `roots` is empty.
 */
bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
    import dextool.utility : isPathInsideRoot;

    // the filter is lazy thus the search stops at the first root that contains `f`.
    return !roots.filter!(root => isPathInsideRoot(root, f)).empty;
}
1032 
/** Update the connection between the marked mutants and their mutation status
 * id and mutation id.
 *
 * A marked mutant is updated when its stored checksum differs from the
 * checksum of the mutation status it refers to. The marking is then removed
 * and re-created with the mutation status id and mutation id that the
 * database has for the checksum of the marking. A marking for which the
 * database lacks a status id or mutation id is left untouched.
 *
 * Params:
 *  db = the database to read from and update
 */
void updateMarkedMutants(ref Database db) {
    import dextool.plugin.mutate.backend.type : ExitStatus;

    // Re-create the marking of `m` with the IDs that match its checksum.
    void update(MarkedMutant m) {
        const stId = db.getMutationStatusId(m.statusChecksum);
        if (stId.isNull)
            return;
        const mutId = db.getMutationId(stId.get);
        if (mutId.isNull)
            return;
        db.removeMarkedMutant(m.statusChecksum);
        db.markMutant(mutId.get, m.path, m.sloc, stId.get, m.statusChecksum,
                m.toStatus, m.rationale, m.mutText);
        db.updateMutationStatus(stId.get, m.toStatus, ExitStatus(0));
    }

    // find those marked mutants that have a checksum that is different from
    // the mutation status the marked mutant is related to. If possible change
    // the relation to the correct mutation status id.
    foreach (m; db.getMarkedMutants) {
        // the checksum is queried exactly once per marked mutant. A lazy
        // map+filter chain would repeat the query for each stage that reads
        // the element.
        auto statusCs = db.getChecksum(m.statusId);
        if (statusCs.isNull)
            continue;
        if (m.statusChecksum != statusCs.get)
            update(m);
    }
}
1063 
/** Prints the marked mutants that have become lost due to a rerun of analyze.
 *
 * A warning is logged and a table with one row per mutant is written to
 * stdout. Nothing is printed when there are no lost mutants.
 *
 * Params:
 *  lostMutants = the markings to print
 */
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.array : empty;
    import std.conv : to;
    import std.stdio : writeln;

    if (lostMutants.empty)
        return;

    auto tbl = Table!6(["ID", "File", "Line", "Column", "Status", "Rationale"]);

    foreach (lost; lostMutants) {
        typeof(tbl).Row row = [
            lost.mutationId.get.to!string, lost.path, lost.sloc.line.to!string,
            lost.sloc.column.to!string, lost.toStatus.to!string, lost.rationale.get
        ];
        tbl.put(row);
    }

    log.warning("Marked mutants was lost");
    writeln(tbl);
}
1087 
@("shall only let files in the diff through")
unittest {
    import std.string : lineSplitter;
    import dextool.plugin.mutate.backend.diff_parser;

    // the diff renames standalone.d to standalone2.d thus only the new name
    // is expected to pass the filter.
    immutable lines = `diff --git a/standalone2.d b/standalone2.d
index 0123..2345 100644
--- a/standalone.d
+++ b/standalone2.d
@@ -31,7 +31,6 @@ import std.algorithm : map;
 import std.array : Appender, appender, array;
 import std.datetime : SysTime;
+import std.format : format;
-import std.typecons : Tuple;

 import d2sqlite3 : sqlDatabase = Database;

@@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
 struct Database {
     import std.conv : to;
     import std.exception : collectException;
-    import std.typecons : Nullable;
+    import std.typecons : Nullable, Flag, No;
     import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;

+    sqlDatabase db;`;

    UnifiedDiffParser p;
    foreach (line; lines.lineSplitter)
        p.process(line);
    auto diff = p.result;

    auto files = FileFilter(".".Path.AbsolutePath, true, diff);

    files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
    files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
}
1125 
/** Convert to an absolute path by finding the first match among the compiler flags.
 *
 * The directories are searched in the order `workDir`, `includes` and then
 * `systemIncludes`. The first directory in which `file` exists is used.
 *
 * Params:
 *  file = the path to resolve
 *  workDir = the directory that is searched first
 *  includes = include directories from the compiler flags
 *  systemIncludes = system include directories
 *
 * Returns: the absolute path of the first match or none if `file` is not
 * found in any of the directories.
 */
Optional!AbsolutePath toAbsolutePath(Path file, AbsolutePath workDir,
        ParseFlags.Include[] includes, SystemIncludePath[] systemIncludes) @trusted nothrow {
    import std.file : exists;
    import std.path : buildPath;

    // Returns: the path to `file` in `dir` if it exists there.
    Optional!AbsolutePath lookup(string dir) nothrow {
        const p = buildPath(dir, file);
        try {
            if (exists(p))
                return some(AbsolutePath(p));
        } catch (Exception e) {
            // deliberately ignored. A directory that can not be probed is
            // handled as "no match" to keep the function nothrow.
        }
        return none!AbsolutePath;
    }

    {
        auto hit = lookup(workDir.toString);
        if (hit.hasValue)
            return hit;
    }

    // explicit loops guarantee one filesystem probe per directory. A lazy
    // map+filter chain probes the matching directory twice.
    foreach (inc; includes) {
        auto hit = lookup(inc.payload);
        if (hit.hasValue)
            return hit;
    }

    foreach (sysInc; systemIncludes) {
        auto hit = lookup(sysInc.value);
        if (hit.hasValue)
            return hit;
    }

    return none!AbsolutePath;
}
1161 
/** Find the root files that need to be re-analyzed because either them or
 * their dependency has changed.
 *
 * A root is reported as changed when:
 *  - the tool version in the database differs from the running binary,
 *  - the checksum of the root on the filesystem differs from the database or
 *  - a dependency has a checksum on the filesystem that differs from the
 *    database, or the dependency has no checksum in the database.
 *
 * An exception during the comparison is logged as a warning. The roots that
 * had not been compared at that point are reported as unchanged.
 *
 * Params:
 *  dbPath = the database to read the roots, checksums and dependencies from
 *  fio = used to convert the paths from the database to absolute paths
 *
 * Returns: one entry per root file in the database. The value is true if the
 * root needs to be re-analyzed.
 */
bool[Path] dependencyAnalyze(const AbsolutePath dbPath, FilesysIO fio) @trusted {
    import dextool.cachetools : nullableCache;

    auto db = Database.make(dbPath);

    typeof(return) rval;

    // all roots start out as unchanged. Those that are found to be changed
    // are flipped to true further down.
    foreach (a; db.getRootFiles.map!(a => db.getFile(a).get)) {
        rval[a] = false;
    }

    try {
        auto getFileDbChecksum = nullableCache!(string, Checksum,
                (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
        // a dependency is commonly shared by many roots thus the checksums
        // calculated from the filesystem are cached.
        auto getFileFsChecksum = nullableCache!(AbsolutePath, Checksum, (AbsolutePath p) {
            return checksum(p);
        })(256, 30.dur!"seconds");

        Checksum[Path] dbDeps;
        foreach (a; db.dependencyApi.getAll)
            dbDeps[a.file] = a.checksum;

        const isToolVersionDifferent = db.isToolVersionDifferent(ToolVersion(dextoolBinaryId));
        bool isChanged(T)(T f) {
            if (isToolVersionDifferent) {
                // because the tool version is updated then all files need to
                // be re-analyzed. an update can mean that scheman are
                // improved, mutants has been changed/removed etc. it is
                // unknown. the only way to be sure is to re-analyze all files.
                return true;
            }

            if (f.rootCs != getFileFsChecksum(fio.toAbsoluteRoot(f.root)))
                return true;

            foreach (dep; f.deps) {
                // indexing the associative array with a missing key throws a
                // RangeError which is not an Exception and thus would not be
                // caught below. A dependency without a checksum in the
                // database can not be compared so the root is re-analyzed.
                auto dbCs = dep in dbDeps;
                if (dbCs is null)
                    return true;
                if (getFileFsChecksum(fio.toAbsoluteRoot(dep)) != *dbCs)
                    return true;
            }

            return false;
        }

        // cache because the tuple is expensive to create, it queries the database.
        foreach (f; db.getRootFiles
                .map!(a => db.getFile(a).get)
                .map!(a => tuple!("root", "rootCs", "deps")(a,
                    getFileDbChecksum(a), db.dependencyApi.get(a)))
                .cache
                .filter!(a => isChanged(a))
                .map!(a => a.root)) {
            rval[f] = true;
        }
    } catch (Exception e) {
        log.warning(e.msg);
    }

    log.trace("Dependency analyze: ", rval);

    return rval;
}
1230 
/** Check the byte order mark (BOM) of a file to determine if it is supported.
 *
 * Only utf-8 files are supported. A file with a utf-8 BOM is accepted but a
 * warning is logged.
 *
 * Params:
 *  fio = used to read the content of the file
 *  p = the file to check
 *
 * Returns: true if the file has a utf-8 BOM or no BOM at all.
 */
bool isFileSupported(FilesysIO fio, AbsolutePath p) @safe {
    import std.encoding : getBOM, BOM;

    const bom = fio.makeInput(p).content.getBOM().schema;

    if (bom == BOM.utf8) {
        log.warningf("%s has a utf-8 BOM marker. It will make all coverage and scheman fail to compile",
                p);
        return true;
    }

    return bom == BOM.none;
}