1 /**
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import logger = std.experimental.logger;
17 import std.algorithm : map, filter;
18 import std.array : array, appender;
19 import std.concurrency;
20 import std.datetime : dur;
21 import std.exception : collectException;
22 import std.parallelism;
23 import std.typecons;
24 
25 import colorlog;
26 
27 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter,
28     CompileCommandDB, SearchResult;
29 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
30 import dextool.plugin.mutate.backend.analyze.visitor : makeRootVisitor;
31 import dextool.plugin.mutate.backend.database : Database, LineMetadata, MutationPointEntry2;
32 import dextool.plugin.mutate.backend.database.type : MarkedMutant;
33 import dextool.plugin.mutate.backend.diff_parser : Diff;
34 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
35 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
36 import dextool.plugin.mutate.backend.utility : checksum, trustedRelativePath, Checksum;
37 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze;
38 import dextool.set;
39 import dextool.type : ExitStatusType, AbsolutePath, Path;
40 import dextool.user_filerange;
41 
42 version (unittest) {
43     import unit_threaded.assertions;
44 }
45 
/** Analyze the files in `frange` for mutants and save the result via a store actor.
 *
 * Every file that passes the filters is handed to an `analyzeActor` task in a
 * task pool. The tasks send their results to a spawned `storeActor` which is
 * the one writing to `db`. The function blocks until both the pool and the
 * store actor are done.
 *
 * Params:
 *  db = database handed over to the store actor
 *  conf_analyze = pool size, excluded roots, diff filtering, prune and fast db store settings
 *  conf_compiler = compiler configuration forwarded to each analyze task
 *  frange = the files to consider for analysis
 *  val_loc = duplicated for each analyze task
 *  fio = duplicated for the store actor and for each analyze task
 *
 * Returns: always `ExitStatusType.Ok`.
 */
ExitStatusType runAnalyzer(ref Database db, ConfigAnalyze conf_analyze,
        ConfigCompiler conf_compiler, UserFileRange frange, ValidateLoc val_loc, FilesysIO fio) @trusted {
    import std.algorithm : filter, map;
    import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;

    // A diff that can not be parsed results in a filter that lets all files through.
    FileFilter makeFileFilter() {
        try {
            return FileFilter(fio.getOutputDir, conf_analyze.unifiedDiffFromStdin,
                    conf_analyze.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
        } catch (Exception e) {
            logger.warning("Unable to parse diff");
            logger.info(e.msg);
        }
        return FileFilter.init;
    }

    auto fileFilter = makeFileFilter();

    // a pool size of zero means "use the default constructed pool".
    auto pool = conf_analyze.poolSize == 0 ? new TaskPool() : new TaskPool(conf_analyze.poolSize);

    // will only be used by one thread at a time.
    auto store = spawn(&storeActor, cast(shared)&db, cast(shared) fio.dup,
            conf_analyze.prune, conf_analyze.fastDbStore);

    // drop null entries, excluded paths and files that the diff filter rejects.
    auto candidates = frange.filter!(a => !a.isNull)
        .map!(a => a.get)
        .filter!(a => !isPathInsideAnyRoot(conf_analyze.exclude, a.absoluteFile))
        .filter!(a => fileFilter.shouldAnalyze(a.absoluteFile));

    int scheduled;
    foreach (candidate; candidates) {
        try {
            pool.put(task!analyzeActor(candidate, val_loc.dup, fio.dup, conf_compiler, store));
            scheduled++;
        } catch (Exception e) {
            logger.trace(e);
            logger.warning(e.msg);
        }
    }

    // inform the store actor of how many analyse results it should *try* to
    // save.
    send(store, AnalyzeCntMsg(scheduled));
    // wait for all files to be analyzed
    pool.finish(true);
    // wait for the store actor to finish
    receiveOnly!StoreDoneMsg;

    return ExitStatusType.Ok;
}
98 
99 @safe:
100 
/** Decides which files to analyze: either all of them or only those in a diff.
 *
 * The file extension is ignored when matching. This lessens the problem that
 * headers would otherwise be skipped because they do not exist in
 * `compile_commands.json`. It means that e.g. "foo.hpp" would match `true` if
 * `foo.cpp` is in `compile_commands.json`.
 */
struct FileFilter {
    import std.path : stripExtension;

    /// Paths from the diff with the extension stripped.
    Set!string files;
    /// When false every file passes the filter.
    bool useFileFilter;
    /// Paths given to `shouldAnalyze` are made relative to this before the lookup.
    AbsolutePath root;

    /**
     * Params:
     *  root = root that the diff is resolved against and that paths are made relative to
     *  fromStdin = activates the filtering
     *  diff = the files in the diff are those that pass the filter
     */
    this(AbsolutePath root, bool fromStdin, Diff diff) {
        this.root = root;
        this.useFileFilter = fromStdin;
        foreach (entry; diff.toRange(root)) {
            auto withoutExt = entry.key.stripExtension;
            files.add(withoutExt);
        }
    }

    /// Returns: true if the filter is inactive or `p`, ignoring the extension, is part of the diff.
    bool shouldAnalyze(AbsolutePath p) {
        import std.path : relativePath;

        return !useFileFilter || (relativePath(p, root).stripExtension in files);
    }
}
133 
/// Message to the `storeActor` with the number of analyze tasks that have been
/// spawned, i.e. how many analyze results it should wait for before finishing.
struct AnalyzeCntMsg {
    /// Number of tasks that were put in the task pool.
    int value;
}
138 
/// Message sent by the `storeActor` to its owner thread when it has finished.
struct StoreDoneMsg {
}
141 
/** Analyze one file and send the result to the store actor.
 *
 * Exactly one `Analyze.Result` is sent for each call, if sending is possible.
 * When the analyze fails an empty result is sent instead so the store actor,
 * which counts the received results, is not left waiting for this task.
 *
 * Params:
 *  fileToAnalyze = the file, with its compiler flags, to analyze
 *  vloc = passed on to the analyzer
 *  fio = passed on to the analyzer
 *  conf = only `forceSystemIncludes` is used
 *  storeActor = receiver of the result
 */
void analyzeActor(SearchResult fileToAnalyze, ValidateLoc vloc, FilesysIO fio,
        ConfigCompiler conf, Tid storeActor) @trusted nothrow {
    try {
        auto analyzer = Analyze(vloc, fio, conf.forceSystemIncludes);
        analyzer.process(fileToAnalyze);
        send(storeActor, cast(immutable) analyzer.result);
        return;
    } catch (Exception e) {
        // the function is nothrow thus a failure to log is ignored.
        logger.trace(e).collectException;
        logger.warningf("Unable to analyze %s: %s", fileToAnalyze.absoluteFile, e.msg)
            .collectException;
    }

    // send a dummy result
    try {
        send(storeActor, cast(immutable) new Analyze.Result);
    } catch (Exception e) {
        logger.trace(e).collectException;
        logger.errorf("Unable to send a result for %s to the store actor: %s",
                fileToAnalyze.absoluteFile, e.msg).collectException;
    }
}
159 
/** Store the results of the analyze tasks in the database.
 *
 * Receives `Analyze.Result` messages until as many as announced by an
 * `AnalyzeCntMsg` have been processed. Everything is written inside one
 * database transaction. A `StoreDoneMsg` is sent to the owner thread when the
 * actor is done, independent of whether it succeeded or failed.
 *
 * Params:
 *  dbShared = the database to write to; cast back to a thread local pointer
 *  fioShared = file system access; cast back to a thread local reference
 *  prune = remove files from the database that were not part of this analyze
 *      together with orphaned mutants
 *  fastDbStore = turn off the sqlite3 synchronization while storing
 */
void storeActor(scope shared Database* dbShared, scope shared FilesysIO fioShared,
        const bool prune, const bool fastDbStore) @trusted nothrow {
    import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
    import cachetools : CacheLRU;
    import dextool.cachetools : nullableCache;

    Database* db = cast(Database*) dbShared;
    FilesysIO fio = cast(FilesysIO) fioShared;

    // A file is at most saved one time to the database.
    Set!Path savedFiles;

    auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
            30.dur!"seconds");
    auto getFileDbChecksum = nullableCache!(string, Checksum,
            (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
    auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
        return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]);
    })(256, 30.dur!"seconds");

    // Save one analyze result: files, mutation points and line metadata.
    void save(immutable Analyze.Result result) {
        // mark files that have an unchanged checksum as "already saved"
        foreach (f; result.idFile
                .byKey
                .filter!(a => a !in savedFiles)
                .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a))) {
            logger.info("Unchanged ".color(Color.yellow), f);
            savedFiles.add(f);
        }

        // only saves mutation points to a file one time.
        {
            auto app = appender!(MutationPointEntry2[])();
            foreach (mp; result.mutationPoints // remove those that has been globally saved
                .filter!(a => a.file !in savedFiles)) {
                app.put(mp);
            }
            foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) {
                logger.info("Saving ".color(Color.green), f);
                db.removeFile(fio.toRelativeRoot(f));
                const info = result.infoId[result.idFile[f]];
                db.put(fio.toRelativeRoot(f), info.checksum, info.language);
                savedFiles.add(f);
            }
            db.put(app.data, fio.getOutputDir);
        }

        {
            Set!long printed;
            auto app = appender!(LineMetadata[])();
            foreach (md; result.metadata) {
                // transform the ID from local to global.
                const fid = getFileId(fio.toRelativeRoot(result.fileId[md.id]));
                if (fid.isNull) {
                    // Always skip metadata for a file that is missing in the
                    // database, otherwise `fid.get` is called on a null
                    // value. The warning is only printed once per file.
                    if (!printed.contains(md.id)) {
                        printed.add(md.id);
                        logger.warningf("File with suppressed mutants (// NOMUT) not in the database: %s. Skipping...",
                                result.fileId[md.id]).collectException;
                    }
                    continue;
                }
                app.put(LineMetadata(fid.get, md.line, md.attr));
            }
            db.put(app.data);
        }
    }

    // listen for results from workers until the expected number is processed.
    void recv() {
        logger.info("Updating files");

        int resultCnt;
        Nullable!int maxResults;
        bool running = true;

        while (running) {
            try {
                receive((AnalyzeCntMsg a) { maxResults = a.value; }, (immutable Analyze.Result a) {
                    resultCnt++;
                    save(a);
                },);
            } catch (Exception e) {
                logger.trace(e).collectException;
                logger.warning(e.msg).collectException;
            }

            // the expected number of results is unknown until AnalyzeCntMsg arrives.
            if (!maxResults.isNull && resultCnt >= maxResults.get) {
                running = false;
            }
        }
    }

    // Remove the files from the database that have not been saved by this run.
    void pruneFiles() {
        import std.path : buildPath;

        logger.info("Pruning the database of dropped files");
        auto files = db.getFiles.map!(a => buildPath(fio.getOutputDir, a).Path).toSet;

        foreach (f; files.setDifference(savedFiles).toRange) {
            logger.info("Removing ".color(Color.red), f);
            db.removeFile(fio.toRelativeRoot(f));
        }
    }

    void fastDbOn() {
        if (!fastDbStore)
            return;
        logger.info(
                "Turning OFF sqlite3 synchronization protection to improve the write performance");
        logger.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
        db.run("PRAGMA synchronous = OFF");
        db.run("PRAGMA journal_mode = MEMORY");
    }

    void fastDbOff() {
        if (!fastDbStore)
            return;
        db.run("PRAGMA synchronous = ON");
        db.run("PRAGMA journal_mode = DELETE");
    }

    try {
        import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

        // workers block when the mailbox is full instead of queueing up
        // results without a bound.
        setMaxMailboxSize(thisTid, 64, OnCrowding.block);

        fastDbOn();
        // Restore the settings even when storing fails. It is registered
        // before the transaction is created so it runs after the transaction
        // has been finalized.
        scope (exit)
            fastDbOff();

        auto trans = db.transaction;

        // TODO: only remove those files that are modified.
        logger.info("Removing metadata");
        db.clearMetadata;

        recv();

        // TODO: print what files has been updated.
        logger.info("Resetting timeout context");
        resetTimeoutContext(*db);

        logger.info("Updating metadata");
        db.updateMetadata;

        if (prune) {
            pruneFiles();
            logger.info("Removing orphant mutants");
            db.removeOrphanedMutants;
        }

        logger.info("Updating manually marked mutants");
        updateMarkedMutants(*db);
        printLostMarkings(db.getLostMarkings);

        logger.info("Committing changes");
        trans.commit;
        logger.info("Ok".color(Color.green));
    } catch (Exception e) {
        logger.error(e.msg).collectException;
    }

    // the owner is blocked in a receive until this message arrives.
    try {
        send(ownerTid, StoreDoneMsg.init);
    } catch (Exception e) {
        logger.errorf("Fatal error. Unable to send %s to the main thread",
                StoreDoneMsg.init).collectException;
    }
}
338 
/** Analyze a file for mutants and `NOMUT` comments.
 *
 * Call `process` with the file to analyze. What was found is accumulated in
 * the private member `result`.
 */
struct Analyze {
    import std.regex : Regex, regex, matchFirst;
    import std.typecons : NullableRef, Nullable, Yes;
    import miniorm : Transaction;
    import cpptooling.analyzer.clang.context : ClangContext;
    import cpptooling.utility.virtualfilesystem;
    import dextool.compilation_db : SearchResult;
    import dextool.type : Exists, makeExists;
    import dextool.utility : analyzeFile;

    private {
        /// Pattern for a `//` or `/*` comment that starts with NOMUT followed
        /// by an optional tag in parentheses and an optional comment.
        static immutable raw_re_nomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;

        Regex!char re_nomut;

        ValidateLoc val_loc;
        FilesysIO fio;
        bool forceSystemIncludes;

        Cache cache;

        Result result;
    }

    /**
     * Params:
     *  val_loc = forwarded to the root visitor
     *  fio = forwarded to the root visitor
     *  forceSystemIncludes = written to the flags of the file that is processed
     */
    this(ValidateLoc val_loc, FilesysIO fio, bool forceSystemIncludes) @trusted {
        this.re_nomut = regex(raw_re_nomut);
        this.cache = new Cache;
        this.result = new Result;
        this.val_loc = val_loc;
        this.fio = fio;
        this.forceSystemIncludes = forceSystemIncludes;
    }

    /// Analyze `in_file` for mutants followed by the comments of all files with mutants.
    void process(SearchResult in_file) @safe {
        in_file.flags.forceSystemIncludes = forceSystemIncludes;

        // the analyze is aborted with a warning if the file can not be
        // verified to exist.
        Exists!AbsolutePath checked_in_file;
        try {
            checked_in_file = makeExists(in_file.absoluteFile);
        } catch (Exception e) {
            logger.warning(e.msg);
            return;
        }

        () @trusted {
            auto clangCtx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
            auto tokens = new TokenStreamImpl(clangCtx);

            analyzeForMutants(in_file, checked_in_file, clangCtx, tokens);
            // TODO: filter files so they are only analyzed once for comments
            foreach (path; result.fileId.byValue)
                analyzeForComments(path, tokens);
        }();
    }

    /** Run the root visitor on the file and record the mutation points.
     *
     * Each file with mutation points is given an ID that is local to this
     * analyze.
     */
    void analyzeForMutants(SearchResult in_file,
            Exists!AbsolutePath checked_in_file, ref ClangContext ctx, TokenStream tstream) @safe {
        auto rootVisitor = makeRootVisitor(fio, val_loc, tstream, cache);
        analyzeFile(checked_in_file, in_file.flags.completeFlags, rootVisitor.visitor, ctx);

        result.mutationPoints = rootVisitor.mutationPoints;
        foreach (mutFile; rootVisitor.mutationPointFiles) {
            // the number of files registered so far is used as the local ID.
            const localId = result.idFile.length;
            result.idFile[mutFile.path] = localId;
            result.fileId[localId] = mutFile.path;
            result.infoId[localId] = Result.FileInfo(mutFile.cs, mutFile.lang);
        }
    }

    /** Record a line metadata entry for each comment token in `file` that
     * match the NOMUT pattern.
     *
     * Tokens are always from the same file.
     */
    void analyzeForComments(Path file, TokenStream tstream) @trusted {
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

        const localId = result.idFile.require(file, result.fileId.length).FileId;

        auto found = appender!(LineMetadata[])();
        auto comments = cache.getTokens(AbsolutePath(file), tstream)
            .filter!(tok => tok.kind == CXTokenKind.comment);
        foreach (tok; comments) {
            auto hit = matchFirst(tok.spelling, re_nomut);
            if (hit.whichPattern != 0) {
                found.put(LineMetadata(localId, tok.loc.line,
                        LineAttr(NoMut(hit["tag"], hit["comment"]))));
                logger.tracef("NOMUT found at %s:%s:%s", file, tok.loc.line, tok.loc.column);
            }
        }

        result.metadata ~= found.data;
    }

    /// What an analyze of one file produces.
    static class Result {
        import dextool.plugin.mutate.backend.type : Language;

        MutationPointEntry2[] mutationPoints;

        static struct FileInfo {
            Checksum checksum;
            Language language;
        }

        /// The key is the ID from idFile.
        FileInfo[ulong] infoId;

        /// The IDs is unique for *this* analyze, not globally.
        long[Path] idFile;
        /// Reverse lookup of `idFile`.
        Path[long] fileId;

        // The FileID used in the metadata is local to this analysis. It has to
        // be remapped when added to the database.
        LineMetadata[] metadata;
    }
}
456 
@(
        "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
unittest {
    import std.regex : regex, matchFirst;
    import unit_threaded.runner.io : writelnUt;

    auto re_nomut = regex(Analyze.raw_re_nomut);
    // NOMUT in other type of comments should NOT match.
    // `whichPattern` is zero when there is no match.
    matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);

    // both `/* */` and `//` comments match, with or without whitespace before NOMUT.
    matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
    matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
    // the tag is the text inside the parentheses, the comment is the text that follows.
    matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
    matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
    auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
    m["tag"].shouldEqual("arch");
    m["comment"].shouldEqual("smurf");
}
480 
/** Token streams for a file, tokenized via a clang context.
 *
 * `getTokens` returns all tokens while `getFilteredTokens` leaves out the
 * comment tokens.
 */
class TokenStreamImpl : TokenStream {
    import std.typecons : NullableRef, nullableRef;
    import cpptooling.analyzer.clang.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;

    /// Reference to the context given to the constructor. It is not owned by the instance.
    NullableRef!ClangContext ctx;

    /// The context must outlive any instance of this class.
    this(ref ClangContext ctx) {
        this.ctx = nullableRef(&ctx);
    }

    /// Returns: the tokens of `p`, comments included.
    Token[] getTokens(Path p) {
        import dextool.plugin.mutate.backend.utility : tokenize;

        return tokenize(ctx, p);
    }

    /// Returns: the tokens of `p` with the comment tokens removed.
    Token[] getFilteredTokens(Path p) {
        import std.array : array;
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.utility : tokenize;

        // Filter a stream of tokens for those that should affect the checksum.
        auto withoutComments = tokenize(ctx, p).filter!(tok => tok.kind != CXTokenKind.comment);
        return withoutComments.array;
    }
}
510 
/** Check `f` against each root in order, stopping at the first hit.
 *
 * Returns: true if `f` is inside any of `roots`, false otherwise which
 * includes when `roots` is empty.
 */
bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
    import dextool.utility : isPathInsideRoot;

    bool inside = false;
    for (size_t idx = 0; idx < roots.length && !inside; ++idx) {
        inside = isPathInsideRoot(roots[idx], f);
    }

    return inside;
}
522 
/** Re-connect marked mutants to their mutation status id and mutation id.
 *
 * A marked mutant is updated when the checksum stored with it differs from
 * the checksum of the mutation status it refers to. Those whose referred
 * status has no checksum, or whose stored checksum can not be resolved to a
 * status id and a mutation id, are left untouched.
 */
void updateMarkedMutants(ref Database db) {
    import dextool.plugin.mutate.backend.database.type : MutationStatusId;

    foreach (marked; db.getMarkedMutants) {
        auto current = db.getChecksum(marked.statusId);
        // nothing to do when the status is unknown or the checksums still agree.
        if (current.isNull || marked.statusChecksum == current.get)
            continue;

        // if possible change the relation to the correct mutation status id.
        const stId = db.getMutationStatusId(marked.statusChecksum);
        if (stId.isNull)
            continue;
        const mutId = db.getMutationId(stId.get);
        if (mutId.isNull)
            continue;

        // replace the old marking with one that uses the resolved IDs.
        db.removeMarkedMutant(marked.statusChecksum);
        db.markMutant(mutId.get, marked.path, marked.sloc, stId.get,
                marked.statusChecksum, marked.toStatus, marked.rationale, marked.mutText);
        db.updateMutationStatus(stId.get, marked.toStatus);
    }
}
552 
/** Print the marked mutants that have become lost due to a rerun of analyze.
 *
 * A warning is logged and a table with one row per mutant is written to
 * stdout. Nothing is printed when `lostMutants` is empty.
 */
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.algorithm : sort;
    import std.array : empty;
    import std.conv : to;
    import std.stdio : writeln;

    if (!lostMutants.empty) {
        auto tbl = Table!6([
                "ID", "File", "Line", "Column", "Status", "Rationale"
                ]);
        foreach (lost; lostMutants) {
            typeof(tbl).Row row = [
                lost.mutationId.to!string, lost.path, lost.sloc.line.to!string,
                lost.sloc.column.to!string, lost.toStatus.to!string, lost.rationale
            ];
            tbl.put(row);
        }
        logger.warning("Marked mutants was lost");
        writeln(tbl);
    }
}
576 
@("shall only let files in the diff through")
unittest {
    import std.string : lineSplitter;
    import dextool.plugin.mutate.backend.diff_parser;

    // A unified diff where the old file is standalone.d and the new file is
    // standalone2.d.
    immutable lines = `diff --git a/standalone2.d b/standalone2.d
index 0123..2345 100644
--- a/standalone.d
+++ b/standalone2.d
@@ -31,7 +31,6 @@ import std.algorithm : map;
 import std.array : Appender, appender, array;
 import std.datetime : SysTime;
+import std.format : format;
-import std.typecons : Tuple;

 import d2sqlite3 : sqlDatabase = Database;

@@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
 struct Database {
     import std.conv : to;
     import std.exception : collectException;
-    import std.typecons : Nullable;
+    import std.typecons : Nullable, Flag, No;
     import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;

+    sqlDatabase db;`;

    // feed the diff to the parser one line at a time.
    UnifiedDiffParser p;
    foreach (line; lines.lineSplitter)
        p.process(line);
    auto diff = p.result;

    // the filter is activated by passing `true` for `fromStdin`.
    auto files = FileFilter(".".Path.AbsolutePath, true, diff);

    // only the new file name is expected to pass the filter.
    files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
    files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
}