1 /**
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import logger = std.experimental.logger;
17 import std.algorithm : map, filter;
18 import std.array : array, appender;
19 import std.concurrency;
20 import std.datetime : dur;
21 import std.exception : collectException;
22 import std.parallelism;
23 import std.typecons;
24 
25 import colorlog;
26 
27 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter,
28     CompileCommandDB, SearchResult;
29 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
30 import dextool.plugin.mutate.backend.analyze.visitor : makeRootVisitor;
31 import dextool.plugin.mutate.backend.database : Database, LineMetadata, MutationPointEntry2;
32 import dextool.plugin.mutate.backend.database.type : MarkedMutant;
33 import dextool.plugin.mutate.backend.diff_parser : Diff;
34 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
35 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
36 import dextool.plugin.mutate.backend.utility : checksum, trustedRelativePath, Checksum;
37 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze;
38 import dextool.set;
39 import dextool.type : ExitStatusType, AbsolutePath, Path, DirName;
40 import dextool.user_filerange;
41 
42 version (unittest) {
43     import unit_threaded.assertions;
44 }
45 
46 /** Analyze the files in `frange` for mutations.
47  */
48 ExitStatusType runAnalyzer(ref Database db, ConfigAnalyze conf_analyze,
49         ConfigCompiler conf_compiler, UserFileRange frange, ValidateLoc val_loc, FilesysIO fio) @trusted {
50     import std.algorithm : filter, map;
51     import dextool.plugin.mutate.backend.diff_parser : diffFromStdin, Diff;
52 
53     auto fileFilter = () {
54         try {
55             return FileFilter(fio.getOutputDir, conf_analyze.unifiedDiffFromStdin,
56                     conf_analyze.unifiedDiffFromStdin ? diffFromStdin : Diff.init);
57         } catch (Exception e) {
58             logger.warning("Unable to parse diff");
59             logger.info(e.msg);
60         }
61         return FileFilter.init;
62     }();
63 
64     auto pool = () {
65         if (conf_analyze.poolSize == 0)
66             return new TaskPool();
67         return new TaskPool(conf_analyze.poolSize);
68     }();
69 
    // the database is only used by the store actor, i.e. by one thread at a time.
71     auto store = spawn(&storeActor, cast(shared)&db, cast(shared) fio.dup,
72             conf_analyze.prune, conf_analyze.fastDbStore);
73 
74     int taskCnt;
75     foreach (f; frange.filter!(a => !a.isNull)
76             .map!(a => a.get)
77             .filter!(a => !isPathInsideAnyRoot(conf_analyze.exclude, a.absoluteFile))
78             .filter!(a => fileFilter.shouldAnalyze(a.absoluteFile))) {
79         try {
80             pool.put(task!analyzeActor(f, val_loc.dup, fio.dup, conf_compiler, store));
81             taskCnt++;
82         } catch (Exception e) {
83             logger.trace(e);
84             logger.warning(e.msg);
85         }
86     }
87 
    // inform the store actor of how many analysis results it should *try* to
    // save.
90     send(store, AnalyzeCntMsg(taskCnt));
91     // wait for all files to be analyzed
92     pool.finish(true);
93     // wait for the store actor to finish
94     receiveOnly!StoreDoneMsg;
95 
96     return ExitStatusType.Ok;
97 }
98 
99 @safe:
100 
/** Filter function for files. Either all files are analyzed or only those
 * touched by the diff read from stdin.
 *
 * The matching ignores the file extension. This lessens the problem of
 * headers being skipped because they do not exist in
 * `compile_commands.json`: e.g. `foo.cpp` matches and is analyzed when the
 * diff only touches `foo.hpp`.
 */
108 struct FileFilter {
109     import std.path : stripExtension;
110 
111     Set!string files;
112     bool useFileFilter;
113     AbsolutePath root;
114 
115     this(AbsolutePath root, bool fromStdin, Diff diff) {
116         this.root = root;
117         this.useFileFilter = fromStdin;
118         foreach (a; diff.toRange(root)) {
119             files.add(a.key.stripExtension);
120         }
121     }
122 
123     bool shouldAnalyze(AbsolutePath p) {
124         import std.path : relativePath;
125 
126         if (!useFileFilter) {
127             return true;
128         }
129 
130         return relativePath(p, root).stripExtension in files;
131     }
132 }
133 
/// Number of analyze tasks that have been spawned and whose results the `storeActor` should wait for.
135 struct AnalyzeCntMsg {
136     int value;
137 }
138 
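/// Sent from the `storeActor` to the main thread when all results have been saved.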
139 struct StoreDoneMsg {
140 }
141 
/// Analyze a file and send the result to `storeActor`.
143 void analyzeActor(SearchResult fileToAnalyze, ValidateLoc vloc, FilesysIO fio,
144         ConfigCompiler conf, Tid storeActor) @trusted nothrow {
145     try {
146         auto analyzer = Analyze(vloc, fio, conf.forceSystemIncludes);
147         analyzer.process(fileToAnalyze);
148         send(storeActor, cast(immutable) analyzer.result);
149         return;
150     } catch (Exception e) {
151     }
152 
    // send a dummy result so the store actor still receives the expected
    // number of results
154     try {
155         send(storeActor, cast(immutable) new Analyze.Result);
156     } catch (Exception e) {
157     }
158 }
159 
/// Store the results of the analysis in the database.
161 void storeActor(scope shared Database* dbShared, scope shared FilesysIO fioShared,
162         const bool prune, const bool fastDbStore) @trusted nothrow {
163     import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
164     import cachetools : CacheLRU;
165     import dextool.cachetools : nullableCache;
166 
167     Database* db = cast(Database*) dbShared;
168     FilesysIO fio = cast(FilesysIO) fioShared;
169 
    // A file is saved to the database at most once.
171     Set!Path savedFiles;
172 
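    // Caches (256 entries, 30 second expiry) so repeated lookups of file ids
    // and checksums, from the database and the filesystem, stay cheap.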
173     auto getFileId = nullableCache!(string, FileId, (string p) => db.getFileId(p.Path))(256,
174             30.dur!"seconds");
175     auto getFileDbChecksum = nullableCache!(string, Checksum,
176             (string p) => db.getFileChecksum(p.Path))(256, 30.dur!"seconds");
177     auto getFileFsChecksum = nullableCache!(string, Checksum, (string p) {
178         return checksum(fio.makeInput(AbsolutePath(Path(p))).content[]);
179     })(256, 30.dur!"seconds");
180 
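    // Snapshot of the files and their checksums that are stored in the database.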
181     static struct Files {
182         Checksum[Path] value;
183 
184         this(ref Database db) {
185             foreach (a; db.getDetailedFiles) {
186                 value[a.file] = a.fileChecksum;
187             }
188         }
189     }
190 
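    // Save one analyze result to the database. Files that are unchanged or
    // have already been saved in this run are skipped.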
191     void save(immutable Analyze.Result result) {
192         // mark files that have an unchanged checksum as "already saved"
193         foreach (f; result.idFile
194                 .byKey
195                 .filter!(a => a !in savedFiles)
196                 .filter!(a => getFileDbChecksum(fio.toRelativeRoot(a)) == getFileFsChecksum(a))) {
197             logger.info("Unchanged ".color(Color.yellow), f);
198             savedFiles.add(f);
199         }
200 
        // mutation points are only saved once per file.
202         {
203             auto app = appender!(MutationPointEntry2[])();
            foreach (mp; result.mutationPoints // skip those in files that have already been saved
205                 .filter!(a => a.file !in savedFiles)) {
206                 app.put(mp);
207             }
208             foreach (f; result.idFile.byKey.filter!(a => a !in savedFiles)) {
209                 logger.info("Saving ".color(Color.green), f);
210                 db.removeFile(fio.toRelativeRoot(f));
211                 const info = result.infoId[result.idFile[f]];
212                 db.put(fio.toRelativeRoot(f), info.checksum, info.language);
213                 savedFiles.add(f);
214             }
215             db.put(app.data, fio.getOutputDir);
216         }
217 
218         {
219             Set!long printed;
220             auto app = appender!(LineMetadata[])();
221             foreach (md; result.metadata) {
222                 // transform the ID from local to global.
223                 const fid = getFileId(fio.toRelativeRoot(result.fileId[md.id]));
224                 if (fid.isNull && !printed.contains(md.id)) {
225                     printed.add(md.id);
226                     logger.warningf("File with suppressed mutants (// NOMUT) not in the database: %s. Skipping...",
227                             result.fileId[md.id]).collectException;
228                     continue;
229                 }
230                 app.put(LineMetadata(fid.get, md.line, md.attr));
231             }
232             db.put(app.data);
233         }
234     }
235 
236     // listen for results from workers until the expected number is processed.
237     void recv() {
238         logger.info("Updating files");
239 
240         int resultCnt;
241         Nullable!int maxResults;
242         bool running = true;
243 
244         while (running) {
245             try {
246                 receive((AnalyzeCntMsg a) { maxResults = a.value; }, (immutable Analyze.Result a) {
247                     resultCnt++;
248                     save(a);
249                 },);
250             } catch (Exception e) {
251                 logger.trace(e).collectException;
252                 logger.warning(e.msg).collectException;
253             }
254 
255             if (!maxResults.isNull && resultCnt >= maxResults.get) {
256                 running = false;
257             }
258         }
259     }
260 
261     void pruneFiles() {
262         import std.path : buildPath;
263 
264         logger.info("Pruning the database of dropped files");
265         auto files = db.getFiles.map!(a => buildPath(fio.getOutputDir, a).Path).toSet;
266 
267         foreach (f; files.setDifference(savedFiles).toRange) {
268             logger.info("Removing ".color(Color.red), f);
269             db.removeFile(fio.toRelativeRoot(f));
270         }
271     }
272 
273     void fastDbOn() {
274         if (!fastDbStore)
275             return;
276         logger.info(
277                 "Turning OFF sqlite3 synchronization protection to improve the write performance");
278         logger.warning("Do NOT interrupt dextool in any way because it may corrupt the database");
279         db.run("PRAGMA synchronous = OFF");
280         db.run("PRAGMA journal_mode = MEMORY");
281     }
282 
283     void fastDbOff() {
284         if (!fastDbStore)
285             return;
286         db.run("PRAGMA synchronous = ON");
287         db.run("PRAGMA journal_mode = DELETE");
288     }
289 
290     try {
291         import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;
292 
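        // block the senders instead of growing the mailbox without bound when
        // the analyze workers produce results faster than they can be saved.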
293         setMaxMailboxSize(thisTid, 64, OnCrowding.block);
294 
295         fastDbOn();
296 
297         auto trans = db.transaction;
298 
299         // TODO: only remove those files that are modified.
300         logger.info("Removing metadata");
301         db.clearMetadata;
302 
303         recv();
304 
        // TODO: print which files have been updated.
306         logger.info("Resetting timeout context");
307         resetTimeoutContext(*db);
308 
309         logger.info("Updating metadata");
310         db.updateMetadata;
311 
312         if (prune) {
313             pruneFiles();
314         }
315 
316         logger.info("Removing orphant mutants");
317         db.removeOrphanedMutants;
318 
319         logger.info("Updating manually marked mutants");
320         updateMarkedMutants(*db);
321         printLostMarkings(db.getLostMarkings);
322 
323         logger.info("Committing changes");
324         trans.commit;
325         logger.info("Ok".color(Color.green));
326 
327         fastDbOff();
328     } catch (Exception e) {
329         logger.error(e.msg).collectException;
330     }
331 
332     try {
333         send(ownerTid, StoreDoneMsg.init);
334     } catch (Exception e) {
335         logger.errorf("Fatal error. Unable to send %s to the main thread",
336                 StoreDoneMsg.init).collectException;
337     }
338 }
339 
340 /// Analyze a file for mutants.
341 struct Analyze {
342     import std.regex : Regex, regex, matchFirst;
343     import std.typecons : NullableRef, Nullable, Yes;
344     import miniorm : Transaction;
345     import cpptooling.analyzer.clang.context : ClangContext;
346     import cpptooling.utility.virtualfilesystem;
347     import dextool.compilation_db : SearchResult;
348     import dextool.type : FileName, Exists, makeExists;
349     import dextool.utility : analyzeFile;
350 
351     private {
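        // Matches a NOMUT annotation at the start of a line ("//") or block
        // ("/*") comment. An optional "(tag)" and trailing free text are
        // captured in the named groups `tag` and `comment`. See the unittest
        // at the end of this module for examples.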
352         static immutable raw_re_nomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;
353 
354         Regex!char re_nomut;
355 
356         ValidateLoc val_loc;
357         FilesysIO fio;
358         bool forceSystemIncludes;
359 
360         Cache cache;
361 
362         Result result;
363     }
364 
365     this(ValidateLoc val_loc, FilesysIO fio, bool forceSystemIncludes) @trusted {
366         this.val_loc = val_loc;
367         this.fio = fio;
368         this.cache = new Cache;
369         this.re_nomut = regex(raw_re_nomut);
370         this.forceSystemIncludes = forceSystemIncludes;
371         this.result = new Result;
372     }
373 
374     void process(SearchResult in_file) @safe {
375         in_file.flags.forceSystemIncludes = forceSystemIncludes;
376 
377         // find the file and flags to analyze
378         Exists!AbsolutePath checked_in_file;
379         try {
380             checked_in_file = makeExists(in_file.absoluteFile);
381         } catch (Exception e) {
382             logger.warning(e.msg);
383             return;
384         }
385 
386         () @trusted {
387             auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
388             auto tstream = new TokenStreamImpl(ctx);
389 
390             analyzeForMutants(in_file, checked_in_file, ctx, tstream);
391             // TODO: filter files so they are only analyzed once for comments
392             foreach (f; result.fileId.byValue)
393                 analyzeForComments(f, tstream);
394         }();
395     }
396 
397     void analyzeForMutants(SearchResult in_file,
398             Exists!AbsolutePath checked_in_file, ref ClangContext ctx, TokenStream tstream) @safe {
399         auto root = makeRootVisitor(fio, val_loc, tstream, cache);
400         analyzeFile(checked_in_file, in_file.flags.completeFlags, root.visitor, ctx);
401 
402         result.mutationPoints = root.mutationPoints;
403         foreach (f; root.mutationPointFiles) {
404             const id = result.idFile.length;
405             result.idFile[f.path] = id;
406             result.fileId[id] = f.path;
407             result.infoId[id] = Result.FileInfo(f.cs, f.lang);
408         }
409     }
410 
411     /**
412      * Tokens are always from the same file.
413      */
414     void analyzeForComments(Path file, TokenStream tstream) @trusted {
415         import std.algorithm : filter;
416         import clang.c.Index : CXTokenKind;
417         import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;
418 
419         const fid = result.idFile.require(file, result.fileId.length).FileId;
420 
421         auto mdata = appender!(LineMetadata[])();
422         foreach (t; cache.getTokens(AbsolutePath(file), tstream)
423                 .filter!(a => a.kind == CXTokenKind.comment)) {
424             auto m = matchFirst(t.spelling, re_nomut);
425             if (m.whichPattern == 0)
426                 continue;
427 
428             mdata.put(LineMetadata(fid, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
429             logger.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
430         }
431 
432         result.metadata ~= mdata.data;
433     }
434 
435     static class Result {
436         import dextool.plugin.mutate.backend.type : Language;
437 
438         MutationPointEntry2[] mutationPoints;
439 
440         static struct FileInfo {
441             Checksum checksum;
442             Language language;
443         }
444 
445         /// The key is the ID from idFile.
446         FileInfo[ulong] infoId;
447 
        /// The IDs are unique for *this* analysis, not globally.
449         long[Path] idFile;
450         Path[long] fileId;
451 
452         // The FileID used in the metadata is local to this analysis. It has to
453         // be remapped when added to the database.
454         LineMetadata[] metadata;
455     }
456 }
457 
458 @(
459         "shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
460 unittest {
461     import std.regex : regex, matchFirst;
462     import unit_threaded.runner.io : writelnUt;
463 
464     auto re_nomut = regex(Analyze.raw_re_nomut);
    // NOMUT in other types of comments should NOT match.
466     matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
467     matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
468     matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
469     matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);
470 
471     matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
472     matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
473     matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
474     matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);
475     matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
476     matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
477     auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
478     m["tag"].shouldEqual("arch");
479     m["comment"].shouldEqual("smurf");
480 }
481 
/// Stream of tokens from a file. The filtered stream excludes comment tokens.
483 class TokenStreamImpl : TokenStream {
484     import std.typecons : NullableRef, nullableRef;
485     import cpptooling.analyzer.clang.context : ClangContext;
486     import dextool.plugin.mutate.backend.type : Token;
487 
488     NullableRef!ClangContext ctx;
489 
490     /// The context must outlive any instance of this class.
491     this(ref ClangContext ctx) {
492         this.ctx = nullableRef(&ctx);
493     }
494 
495     Token[] getTokens(Path p) {
496         import dextool.plugin.mutate.backend.utility : tokenize;
497 
498         return tokenize(ctx, p);
499     }
500 
501     Token[] getFilteredTokens(Path p) {
502         import std.array : array;
503         import std.algorithm : filter;
504         import clang.c.Index : CXTokenKind;
505         import dextool.plugin.mutate.backend.utility : tokenize;
506 
507         // Filter a stream of tokens for those that should affect the checksum.
508         return tokenize(ctx, p).filter!(a => a.kind != CXTokenKind.comment).array;
509     }
510 }
511 
/// Returns: true if `f` is inside any of the `roots`.
513 bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
514     import dextool.utility : isPathInsideRoot;
515 
516     foreach (root; roots) {
517         if (isPathInsideRoot(root, f))
518             return true;
519     }
520 
521     return false;
522 }
523 
524 /** Update the connection between the marked mutants and their mutation status
525  * id and mutation id.
526  */
527 void updateMarkedMutants(ref Database db) {
528     import dextool.plugin.mutate.backend.database.type : MutationStatusId;
529 
530     void update(MarkedMutant m) {
531         const stId = db.getMutationStatusId(m.statusChecksum);
532         if (stId.isNull)
533             return;
534         const mutId = db.getMutationId(stId.get);
535         if (mutId.isNull)
536             return;
537         db.removeMarkedMutant(m.statusChecksum);
538         db.markMutant(mutId.get, m.path, m.sloc, stId.get, m.statusChecksum,
539                 m.toStatus, m.rationale, m.mutText);
540         db.updateMutationStatus(stId.get, m.toStatus);
541     }
542 
    // Find the marked mutants whose checksum differs from the checksum of the
    // mutation status they are related to. If possible, update the relation
    // to point to the correct mutation status id.
546     foreach (m; db.getMarkedMutants
547             .map!(a => tuple(a, db.getChecksum(a.statusId)))
548             .filter!(a => !a[1].isNull)
549             .filter!(a => a[0].statusChecksum != a[1].get)) {
550         update(m[0]);
551     }
552 }
553 
/// Print the marked mutants that have been lost due to a re-run of the analysis.
555 void printLostMarkings(MarkedMutant[] lostMutants) {
556     import std.algorithm : sort;
557     import std.array : empty;
558     import std.conv : to;
559     import std.stdio : writeln;
560 
561     if (lostMutants.empty)
562         return;
563 
564     Table!6 tbl = Table!6([
565             "ID", "File", "Line", "Column", "Status", "Rationale"
566             ]);
567     foreach (m; lostMutants) {
568         typeof(tbl).Row r = [
569             m.mutationId.to!string, m.path, m.sloc.line.to!string,
570             m.sloc.column.to!string, m.toStatus.to!string, m.rationale
571         ];
572         tbl.put(r);
573     }
574     logger.warning("Marked mutants was lost");
575     writeln(tbl);
576 }
577 
578 @("shall only let files in the diff through")
579 unittest {
580     import std.string : lineSplitter;
581     import dextool.plugin.mutate.backend.diff_parser;
582 
583     immutable lines = `diff --git a/standalone2.d b/standalone2.d
584 index 0123..2345 100644
585 --- a/standalone.d
586 +++ b/standalone2.d
587 @@ -31,7 +31,6 @@ import std.algorithm : map;
588  import std.array : Appender, appender, array;
589  import std.datetime : SysTime;
590 +import std.format : format;
591 -import std.typecons : Tuple;
592 
593  import d2sqlite3 : sqlDatabase = Database;
594 
595 @@ -46,7 +45,7 @@ import dextool.plugin.mutate.backend.type : Language;
596  struct Database {
597      import std.conv : to;
598      import std.exception : collectException;
599 -    import std.typecons : Nullable;
600 +    import std.typecons : Nullable, Flag, No;
601      import dextool.plugin.mutate.backend.type : MutationPoint, Mutation, Checksum;
602 
603 +    sqlDatabase db;`;
604 
605     UnifiedDiffParser p;
606     foreach (line; lines.lineSplitter)
607         p.process(line);
608     auto diff = p.result;
609 
610     auto files = FileFilter(".".Path.AbsolutePath, true, diff);
611 
612     files.shouldAnalyze("standalone.d".Path.AbsolutePath).shouldBeFalse;
613     files.shouldAnalyze("standalone2.d".Path.AbsolutePath).shouldBeTrue;
614 }