1 /**
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 #SPC-analyzer
11 
12 TODO cache the checksums. They are *heavy*.
13 */
14 module dextool.plugin.mutate.backend.analyze;
15 
16 import logger = std.experimental.logger;
17 
18 import dextool.compilation_db : CompileCommandFilter, defaultCompilerFlagFilter, CompileCommandDB;
19 import dextool.set;
20 import dextool.type : ExitStatusType, AbsolutePath, Path, DirName;
21 import dextool.user_filerange;
22 
23 import dextool.plugin.mutate.backend.analyze.internal : Cache, TokenStream;
24 import dextool.plugin.mutate.backend.analyze.visitor : makeRootVisitor;
25 import dextool.plugin.mutate.backend.database : Database;
26 import dextool.plugin.mutate.backend.database.type : MarkedMutant;
27 import dextool.plugin.mutate.backend.interface_ : ValidateLoc, FilesysIO;
28 import dextool.plugin.mutate.backend.utility : checksum, trustedRelativePath, Checksum;
29 import dextool.plugin.mutate.config : ConfigCompiler, ConfigAnalyze;
30 import dextool.plugin.mutate.backend.report.utility : statusToString, Table;
31 
32 version (unittest) {
33     import unit_threaded.assertions;
34 }
35 
/** Run the mutation analysis over every usable file in `frange`.
 *
 * Entries that are null, or whose absolute path lies inside one of the
 * directories in `conf_analyze.exclude`, are skipped. A failure while
 * processing one file is logged and does not stop the remaining files from
 * being analyzed.
 *
 * Params:
 *  db = database that receives the analysis result
 *  conf_analyze = analyze configuration; only `exclude` is read here
 *  conf_compiler = compiler configuration forwarded to the analyzer
 *  frange = the files (with compiler flags) to analyze
 *  val_loc = forwarded to the analyzer
 *  fio = forwarded to the analyzer
 *
 * Returns: always `ExitStatusType.Ok`.
 */
ExitStatusType runAnalyzer(ref Database db, ConfigAnalyze conf_analyze,
        ConfigCompiler conf_compiler, UserFileRange frange, ValidateLoc val_loc, FilesysIO fio) @trusted {
    auto analyzer = Analyzer(db, val_loc, fio, conf_compiler);

    foreach (candidate; frange) {
        // entries without a value cannot be analyzed.
        if (candidate.isNull)
            continue;

        auto in_file = candidate.get;

        // the user has explicitly excluded this part of the file system.
        if (isPathInsideAnyRoot(conf_analyze.exclude, in_file.absoluteFile))
            continue;

        try {
            analyzer.process(in_file);
        } catch (Exception e) {
            // best effort; report the problem and continue with the next file.
            logger.trace(e);
            logger.warning(e.msg);
        }
    }

    analyzer.finalize;

    return ExitStatusType.Ok;
}
57 
58 private:
59 
/** Analyzes translation units for mutation points and `NOMUT` comments and
 * stores the result in the database.
 *
 * The constructor opens a database transaction and removes all files from the
 * database. `finalize` commits the transaction, so it must be called after the
 * last call to `process` for the result to be saved.
 */
struct Analyzer {
    import std.regex : Regex, regex, matchFirst;
    import std.typecons : NullableRef, Nullable, Yes;
    import miniorm : Transaction;
    import cpptooling.analyzer.clang.context : ClangContext;
    import cpptooling.utility.virtualfilesystem;
    import dextool.compilation_db : SearchResult;
    import dextool.type : FileName, Exists, makeExists;
    import dextool.utility : analyzeFile;

    private {
        /// Matches a `//` or `/*` comment that starts with NOMUT, followed by
        /// an optional `(tag)` and an optional free text comment.
        static immutable raw_re_nomut = `^((//)|(/\*))\s*NOMUT\s*(\((?P<tag>.*)\))?\s*((?P<comment>.*)\*/|(?P<comment>.*))?`;

        // `analyzed_files` and `files_with_mutations` are not by necessity the
        // same. Input could be a file that is excluded via --restrict but pull
        // in a header-only library that is allowed to be mutated.
        Set!AbsolutePath analyzed_files;
        Set!AbsolutePath files_with_mutations;

        /// The files that were in the database when the analyzer was created.
        Set!Path before_files;

        NullableRef!Database db;

        ValidateLoc val_loc;
        FilesysIO fio;
        ConfigCompiler conf;

        Cache cache;

        /// Compiled form of `raw_re_nomut`.
        Regex!char re_nomut;

        /// Opened by the constructor, committed by `finalize`.
        Transaction trans;
    }

    /** Prepare the database for a new analysis.
     *
     * The files currently in the database are remembered in `before_files`
     * before a transaction is opened and all files are removed.
     *
     * Params:
     *  db = the database to update; a pointer to it is stored so it must
     *       outlive the analyzer
     *  val_loc = forwarded to the root visitor
     *  fio = used for the output directory and for relative paths
     *  conf = compiler configuration; `forceSystemIncludes` is read
     */
    this(ref Database db, ValidateLoc val_loc, FilesysIO fio, ConfigCompiler conf) @trusted {
        this.db = &db;
        // must be read before the files are removed below.
        this.before_files = db.getFiles.toSet;
        this.val_loc = val_loc;
        this.fio = fio;
        this.conf = conf;
        this.cache = new Cache;
        this.re_nomut = regex(raw_re_nomut);

        trans = db.transaction;
        db.removeAllFiles;
    }

    /** Analyze `in_file` for mutants and thereafter the files with mutation
     * points for comments.
     *
     * A file that do not exist is logged as a warning and skipped. A file that
     * has already been analyzed is silently skipped.
     */
    void process(SearchResult in_file) @safe {
        // TODO: this should be generic for Dextool.
        in_file.flags.forceSystemIncludes = conf.forceSystemIncludes;

        // find the file and flags to analyze
        Exists!AbsolutePath checked_in_file;
        try {
            checked_in_file = makeExists(in_file.absoluteFile);
        } catch (Exception e) {
            logger.warning(e.msg);
            return;
        }

        if (analyzed_files.contains(checked_in_file))
            return;

        analyzed_files.add(checked_in_file);

        () @trusted {
            // `tstream` refers to `ctx` thus both live in this scope.
            auto ctx = ClangContext(Yes.useInternalHeaders, Yes.prependParamSyntaxOnly);
            auto tstream = new TokenStreamImpl(ctx);

            auto files = analyzeForMutants(in_file, checked_in_file, ctx, tstream);
            // TODO: filter files so they are only analyzed once for comments
            foreach (f; files)
                analyzeForComments(f, tstream);
        }();
    }

    /** Visit the translation unit and save the found files and mutation points
     * in the database.
     *
     * A failure to save one of the files is logged as a warning and do not
     * stop the remaining files from being saved.
     *
     * Returns: the path of each file that the visitor reported mutation points for.
     */
    Path[] analyzeForMutants(SearchResult in_file,
            Exists!AbsolutePath checked_in_file, ref ClangContext ctx, TokenStream tstream) @safe {
        import std.algorithm : map;
        import std.array : array;

        auto root = makeRootVisitor(fio, val_loc, tstream, cache);
        analyzeFile(checked_in_file, in_file.flags.completeFlags, root.visitor, ctx);

        foreach (a; root.mutationPointFiles) {
            auto abs_path = AbsolutePath(a.path.FileName);
            analyzed_files.add(abs_path);
            files_with_mutations.add(abs_path);

            auto relp = trustedRelativePath(a.path.FileName, fio.getOutputDir);

            try {
                auto f_status = isFileChanged(db, relp, a.cs);
                if (f_status == FileStatus.changed) {
                    logger.infof("Updating analyze of '%s'", a);
                }

                db.put(Path(relp), a.cs, a.lang);
            } catch (Exception e) {
                logger.warning(e.msg);
            }
        }

        db.put(root.mutationPoints, fio.getOutputDir);
        return root.mutationPointFiles.map!(a => a.path).array;
    }

    /** Save the line metadata for each `NOMUT` comment found in `file`.
     *
     * Tokens are always from the same file.
     *
     * A file that is not in the database is logged as a warning and skipped.
     */
    void analyzeForComments(Path file, TokenStream tstream) @trusted {
        import std.algorithm : filter;
        import std.array : appender;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.database : LineMetadata, FileId, LineAttr, NoMut;

        const fid = db.getFileId(fio.toRelativeRoot(file));
        if (fid.isNull) {
            logger.warningf("File with suppressed mutants (// NOMUT) not in the DB: %s. Skipping...",
                    file);
            return;
        }

        auto mdata = appender!(LineMetadata[])();
        foreach (t; cache.getTokens(AbsolutePath(file), tstream)
                .filter!(a => a.kind == CXTokenKind.comment)) {
            auto m = matchFirst(t.spelling, re_nomut);
            // zero means that the comment did not match the NOMUT pattern.
            if (m.whichPattern == 0)
                continue;

            mdata.put(LineMetadata(fid.get, t.loc.line, LineAttr(NoMut(m["tag"], m["comment"]))));
            logger.tracef("NOMUT found at %s:%s:%s", file, t.loc.line, t.loc.column);
        }

        db.put(mdata.data);
    }

    /** Clean up the database, commit the transaction that the constructor
     * opened and report what has been lost or removed since the last analysis.
     */
    void finalize() @trusted {
        import dextool.plugin.mutate.backend.test_mutant.timeout : resetTimeoutContext;

        resetTimeoutContext(db);
        db.removeOrphanedMutants;
        printLostMarkings(db.getLostMarkings);

        trans.commit;

        printPrunedFiles(before_files, files_with_mutations, fio.getOutputDir);
    }
}
211 
@("shall extract the tag and comment from the input following the pattern NOMUT with optional tag and comment")
unittest {
    import std.regex : regex, matchFirst;

    auto re_nomut = regex(Analyzer.raw_re_nomut);

    // NOMUT in other type of comments should NOT match.
    matchFirst("/// NOMUT", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("// stuff with NOMUT in it", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/** NOMUT*/", re_nomut).whichPattern.shouldEqual(0);
    matchFirst("/* stuff with NOMUT in it */", re_nomut).whichPattern.shouldEqual(0);

    // a plain NOMUT, with or without whitespace, in both comment styles.
    matchFirst("/*NOMUT*/", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("/*NOMUT*/", re_nomut)["comment"].shouldEqual("");
    matchFirst("//NOMUT", re_nomut).whichPattern.shouldEqual(1);
    matchFirst("// NOMUT", re_nomut).whichPattern.shouldEqual(1);

    // the optional tag and comment, separately and combined.
    matchFirst("// NOMUT (arch)", re_nomut)["tag"].shouldEqual("arch");
    matchFirst("// NOMUT smurf", re_nomut)["comment"].shouldEqual("smurf");
    auto m = matchFirst("// NOMUT (arch) smurf", re_nomut);
    m["tag"].shouldEqual("arch");
    m["comment"].shouldEqual("smurf");
}
235 
/** Token stream backed by a `ClangContext`.
 *
 * `getTokens` returns every token of a file. `getFilteredTokens` returns the
 * same tokens with the comment tokens removed.
 */
class TokenStreamImpl : TokenStream {
    import std.typecons : NullableRef, nullableRef;
    import cpptooling.analyzer.clang.context : ClangContext;
    import dextool.plugin.mutate.backend.type : Token;

    NullableRef!ClangContext ctx;

    /// Params:
    ///  ctx = only a reference is stored thus it must outlive any instance of this class.
    this(ref ClangContext ctx) {
        this.ctx = nullableRef(&ctx);
    }

    /// Returns: all tokens in `p`, comments included.
    Token[] getTokens(Path p) {
        import dextool.plugin.mutate.backend.utility : tokenize;

        auto all_tokens = tokenize(ctx, p);
        return all_tokens;
    }

    /// Returns: the tokens in `p` that should affect the checksum, which is
    /// all tokens that are not a comment.
    Token[] getFilteredTokens(Path p) {
        import std.array : array;
        import std.algorithm : filter;
        import clang.c.Index : CXTokenKind;
        import dextool.plugin.mutate.backend.utility : tokenize;

        auto all_tokens = tokenize(ctx, p);
        auto without_comments = all_tokens.filter!(tok => !(tok.kind == CXTokenKind.comment));
        return without_comments.array;
    }
}
265 
/// The result of comparing a file's checksum with the one stored in the database.
enum FileStatus {
    /// The file is not classified as changed.
    noChange,
    /// The database reports that the file has not been analyzed.
    notInDatabase,
    /// The stored checksum differs from the checksum of the file.
    changed,
}
271 
/** Log the files that were in the database before the analysis but are not
 * part of the new result.
 *
 * Params:
 *  before_files = paths, relative to `root_dir`, that were in the database before the analysis
 *  analyzed_files = absolute paths of the files that are part of the new result
 *  root_dir = directory used to make the paths in `before_files` absolute
 */
void printPrunedFiles(ref Set!Path before_files,
        ref Set!AbsolutePath analyzed_files, const AbsolutePath root_dir) @safe {
    import dextool.type : FileName;

    auto root = DirName(cast(string) root_dir);

    foreach (const old_file; before_files.toRange) {
        auto resolved = AbsolutePath(FileName(old_file), root);

        if (!analyzed_files.contains(resolved))
            logger.infof("Removed from files to mutate: '%s'", resolved);
    }
}
282 
/** Classify the file `relp` by comparing the checksum stored in the database
 * with `f_checksum`.
 *
 * Params:
 *  db = the database to query
 *  relp = path of the file as it is stored in the database
 *  f_checksum = checksum of the file as it is now
 *
 * Returns: `notInDatabase` if the database reports that the file has not been
 * analyzed, `changed` if a stored checksum exists and differs from
 * `f_checksum`, otherwise `noChange`.
 */
FileStatus isFileChanged(ref Database db, Path relp, Checksum f_checksum) @safe {
    if (!db.isAnalyzed(relp))
        return FileStatus.notInDatabase;

    auto db_checksum = db.getFileChecksum(relp);

    // The stored checksum is unwrapped with `.get`, guarded by `isNull`,
    // instead of relying on an implicit conversion of the nullable value.
    // NOTE(review): assumes `getFileChecksum` returns a Nullable!Checksum - confirm.
    auto rval = (!db_checksum.isNull && db_checksum.get != f_checksum) ? FileStatus.changed
        : FileStatus.noChange;
    debug logger.trace(rval == FileStatus.changed, "db: ", db_checksum, " file: ", f_checksum);

    return rval;
}
295 
/** Check `f` against each directory in `roots`.
 *
 * Returns: true if `f` is inside any of `roots`, false otherwise, which
 * includes when `roots` is empty.
 */
bool isPathInsideAnyRoot(AbsolutePath[] roots, AbsolutePath f) @safe {
    import dextool.utility : isPathInsideRoot;

    bool is_inside = false;

    foreach (candidate; roots) {
        is_inside = isPathInsideRoot(candidate, f);
        // one match is enough, the remaining roots do not need to be checked.
        if (is_inside)
            break;
    }

    return is_inside;
}
307 
/** Report the marked mutants that have become lost due to a rerun of analyze.
 *
 * A warning is logged and a table with one row per mutant is written to
 * stdout. Nothing is printed when `lostMutants` is empty.
 */
void printLostMarkings(MarkedMutant[] lostMutants) {
    import std.array : empty;
    import std.conv : to;
    import std.stdio : writeln;

    if (!lostMutants.empty) {
        Table!6 tbl = Table!6(["ID", "File", "Line", "Column", "Status", "Rationale"]);

        foreach (lost; lostMutants) {
            typeof(tbl).Row row = [
                lost.mutationId.to!string, lost.path, lost.line.to!string,
                lost.column.to!string, statusToString(lost.toStatus), lost.rationale
            ];
            tbl.put(row);
        }

        logger.warning("Marked mutants was lost");
        writeln(tbl);
    }
}