1 /**
2 Copyright: Copyright (c) 2016-2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 Utility functions for Clang Compilation Databases.
11 
12 # Usage
13 Call the function `fromArgCompileDb` to create one, merged database.
14 
Extract the flags for a file by calling `appendOrError`.
16 
17 Example:
18 ---
auto dbs = fromArgCompileDb(["foo.json"]);
auto flags = dbs.appendOrError(null, "foo.cpp", defaultCompilerFilter);
21 ---
22 */
23 module dextool.compilation_db;
24 
25 import logger = std.experimental.logger;
26 import std.exception : collectException;
27 import std.json : JSONValue;
28 import std.typecons : Nullable;
29 
30 import dextool.type : AbsolutePath, Path;
31 
32 public import dextool.compilation_db.user_filerange;
33 public import dextool.compilation_db.system_compiler : deduceSystemIncludes,
34     SystemIncludePath, Compiler;
35 
36 version (unittest) {
37     import std.path : buildPath;
38     import unit_threaded : shouldEqual;
39 }
40 
41 @safe:
42 
43 /** Hold an entry from the compilation database.
44  *
45  * The following information is from the official specification.
46  * $(LINK2 http://clang.llvm.org/docs/JSONCompilationDatabase.html, Standard)
47  *
48  * directory: The working directory of the compilation. All paths specified in
49  * the command or file fields must be either absolute or relative to this
50  * directory.
51  *
52  * file: The main translation unit source processed by this compilation step.
53  * This is used by tools as the key into the compilation database. There can be
54  * multiple command objects for the same file, for example if the same source
55  * file is compiled with different configurations.
56  *
57  * command: The compile command executed. After JSON unescaping, this must be a
58  * valid command to rerun the exact compilation step for the translation unit
59  * in the environment the build system uses. Parameters use shell quoting and
60  * shell escaping of quotes, with ‘"‘ and ‘\‘ being the only special
61  * characters. Shell expansion is not supported.
62  *
 * arguments: The compile command executed as list of strings. Either arguments
64  * or command is required.
65  *
66  * output: The name of the output created by this compilation step. This field
67  * is optional. It can be used to distinguish different processing modes of the
68  * same input file.
69  *
70  * Dextool additions.
 * The standard does not specify how to treat "directory" when it is a relative
72  * path. The logic chosen in dextool is to treat it as relative to the path
73  * the compilation database file is read from.
74  */
@safe struct CompileCommand {
    import dextool.type : Path, AbsolutePath;

    /// The raw command taken from the tuple's "command" or "arguments" value.
    static struct Command {
        string[] payload;
        alias payload this;

        /// Returns: true when at least one argument is stored.
        bool hasValue() @safe pure nothrow const @nogc {
            return payload.length > 0;
        }
    }

    /// The file that was compiled.
    Path file;
    /// The compiled file as an absolute path.
    AbsolutePath absoluteFile;
    /// Working directory of the command that compiled the input.
    AbsolutePath directory;
    /// The command that was executed when compiling.
    Command command;
    /// The resulting object file.
    Path output;
    /// The resulting object file as an absolute path.
    AbsolutePath absoluteOutput;
}
100 
/// Path to a compilation database file.
struct CompileDbFile {
    Path payload;
    alias payload this;

    /// Wrap the raw string `p` in a `Path`.
    this(string p) @safe pure nothrow @nogc {
        this.payload = Path(p);
    }
}
110 
/// Absolute path of the directory in which the compilation database resides.
struct AbsoluteCompileDbDirectory {
    AbsolutePath payload;
    alias payload this;

    /// Params:
    ///  path = path to the database file; only its directory part is kept.
    this(Path path) {
        import std.path : dirName;

        auto parent = Path(dirName(path));
        payload = AbsolutePath(parent);
    }
}
122 
/// A complete compilation database; behaves like an array of `CompileCommand`
/// through `alias this`.
struct CompileCommandDB {
    CompileCommand[] payload;
    alias payload this;
}
128 
/// The result of searching for a file in a compilation DB.
/// The file may occur more than once, therefore the result is an array.
struct CompileCommandSearch {
    CompileCommand[] payload;
    alias payload this;
}
135 
/** Convert one JSON tuple of a compilation database to a `CompileCommand`.
 *
 * The command is read from the key "command" and, if that is missing or
 * empty, from the key "arguments" (either a string or an array of strings).
 * The key "output" is optional. The keys "directory" and "file" must be
 * present and be strings.
 *
 * Params:
 *  v = the JSON tuple to convert.
 *  db_dir = directory of the compilation database, forwarded to the string
 *      based overload together with the value of "directory".
 *
 * Returns: the converted command or a null `Nullable` if the tuple could not
 * be converted.
 *
 * Trusted: opIndex for JSONValue is @safe in DMD-2.077.0
 * remove the trusted attribute when the minimal requirement is upgraded.
 */
private Nullable!CompileCommand toCompileCommand(JSONValue v, AbsoluteCompileDbDirectory db_dir) nothrow @trusted {
    import std.algorithm : map, filter, splitter;
    import std.array : array;
    import std.range : only;

    // The JSON type enum and its members were renamed in newer frontends.
    static if (__VERSION__ < 2085L) {
        import std.json : JSON_TYPE;

        alias JSONType = JSON_TYPE;
        alias JSONType_array = JSON_TYPE.ARRAY;
        alias JSONType_string = JSON_TYPE.STRING;
    } else {
        import std.json : JSONType;

        alias JSONType_array = JSONType.array;
        alias JSONType_string = JSONType.string;
    }

    string[] command = () {
        string[] cmd;
        try {
            cmd = v["command"].str.splitter.filter!(a => a.length != 0).array;
        } catch (Exception ex) {
            // best effort: a missing or non-string "command" is not an error
            // because "arguments" may be used instead.
        }

        // prefer command over arguments if both are present because of bugs in
        // tools that produce compile_commands.json.
        if (cmd.length != 0)
            return cmd;

        try {
            enum j_arg = "arguments";
            const auto j_type = v[j_arg].type;
            if (j_type == JSONType_string)
                cmd = v[j_arg].str.splitter.filter!(a => a.length != 0).array;
            else if (j_type == JSONType_array) {
                cmd = v[j_arg].arrayNoRef
                    .filter!(a => a.type == JSONType_string)
                    .map!(a => a.str)
                    .filter!(a => a.length != 0)
                    .array;
            }
        } catch (Exception ex) {
            // best effort: the empty result is reported by the caller below.
        }

        return cmd;
    }();

    if (command.length == 0) {
        logger.error("Unable to parse the JSON tuple. Both command and arguments are empty")
            .collectException;
        return typeof(return)();
    }

    // "output" is optional, thus a failure to read it is ignored.
    string output;
    try {
        output = v["output"].str;
    } catch (Exception ex) {
    }

    try {
        const directory = v["directory"];
        const file = v["file"];

        foreach (a; only(directory, file).map!(a => !a.isNull && a.type == JSONType_string)
                .filter!(a => !a)) {
            // sanity check.
            // if any element is false then break early.
            return typeof(return)();
        }

        return toCompileCommand(directory.str, file.str, command, db_dir, output);
    } catch (Exception e) {
        logger.info("Input JSON: ", v.toPrettyString).collectException;
        logger.error("Unable to parse json: ", e.msg).collectException;
    }

    return typeof(return)();
}
223 
/** Build a `CompileCommand` from the already extracted values of a JSON entry.
 *
 * This function is under no circumstances meant to be exposed outside this module.
 * The API is badly designed for common use because the meaning of the strings
 * is determined by their position in the parameter list.
 *
 * Params:
 *  directory = working directory of the compilation, resolved against `db_dir`.
 *  file = the compiled file, resolved against the working directory.
 *  command = the compile command split into arguments.
 *  db_dir = directory of the compilation database.
 *  output = the output of the compilation, resolved against the working directory.
 *
 * Returns: the command or a null `Nullable` if an exception occurred while
 * constructing it.
 */
Nullable!CompileCommand toCompileCommand(string directory, string file,
        string[] command, AbsoluteCompileDbDirectory db_dir, string output) nothrow {
    typeof(return) result;

    try {
        auto workdir = AbsolutePath(Path(directory), db_dir);
        auto resolved_file = AbsolutePath(Path(file), workdir);
        auto resolved_output = AbsolutePath(Path(output), workdir);

        result = CompileCommand(Path(file), resolved_file, workdir,
                CompileCommand.Command(command), Path(output), resolved_output);
    } catch (Exception ex) {
        logger.error("Unable to parse json: ", ex.msg).collectException;
    }

    return result;
}
256 
/** Parse a CompilationDatabase.
 *
 * Tuples that can not be converted are dropped. Errors are logged, never
 * thrown.
 *
 * Params:
 *  raw_input = the content of the CompilationDatabase.
 *  db = path to the compilation database file.
 *  out_range = range to write the output to.
 */
private void parseCommands(T)(string raw_input, CompileDbFile db, ref T out_range) nothrow {
    import std.json : parseJSON;

    // Convert each element of the JSON array `v` and write the valid ones to `sink`.
    static void put(R)(JSONValue v, AbsoluteCompileDbDirectory dbdir, ref R sink) nothrow {
        try {
            // A plain loop is used instead of a lazy map/filter/map chain
            // because `map` does not cache its front, which resulted in every
            // valid tuple being converted twice.
            foreach (tuple; v.array()) {
                auto converted = toCompileCommand(tuple, dbdir);
                // remove invalid
                if (!converted.isNull)
                    sink.put(converted.get);
            }
        } catch (Exception ex) {
            logger.error("Unable to parse json:", ex.msg).collectException;
        }
    }

    try {
        // trusted: is@safe in DMD-2.077.0
        // remove the trusted attribute when the minimal requirement is upgraded.
        auto json = () @trusted { return parseJSON(raw_input); }();
        auto as_dir = AbsoluteCompileDbDirectory(db.AbsolutePath);

        // trusted: this function is private so the only user of it is this module.
        // the only problem would be in the out_range. It is assumed that the
        // out_range takes care of the validation and other security aspects.
        () @trusted { put(json, as_dir, out_range); }();
    } catch (Exception ex) {
        logger.error("Error while parsing compilation database: " ~ ex.msg).collectException;
    }
}
301 
/** Read the compilation database `filename` and write its commands to `app`.
 *
 * A warning is logged if the file is empty.
 */
void fromFile(T)(CompileDbFile filename, ref T app) {
    import std.file : readText;

    auto content = readText(filename);
    if (content.length == 0) {
        logger.warning("File is empty: ", filename);
    }

    parseCommands(content, filename, app);
}
311 
/** Read all compilation databases in `fnames` and write their commands to `app`.
 *
 * Throws: Exception on the first file that does not exist.
 */
void fromFiles(T)(CompileDbFile[] fnames, ref T app) {
    import std.file : exists;

    foreach (db_file; fnames) {
        if (exists(db_file)) {
            fromFile(db_file, app);
        } else {
            throw new Exception("File do not exist: " ~ db_file);
        }
    }
}
321 
/** Convert the paths to `CompileDbFile`s.
 *
 * Returns: the converted paths or, if `cli_path` is empty, the default
 * database "compile_commands.json".
 */
CompileDbFile[] orDefaultDb(string[] cli_path) @safe pure nothrow {
    if (cli_path.length == 0)
        return [CompileDbFile("compile_commands.json")];

    auto dbs = new CompileDbFile[](cli_path.length);
    foreach (idx, raw_path; cli_path) {
        dbs[idx] = CompileDbFile(raw_path);
    }
    return dbs;
}
334 
/** Search the compilation database for an entry matching `glob`.
 *
 * An entry matches if its file or output, either as written in the DB or as
 * an absolute path, is equal to `glob`, or if the absolute file/output matches
 * `glob` as a glob pattern. The search stops at the first matching entry.
 *
 * An error is logged if no entry matches.
 *
 * Params:
 *  db = the database to search in.
 *  glob = glob pattern to find a matching file in the DB against
 *
 * Returns: the first matching entry or an empty result.
 */
CompileCommandSearch find(CompileCommandDB db, string glob) @safe
in {
    debug logger.trace("Looking for " ~ glob);
}
out (result) {
    import std.conv : to;

    debug logger.trace("Found " ~ to!string(result));
}
body {
    import std.path : globMatch;

    foreach (entry; db) {
        // the checks are evaluated in this order and stop at the first hit.
        const is_match = entry.absoluteFile == glob || entry.file == glob
            || globMatch(entry.absoluteFile, glob) || entry.absoluteOutput == glob
            || entry.output == glob || globMatch(entry.absoluteOutput, glob);

        if (is_match)
            return CompileCommandSearch([entry]);
    }

    logger.errorf("\n%s\nNo match found in the compile command database", db.toString);

    return CompileCommandSearch();
}
375 
/// The flags found for a file together with the absolute path of the file.
struct SearchResult {
    ParseFlags flags;
    AbsolutePath absoluteFile;

    this(ParseFlags flags, AbsolutePath p) {
        this.absoluteFile = p;
        this.flags = flags;
    }

    // TODO: consider deprecating.
    /// Construct from raw flags; the includes of the `ParseFlags` are left empty.
    this(string[] flags, AbsolutePath p) {
        auto parsed = ParseFlags(null, flags);
        this(parsed, p);
    }

    // TODO: consider deprecating.
    /// Returns: the complete flags of `flags`.
    string[] cflags() @safe pure nothrow const {
        return this.flags.completeFlags();
    }
}
395 
/** Append the compiler flags if a match is found in the DB or error out.
 *
 * Convenience overload that uses `defaultCompilerFilter` as the flag filter.
 */
Nullable!(SearchResult) appendOrError(ref CompileCommandDB compilation_db,
        const string[] cflags, const string input_file, const Compiler user_compiler = Compiler
        .init) @safe {
    auto result = appendOrError(compilation_db, cflags, input_file,
            defaultCompilerFilter, user_compiler);
    return result;
}
404 
/** Append the compiler flags if a match is found in the DB or error out.
 *
 * The flags of the first match for `input_file` are parsed with `flag_filter`
 * and `cflags` is prepended to them.
 *
 * Returns: the flags and the absolute path of the matched file, or a null
 * `Nullable` (after logging a warning) if the file is not in the database.
 *
 * TODO: consider using exceptions instead of Nullable.
 */
Nullable!SearchResult appendOrError(ref CompileCommandDB compilation_db, const string[] cflags, const string input_file,
        const CompileCommandFilter flag_filter, const Compiler user_compiler = Compiler.init) @safe {

    auto matches = compilation_db.find(input_file.idup);
    debug {
        logger.trace(matches.length > 0,
                "CompilationDatabase match (by filename):\n", matches.toString);
        if (matches.length == 0) {
            logger.trace(compilation_db.toString);
        }

        logger.tracef("CompilationDatabase filter: %s", flag_filter);
    }

    typeof(return) rval;

    // guard: nothing to append to when the file is unknown.
    if (matches.length == 0) {
        logger.warning("File not found in compilation database: ", input_file);
        return rval;
    }

    auto parsed = matches[0].parseFlag(flag_filter, user_compiler);
    parsed.prependCflags(cflags.dup);

    rval = SearchResult.init;
    rval.get.flags = parsed;
    rval.get.absoluteFile = matches[0].absoluteFile;

    return rval;
}
437 
/** Format the commands as human readable text.
 *
 * Each entry is written as the directory followed by the indented file and
 * absolute file. The output paths and the command are only written when they
 * are not empty.
 */
string toString(CompileCommand[] db) @safe pure {
    import std.array;
    import std.format : formattedWrite;

    auto buf = appender!string();

    foreach (entry; db) {
        buf.formattedWrite("%s\n  %s\n  %s\n", entry.directory, entry.file, entry.absoluteFile);

        if (!entry.output.empty) {
            buf.formattedWrite("  %s\n", entry.output);
            buf.formattedWrite("  %s\n", entry.absoluteOutput);
        }

        if (!entry.command.empty) {
            buf.formattedWrite("  %-(%s %)\n", entry.command);
        }
    }

    return buf.data;
}
460 
/// Format all commands of the database, see `toString(CompileCommand[])`.
string toString(CompileCommandDB db) @safe pure {
    return toString(db.payload);
}
464 
/// Format all commands of the search result, see `toString(CompileCommand[])`.
string toString(CompileCommandSearch search) @safe pure {
    return toString(search.payload);
}
468 
/// The default filter: excludes the flags from `defaultCompilerFlagFilter` and
/// skips no leading arguments of the command.
const auto defaultCompilerFilter = CompileCommandFilter(defaultCompilerFlagFilter, 0);
470 
/// Returns: array of default flags to exclude.
auto defaultCompilerFlagFilter() @safe {
    // dfmt off
    static immutable string[] excluded = [
        // remove basic compile flag irrelevant for AST generation
        "-c", "-o",
        // machine dependent flags
        "-m",
        // machine dependent flags, AVR
        "-nodevicelib", "-Waddr-space-convert",
        // machine dependent flags, VxWorks
        "-non-static", "-Bstatic", "-Bdynamic", "-Xbind-lazy", "-Xbind-now",
        // blacklist all -f because most are not compatible with clang
        "-f",
        // linker flags, irrelevant for the AST
        "-static", "-shared", "-rdynamic", "-s", "-l", "-L", "-z", "-u", "-T", "-Xlinker",
        // a linker flag with filename as one argument
        "-l",
        // remove some of the preprocessor flags, irrelevant for the AST
        "-MT", "-MF", "-MD", "-MQ", "-MMD", "-MP", "-MG", "-E", "-cc1", "-S", "-M", "-MM", "-###",
    ];
    // dfmt on

    // one filter per flag, in the same order as the list above.
    auto filters = new FilterClangFlag[](excluded.length);
    foreach (idx, flag; excluded) {
        filters[idx] = FilterClangFlag(flag);
    }

    return filters;
}
502 
/// Configuration of how `parseFlag` filters a compile command.
struct CompileCommandFilter {
    /// Flags to remove from the command.
    FilterClangFlag[] filter;
    /// Number of leading arguments of the command to drop before filtering.
    int skipCompilerArgs = 0;
}
507 
/// Parsed compiler flags.
struct ParseFlags {
    /// An include path used in the compile command.
    static struct Include {
        string payload;
        alias payload this;
    }

    private {
        bool forceSystemIncludes_;
    }

    /// The includes used in the compile command.
    Include[] includes;

    /// System include paths extracted from the compiler used for the file.
    SystemIncludePath[] systemIncludes;

    /// Specific flags for the file as parsed from the DB.
    string[] cflags;

    /// Compiler used to compile the item.
    Compiler compiler;

    /// Put `v` in front of the current flags.
    void prependCflags(string[] v) {
        cflags = v ~ cflags;
    }

    /// Put `v` after the current flags.
    void appendCflags(string[] v) {
        cflags ~= v;
    }

    /// Set to true to use -I instead of -isystem for system includes.
    /// Returns: a copy of this object after the change.
    auto forceSystemIncludes(bool v) {
        forceSystemIncludes_ = v;
        return this;
    }

    /// Returns: true if there is at least one system include path.
    bool hasSystemIncludes() @safe pure nothrow const @nogc {
        return systemIncludes.length > 0;
    }

    /// Returns: the compiler and the complete flags formatted as text.
    string toString() @safe pure const {
        import std.format : format;

        return format("Compiler: %-(%s %) flags: %-(%s %)", compiler, completeFlags);
    }

    /** Easy to use method that has the complete flags ready to use with a GCC
     * compliant compiler.
     *
     * Each system include is added as a pair of flags, by default with
     * -isystem and with -I when system includes are forced.
     *
     * Returns: flags with the system flags appended.
     */
    string[] completeFlags() @safe pure nothrow const {
        import std.algorithm : map, joiner;
        import std.array : array;

        auto include_flag = forceSystemIncludes_ ? "-I" : "-isystem";
        auto system_flags = systemIncludes.map!(a => [include_flag, a.value]).joiner.array;

        return cflags.idup ~ system_flags;
    }

    alias completeFlags this;

    /// Construct with the default compiler and no system includes.
    this(Include[] incls, string[] flags) {
        this(Compiler.init, incls, SystemIncludePath[].init, flags);
    }

    /// Construct without any system includes.
    this(Compiler compiler, Include[] incls, string[] flags) {
        this(compiler, incls, null, flags);
    }

    /// Construct with all fields given explicitly.
    this(Compiler compiler, Include[] incls, SystemIncludePath[] sysincls, string[] flags) {
        this.cflags = flags;
        this.systemIncludes = sysincls;
        this.includes = incls;
        this.compiler = compiler;
    }
}
589 
/** Filter and normalize the compiler flags.
 *
 *  - Sanitize the compiler command by removing flags matching the filter.
 *  - Remove excess white space.
 *  - Convert all filenames to absolute path.
 *
 * Params:
 *  cmd = the compile command to extract the flags from.
 *  flag_filter = flags to exclude and the number of leading arguments to skip.
 *  user_compiler = if not empty it is used instead of the compiler found in
 *      the command.
 *
 * Returns: the filtered flags, the includes, the compiler and the system
 * includes deduced for that compiler.
 */
ParseFlags parseFlag(CompileCommand cmd, const CompileCommandFilter flag_filter,
        const Compiler user_compiler = Compiler.init) @safe {
    import std.algorithm : among, map, strip, startsWith, filter, count;
    import std.string : empty, split;

    /// Returns: true if any exclude filter matches `raw_flag`.
    static bool excludeStartWith(const string raw_flag, const FilterClangFlag[] flag_filter) @safe {
        // the purpose is to find if any of the flags in flag_filter matches
        // the start of flag.

        bool delegate(const FilterClangFlag) @safe cmp;

        const parts = raw_flag.split('=');
        if (parts.length == 2) {
            // is a -foo=bar flag thus exact match is the only sensible
            cmp = (const FilterClangFlag a) => raw_flag == a.payload;
        } else {
            // the flag has the argument merged thus have to check if the start match
            cmp = (const FilterClangFlag a) => raw_flag.startsWith(a.payload);
        }

        // dfmt off
        return 0 != flag_filter
            .filter!(a => a.kind == FilterClangFlag.Kind.exclude)
            // keep flags that are at least the length of values
            .filter!(a => raw_flag.length >= a.length)
            // if the flag is any of those in filter
            .filter!cmp
            .count();
        // dfmt on
    }

    static bool isQuotationMark(char c) @safe {
        return c == '"';
    }

    static bool isBackslash(char c) @safe {
        return c == '\\';
    }

    /// Returns: true if the flag starts with -I.
    static bool isInclude(string flag) @safe {
        return flag.length >= 2 && flag[0 .. 2] == "-I";
    }

    static bool isCombinedIncludeFlag(string flag) @safe {
        // if an include flag make it absolute, as one argument by checking
        // length. 3 is to only match those that are -Ixyz
        return flag.length >= 3 && isInclude(flag);
    }

    static bool isNotAFlag(string flag) @safe {
        // good enough if it seem to be a file
        return flag.length >= 1 && flag[0] != '-';
    }

    /// Flags that take an argument that is a path that need to be transformed
    /// to an absolute path.
    static bool isFlagAndPath(string flag) @safe {
        // list derived from clang --help
        return 0 != flag.among("-I", "-idirafter", "-iframework", "-imacros", "-include-pch",
                "-include", "-iquote", "-isysroot", "-isystem-after", "-isystem", "--sysroot");
    }

    /// Flags that take an argument that is NOT a path.
    static bool isFlagAndValue(string flag) @safe {
        return 0 != flag.among("-D");
    }

    /// Flags that are includes, but contains spaces, are wrapped in quotation marks (or slash).
    static bool isIncludeWithQuotationMark(string flag) @safe {
        // length is checked in isCombinedIncludeFlag
        return isCombinedIncludeFlag(flag) && (isQuotationMark(flag[2]) || isBackslash(flag[2]));
    }

    /// Flags that are paths and contain spaces will start with a quotation mark (or slash).
    static bool isStartingWithQuotationMark(string flag) @safe {
        return !flag.empty && (isQuotationMark(flag[0]) || isBackslash(flag[0]));
    }

    /// When we know we are building a path that is space separated,
    /// the last index of the last string should be a quotation mark.
    static bool isEndingWithQuotationMark(string flag) @safe {
        return !flag.empty && isQuotationMark(flag[$ - 1]);
    }

    /// Run the arguments `r` through a state machine that drops the flags
    /// matching `flag_filter`, drops arguments that are not flags and makes
    /// include paths absolute relative to `workdir`.
    static ParseFlags filterPair(string[] r, AbsolutePath workdir,
            const FilterClangFlag[] flag_filter) @safe {
        enum State {
            /// keep the next flag IF none of the other transitions happens
            keep,
            /// forcefully keep the next argument as raw data
            priorityKeepNextArg,
            /// keep the next argument and transform to an absolute path
            pathArgumentToAbsolute,
            /// skip the next arg
            skip,
            /// skip the next arg, if it is not a flag
            skipIfNotFlag,
            /// use the next arg to create a complete path
            checkingForEndQuotation,
        }

        import std.array : Appender, appender, array, join;
        import std.range : ElementType;

        auto st = State.keep;
        auto rval = appender!(string[]);
        auto includes = appender!(string[]);
        // the first argument, if any, is taken as the compiler.
        auto compiler = Compiler(r.length == 0 ? null : r[0]);
        // accumulates a quoted path that is split over multiple arguments.
        auto path = appender!(char[])();

        string removeBackslashesAndQuotes(string arg) {
            import std.conv : text;
            import std.uni : byCodePoint, byGrapheme, Grapheme;

            return arg.byGrapheme.filter!(a => !a.among(Grapheme('\\'),
                    Grapheme('"'))).byCodePoint.text;
        }

        // the resulting path is added to both the flags and the includes.
        void putNormalizedAbsolute(string arg) {
            import std.path : buildNormalizedPath, absolutePath;

            auto p = buildNormalizedPath(workdir, removeBackslashesAndQuotes(arg)).absolutePath;
            rval.put(p);
            includes.put(p);
        }

        foreach (arg; r) {
            // First states and how to handle those.
            // Then transitions from the state keep, which is the default state.
            //
            // The user controlled excludeStartWith must be before any other
            // conditions after the states. It is to give the user the ability
            // to filter out any flag.

            if (st == State.skip) {
                st = State.keep;
            } else if (st == State.skipIfNotFlag && isNotAFlag(arg)) {
                st = State.keep;
            } else if (st == State.pathArgumentToAbsolute) {
                if (isStartingWithQuotationMark(arg)) {
                    if (isEndingWithQuotationMark(arg)) {
                        st = State.keep;
                        putNormalizedAbsolute(arg);
                    } else {
                        st = State.checkingForEndQuotation;
                        path.put(arg);
                    }
                } else {
                    st = State.keep;
                    putNormalizedAbsolute(arg);
                }
            } else if (st == State.priorityKeepNextArg) {
                st = State.keep;
                rval.put(arg);
            } else if (st == State.checkingForEndQuotation) {
                path.put(" ");
                path.put(arg);
                if (isEndingWithQuotationMark(arg)) {
                    // the end of a divided path
                    st = State.keep;
                    putNormalizedAbsolute(path.data.idup);
                    path.clear;
                }
            } else if (excludeStartWith(arg, flag_filter)) {
                st = State.skipIfNotFlag;
            } else if (isIncludeWithQuotationMark(arg)) {
                rval.put("-I");
                if (arg.length >= 4) {
                    if (isEndingWithQuotationMark(arg)) {
                        // the path is wrapped in quotes (ex ['-I"path/to src"'] or ['-I\"path/to src\"'])
                        putNormalizedAbsolute(arg[2 .. $]);
                    } else {
                        // the path is divided (ex ['-I"path/to', 'src"'] or ['-I\"path/to', 'src\"'])
                        st = State.checkingForEndQuotation;
                        path.put(arg[2 .. $]);
                    }
                }
            } else if (isCombinedIncludeFlag(arg)) {
                // split -Ixyz into the flag and the absolute path.
                rval.put("-I");
                putNormalizedAbsolute(arg[2 .. $]);
            } else if (isFlagAndPath(arg)) {
                rval.put(arg);
                st = State.pathArgumentToAbsolute;
            } else if (isFlagAndValue(arg)) {
                rval.put(arg);
                st = State.priorityKeepNextArg;
            }  // parameters that seem to be filenames are removed
            else if (isNotAFlag(arg)) {
                // skipping
            } else {
                rval.put(arg);
            }
        }
        return ParseFlags(compiler, includes.data.map!(a => ParseFlags.Include(a)).array, rval.data);
    }

    import std.algorithm : filter, splitter, min;

    // the leading arguments are only dropped when skipCompilerArgs is
    // non-zero and the command has more arguments than that.
    string[] skipArgs = () @safe {
        string[] args;
        if (cmd.command.hasValue)
            args = cmd.command.payload.dup;
        if (args.length > flag_filter.skipCompilerArgs && flag_filter.skipCompilerArgs != 0)
            args = args[min(flag_filter.skipCompilerArgs, args.length) .. $];
        return args;
    }();

    auto pargs = filterPair(skipArgs, cmd.directory, flag_filter.filter);
    // a compiler specified by the user takes precedence over the one in the command.
    auto compiler = user_compiler.length == 0 ? pargs.compiler : user_compiler;

    // a failure to deduce the system includes is logged and results in no
    // system includes.
    auto sysincls = () {
        try {
            import dextool.compilation_db.system_compiler : deduceSystemIncludes;

            return deduceSystemIncludes(cmd, compiler);
        } catch (Exception e) {
            logger.info(e.msg);
        }
        return SystemIncludePath[].init;
    }();

    return ParseFlags(compiler, pargs.includes, sysincls, pargs.cflags);
}
819 
/** Convert the string to a CompileCommandDB.
 *
 * Params:
 * data = input to convert
 * path = path used as the location of the compilation database file when
 *      parsing `data`
 */
CompileCommandDB toCompileCommandDB(string data, Path path) @safe {
    import std.array : appender;

    auto commands = appender!(CompileCommand[])();
    auto db_file = CompileDbFile(cast(string) path);
    parseCommands(data, db_file, commands);

    return CompileCommandDB(commands.data);
}
833 
/// Import and merge many compilation databases, given as absolute paths, into one DB.
CompileCommandDB fromArgCompileDb(AbsolutePath[] paths) @safe {
    string[] raw_paths;
    foreach (p; paths) {
        raw_paths ~= cast(string) p;
    }

    return fromArgCompileDb(raw_paths);
}
840 
/// Import and merge many compilation databases into one DB.
/// The default database is used when `paths` is empty.
CompileCommandDB fromArgCompileDb(string[] paths) @safe {
    import std.array : appender;

    auto merged = appender!(CompileCommand[])();
    auto db_files = orDefaultDb(paths);
    fromFiles(db_files, merged);

    return CompileCommandDB(merged.data);
}
850 
/// Flags to exclude from the flags passed on to the clang parser.
struct FilterClangFlag {
    /// The flag, or the start of a flag, to match against.
    string payload;
    alias payload this;

    /// What to do with a flag that matches.
    enum Kind {
        /// Remove the flag.
        exclude
    }

    /// The kind of this filter; `exclude` is the only kind and thus the default.
    Kind kind;
}
862 
@("Should be cflags with all unnecessary flags removed")
unittest {
    // preprocessor, linker and compile-only flags plus the source file shall be dropped.
    string[] args = [
        "g++", "-MD", "-lfoo.a", "-l", "bar.a", "-I", "bar", "-Igun", "-c",
        "a_filename.c"
    ];
    auto db_dir = AbsoluteCompileDbDirectory("/home".Path.AbsolutePath);
    auto cmd = toCompileCommand("/home", "file1.cpp", args, db_dir, null);

    auto parsed = parseFlag(cmd.get, defaultCompilerFilter, Compiler.init);
    parsed.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
    parsed.includes.shouldEqual(["/home/bar", "/home/gun"]);
}
873 
874 @("Should be cflags with some excess spacing")
875 unittest {
876     auto cmd = toCompileCommand("/home", "file1.cpp", [
877             "g++", "-MD", "-lfoo.a", "-l", "bar.a", "-I", "bar", "-Igun"
878             ], AbsoluteCompileDbDirectory("/home".Path.AbsolutePath), null);
879 
880     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
881     s.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
882     s.includes.shouldEqual(["/home/bar", "/home/gun"]);
883 }
884 
885 @("Should be cflags with machine dependent removed")
886 unittest {
887     auto cmd = toCompileCommand("/home", "file1.cpp", [
888             "g++", "-mfoo", "-m", "bar", "-MD", "-lfoo.a", "-l", "bar.a", "-I",
889             "bar", "-Igun", "-c", "a_filename.c"
890             ], AbsoluteCompileDbDirectory("/home".Path.AbsolutePath), null);
891 
892     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
893     s.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
894     s.includes.shouldEqual(["/home/bar", "/home/gun"]);
895 }
896 
897 @("Should be cflags with all -f removed")
898 unittest {
899     auto cmd = toCompileCommand("/home", "file1.cpp", [
900             "g++", "-fmany-fooo", "-I", "bar", "-fno-fooo", "-Igun", "-flolol",
901             "-c", "a_filename.c"
902             ], AbsoluteCompileDbDirectory("/home".Path.AbsolutePath), null);
903 
904     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
905     s.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
906     s.includes.shouldEqual(["/home/bar", "/home/gun"]);
907 }
908 
909 @("shall NOT remove -std=xyz flags")
910 unittest {
911     auto cmd = toCompileCommand("/home", "file1.cpp", [
912             "g++", "-std=c++11", "-c", "a_filename.c"
913             ], AbsoluteCompileDbDirectory("/home".Path.AbsolutePath), null);
914 
915     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
916     s.cflags.shouldEqual(["-std=c++11"]);
917 }
918 
919 @("shall remove -mfloat-gprs=double")
920 unittest {
921     auto cmd = toCompileCommand("/home", "file1.cpp", [
922             "g++", "-std=c++11", "-mfloat-gprs=double", "-c", "a_filename.c"
923             ], AbsoluteCompileDbDirectory("/home".Path.AbsolutePath), null);
924     auto my_filter = CompileCommandFilter(defaultCompilerFlagFilter, 0);
925     my_filter.filter ~= FilterClangFlag("-mfloat-gprs=double", FilterClangFlag.Kind.exclude);
926     auto s = cmd.get.parseFlag(my_filter, Compiler.init);
927     s.cflags.shouldEqual(["-std=c++11"]);
928 }
929 
930 @("Shall keep all compiler flags as they are")
931 unittest {
932     auto cmd = toCompileCommand("/home", "file1.cpp", ["g++", "-Da", "-D",
933             "b"], AbsoluteCompileDbDirectory("/home".Path.AbsolutePath), null);
934 
935     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
936     s.cflags.shouldEqual(["-Da", "-D", "b"]);
937 }
938 
// Shared fixtures for the unittests that follow: a fake database location and
// a set of raw JSON compilation databases.
version (unittest) {
    import std.file : getcwd;
    import std.path : absolutePath;
    import std.format : format;

    // contains a bit of extra junk that is expected to be removed
    immutable string dummy_path = "/path/to/../to/./db/compilation_db.json";
    // the directory the tests expect entries parsed with `dummy_path` to be
    // resolved against
    immutable string dummy_dir = "/path/to/db";

    // one entry with a nested, relative directory
    enum raw_dummy1 = `[
    {
        "directory": "dir1/dir2",
        "command": "g++ -Idir1 -c -o binary file1.cpp",
        "file": "file1.cpp"
    }
]`;

    // two entries in the same directory with different files
    enum raw_dummy2 = `[
    {
        "directory": "dir",
        "command": "g++ -Idir1 -c -o binary file1.cpp",
        "file": "file1.cpp"
    },
    {
        "directory": "dir",
        "command": "g++ -Idir1 -c -o binary file2.cpp",
        "file": "file2.cpp"
    }
]`;

    // two entries with the same file name in different directories
    enum raw_dummy3 = `[
    {
        "directory": "dir1",
        "command": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp"
    },
    {
        "directory": "dir2",
        "command": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp"
    }
]`;

    // as raw_dummy3 but uses "arguments" (given as one string) instead of
    // "command" and adds an "output" attribute
    enum raw_dummy4 = `[
    {
        "directory": "dir1",
        "arguments": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp",
        "output": "file3.o"
    },
    {
        "directory": "dir2",
        "arguments": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp",
        "output": "file3.o"
    }
]`;

    // as raw_dummy4 but "arguments" is a JSON list of strings
    enum raw_dummy5 = `[
    {
        "directory": "dir1",
        "arguments": ["g++", "-Idir1", "-c", "-o", "binary", "file3.cpp"],
        "file": "file3.cpp",
        "output": "file3.o"
    },
    {
        "directory": "dir2",
        "arguments": ["g++", "-Idir1", "-c", "-o", "binary", "file3.cpp"],
        "file": "file3.cpp",
        "output": "file3.o"
    }
]`;
}
1012 
version (unittest) {
    // the tests below collect the parsed compile commands in an appender
    import std.array : appender;
}
1016 
1017 @("Should be a compile command DB")
1018 unittest {
1019     auto app = appender!(CompileCommand[])();
1020     raw_dummy1.parseCommands(CompileDbFile(dummy_path), app);
1021     auto cmds = app.data;
1022 
1023     assert(cmds.length == 1);
1024     (cast(string) cmds[0].directory).shouldEqual(dummy_dir ~ "/dir1/dir2");
1025     cmds[0].command.shouldEqual([
1026             "g++", "-Idir1", "-c", "-o", "binary", "file1.cpp"
1027             ]);
1028     (cast(string) cmds[0].file).shouldEqual("file1.cpp");
1029     (cast(string) cmds[0].absoluteFile).shouldEqual(dummy_dir ~ "/dir1/dir2/file1.cpp");
1030 }
1031 
1032 @("Should be a DB with two entries")
1033 unittest {
1034     auto app = appender!(CompileCommand[])();
1035     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1036     auto cmds = app.data;
1037 
1038     (cast(string) cmds[0].file).shouldEqual("file1.cpp");
1039     (cast(string) cmds[1].file).shouldEqual("file2.cpp");
1040 }
1041 
1042 @("Should find filename")
1043 unittest {
1044     auto app = appender!(CompileCommand[])();
1045     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1046     auto cmds = CompileCommandDB(app.data);
1047 
1048     auto found = cmds.find(dummy_dir ~ "/dir/file2.cpp");
1049     assert(found.length == 1);
1050     (cast(string) found[0].file).shouldEqual("file2.cpp");
1051 }
1052 
1053 @("Should find no match by using an absolute path that doesn't exist in DB")
1054 unittest {
1055     auto app = appender!(CompileCommand[])();
1056     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1057     auto cmds = CompileCommandDB(app.data);
1058 
1059     auto found = cmds.find("./file2.cpp");
1060     assert(found.length == 0);
1061 }
1062 
1063 @("Should find one match by using the absolute filename to disambiguous")
1064 unittest {
1065     auto app = appender!(CompileCommand[])();
1066     raw_dummy3.parseCommands(CompileDbFile(dummy_path), app);
1067     auto cmds = CompileCommandDB(app.data);
1068 
1069     auto found = cmds.find(dummy_dir ~ "/dir2/file3.cpp");
1070     assert(found.length == 1);
1071 
1072     found.toString.shouldEqual(format("%s/dir2
1073   file3.cpp
1074   %s/dir2/file3.cpp
1075   g++ -Idir1 -c -o binary file3.cpp
1076 ", dummy_dir, dummy_dir));
1077 }
1078 
1079 @("Should be a pretty printed search result")
1080 unittest {
1081     auto app = appender!(CompileCommand[])();
1082     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1083     auto cmds = CompileCommandDB(app.data);
1084     auto found = cmds.find(dummy_dir ~ "/dir/file2.cpp");
1085 
1086     found.toString.shouldEqual(format("%s/dir
1087   file2.cpp
1088   %s/dir/file2.cpp
1089   g++ -Idir1 -c -o binary file2.cpp
1090 ", dummy_dir, dummy_dir));
1091 }
1092 
1093 @("Should be a compile command DB with relative path")
1094 unittest {
1095     enum raw = `[
1096     {
1097         "directory": ".",
1098         "command": "g++ -Idir1 -c -o binary file1.cpp",
1099         "file": "file1.cpp"
1100     }
1101     ]`;
1102     auto app = appender!(CompileCommand[])();
1103     raw.parseCommands(CompileDbFile(dummy_path), app);
1104     auto cmds = app.data;
1105 
1106     assert(cmds.length == 1);
1107     (cast(string) cmds[0].directory).shouldEqual(dummy_dir);
1108     (cast(string) cmds[0].file).shouldEqual("file1.cpp");
1109     (cast(string) cmds[0].absoluteFile).shouldEqual(dummy_dir ~ "/file1.cpp");
1110 }
1111 
1112 @("Should be a DB read from a relative path with the contained paths adjusted appropriately")
1113 unittest {
1114     auto app = appender!(CompileCommand[])();
1115     raw_dummy3.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1116     auto cmds = CompileCommandDB(app.data);
1117 
1118     // trusted: constructing a path in memory which is never used for writing.
1119     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1120 
1121     auto found = cmds.find(abs_path ~ "/dir2/file3.cpp");
1122     assert(found.length == 1);
1123 
1124     found.toString.shouldEqual(format("%s/dir2
1125   file3.cpp
1126   %s/dir2/file3.cpp
1127   g++ -Idir1 -c -o binary file3.cpp
1128 ", abs_path, abs_path));
1129 }
1130 
1131 @("shall extract arguments, file, directory and output with absolute paths")
1132 unittest {
1133     auto app = appender!(CompileCommand[])();
1134     raw_dummy4.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1135     auto cmds = CompileCommandDB(app.data);
1136 
1137     // trusted: constructing a path in memory which is never used for writing.
1138     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1139 
1140     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.cpp"));
1141     assert(found.length == 1);
1142 
1143     found.toString.shouldEqual(format("%s/dir2
1144   file3.cpp
1145   %s/dir2/file3.cpp
1146   file3.o
1147   %s/dir2/file3.o
1148   g++ -Idir1 -c -o binary file3.cpp
1149 ", abs_path, abs_path, abs_path));
1150 }
1151 
1152 @("shall be the compiler flags derived from the arguments attribute")
1153 unittest {
1154     auto app = appender!(CompileCommand[])();
1155     raw_dummy4.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1156     auto cmds = CompileCommandDB(app.data);
1157 
1158     // trusted: constructing a path in memory which is never used for writing.
1159     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1160 
1161     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.cpp"));
1162     assert(found.length == 1);
1163 
1164     found[0].parseFlag(defaultCompilerFilter, Compiler.init)
1165         .cflags.shouldEqual(["-I", buildPath(abs_path, "dir2", "dir1")]);
1166 }
1167 
1168 @("shall find the entry based on an output match")
1169 unittest {
1170     auto app = appender!(CompileCommand[])();
1171     raw_dummy4.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1172     auto cmds = CompileCommandDB(app.data);
1173 
1174     // trusted: constructing a path in memory which is never used for writing.
1175     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1176 
1177     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.o"));
1178     assert(found.length == 1);
1179 
1180     (cast(string) found[0].absoluteFile).shouldEqual(buildPath(abs_path, "dir2", "file3.cpp"));
1181 }
1182 
1183 @("shall parse the compilation database when *arguments* is a json list")
1184 unittest {
1185     auto app = appender!(CompileCommand[])();
1186     raw_dummy5.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1187     auto cmds = CompileCommandDB(app.data);
1188 
1189     // trusted: constructing a path in memory which is never used for writing.
1190     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1191 
1192     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.o"));
1193     assert(found.length == 1);
1194 
1195     (cast(string) found[0].absoluteFile).shouldEqual(buildPath(abs_path, "dir2", "file3.cpp"));
1196 }
1197 
1198 @("shall parse the compilation database and find a match via the glob pattern")
1199 unittest {
1200     import std.path : baseName;
1201 
1202     auto app = appender!(CompileCommand[])();
1203     raw_dummy5.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1204     auto cmds = CompileCommandDB(app.data);
1205 
1206     auto found = cmds.find("*/dir2/file3.cpp");
1207     assert(found.length == 1);
1208 
1209     found[0].absoluteFile.baseName.shouldEqual("file3.cpp");
1210 }
1211 
1212 @("shall extract filepath from includes correctly when there is spaces in the path")
1213 unittest {
1214     auto cmd = toCompileCommand("/home", "file.cpp", [
1215             "-I", `"dir with spaces"`, "-I", `\"dir with spaces\"`
1216             ], AbsoluteCompileDbDirectory("/home".Path.AbsolutePath), null);
1217     auto pargs = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
1218     pargs.cflags.shouldEqual([
1219             "-I", "/home/dir with spaces", "-I", "/home/dir with spaces"
1220             ]);
1221     pargs.includes.shouldEqual([
1222             "/home/dir with spaces", "/home/dir with spaces"
1223             ]);
1224 }
1225 
1226 @("shall handle path with spaces, both as separate string and combined with backslash")
1227 unittest {
1228     auto cmd = toCompileCommand("/project", "file.cpp", [
1229             "-I", `"separate dir/with space"`, "-I", `\"separate dir/with space\"`,
1230             `-I"combined dir/with space"`, `-I\"combined dir/with space\"`,
1231             ], AbsoluteCompileDbDirectory("/project".Path.AbsolutePath), null);
1232     auto pargs = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
1233     pargs.cflags.shouldEqual([
1234             "-I", "/project/separate dir/with space", "-I",
1235             "/project/separate dir/with space", "-I",
1236             "/project/combined dir/with space", "-I",
1237             "/project/combined dir/with space"
1238             ]);
1239     pargs.includes.shouldEqual([
1240             "/project/separate dir/with space", "/project/separate dir/with space",
1241             "/project/combined dir/with space", "/project/combined dir/with space"
1242             ]);
1243 }
1244 
1245 @("shall handle path with consecutive spaces")
1246 unittest {
1247     auto cmd = toCompileCommand("/project", "file.cpp",
1248             [
1249                 `-I"one space/lots of     space"`,
1250                 `-I\"one space/lots of     space\"`, `-I`,
1251                 `"one space/lots of     space"`, `-I`,
1252                 `\"one space/lots of     space\"`,
1253             ], AbsoluteCompileDbDirectory("/project".Path.AbsolutePath), null);
1254     auto pargs = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
1255     pargs.cflags.shouldEqual([
1256             "-I", "/project/one space/lots of     space", "-I",
1257             "/project/one space/lots of     space", "-I",
1258             "/project/one space/lots of     space", "-I",
1259             "/project/one space/lots of     space",
1260             ]);
1261     pargs.includes.shouldEqual([
1262             "/project/one space/lots of     space",
1263             "/project/one space/lots of     space",
1264             "/project/one space/lots of     space",
1265             "/project/one space/lots of     space"
1266             ]);
1267 }