1 /**
2 Copyright: Copyright (c) 2016-2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 Utility functions for Clang Compilation Databases.
11 
12 # Usage
13 Call the function `fromArgCompileDb` to create one, merged database.
14 
15 Extract the flags for a file by calling `appendOrError`.
16 
17 Example:
18 ---
19 auto dbs = fromArgCompileDb(["foo.json"]);
20 auto flags = dbs.appendOrError(null, "foo.cpp", defaultCompilerFilter);
21 ---
22 */
23 module dextool.compilation_db;
24 
25 import logger = std.experimental.logger;
26 import std.exception : collectException;
27 import std.json : JSONValue;
28 import std.typecons : Nullable;
29 
30 import dextool.type : AbsolutePath, Path;
31 
32 public import dextool.compilation_db.user_filerange;
33 public import dextool.compilation_db.system_compiler : deduceSystemIncludes,
34     SystemIncludePath, Compiler;
35 
36 version (unittest) {
37     import std.path : buildPath;
38     import unit_threaded : shouldEqual;
39 }
40 
41 @safe:
42 
43 /** Hold an entry from the compilation database.
44  *
45  * The following information is from the official specification.
46  * $(LINK2 http://clang.llvm.org/docs/JSONCompilationDatabase.html, Standard)
47  *
48  * directory: The working directory of the compilation. All paths specified in
49  * the command or file fields must be either absolute or relative to this
50  * directory.
51  *
52  * file: The main translation unit source processed by this compilation step.
53  * This is used by tools as the key into the compilation database. There can be
54  * multiple command objects for the same file, for example if the same source
55  * file is compiled with different configurations.
56  *
57  * command: The compile command executed. After JSON unescaping, this must be a
58  * valid command to rerun the exact compilation step for the translation unit
59  * in the environment the build system uses. Parameters use shell quoting and
60  * shell escaping of quotes, with ‘"‘ and ‘\‘ being the only special
61  * characters. Shell expansion is not supported.
62  *
63  * arguments: The compile command executed as list of strings. Either arguments
64  * or command is required.
65  *
66  * output: The name of the output created by this compilation step. This field
67  * is optional. It can be used to distinguish different processing modes of the
68  * same input file.
69  *
70  * Dextool additions.
71  * The standard does not specify how to treat "directory" when it is a relative
72  * path. The logic chosen in dextool is to treat it as relative to the path
73  * the compilation database file is read from.
74  */
@safe struct CompileCommand {
    static import dextool.type;

    import dextool.type : DirName;

    /// The unmodified value of the tuple's "file" key.
    alias FileName = dextool.type.FileName;

    /// A path made absolute by resolving a raw path against a working directory.
    static struct AbsoluteFileName {
        dextool.type.AbsoluteFileName payload;
        alias payload this;

        /// Resolve `raw_path` relative to `work_dir`.
        this(AbsoluteDirectory work_dir, string raw_path) {
            auto name = FileName(raw_path);
            auto base = DirName(work_dir);
            payload = AbsolutePath(name, base);
        }
    }

    /// The tuple's "directory" value resolved against the directory of the database.
    static struct AbsoluteDirectory {
        dextool.type.AbsoluteDirectory payload;
        alias payload this;

        /// Resolve `raw_path` relative to `db_path`.
        this(AbsoluteCompileDbDirectory db_path, string raw_path) {
            auto name = FileName(raw_path);
            auto base = DirName(db_path);
            payload = AbsolutePath(name, base);
        }
    }

    /// The raw command taken from the tuple's "command" or "arguments" value.
    static struct Command {
        string[] payload;
        alias payload this;

        /// Returns: true when at least one argument is stored.
        bool hasValue() @safe pure nothrow const @nogc {
            return payload.length > 0;
        }
    }

    /// The path to the output produced by running the command.
    static struct Output {
        string payload;
        alias payload this;

        /// Returns: true when the path is non-empty.
        bool hasValue() @safe pure nothrow const @nogc {
            return payload.length > 0;
        }
    }

    // NOTE: the member order below is relied upon by positional construction
    // of this struct; do not reorder.

    /// The file that was compiled, as written in the database.
    FileName file;
    /// The file that was compiled, as an absolute path.
    AbsoluteFileName absoluteFile;
    /// Working directory of the command that compiled the input.
    AbsoluteDirectory directory;
    /// The command executed when compiling.
    Command command;
    /// The resulting object file, as written in the database.
    Output output;
    /// The resulting object file, as an absolute path.
    AbsoluteFileName absoluteOutput;
}
134 
135 /// The path to the compilation database.
struct CompileDbFile {
    /// The path exactly as given by the user; the wrapper converts implicitly to it.
    string payload;

    alias payload this;
}
140 
141 /// The absolute path to the directory the compilation database reside at.
struct AbsoluteCompileDbDirectory {
    string payload;
    alias payload this;

    // The stored directory must be an absolute path.
    invariant {
        import std.path : isAbsolute;

        assert(isAbsolute(payload));
    }

    /// Derive the directory from the path to a compilation database file.
    this(string file_path) {
        import std.path : absolutePath, buildNormalizedPath, dirName;

        // normalize first, make absolute, then strip the last path component.
        auto normalized = buildNormalizedPath(file_path);
        payload = dirName(absolutePath(normalized));
    }

    /// ditto
    this(CompileDbFile db) {
        this(db.payload);
    }

    unittest {
        import std.path;

        auto cwd_based = AbsoluteCompileDbDirectory(".");
        assert(isAbsolute(cwd_based));
    }
}
169 
170 /// A complete compilation database.
struct CompileCommandDB {
    /// All entries of the database; the wrapper converts implicitly to this array.
    CompileCommand[] payload;

    alias payload this;
}
175 
176 // The result of searching for a file in a compilation DB.
177 // The file may occur more than once, therefore an array.
struct CompileCommandSearch {
    /// The matching entries; the wrapper converts implicitly to this array.
    CompileCommand[] payload;

    alias payload this;
}
182 
183 /**
184  * Trusted: opIndex for JSONValue is @safe in DMD-2.077.0
185  * remove the trusted attribute when the minimal requirement is upgraded.
186  */
private Nullable!CompileCommand toCompileCommand(JSONValue v, AbsoluteCompileDbDirectory db_dir) nothrow @trusted {
    // Params:
    //  v = one JSON object (tuple) from the compilation database.
    //  db_dir = directory of the database file, used to resolve a relative "directory".
    // Returns: the parsed entry, or a null Nullable when the tuple is unusable.
    import std.algorithm : map, filter, splitter;
    import std.array : array;

    // The enum is imported as JSON_TYPE on frontends older than 2.085 and as
    // JSONType otherwise; the aliases hide the difference.
    static if (__VERSION__ < 2085L) {
        import std.json : JSON_TYPE;

        alias JSONType = JSON_TYPE;
        alias JSONType_array = JSON_TYPE.ARRAY;
        alias JSONType_string = JSON_TYPE.STRING;
    } else {
        import std.json : JSONType;

        alias JSONType_array = JSONType.array;
        alias JSONType_string = JSONType.string;
    }

    string[] command = () {
        string[] cmd;
        try {
            cmd = v["command"].str.splitter.filter!(a => a.length != 0).array;
        } catch (Exception ex) {
            // best effort: a missing or non-string "command" falls through to
            // "arguments".
        }

        // prefer command over arguments if both are present because of bugs in
        // tools that produce compile_commands.json.
        if (cmd.length != 0)
            return cmd;

        try {
            enum j_arg = "arguments";
            const auto j_type = v[j_arg].type;
            if (j_type == JSONType_string) {
                cmd = v[j_arg].str.splitter.filter!(a => a.length != 0).array;
            } else if (j_type == JSONType_array) {
                // keep only the non-empty string elements.
                cmd = v[j_arg].arrayNoRef
                    .filter!(a => a.type == JSONType_string)
                    .map!(a => a.str)
                    .filter!(a => a.length != 0)
                    .array;
            }
        } catch (Exception ex) {
            // best effort: the emptiness check below reports the failure.
        }

        return cmd;
    }();

    if (command.length == 0) {
        logger.error("Unable to parse the JSON tuple. Both command and arguments are empty")
            .collectException;
        return typeof(return)();
    }

    // "output" is optional; leave it empty when it is absent or not a string.
    string output;
    try {
        output = v["output"].str;
    } catch (Exception ex) {
    }

    try {
        const directory = v["directory"];
        const file = v["file"];

        // sanity check: both values must be JSON strings.
        if (directory.type != JSONType_string || file.type != JSONType_string)
            return typeof(return)();

        return toCompileCommand(directory.str, file.str, command, db_dir, output);
    } catch (Exception e) {
        logger.info("Input JSON: ", v.toPrettyString).collectException;
        logger.error("Unable to parse json: ", e.msg).collectException;
    }

    return typeof(return)();
}
270 
271 /** Transform a json entry to a CompileCommand.
272  *
273  * This function is under no circumstances meant to be exposed outside this module.
274  * The API is badly designed for common use because it relies on the position
275  * order of the strings for their meaning.
276  */
Nullable!CompileCommand toCompileCommand(string directory, string file,
        string[] command, AbsoluteCompileDbDirectory db_dir, string output) nothrow {
    // The arguments correspond to the JSON keys directory, file, command
    // (or arguments) and output of one tuple in the database.

    try {
        // resolve the working directory first; file and output are relative to it.
        auto work_dir = CompileCommand.AbsoluteDirectory(db_dir, directory);
        auto input_path = CompileCommand.AbsoluteFileName(work_dir, file);
        auto output_path = CompileCommand.AbsoluteFileName(work_dir, output);

        // dfmt off
        auto entry = CompileCommand(
            CompileCommand.FileName(file),
            input_path,
            work_dir,
            CompileCommand.Command(command),
            CompileCommand.Output(output),
            output_path);
        // dfmt on

        return Nullable!CompileCommand(entry);
    } catch (Exception ex) {
        logger.error("Unable to parse json: ", ex.msg).collectException;
    }

    // a failure to build any of the paths results in a null value.
    return Nullable!CompileCommand();
}
303 
304 /** Parse a CompilationDatabase.
305  *
306  * Params:
307  *  raw_input = the content of the CompilationDatabase.
308  *  in_file = path to the compilation database file.
309  *  out_range = range to write the output to.
310  */
private void parseCommands(T)(string raw_input, CompileDbFile in_file, ref T out_range) nothrow {
    import std.json : parseJSON, JSONException;

    // Convert every tuple of the JSON array `v` and write the valid ones to `out_range`.
    static void put(T)(JSONValue v, AbsoluteCompileDbDirectory dbdir, ref T out_range) nothrow {
        try {
            foreach (tuple; v.array()) {
                // map the JSON tuple to a D struct
                auto parsed = toCompileCommand(tuple, dbdir);
                // skip invalid entries
                if (parsed.isNull)
                    continue;
                out_range.put(parsed.get);
            }
        } catch (Exception ex) {
            logger.error("Unable to parse json:", ex.msg).collectException;
        }
    }

    try {
        // trusted: is @safe in DMD-2.077.0
        // remove the trusted attribute when the minimal requirement is upgraded.
        auto root = () @trusted { return parseJSON(raw_input); }();
        auto db_dir = AbsoluteCompileDbDirectory(in_file);

        // trusted: this function is private so the only user of it is this module.
        // the only problem would be in the out_range. It is assumed that the
        // out_range takes care of the validation and other security aspects.
        () @trusted { put(root, db_dir, out_range); }();
    } catch (Exception ex) {
        logger.error("Error while parsing compilation database: " ~ ex.msg).collectException;
    }
}
348 
/** Read one compilation database file and write its entries to `app`.
 *
 * Params:
 *  filename = path to the compilation database file.
 *  app = output range that receives the parsed entries.
 */
void fromFile(T)(CompileDbFile filename, ref T app) {
    import std.file : readText;

    string content = readText(filename);

    // an empty file is only warned about; parsing is still attempted.
    if (content.length == 0) {
        logger.warning("File is empty: ", filename);
    }

    parseCommands(content, filename, app);
}
358 
/** Read several compilation database files and write all entries to `app`.
 *
 * Params:
 *  fnames = paths to the compilation database files.
 *  app = output range that receives the parsed entries.
 *
 * Throws: Exception for the first file that does not exist.
 */
void fromFiles(T)(CompileDbFile[] fnames, ref T app) {
    import std.file : exists;

    foreach (db_file; fnames) {
        if (exists(db_file)) {
            fromFile(db_file, app);
        } else {
            throw new Exception("File do not exist: " ~ db_file);
        }
    }
}
368 
369 /** Return default path if argument is null.
370  */
CompileDbFile[] orDefaultDb(string[] cli_path) @safe pure nothrow {
    // nothing given by the user, fall back on the conventional file name.
    if (cli_path.length == 0)
        return [CompileDbFile("compile_commands.json")];

    // wrap every user supplied path, preserving the order.
    CompileDbFile[] wrapped;
    foreach (p; cli_path) {
        wrapped ~= CompileDbFile(p);
    }
    return wrapped;
}
381 
382 /** Contains the results of a search in the compilation database.
383  *
384  * When searching for the compile command for a file, the compilation db can
385  * return several commands, as the file may have been compiled with different
386  * options in different parts of the project.
387  *
388  * Params:
389  *  glob = glob pattern to find a matching file in the DB against
390  */
CompileCommandSearch find(CompileCommandDB db, string glob) @safe
in {
    debug logger.trace("Looking for " ~ glob);
}
out (result) {
    import std.conv : to;

    debug logger.trace("Found " ~ to!string(result));
}
body {
    import std.path : globMatch;

    foreach (entry; db) {
        // The first entry that matches wins. For each entry the input file is
        // tried before the output file, and exact comparisons before globbing.
        const matchesInput = entry.absoluteFile == glob || entry.file == glob
            || globMatch(entry.absoluteFile, glob);

        if (matchesInput || entry.absoluteOutput == glob || entry.output == glob
                || globMatch(entry.absoluteOutput, glob)) {
            return CompileCommandSearch([entry]);
        }
    }

    logger.errorf("\n%s\nNo match found in the compile command database", db.toString);

    return CompileCommandSearch();
}
422 
/// The flags found for a file together with the absolute path of that file.
struct SearchResult {
    /// Flags parsed from the matching compile command.
    ParseFlags flags;
    /// Absolute path of the file the flags belong to.
    AbsolutePath absoluteFile;

    this(ParseFlags flags, AbsolutePath p) {
        this.absoluteFile = p;
        this.flags = flags;
    }

    // TODO: consider deprecating.
    /// Wrap raw flags, without any include information, in a `ParseFlags`.
    this(string[] flags, AbsolutePath p) {
        auto wrapped = ParseFlags(null, flags);
        this(wrapped, p);
    }

    // TODO: consider deprecating.
    /// Returns: the complete flags as produced by `ParseFlags.completeFlags`.
    string[] cflags() @safe pure nothrow const {
        return this.flags.completeFlags;
    }
}
442 
443 /** Append the compiler flags if a match is found in the DB or error out.
444  */
Nullable!(SearchResult) appendOrError(ref CompileCommandDB compilation_db,
        const string[] cflags, const string input_file, const Compiler user_compiler = Compiler
        .init) @safe {
    // forward to the overload that takes a filter, using the module's default filter.
    auto found = appendOrError(compilation_db, cflags, input_file,
            defaultCompilerFilter, user_compiler);
    return found;
}
451 
452 /** Append the compiler flags if a match is found in the DB or error out.
453  *
454  * TODO: consider using exceptions instead of Nullable.
455  */
Nullable!SearchResult appendOrError(ref CompileCommandDB compilation_db, const string[] cflags, const string input_file,
        const CompileCommandFilter flag_filter, const Compiler user_compiler = Compiler.init) @safe {

    auto matches = compilation_db.find(input_file.idup);
    debug {
        logger.trace(matches.length > 0,
                "CompilationDatabase match (by filename):\n", matches.toString);
        if (matches.length == 0) {
            logger.trace(compilation_db.toString);
        }

        logger.tracef("CompilationDatabase filter: %s", flag_filter);
    }

    typeof(return) rval;

    // no match: report it and hand back the null value.
    if (matches.length == 0) {
        logger.warning("File not found in compilation database: ", input_file);
        return rval;
    }

    // only the first match is used; the caller's flags go in front of the parsed ones.
    auto parsed = matches[0].parseFlag(flag_filter, user_compiler);
    parsed.prependCflags(cflags.dup);

    rval = SearchResult.init;
    rval.get.flags = parsed;
    rval.get.absoluteFile = matches[0].absoluteFile;

    return rval;
}
484 
/** Render the entries as human readable text.
 *
 * For every entry the directory, file and absolute file are written, then the
 * output paths and the command when they have a value.
 */
string toString(CompileCommand[] db) @safe pure {
    import std.array : appender;
    import std.format : formattedWrite;

    auto buf = appender!string();

    foreach (entry; db) {
        buf.formattedWrite("%s\n  %s\n  %s\n", entry.directory, entry.file, entry.absoluteFile);

        if (entry.output.hasValue) {
            buf.formattedWrite("  %s\n", entry.output);
            buf.formattedWrite("  %s\n", entry.absoluteOutput);
        }

        if (entry.command.hasValue) {
            // the arguments are written space separated on one line.
            buf.formattedWrite("  %-(%s %)\n", entry.command);
        }
    }

    return buf.data;
}
507 
/// Render all entries of the database, see `toString(CompileCommand[])`.
string toString(CompileCommandDB db) @safe pure {
    CompileCommand[] entries = db.payload;
    return toString(entries);
}
511 
/// Render all entries of the search result, see `toString(CompileCommand[])`.
string toString(CompileCommandSearch search) @safe pure {
    CompileCommand[] entries = search.payload;
    return toString(entries);
}
515 
/// The default filter: the default flag exclusions and no compiler arguments skipped.
const defaultCompilerFilter = CompileCommandFilter(defaultCompilerFlagFilter(), 0);
517 
518 /// Returns: array of default flags to exclude.
auto defaultCompilerFlagFilter() @safe {
    // dfmt off
    static immutable string[] excluded = [
        // remove basic compile flag irrelevant for AST generation
        "-c", "-o",
        // machine dependent flags
        "-m",
        // machine dependent flags, AVR
        "-nodevicelib", "-Waddr-space-convert",
        // machine dependent flags, VxWorks
        "-non-static", "-Bstatic", "-Bdynamic", "-Xbind-lazy", "-Xbind-now",
        // blacklist all -f because most are not compatible with clang
        "-f",
        // linker flags, irrelevant for the AST
        "-static", "-shared", "-rdynamic", "-s", "-l", "-L", "-z", "-u", "-T", "-Xlinker",
        // a linker flag with filename as one argument
        "-l",
        // remove some of the preprocessor flags, irrelevant for the AST
        "-MT", "-MF", "-MD", "-MQ", "-MMD", "-MP", "-MG", "-E", "-cc1", "-S", "-M", "-MM", "-###",
    ];
    // dfmt on

    // wrap every flag, in the listed order, as an exclusion filter.
    FilterClangFlag[] filters;
    foreach (flag; excluded) {
        filters ~= FilterClangFlag(flag);
    }

    return filters;
}
549 
/// Controls which parts of a compile command are dropped when it is parsed.
struct CompileCommandFilter {
    /// Flags to remove from the command.
    FilterClangFlag[] filter;

    /// Number of leading arguments of the command to drop before filtering.
    int skipCompilerArgs;
}
554 
555 /// Parsed compiler flags.
struct ParseFlags {
    /// An include path used in the compile command.
    static struct Include {
        string payload;
        alias payload this;
    }

    private {
        // selects "-I" instead of "-isystem" in completeFlags.
        bool forceSystemIncludes_;
    }

    /// The includes used in the compile command.
    Include[] includes;

    /// System include paths extracted from the compiler used for the file.
    SystemIncludePath[] systemIncludes;

    /// Specific flags for the file as parsed from the DB.
    string[] cflags;

    /// Compiler used to compile the item.
    Compiler compiler;

    /// Construct without a compiler and without system includes.
    this(Include[] incls, string[] flags) {
        this(Compiler.init, incls, SystemIncludePath[].init, flags);
    }

    /// Construct without system includes.
    this(Compiler compiler, Include[] incls, string[] flags) {
        this(compiler, incls, null, flags);
    }

    /// Construct with every member given explicitly.
    this(Compiler compiler, Include[] incls, SystemIncludePath[] sysincls, string[] flags) {
        this.cflags = flags;
        this.systemIncludes = sysincls;
        this.includes = incls;
        this.compiler = compiler;
    }

    /// Put `v` in front of the current flags.
    void prependCflags(string[] v) {
        cflags = v ~ cflags;
    }

    /// Put `v` after the current flags.
    void appendCflags(string[] v) {
        cflags ~= v;
    }

    /// Set to true to use -I instead of -isystem for system includes.
    auto forceSystemIncludes(bool v) {
        forceSystemIncludes_ = v;
        return this;
    }

    /// Returns: true when at least one system include path is stored.
    bool hasSystemIncludes() @safe pure nothrow const @nogc {
        return systemIncludes.length > 0;
    }

    /// Returns: the compiler and the complete flags as one line of text.
    string toString() @safe pure const {
        import std.format : format;

        return format("Compiler: %-(%s %) flags: %-(%s %)", compiler, completeFlags);
    }

    /** Convenience method that returns the complete flags, ready to use with
     * a GCC compliant compiler.
     *
     * This method assumes that -isystem is how to add system flags, unless
     * forceSystemIncludes has been set, in which case -I is used.
     *
     * Returns: flags with the system flags appended.
     */
    string[] completeFlags() @safe pure nothrow const {
        import std.algorithm : map, joiner;
        import std.array : array;

        auto sys_flag = forceSystemIncludes_ ? "-I" : "-isystem";

        // each system include contributes a pair: the flag followed by the path.
        return cflags.idup ~ systemIncludes.map!(a => [sys_flag, a.value]).joiner.array;
    }

    alias completeFlags this;
}
636 
637 /** Filter and normalize the compiler flags.
638  *
639  *  - Sanitize the compiler command by removing flags matching the filter.
640  *  - Remove excess white space.
641  *  - Convert all filenames to absolute path.
642  */
ParseFlags parseFlag(CompileCommand cmd, const CompileCommandFilter flag_filter,
        const Compiler user_compiler = Compiler.init) @safe {
    // Params:
    //  cmd = the database entry whose command is parsed.
    //  flag_filter = flags to exclude and the number of leading arguments to skip.
    //  user_compiler = used instead of the compiler found in the command when non-empty.
    // Returns: the filtered flags, the include paths found and the system
    // includes deduced for the compiler.
    import std.algorithm : among, map, strip, startsWith, filter, count;
    import std.string : empty, split;

    // True when raw_flag matches any exclude entry in flag_filter.
    static bool excludeStartWith(const string raw_flag, const FilterClangFlag[] flag_filter) @safe {
        // the purpose is to find if any of the flags in flag_filter matches
        // the start of flag.

        bool delegate(const FilterClangFlag) @safe cmp;

        const parts = raw_flag.split('=');
        if (parts.length == 2) {
            // is a -foo=bar flag thus exact match is the only sensible
            cmp = (const FilterClangFlag a) => raw_flag == a.payload;
        } else {
            // the flag has the argument merged thus have to check if the start match
            cmp = (const FilterClangFlag a) => raw_flag.startsWith(a.payload);
        }

        // dfmt off
        return 0 != flag_filter
            .filter!(a => a.kind == FilterClangFlag.Kind.exclude)
            // keep flags that are at least the length of values
            .filter!(a => raw_flag.length >= a.length)
            // if the flag is any of those in filter
            .filter!cmp
            .count();
        // dfmt on
    }

    static bool isQuotationMark(char c) @safe {
        return c == '"';
    }

    static bool isBackslash(char c) @safe {
        return c == '\\';
    }

    // True for any flag that starts with "-I", with or without a merged path.
    static bool isInclude(string flag) @safe {
        return flag.length >= 2 && flag[0 .. 2] == "-I";
    }

    static bool isCombinedIncludeFlag(string flag) @safe {
        // if an include flag make it absolute, as one argument by checking
        // length. 3 is to only match those that are -Ixyz
        return flag.length >= 3 && isInclude(flag);
    }

    static bool isNotAFlag(string flag) @safe {
        // good enough if it seem to be a file
        return flag.length >= 1 && flag[0] != '-';
    }

    /// Flags that take an argument that is a path that need to be transformed
    /// to an absolute path.
    static bool isFlagAndPath(string flag) @safe {
        // list derived from clang --help
        return 0 != flag.among("-I", "-idirafter", "-iframework", "-imacros", "-include-pch",
                "-include", "-iquote", "-isysroot", "-isystem-after", "-isystem", "--sysroot");
    }

    /// Flags that take an argument that is NOT a path.
    static bool isFlagAndValue(string flag) @safe {
        return 0 != flag.among("-D");
    }

    /// Flags that are includes, but contains spaces, are wrapped in quotation marks (or slash).
    static bool isIncludeWithQuotationMark(string flag) @safe {
        // length is checked in isCombinedIncludeFlag
        return isCombinedIncludeFlag(flag) && (isQuotationMark(flag[2]) || isBackslash(flag[2]));
    }

    /// Flags that are paths and contain spaces will start with a quotation mark (or slash).
    static bool isStartingWithQuotationMark(string flag) @safe {
        return !flag.empty && (isQuotationMark(flag[0]) || isBackslash(flag[0]));
    }

    /// When we know we are building a path that is space separated,
    /// the last index of the last string should be a quotation mark.
    static bool isEndingWithQuotationMark(string flag) @safe {
        return !flag.empty && isQuotationMark(flag[$ - 1]);
    }

    // Walk the arguments once with a small state machine. The kept flags end
    // up in the result's cflags and every path made absolute is also recorded
    // as an include.
    static ParseFlags filterPair(string[] r,
            CompileCommand.AbsoluteDirectory workdir, const FilterClangFlag[] flag_filter) @safe {
        enum State {
            /// keep the next flag IF none of the other transitions happens
            keep,
            /// forcefully keep the next argument as raw data
            priorityKeepNextArg,
            /// keep the next argument and transform to an absolute path
            pathArgumentToAbsolute,
            /// skip the next arg
            skip,
            /// skip the next arg, if it is not a flag
            skipIfNotFlag,
            /// use the next arg to create a complete path
            checkingForEndQuotation,
        }

        import std.array : Appender, appender, array, join;
        import std.range : ElementType;

        auto st = State.keep;
        auto rval = appender!(string[]);
        auto includes = appender!(string[]);
        // the first argument, when there is one, is taken to be the compiler.
        auto compiler = Compiler(r.length == 0 ? null : r[0]);
        // accumulates the pieces of a quoted path that was split on spaces.
        auto path = appender!(char[])();

        // Strip every backslash and double quote from arg.
        string removeBackslashesAndQuotes(string arg) {
            import std.conv : text;
            import std.uni : byCodePoint, byGrapheme, Grapheme;

            return arg.byGrapheme.filter!(a => !a.among(Grapheme('\\'),
                    Grapheme('"'))).byCodePoint.text;
        }

        // Resolve arg against workdir and store the result both as a flag and
        // as an include.
        void putNormalizedAbsolute(string arg) {
            import std.path : buildNormalizedPath, absolutePath;

            auto p = buildNormalizedPath(workdir, removeBackslashesAndQuotes(arg)).absolutePath;
            rval.put(p);
            includes.put(p);
        }

        foreach (arg; r) {
            // First states and how to handle those.
            // Then transitions from the state keep, which is the default state.
            //
            // The user controlled excludeStartWith must be before any other
            // conditions after the states. It is to give the user the ability
            // to filter out any flag.

            if (st == State.skip) {
                st = State.keep;
            } else if (st == State.skipIfNotFlag && isNotAFlag(arg)) {
                // the argument belonged to the excluded flag; drop it.
                st = State.keep;
            } else if (st == State.pathArgumentToAbsolute) {
                if (isStartingWithQuotationMark(arg)) {
                    if (isEndingWithQuotationMark(arg)) {
                        st = State.keep;
                        putNormalizedAbsolute(arg);
                    } else {
                        // the quoted path continues in the following arguments.
                        st = State.checkingForEndQuotation;
                        path.put(arg);
                    }
                } else {
                    st = State.keep;
                    putNormalizedAbsolute(arg);
                }
            } else if (st == State.priorityKeepNextArg) {
                st = State.keep;
                rval.put(arg);
            } else if (st == State.checkingForEndQuotation) {
                // re-insert the space that the argument splitting removed.
                path.put(" ");
                path.put(arg);
                if (isEndingWithQuotationMark(arg)) {
                    // the end of a divided path
                    st = State.keep;
                    putNormalizedAbsolute(path.data.idup);
                    path.clear;
                }
            } else if (excludeStartWith(arg, flag_filter)) {
                // NOTE: when st is skipIfNotFlag and arg is a flag, evaluation
                // falls through to here and the conditions below.
                st = State.skipIfNotFlag;
            } else if (isIncludeWithQuotationMark(arg)) {
                rval.put("-I");
                if (arg.length >= 4) {
                    if (isEndingWithQuotationMark(arg)) {
                        // the path is wrapped in quotes (ex ['-I"path/to src"'] or ['-I\"path/to src\"'])
                        putNormalizedAbsolute(arg[2 .. $]);
                    } else {
                        // the path is divided (ex ['-I"path/to', 'src"'] or ['-I\"path/to', 'src\"'])
                        st = State.checkingForEndQuotation;
                        path.put(arg[2 .. $]);
                    }
                }
            } else if (isCombinedIncludeFlag(arg)) {
                // split -Ixyz into "-I" and the absolute path.
                rval.put("-I");
                putNormalizedAbsolute(arg[2 .. $]);
            } else if (isFlagAndPath(arg)) {
                rval.put(arg);
                st = State.pathArgumentToAbsolute;
            } else if (isFlagAndValue(arg)) {
                rval.put(arg);
                st = State.priorityKeepNextArg;
            }  // parameter that seem to be filenames, remove
            else if (isNotAFlag(arg)) {
                // skipping
            } else {
                rval.put(arg);
            }
        }
        return ParseFlags(compiler, includes.data.map!(a => ParseFlags.Include(a)).array, rval.data);
    }

    import std.algorithm : filter, splitter, min;

    // Copy the command and drop the first skipCompilerArgs arguments, but only
    // when the command has more arguments than that.
    string[] skipArgs = () @safe {
        string[] args;
        if (cmd.command.hasValue)
            args = cmd.command.payload.dup;
        if (args.length > flag_filter.skipCompilerArgs && flag_filter.skipCompilerArgs != 0)
            args = args[min(flag_filter.skipCompilerArgs, args.length) .. $];
        return args;
    }();

    auto pargs = filterPair(skipArgs, cmd.directory, flag_filter.filter);
    // a compiler given by the user overrides the one found in the command.
    auto compiler = user_compiler.length == 0 ? pargs.compiler : user_compiler;

    // Failing to deduce the system includes is not fatal; the message is
    // logged and an empty list is used.
    auto sysincls = () {
        try {
            import dextool.compilation_db.system_compiler : deduceSystemIncludes;

            return deduceSystemIncludes(cmd, compiler);
        } catch (Exception e) {
            logger.info(e.msg);
        }
        return SystemIncludePath[].init;
    }();

    return ParseFlags(compiler, pargs.includes, sysincls, pargs.cflags);
}
866 
/** Parse the textual content of a compilation database into a `CompileCommandDB`.
 *
 * Params:
 * data = raw content of the compilation database to convert
 * path = path of the database; relative paths are changed to be relative to
 *        this parameter
 *
 * Returns: a database built from every command that `parseCommands` emitted.
 */
CompileCommandDB toCompileCommandDB(string data, Path path) @safe {
    import std.array : appender;

    auto commands = appender!(CompileCommand[])();
    auto dbFile = CompileDbFile(cast(string) path);
    parseCommands(data, dbFile, commands);
    return CompileCommandDB(commands.data);
}
880 
/// Import and merge many compilation databases, given as `AbsolutePath`s, into one DB.
CompileCommandDB fromArgCompileDb(AbsolutePath[] paths) @safe {
    // Convert to plain strings and delegate to the `string[]` overload.
    string[] rawPaths;
    foreach (p; paths)
        rawPaths ~= cast(string) p;

    return fromArgCompileDb(rawPaths);
}
887 
/** Import and merge many compilation databases into one DB.
 *
 * Params:
 * paths = paths to the databases; passed through `orDefaultDb` before the
 *         files are read
 *
 * Returns: one database containing the commands collected by `fromFiles`.
 */
CompileCommandDB fromArgCompileDb(string[] paths) @safe {
    import std.array : appender;

    auto merged = appender!(CompileCommand[])();
    fromFiles(orDefaultDb(paths), merged);

    return CompileCommandDB(merged.data);
}
897 
/** A flag to exclude from the flags passed on to the clang parser.
 *
 * The struct converts implicitly to its `payload` string via `alias this`.
 */
struct FilterClangFlag {
    /// How the flag in `payload` shall be treated.
    enum Kind {
        /// The flag is excluded.
        exclude
    }

    /// Textual representation of the flag.
    string payload;
    alias payload this;

    /// Treatment of the flag. `Kind.exclude` is the only, and thus default, value.
    Kind kind;
}
909 
910 @("Should be cflags with all unnecessary flags removed")
911 unittest {
912     auto cmd = toCompileCommand("/home", "file1.cpp", [
913             "g++", "-MD", "-lfoo.a", "-l", "bar.a", "-I", "bar", "-Igun", "-c",
914             "a_filename.c"
915             ], AbsoluteCompileDbDirectory("/home"), null);
916     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
917     s.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
918     s.includes.shouldEqual(["/home/bar", "/home/gun"]);
919 }
920 
921 @("Should be cflags with some excess spacing")
922 unittest {
923     auto cmd = toCompileCommand("/home", "file1.cpp", [
924             "g++", "-MD", "-lfoo.a", "-l", "bar.a", "-I", "bar", "-Igun"
925             ], AbsoluteCompileDbDirectory("/home"), null);
926 
927     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
928     s.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
929     s.includes.shouldEqual(["/home/bar", "/home/gun"]);
930 }
931 
932 @("Should be cflags with machine dependent removed")
933 unittest {
934     auto cmd = toCompileCommand("/home", "file1.cpp", [
935             "g++", "-mfoo", "-m", "bar", "-MD", "-lfoo.a", "-l", "bar.a", "-I",
936             "bar", "-Igun", "-c", "a_filename.c"
937             ], AbsoluteCompileDbDirectory("/home"), null);
938 
939     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
940     s.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
941     s.includes.shouldEqual(["/home/bar", "/home/gun"]);
942 }
943 
944 @("Should be cflags with all -f removed")
945 unittest {
946     auto cmd = toCompileCommand("/home", "file1.cpp", [
947             "g++", "-fmany-fooo", "-I", "bar", "-fno-fooo", "-Igun", "-flolol",
948             "-c", "a_filename.c"
949             ], AbsoluteCompileDbDirectory("/home"), null);
950 
951     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
952     s.cflags.shouldEqual(["-I", "/home/bar", "-I", "/home/gun"]);
953     s.includes.shouldEqual(["/home/bar", "/home/gun"]);
954 }
955 
956 @("shall NOT remove -std=xyz flags")
957 unittest {
958     auto cmd = toCompileCommand("/home", "file1.cpp", [
959             "g++", "-std=c++11", "-c", "a_filename.c"
960             ], AbsoluteCompileDbDirectory("/home"), null);
961 
962     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
963     s.cflags.shouldEqual(["-std=c++11"]);
964 }
965 
966 @("shall remove -mfloat-gprs=double")
967 unittest {
968     auto cmd = toCompileCommand("/home", "file1.cpp", [
969             "g++", "-std=c++11", "-mfloat-gprs=double", "-c", "a_filename.c"
970             ], AbsoluteCompileDbDirectory("/home"), null);
971     auto my_filter = CompileCommandFilter(defaultCompilerFlagFilter, 0);
972     my_filter.filter ~= FilterClangFlag("-mfloat-gprs=double", FilterClangFlag.Kind.exclude);
973     auto s = cmd.get.parseFlag(my_filter, Compiler.init);
974     s.cflags.shouldEqual(["-std=c++11"]);
975 }
976 
977 @("Shall keep all compiler flags as they are")
978 unittest {
979     auto cmd = toCompileCommand("/home", "file1.cpp", ["g++", "-Da", "-D",
980             "b"], AbsoluteCompileDbDirectory("/home"), null);
981 
982     auto s = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
983     s.cflags.shouldEqual(["-Da", "-D", "b"]);
984 }
985 
// Shared fixtures for the unittests in this module; only compiled in unittest builds.
version (unittest) {
    import std.file : getcwd;
    import std.path : absolutePath;
    import std.format : format;

    // contains a bit of extra junk that is expected to be removed
    immutable string dummy_path = "/path/to/../to/./db/compilation_db.json";
    // directory used by the tests as the expected, normalized location of `dummy_path`
    immutable string dummy_dir = "/path/to/db";

    // One entry that uses the `command` attribute and a nested relative directory.
    enum raw_dummy1 = `[
    {
        "directory": "dir1/dir2",
        "command": "g++ -Idir1 -c -o binary file1.cpp",
        "file": "file1.cpp"
    }
]`;

    // Two entries in the same directory for two different files.
    enum raw_dummy2 = `[
    {
        "directory": "dir",
        "command": "g++ -Idir1 -c -o binary file1.cpp",
        "file": "file1.cpp"
    },
    {
        "directory": "dir",
        "command": "g++ -Idir1 -c -o binary file2.cpp",
        "file": "file2.cpp"
    }
]`;

    // Two entries with the same file name located in different directories.
    enum raw_dummy3 = `[
    {
        "directory": "dir1",
        "command": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp"
    },
    {
        "directory": "dir2",
        "command": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp"
    }
]`;

    // Same layout as raw_dummy3 but with `arguments` given as a single string
    // and an additional `output` attribute.
    enum raw_dummy4 = `[
    {
        "directory": "dir1",
        "arguments": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp",
        "output": "file3.o"
    },
    {
        "directory": "dir2",
        "arguments": "g++ -Idir1 -c -o binary file3.cpp",
        "file": "file3.cpp",
        "output": "file3.o"
    }
]`;

    // Same as raw_dummy4 but with `arguments` given as a JSON list of strings.
    enum raw_dummy5 = `[
    {
        "directory": "dir1",
        "arguments": ["g++", "-Idir1", "-c", "-o", "binary", "file3.cpp"],
        "file": "file3.cpp",
        "output": "file3.o"
    },
    {
        "directory": "dir2",
        "arguments": ["g++", "-Idir1", "-c", "-o", "binary", "file3.cpp"],
        "file": "file3.cpp",
        "output": "file3.o"
    }
]`;
}
1059 
1060 version (unittest) {
1061     import std.array : appender;
1062     import unit_threaded : writelnUt;
1063 }
1064 
1065 @("Should be a compile command DB")
1066 unittest {
1067     auto app = appender!(CompileCommand[])();
1068     raw_dummy1.parseCommands(CompileDbFile(dummy_path), app);
1069     auto cmds = app.data;
1070 
1071     assert(cmds.length == 1);
1072     cmds[0].directory.shouldEqual(dummy_dir ~ "/dir1/dir2");
1073     cmds[0].command.shouldEqual([
1074             "g++", "-Idir1", "-c", "-o", "binary", "file1.cpp"
1075             ]);
1076     cmds[0].file.shouldEqual("file1.cpp");
1077     cmds[0].absoluteFile.shouldEqual(dummy_dir ~ "/dir1/dir2/file1.cpp");
1078 }
1079 
1080 @("Should be a DB with two entries")
1081 unittest {
1082     auto app = appender!(CompileCommand[])();
1083     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1084     auto cmds = app.data;
1085 
1086     cmds[0].file.shouldEqual("file1.cpp");
1087     cmds[1].file.shouldEqual("file2.cpp");
1088 }
1089 
1090 @("Should find filename")
1091 unittest {
1092     auto app = appender!(CompileCommand[])();
1093     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1094     auto cmds = CompileCommandDB(app.data);
1095 
1096     auto found = cmds.find(dummy_dir ~ "/dir/file2.cpp");
1097     assert(found.length == 1);
1098     found[0].file.shouldEqual("file2.cpp");
1099 }
1100 
1101 @("Should find no match by using an absolute path that doesn't exist in DB")
1102 unittest {
1103     auto app = appender!(CompileCommand[])();
1104     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1105     auto cmds = CompileCommandDB(app.data);
1106 
1107     auto found = cmds.find("./file2.cpp");
1108     assert(found.length == 0);
1109 }
1110 
1111 @("Should find one match by using the absolute filename to disambiguous")
1112 unittest {
1113     import unit_threaded : writelnUt;
1114 
1115     auto app = appender!(CompileCommand[])();
1116     raw_dummy3.parseCommands(CompileDbFile(dummy_path), app);
1117     auto cmds = CompileCommandDB(app.data);
1118 
1119     auto found = cmds.find(dummy_dir ~ "/dir2/file3.cpp");
1120     assert(found.length == 1);
1121 
1122     found.toString.shouldEqual(format("%s/dir2
1123   file3.cpp
1124   %s/dir2/file3.cpp
1125   g++ -Idir1 -c -o binary file3.cpp
1126 ", dummy_dir, dummy_dir));
1127 }
1128 
1129 @("Should be a pretty printed search result")
1130 unittest {
1131     auto app = appender!(CompileCommand[])();
1132     raw_dummy2.parseCommands(CompileDbFile(dummy_path), app);
1133     auto cmds = CompileCommandDB(app.data);
1134     auto found = cmds.find(dummy_dir ~ "/dir/file2.cpp");
1135 
1136     found.toString.shouldEqual(format("%s/dir
1137   file2.cpp
1138   %s/dir/file2.cpp
1139   g++ -Idir1 -c -o binary file2.cpp
1140 ", dummy_dir, dummy_dir));
1141 }
1142 
1143 @("Should be a compile command DB with relative path")
1144 unittest {
1145     enum raw = `[
1146     {
1147         "directory": ".",
1148         "command": "g++ -Idir1 -c -o binary file1.cpp",
1149         "file": "file1.cpp"
1150     }
1151     ]`;
1152     auto app = appender!(CompileCommand[])();
1153     raw.parseCommands(CompileDbFile(dummy_path), app);
1154     auto cmds = app.data;
1155 
1156     assert(cmds.length == 1);
1157     cmds[0].directory.shouldEqual(dummy_dir);
1158     cmds[0].file.shouldEqual("file1.cpp");
1159     cmds[0].absoluteFile.shouldEqual(dummy_dir ~ "/file1.cpp");
1160 }
1161 
1162 @("Should be a DB read from a relative path with the contained paths adjusted appropriately")
1163 unittest {
1164     auto app = appender!(CompileCommand[])();
1165     raw_dummy3.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1166     auto cmds = CompileCommandDB(app.data);
1167 
1168     // trusted: constructing a path in memory which is never used for writing.
1169     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1170 
1171     auto found = cmds.find(abs_path ~ "/dir2/file3.cpp");
1172     assert(found.length == 1);
1173 
1174     found.toString.shouldEqual(format("%s/dir2
1175   file3.cpp
1176   %s/dir2/file3.cpp
1177   g++ -Idir1 -c -o binary file3.cpp
1178 ", abs_path, abs_path));
1179 }
1180 
1181 @("shall extract arguments, file, directory and output with absolute paths")
1182 unittest {
1183     auto app = appender!(CompileCommand[])();
1184     raw_dummy4.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1185     auto cmds = CompileCommandDB(app.data);
1186 
1187     // trusted: constructing a path in memory which is never used for writing.
1188     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1189 
1190     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.cpp"));
1191     assert(found.length == 1);
1192 
1193     found.toString.shouldEqual(format("%s/dir2
1194   file3.cpp
1195   %s/dir2/file3.cpp
1196   file3.o
1197   %s/dir2/file3.o
1198   g++ -Idir1 -c -o binary file3.cpp
1199 ", abs_path, abs_path, abs_path));
1200 }
1201 
1202 @("shall be the compiler flags derived from the arguments attribute")
1203 unittest {
1204     auto app = appender!(CompileCommand[])();
1205     raw_dummy4.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1206     auto cmds = CompileCommandDB(app.data);
1207 
1208     // trusted: constructing a path in memory which is never used for writing.
1209     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1210 
1211     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.cpp"));
1212     assert(found.length == 1);
1213 
1214     found[0].parseFlag(defaultCompilerFilter, Compiler.init)
1215         .cflags.shouldEqual(["-I", buildPath(abs_path, "dir2", "dir1")]);
1216 }
1217 
1218 @("shall find the entry based on an output match")
1219 unittest {
1220     auto app = appender!(CompileCommand[])();
1221     raw_dummy4.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1222     auto cmds = CompileCommandDB(app.data);
1223 
1224     // trusted: constructing a path in memory which is never used for writing.
1225     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1226 
1227     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.o"));
1228     assert(found.length == 1);
1229 
1230     found[0].absoluteFile.shouldEqual(buildPath(abs_path, "dir2", "file3.cpp"));
1231 }
1232 
1233 @("shall parse the compilation database when *arguments* is a json list")
1234 unittest {
1235     auto app = appender!(CompileCommand[])();
1236     raw_dummy5.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1237     auto cmds = CompileCommandDB(app.data);
1238 
1239     // trusted: constructing a path in memory which is never used for writing.
1240     auto abs_path = () @trusted { return getcwd() ~ "/path"; }();
1241 
1242     auto found = cmds.find(buildPath(abs_path, "dir2", "file3.o"));
1243     assert(found.length == 1);
1244 
1245     found[0].absoluteFile.shouldEqual(buildPath(abs_path, "dir2", "file3.cpp"));
1246 }
1247 
1248 @("shall parse the compilation database and find a match via the glob pattern")
1249 unittest {
1250     import std.path : baseName;
1251 
1252     auto app = appender!(CompileCommand[])();
1253     raw_dummy5.parseCommands(CompileDbFile("path/compilation_db.json"), app);
1254     auto cmds = CompileCommandDB(app.data);
1255 
1256     auto found = cmds.find("*/dir2/file3.cpp");
1257     assert(found.length == 1);
1258 
1259     found[0].absoluteFile.baseName.shouldEqual("file3.cpp");
1260 }
1261 
1262 @("shall extract filepath from includes correctly when there is spaces in the path")
1263 unittest {
1264     auto cmd = toCompileCommand("/home", "file.cpp", [
1265             "-I", `"dir with spaces"`, "-I", `\"dir with spaces\"`
1266             ], AbsoluteCompileDbDirectory("/home"), null);
1267     auto pargs = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
1268     pargs.cflags.shouldEqual([
1269             "-I", "/home/dir with spaces", "-I", "/home/dir with spaces"
1270             ]);
1271     pargs.includes.shouldEqual([
1272             "/home/dir with spaces", "/home/dir with spaces"
1273             ]);
1274 }
1275 
1276 @("shall handle path with spaces, both as separate string and combined with backslash")
1277 unittest {
1278     auto cmd = toCompileCommand("/project", "file.cpp", [
1279             "-I", `"separate dir/with space"`, "-I", `\"separate dir/with space\"`,
1280             `-I"combined dir/with space"`, `-I\"combined dir/with space\"`,
1281             ], AbsoluteCompileDbDirectory("/project"), null);
1282     auto pargs = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
1283     pargs.cflags.shouldEqual([
1284             "-I", "/project/separate dir/with space", "-I",
1285             "/project/separate dir/with space", "-I",
1286             "/project/combined dir/with space", "-I",
1287             "/project/combined dir/with space"
1288             ]);
1289     pargs.includes.shouldEqual([
1290             "/project/separate dir/with space", "/project/separate dir/with space",
1291             "/project/combined dir/with space", "/project/combined dir/with space"
1292             ]);
1293 }
1294 
1295 @("shall handle path with consecutive spaces")
1296 unittest {
1297     auto cmd = toCompileCommand("/project", "file.cpp",
1298             [
1299                 `-I"one space/lots of     space"`,
1300                 `-I\"one space/lots of     space\"`, `-I`,
1301                 `"one space/lots of     space"`, `-I`,
1302                 `\"one space/lots of     space\"`,
1303             ], AbsoluteCompileDbDirectory("/project"), null);
1304     auto pargs = cmd.get.parseFlag(defaultCompilerFilter, Compiler.init);
1305     pargs.cflags.shouldEqual([
1306             "-I", "/project/one space/lots of     space", "-I",
1307             "/project/one space/lots of     space", "-I",
1308             "/project/one space/lots of     space", "-I",
1309             "/project/one space/lots of     space",
1310             ]);
1311     pargs.includes.shouldEqual([
1312             "/project/one space/lots of     space",
1313             "/project/one space/lots of     space",
1314             "/project/one space/lots of     space",
1315             "/project/one space/lots of     space"
1316             ]);
1317 }