dextool.plugin.mutate.backend.report.analyzers source code

1 /**
2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains different kinds of report methods and statistical
11 analyzers of the data gathered in the database.
12 */
13 module dextool.plugin.mutate.backend.report.analyzers;
14 
15 import logger = std.experimental.logger;
16 import std.algorithm : sum, map, sort, filter, count, cmp, joiner, among;
17 import std.array : array, appender, empty;
18 import std.conv : to;
19 import std.datetime : SysTime;
20 import std.exception : collectException;
21 import std.format : format;
22 import std.range : take, retro, only;
23 import std.typecons : Flag, Yes, No, Tuple, Nullable, tuple;
24 
25 import my.named_type;
26 import my.optional;
27 import my.set;
28 
29 import dextool.plugin.mutate.backend.database : Database, spinSql, MutationId,
30     MarkedMutant, TestCaseId, MutationStatusId;
31 import dextool.plugin.mutate.backend.diff_parser : Diff;
32 import dextool.plugin.mutate.backend.generate_mutant : MakeMutationTextResult,
33     makeMutationText, makeMutation;
34 import dextool.plugin.mutate.backend.interface_ : FilesysIO;
35 import dextool.plugin.mutate.backend.report.utility : window, windowSize,
36     statusToString, kindToString;
37 import dextool.plugin.mutate.backend.type : Mutation, Offset, TestCase, TestGroup;
38 import dextool.plugin.mutate.backend.utility : Profile;
39 import dextool.plugin.mutate.type : ReportKillSortOrder, ReportSection;
40 import dextool.type;
41 
42 static import dextool.plugin.mutate.backend.database.type;
43 
44 public import dextool.plugin.mutate.backend.report.utility : Table;
45 public import dextool.plugin.mutate.backend.type : MutantTimeProfile;
46 
47 version (unittest) {
48     import unit_threaded.assertions;
49 }
50 
51 @safe:
52 
/** Fill `tbl` with the most common mutation subtypes.
 *
 * At most the 20 subtypes with the highest count are reported, ordered from
 * the most to the least frequent.
 *
 * Params:
 *  mut_stat = number of mutants per (original, mutation) text pair
 *  tbl = table that receives one row per subtype: percentage of the total,
 *        count, original text and mutated text
 */
void reportMutationSubtypeStats(ref const long[MakeMutationTextResult] mut_stat, ref Table!4 tbl) @safe nothrow {
    auto profile = Profile(ReportSection.mut_stat);

    const long total = mut_stat.byValue.sum;
    auto mostCommon = mut_stat.byKeyValue.array.sort!((a, b) => a.value > b.value).take(20);

    foreach (entry; mostCommon) {
        try {
            const share = 100.0 * (cast(double) entry.value / cast(double) total);

            // dfmt off
            typeof(tbl).Row row = [
                share.to!string,
                entry.value.to!string,
                format("`%s`", window(entry.key.original, windowSize)),
                format("`%s`", window(entry.key.mutation, windowSize)),
            ];
            // dfmt on
            tbl.put(row);
        } catch (Exception e) {
            // a row that can not be formatted is skipped, the rest is still reported.
            logger.warning(e.msg).collectException;
        }
    }
}
76 
/** Test case score based on how many mutants they killed.
 */
struct TestCaseStat {
    import dextool.plugin.mutate.backend.database.type : TestCaseInfo;

    /// Score of one test case.
    struct Info {
        /// Killed mutants divided by the total number of mutants.
        double ratio = 0.0;
        TestCase tc;
        TestCaseInfo info;
        alias info this;
    }

    Info[TestCase] testCases;

    /// Returns: the test cases sorted from most kills to least kills.
    auto toSortedRange() {
        // Primary key is the number of kills (descending). Ties are broken
        // by the test case name, also descending.
        static bool moreKills(T)(ref T a, ref T b) {
            if (a.killedMutants != b.killedMutants)
                return a.killedMutants > b.killedMutants;
            return a.tc.name > b.tc.name;
        }

        auto all = testCases.byValue.array;
        return all.sort!moreKills;
    }
}
108 
/** Update the table with the score of test cases and how many mutants they killed.
 *
 * Each row holds the kill ratio in percent, the number of killed mutants and
 * the name of the test case.
 *
 * Params:
 *  st = the test case statistics to report
 *  take_ = how many from the top should be moved to the table
 *  sort_order = ctrl if the top or bottom of the test cases should be reported
 *  tbl = table to write the data to
 */
void toTable(ref TestCaseStat st, const long take_,
        const ReportKillSortOrder sort_order, ref Table!3 tbl) @safe nothrow {
    auto ranked = st.toSortedRange;

    // `top` reads the ranking from the front, `bottom` from the back.
    typeof(ranked.take(take_).array) selected;
    final switch (sort_order) {
    case ReportKillSortOrder.top:
        selected = ranked.take(take_).array;
        break;
    case ReportKillSortOrder.bottom:
        selected = ranked.retro.take(take_).array;
        break;
    }

    foreach (stat; selected) {
        try {
            typeof(tbl).Row row = [
                (100.0 * stat.ratio).to!string, stat.info.killedMutants.to!string,
                stat.tc.name
            ];
            tbl.put(row);
        } catch (Exception e) {
            logger.warning(e.msg).collectException;
        }
    }
}
139 
/** Extract the number of source code mutants that a test case has killed and
 * how much the kills contributed to the total.
 *
 * Params:
 *  db = database to read the test cases and mutants from
 *  kinds = mutation kinds to include in the statistics
 *
 * Returns: one entry per detected test case for which the database has
 * test case info. Empty if there are no mutants of the requested kinds.
 */
TestCaseStat reportTestCaseStats(ref Database db, const Mutation.Kind[] kinds) @safe nothrow {
    auto profile = Profile(ReportSection.tc_stat);

    const total = spinSql!(() { return db.mutantApi.totalSrcMutants(kinds).count; });
    // nothing to do. this also ensure that we do not divide by zero.
    if (total == 0)
        return TestCaseStat.init;

    TestCaseStat rval;

    foreach (tc; spinSql!(() { return db.testCaseApi.getDetectedTestCases; })) {
        // Query the info exactly once per test case. A lazy map/filter chain
        // re-evaluates `front` and thus would run the query more than once.
        auto info = spinSql!(() {
            return db.testCaseApi.getTestCaseInfo(tc, kinds);
        });
        if (info.isNull)
            continue;

        try {
            const ratio = cast(double) info.get.killedMutants / cast(double) total;
            rval.testCases[tc] = TestCaseStat.Info(ratio, tc, info.get);
        } catch (Exception e) {
            logger.warning(e.msg).collectException;
        }
    }

    return rval;
}
173 
/** The result of analysing the test cases to see how similar they are to each
 * other.
 */
class TestCaseSimilarityAnalyse {
    import dextool.plugin.mutate.backend.type : TestCase;

    /// How similar the parent (the key in `similarities`) is to `testCase`.
    static struct Similarity {
        /// The test case that the parent is compared to.
        TestCaseId testCase;
        /// Fraction of the parent's killed mutants that `testCase` also kills.
        double similarity = 0.0;
        /// Mutants that are killed by both `testCase` and the parent.
        MutationStatusId[] intersection;
        /// Mutants killed by the parent that are NOT killed by `testCase`.
        MutationStatusId[] difference;
    }

    /// For each test case (the parent), the test cases that it is similar to.
    Similarity[][TestCaseId] similarities;
}
191 
/// The result of the similarity analysis of two sets, A and B.
private struct Similarity {
    /// The quota |A intersect B| / |A|. Thus it is how similar A is to B. If
    /// B ever fully encloses A then the score is 1.0.
    double similarity = 0.0;
    /// The elements that are in both A and B.
    MutationStatusId[] intersection;
    /// The elements that are in A but not in B.
    MutationStatusId[] difference;
}
200 
/** Measure how much of `lhs_` is in `rhs_`.
 *
 * This is a directional metric: `setSimilarity(a, b)` is in general not
 * equal to `setSimilarity(b, a)`. Duplicates in the inputs are ignored
 * because both are converted to sets before they are compared.
 *
 * Params:
 *  lhs_ = the set that is measured
 *  rhs_ = the set that `lhs_` is compared to
 *
 * Returns: the quota |lhs intersect rhs| / |lhs| together with the
 * intersection and the difference (lhs - rhs). An empty `lhs_` gives a
 * similarity of 0.0.
 */
private Similarity setSimilarity(MutationStatusId[] lhs_, MutationStatusId[] rhs_) {
    auto lhs = lhs_.toSet;
    // Without this guard an empty lhs results in 0.0/0.0, which is NaN.
    if (lhs.length == 0)
        return Similarity.init;

    auto rhs = rhs_.toSet;
    auto intersect = lhs.intersect(rhs);
    auto diff = lhs.setDifference(rhs);
    return Similarity(cast(double) intersect.length / cast(double) lhs.length,
            intersect.toArray, diff.toArray);
}
211 
/** Analyse the similarity between test cases.
 *
 * Every test case that has killed at least one mutant is compared to every
 * other such test case. Only comparisons with a similarity above zero are
 * kept.
 *
 * TODO: the algorithm used is slow. Maybe matrix representation and sorted is better?
 *
 * Params:
 *  db = database to read the test cases and the mutants they killed from
 *  kinds = mutation kinds to use in the distance analyze
 *  limit = limit the number of test cases to the top `limit`.
 */
TestCaseSimilarityAnalyse reportTestCaseSimilarityAnalyse(ref Database db,
        const Mutation.Kind[] kinds, ulong limit) @safe {
    import std.container.binaryheap;

    auto profile = Profile(ReportSection.tc_similarity);

    // The DB lookups must be cached or otherwise the algorithm becomes too
    // slow for practical use.
    MutationStatusId[][TestCaseId] killCache;
    MutationStatusId[] getKills(TestCaseId id) @trusted {
        return killCache.require(id, spinSql!(() {
                return db.testCaseApi.testCaseKilledSrcMutants(kinds, id);
            }));
    }

    alias TcKills = Tuple!(TestCaseId, "id", MutationStatusId[], "kills");

    const test_cases = spinSql!(() {
        return db.testCaseApi.getDetectedTestCaseIds;
    });

    // Test cases without any kill can neither be a parent nor be compared to.
    TcKills[] candidates;
    foreach (id; test_cases) {
        auto kills = getKills(id);
        if (kills.length != 0)
            candidates ~= TcKills(id, kills);
    }

    auto rval = new typeof(return);

    foreach (ref parent; candidates) {
        auto found = appender!(TestCaseSimilarityAnalyse.Similarity[])();

        foreach (ref other; candidates) {
            if (other.id == parent.id)
                continue;

            auto distance = setSimilarity(parent.kills, other.kills);
            if (distance.similarity > 0)
                found.put(TestCaseSimilarityAnalyse.Similarity(other.id,
                        distance.similarity, distance.intersection, distance.difference));
        }

        if (found.data.length == 0)
            continue;

        // the heap yields the most similar test cases first.
        () @trusted {
            rval.similarities[parent.id] = heapify!((a,
                    b) => a.similarity < b.similarity)(found.data).take(limit).array;
        }();
    }

    return rval;
}
268 
/// Statistics about dead test cases.
struct TestCaseDeadStat {
    import std.range : isOutputRange;

    /// The ratio of dead TC of the total.
    double ratio = 0.0;
    /// The dead test cases.
    TestCase[] testCases;
    /// Total number of test cases that the dead ones are compared to.
    long total;

    /// Returns: the number of dead test cases.
    long numDeadTC() @safe pure nothrow const @nogc scope {
        return testCases.length;
    }

    /// Returns: the summary and the dead test cases as a string.
    string toString() @safe const {
        auto sink = appender!string;
        toString(sink);
        return sink.data;
    }

    /** Write a summary line, if there are any test cases at all, followed by
     * one line per dead test case to `w`.
     */
    void toString(Writer)(ref Writer w) @safe const 
            if (isOutputRange!(Writer, char)) {
        import std.ascii : newline;
        import std.format : formattedWrite;
        import std.range : put;

        if (total > 0) {
            formattedWrite(w, "%s/%s = %s of all test cases\n", numDeadTC, total, ratio);
        }

        foreach (dead; testCases) {
            put(w, dead.name);
            // the location is only printed when there is one.
            if (dead.location.length != 0) {
                put(w, " | ");
                put(w, dead.location);
            }
            put(w, newline);
        }
    }
}
306 
/** Add one row with name and location to `tbl` for each dead test case.
 *
 * Params:
 *  st = the dead test cases to report
 *  tbl = table to write the data to
 */
void toTable(ref TestCaseDeadStat st, ref Table!2 tbl) @safe pure nothrow {
    foreach (dead; st.testCases) {
        typeof(tbl).Row row = [dead.name, dead.location];
        tbl.put(row);
    }
}
313 
/** Returns: report of test cases that has killed zero mutants.
 *
 * The ratio is left at zero when the database has no test cases.
 */
TestCaseDeadStat reportDeadTestCases(ref Database db) @safe {
    auto profile = Profile(ReportSection.tc_killed_no_mutants);

    TestCaseDeadStat rval;
    rval.total = db.testCaseApi.getNumOfTestCases;
    rval.testCases = db.testCaseApi.getTestCasesWithZeroKills;

    // guard against a division by zero.
    if (rval.total <= 0)
        return rval;

    rval.ratio = cast(double) rval.numDeadTC / cast(double) rval.total;
    return rval;
}
326 
/// Only the mutation score thus a subset of all statistics.
struct MutationScore {
    import core.time : Duration;

    long alive;
    long killed;
    long timeout;
    long total;
    long noCoverage;
    long equivalent;
    long skipped;
    MutantTimeProfile totalTime;

    // Nr of mutants that are alive but tagged with nomut.
    long aliveNoMut;

    /** The mutation score.
     *
     * Killed and timeout mutants both count as detected. Alive mutants that
     * are tagged with nomut are removed from the total before the division.
     *
     * Returns: (killed + timeout) / (total - aliveNoMut), or 0.0 when there
     * are no mutants left to score.
     */
    double score() @safe pure nothrow const @nogc {
        const scored = total - aliveNoMut;
        // `scored` is zero when all mutants are suppressed. Dividing by it
        // would give NaN or infinity.
        if (total > 0 && scored > 0) {
            return cast(double)(killed + timeout) / cast(double) scored;
        }
        return 0.0;
    }
}
350 
/** Read the data that is needed to calculate the mutation score.
 *
 * Params:
 *  db = database to read the mutants from
 *  kinds = mutation kinds to include
 *  file = passed on to the database queries together with `kinds`
 *
 * Returns: the number of mutants per status together with the total count
 * and time.
 */
MutationScore reportScore(ref Database db, const Mutation.Kind[] kinds, string file = null) @safe nothrow {
    auto profile = Profile("reportScore");

    MutationScore rval;

    rval.alive = spinSql!(() { return db.mutantApi.aliveSrcMutants(kinds, file); }).count;
    rval.killed = spinSql!(() { return db.mutantApi.killedSrcMutants(kinds, file); }).count;
    rval.timeout = spinSql!(() { return db.mutantApi.timeoutSrcMutants(kinds, file); }).count;
    rval.aliveNoMut = spinSql!(() {
        return db.mutantApi.aliveNoMutSrcMutants(kinds, file);
    }).count;
    rval.noCoverage = spinSql!(() { return db.mutantApi.noCovSrcMutants(kinds, file); }).count;
    rval.equivalent = spinSql!(() { return db.mutantApi.equivalentMutants(kinds, file); }).count;
    rval.skipped = spinSql!(() { return db.mutantApi.skippedMutants(kinds, file); }).count;

    // the total carries both the count and the accumulated time.
    const all = spinSql!(() { return db.mutantApi.totalSrcMutants(kinds, file); });
    rval.totalTime = all.time;
    rval.total = all.count;

    return rval;
}
369 
/// Statistics for a group of mutants.
struct MutationStat {
    import core.time : Duration;
    import std.range : isOutputRange;

    long untested;
    long killedByCompiler;
    long worklist;

    /// The data that the score and most of the counters are read from.
    MutationScore scoreData;
    MutantTimeProfile killedByCompilerTime;
    /// Prediction of how long it takes until the worklist is processed.
    Duration predictedDone;

    long alive() @safe pure nothrow const @nogc {
        return scoreData.alive;
    }

    long noCoverage() @safe pure nothrow const @nogc {
        return scoreData.noCoverage;
    }

    /// Nr of mutants that are alive but tagged with nomut.
    long aliveNoMut() @safe pure nothrow const @nogc {
        return scoreData.aliveNoMut;
    }

    long killed() @safe pure nothrow const @nogc {
        return scoreData.killed;
    }

    long timeout() @safe pure nothrow const @nogc {
        return scoreData.timeout;
    }

    long equivalent() @safe pure nothrow const @nogc {
        return scoreData.equivalent;
    }

    long skipped() @safe pure nothrow const @nogc {
        return scoreData.skipped;
    }

    long total() @safe pure nothrow const @nogc {
        return scoreData.total;
    }

    MutantTimeProfile totalTime() @safe pure nothrow const @nogc {
        return scoreData.totalTime;
    }

    /// Adjust the score with the alive mutants that are suppressed.
    double score() @safe pure nothrow const @nogc {
        return scoreData.score;
    }

    /// Suppressed mutants of the total mutants.
    double suppressedOfTotal() @safe pure nothrow const @nogc {
        if (total <= 0)
            return 0.0;
        return (cast(double)(aliveNoMut) / cast(double) total);
    }

    /// Returns: the statistics as a human readable, multi-line string.
    string toString() @safe const {
        auto sink = appender!string;
        toString(sink);
        return sink.data;
    }

    /** Write the statistics to `w`, one labeled value per line.
     *
     * Some of the lines are only written when their value is above zero.
     */
    void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) {
        import core.time : dur;
        import std.ascii : newline;
        import std.datetime : Clock;
        import std.format : formattedWrite;
        import std.range : put;
        import dextool.plugin.mutate.backend.utility;

        immutable align_ = 19;

        // a left aligned label followed by its value.
        void row(T)(string label, T value) {
            formattedWrite(w, "%-*s %s\n", align_, label, value);
        }

        row("Time spent:", totalTime);
        if (untested > 0 && predictedDone > 0.dur!"msecs") {
            const pred = Clock.currTime + predictedDone;
            formattedWrite(w, "Remaining: %s (%s)\n", predictedDone, pred.toISOExtString);
        }
        if (killedByCompiler > 0) {
            // the label is longer than the others thus a wider column.
            formattedWrite(w, "%-*s %s\n", align_ * 3,
                    "Time spent on mutants killed by compiler:", killedByCompilerTime);
        }

        put(w, newline);

        // mutation score and details
        formattedWrite(w, "%-*s %.3s\n", align_, "Score:", score);

        row("Total:", total);
        if (untested > 0)
            row("Untested:", untested);
        row("Alive:", alive);
        row("Killed:", killed);
        if (skipped > 0)
            row("Skipped:", skipped);
        if (equivalent > 0)
            row("Equivalent:", equivalent);
        row("Timeout:", timeout);
        row("Killed by compiler:", killedByCompiler);
        if (worklist > 0)
            row("Worklist:", worklist);

        if (aliveNoMut > 0) {
            formattedWrite(w, "%-*s %s (%.3s)\n", align_,
                    "Suppressed (nomut):", aliveNoMut, suppressedOfTotal);
        }
    }
}
486 
/** Gather the statistics for the mutants of the requested kinds.
 *
 * Params:
 *  db = database to read the mutants and the worklist from
 *  kinds = mutation kinds to include
 *  file = passed on to the database queries together with `kinds`
 *
 * Returns: the score data together with the untested, worklist and
 * killed-by-compiler counters and a prediction of the remaining time.
 */
MutationStat reportStatistics(ref Database db, const Mutation.Kind[] kinds, string file = null) @safe nothrow {
    import core.time : dur;
    import dextool.plugin.mutate.backend.utility;

    auto profile = Profile(ReportSection.summary);

    const unknown = spinSql!(() { return db.mutantApi.unknownSrcMutants(kinds, file); });
    const inWorklist = spinSql!(() { return db.worklistApi.getWorklistCount; });
    const byCompiler = spinSql!(() {
        return db.mutantApi.killedByCompilerSrcMutants(kinds, file);
    });

    MutationStat st;
    st.scoreData = reportScore(db, kinds, file);
    st.untested = unknown.count;
    st.killedByCompiler = byCompiler.count;
    st.worklist = inWorklist;

    // The prediction is the average time per mutant times the size of the
    // worklist. Without any mutants there is nothing to base it on.
    if (st.total > 0) {
        st.predictedDone = st.worklist * (st.totalTime.sum / st.total);
    } else {
        st.predictedDone = 0.dur!"msecs";
    }
    st.killedByCompilerTime = byCompiler.time;

    return st;
}
509 
/// The mutants that are marked, as a table with one row per mutant.
struct MarkedMutantsStat {
    /// Columns: file, line, column, mutation, status and rationale.
    Table!6 tbl;
}
513 
/** Report all mutants that are marked in the database.
 *
 * Params:
 *  db = database to read the marked mutants from
 *  kinds = not used by this function
 *  file = not used by this function
 *
 * Returns: a table with one row per marked mutant.
 */
MarkedMutantsStat reportMarkedMutants(ref Database db, const Mutation.Kind[] kinds,
        string file = null) @safe {
    MarkedMutantsStat rval;
    rval.tbl.heading = [
        "File", "Line", "Column", "Mutation", "Status", "Rationale"
    ];

    foreach (marked; db.markMutantApi.getMarkedMutants()) {
        typeof(rval.tbl).Row row = [
            marked.path, marked.sloc.line.to!string, marked.sloc.column.to!string,
            marked.mutText, statusToString(marked.toStatus), marked.rationale.get
        ];
        rval.tbl.put(row);
    }

    return rval;
}
530 
/// Test cases that kill exactly the same set of mutants.
struct TestCaseOverlapStat {
    import std.format : formattedWrite;
    import std.range : put;
    import my.hash;

    /// Number of test cases that share their killed mutants with another test case.
    long overlap;
    /// Total number of test cases.
    long total;
    /// `overlap` divided by `total`.
    double ratio = 0.0;

    // map between the checksum of a set of killed mutants and the test cases
    // that killed exactly that set.
    TestCaseId[][Murmur3] tc_mut;
    // map between the checksum of a set of killed mutants and the mutants in the set.
    long[][Murmur3] mutid_mut;
    // map between a test case and its name.
    string[TestCaseId] name_tc;

    /// Returns: the summary, without a trailing newline.
    string sumToString() @safe const {
        return format("%s/%s = %s test cases", overlap, total, ratio);
    }

    /// Write the summary followed by a newline to `w`.
    void sumToString(Writer)(ref Writer w) @trusted const {
        formattedWrite(w, "%s/%s = %s test cases\n", overlap, total, ratio);
    }

    /// Returns: the summary and the groups of overlapping test cases.
    string toString() @safe const {
        auto sink = appender!string;
        toString(sink);
        return sink.data;
    }

    /** Write the summary followed by the groups of overlapping test cases to `w`.
     *
     * The first test case in a group is followed by the number of mutants
     * that the group killed. A blank line ends each group.
     */
    void toString(Writer)(ref Writer w) @safe const {
        sumToString(w);

        foreach (group; tc_mut.byKeyValue.filter!(a => a.value.length > 1)) {
            // TODO this is a bit slow. use a DB row iterator instead.
            foreach (idx, id; group.value) {
                const name = name_tc[id];
                if (idx == 0) {
                    () @trusted {
                        formattedWrite(w, "%s %s\n", name, mutid_mut[group.key].length);
                    }();
                } else {
                    () @trusted { formattedWrite(w, "%s\n", name); }();
                }
            }
            put(w, "\n");
        }
    }
}
580 
/** Write the test cases that completely overlap each other to a table.
 *
 * Each group of overlapping test cases gets one row per test case. The first
 * row in a group also holds the number of mutants that the group killed and,
 * if `colMutants` is set, the IDs of those mutants. An empty row ends each
 * group.
 *
 * Params:
 *  colMutants = if the table has a third column with the mutant IDs
 */
template toTable(Flag!"colWithMutants" colMutants) {
    static if (colMutants) {
        alias TableT = Table!3;
    } else {
        alias TableT = Table!2;
    }
    alias RowT = TableT.Row;

    void toTable(ref TestCaseOverlapStat st, ref TableT tbl) {
        foreach (group; st.tc_mut.byKeyValue.filter!(a => a.value.length > 1)) {
            // TODO this is a bit slow. use a DB row iterator instead.
            foreach (idx, id; group.value) {
                RowT row;
                row[0] = st.name_tc[id];

                // only the first row in a group carries the mutant data.
                if (idx == 0) {
                    auto muts = st.mutid_mut[group.key];
                    row[1] = muts.length.to!string;
                    static if (colMutants) {
                        row[2] = format("%-(%s,%)", muts);
                    }
                }

                tbl.put(row);
            }

            static if (colMutants)
                RowT separator = ["", "", ""];
            else
                RowT separator = ["", ""];
            tbl.put(separator);
        }
    }
}
619 
/** Find the test cases that kill exactly the same mutants.
 *
 * The sorted IDs of the mutants that a test case killed are checksummed.
 * Test cases with the same checksum are put in the same group.
 *
 * Params:
 *  db = database to read the test cases and their kills from
 *  kinds = mutation kinds to include
 *
 * Returns: the groups together with how many of the test cases that overlap.
 */
TestCaseOverlapStat reportTestCaseFullOverlap(ref Database db, const Mutation.Kind[] kinds) @safe {
    import my.hash;

    auto profile = Profile(ReportSection.tc_full_overlap);

    TestCaseOverlapStat rval;
    rval.total = db.testCaseApi.getNumOfTestCases;

    foreach (tc_id; db.testCaseApi.getTestCasesWithAtLeastOneKill(kinds)) {
        // sorted such that the checksum is independent of the order of the kills.
        auto muts = db.testCaseApi.getTestCaseMutantKills(tc_id, kinds)
            .sort.map!(a => cast(long) a).array;
        auto checksum = makeMurmur3(cast(ubyte[]) muts);

        auto group = checksum in rval.tc_mut;
        if (group is null) {
            rval.tc_mut[checksum] = [tc_id];
            rval.mutid_mut[checksum] = muts;
        } else {
            (*group) ~= tc_id;
        }

        rval.name_tc[tc_id] = db.testCaseApi.getTestCaseName(tc_id);
    }

    // a group with a single test case does not overlap with anything.
    foreach (group; rval.tc_mut.byValue) {
        if (group.length > 1)
            rval.overlap += group.length;
    }

    if (rval.total > 0)
        rval.ratio = cast(double) rval.overlap / cast(double) rval.total;

    return rval;
}
651 
/// The result of analysing how similar the test groups are to each other.
class TestGroupSimilarity {
    /// A test group as it is presented in the result.
    static struct TestGroup {
        string description;
        string name;

        /// What the user configured as regex. Useful when e.g. generating reports
        /// for a user.
        string userInput;

        /// Order the test groups by their name.
        int opCmp(ref const TestGroup s) const {
            return cmp(name, s.name);
        }
    }

    /// How similar the test group that is the key in `similarities` is to `comparedTo`.
    static struct Similarity {
        /// The test group that the `key` is compared to.
        TestGroup comparedTo;
        /// How similar the `key` is to `comparedTo`.
        double similarity = 0.0;
        /// Mutants that are killed by both the `key` and `comparedTo`.
        MutationStatusId[] intersection;
        /// Mutants killed by the `key` that are NOT killed by `comparedTo`.
        MutationStatusId[] difference;
    }

    /// For each test group (the `key`), the test groups that it is similar to.
    Similarity[][TestGroup] similarities;
}
679 
/** Analyze the similarity between the test groups.
 *
 * Assuming that a limit on how many test groups to report isn't interesting
 * because they are few so it is never a problem.
 *
 * Params:
 *  db = database to read the test cases and the mutants they killed from
 *  kinds = mutation kinds to include
 *  test_groups = the test groups to compare with each other
 *
 * Returns: for each test group that has killed at least one mutant, the
 * other such test groups that it has a similarity above zero with.
 */
TestGroupSimilarity reportTestGroupsSimilarity(ref Database db,
        const(Mutation.Kind)[] kinds, const(TestGroup)[] test_groups) @safe {
    auto profile = Profile(ReportSection.tc_groups_similarity);

    alias TgKills = Tuple!(TestGroupSimilarity.TestGroup, "testGroup",
            MutationStatusId[], "kills");

    const test_cases = spinSql!(() {
        return db.testCaseApi.getDetectedTestCaseIds;
    }).map!(a => Tuple!(TestCaseId, "id", TestCase, "tc")(a, spinSql!(() {
                return db.testCaseApi.getTestCase(a).get;
            }))).array;

    // The mutants killed by the test cases whose name match the regex of the
    // test group. May contain duplicates.
    MutationStatusId[] gatherKilledMutants(const(TestGroup) tg) {
        auto kills = appender!(MutationStatusId[])();
        foreach (tc; test_cases.filter!(a => a.tc.isTestCaseInTestGroup(tg.re))) {
            kills.put(spinSql!(() {
                    return db.testCaseApi.testCaseKilledSrcMutants(kinds, tc.id);
                }));
        }
        return kills.data;
    }

    // test groups without any kill are not part of the analysis.
    TgKills[] test_group_kills;
    foreach (const tg; test_groups) {
        auto kills = gatherKilledMutants(tg);
        if (kills.length != 0)
            test_group_kills ~= TgKills(TestGroupSimilarity.TestGroup(tg.description,
                    tg.name, tg.userInput), kills);
    }

    // calculate similarity between all test groups.
    auto rval = new typeof(return);

    foreach (tg_parent; test_group_kills) {
        auto app = appender!(TestGroupSimilarity.Similarity[])();
        foreach (tg_other; test_group_kills.filter!(a => a.testGroup != tg_parent.testGroup)) {
            auto similarity = setSimilarity(tg_parent.kills, tg_other.kills);
            if (similarity.similarity > 0)
                app.put(TestGroupSimilarity.Similarity(tg_other.testGroup,
                        similarity.similarity, similarity.intersection, similarity.difference));
        }

        // Store the result once, after all comparisons for the parent are done.
        if (app.data.length != 0)
            rval.similarities[tg_parent.testGroup] = app.data;
    }

    return rval;
}
734 
/// Statistics for one test group.
class TestGroupStat {
    import dextool.plugin.mutate.backend.database : FileId, MutantInfo;

    /// Human readable description for the test group.
    string description;
    /// Statistics for a test group.
    MutationStat stats;
    /// The test cases that belong to the test group.
    TestCase[] testCases;
    /// Lookup for converting a id to a filename
    Path[FileId] files;
    /// Mutants alive in a file.
    MutantInfo[][FileId] alive;
    /// Mutants killed in a file.
    MutantInfo[][FileId] killed;
}
751 
752 import std.regex : Regex;
753 
/** Check if the name of a test case is matched by the regex of a test group.
 *
 * Params:
 *  tc = the test case to check
 *  tg = the regex of the test group
 *
 * Returns: true if the regex matches the whole name of the test case.
 */
private bool isTestCaseInTestGroup(const TestCase tc, const Regex!char tg) {
    import std.regex : matchFirst;

    auto hit = matchFirst(tc.name, tg);
    // the regex must match the full test case thus checking that
    // nothing is left before or after
    return !hit.empty && hit.pre.length == 0 && hit.post.length == 0;
}
765 
/** Gather statistics for the test cases that belong to a test group.
 *
 * Params:
 *  db = database to read the test cases and mutants from
 *  kinds = mutation kinds to include
 *  test_g = the test group to report
 *
 * Returns: the test cases in the group, the mutant counters and the alive
 * and killed mutants grouped by the file they are in.
 */
TestGroupStat reportTestGroups(ref Database db, const(Mutation.Kind)[] kinds,
        const(TestGroup) test_g) @safe {
    auto profile = Profile(ReportSection.tc_groups);

    // Sets are used because the same mutant can be reported for more than
    // one test case in the group.
    static struct TcStat {
        Set!MutationStatusId alive;
        Set!MutationStatusId killed;
        Set!MutationStatusId timeout;
        Set!MutationStatusId total;

        // killed by the specific test case
        Set!MutationStatusId tcKilled;
    }

    auto rval = new TestGroupStat;
    rval.description = test_g.description;

    // map test cases to this test group
    foreach (tc; db.testCaseApi.getDetectedTestCases) {
        if (!tc.isTestCaseInTestGroup(test_g.re))
            continue;
        rval.testCases ~= tc;
    }

    // collect mutation statistics for each test case group
    TcStat collected;
    foreach (const tc; rval.testCases) {
        foreach (const id; db.testCaseApi.testCaseMutationPointAliveSrcMutants(kinds, tc))
            collected.alive.add(id);
        foreach (const id; db.testCaseApi.testCaseMutationPointKilledSrcMutants(kinds, tc))
            collected.killed.add(id);
        foreach (const id; db.testCaseApi.testCaseMutationPointTimeoutSrcMutants(kinds, tc))
            collected.timeout.add(id);
        foreach (const id; db.testCaseApi.testCaseMutationPointTotalSrcMutants(kinds, tc))
            collected.total.add(id);
        foreach (const id; db.testCaseApi.testCaseKilledSrcMutants(kinds, tc))
            collected.tcKilled.add(id);
    }

    // update the mutation stat for the test group
    rval.stats.scoreData.alive = collected.alive.length;
    rval.stats.scoreData.killed = collected.killed.length;
    rval.stats.scoreData.timeout = collected.timeout.length;
    rval.stats.scoreData.total = collected.total.length;

    // associate mutants with their file
    foreach (const m; db.mutantApi.getMutantsInfo(kinds, collected.tcKilled.toArray)) {
        auto fileId = db.getFileId(m.id).get;
        rval.killed[fileId] ~= m;

        // the filename is only looked up the first time a file is seen.
        if (fileId !in rval.files) {
            rval.files[fileId] = Path.init;
            rval.files[fileId] = db.getFile(fileId).get;
        }
    }

    foreach (const m; db.mutantApi.getMutantsInfo(kinds, collected.alive.toArray)) {
        auto fileId = db.getFileId(m.id).get;
        rval.alive[fileId] ~= m;

        if (fileId !in rval.files) {
            rval.files[fileId] = Path.init;
            rval.files[fileId] = db.getFile(fileId).get;
        }
    }

    return rval;
}
833 
/// High interest mutants.
class MutantSample {
    import dextool.plugin.mutate.backend.database : FileId, MutantInfo,
        MutationStatus, MutationEntry, MutationStatusTime;

    /// Lookup of the mutant for a status ID that is in one of the samples.
    MutationEntry[MutationStatusId] mutants;

    /// The mutant that had its status updated the furthest back in time.
    MutationStatusTime[] oldest;

    /// The mutant that has survived the longest in the system.
    MutationStatus[] highestPrio;

    /// The latest mutants that were added and survived.
    MutationStatusTime[] latest;
}
850 
/** Sample the mutants that are of high interest to the user.
 *
 * Params:
 *  db = database to read the mutants from
 *  kinds = mutation kinds to include
 *  historyNr = passed on to the database queries that produce the samples
 *
 * Returns: samples of mutants that are of high interest to the user.
 */
MutantSample reportSelectedAliveMutants(ref Database db, const(Mutation.Kind)[] kinds,
        long historyNr) {
    auto profile = Profile(ReportSection.mut_recommend_kill);

    auto sample = new typeof(return);

    sample.highestPrio = db.mutantApi.getHighestPrioMutant(kinds,
            Mutation.Status.alive, historyNr);
    foreach (const prio; sample.highestPrio) {
        auto ids = db.mutantApi.getMutationIds(kinds, [prio.statusId]);
        // a status without a mutant of the requested kinds is left out of the lookup.
        if (ids.length == 0)
            continue;
        sample.mutants[prio.statusId] = db.mutantApi.getMutation(ids[0]).get;
    }

    sample.oldest = db.mutantApi.getOldestMutants(kinds, historyNr);
    foreach (const old; sample.oldest) {
        auto ids = db.mutantApi.getMutationIds(kinds, [old.id]);
        if (ids.length == 0)
            continue;
        sample.mutants[old.id] = db.mutantApi.getMutation(ids[0]).get;
    }

    return sample;
}
874 
/// Report of the mutants, and the test cases killing them, for the files in a diff.
class DiffReport {
    import dextool.plugin.mutate.backend.database : FileId, MutantInfo;
    import dextool.plugin.mutate.backend.diff_parser : Diff;

    /// The mutation score.
    double score = 0.0;

    /// The raw diff for a file
    Diff.Line[][FileId] rawDiff;

    /// Lookup for converting a id to a filename
    Path[FileId] files;
    /// Mutants alive in a file.
    MutantInfo[][FileId] alive;
    /// Mutants killed in a file.
    MutantInfo[][FileId] killed;
    /// Test cases that killed mutants.
    TestCase[] testCases;

    /** Render the report as text.
     *
     * For each file the path is written followed by its alive and then its
     * killed mutants. The test cases that killed mutants are written last.
     *
     * Returns: the textual representation of the report.
     */
    override string toString() @safe const {
        import std.format : formattedWrite;
        import std.range : put;

        auto w = appender!string;

        foreach (file; files.byKeyValue) {
            put(w, file.value.toString);
            // A file is not guaranteed to have an entry in both `alive` and
            // `killed` (e.g. all its mutants are killed) so the lookup must
            // not use indexing which raises a RangeError for a missing key.
            if (auto muts = file.key in alive) {
                foreach (mut; *muts)
                    formattedWrite(w, "  %s\n", mut);
            }
            if (auto muts = file.key in killed) {
                foreach (mut; *muts)
                    formattedWrite(w, "  %s\n", mut);
            }
        }

        formattedWrite(w, "Test Cases killing mutants");
        foreach (tc; testCases)
            formattedWrite(w, "  %s", tc);

        return w.data;
    }
}
915 
/** Report the mutants that are located on the lines changed by a diff.
 *
 * Files in the diff that are unknown to the database are skipped with a
 * warning. Files without any mutant on the changed lines are only logged.
 *
 * The score is `killed / (killed + alive)` where timeout mutants count as
 * killed. When there is no alive or killed mutant to base the score on it is
 * reported as 1.0.
 *
 * Params:
 *  db = database to read the mutants from.
 *  kinds = mutation kinds to consider.
 *  diff = the diff to report on.
 *  workdir = used to resolve the paths in the diff.
 *
 * Returns: the mutants, test cases and score for the diff.
 */
DiffReport reportDiff(ref Database db, const(Mutation.Kind)[] kinds,
        ref Diff diff, AbsolutePath workdir) {
    import dextool.plugin.mutate.backend.type : SourceLoc;

    auto profile = Profile(ReportSection.diff);

    auto rval = new DiffReport;

    // all mutants found so far. Used to only report a mutant once.
    Set!MutationStatusId total;
    Set!MutationId alive;
    Set!MutationId killed;

    foreach (kv; diff.toRange(workdir)) {
        auto fid = db.getFileId(kv.key);
        if (fid.isNull) {
            logger.warning("This file in the diff has not been tested thus skipping it: ", kv.key);
            continue;
        }

        bool hasMutants;
        foreach (id; kv.value
                .toRange
                .map!(line => spinSql!(() => db.mutantApi.getMutationsOnLine(kinds,
                    fid.get, SourceLoc(line))))
                .joiner
                .filter!(a => a !in total)) {
            hasMutants = true;
            total.add(id);

            const info = db.mutantApi.getMutantsInfo(kinds, [id])[0];
            if (info.status == Mutation.Status.alive) {
                rval.alive[fid.get] ~= info;
                alive.add(info.id);
            } else if (info.status.among(Mutation.Status.killed, Mutation.Status.timeout)) {
                rval.killed[fid.get] ~= info;
                killed.add(info.id);
            }
        }

        if (hasMutants) {
            rval.files[fid.get] = kv.key;
            rval.rawDiff[fid.get] = diff.rawDiff[kv.key];
        } else {
            logger.info("This file in the diff has no mutants on changed lines: ", kv.key);
        }
    }

    Set!TestCase test_cases;
    foreach (tc; killed.toRange.map!(a => db.testCaseApi.getTestCases(a)).joiner) {
        test_cases.add(tc);
    }

    rval.testCases = test_cases.toArray.sort.array;

    // Mutants with a status other than alive/killed/timeout are part of
    // `total` but of neither `alive` nor `killed`. Guarding on the denominator,
    // instead of on `total`, avoids a 0/0 division that results in NaN.
    const classified = killed.length + alive.length;
    if (classified == 0) {
        rval.score = 1.0;
    } else {
        // TODO: use total to compute e.g. a standard deviation or some other
        // useful statistical metric to convey a "confidence" of the value.
        rval.score = cast(double) killed.length / cast(double) classified;
    }

    return rval;
}
980 
/// Partition of the test cases into those needed to reach the mutation score and the rest.
struct MinimalTestSet {
    import dextool.plugin.mutate.backend.database.type : TestCaseInfo;

    /// Number of test cases in `minimalSet` and `redundant` together.
    long total;

    /// The smallest set of test cases found that achieves the mutation test score.
    TestCase[] minimalSet;
    /// Test cases that add nothing to the mutation test score.
    TestCase[] redundant;
    /// Test case name mapped to the summed test time of the mutants it killed.
    TestCaseInfo[string] testCaseTime;
}
993 
/** Calculate which test cases are needed to kill the mutants that are killed.
 *
 * The detected test cases that have killed at least one mutant are visited in
 * order of ascending number of kills. A test case that kills a mutant that no
 * previously visited test case killed is part of the minimal set. Otherwise it
 * is redundant.
 *
 * Params:
 *  db = database to read test cases and kills from.
 *  kinds = mutation kinds to consider.
 *
 * Returns: the minimal and redundant test cases.
 */
MinimalTestSet reportMinimalSet(ref Database db, const Mutation.Kind[] kinds) {
    import dextool.plugin.mutate.backend.database : TestCaseInfo;

    auto profile = Profile(ReportSection.tc_min_set);

    alias TcIdInfo = Tuple!(TestCase, "tc", TestCaseId, "id", TestCaseInfo, "info");

    // gather the test cases that are known to the database and have kills.
    auto candidates = appender!(TcIdInfo[])();
    foreach (tc; db.testCaseApi.getDetectedTestCases) {
        auto tcId = db.testCaseApi.getTestCaseId(tc);
        if (tcId.isNull)
            continue;

        auto candidate = TcIdInfo(tc, tcId.get, db.testCaseApi.getTestCaseInfo(tc, kinds).get);
        if (candidate.info.killedMutants != 0)
            candidates.put(candidate);
    }

    MinimalTestSet result;
    Set!MutationId seenKills;

    // the test cases with the fewest kills are picked first.
    foreach (const candidate; candidates.data.sort!((a,
            b) => a.info.killedMutants < b.info.killedMutants)) {
        result.testCaseTime[candidate.tc.name] = candidate.info;

        const before = seenKills.length;
        foreach (const id; db.testCaseApi.getTestCaseMutantKills(candidate.id, kinds))
            seenKills.add(id);

        // it contributes only if it killed something not already killed.
        if (seenKills.length > before) {
            result.minimalSet ~= candidate.tc;
        } else {
            result.redundant ~= candidate.tc;
        }
    }

    result.total = result.minimalSet.length + result.redundant.length;

    return result;
}
1031 
/// The mutants that only a single test case kills, grouped by that test case.
struct TestCaseUniqueness {
    /// The mutants that the test case is alone in killing.
    MutationStatusId[][TestCaseId] uniqueKills;

    /// Test cases without any unique kill. They are candidates for being
    /// refactored/removed.
    Set!TestCaseId noUniqueKills;
}
1039 
/** Find the mutants that exactly one test case kills.
 *
 * Params:
 *  db = database to read test cases and kills from.
 *  kinds = mutation kinds to consider.
 *
 * Returns: a report of the mutants that a test case is the only one that kills.
 */
TestCaseUniqueness reportTestCaseUniqueness(ref Database db, const Mutation.Kind[] kinds) {
    import dextool.plugin.mutate.backend.database.type : MutationStatusId;

    auto profile = Profile(ReportSection.tc_unique);

    // mutants that so far have been seen killed by exactly one test case.
    TestCaseId[MutationStatusId] soleKiller;
    // mutants that more than one test case kills.
    Set!MutationStatusId sharedKills;

    foreach (tcId; db.testCaseApi.getTestCasesWithAtLeastOneKill(kinds)) {
        foreach (mutant; db.testCaseApi.testCaseKilledSrcMutants(kinds, tcId)) {
            if (sharedKills.contains(mutant))
                continue;

            // `remove` is true when another test case already claimed the
            // mutant which means that the kill is not unique.
            if (soleKiller.remove(mutant)) {
                sharedKills.add(mutant);
            } else {
                soleKiller[mutant] = tcId;
            }
        }
    }

    typeof(return) result;
    Set!TestCaseId withUniqueKill;
    foreach (mutant, tcId; soleKiller) {
        result.uniqueKills[tcId] ~= mutant;
        withUniqueKill.add(tcId);
    }

    foreach (tcId; db.testCaseApi.getDetectedTestCaseIds) {
        if (!withUniqueKill.contains(tcId))
            result.noUniqueKills.add(tcId);
    }

    return result;
}
1074 
/// Estimate of the mutation score. It is a thin wrapper around a kalman filter.
struct EstimateMutationScore {
    import my.signal_theory.kalman : KalmanFilter;

    private KalmanFilter kf;

    /// Feed the measurement `a` to the filter.
    void update(const double a) {
        kf.updateEstimate(a);
    }

    /// Returns: the estimated mutation score.
    NamedType!(double, Tag!"EstimatedMutationScore", 0.0, TagStringable) value() @safe pure nothrow const @nogc {
        const current = kf.currentEstimate;
        return typeof(return)(current);
    }

    /// Returns: the error in the estimate. The unit is the same as for `value`.
    NamedType!(double, Tag!"MutationScoreError", 0.0, TagStringable) error() @safe pure nothrow const @nogc {
        const current = kf.estimateError;
        return typeof(return)(current);
    }
}
1095 
/// Estimate the mutation score from the status of individual mutants.
struct EstimateScore {
    import my.signal_theory.kalman : KalmanFilter;

    // The filter parameters are chosen as follows:
    // 0.5 puts the start in the middle of the range of possible values.
    // 0.01 makes the trend change "slowly" over the last 100 mutants.
    // 0.001 is too "insensitive" for an on the fly analysis. The result mostly
    //  ends up being the current mutation score.
    private EstimateMutationScore estimate = EstimateMutationScore(KalmanFilter(0.5, 0.5, 0.01));

    /** Update the estimate with the status of a mutant.
     *
     * `unknown` and `killedByCompiler` are ignored. `skipped`, `noCoverage`
     * and `alive` are fed to the estimate as 0.0. `killed`, `timeout` and
     * `equivalent` are fed as 1.0.
     */
    void update(const Mutation.Status s) {
        final switch (s) with (Mutation.Status) {
        case unknown:
        case killedByCompiler:
            // do not affect the estimate.
            return;
        case skipped:
        case noCoverage:
        case alive:
            estimate.update(0.0);
            break;
        case killed:
        case timeout:
        case equivalent:
            estimate.update(1.0);
            break;
        }
    }

    /// Returns: the estimated mutation score.
    auto value() @safe pure nothrow const @nogc {
        return estimate.value;
    }

    /// Returns: the error in the estimate. The unit is the same as for `value`.
    auto error() @safe pure nothrow const @nogc {
        return estimate.error;
    }
}
1148 
/// The estimated trend of the mutation score based on the latest code changes.
struct ScoreTrendByCodeChange {
    /// One sample of the estimate.
    static struct Point {
        /// When the sample is from.
        SysTime timeStamp;

        /// The estimated mutation score.
        NamedType!(double, Tag!"EstimatedMutationScore", 0.0, TagStringable) value;

        /// The error in the estimate. The unit is the same as for `value`.
        NamedType!(double, Tag!"MutationScoreError", 0.0, TagStringable) error;
    }

    /// The samples in the order they were recorded.
    Point[] sample;

    /// Returns: the score of the last sample or the init value if there are no samples.
    NamedType!(double, Tag!"EstimatedMutationScore", 0.0, TagStringable) value() @safe pure nothrow const @nogc {
        if (sample.length != 0)
            return sample[$ - 1].value;
        return typeof(return).init;
    }

    /// Returns: the error of the last sample or the init value if there are no samples.
    NamedType!(double, Tag!"MutationScoreError", 0.0, TagStringable) error() @safe pure nothrow const @nogc {
        if (sample.length != 0)
            return sample[$ - 1].error;
        return typeof(return).init;
    }
}
1175 
/** Estimate the mutation score by running a kalman filter over the mutants in
 * the order they have been tested. It gives a rough estimate of where the test
 * suites quality is going over time.
 *
 * A sample is recorded each time the `added` timestamp of the visited mutant
 * differs from the previous one, and a final sample is recorded for the last
 * visited mutant. No sample is recorded when there are no mutants, so the
 * result is then empty.
 *
 * An exception from the database is logged as a warning and the samples
 * gathered so far are returned.
 *
 * Params:
 *  db = database to read the mutants from.
 *  kinds = mutation kinds to consider.
 *
 * Returns: the samples of the estimated mutation score.
 */
ScoreTrendByCodeChange reportTrendByCodeChange(ref Database db, const Mutation.Kind[] kinds) @trusted nothrow {
    auto app = appender!(ScoreTrendByCodeChange.Point[])();
    EstimateScore estimate;

    try {
        SysTime lastAdded;
        SysTime last;
        bool first = true;
        void fn(const Mutation.Status s, const SysTime added) {
            estimate.update(s);
            debug logger.trace(estimate.estimate.kf).collectException;

            if (first)
                lastAdded = added;

            if (added != lastAdded) {
                app.put(ScoreTrendByCodeChange.Point(added, estimate.value, estimate.error));
                lastAdded = added;
            }

            last = added;
            first = false;
        }

        db.iterateMutantStatus(kinds, &fn);
        // `last` is only a valid timestamp if at least one mutant was visited.
        // Without the guard an empty database results in a bogus sample with
        // an uninitialized timestamp and the initial value of the filter.
        if (!first)
            app.put(ScoreTrendByCodeChange.Point(last, estimate.value, estimate.error));
    } catch (Exception e) {
        logger.warning(e.msg).collectException;
    }
    return ScoreTrendByCodeChange(app.data);
}
1212 
/** History of how the mutation score have evolved over time.
 *
 * The history is ordered ascending by date. Each day is the average of the
 * recorded mutation score.
 */
struct MutationScoreHistory {
    import dextool.plugin.mutate.backend.database.type : MutationScore;

    /// A linear extrapolation based on the five latest scores.
    static struct Estimate {
        /// The point in time in the middle of the five latest scores.
        SysTime x;
        /// The average of the five latest scores.
        double avg = 0;
        /// The point in time that the prediction is for.
        SysTime predX;
        /// The predicted score. It is in the range [0.0, 1.0].
        double predScore = 0;
        /// True if the predicted score is higher than the latest score.
        bool posTrend = false;
    }

    /// only one score for each date.
    MutationScore[] data;
    /// Only calculated when there are at least six scores.
    Estimate estimate;

    /** Store the history and, if there are at least six scores, calculate the
     * estimate from the five latest.
     *
     * Params:
     *  data = the scores. The last element is used as the latest score.
     */
    this(MutationScore[] data) {
        import std.algorithm : sum, map, min, max;

        this.data = data;
        if (data.length < 6)
            return;

        const values = data[$ - 5 .. $];
        const latest = values[$ - 1].score.get;
        const avg = sum(values.map!(a => a.score.get)) / 5.0;
        const xDiff = values[$ - 1].timeStamp - values[0].timeStamp;
        const halfSpan = xDiff.total!"days" / 2.0;
        // A span shorter than a day gives no slope to extrapolate. Treating it
        // as flat avoids a division by zero that would make the prediction NaN.
        const dy = halfSpan == 0 ? 0.0 : (latest - avg) / halfSpan;

        estimate.x = values[0].timeStamp + xDiff / 2;
        estimate.avg = avg;
        estimate.predX = values[$ - 1].timeStamp + xDiff / 2;
        // a score is by definition in the range [0, 1] thus a steep trend,
        // in either direction, must not predict a value outside of it.
        estimate.predScore = max(0.0, min(1.0, dy * xDiff.total!"days" / 2.0 + latest));
        estimate.posTrend = estimate.predScore > latest;
    }
}
1252 
/// Returns: the mutation score history that is stored in `db` with one score per day.
MutationScoreHistory reportMutationScoreHistory(ref Database db) @safe {
    auto recorded = db.getMutationScoreHistory;
    return reportMutationScoreHistory(recorded);
}
1256 
/** Merge consecutive scores that are from the same date into their average.
 *
 * Params:
 *  data = the recorded scores. Less than two scores are used as is.
 *
 * Returns: a history with one score per run of scores from the same date.
 */
private MutationScoreHistory reportMutationScoreHistory(
        dextool.plugin.mutate.backend.database.type.MutationScore[] data) {
    import std.datetime : DateTime, Date, SysTime;
    import dextool.plugin.mutate.backend.database.type : MutationScore;

    // nothing to merge.
    if (data.length < 2)
        return MutationScoreHistory(data);

    auto daily = appender!(MutationScore[])();

    // the date that is accumulated together with its sum and number of scores.
    Date day = (cast(DateTime) data[0].timeStamp).date;
    double total = data[0].score.get;
    double samples = 1;

    // store the average of the accumulated date.
    void flush() {
        daily.put(MutationScore(SysTime(day), typeof(MutationScore.score)(total / samples)));
    }

    foreach (entry; data[1 .. $]) {
        auto entryDay = (cast(DateTime) entry.timeStamp).date;
        if (entryDay != day) {
            flush();
            day = entryDay;
            total = entry.score.get;
            samples = 1;
        } else {
            total += entry.score.get;
            samples++;
        }
    }
    // the last date has not been stored by the loop.
    flush();

    return MutationScoreHistory(daily.data);
}
1287 
@("shall calculate the mean of the mutation scores")
unittest {
    import core.time : days;
    import std.datetime : DateTime;
    import dextool.plugin.mutate.backend.database.type : MutationScore;

    alias Score = typeof(MutationScore.score);

    auto d = DateTime(2000, 6, 1, 10, 30, 0);

    // two scores from the same day followed by one from the day after.
    auto scores = [
        MutationScore(SysTime(d), Score(10.0)), MutationScore(SysTime(d),
                Score(5.0)), MutationScore(SysTime(d + 1.days), Score(5.0))
    ];

    auto res = reportMutationScoreHistory(scores);

    // the first day is the mean of its two scores.
    res.data[0].score.get.shouldEqual(7.5);
    res.data[1].score.get.shouldEqual(5.0);
}
1306 
/** Sync status is how old the information about mutants and their status is
 * compared to when the tests or source code were last changed.
 */
struct SyncStatus {
    import dextool.plugin.mutate.backend.database : MutationStatusTime;

    /// Timestamp for the test files.
    SysTime test;
    /// Timestamp for the source code.
    SysTime code;
    /// Timestamp for the coverage information.
    SysTime coverage;
    /// Mutants together with the time of their status.
    MutationStatusTime[] mutants;
}
1318 
/** Gather the timestamps needed to judge how up to date the mutation data is.
 *
 * The current time is used for a timestamp that the database has no value for.
 *
 * Params:
 *  db = database to read the timestamps from.
 *  kinds = mutation kinds to consider when fetching the oldest mutants.
 *  nrMutants = passed on to the query for the oldest mutants.
 *
 * Returns: the newest test file, source file and coverage timestamps together
 * with the oldest mutants.
 */
SyncStatus reportSyncStatus(ref Database db, const(Mutation.Kind)[] kinds, const long nrMutants) {
    import std.datetime : Clock;
    import dextool.plugin.mutate.backend.database : TestFile, TestFileChecksum, TestFilePath;

    typeof(return) status;

    auto newestTest = spinSql!(() => db.testFileApi.getNewestTestFile);
    status.test = newestTest.orElse(TestFile(TestFilePath.init,
            TestFileChecksum.init, Clock.currTime)).timeStamp;

    auto newestCode = spinSql!(() => db.getNewestFile);
    status.code = newestCode.orElse(Clock.currTime);

    auto newestCoverage = spinSql!(() => db.coverageApi.getCoverageTimeStamp);
    status.coverage = newestCoverage.orElse(Clock.currTime);

    status.mutants = spinSql!(() => db.mutantApi.getOldestMutants(kinds, nrMutants));

    return status;
}
1331 
/// Parameters for classifying test cases. See `makeTestCaseClassifier`.
struct TestCaseClassifier {
    /// The threshold that the classification is based on.
    long threshold;
}
1335 
/** Calculate the threshold for classifying test cases from the test case kills
 * in the database.
 *
 * The kills are split in two clusters. The threshold is the lowest cluster
 * mean plus half the distance between the two means, truncated, plus one.
 *
 * Params:
 *  db = database to read the test case kills from.
 *  minThreshold = the lowest threshold to return. It is also what is returned
 *      when there are less than 100 samples.
 *
 * Returns: a classifier with a threshold that is at least `minThreshold`.
 */
TestCaseClassifier makeTestCaseClassifier(ref Database db, const long minThreshold) {
    import std.algorithm : maxElement, max, minElement;
    import std.datetime : dur;
    import std.math : abs;
    import dextool.plugin.mutate.backend.report.kmean;

    auto profile = Profile("test case classifier");

    // The distribution is bimodal (U shaped) with one or more tops depending
    // on the architecture. The left most edge is the leaf functionality and
    // the rest of the edges are the main data flows.
    //
    // The formula below assumes a normal distribution which this obviously
    // isn't. The result is still fine because the purpose is to classify "bad"
    // test cases by checking if all mutants that they kill are above the
    // threshold. The threshold, as calculated, thus centers around the mean
    // and moves further to the right the further away the edges are. It also,
    // suitably, handles multiple edges because the only important factor is
    // to not get "too close" to the left most edge. That would lead to false
    // classifications.

    auto samples = db.mutantApi
        .getAllTestCaseKills
        .filter!(a => a > 0)
        .map!(a => Point(cast(double) a))
        .array;

    // a classifier is of no use if there are too few samples.
    if (samples.length < 100)
        return TestCaseClassifier(minThreshold);

    const largest = samples.map!(a => a.value).maxElement;

    // a precision of 0.1 is good enough because the result is rounded.
    auto kmean = KmeanIterator!Point(0.1);
    kmean.clusters ~= Cluster!Point(0);
    // dividing by 2 reduces the number of iterations for a typical sample.
    kmean.clusters ~= Cluster!Point(cast(double) largest / 2.0);

    kmean.fit(samples, 1000, 10.dur!"seconds");

    const lowestMean = kmean.clusters.map!(a => a.mean).minElement;
    const halfGap = abs(kmean.clusters[0].mean - kmean.clusters[1].mean) / 2.0;
    const calculated = 1 + cast(long)(lowestMean + halfGap);

    logger.tracef("calculated threshold: %s iterations:%s time:%s cluster.mean: %s",
            calculated, kmean.iterations, kmean.time, kmean.clusters.map!(a => a.mean));

    return TestCaseClassifier(max(calculated, minThreshold));
}
1385 
/// User supplied metadata about test cases.
struct TestCaseMetadata {
    /// Where a test case is located.
    static struct Location {
        /// The file of the test case.
        string file;
        /// The line in `file` if it is known.
        Optional!uint line;
    }

    /// Text associated with a test case.
    string[TestCase] text;
    /// Location of a test case.
    Location[TestCase] loc;

    /// Whether the user has manually marked a test case as redundant or not.
    bool[TestCase] redundant;
}
1398 
/** Parse the test case metadata in the JSON file `metadataPath`.
 *
 * The file is expected to contain an array of objects. Each object must have
 * a `name` and may have `text`, `location` (an object with `file` and `line`)
 * and `redundant`.
 *
 * Errors are logged and never propagated. A file that can't be read or parsed
 * results in empty metadata. An object that can't be parsed is skipped, though
 * what was read from it before the error is kept.
 *
 * Params:
 *  metadataPath = the JSON file to read.
 *
 * Returns: the metadata that could be parsed.
 */
TestCaseMetadata parseTestCaseMetadata(AbsolutePath metadataPath) @trusted {
    import std.json;
    import std.file : readText;

    TestCaseMetadata rval;
    JSONValue jraw;
    try {
        jraw = parseJSON(readText(metadataPath.toString));
    } catch (Exception e) {
        logger.warning("Error reading ", metadataPath);
        logger.info(e.msg);
        return rval;
    }

    try {
        foreach (jtc; jraw.array) {
            TestCase tc;

            try {
                if (auto v = "name" in jtc) {
                    tc = TestCase(v.str);
                } else {
                    logger.warning("Missing `name` in ", jtc.toPrettyString);
                    continue;
                }

                if (auto v = "text" in jtc)
                    rval.text[tc] = v.str;
                if (auto v = "location" in jtc) {
                    TestCaseMetadata.Location loc;
                    if (auto f = "file" in *v)
                        loc.file = f.str;
                    if (auto l = "line" in *v) {
                        // A blind cast of a negative or too large value wraps
                        // around to an unrelated line number. Leave the line
                        // unset rather than report a misleading one.
                        const line = l.integer;
                        if (line >= 0 && line <= uint.max)
                            loc.line = some(cast(uint) line);
                        else
                            logger.warning("Ignoring `line` that is out of range in ",
                                    jtc.toPrettyString);
                    }
                    rval.loc[tc] = loc;
                }

                if (auto v = "redundant" in jtc)
                    rval.redundant[tc] = v.boolean;
            } catch (Exception e) {
                logger.warning("Error parsing ", jtc.toPrettyString);
                logger.warning(e.msg);
            }
        }
    } catch (Exception e) {
        // e.g. the top level value is not an array.
        logger.warning("Error parsing ", jraw.toPrettyString);
        logger.warning(e.msg);
    }

    return rval;
}