1 /**
2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains different kinds of report methods and statistical
11 analyzers of the data gathered in the database.
12 */
13 module dextool.plugin.mutate.backend.report.kmean;
14 
15 import std.algorithm : map, sum;
16 import std.array : Appender, empty, array;
17 import std.datetime : Duration, Clock;
18 import std.datetime.stopwatch : StopWatch, AutoStart;
19 import std.math : abs;
20 
21 struct KmeanIterator(T) {
22     double tolerance = 0;
23     Cluster!T[] clusters;
24 
25     // statistics
26     int iterations;
27     Duration time;
28 
29     void fit(T[] data, int maxIterations, Duration maxTime) {
30         auto sw = StopWatch(AutoStart.yes);
31         const stopAt = Clock.currTime + maxTime;
32         double[] prevMean = clusters.map!(a => a.mean).array;
33 
34         foreach (const iter; 0 .. maxIterations) {
35             foreach (i; 0 .. clusters.length) {
36                 clusters[i].updateMean;
37             }
38 
39             foreach (ref a; clusters)
40                 a.reset;
41             foreach (a; data) {
42                 const bestMatch = () {
43                     auto rval = 0L;
44                     auto d0 = clusters[rval].distance(a);
45                     foreach (i; 1 .. clusters.length) {
46                         const d = clusters[i].distance(a);
47                         if (d < d0) {
48                             rval = i;
49                             d0 = d;
50                         }
51                     }
52                     return rval;
53                 }();
54                 clusters[bestMatch].put(a);
55             }
56 
57             bool term = iter > 1;
58             if (Clock.currTime < stopAt) {
59                 foreach (i; 0 .. clusters.length) {
60                     if (abs(clusters[i].mean - prevMean[i]) > tolerance)
61                         term = false;
62                     prevMean[i] = clusters[i].mean;
63                 }
64             }
65 
66             if (term)
67                 break;
68             ++iterations;
69         }
70 
71         time = sw.peek;
72     }
73 }
74 
75 struct Point {
76     double value;
77 
78     T opCast(T : double)() pure const nothrow {
79         return cast(double) value;
80     }
81 
82     double distance(Point p) {
83         return abs(value - p.value);
84     }
85 }
86 
87 struct Cluster(T) {
88     double mean = 0;
89     Appender!(T[]) data;
90 
91     void put(T a) {
92         data.put(a);
93     }
94 
95     void reset() {
96         data.clear;
97     }
98 
99     void updateMean() {
100         if (data.data.empty)
101             return;
102         mean = data.data.map!(a => cast(double) a).sum() / cast(double) data.data.length;
103     }
104 
105     double distance(T a) {
106         return abs(mean - cast(double) a);
107     }
108 }