1 /** 2 Copyright: Copyright (c) 2018, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 This module contains different kinds of report methods and statistical 11 analyzers of the data gathered in the database. 12 */ 13 module dextool.plugin.mutate.backend.report.kmean; 14 15 import std.algorithm : map, sum; 16 import std.array : Appender, empty, array; 17 import std.datetime : Duration, Clock; 18 import std.datetime.stopwatch : StopWatch, AutoStart; 19 import std.math : abs; 20 21 struct KmeanIterator(T) { 22 double tolerance = 0; 23 Cluster!T[] clusters; 24 25 // statistics 26 int iterations; 27 Duration time; 28 29 void fit(T[] data, int maxIterations, Duration maxTime) { 30 auto sw = StopWatch(AutoStart.yes); 31 const stopAt = Clock.currTime + maxTime; 32 double[] prevMean = clusters.map!(a => a.mean).array; 33 34 foreach (const iter; 0 .. maxIterations) { 35 foreach (i; 0 .. clusters.length) { 36 clusters[i].updateMean; 37 } 38 39 foreach (ref a; clusters) 40 a.reset; 41 foreach (a; data) { 42 const bestMatch = () { 43 auto rval = 0L; 44 auto d0 = clusters[rval].distance(a); 45 foreach (i; 1 .. clusters.length) { 46 const d = clusters[i].distance(a); 47 if (d < d0) { 48 rval = i; 49 d0 = d; 50 } 51 } 52 return rval; 53 }(); 54 clusters[bestMatch].put(a); 55 } 56 57 bool term = iter > 1; 58 if (Clock.currTime < stopAt) { 59 foreach (i; 0 .. clusters.length) { 60 if (abs(clusters[i].mean - prevMean[i]) > tolerance) 61 term = false; 62 prevMean[i] = clusters[i].mean; 63 } 64 } 65 66 if (term) 67 break; 68 ++iterations; 69 } 70 71 time = sw.peek; 72 } 73 } 74 75 struct Point { 76 double value; 77 78 T opCast(T : double)() pure const nothrow { 79 return cast(double) value; 80 } 81 82 double distance(Point p) { 83 return abs(value - p.value); 84 } 85 } 86 87 struct Cluster(T) { 88 double mean = 0; 89 Appender!(T[]) data; 90 91 void put(T a) { 92 data.put(a); 93 } 94 95 void reset() { 96 data.clear; 97 } 98 99 void updateMean() { 100 if (data.data.empty) 101 return; 102 mean = data.data.map!(a => cast(double) a).sum() / cast(double) data.data.length; 103 } 104 105 double distance(T a) { 106 return abs(mean - cast(double) a); 107 } 108 }