dextool.plugin.mutate.backend.analyze.schema

1 /**
2 Copyright: Copyright (c) 2021, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 Q-learning algorithm for training the schema generator.
11 
12 Each mutation subtype has a state in range [0,100]. It determins the
13 probability that a mutant of that kind is part of the intermediate schema
14 generation.
15 
16 The state is updated with feedback from if the schema successfully compiled and
17 executed the test suite OK.
18 */
19 module dextool.plugin.mutate.backend.analyze.schema_ml;
20 
21 import std.algorithm : joiner, clamp, min, max, filter;
22 import std.random : uniform01, MinstdRand0, unpredictableSeed;
23 import std.range : only;
24 
25 import dextool.plugin.mutate.backend.type : Mutation;
26 
27 @safe:
28 
29 enum SchemaStatus {
30     /// no status exist
31     none,
32     /// the schema compiled and the test suite executed OK
33     ok,
34     /// either it failed to compile or the test suite failed
35     broken
36 }
37 
38 struct SchemaQ {
39     import std.traits : EnumMembers;
40     import my.hash;
41     import my.path : Path;
42 
43     static immutable MinState = 0;
44     static immutable MaxState = 1000;
45     static immutable LearnRate = 0.01;
46 
47     alias StatusData = Mutation.Kind[]delegate(SchemaStatus);
48 
49     MinstdRand0 rnd0;
50     int[Mutation.Kind][Checksum64] state;
51     Checksum64[Path] pathCache;
52 
53     static auto make() {
54         return SchemaQ(MinstdRand0(unpredictableSeed));
55     }
56 
57     this(MinstdRand0 rnd) {
58         this.rnd0 = rnd;
59     }
60 
61     this(typeof(state) st) {
62         rnd0 = MinstdRand0(unpredictableSeed);
63         state = st;
64     }
65 
66     /// Deep copy.
67     SchemaQ dup() {
68         typeof(state) st;
69         foreach (a; state.byKeyValue) {
70             int[Mutation.Kind] v;
71             foreach (b; a.value.byKeyValue) {
72                 v[b.key] = b.value;
73             }
74             st[a.key] = v;
75         }
76         return SchemaQ(st);
77     }
78 
79     /// Add a state for the `p` if it doesn't exist.
80     void addIfNew(const Path p) {
81         if (checksum(p) !in state) {
82             state[checksum(p)] = (int[Mutation.Kind]).init;
83         }
84     }
85 
86     /// Update the state for all mutants.
87     void update(const Path path, scope StatusData data) {
88         import std.math : round;
89 
90         addIfNew(path);
91         const ch = checksum(path);
92 
93         double[Mutation.Kind] change;
94 
95         // punish
96         foreach (k; data(SchemaStatus.broken))
97             change.update(k, () => (1.0 - LearnRate), (ref double x) {
98                 x -= LearnRate;
99             });
100         // reward
101         foreach (k; data(SchemaStatus.ok))
102             change.update(k, () => (1.0 + LearnRate), (ref double x) {
103                 x += LearnRate;
104             });
105 
106         // apply change
107         foreach (v; change.byKeyValue.filter!(a => a.value != 1.0)) {
108             state[ch].update(v.key, () => cast(int) round(MaxState * v.value), (ref int x) {
109                 if (v.value > 1.0)
110                     x = max(x + 1, cast(int) round(x * v.value));
111                 else
112                     x = min(x - 1, cast(int) round(x * v.value));
113             });
114         }
115 
116         // fix probability to be max P(1)
117         foreach (k; change.byKey) {
118             state[ch].update(k, () => MaxState, (ref int x) {
119                 x = clamp(x, MinState, MaxState);
120             });
121         }
122     }
123 
124     /** To allow those with zero probability to self heal give them a random +1 now and then.
125      */
126     void scatterTick() nothrow {
127         foreach (p; state.byKeyValue) {
128             foreach (k; p.value.byKeyValue.filter!(a => a.value == 0 && uniform01(rnd0) < 0.05)) {
129                 state[p.key][k.key] = 1;
130             }
131         }
132     }
133 
134     /** Roll the dice to see if the mutant should be used.
135      *
136      * Params:
137      *  p = path the mutant is located at.
138      *  k = kind of mutant
139      *  threshold = the mutants probability must be above the threshold
140      *  otherwise it will automatically fail.
141      *
142      * Return: true if the roll is positive, use the mutant.
143      */
144     bool use(const Path p, const Mutation.Kind k, const double threshold) {
145         const s = getState(p, k) / cast(double) MaxState;
146         return s >= threshold && uniform01(rnd0) < s;
147     }
148 
149     /// Returns: true if the probability of success is zero.
150     bool isZero(const Path p, const Mutation.Kind k) {
151         return getState(p, k) == 0;
152     }
153 
154     private Checksum64 checksum(const Path p) {
155         return pathCache.require(p, makeChecksum64(cast(const(ubyte)[]) p.toString));
156     }
157 
158     private int getState(const Path p, const Mutation.Kind k) {
159         if (auto st = checksum(p) in state)
160             return (*st).require(k, MaxState);
161         return MaxState;
162     }
163 }
164 
165 struct Feature {
166     import my.hash;
167 
168     // The path is extremly important because it allows the tool to clear out old data.
169     Checksum64 path;
170 
171     Mutation.Kind kind;
172 
173     Checksum64[] context;
174 
175     size_t toHash() @safe pure nothrow const @nogc {
176         auto rval = (cast(size_t) kind).hashOf();
177         rval = path.c0.hashOf(rval);
178         foreach (a; context)
179             rval = a.c0.hashOf(rval);
180         return rval;
181     }
182 }
183 
184 @("shall update the table")
185 unittest {
186     import std.random : MinstdRand0;
187     import my.path : Path;
188 
189     const foo = Path("foo");
190     SchemaQ q;
191     q.rnd0 = MinstdRand0(42);
192 
193     Mutation.Kind[] r1(SchemaStatus s) {
194         if (s == SchemaStatus.broken)
195             return [Mutation.Kind.rorLE];
196         if (s == SchemaStatus.ok)
197             return [Mutation.Kind.rorLT];
198         return null;
199     }
200 
201     q.update(foo, &r1);
202     const ch = q.pathCache[foo];
203     assert(q.state[ch][Mutation.Kind.rorLE] == 990);
204     assert(q.state[ch][Mutation.Kind.rorLT] == SchemaQ.MaxState);
205 
206     Mutation.Kind[] r2(SchemaStatus s) {
207         if (s == SchemaStatus.broken)
208             return [Mutation.Kind.rorLE, Mutation.Kind.rorLT];
209         if (s == SchemaStatus.ok)
210             return [Mutation.Kind.rorLT];
211         return null;
212     }
213 
214     q.update(foo, &r2);
215     assert(q.state[ch][Mutation.Kind.rorLE] == 980);
216     // in the last run it was one broken and one OK thus the change where 1.0.
217     assert(q.state[ch][Mutation.Kind.rorLT] == SchemaQ.MaxState);
218 }
219 
220 struct SchemaSizeQ {
221     static immutable LearnRate = 0.01;
222     static immutable Threshold = 0.9;
223 
224     /// min size of a schema.
225     long minSize;
226     /// max size of a schema.
227     long maxSize;
228     /// current size of that are used to generate scheman.
229     private long currentSize_;
230     /// total number of mutants that are to be tested when schemas are generated.
231     long testMutantsSize;
232 
233     static auto make(const long minSize, const long maxSize) {
234         return SchemaSizeQ(minSize, maxSize, maxSize);
235     }
236 
237     void updateSize(const double sz) @safe pure nothrow @nogc {
238         currentSize_ = clamp(cast(long) sz, minSize, maxSize);
239     }
240 
241     long currentSize() @safe pure nothrow const @nogc {
242         return currentSize_;
243     }
244 
245     /**
246      * Params:
247      *  schemaMutants = mutants in the schema
248      */
249     void update(const SchemaStatus status, const long schemaMutants) {
250         double newValue = currentSize_;
251 
252         double adjust = 1.0;
253         // ensure there is at least some change.
254         long fixed;
255 
256         final switch (status) {
257         case SchemaStatus.none:
258             break;
259         case SchemaStatus.ok:
260             // only try to increase the size if the schema where close to max
261             if (maxSize > 0 && schemaMutants > currentSize_ * Threshold) {
262                 adjust += LearnRate * (cast(double) schemaMutants / cast(double) maxSize);
263                 fixed++;
264             }
265             break;
266         case SchemaStatus.broken:
267             if (maxSize > 0 && schemaMutants > currentSize_ * Threshold) {
268                 adjust -= LearnRate * (cast(double) schemaMutants / cast(double) maxSize);
269                 fixed--;
270             }
271             break;
272         }
273 
274         updateSize(newValue * adjust + fixed);
275     }
276 
277     /// No schema with `currentSize_` where generated.
278     void noCurrentSize() {
279         if (maxSize > 0 && testMutantsSize > currentSize_ * Threshold)
280             updateSize(currentSize_ - currentSize_ * LearnRate);
281     }
282 
283     string toString() @safe pure const {
284         import std.format : format;
285 
286         return format!"SchemaSizeQ(min:%s, max:%s, current:%s, testMutantsSize:%s)"(minSize,
287                 maxSize, currentSize_, testMutantsSize);
288     }
289 }