dextool.plugin.mutate.backend.analyze.id

1 /**
2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 Generate unique ID's for used to identify mutants.
11 */
12 module dextool.plugin.mutate.backend.analyze.id_factory;
13 
14 import logger = std.experimental.logger;
15 
16 import dextool.type : Path;
17 
18 import dextool.plugin.mutate.backend.analyze.extensions;
19 import dextool.plugin.mutate.backend.analyze.internal;
20 
21 import my.hash : Checksum64, BuildChecksum64, toBytes, toChecksum64;
22 import dextool.plugin.mutate.backend.type : CodeMutant, CodeChecksum, Mutation, Checksum, Offset;
23 
24 @safe:
25 
26 interface MutantIdFactory {
27     import dextool.plugin.mutate.backend.type : CodeMutant, Mutation;
28 
29     /** Change the current file.
30      *
31      * Params:
32      * filename = the file that the factory is for
33      * tokens = all tokens from the file.
34      */
35     void changeFile(Path fileName, scope Token[] tokens);
36 
37     /** Update the number of tokens that are before and after the mutant.
38      *
39      * Params:
40      *  content = offset of the tokens to use for content calculation
41      *  mutant = offset of the mutant
42      */
43     void update(const Offset content, const Offset mutant, scope Token[] tokens);
44 
45     /** Create a mutant at this mutation point.
46       *
47       * It is extremly important that the mutated text representation `mut` is
48       * used and not the kind of mutation because there are many different
49       * kinds of mutations that result in the same textual change. By using the
50       * text and not kind as part of the checksum it results in a
51       * deduplication/merge of mutants that actually are "the same".
52      */
53     CodeMutant make(Mutation m, scope const(ubyte)[] mut) @safe pure nothrow scope;
54 }
55 
56 /** Strict and whole file content sensitive unique mutant IDs.
57  *
58  * ID's are *strict* because any change to tokens besides comments will result
59  * in new ID's for all mutants in the file.
60  *
61  * The algorithm is a checksum of:
62  *  * the content of all relevant tokens, e.g. all except comments
63  *  * the token position before and after the mutant.
64  *  * the original text
65  *  * the mutated text
66  */
67 class StrictImpl : MutantIdFactory {
68     private {
69         /// Checksum of the filename containing the mutants.
70         Checksum file;
71         /// Checksum of all tokens content.
72         Checksum content;
73 
74         /// The offset location of the mutant in the file.
75         Offset mutantLoc;
76 
77         /// Checksum of the token indexes before and after the mutant.
78         Checksum preMutant;
79         Checksum postMutant;
80     }
81 
82     this() {
83     }
84 
85     override void changeFile(Path fileName, scope Token[] tokens) {
86         file = () {
87             BuildChecksum64 bc;
88             bc.put(cast(const(ubyte)[]) fileName.toString);
89             return toChecksum64(bc);
90         }();
91 
92         content = () {
93             BuildChecksum64 bc;
94             foreach (t; tokens) {
95                 bc.put(cast(const(ubyte)[]) t.spelling);
96             }
97             return toChecksum64(bc);
98         }();
99     }
100 
101     override void update(const Offset content, const Offset mutant, scope Token[] tokens) {
102         // only do it if the position changes
103         if (mutant == mutantLoc)
104             return;
105         mutantLoc = mutant;
106 
107         auto split = splice(tokens, mutant);
108 
109         {
110             BuildChecksum64 bc;
111             bc.put(split.begin.toBytes);
112             preMutant = toChecksum64(bc);
113         }
114         {
115             BuildChecksum64 bc;
116             bc.put(split.end.toBytes);
117             postMutant = toChecksum64(bc);
118         }
119     }
120 
121     override CodeMutant make(Mutation m, scope const(ubyte)[] mut) @safe pure nothrow scope {
122         return CodeMutant(CodeChecksum(makeId(mut)), m);
123     }
124 
125     /// Calculate the unique ID for a specific mutation at this point.
126     private Checksum64 makeId(scope const(ubyte)[] mut) @safe pure nothrow scope {
127         BuildChecksum64 h;
128 
129         h.put(file.c0.toBytes);
130 
131         h.put(content.c0.toBytes);
132 
133         h.put(preMutant.c0.toBytes);
134 
135         h.put(mut);
136 
137         h.put(postMutant.c0.toBytes);
138         return toChecksum64(h);
139     }
140 }
141 
142 /** Relaxed mutant IDs that are guaranteed to be unique for a while.
143  *
144  * The position in the file can be reset make it possible to relocate a mutant
145  * inside a file.
146  *
147  * The algorithm is a checksum of:
148  *  * the content of all relevant tokens, e.g. all except comments
149  *  * the token position before and after the mutant.
150  *  * the original text
151  *  * the mutated text
152  */
153 class RelaxedImpl : MutantIdFactory {
154     import my.set;
155 
156     private {
157         /// Checksum of the filename containing the mutants.
158         Checksum file;
159 
160         // the offset of the last window
161         Offset contentLoc;
162 
163         ///
164         Window content;
165 
166         // The offset location of the mutant in the file which the content and
167         // position checksums are calculated for.
168         Offset mutantLoc;
169 
170         /// Checksum of the token indexes before and after the mutant.
171         Checksum preMutant;
172         Checksum postMutant;
173     }
174 
175     this() {
176     }
177 
178     override void changeFile(Path fileName, scope Token[] tokens) {
179         file = () {
180             BuildChecksum64 bc;
181             bc.put(cast(const(ubyte)[]) fileName.toString);
182             return toChecksum64(bc);
183         }();
184     }
185 
186     override void update(const Offset content, const Offset mutant, scope Token[] tokens) {
187         // only do it if the position changes
188         if (contentLoc != content) {
189             contentLoc = content;
190             auto s = splice(tokens, content);
191             this.content.update(s, tokens);
192         }
193 
194         if (mutant != mutantLoc) {
195             mutantLoc = mutant;
196             auto s = this.content.toInside(splice(tokens, mutant));
197 
198             {
199                 BuildChecksum64 bc;
200                 bc.put(s.begin.toBytes);
201                 preMutant = toChecksum64(bc);
202             }
203             {
204                 BuildChecksum64 bc;
205                 bc.put(s.end.toBytes);
206                 postMutant = toChecksum64(bc);
207             }
208         }
209     }
210 
211     override CodeMutant make(Mutation m, scope const(ubyte)[] mut) @safe pure nothrow scope {
212         return CodeMutant(CodeChecksum(makeId(mut)), m);
213     }
214 
215     /// Calculate the unique ID for a specific mutation at this point.
216     private Checksum64 makeId(scope const(ubyte)[] mut) @safe pure nothrow scope {
217         BuildChecksum64 h;
218 
219         h.put(file.c0.toBytes);
220 
221         h.put(content.cs.c0.toBytes);
222 
223         h.put(preMutant.c0.toBytes);
224 
225         h.put(mut);
226 
227         h.put(postMutant.c0.toBytes);
228 
229         return toChecksum64(h);
230     }
231 }
232 
233 private:
234 
235 struct WindowIndex {
236     size_t begin;
237     size_t end;
238 
239     invariant {
240         assert(begin <= end);
241     }
242 }
243 
244 /// Checksum is calculated over the window.
245 struct Window {
246     WindowIndex win;
247     Checksum cs;
248 
249     void update(const WindowIndex window, scope Token[] tokens) {
250         win = window;
251 
252         if (win.end > tokens.length || win.begin == win.end)
253             win = WindowIndex(0, tokens.length);
254 
255         BuildChecksum64 bc;
256         foreach (t; tokens[win.begin .. win.end]) {
257             bc.put(cast(const(ubyte)[]) t.spelling);
258         }
259         cs = toChecksum64(bc);
260     }
261 
262     WindowIndex toInside(const WindowIndex x) {
263         const pre = () {
264             if (x.begin >= win.begin)
265                 return x.begin - win.begin;
266             return x.begin;
267         }();
268         const post = () {
269             if (x.end <= win.end)
270                 return x.end - win.begin;
271             return x.end;
272         }();
273         if (pre != post)
274             return WindowIndex(pre, post);
275         // not possible to use the window thus use the global
276         return x;
277     }
278 }
279 
280 /// Returns: The indexes in `toks` before and after `offset`.
281 auto splice(scope Token[] toks, const Offset offset) {
282     import std.algorithm : countUntil;
283 
284     const preIdx = toks.countUntil!((a, b) => a.offset.begin > b.begin)(offset);
285     if (preIdx <= -1) {
286         return WindowIndex(0, toks.length);
287     }
288 
289     const postIdx = () {
290         auto idx = toks[preIdx .. $].countUntil!((a, b) => a.offset.end > b.end)(offset);
291         if (idx == -1)
292             return toks.length;
293         return preIdx + idx;
294     }();
295 
296     return WindowIndex(preIdx, postIdx);
297 }