1 /** 2 Copyright: Copyright (c) 2017, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 Generate unique ID's for used to identify mutants. 11 */ 12 module dextool.plugin.mutate.backend.analyze.id_factory; 13 14 import logger = std.experimental.logger; 15 16 import dextool.type : Path; 17 18 import dextool.plugin.mutate.backend.analyze.extensions; 19 import dextool.plugin.mutate.backend.analyze.internal; 20 21 import my.hash : Checksum64, BuildChecksum64, toBytes, toChecksum64; 22 import dextool.plugin.mutate.backend.type : CodeMutant, CodeChecksum, Mutation, Checksum, Offset; 23 24 @safe: 25 26 interface MutantIdFactory { 27 import dextool.plugin.mutate.backend.type : CodeMutant, Mutation; 28 29 /** Change the current file. 30 * 31 * Params: 32 * filename = the file that the factory is for 33 * tokens = all tokens from the file. 34 */ 35 void changeFile(Path fileName, scope Token[] tokens); 36 37 /** Update the number of tokens that are before and after the mutant. 38 * 39 * Params: 40 * content = offset of the tokens to use for content calculation 41 * mutant = offset of the mutant 42 */ 43 void update(const Offset content, const Offset mutant, scope Token[] tokens); 44 45 /** Create a mutant at this mutation point. 46 * 47 * It is extremly important that the mutated text representation `mut` is 48 * used and not the kind of mutation because there are many different 49 * kinds of mutations that result in the same textual change. By using the 50 * text and not kind as part of the checksum it results in a 51 * deduplication/merge of mutants that actually are "the same". 52 */ 53 CodeMutant make(Mutation m, scope const(ubyte)[] mut) @safe pure nothrow scope; 54 } 55 56 /** Strict and whole file content sensitive unique mutant IDs. 57 * 58 * ID's are *strict* because any change to tokens besides comments will result 59 * in new ID's for all mutants in the file. 60 * 61 * The algorithm is a checksum of: 62 * * the content of all relevant tokens, e.g. all except comments 63 * * the token position before and after the mutant. 64 * * the original text 65 * * the mutated text 66 */ 67 class StrictImpl : MutantIdFactory { 68 private { 69 /// Checksum of the filename containing the mutants. 70 Checksum file; 71 /// Checksum of all tokens content. 72 Checksum content; 73 74 /// The offset location of the mutant in the file. 75 Offset mutantLoc; 76 77 /// Checksum of the token indexes before and after the mutant. 78 Checksum preMutant; 79 Checksum postMutant; 80 } 81 82 this() { 83 } 84 85 override void changeFile(Path fileName, scope Token[] tokens) { 86 file = () { 87 BuildChecksum64 bc; 88 bc.put(cast(const(ubyte)[]) fileName.toString); 89 return toChecksum64(bc); 90 }(); 91 92 content = () { 93 BuildChecksum64 bc; 94 foreach (t; tokens) { 95 bc.put(cast(const(ubyte)[]) t.spelling); 96 } 97 return toChecksum64(bc); 98 }(); 99 } 100 101 override void update(const Offset content, const Offset mutant, scope Token[] tokens) { 102 // only do it if the position changes 103 if (mutant == mutantLoc) 104 return; 105 mutantLoc = mutant; 106 107 auto split = splice(tokens, mutant); 108 109 { 110 BuildChecksum64 bc; 111 bc.put(split.begin.toBytes); 112 preMutant = toChecksum64(bc); 113 } 114 { 115 BuildChecksum64 bc; 116 bc.put(split.end.toBytes); 117 postMutant = toChecksum64(bc); 118 } 119 } 120 121 override CodeMutant make(Mutation m, scope const(ubyte)[] mut) @safe pure nothrow scope { 122 return CodeMutant(CodeChecksum(makeId(mut)), m); 123 } 124 125 /// Calculate the unique ID for a specific mutation at this point. 126 private Checksum64 makeId(scope const(ubyte)[] mut) @safe pure nothrow scope { 127 BuildChecksum64 h; 128 129 h.put(file.c0.toBytes); 130 131 h.put(content.c0.toBytes); 132 133 h.put(preMutant.c0.toBytes); 134 135 h.put(mut); 136 137 h.put(postMutant.c0.toBytes); 138 return toChecksum64(h); 139 } 140 } 141 142 /** Relaxed mutant IDs that are guaranteed to be unique for a while. 143 * 144 * The position in the file can be reset make it possible to relocate a mutant 145 * inside a file. 146 * 147 * The algorithm is a checksum of: 148 * * the content of all relevant tokens, e.g. all except comments 149 * * the token position before and after the mutant. 150 * * the original text 151 * * the mutated text 152 */ 153 class RelaxedImpl : MutantIdFactory { 154 import my.set; 155 156 private { 157 /// Checksum of the filename containing the mutants. 158 Checksum file; 159 160 // the offset of the last window 161 Offset contentLoc; 162 163 /// 164 Window content; 165 166 // The offset location of the mutant in the file which the content and 167 // position checksums are calculated for. 168 Offset mutantLoc; 169 170 /// Checksum of the token indexes before and after the mutant. 171 Checksum preMutant; 172 Checksum postMutant; 173 } 174 175 this() { 176 } 177 178 override void changeFile(Path fileName, scope Token[] tokens) { 179 file = () { 180 BuildChecksum64 bc; 181 bc.put(cast(const(ubyte)[]) fileName.toString); 182 return toChecksum64(bc); 183 }(); 184 } 185 186 override void update(const Offset content, const Offset mutant, scope Token[] tokens) { 187 // only do it if the position changes 188 if (contentLoc != content) { 189 contentLoc = content; 190 auto s = splice(tokens, content); 191 this.content.update(s, tokens); 192 } 193 194 if (mutant != mutantLoc) { 195 mutantLoc = mutant; 196 auto s = this.content.toInside(splice(tokens, mutant)); 197 198 { 199 BuildChecksum64 bc; 200 bc.put(s.begin.toBytes); 201 preMutant = toChecksum64(bc); 202 } 203 { 204 BuildChecksum64 bc; 205 bc.put(s.end.toBytes); 206 postMutant = toChecksum64(bc); 207 } 208 } 209 } 210 211 override CodeMutant make(Mutation m, scope const(ubyte)[] mut) @safe pure nothrow scope { 212 return CodeMutant(CodeChecksum(makeId(mut)), m); 213 } 214 215 /// Calculate the unique ID for a specific mutation at this point. 216 private Checksum64 makeId(scope const(ubyte)[] mut) @safe pure nothrow scope { 217 BuildChecksum64 h; 218 219 h.put(file.c0.toBytes); 220 221 h.put(content.cs.c0.toBytes); 222 223 h.put(preMutant.c0.toBytes); 224 225 h.put(mut); 226 227 h.put(postMutant.c0.toBytes); 228 229 return toChecksum64(h); 230 } 231 } 232 233 private: 234 235 struct WindowIndex { 236 size_t begin; 237 size_t end; 238 239 invariant { 240 assert(begin <= end); 241 } 242 } 243 244 /// Checksum is calculated over the window. 245 struct Window { 246 WindowIndex win; 247 Checksum cs; 248 249 void update(const WindowIndex window, scope Token[] tokens) { 250 win = window; 251 252 if (win.end > tokens.length || win.begin == win.end) 253 win = WindowIndex(0, tokens.length); 254 255 BuildChecksum64 bc; 256 foreach (t; tokens[win.begin .. win.end]) { 257 bc.put(cast(const(ubyte)[]) t.spelling); 258 } 259 cs = toChecksum64(bc); 260 } 261 262 WindowIndex toInside(const WindowIndex x) { 263 const pre = () { 264 if (x.begin >= win.begin) 265 return x.begin - win.begin; 266 return x.begin; 267 }(); 268 const post = () { 269 if (x.end <= win.end) 270 return x.end - win.begin; 271 return x.end; 272 }(); 273 if (pre != post) 274 return WindowIndex(pre, post); 275 // not possible to use the window thus use the global 276 return x; 277 } 278 } 279 280 /// Returns: The indexes in `toks` before and after `offset`. 281 auto splice(scope Token[] toks, const Offset offset) { 282 import std.algorithm : countUntil; 283 284 const preIdx = toks.countUntil!((a, b) => a.offset.begin > b.begin)(offset); 285 if (preIdx <= -1) { 286 return WindowIndex(0, toks.length); 287 } 288 289 const postIdx = () { 290 auto idx = toks[preIdx .. $].countUntil!((a, b) => a.offset.end > b.end)(offset); 291 if (idx == -1) 292 return toks.length; 293 return preIdx + idx; 294 }(); 295 296 return WindowIndex(preIdx, postIdx); 297 }