1 /** 2 Copyright: Copyright (c) 2019, Joakim Brännström. All rights reserved. 3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0) 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 */ 6 module blob_model; 7 8 version (unittest) { 9 import unit_threaded.assertions; 10 } 11 12 struct Uri { 13 string value; 14 int version_; 15 16 int opCmp(ref const typeof(this) rhs) const { 17 import std..string : cmp; 18 19 if (version_ != rhs.version_) 20 return version_ - rhs.version_; 21 return cmp(value, rhs.value); 22 } 23 24 T opCast(T : string)() @safe pure nothrow const @nogc { 25 return value; 26 } 27 } 28 29 struct Offset { 30 size_t value; 31 alias value this; 32 } 33 34 struct Interval { 35 Offset start; 36 Offset end; 37 private bool append_; 38 39 invariant { 40 assert(start <= end); 41 } 42 43 this(Offset s, Offset e) @safe pure nothrow @nogc { 44 start = s; 45 end = e; 46 } 47 48 this(size_t s, size_t e) @safe pure nothrow @nogc { 49 this(Offset(s), Offset(e)); 50 } 51 52 /**Returns: An interval that will always be at the end which mean that if 53 * it is e.g. used for an Edit it will be appended to the file. 54 */ 55 static Interval append() @safe pure nothrow @nogc { 56 auto r = Interval(0, 0); 57 r.append_ = true; 58 return r; 59 } 60 61 int opCmp(ref const Interval rhs) @safe pure nothrow const @nogc { 62 if (start < rhs.start) 63 return -1; 64 else if (start > rhs.start) 65 return 1; 66 else if (start == rhs.start && end < rhs.end) 67 return -1; 68 else if (start == rhs.start && end > rhs.end) 69 return 1; 70 71 return 0; 72 } 73 74 import std.range : isOutputRange; 75 76 string toString() @safe pure const { 77 import std.array : appender; 78 79 auto buf = appender!string; 80 toString(buf); 81 return buf.data; 82 } 83 84 void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) { 85 import std.format : formattedWrite; 86 87 formattedWrite(w, "[%s, %s)", start, end); 88 } 89 } 90 91 /** Detect overlap between this interval and other given interval in a 92 * half-open coordinate system [start, end) 93 * 94 * return true in any of the following four situations: 95 * int1 ===== ======= 96 * int2 ======= ======= 97 * 98 * int1 ======= ======= 99 * int2 === ======= 100 * 101 * return false in any other scenario: 102 * int1 ===== | ===== 103 * int2 ===== | ===== 104 * 105 * NOTE that in half-open coordinates [start, end) 106 * i1.end == i2.start => Adjacent, but NO overlap 107 * 108 * Note: This code is copied from the dub package intervaltree. 109 * Author: James S. Blachly, MD <james.blachly@gmail.com> 110 * Copyright: Copyright (c) 2019 James Blachly 111 * License: MIT 112 */ 113 bool overlaps(IntervalType1, IntervalType2)(IntervalType1 int1, IntervalType2 int2) @nogc pure @safe nothrow 114 if (__traits(hasMember, IntervalType1, "start") && __traits(hasMember, IntervalType1, 115 "end") && __traits(hasMember, IntervalType2, "start") 116 && __traits(hasMember, IntervalType2, "end")) { 117 // DMD cannot inline this 118 version (LDC) pragma(inline, true); 119 version (GDC) pragma(inline, true); 120 // int1 ===== ======= 121 // int2 ======= ======= 122 if (int2.start <= int1.start && int1.start < int2.end) 123 return true; 124 125 // int1 ======= ======= 126 // int2 === ======= 127 else if (int1.start <= int2.start && int2.start < int1.end) 128 return true; 129 130 // int1 ===== | ===== 131 // int2 ===== | ===== 132 else 133 return false; 134 } 135 136 @("shall detect overlap between intervals") 137 unittest { 138 overlaps(Interval(5, 10), Interval(2, 10)).should == true; 139 overlaps(Interval(5, 10), Interval(2, 8)).should == true; 140 141 overlaps(Interval(5, 15), Interval(7, 11)).should == true; 142 overlaps(Interval(5, 15), Interval(7, 20)).should == true; 143 144 overlaps(Interval(5, 15), Interval(15, 20)).should == false; 145 overlaps(Interval(15, 20), Interval(5, 15)).should == false; 146 } 147 148 struct Location { 149 Uri uri; 150 Interval interval; 151 } 152 153 /// Unique identifier for the blob. 154 class BlobIdentifier { 155 Uri uri; 156 157 this(Uri uri = Uri.init) @safe pure nothrow @nogc { 158 this.uri = uri; 159 } 160 } 161 162 /// A uniquely identifiable blob and its content. 163 class Blob : BlobIdentifier { 164 const(ubyte)[] content; 165 166 this(Uri uri = Uri.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc { 167 super(uri); 168 this.content = content; 169 } 170 171 this(Uri uri, string content) @safe pure nothrow @nogc { 172 this(uri, cast(const(ubyte)[]) content); 173 } 174 } 175 176 /// Replace `interval` with `content`. 177 class Edit { 178 Interval interval; 179 const(ubyte)[] content; 180 181 /** 182 * Params: 183 * r = interval to replace 184 * content = with this content 185 */ 186 this(Interval r = Interval.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc { 187 this.interval = r; 188 this.content = content; 189 } 190 191 this(Interval r, string content) @safe pure nothrow @nogc { 192 this(r, cast(const(ubyte)[]) content); 193 } 194 195 } 196 197 class BlobEdit { 198 BlobIdentifier blob; 199 Edit[] edits; 200 201 /** 202 * 203 * Params: 204 * blob = identifier of the blob being edited 205 * edits = ? 206 */ 207 this(BlobIdentifier blob = new BlobIdentifier(), Edit[] edits = Edit[].init) @safe pure nothrow @nogc { 208 this.blob = blob; 209 this.edits = edits; 210 } 211 } 212 213 /** A virtual file system of blobs. 214 * 215 * Blobs live in a virtual, in-memory system. They are uniquely identified by 216 * their URI. 217 * 218 * A URI contains a version. This mean that a blob can exist in multiple 219 * versions. The original is usually version zero. 220 */ 221 class BlobVfs { 222 private Blob[Uri] blobs; 223 224 /** Open a blob with the same URI and content as `blob` if it doesn't 225 * already exist. 226 * 227 * Params: 228 * blob = the blob to add an entry in the cache for. 229 */ 230 bool open(const Blob blob) @safe pure { 231 if (auto v = blob.uri in blobs) 232 return false; 233 blobs[blob.uri] = new Blob(blob.uri, blob.content); 234 return true; 235 } 236 237 /** Open a blob with the content read from the file system if it doesn't 238 * already exist in the VFS. 239 */ 240 Blob openFromFile(const Uri uri) @safe { 241 if (auto v = uri in blobs) 242 return *v; 243 auto b = get(uri); 244 open(b); 245 return b; 246 } 247 248 /** Close a blob in the cache if it exists. 249 * 250 * Params: 251 * id = ? 252 */ 253 bool close(const BlobIdentifier id) @safe pure nothrow { 254 if (id.uri in blobs) { 255 blobs.remove(id.uri); 256 return true; 257 } 258 return false; 259 } 260 261 /** Get the blob matching the URI either from the VFS or the filesystem. 262 * 263 * If the blob exists in the VFS with the specific version then that is returned. 264 * 265 * Otherwise the URI is used to try and locate the blob on the filesystem. 266 * 267 * This function may throw if the URI do not exists in the internal DB and 268 * it refers to a file that do not exist on the filesystem. 269 */ 270 Blob get(const Uri uri) @safe { 271 if (auto v = uri in blobs) 272 return *v; 273 return new Blob(uri, rawRead(cast(string) uri)); 274 } 275 276 /// Returns: if there exists a blob with the URI. 277 bool exists(const Uri uri) @safe pure nothrow const @nogc { 278 return (uri in blobs) !is null; 279 } 280 281 /** 282 * Returns: range of the filenames in the VFS. 283 */ 284 auto uris() @safe pure nothrow const @nogc { 285 return blobs.byKey; 286 } 287 288 /** Apply a stream of edits to a blob. 289 * 290 * The edits are applied starting from index zero. If there for example are 291 * two edits for the same interval the second one will be applied on top of 292 * the first one. 293 * 294 * Params: 295 * id = blob to change 296 * edits = changes 297 */ 298 bool change(const BlobIdentifier id, const(Edit)[] edits) @safe pure nothrow { 299 return change(id.uri, edits); 300 } 301 302 /// ditto 303 bool change(const BlobEdit be) @safe pure nothrow { 304 return change(be.blob.uri, be.edits); 305 } 306 307 /// ditto 308 bool change(const Uri uri, const(Edit)[] edits) @safe pure nothrow { 309 import std.algorithm : min; 310 import std.array : empty, appender; 311 312 auto blob = uri in blobs; 313 if (blob is null || edits.length == 0) 314 return false; 315 316 .change(*blob, edits); 317 return true; 318 } 319 } 320 321 /** Modify the blob. 322 */ 323 Blob change(Blob blob, const(Edit)[] edits) @safe pure nothrow { 324 import std.algorithm : min, filter; 325 import std.array : empty, appender; 326 327 foreach (const e; edits.filter!(a => !a.interval.append_)) { 328 if (e.interval.start > e.interval.end) 329 continue; 330 const start = min(e.interval.start, blob.content.length); 331 const end = min(e.interval.end, blob.content.length); 332 333 auto app = appender!(const(ubyte)[])(); 334 app.put(blob.content[0 .. start]); 335 app.put(cast(const(ubyte)[]) e.content); 336 app.put(blob.content[end .. $]); 337 blob.content = app.data; 338 } 339 340 foreach (const e; edits.filter!(a => a.interval.append_)) { 341 blob.content ~= e.content; 342 } 343 344 return blob; 345 } 346 347 /** Merge edits by concatenation when the intervals overlap. 348 * 349 * This will never remove content from the original, only add to it. 350 * 351 * TODO: this my be a bit inefficient because it starts by clearing the content 352 * and then adding it all back. Maybe there are a more efficient way? 353 * It should at least use the allocators. 354 */ 355 BlobEdit merge(const Blob blob, Edit[] edits_) @safe pure nothrow { 356 import std.algorithm : sort, min, filter; 357 import std.array : array, appender; 358 359 import logger = std.experimental.logger; 360 import std.exception; 361 362 auto r = new BlobEdit(new BlobIdentifier(blob.uri)); 363 const end = blob.content.length; 364 365 // start by clearing all content. 366 r.edits = [new Edit(Interval(0, end))]; 367 368 // Current position into the original content which is the position to 369 // start taking data from. It is continiusly adjusted when the edits are 370 // analysed as to cover the last interval of the original content that 371 // where used. 372 size_t cur = 0; 373 374 auto app = appender!(const(ubyte)[])(); 375 foreach (const e; edits_.sort!((a, b) => a.interval < b.interval) 376 .filter!(a => !a.interval.append_)) { 377 // add the original content until this point. 378 if (e.interval.start >= cur && cur < end) { 379 auto ni = Interval(cur, min(e.interval.start, end)); 380 app.put(blob.content[ni.start .. ni.end]); 381 } 382 app.put(e.content); 383 cur = min(e.interval.end, end); 384 } 385 386 if (cur < end) { 387 app.put(blob.content[cur .. $]); 388 } 389 390 foreach (const e; edits_.filter!(a => a.interval.append_)) { 391 app.put(e.content); 392 } 393 394 r.edits ~= new Edit(Interval(0, end), app.data); 395 return r; 396 } 397 398 @("shall merge multiple edits into two edits") 399 unittest { 400 auto vfs = new BlobVfs; 401 auto uri = Uri("my blob"); 402 403 vfs.open(new Blob(uri, "0123456789")).should == true; 404 405 { 406 // insert at the beginning and two in the middle concatenated 407 Edit[] e; 408 e ~= new Edit(Interval(2, 5), "def"); 409 e ~= new Edit(Interval(8, 9), "ghi"); 410 e ~= new Edit(Interval(2, 5), "abc"); 411 // prepend 412 e ~= new Edit(Interval(0, 0), "start"); 413 // delete the end 414 e ~= new Edit(Interval(9, 10), ""); 415 auto m = merge(vfs.get(uri), e); 416 vfs.change(m); 417 } 418 419 (cast(string) vfs.get(uri).content).should == "start01abcdef567ghi"; 420 } 421 422 private: 423 424 // workaround for linking bug 425 auto workaroundLinkingBug() { 426 import std.typecons; 427 428 return typeid(std.typecons.Tuple!(int, double)); 429 } 430 431 const(ubyte)[] rawRead(string path) @safe { 432 import std.array : appender; 433 import std.stdio : File; 434 435 auto fin = File(path); 436 auto content = appender!(ubyte[])(); 437 ubyte[4096] buf; 438 439 while (!fin.eof) { 440 auto s = fin.rawRead(buf); 441 content.put(s); 442 } 443 444 return content.data; 445 } 446 447 @("shall modify a blob when changes are applied") 448 unittest { 449 auto vfs = new BlobVfs; 450 const uri = Uri("my blob"); 451 452 vfs.open(new Blob(uri, "this is some data")).should == true; 453 454 { 455 Edit[] e; 456 e ~= new Edit(Interval(Offset(0), Offset(4)), "that drum"); 457 e ~= new Edit(Interval(Offset(22), Offset(22)), ", big time"); 458 vfs.change(uri, e); 459 } 460 461 (cast(string) vfs.get(uri).content).should == "that drum is some data, big time"; 462 } 463 464 @( 465 "shall append edits outside of the interval and remove invalid edits when applying changes to the content") 466 unittest { 467 auto vfs = new BlobVfs; 468 const uri = Uri("my blob2"); 469 470 vfs.open(new Blob(uri, "more data")).should == true; 471 472 { 473 Edit[] e; 474 e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh"); 475 e ~= new Edit(Interval(Offset(999), Offset(1000)), "abcd"); 476 vfs.change(uri, e); 477 } 478 479 (cast(string) vfs.get(uri).content).should == "more dataedfghabcd"; 480 } 481 482 @("shall apply edits on top of each other when changing a blob") 483 unittest { 484 auto vfs = new BlobVfs; 485 const uri = Uri("my blob2"); 486 487 vfs.open(new Blob(uri, "more data")).should == true; 488 489 { 490 Edit[] e; 491 e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh"); 492 e ~= new Edit(Interval(Offset(10), Offset(11)), "e"); 493 vfs.change(uri, e); 494 } 495 496 (cast(string) vfs.get(uri).content).should == "more dataeefgh"; 497 } 498 499 @("shall create the blob from a file on the filesystem when the URI do not exist in the VFS") 500 unittest { 501 import std.file : remove; 502 import std.stdio : File; 503 504 auto vfs = new BlobVfs; 505 const uri = Uri("my_file.txt"); 506 File(cast(string) uri, "w").write("a string"); 507 scope (exit) 508 remove(cast(string) uri); 509 510 auto b = vfs.get(uri); 511 512 (cast(string) b.content).should == "a string"; 513 } 514 515 @("shall handle blobs with the same path but different versions") 516 unittest { 517 auto vfs = new BlobVfs; 518 const uri = "my blob"; 519 520 { 521 vfs.open(new Blob(Uri(uri), "uri version 0")).should == true; 522 vfs.open(new Blob(Uri(uri, 1), "uri version 1")).should == true; 523 } 524 525 (cast(string) vfs.get(Uri(uri, 0)).content).should == "uri version 0"; 526 (cast(string) vfs.get(Uri(uri, 1)).content).should == "uri version 1"; 527 }