1 /** 2 Copyright: Copyright (c) 2019, Joakim Brännström. All rights reserved. 3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0) 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 */ 6 module blob_model; 7 8 version (unittest) { 9 import unit_threaded.assertions; 10 } 11 12 struct Uri { 13 string value; 14 int version_; 15 16 int opCmp(ref const typeof(this) rhs) const { 17 import std.string : cmp; 18 19 if (version_ != rhs.version_) 20 return version_ - rhs.version_; 21 return cmp(value, rhs.value); 22 } 23 24 T opCast(T : string)() @safe pure nothrow const @nogc { 25 return value; 26 } 27 } 28 29 struct Offset { 30 size_t value; 31 alias value this; 32 } 33 34 struct Interval { 35 Offset start; 36 Offset end; 37 private bool append_; 38 39 invariant { 40 assert(start <= end); 41 } 42 43 this(Offset s, Offset e) @safe pure nothrow @nogc { 44 start = s; 45 end = e; 46 } 47 48 this(size_t s, size_t e) @safe pure nothrow @nogc { 49 this(Offset(s), Offset(e)); 50 } 51 52 /**Returns: An interval that will always be at the end which mean that if 53 * it is e.g. used for an Edit it will be appended to the file. 54 */ 55 static Interval append() @safe pure nothrow @nogc { 56 auto r = Interval(0, 0); 57 r.append_ = true; 58 return r; 59 } 60 61 int opCmp(ref const Interval rhs) @safe pure nothrow const @nogc { 62 if (start < rhs.start) 63 return -1; 64 else if (start > rhs.start) 65 return 1; 66 else if (start == rhs.start && end < rhs.end) 67 return -1; 68 else if (start == rhs.start && end > rhs.end) 69 return 1; 70 71 return 0; 72 } 73 74 int opCmp(const int rhs) @safe pure nothrow const @nogc { 75 if (start < rhs) 76 return -1; 77 if (start > rhs) 78 return 1; 79 return 0; 80 } 81 82 import std.range : isOutputRange; 83 84 string toString() @safe pure const { 85 import std.array : appender; 86 87 auto buf = appender!string; 88 toString(buf); 89 return buf.data; 90 } 91 92 void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) { 93 import std.format : formattedWrite; 94 95 formattedWrite(w, "[%s, %s)", start, end); 96 } 97 } 98 99 /** Detect overlap between this interval and other given interval in a 100 * half-open coordinate system [start, end) 101 * 102 * return true in any of the following four situations: 103 * int1 ===== ======= 104 * int2 ======= ======= 105 * 106 * int1 ======= ======= 107 * int2 === ======= 108 * 109 * return false in any other scenario: 110 * int1 ===== | ===== 111 * int2 ===== | ===== 112 * 113 * NOTE that in half-open coordinates [start, end) 114 * i1.end == i2.start => Adjacent, but NO overlap 115 * 116 * Note: This code is copied from the dub package intervaltree. 117 * Author: James S. Blachly, MD <james.blachly@gmail.com> 118 * Copyright: Copyright (c) 2019 James Blachly 119 * License: MIT 120 */ 121 bool overlaps(IntervalType1, IntervalType2)(IntervalType1 int1, IntervalType2 int2) @nogc pure @safe nothrow 122 if (__traits(hasMember, IntervalType1, "start") && __traits(hasMember, IntervalType1, 123 "end") && __traits(hasMember, IntervalType2, "start") 124 && __traits(hasMember, IntervalType2, "end")) { 125 // DMD cannot inline this 126 version (LDC) pragma(inline, true); 127 version (GDC) pragma(inline, true); 128 // int1 ===== ======= 129 // int2 ======= ======= 130 if (int2.start <= int1.start && int1.start < int2.end) 131 return true; 132 133 // int1 ======= ======= 134 // int2 === ======= 135 else if (int1.start <= int2.start && int2.start < int1.end) 136 return true; 137 138 // int1 ===== | ===== 139 // int2 ===== | ===== 140 else 141 return false; 142 } 143 144 @("shall detect overlap between intervals") 145 unittest { 146 overlaps(Interval(5, 10), Interval(2, 10)).should == true; 147 overlaps(Interval(5, 10), Interval(2, 8)).should == true; 148 149 overlaps(Interval(5, 15), Interval(7, 11)).should == true; 150 overlaps(Interval(5, 15), Interval(7, 20)).should == true; 151 152 overlaps(Interval(5, 15), Interval(15, 20)).should == false; 153 overlaps(Interval(15, 20), Interval(5, 15)).should == false; 154 } 155 156 struct Location { 157 Uri uri; 158 Interval interval; 159 } 160 161 /// Unique identifier for the blob. 162 class BlobIdentifier { 163 Uri uri; 164 165 this(Uri uri = Uri.init) @safe pure nothrow @nogc { 166 this.uri = uri; 167 } 168 } 169 170 /// A uniquely identifiable blob and its content. 171 class Blob : BlobIdentifier { 172 const(ubyte)[] content; 173 174 this(Uri uri = Uri.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc { 175 super(uri); 176 this.content = content; 177 } 178 179 this(Uri uri, string content) @safe pure nothrow @nogc { 180 this(uri, cast(const(ubyte)[]) content); 181 } 182 } 183 184 /// Replace `interval` with `content`. 185 class Edit { 186 Interval interval; 187 const(ubyte)[] content; 188 189 /** 190 * Params: 191 * r = interval to replace 192 * content = with this content 193 */ 194 this(Interval r = Interval.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc { 195 this.interval = r; 196 this.content = content; 197 } 198 199 this(Interval r, string content) @safe pure nothrow @nogc { 200 this(r, cast(const(ubyte)[]) content); 201 } 202 203 } 204 205 class BlobEdit { 206 BlobIdentifier blob; 207 Edit[] edits; 208 209 /** 210 * 211 * Params: 212 * blob = identifier of the blob being edited 213 * edits = ? 214 */ 215 this(BlobIdentifier blob = new BlobIdentifier(), Edit[] edits = Edit[].init) @safe pure nothrow @nogc { 216 this.blob = blob; 217 this.edits = edits; 218 } 219 } 220 221 /** A virtual file system of blobs. 222 * 223 * Blobs live in a virtual, in-memory system. They are uniquely identified by 224 * their URI. 225 * 226 * A URI contains a version. This mean that a blob can exist in multiple 227 * versions. The original is usually version zero. 228 */ 229 class BlobVfs { 230 private Blob[Uri] blobs; 231 232 /** Open a blob with the same URI and content as `blob` if it doesn't 233 * already exist. 234 * 235 * Params: 236 * blob = the blob to add an entry in the cache for. 237 */ 238 bool open(const Blob blob) @safe pure { 239 if (auto v = blob.uri in blobs) 240 return false; 241 blobs[blob.uri] = new Blob(blob.uri, blob.content); 242 return true; 243 } 244 245 /** Open a blob with the content read from the file system if it doesn't 246 * already exist in the VFS. 247 */ 248 Blob openFromFile(const Uri uri) @safe { 249 if (auto v = uri in blobs) 250 return *v; 251 auto b = get(uri); 252 open(b); 253 return b; 254 } 255 256 /** Close a blob in the cache if it exists. 257 * 258 * Params: 259 * id = ? 260 */ 261 bool close(const BlobIdentifier id) @safe pure nothrow { 262 if (id.uri in blobs) { 263 blobs.remove(id.uri); 264 return true; 265 } 266 return false; 267 } 268 269 /** Get the blob matching the URI either from the VFS or the filesystem. 270 * 271 * If the blob exists in the VFS with the specific version then that is returned. 272 * 273 * Otherwise the URI is used to try and locate the blob on the filesystem. 274 * 275 * This function may throw if the URI do not exists in the internal DB and 276 * it refers to a file that do not exist on the filesystem. 277 */ 278 Blob get(const Uri uri) @safe { 279 if (auto v = uri in blobs) 280 return *v; 281 return new Blob(uri, rawRead(cast(string) uri)); 282 } 283 284 /// Returns: if there exists a blob with the URI. 285 bool exists(const Uri uri) @safe pure nothrow const @nogc { 286 return (uri in blobs) !is null; 287 } 288 289 /** 290 * Returns: range of the filenames in the VFS. 291 */ 292 auto uris() @safe pure nothrow const @nogc { 293 return blobs.byKey; 294 } 295 296 /** Apply a stream of edits to a blob. 297 * 298 * The edits are applied starting from index zero. If there for example are 299 * two edits for the same interval the second one will be applied on top of 300 * the first one. 301 * 302 * Params: 303 * id = blob to change 304 * edits = changes 305 */ 306 bool change(const BlobIdentifier id, const(Edit)[] edits) @safe pure nothrow { 307 return change(id.uri, edits); 308 } 309 310 /// ditto 311 bool change(const BlobEdit be) @safe pure nothrow { 312 return change(be.blob.uri, be.edits); 313 } 314 315 /// ditto 316 bool change(const Uri uri, const(Edit)[] edits) @safe pure nothrow { 317 import std.algorithm : min; 318 import std.array : empty, appender; 319 320 auto blob = uri in blobs; 321 if (blob is null || edits.length == 0) 322 return false; 323 324 change(*blob, edits); 325 return true; 326 } 327 } 328 329 /** Modify the blob. 330 */ 331 Blob change(Blob blob, const(Edit)[] edits) @safe pure nothrow { 332 import std.algorithm : min, filter; 333 import std.array : empty, appender; 334 335 foreach (const e; edits.filter!(a => !a.interval.append_)) { 336 if (e.interval.start > e.interval.end) 337 continue; 338 const start = min(e.interval.start, blob.content.length); 339 const end = min(e.interval.end, blob.content.length); 340 341 auto app = appender!(const(ubyte)[])(); 342 app.put(blob.content[0 .. start]); 343 app.put(cast(const(ubyte)[]) e.content); 344 app.put(blob.content[end .. $]); 345 blob.content = app.data; 346 } 347 348 foreach (const e; edits.filter!(a => a.interval.append_)) { 349 blob.content ~= e.content; 350 } 351 352 return blob; 353 } 354 355 /** Merge edits by concatenation when the intervals overlap. 356 * 357 * TODO: this my be a bit inefficient because it starts by clearing the content 358 * and then adding it all back. Maybe there are a more efficient way? 359 * It should at least use the allocators. 360 */ 361 BlobEdit merge(const Blob blob, Edit[] edits_) @safe pure nothrow { 362 import std.algorithm : sort, min, filter; 363 import std.array : array, appender; 364 365 auto r = new BlobEdit(new BlobIdentifier(blob.uri)); 366 const end = blob.content.length; 367 368 // start by clearing all content. 369 r.edits = [new Edit(Interval(0, end))]; 370 371 // Current position into the original content which is the position to 372 // start taking data from. It is continiusly adjusted when the edits are 373 // analysed as to cover the last interval of the original content that 374 // where used. 375 size_t cur = 0; 376 377 auto app = appender!(const(ubyte)[])(); 378 foreach (const e; edits_.sort!((a, b) => a.interval < b.interval) 379 .filter!(a => !a.interval.append_)) { 380 // add the original content until this point. 381 if (e.interval.start > cur && cur < end) { 382 auto ni = Interval(cur, min(e.interval.start, end)); 383 app.put(blob.content[ni.start .. ni.end]); 384 cur = min(e.interval.end, end); 385 } 386 app.put(e.content); 387 } 388 389 if (cur < end) { 390 app.put(blob.content[cur .. $]); 391 } 392 393 foreach (const e; edits_.filter!(a => a.interval.append_)) { 394 app.put(e.content); 395 } 396 397 r.edits ~= new Edit(Interval(0, end), app.data); 398 return r; 399 } 400 401 @("shall merge multiple edits into two edits") 402 unittest { 403 auto vfs = new BlobVfs; 404 auto uri = Uri("my blob"); 405 406 vfs.open(new Blob(uri, "0123456789")).should == true; 407 408 { 409 // insert at the beginning and two in the middle concatenated 410 Edit[] e; 411 e ~= new Edit(Interval(2, 5), "def"); 412 e ~= new Edit(Interval(8, 9), "ghi"); 413 e ~= new Edit(Interval(2, 5), "abc"); 414 e ~= new Edit(Interval(0, 0), "start"); 415 auto m = merge(vfs.get(uri), e); 416 vfs.change(m); 417 } 418 419 (cast(string) vfs.get(uri).content).should == "start01abcdef567ghi9"; 420 } 421 422 private: 423 424 // workaround for linking bug 425 auto workaroundLinkingBug() { 426 import std.typecons; 427 428 return typeid(std.typecons.Tuple!(int, double)); 429 } 430 431 const(ubyte)[] rawRead(string path) @safe { 432 import std.array : appender; 433 import std.stdio : File; 434 435 auto fin = File(path); 436 auto content = appender!(ubyte[])(); 437 ubyte[4096] buf; 438 439 while (!fin.eof) { 440 auto s = fin.rawRead(buf); 441 content.put(s); 442 } 443 444 return content.data; 445 } 446 447 @("shall modify a blob when changes are applied") 448 unittest { 449 auto vfs = new BlobVfs; 450 const uri = Uri("my blob"); 451 452 vfs.open(new Blob(uri, "this is some data")).should == true; 453 454 { 455 Edit[] e; 456 e ~= new Edit(Interval(Offset(0), Offset(4)), "that drum"); 457 e ~= new Edit(Interval(Offset(22), Offset(22)), ", big time"); 458 vfs.change(uri, e); 459 } 460 461 (cast(string) vfs.get(uri).content).should == "that drum is some data, big time"; 462 } 463 464 @( 465 "shall append edits outside of the interval and remove invalid edits when applying changes to the content") 466 unittest { 467 auto vfs = new BlobVfs; 468 const uri = Uri("my blob2"); 469 470 vfs.open(new Blob(uri, "more data")).should == true; 471 472 { 473 Edit[] e; 474 e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh"); 475 e ~= new Edit(Interval(Offset(999), Offset(1000)), "abcd"); 476 vfs.change(uri, e); 477 } 478 479 (cast(string) vfs.get(uri).content).should == "more dataedfghabcd"; 480 } 481 482 @("shall apply edits on top of each other when changing a blob") 483 unittest { 484 auto vfs = new BlobVfs; 485 const uri = Uri("my blob2"); 486 487 vfs.open(new Blob(uri, "more data")).should == true; 488 489 { 490 Edit[] e; 491 e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh"); 492 e ~= new Edit(Interval(Offset(10), Offset(11)), "e"); 493 vfs.change(uri, e); 494 } 495 496 (cast(string) vfs.get(uri).content).should == "more dataeefgh"; 497 } 498 499 @("shall create the blob from a file on the filesystem when the URI do not exist in the VFS") 500 unittest { 501 import std.file : remove; 502 import std.stdio : File; 503 504 auto vfs = new BlobVfs; 505 const uri = Uri("my_file.txt"); 506 File(cast(string) uri, "w").write("a string"); 507 scope (exit) 508 remove(cast(string) uri); 509 510 auto b = vfs.get(uri); 511 512 (cast(string) b.content).should == "a string"; 513 } 514 515 @("shall handle blobs with the same path but different versions") 516 unittest { 517 auto vfs = new BlobVfs; 518 const uri = "my blob"; 519 520 { 521 vfs.open(new Blob(Uri(uri), "uri version 0")).should == true; 522 vfs.open(new Blob(Uri(uri, 1), "uri version 1")).should == true; 523 } 524 525 (cast(string) vfs.get(Uri(uri, 0)).content).should == "uri version 0"; 526 (cast(string) vfs.get(Uri(uri, 1)).content).should == "uri version 1"; 527 }