blob_model source code

1 /**
2 Copyright: Copyright (c) 2019, Joakim Brännström. All rights reserved.
3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 */
6 module blob_model;
7 
8 version (unittest) {
9     import unit_threaded.assertions;
10 }
11 
12 struct Uri {
13     string value;
14     int version_;
15 
16     int opCmp(ref const typeof(this) rhs) const {
17         import std.string : cmp;
18 
19         if (version_ != rhs.version_)
20             return version_ - rhs.version_;
21         return cmp(value, rhs.value);
22     }
23 
24     T opCast(T : string)() @safe pure nothrow const @nogc {
25         return value;
26     }
27 }
28 
29 struct Offset {
30     size_t value;
31     alias value this;
32 }
33 
34 struct Interval {
35     Offset start;
36     Offset end;
37     private bool append_;
38 
39     invariant {
40         assert(start <= end);
41     }
42 
43     this(Offset s, Offset e) @safe pure nothrow @nogc {
44         start = s;
45         end = e;
46     }
47 
48     this(size_t s, size_t e) @safe pure nothrow @nogc {
49         this(Offset(s), Offset(e));
50     }
51 
52     /**Returns: An interval that will always be at the end which mean that if
53      * it is e.g. used for an Edit it will be appended to the file.
54      */
55     static Interval append() @safe pure nothrow @nogc {
56         auto r = Interval(0, 0);
57         r.append_ = true;
58         return r;
59     }
60 
61     int opCmp(ref const Interval rhs) @safe pure nothrow const @nogc {
62         if (start < rhs.start)
63             return -1;
64         else if (start > rhs.start)
65             return 1;
66         else if (start == rhs.start && end < rhs.end)
67             return -1;
68         else if (start == rhs.start && end > rhs.end)
69             return 1;
70 
71         return 0;
72     }
73 
74     int opCmp(const int rhs) @safe pure nothrow const @nogc {
75         if (start < rhs)
76             return -1;
77         if (start > rhs)
78             return 1;
79         return 0;
80     }
81 
82     import std.range : isOutputRange;
83 
84     string toString() @safe pure const {
85         import std.array : appender;
86 
87         auto buf = appender!string;
88         toString(buf);
89         return buf.data;
90     }
91 
92     void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) {
93         import std.format : formattedWrite;
94 
95         formattedWrite(w, "[%s, %s)", start, end);
96     }
97 }
98 
99 /** Detect overlap between this interval and other given interval in a
100  * half-open coordinate system [start, end)
101  *
102  * return true in any of the following four situations:
103  *     int1   =====    =======
104  *     int2  =======  =======
105  *
106  *     int1  =======  =======
107  *     int2    ===      =======
108  *
109  * return false in any other scenario:
110  *     int1  =====       |       =====
111  *     int2       =====  |  =====
112  *
113  * NOTE that in half-open coordinates [start, end)
114  *  i1.end == i2.start => Adjacent, but NO overlap
115  *
116  * Note: This code is copied from the dub package intervaltree.
117  * Author: James S. Blachly, MD <james.blachly@gmail.com>
118  * Copyright: Copyright (c) 2019 James Blachly
119  * License: MIT
120 */
121 bool overlaps(IntervalType1, IntervalType2)(IntervalType1 int1, IntervalType2 int2) @nogc pure @safe nothrow
122         if (__traits(hasMember, IntervalType1, "start") && __traits(hasMember, IntervalType1,
123             "end") && __traits(hasMember, IntervalType2, "start")
124             && __traits(hasMember, IntervalType2, "end")) {
125     // DMD cannot inline this
126     version (LDC) pragma(inline, true);
127     version (GDC) pragma(inline, true);
128     // int1   =====    =======
129     // int2 =======  =======
130     if (int2.start <= int1.start && int1.start < int2.end)
131         return true;
132 
133     // int1  =======  =======
134     // int2   ===      =======
135     else if (int1.start <= int2.start && int2.start < int1.end)
136         return true;
137 
138     // int1  =====        |       =====
139     // int2       =====   |  =====
140     else
141         return false;
142 }
143 
144 @("shall detect overlap between intervals")
145 unittest {
146     overlaps(Interval(5, 10), Interval(2, 10)).should == true;
147     overlaps(Interval(5, 10), Interval(2, 8)).should == true;
148 
149     overlaps(Interval(5, 15), Interval(7, 11)).should == true;
150     overlaps(Interval(5, 15), Interval(7, 20)).should == true;
151 
152     overlaps(Interval(5, 15), Interval(15, 20)).should == false;
153     overlaps(Interval(15, 20), Interval(5, 15)).should == false;
154 }
155 
156 struct Location {
157     Uri uri;
158     Interval interval;
159 }
160 
161 /// Unique identifier for the blob.
162 class BlobIdentifier {
163     Uri uri;
164 
165     this(Uri uri = Uri.init) @safe pure nothrow @nogc {
166         this.uri = uri;
167     }
168 }
169 
170 /// A uniquely identifiable blob and its content.
171 class Blob : BlobIdentifier {
172     const(ubyte)[] content;
173 
174     this(Uri uri = Uri.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc {
175         super(uri);
176         this.content = content;
177     }
178 
179     this(Uri uri, string content) @safe pure nothrow @nogc {
180         this(uri, cast(const(ubyte)[]) content);
181     }
182 }
183 
184 /// Replace `interval` with `content`.
185 class Edit {
186     Interval interval;
187     const(ubyte)[] content;
188 
189     /**
190      * Params:
191      * r       = interval to replace
192      * content = with this content
193      */
194     this(Interval r = Interval.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc {
195         this.interval = r;
196         this.content = content;
197     }
198 
199     this(Interval r, string content) @safe pure nothrow @nogc {
200         this(r, cast(const(ubyte)[]) content);
201     }
202 
203 }
204 
205 class BlobEdit {
206     BlobIdentifier blob;
207     Edit[] edits;
208 
209     /**
210      *
211      * Params:
212      * blob  = identifier of the blob being edited
213      * edits = ?
214      */
215     this(BlobIdentifier blob = new BlobIdentifier(), Edit[] edits = Edit[].init) @safe pure nothrow @nogc {
216         this.blob = blob;
217         this.edits = edits;
218     }
219 }
220 
221 /** A virtual file system of blobs.
222  *
223  * Blobs live in a virtual, in-memory system. They are uniquely identified by
224  * their URI.
225  *
226  * A URI contains a version. This mean that a blob can exist in multiple
227  * versions. The original is usually version zero.
228  */
229 class BlobVfs {
230     private Blob[Uri] blobs;
231 
232     /** Open a blob with the same URI and content as `blob` if it doesn't
233      * already exist.
234      *
235      * Params:
236      * blob = the blob to add an entry in the cache for.
237      */
238     bool open(const Blob blob) @safe pure {
239         if (auto v = blob.uri in blobs)
240             return false;
241         blobs[blob.uri] = new Blob(blob.uri, blob.content);
242         return true;
243     }
244 
245     /** Open a blob with the content read from the file system if it doesn't
246      * already exist in the VFS.
247      */
248     Blob openFromFile(const Uri uri) @safe {
249         if (auto v = uri in blobs)
250             return *v;
251         auto b = get(uri);
252         open(b);
253         return b;
254     }
255 
256     /** Close a blob in the cache if it exists.
257      *
258      * Params:
259      * id = ?
260      */
261     bool close(const BlobIdentifier id) @safe pure nothrow {
262         if (id.uri in blobs) {
263             blobs.remove(id.uri);
264             return true;
265         }
266         return false;
267     }
268 
269     /** Get the blob matching the URI either from the VFS or the filesystem.
270      *
271      * If the blob exists in the VFS with the specific version then that is returned.
272      *
273      * Otherwise the URI is used to try and locate the blob on the filesystem.
274      *
275      * This function may throw if the URI do not exists in the internal DB and
276      * it refers to a file that do not exist on the filesystem.
277      */
278     Blob get(const Uri uri) @safe {
279         if (auto v = uri in blobs)
280             return *v;
281         return new Blob(uri, rawRead(cast(string) uri));
282     }
283 
284     /// Returns: if there exists a blob with the URI.
285     bool exists(const Uri uri) @safe pure nothrow const @nogc {
286         return (uri in blobs) !is null;
287     }
288 
289     /**
290      * Returns: range of the filenames in the VFS.
291      */
292     auto uris() @safe pure nothrow const @nogc {
293         return blobs.byKey;
294     }
295 
296     /** Apply a stream of edits to a blob.
297      *
298      * The edits are applied starting from index zero. If there for example are
299      * two edits for the same interval the second one will be applied on top of
300      * the first one.
301      *
302      * Params:
303      * id    = blob to change
304      * edits = changes
305      */
306     bool change(const BlobIdentifier id, const(Edit)[] edits) @safe pure nothrow {
307         return change(id.uri, edits);
308     }
309 
310     /// ditto
311     bool change(const BlobEdit be) @safe pure nothrow {
312         return change(be.blob.uri, be.edits);
313     }
314 
315     /// ditto
316     bool change(const Uri uri, const(Edit)[] edits) @safe pure nothrow {
317         import std.algorithm : min;
318         import std.array : empty, appender;
319 
320         auto blob = uri in blobs;
321         if (blob is null || edits.length == 0)
322             return false;
323 
324         change(*blob, edits);
325         return true;
326     }
327 }
328 
329 /** Modify the blob.
330  */
331 Blob change(Blob blob, const(Edit)[] edits) @safe pure nothrow {
332     import std.algorithm : min, filter;
333     import std.array : empty, appender;
334 
335     foreach (const e; edits.filter!(a => !a.interval.append_)) {
336         if (e.interval.start > e.interval.end)
337             continue;
338         const start = min(e.interval.start, blob.content.length);
339         const end = min(e.interval.end, blob.content.length);
340 
341         auto app = appender!(const(ubyte)[])();
342         app.put(blob.content[0 .. start]);
343         app.put(cast(const(ubyte)[]) e.content);
344         app.put(blob.content[end .. $]);
345         blob.content = app.data;
346     }
347 
348     foreach (const e; edits.filter!(a => a.interval.append_)) {
349         blob.content ~= e.content;
350     }
351 
352     return blob;
353 }
354 
355 /** Merge edits by concatenation when the intervals overlap.
356  *
357  * TODO: this my be a bit inefficient because it starts by clearing the content
358  * and then adding it all back. Maybe there are a more efficient way?
359  * It should at least use the allocators.
360  */
361 BlobEdit merge(const Blob blob, Edit[] edits_) @safe pure nothrow {
362     import std.algorithm : sort, min, filter;
363     import std.array : array, appender;
364 
365     auto r = new BlobEdit(new BlobIdentifier(blob.uri));
366     const end = blob.content.length;
367 
368     // start by clearing all content.
369     r.edits = [new Edit(Interval(0, end))];
370 
371     // Current position into the original content which is the position to
372     // start taking data from. It is continiusly adjusted when the edits are
373     // analysed as to cover the last interval of the original content that
374     // where used.
375     size_t cur = 0;
376 
377     auto app = appender!(const(ubyte)[])();
378     foreach (const e; edits_.sort!((a, b) => a.interval < b.interval)
379             .filter!(a => !a.interval.append_)) {
380         // add the original content until this point.
381         if (e.interval.start > cur && cur < end) {
382             auto ni = Interval(cur, min(e.interval.start, end));
383             app.put(blob.content[ni.start .. ni.end]);
384             cur = min(e.interval.end, end);
385         }
386         app.put(e.content);
387     }
388 
389     if (cur < end) {
390         app.put(blob.content[cur .. $]);
391     }
392 
393     foreach (const e; edits_.filter!(a => a.interval.append_)) {
394         app.put(e.content);
395     }
396 
397     r.edits ~= new Edit(Interval(0, end), app.data);
398     return r;
399 }
400 
401 @("shall merge multiple edits into two edits")
402 unittest {
403     auto vfs = new BlobVfs;
404     auto uri = Uri("my blob");
405 
406     vfs.open(new Blob(uri, "0123456789")).should == true;
407 
408     {
409         // insert at the beginning and two in the middle concatenated
410         Edit[] e;
411         e ~= new Edit(Interval(2, 5), "def");
412         e ~= new Edit(Interval(8, 9), "ghi");
413         e ~= new Edit(Interval(2, 5), "abc");
414         e ~= new Edit(Interval(0, 0), "start");
415         auto m = merge(vfs.get(uri), e);
416         vfs.change(m);
417     }
418 
419     (cast(string) vfs.get(uri).content).should == "start01abcdef567ghi9";
420 }
421 
422 private:
423 
424 // workaround for linking bug
425 auto workaroundLinkingBug() {
426     import std.typecons;
427 
428     return typeid(std.typecons.Tuple!(int, double));
429 }
430 
431 const(ubyte)[] rawRead(string path) @safe {
432     import std.array : appender;
433     import std.stdio : File;
434 
435     auto fin = File(path);
436     auto content = appender!(ubyte[])();
437     ubyte[4096] buf;
438 
439     while (!fin.eof) {
440         auto s = fin.rawRead(buf);
441         content.put(s);
442     }
443 
444     return content.data;
445 }
446 
447 @("shall modify a blob when changes are applied")
448 unittest {
449     auto vfs = new BlobVfs;
450     const uri = Uri("my blob");
451 
452     vfs.open(new Blob(uri, "this is some data")).should == true;
453 
454     {
455         Edit[] e;
456         e ~= new Edit(Interval(Offset(0), Offset(4)), "that drum");
457         e ~= new Edit(Interval(Offset(22), Offset(22)), ", big time");
458         vfs.change(uri, e);
459     }
460 
461     (cast(string) vfs.get(uri).content).should == "that drum is some data, big time";
462 }
463 
464 @(
465         "shall append edits outside of the interval and remove invalid edits when applying changes to the content")
466 unittest {
467     auto vfs = new BlobVfs;
468     const uri = Uri("my blob2");
469 
470     vfs.open(new Blob(uri, "more data")).should == true;
471 
472     {
473         Edit[] e;
474         e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh");
475         e ~= new Edit(Interval(Offset(999), Offset(1000)), "abcd");
476         vfs.change(uri, e);
477     }
478 
479     (cast(string) vfs.get(uri).content).should == "more dataedfghabcd";
480 }
481 
482 @("shall apply edits on top of each other when changing a blob")
483 unittest {
484     auto vfs = new BlobVfs;
485     const uri = Uri("my blob2");
486 
487     vfs.open(new Blob(uri, "more data")).should == true;
488 
489     {
490         Edit[] e;
491         e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh");
492         e ~= new Edit(Interval(Offset(10), Offset(11)), "e");
493         vfs.change(uri, e);
494     }
495 
496     (cast(string) vfs.get(uri).content).should == "more dataeefgh";
497 }
498 
499 @("shall create the blob from a file on the filesystem when the URI do not exist in the VFS")
500 unittest {
501     import std.file : remove;
502     import std.stdio : File;
503 
504     auto vfs = new BlobVfs;
505     const uri = Uri("my_file.txt");
506     File(cast(string) uri, "w").write("a string");
507     scope (exit)
508         remove(cast(string) uri);
509 
510     auto b = vfs.get(uri);
511 
512     (cast(string) b.content).should == "a string";
513 }
514 
515 @("shall handle blobs with the same path but different versions")
516 unittest {
517     auto vfs = new BlobVfs;
518     const uri = "my blob";
519 
520     {
521         vfs.open(new Blob(Uri(uri), "uri version 0")).should == true;
522         vfs.open(new Blob(Uri(uri, 1), "uri version 1")).should == true;
523     }
524 
525     (cast(string) vfs.get(Uri(uri, 0)).content).should == "uri version 0";
526     (cast(string) vfs.get(Uri(uri, 1)).content).should == "uri version 1";
527 }