1 /**
2 Copyright: Copyright (c) 2019, Joakim Brännström. All rights reserved.
3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 */
6 module blob_model;
7 
8 version (unittest) {
9     import unit_threaded.assertions;
10 }
11 
12 struct Uri {
13     string value;
14     int version_;
15 
16     int opCmp(ref const typeof(this) rhs) const {
17         import std.string : cmp;
18 
19         if (version_ != rhs.version_)
20             return version_ - rhs.version_;
21         return cmp(value, rhs.value);
22     }
23 
24     T opCast(T : string)() @safe pure nothrow const @nogc {
25         return value;
26     }
27 }
28 
29 struct Offset {
30     size_t value;
31     alias value this;
32 }
33 
34 struct Interval {
35     Offset start;
36     Offset end;
37     private bool append_;
38 
39     invariant {
40         assert(start <= end);
41     }
42 
43     this(Offset s, Offset e) @safe pure nothrow @nogc {
44         start = s;
45         end = e;
46     }
47 
48     this(size_t s, size_t e) @safe pure nothrow @nogc {
49         this(Offset(s), Offset(e));
50     }
51 
52     /**Returns: An interval that will always be at the end which mean that if
53      * it is e.g. used for an Edit it will be appended to the file.
54      */
55     static Interval append() @safe pure nothrow @nogc {
56         auto r = Interval(0, 0);
57         r.append_ = true;
58         return r;
59     }
60 
61     int opCmp(ref const Interval rhs) @safe pure nothrow const @nogc {
62         if (start < rhs.start)
63             return -1;
64         else if (start > rhs.start)
65             return 1;
66         else if (start == rhs.start && end < rhs.end)
67             return -1;
68         else if (start == rhs.start && end > rhs.end)
69             return 1;
70 
71         return 0;
72     }
73 
74     import std.range : isOutputRange;
75 
76     string toString() @safe pure const {
77         import std.array : appender;
78 
79         auto buf = appender!string;
80         toString(buf);
81         return buf.data;
82     }
83 
84     void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) {
85         import std.format : formattedWrite;
86 
87         formattedWrite(w, "[%s, %s)", start, end);
88     }
89 }
90 
91 /** Detect overlap between this interval and other given interval in a
92  * half-open coordinate system [start, end)
93  *
94  * return true in any of the following four situations:
95  *     int1   =====    =======
96  *     int2  =======  =======
97  *
98  *     int1  =======  =======
99  *     int2    ===      =======
100  *
101  * return false in any other scenario:
102  *     int1  =====       |       =====
103  *     int2       =====  |  =====
104  *
105  * NOTE that in half-open coordinates [start, end)
106  *  i1.end == i2.start => Adjacent, but NO overlap
107  *
108  * Note: This code is copied from the dub package intervaltree.
109  * Author: James S. Blachly, MD <james.blachly@gmail.com>
110  * Copyright: Copyright (c) 2019 James Blachly
111  * License: MIT
112 */
113 bool overlaps(IntervalType1, IntervalType2)(IntervalType1 int1, IntervalType2 int2) @nogc pure @safe nothrow
114         if (__traits(hasMember, IntervalType1, "start") && __traits(hasMember, IntervalType1,
115             "end") && __traits(hasMember, IntervalType2, "start")
116             && __traits(hasMember, IntervalType2, "end")) {
117     // DMD cannot inline this
118     version (LDC) pragma(inline, true);
119     version (GDC) pragma(inline, true);
120     // int1   =====    =======
121     // int2 =======  =======
122     if (int2.start <= int1.start && int1.start < int2.end)
123         return true;
124 
125     // int1  =======  =======
126     // int2   ===      =======
127     else if (int1.start <= int2.start && int2.start < int1.end)
128         return true;
129 
130     // int1  =====        |       =====
131     // int2       =====   |  =====
132     else
133         return false;
134 }
135 
136 @("shall detect overlap between intervals")
137 unittest {
138     overlaps(Interval(5, 10), Interval(2, 10)).should == true;
139     overlaps(Interval(5, 10), Interval(2, 8)).should == true;
140 
141     overlaps(Interval(5, 15), Interval(7, 11)).should == true;
142     overlaps(Interval(5, 15), Interval(7, 20)).should == true;
143 
144     overlaps(Interval(5, 15), Interval(15, 20)).should == false;
145     overlaps(Interval(15, 20), Interval(5, 15)).should == false;
146 }
147 
148 struct Location {
149     Uri uri;
150     Interval interval;
151 }
152 
153 /// Unique identifier for the blob.
154 class BlobIdentifier {
155     Uri uri;
156 
157     this(Uri uri = Uri.init) @safe pure nothrow @nogc {
158         this.uri = uri;
159     }
160 }
161 
162 /// A uniquely identifiable blob and its content.
163 class Blob : BlobIdentifier {
164     const(ubyte)[] content;
165 
166     this(Uri uri = Uri.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc {
167         super(uri);
168         this.content = content;
169     }
170 
171     this(Uri uri, string content) @safe pure nothrow @nogc {
172         this(uri, cast(const(ubyte)[]) content);
173     }
174 }
175 
176 /// Replace `interval` with `content`.
177 class Edit {
178     Interval interval;
179     const(ubyte)[] content;
180 
181     /**
182      * Params:
183      * r       = interval to replace
184      * content = with this content
185      */
186     this(Interval r = Interval.init, const(ubyte)[] content = (const(ubyte)[]).init) @safe pure nothrow @nogc {
187         this.interval = r;
188         this.content = content;
189     }
190 
191     this(Interval r, string content) @safe pure nothrow @nogc {
192         this(r, cast(const(ubyte)[]) content);
193     }
194 
195 }
196 
197 class BlobEdit {
198     BlobIdentifier blob;
199     Edit[] edits;
200 
201     /**
202      *
203      * Params:
204      * blob  = identifier of the blob being edited
205      * edits = ?
206      */
207     this(BlobIdentifier blob = new BlobIdentifier(), Edit[] edits = Edit[].init) @safe pure nothrow @nogc {
208         this.blob = blob;
209         this.edits = edits;
210     }
211 }
212 
213 /** A virtual file system of blobs.
214  *
215  * Blobs live in a virtual, in-memory system. They are uniquely identified by
216  * their URI.
217  *
218  * A URI contains a version. This mean that a blob can exist in multiple
219  * versions. The original is usually version zero.
220  */
221 class BlobVfs {
222     private Blob[Uri] blobs;
223 
224     /** Open a blob with the same URI and content as `blob` if it doesn't
225      * already exist.
226      *
227      * Params:
228      * blob = the blob to add an entry in the cache for.
229      */
230     bool open(const Blob blob) @safe pure {
231         if (auto v = blob.uri in blobs)
232             return false;
233         blobs[blob.uri] = new Blob(blob.uri, blob.content);
234         return true;
235     }
236 
237     /** Open a blob with the content read from the file system if it doesn't
238      * already exist in the VFS.
239      */
240     Blob openFromFile(const Uri uri) @safe {
241         if (auto v = uri in blobs)
242             return *v;
243         auto b = get(uri);
244         open(b);
245         return b;
246     }
247 
248     /** Close a blob in the cache if it exists.
249      *
250      * Params:
251      * id = ?
252      */
253     bool close(const BlobIdentifier id) @safe pure nothrow {
254         if (id.uri in blobs) {
255             blobs.remove(id.uri);
256             return true;
257         }
258         return false;
259     }
260 
261     /** Get the blob matching the URI either from the VFS or the filesystem.
262      *
263      * If the blob exists in the VFS with the specific version then that is returned.
264      *
265      * Otherwise the URI is used to try and locate the blob on the filesystem.
266      *
267      * This function may throw if the URI do not exists in the internal DB and
268      * it refers to a file that do not exist on the filesystem.
269      */
270     Blob get(const Uri uri) @safe {
271         if (auto v = uri in blobs)
272             return *v;
273         return new Blob(uri, rawRead(cast(string) uri));
274     }
275 
276     /// Returns: if there exists a blob with the URI.
277     bool exists(const Uri uri) @safe pure nothrow const @nogc {
278         return (uri in blobs) !is null;
279     }
280 
281     /**
282      * Returns: range of the filenames in the VFS.
283      */
284     auto uris() @safe pure nothrow const @nogc {
285         return blobs.byKey;
286     }
287 
288     /** Apply a stream of edits to a blob.
289      *
290      * The edits are applied starting from index zero. If there for example are
291      * two edits for the same interval the second one will be applied on top of
292      * the first one.
293      *
294      * Params:
295      * id    = blob to change
296      * edits = changes
297      */
298     bool change(const BlobIdentifier id, const(Edit)[] edits) @safe pure nothrow {
299         return change(id.uri, edits);
300     }
301 
302     /// ditto
303     bool change(const BlobEdit be) @safe pure nothrow {
304         return change(be.blob.uri, be.edits);
305     }
306 
307     /// ditto
308     bool change(const Uri uri, const(Edit)[] edits) @safe pure nothrow {
309         import std.algorithm : min;
310         import std.array : empty, appender;
311 
312         auto blob = uri in blobs;
313         if (blob is null || edits.length == 0)
314             return false;
315 
316         .change(*blob, edits);
317         return true;
318     }
319 }
320 
321 /** Modify the blob.
322  */
323 Blob change(Blob blob, const(Edit)[] edits) @safe pure nothrow {
324     import std.algorithm : min, filter;
325     import std.array : empty, appender;
326 
327     foreach (const e; edits.filter!(a => !a.interval.append_)) {
328         if (e.interval.start > e.interval.end)
329             continue;
330         const start = min(e.interval.start, blob.content.length);
331         const end = min(e.interval.end, blob.content.length);
332 
333         auto app = appender!(const(ubyte)[])();
334         app.put(blob.content[0 .. start]);
335         app.put(cast(const(ubyte)[]) e.content);
336         app.put(blob.content[end .. $]);
337         blob.content = app.data;
338     }
339 
340     foreach (const e; edits.filter!(a => a.interval.append_)) {
341         blob.content ~= e.content;
342     }
343 
344     return blob;
345 }
346 
347 /** Merge edits by concatenation when the intervals overlap.
348  *
349  * This will never remove content from the original, only add to it.
350  *
351  * TODO: this my be a bit inefficient because it starts by clearing the content
352  * and then adding it all back. Maybe there are a more efficient way?
353  * It should at least use the allocators.
354  */
355 BlobEdit merge(const Blob blob, Edit[] edits_) @safe pure nothrow {
356     import std.algorithm : sort, min, filter;
357     import std.array : array, appender;
358 
359     import logger = std.experimental.logger;
360     import std.exception;
361 
362     auto r = new BlobEdit(new BlobIdentifier(blob.uri));
363     const end = blob.content.length;
364 
365     // start by clearing all content.
366     r.edits = [new Edit(Interval(0, end))];
367 
368     // Current position into the original content which is the position to
369     // start taking data from. It is continiusly adjusted when the edits are
370     // analysed as to cover the last interval of the original content that
371     // where used.
372     size_t cur = 0;
373 
374     auto app = appender!(const(ubyte)[])();
375     foreach (const e; edits_.sort!((a, b) => a.interval < b.interval)
376             .filter!(a => !a.interval.append_)) {
377         // add the original content until this point.
378         if (e.interval.start >= cur && cur < end) {
379             auto ni = Interval(cur, min(e.interval.start, end));
380             app.put(blob.content[ni.start .. ni.end]);
381         }
382         app.put(e.content);
383         cur = min(e.interval.end, end);
384     }
385 
386     if (cur < end) {
387         app.put(blob.content[cur .. $]);
388     }
389 
390     foreach (const e; edits_.filter!(a => a.interval.append_)) {
391         app.put(e.content);
392     }
393 
394     r.edits ~= new Edit(Interval(0, end), app.data);
395     return r;
396 }
397 
398 @("shall merge multiple edits into two edits")
399 unittest {
400     auto vfs = new BlobVfs;
401     auto uri = Uri("my blob");
402 
403     vfs.open(new Blob(uri, "0123456789")).should == true;
404 
405     {
406         // insert at the beginning and two in the middle concatenated
407         Edit[] e;
408         e ~= new Edit(Interval(2, 5), "def");
409         e ~= new Edit(Interval(8, 9), "ghi");
410         e ~= new Edit(Interval(2, 5), "abc");
411         // prepend
412         e ~= new Edit(Interval(0, 0), "start");
413         // delete the end
414         e ~= new Edit(Interval(9, 10), "");
415         auto m = merge(vfs.get(uri), e);
416         vfs.change(m);
417     }
418 
419     (cast(string) vfs.get(uri).content).should == "start01abcdef567ghi";
420 }
421 
422 private:
423 
424 // workaround for linking bug
425 auto workaroundLinkingBug() {
426     import std.typecons;
427 
428     return typeid(std.typecons.Tuple!(int, double));
429 }
430 
431 const(ubyte)[] rawRead(string path) @safe {
432     import std.array : appender;
433     import std.stdio : File;
434 
435     auto fin = File(path);
436     auto content = appender!(ubyte[])();
437     ubyte[4096] buf;
438 
439     while (!fin.eof) {
440         auto s = fin.rawRead(buf);
441         content.put(s);
442     }
443 
444     return content.data;
445 }
446 
447 @("shall modify a blob when changes are applied")
448 unittest {
449     auto vfs = new BlobVfs;
450     const uri = Uri("my blob");
451 
452     vfs.open(new Blob(uri, "this is some data")).should == true;
453 
454     {
455         Edit[] e;
456         e ~= new Edit(Interval(Offset(0), Offset(4)), "that drum");
457         e ~= new Edit(Interval(Offset(22), Offset(22)), ", big time");
458         vfs.change(uri, e);
459     }
460 
461     (cast(string) vfs.get(uri).content).should == "that drum is some data, big time";
462 }
463 
464 @(
465         "shall append edits outside of the interval and remove invalid edits when applying changes to the content")
466 unittest {
467     auto vfs = new BlobVfs;
468     const uri = Uri("my blob2");
469 
470     vfs.open(new Blob(uri, "more data")).should == true;
471 
472     {
473         Edit[] e;
474         e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh");
475         e ~= new Edit(Interval(Offset(999), Offset(1000)), "abcd");
476         vfs.change(uri, e);
477     }
478 
479     (cast(string) vfs.get(uri).content).should == "more dataedfghabcd";
480 }
481 
482 @("shall apply edits on top of each other when changing a blob")
483 unittest {
484     auto vfs = new BlobVfs;
485     const uri = Uri("my blob2");
486 
487     vfs.open(new Blob(uri, "more data")).should == true;
488 
489     {
490         Edit[] e;
491         e ~= new Edit(Interval(Offset(9), Offset(15)), "edfgh");
492         e ~= new Edit(Interval(Offset(10), Offset(11)), "e");
493         vfs.change(uri, e);
494     }
495 
496     (cast(string) vfs.get(uri).content).should == "more dataeefgh";
497 }
498 
499 @("shall create the blob from a file on the filesystem when the URI do not exist in the VFS")
500 unittest {
501     import std.file : remove;
502     import std.stdio : File;
503 
504     auto vfs = new BlobVfs;
505     const uri = Uri("my_file.txt");
506     File(cast(string) uri, "w").write("a string");
507     scope (exit)
508         remove(cast(string) uri);
509 
510     auto b = vfs.get(uri);
511 
512     (cast(string) b.content).should == "a string";
513 }
514 
515 @("shall handle blobs with the same path but different versions")
516 unittest {
517     auto vfs = new BlobVfs;
518     const uri = "my blob";
519 
520     {
521         vfs.open(new Blob(Uri(uri), "uri version 0")).should == true;
522         vfs.open(new Blob(Uri(uri, 1), "uri version 1")).should == true;
523     }
524 
525     (cast(string) vfs.get(Uri(uri, 0)).content).should == "uri version 0";
526     (cast(string) vfs.get(Uri(uri, 1)).content).should == "uri version 1";
527 }