/**
Copyright: Copyright (c) 2020, Joakim Brännström. All rights reserved.
License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
Author: Joakim Brännström (joakim.brannstrom@gmx.com)

A convenient library for calculating the hash of bits of data.
*/
module my.hash;

import std.digest.crc : CRC64ISO;
import std.digest.murmurhash : MurmurHash3;

import std.format : FormatSpec;
import std.format : formatValue, formattedWrite;
import std.range.primitives : put;

import my.path : AbsolutePath;

// Generic 64-bit checksum names, currently backed by CRC64 ISO.
alias BuildChecksum64 = CRC64ISO;
alias Checksum64 = Crc64Iso;
alias makeChecksum64 = makeCrc64Iso;
alias toChecksum64 = toCrc64Iso;

// Generic 128-bit checksum names, currently backed by MurmurHash3.
alias BuildChecksum128 = MurmurHash3!(128, 64);
alias Checksum128 = Murmur3;
alias makeChecksum128 = makeMurmur3;
alias toChecksum128 = toMurmur3;

/** Checksum a file.
 *
 * The file is memory mapped and its whole content is handed to `checksumFn`
 * as a `const(ubyte)[]`.
 *
 * Params:
 *  checksumFn = callable that receives the content of the file.
 *  p = path to the file to read.
 *
 * Returns: whatever `checksumFn` returns.
 */
auto checksum(alias checksumFn)(AbsolutePath p) {
    import std.mmfile : MmFile;

    // `scope` so the mapping is released when the function returns.
    scope content = new MmFile(p.toString);
    return checksumFn(cast(const(ubyte)[]) content[]);
}

@("shall calculate the checksum")
unittest {
    auto cs = checksum!makeMurmur3(AbsolutePath("/bin/true"));
}

/// Convert a value to its ubyte representation.
/// Note: this is very slow. Prefer std.bitmanip.nativeToBigEndian.
///
/// Returns: a `ubyte[T.sizeof]` in which a copy of `v` has been constructed.
auto toBytes(T)(T v) @trusted pure nothrow @nogc {
    import std.conv : emplace;

    ubyte[T.sizeof] d;
    // Construct the copy of `v` directly inside the byte buffer.
    T* p = cast(T*)&d;
    cast(void) emplace!T(p, v);
    return d;
}

/// Reinterpret eight bytes as a `long`.
///
/// The bytes are used in the order they are stored in `v`, i.e. no byte order
/// conversion is done.
long toLong(ubyte[8] v) @trusted pure nothrow @nogc {
    // Copy into a real `long` instead of reading `v` through a casted
    // pointer; a ubyte array carries no alignment guarantee for a long load.
    long rval;
    (cast(ubyte*)&rval)[0 .. v.length] = v[];
    return rval;
}

/// Reinterpret eight bytes as a `ulong`.
///
/// The bytes are used in the order they are stored in `v`, i.e. no byte order
/// conversion is done.
ulong toUlong(ubyte[8] v) @trusted pure nothrow @nogc {
    // See toLong for why the bytes are copied rather than read in place.
    ulong rval;
    (cast(ubyte*)&rval)[0 .. v.length] = v[];
    return rval;
}

/// Convert to size_t for use in e.g. operator overload toHash.
///
/// When `size_t` is 4 bytes and `T` is 8 bytes the lower and upper 32 bits of
/// `v` are added together. Otherwise `v` is returned unchanged.
size_t toSizeT(T)(T v) if (is(T : uint) || is(T : ulong)) {
    static if (size_t.sizeof == 4 && T.sizeof == 8)
        return cast(uint) v + cast(uint)(v >> 32);
    else
        return v;
}

/// ditto
size_t toSizeT(const(ubyte)[4] v) @trusted pure nothrow @nogc {
    // Copy the bytes into a real `uint` instead of reading the array through
    // a casted pointer; a ubyte array carries no alignment guarantee for a
    // 32-bit load. The bytes are used in the order they are stored.
    uint word;
    (cast(ubyte*)&word)[0 .. v.length] = v[];
    return toSizeT(word);
}

/// ditto
size_t toSizeT(const(ubyte)[8] v) @trusted pure nothrow @nogc {
    // Same approach as the four byte overload, for a 64-bit value.
    ulong word;
    (cast(ubyte*)&word)[0 .. v.length] = v[];
    return toSizeT(word);
}

/// Create a 128bit hash of the bytes in `p`.
Murmur3 makeMurmur3(const(ubyte)[] p) @safe pure nothrow @nogc {
    BuildChecksum128 hasher;
    hasher.put(p);
    return toMurmur3(hasher);
}

/// Convenient function to convert to a checksum type.
///
/// Bytes 0 .. 8 of `p` become `c0` and bytes 8 .. 16 become `c1`. The bytes
/// are used in the order they are stored, no byte order conversion is done.
Murmur3 toMurmur3(const(ubyte)[16] p) @trusted pure nothrow @nogc {
    // Copy into properly typed storage rather than reading `p` through
    // casted `ulong` pointers.
    ulong[2] words;
    (cast(ubyte*) words.ptr)[0 .. p.length] = p[];
    return Murmur3(words[0], words[1]);
}

/// Finish the hash builder `h` and convert the result to a checksum.
Murmur3 toMurmur3(ref BuildChecksum128 h) @safe pure nothrow @nogc {
    return toMurmur3(h.finish);
}

/// 128bit hash.
struct Murmur3 {
    /// First 64 bits of the hash.
    ulong c0;
    /// Second 64 bits of the hash.
    ulong c1;

    /// Returns: the sum of both halves reduced to a `size_t`.
    size_t toHash() @safe nothrow const pure @nogc {
        return (c0 + c1).toSizeT;
    }

    /// Two hashes are equal when both halves are equal.
    bool opEquals(const typeof(this) o) const nothrow @safe pure @nogc {
        return c0 == o.c0 && c1 == o.c1;
    }

    /// Order by `c0` first and by `c1` when `c0` is equal.
    ///
    /// The argument is taken by value, like in opEquals, so that temporaries
    /// can be compared as well.
    int opCmp(const typeof(this) rhs) @safe pure nothrow const @nogc {
        // return -1 if "this" is less than rhs, 1 if bigger and zero equal
        if (c0 < rhs.c0)
            return -1;
        if (c0 > rhs.c0)
            return 1;
        if (c1 < rhs.c1)
            return -1;
        if (c1 > rhs.c1)
            return 1;
        return 0;
    }

    /// Write the hash as `c0_c1`.
    ///
    /// The format specifier `x` writes both parts with `%x`, any other
    /// specifier writes them with `%s`.
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        if (fmt.spec == 'x')
            formattedWrite(w, "%x_%x", c0, c1);
        else
            formattedWrite(w, "%s_%s", c0, c1);
    }
}

/// Create a 64bit hash.
Crc64Iso makeCrc64Iso(const(ubyte)[] p) @trusted pure nothrow @nogc {
    BuildChecksum64 hash;
    hash.put(p);
    return toCrc64Iso(hash);
}

/// Convenient function to convert to a checksum type.
Crc64Iso toCrc64Iso(const(ubyte)[8] p) @trusted pure nothrow @nogc {
    // Copy the bytes into a real `ulong` instead of reading the array through
    // a casted pointer; a ubyte array carries no alignment guarantee for a
    // 64-bit load. The bytes are used in the order they are stored.
    ulong word;
    (cast(ubyte*)&word)[0 .. p.length] = p[];
    return Crc64Iso(word);
}

/// Convert the current value of the hash builder `h` to a checksum.
///
/// The value is read with `peek`.
Crc64Iso toCrc64Iso(ref BuildChecksum64 h) @trusted pure nothrow @nogc {
    ubyte[8] v = h.peek;
    // The by-value overload does the byte to ulong conversion.
    return toCrc64Iso(v);
}

/** 64-bit checksum.
 *
 * It is intended to be generically used in Dextool when such a checksum is needed.
 *
 * CRC64 ISO is used because there exist implementations in other languages
 * which makes it possible to calculate the checksum in e.g. Python and compare
 * with the one from Dextool.
 *
 * TODO: check if Python has a 64ISO or 64ECMA implementation.
 */
struct Crc64Iso {
    /// The checksum value.
    ulong c0;

    /// Returns: the checksum reduced to a `size_t`.
    size_t toHash() @safe pure nothrow const @nogc scope {
        // A plain `return c0` is an implicit ulong -> size_t narrowing, which
        // does not compile where size_t is 32 bits. toSizeT returns the value
        // unchanged when size_t is 64 bits.
        return c0.toSizeT;
    }

    /// Two checksums are equal when their values are equal.
    bool opEquals(const typeof(this) s) @safe pure nothrow const @nogc scope {
        return c0 == s.c0;
    }

    /// Order by the checksum value.
    ///
    /// The argument is taken by value, like in opEquals, so that temporaries
    /// can be compared as well.
    int opCmp(const typeof(this) rhs) @safe pure nothrow const @nogc {
        // return -1 if "this" is less than rhs, 1 if bigger and zero equal
        if (c0 < rhs.c0)
            return -1;
        if (c0 > rhs.c0)
            return 1;
        return 0;
    }

    /// Write the checksum.
    ///
    /// The format specifier `x` writes it with `%x`, any other specifier
    /// writes it with `%s`.
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        if (fmt.spec == 'x')
            formattedWrite(w, "%x", c0);
        else
            formattedWrite(w, "%s", c0);
    }
}