1 /** 2 Copyright: Copyright (c) 2016-2017, Joakim Brännström. All rights reserved. 3 License: MPL-2 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 This Source Code Form is subject to the terms of the Mozilla Public License, 7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain 8 one at http://mozilla.org/MPL/2.0/. 9 10 This module contains functionality to calculate hashes for use as e.g. 11 checksums. The intention is to have the *same* algorithm being used for the 12 same *things* in Dextool. 13 14 This is to make it easier to integrate with Dextool produced data. 15 16 **Prefer** the 128-bit hash. 17 18 Use the 64-bit if you have to for compatibility reasons with other services not 19 part of dextool. 20 21 **Warning**: These may not be endian independent. 22 23 TODO: rename ChecksumXX to HashXX. 24 */ 25 module dextool.hash; 26 27 import std.digest.crc : CRC64ISO; 28 import std.digest.murmurhash : MurmurHash3; 29 30 alias BuildChecksum64 = CRC64ISO; 31 alias Checksum64 = Crc64Iso; 32 alias makeChecksum64 = makeCrc64Iso; 33 alias toChecksum64 = toCrc64Iso; 34 35 alias BuildChecksum128 = MurmurHash3!(128, 64); 36 alias Checksum128 = Murmur3; 37 alias makeChecksum128 = makeMurmur3; 38 alias toChecksum128 = toMurmur3; 39 40 /// Convert a value to its ubyte representation. 41 auto toBytes(T)(T v) @trusted pure nothrow @nogc { 42 import std.conv : emplace; 43 44 ubyte[T.sizeof] d; 45 T* p = cast(T*)&d; 46 cast(void) emplace!T(p, v); 47 48 return d; 49 } 50 51 long toLong(ubyte[8] v) @trusted pure nothrow @nogc { 52 return *(cast(long*)&v); 53 } 54 55 ulong toUlong(ubyte[8] v) @trusted pure nothrow @nogc { 56 return *(cast(ulong*)&v); 57 } 58 59 /// Convert to size_to for use in e.g. operator overload toHash. 60 size_t toSizeT(T)(T v) if (is(T : uint) || is(T : ulong)) { 61 static if (size_t.sizeof == 4 && T.sizeof == 8) 62 return cast(uint) v + cast(uint)(v >> 32); 63 else 64 return v; 65 } 66 67 /// ditto. 68 size_t toSizeT(const(ubyte)[4] v) @trusted pure nothrow @nogc { 69 return toSizeT(*(cast(const(uint)*)&v)); 70 } 71 72 /// ditto. 73 size_t toSizeT(const(ubyte)[8] v) @trusted pure nothrow @nogc { 74 return toSizeT(*(cast(const(ulong)*)&v)); 75 } 76 77 /// Make a 32bit hash. 78 // TODO: deprecate this. Should use the 128-bit. 79 ulong makeHash(T)(T raw) @safe pure nothrow @nogc { 80 import std.digest.crc; 81 82 if (raw is null) 83 return 0; 84 ubyte[4] hash = crc32Of(raw); 85 return (hash[0] << 24) | (hash[1] << 16) | (hash[2] << 8) | hash[3]; 86 } 87 88 Murmur3 makeMurmur3(const(ubyte)[] p) @safe nothrow { 89 BuildChecksum128 hasher; 90 hasher.put(p); 91 return toMurmur3(hasher); 92 } 93 94 /// Convenient function to convert to a checksum type. 95 Murmur3 toMurmur3(const(ubyte)[16] p) @trusted pure nothrow @nogc { 96 ulong a = *(cast(ulong*)&p[0]); 97 ulong b = *(cast(ulong*)&p[8]); 98 return Murmur3(a, b); 99 } 100 101 Murmur3 toMurmur3(ref BuildChecksum128 h) @safe pure nothrow @nogc { 102 return toMurmur3(h.finish); 103 } 104 105 /// 128bit hash. 106 struct Murmur3 { 107 ulong c0; 108 ulong c1; 109 110 size_t toHash() @safe nothrow const pure @nogc { 111 return (c0 + c1).toSizeT; 112 } 113 114 bool opEquals(const typeof(this) o) const nothrow @safe pure @nogc { 115 return c0 == o.c0 && c1 == o.c1; 116 } 117 118 int opCmp(ref const typeof(this) rhs) @safe pure nothrow const @nogc { 119 // return -1 if "this" is less than rhs, 1 if bigger and zero equal 120 if (c0 < rhs.c0) 121 return -1; 122 if (c0 > rhs.c0) 123 return 1; 124 if (c1 < rhs.c1) 125 return -1; 126 if (c1 > rhs.c1) 127 return 1; 128 return 0; 129 } 130 131 import std.format : FormatSpec; 132 133 void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const { 134 import std.format : formatValue, formattedWrite; 135 import std.range.primitives : put; 136 137 if (fmt.spec == 'x') 138 formattedWrite(w, "%x_%x", c0, c1); 139 else 140 formattedWrite(w, "%s_%s", c0, c1); 141 } 142 } 143 144 /// Create a 64bit hash. 145 Crc64Iso makeCrc64Iso(const(ubyte)[] p) @trusted pure nothrow @nogc { 146 BuildChecksum64 hash; 147 hash.put(p); 148 return toCrc64Iso(hash); 149 } 150 151 /// Convenient function to convert to a checksum type. 152 Crc64Iso toCrc64Iso(const(ubyte)[8] p) @trusted pure nothrow @nogc { 153 return Crc64Iso(*(cast(ulong*)&p[0])); 154 } 155 156 Crc64Iso toCrc64Iso(ref BuildChecksum64 h) @trusted pure nothrow @nogc { 157 ubyte[8] v = h.peek; 158 return Crc64Iso(*(cast(ulong*)&v[0])); 159 } 160 161 /** 64-bit checksum. 162 * 163 * It is intended to be generically used in Dextool when such a checksum is needed. 164 * 165 * CRC64 ISO is used because there exist implementations in other languages 166 * which makes it possible to calculate the checksum in e.g. python and compare 167 * with the one from Dextool. 168 * 169 * TODO: check if python have a 64ISO or 64ECMA implementation. 170 */ 171 struct Crc64Iso { 172 ulong c0; 173 174 size_t toHash() @safe pure nothrow const @nogc scope { 175 return c0; 176 } 177 178 bool opEquals(const typeof(this) s) @safe pure nothrow const @nogc scope { 179 return c0 == s.c0; 180 } 181 182 import std.format : FormatSpec; 183 184 void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const { 185 import std.format : formatValue, formattedWrite; 186 import std.range.primitives : put; 187 188 if (fmt.spec == 'x') 189 formattedWrite(w, "%x", c0); 190 else 191 formattedWrite(w, "%s", c0); 192 } 193 }