/**
Copyright: Copyright (c) 2016-2017, Joakim Brännström. All rights reserved.
License: MPL-2
Author: Joakim Brännström (joakim.brannstrom@gmx.com)

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

This module contains functionality to calculate hashes for use as e.g.
checksums. The intention is to have the *same* algorithm being used for the
same *things* in Dextool.

This is to make it easier to integrate with Dextool produced data.

**Prefer** the 128-bit hash.

Use the 64-bit if you have to for compatibility reasons with other services not
part of dextool.

**Warning**: These may not be endian independent.

TODO: rename ChecksumXX to HashXX.
*/
module dextool.hash;

import std.digest.crc : CRC64ISO;
import std.digest.murmurhash : MurmurHash3;

alias BuildChecksum64 = CRC64ISO;
alias Checksum64 = Crc64Iso;
alias makeChecksum64 = makeCrc64Iso;
alias toChecksum64 = toCrc64Iso;

alias BuildChecksum128 = MurmurHash3!(128, 64);
alias Checksum128 = Murmur3;
alias makeChecksum128 = makeMurmur3;
alias toChecksum128 = toMurmur3;

/** Convert a value to its ubyte representation.
 *
 * Params:
 *  v = the value to copy.
 *
 * Returns: a static array of `T.sizeof` bytes into which `v` has been
 * emplaced.
 */
auto toBytes(T)(T v) @trusted pure nothrow @nogc {
    import std.conv : emplace;

    ubyte[T.sizeof] d;
    // Construct a copy of `v` directly inside the byte buffer.
    T* p = cast(T*)&d;
    cast(void) emplace!T(p, v);

    return d;
}

/** Reinterpret eight bytes as an `ulong`.
 *
 * The bytes are read through a pointer cast, i.e. in the memory order of the
 * host (see the endianness warning in the module documentation).
 *
 * Params:
 *  v = the eight bytes to reinterpret.
 *
 * Returns: all eight bytes of `v` viewed as one `ulong`.
 */
ulong toUlong(ubyte[8] v) @trusted pure nothrow @nogc {
    // Read through `ulong*`, not `size_t*`: `size_t` is only four bytes wide on
    // 32-bit targets, which would silently drop half of the input.
    return *(cast(ulong*)&v);
}

/** Convert to size_t for use in e.g. operator overload toHash.
 *
 * When `size_t` is four bytes wide and `T` is eight bytes wide the low and
 * high 32-bit halves of `v` are added together; otherwise `v` is returned
 * as is.
 */
size_t toSizeT(T)(T v) if (is(T : uint) || is(T : ulong)) {
    static if (size_t.sizeof == 4 && T.sizeof == 8)
        return cast(uint) v + cast(uint)(v >> 32);
    else
        return v;
}

/// ditto.
size_t toSizeT(const(ubyte)[4] v) @trusted pure nothrow @nogc {
    // View the four bytes as one uint (host memory order) and convert that.
    return toSizeT(*(cast(const(uint)*)&v));
}

/// ditto.
size_t toSizeT(const(ubyte)[8] v) @trusted pure nothrow @nogc {
    // View the eight bytes as one ulong (host memory order) and convert that.
    return toSizeT(*(cast(const(ulong)*)&v));
}

/** Make a 32bit hash.
 *
 * Params:
 *  raw = the data to hash with CRC32.
 *
 * Returns: 0 when `raw` is null. Otherwise the four CRC32 bytes packed into
 * the low 32 bits of the result with `hash[0]` as the most significant byte;
 * the upper 32 bits are always zero.
 */
// TODO: deprecate this. Should use the 128-bit.
ulong makeHash(T)(T raw) @safe pure nothrow @nogc {
    import std.digest.crc;

    if (raw is null)
        return 0;
    ubyte[4] hash = crc32Of(raw);
    // Widen before shifting. A ubyte promotes to int, so `hash[0] << 24` turns
    // negative when the top bit is set and would be sign-extended into the
    // upper 32 bits on conversion to ulong.
    return (cast(ulong) hash[0] << 24) | (cast(ulong) hash[1] << 16) | (
            cast(ulong) hash[2] << 8) | cast(ulong) hash[3];
}

/// Create a 128bit hash of the bytes in `p`.
Murmur3 makeMurmur3(const(ubyte)[] p) @safe nothrow {
    BuildChecksum128 hasher;
    hasher.put(p);
    return toMurmur3(hasher);
}

/// Convenient function to convert to a checksum type.
Murmur3 toMurmur3(const(ubyte)[16] p) @trusted pure nothrow @nogc {
    // Bytes 0..8 become c0 and bytes 8..16 become c1, read in host memory order.
    ulong a = *(cast(ulong*)&p[0]);
    ulong b = *(cast(ulong*)&p[8]);
    return Murmur3(a, b);
}

/// ditto
Murmur3 toMurmur3(ref BuildChecksum128 h) @safe pure nothrow @nogc {
    return toMurmur3(h.finish);
}

/// 128bit hash.
struct Murmur3 {
    ulong c0;
    ulong c1;

    /// Sum of both halves converted with `toSizeT`.
    size_t toHash() @safe nothrow const {
        return (c0 + c1).toSizeT;
    }

    /// Two hashes are equal when both halves are equal.
    bool opEquals(const typeof(this) o) const nothrow @safe {
        return c0 == o.c0 && c1 == o.c1;
    }

    import std.format : FormatSpec;

    /** Write `c0` directly followed by `c1` to `w`.
     *
     * Hexadecimal is used for the format specifier `x`, the default formatting
     * otherwise.
     *
     * NOTE(review): the halves are written without padding or a separator, so
     * two different hashes can render to the same text. The format is kept
     * unchanged because existing consumers may depend on it -- confirm before
     * changing.
     */
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        import std.format : formattedWrite;

        if (fmt.spec == 'x')
            formattedWrite(w, "%x%x", c0, c1);
        else
            formattedWrite(w, "%s%s", c0, c1);
    }
}

/// Create a 64bit hash.
Crc64Iso makeCrc64Iso(const(ubyte)[] p) @trusted pure nothrow @nogc {
    BuildChecksum64 hash;
    hash.put(p);
    return toCrc64Iso(hash);
}

/// Convenient function to convert to a checksum type.
Crc64Iso toCrc64Iso(const(ubyte)[8] p) @trusted pure nothrow @nogc {
    // View the eight bytes as one ulong, read in host memory order.
    return Crc64Iso(*(cast(ulong*)&p[0]));
}

/// ditto
Crc64Iso toCrc64Iso(ref BuildChecksum64 h) @trusted pure nothrow @nogc {
    // `peek` is used (not `finish`), as in the original implementation.
    ubyte[8] v = h.peek;
    return toCrc64Iso(v);
}

/** 64-bit checksum.
 *
 * It is intended to be generically used in Dextool when such a checksum is needed.
 *
 * CRC64 ISO is used because there exist implementations in other languages
 * which makes it possible to calculate the checksum in e.g. python and compare
 * with the one from Dextool.
 *
 * TODO: check if python have a 64ISO or 64ECMA implementation.
 */
struct Crc64Iso {
    ulong c0;

    /** The checksum as a `size_t`.
     *
     * Where `size_t` is narrower than `ulong` the low and high 32-bit halves
     * are added together; otherwise `c0` is returned unchanged.
     */
    size_t toHash() @safe pure nothrow const @nogc scope {
        // A plain `return c0;` is an implicit ulong -> size_t narrowing and
        // does not compile when size_t is 32 bits wide.
        static if (size_t.sizeof < ulong.sizeof)
            return cast(uint) c0 + cast(uint)(c0 >> 32);
        else
            return c0;
    }

    /// Two checksums are equal when their values are equal.
    bool opEquals(const typeof(this) s) @safe pure nothrow const @nogc scope {
        return c0 == s.c0;
    }

    import std.format : FormatSpec;

    /** Write the checksum to `w`.
     *
     * Hexadecimal is used for the format specifier `x`, the default formatting
     * otherwise.
     */
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        import std.format : formattedWrite;

        if (fmt.spec == 'x')
            formattedWrite(w, "%x", c0);
        else
            formattedWrite(w, "%s", c0);
    }
}