1 /**
2 Copyright: Copyright (c) 2016-2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains functionality to calculate hashes for use as e.g.
11 checksums. The intention is to have the *same* algorithm being used for the
12 same *things* in Dextool.
13 
14 This is to make it easier to integrate with Dextool produced data.
15 
16 **Prefer** the 128-bit hash.
17 
18 Use the 64-bit if you have to for compatibility reasons with other services not
19 part of dextool.
20 
21 **Warning**: These may not be endian independent.
22 
23 TODO: rename ChecksumXX to HashXX.
24 */
25 module dextool.hash;
26 
27 import std.digest.crc : CRC64ISO;
28 import std.digest.murmurhash : MurmurHash3;
29 
30 alias BuildChecksum64 = CRC64ISO;
31 alias Checksum64 = Crc64Iso;
32 alias makeChecksum64 = makeCrc64Iso;
33 alias toChecksum64 = toCrc64Iso;
34 
35 alias BuildChecksum128 = MurmurHash3!(128, 64);
36 alias Checksum128 = Murmur3;
37 alias makeChecksum128 = makeMurmur3;
38 alias toChecksum128 = toMurmur3;
39 
40 /// Convert a value to its ubyte representation.
41 auto toBytes(T)(T v) @trusted pure nothrow @nogc {
42     import std.conv : emplace;
43 
44     ubyte[T.sizeof] d;
45     T* p = cast(T*)&d;
46     cast(void) emplace!T(p, v);
47 
48     return d;
49 }
50 
51 long toLong(ubyte[8] v) @trusted pure nothrow @nogc {
52     return *(cast(long*)&v);
53 }
54 
55 ulong toUlong(ubyte[8] v) @trusted pure nothrow @nogc {
56     return *(cast(ulong*)&v);
57 }
58 
59 /// Convert to size_to for use in e.g. operator overload toHash.
60 size_t toSizeT(T)(T v) if (is(T : uint) || is(T : ulong)) {
61     static if (size_t.sizeof == 4 && T.sizeof == 8)
62         return cast(uint) v + cast(uint)(v >> 32);
63     else
64         return v;
65 }
66 
67 /// ditto.
68 size_t toSizeT(const(ubyte)[4] v) @trusted pure nothrow @nogc {
69     return toSizeT(*(cast(const(uint)*)&v));
70 }
71 
72 /// ditto.
73 size_t toSizeT(const(ubyte)[8] v) @trusted pure nothrow @nogc {
74     return toSizeT(*(cast(const(ulong)*)&v));
75 }
76 
77 /// Make a 32bit hash.
78 // TODO: deprecate this. Should use the 128-bit.
79 ulong makeHash(T)(T raw) @safe pure nothrow @nogc {
80     import std.digest.crc;
81 
82     if (raw is null)
83         return 0;
84     ubyte[4] hash = crc32Of(raw);
85     return (hash[0] << 24) | (hash[1] << 16) | (hash[2] << 8) | hash[3];
86 }
87 
88 Murmur3 makeMurmur3(const(ubyte)[] p) @safe nothrow {
89     BuildChecksum128 hasher;
90     hasher.put(p);
91     return toMurmur3(hasher);
92 }
93 
94 /// Convenient function to convert to a checksum type.
95 Murmur3 toMurmur3(const(ubyte)[16] p) @trusted pure nothrow @nogc {
96     ulong a = *(cast(ulong*)&p[0]);
97     ulong b = *(cast(ulong*)&p[8]);
98     return Murmur3(a, b);
99 }
100 
101 Murmur3 toMurmur3(ref BuildChecksum128 h) @safe pure nothrow @nogc {
102     return toMurmur3(h.finish);
103 }
104 
105 /// 128bit hash.
106 struct Murmur3 {
107     ulong c0;
108     ulong c1;
109 
110     size_t toHash() @safe nothrow const pure @nogc {
111         return (c0 + c1).toSizeT;
112     }
113 
114     bool opEquals(const typeof(this) o) const nothrow @safe pure @nogc {
115         return c0 == o.c0 && c1 == o.c1;
116     }
117 
118     int opCmp(ref const typeof(this) rhs) @safe pure nothrow const @nogc {
119         // return -1 if "this" is less than rhs, 1 if bigger and zero equal
120         if (c0 < rhs.c0)
121             return -1;
122         if (c0 > rhs.c0)
123             return 1;
124         if (c1 < rhs.c1)
125             return -1;
126         if (c1 > rhs.c1)
127             return 1;
128         return 0;
129     }
130 
131     import std.format : FormatSpec;
132 
133     void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
134         import std.format : formatValue, formattedWrite;
135         import std.range.primitives : put;
136 
137         if (fmt.spec == 'x')
138             formattedWrite(w, "%x_%x", c0, c1);
139         else
140             formattedWrite(w, "%s_%s", c0, c1);
141     }
142 }
143 
144 /// Create a 64bit hash.
145 Crc64Iso makeCrc64Iso(const(ubyte)[] p) @trusted pure nothrow @nogc {
146     BuildChecksum64 hash;
147     hash.put(p);
148     return toCrc64Iso(hash);
149 }
150 
151 /// Convenient function to convert to a checksum type.
152 Crc64Iso toCrc64Iso(const(ubyte)[8] p) @trusted pure nothrow @nogc {
153     return Crc64Iso(*(cast(ulong*)&p[0]));
154 }
155 
156 Crc64Iso toCrc64Iso(ref BuildChecksum64 h) @trusted pure nothrow @nogc {
157     ubyte[8] v = h.peek;
158     return Crc64Iso(*(cast(ulong*)&v[0]));
159 }
160 
161 /** 64-bit checksum.
162  *
163  * It is intended to be generically used in Dextool when such a checksum is needed.
164  *
165  * CRC64 ISO is used because there exist implementations in other languages
166  * which makes it possible to calculate the checksum in e.g. python and compare
167  * with the one from Dextool.
168  *
169  * TODO: check if python have a 64ISO or 64ECMA implementation.
170  */
171 struct Crc64Iso {
172     ulong c0;
173 
174     size_t toHash() @safe pure nothrow const @nogc scope {
175         return c0;
176     }
177 
178     bool opEquals(const typeof(this) s) @safe pure nothrow const @nogc scope {
179         return c0 == s.c0;
180     }
181 
182     import std.format : FormatSpec;
183 
184     void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
185         import std.format : formatValue, formattedWrite;
186         import std.range.primitives : put;
187 
188         if (fmt.spec == 'x')
189             formattedWrite(w, "%x", c0);
190         else
191             formattedWrite(w, "%s", c0);
192     }
193 }