1 /**
2 Copyright: Copyright (c) 2016-2017, Joakim Brännström. All rights reserved.
3 License: MPL-2
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 This Source Code Form is subject to the terms of the Mozilla Public License,
7 v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
8 one at http://mozilla.org/MPL/2.0/.
9 
10 This module contains functionality to calculate hashes for use as e.g.
11 checksums. The intention is to have the *same* algorithm being used for the
12 same *things* in Dextool.
13 
14 This is to make it easier to integrate with Dextool produced data.
15 
16 **Prefer** the 128-bit hash.
17 
18 Use the 64-bit if you have to for compatibility reasons with other services not
19 part of dextool.
20 
21 **Warning**: These may not be endian independent.
22 
23 TODO: rename ChecksumXX to HashXX.
24 */
25 module dextool.hash;
26 
27 import std.digest.crc : CRC64ISO;
28 import std.digest.murmurhash : MurmurHash3;
29 
// 64-bit checksum: the hasher type, the result type and the helpers that
// build/convert a result. All of them map to the CRC64-ISO implementation.
alias BuildChecksum64 = CRC64ISO;
alias Checksum64 = Crc64Iso;
alias makeChecksum64 = makeCrc64Iso;
alias toChecksum64 = toCrc64Iso;

// 128-bit checksum: the hasher type, the result type and the helpers that
// build/convert a result. All of them map to the MurmurHash3 implementation.
alias BuildChecksum128 = MurmurHash3!(128, 64);
alias Checksum128 = Murmur3;
alias makeChecksum128 = makeMurmur3;
alias toChecksum128 = toMurmur3;
39 
/** Copy a value into a fixed-size byte buffer.
 *
 * Params:
 *  v = the value to copy.
 *
 * Returns: a `ubyte[T.sizeof]` in which a copy of `v` has been constructed in
 * place with `emplace`.
 */
auto toBytes(T)(T v) @trusted pure nothrow @nogc {
    import std.conv : emplace;

    ubyte[T.sizeof] buffer;
    // Construct the copy directly inside the byte buffer.
    cast(void) emplace!T(cast(T*) buffer.ptr, v);
    return buffer;
}
50 
/** Reinterpret eight bytes as a `ulong`.
 *
 * The bytes are read as they lie in memory, so the result depends on the byte
 * order of the machine.
 *
 * Params:
 *  v = the eight bytes to reinterpret.
 *
 * Returns: the 64-bit value formed by all eight bytes of `v`.
 */
ulong toUlong(ubyte[8] v) @trusted pure nothrow @nogc {
    // Read through a `ulong*`, not a `size_t*`: `size_t` is only four bytes
    // wide on 32-bit targets, which would drop half of the input.
    return *(cast(ulong*) v.ptr);
}
54 
/** Convert an integer to `size_t` for use in e.g. the operator overload `toHash`.
 *
 * When `size_t` is four bytes wide and `T` is eight bytes wide, the lower and
 * upper 32-bit halves of `v` are added together (wrapping on overflow).
 * In every other case the value is returned as is.
 */
size_t toSizeT(T)(T v) if (is(T : uint) || is(T : ulong)) {
    enum mustFold = size_t.sizeof == 4 && T.sizeof == 8;

    static if (mustFold) {
        const uint low = cast(uint) v;
        const uint high = cast(uint)(v >> 32);
        return low + high;
    } else {
        return v;
    }
}
62 
/// Convert four bytes, read from memory as a `uint`, to `size_t`.
size_t toSizeT(const(ubyte)[4] v) @trusted pure nothrow @nogc {
    const uint asUint = *(cast(const(uint)*) v.ptr);
    return toSizeT(asUint);
}
67 
/// Convert eight bytes, read from memory as a `ulong`, to `size_t`.
size_t toSizeT(const(ubyte)[8] v) @trusted pure nothrow @nogc {
    const ulong asUlong = *(cast(const(ulong)*) v.ptr);
    return toSizeT(asUlong);
}
72 
/** Make a 32-bit hash.
 *
 * The hash is the CRC32 digest of `raw`. The four digest bytes are packed so
 * that the first byte ends up in bits 24-31 and the last byte in bits 0-7.
 *
 * Params:
 *  raw = the data to hash; must be comparable with `null` and accepted by
 *        `crc32Of`.
 *
 * Returns: 0 when `raw` is `null`, otherwise the packed digest, which always
 * fits in 32 bits.
 */
// TODO: deprecate this. Should use the 128-bit.
ulong makeHash(T)(T raw) @safe pure nothrow @nogc {
    import std.digest.crc;

    if (raw is null)
        return 0;

    const ubyte[4] digest = crc32Of(raw);

    // Widen before shifting. A `ubyte` is promoted to `int`, so shifting a
    // byte >= 0x80 by 24 gives a negative `int` that is sign-extended when
    // converted to `ulong`, setting the upper 32 bits of the result.
    return (cast(ulong) digest[0] << 24) | (cast(ulong) digest[1] << 16) | (
            cast(ulong) digest[2] << 8) | digest[3];
}
83 
/** Create a 128-bit hash of a byte slice.
 *
 * Params:
 *  p = the bytes to hash.
 *
 * Returns: the hash of `p` as a `Murmur3` value.
 */
Murmur3 makeMurmur3(const(ubyte)[] p) @safe nothrow {
    BuildChecksum128 builder;
    builder.put(p);
    return toMurmur3(builder);
}
89 
/** Convenience function to convert 16 raw bytes to a checksum type.
 *
 * The first eight bytes become `c0` and the last eight become `c1`, each read
 * from memory as a `ulong`.
 */
Murmur3 toMurmur3(const(ubyte)[16] p) @trusted pure nothrow @nogc {
    const words = cast(const(ulong)*) p.ptr;
    return Murmur3(words[0], words[1]);
}
96 
/// Convert the result of calling `finish` on the hasher `h` to a checksum type.
Murmur3 toMurmur3(ref BuildChecksum128 h) @safe pure nothrow @nogc {
    const ubyte[16] digest = h.finish();
    return toMurmur3(digest);
}
100 
/// 128-bit hash, stored as two 64-bit halves.
struct Murmur3 {
    ulong c0;
    ulong c1;

    /// Hash value: the (wrapping) sum of both halves converted to `size_t`.
    size_t toHash() @safe nothrow const {
        return toSizeT(c0 + c1);
    }

    /// Two hashes are equal when both halves are equal.
    bool opEquals(const typeof(this) o) const nothrow @safe {
        return o.c0 == c0 && o.c1 == c1;
    }

    import std.format : FormatSpec;

    /** Write both halves, `c0` followed by `c1`, to `w`.
     *
     * The halves are written in hexadecimal when the format specifier is `x`
     * and with `%s` otherwise.
     */
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        import std.format : formattedWrite;

        // NOTE(review): the halves are not zero-padded, so different values
        // can produce the same text (e.g. 0x1,0x23 and 0x12,0x3). Left as is
        // because the textual format may be relied on elsewhere.
        string pattern = "%s%s";
        if (fmt.spec == 'x')
            pattern = "%x%x";

        formattedWrite(w, pattern, c0, c1);
    }
}
126 
/** Create a 64-bit hash of a byte slice.
 *
 * Params:
 *  p = the bytes to hash.
 *
 * Returns: the hash of `p` as a `Crc64Iso` value.
 */
Crc64Iso makeCrc64Iso(const(ubyte)[] p) @trusted pure nothrow @nogc {
    BuildChecksum64 builder;
    builder.put(p);
    return toCrc64Iso(builder);
}
133 
/** Convenience function to convert eight raw bytes to a checksum type.
 *
 * The bytes are read from memory as a single `ulong`.
 */
Crc64Iso toCrc64Iso(const(ubyte)[8] p) @trusted pure nothrow @nogc {
    const raw = cast(const(ulong)*) p.ptr;
    return Crc64Iso(*raw);
}
138 
/// Convert the value returned by `peek` on the hasher `h` to a checksum type.
Crc64Iso toCrc64Iso(ref BuildChecksum64 h) @trusted pure nothrow @nogc {
    const ubyte[8] digest = h.peek();
    return Crc64Iso(*(cast(const(ulong)*) digest.ptr));
}
143 
/** 64-bit checksum.
 *
 * It is intended to be generically used in Dextool when such a checksum is needed.
 *
 * CRC64 ISO is used because there exist implementations in other languages,
 * which makes it possible to calculate the checksum in e.g. Python and compare
 * it with the one from Dextool.
 *
 * TODO: check if Python has a 64ISO or 64ECMA implementation.
 */
struct Crc64Iso {
    ulong c0;

    /** Hash value for use in e.g. associative arrays.
     *
     * Returns: `c0` when `size_t` can hold it; otherwise the (wrapping) sum of
     * the lower and upper 32-bit halves of `c0`.
     */
    size_t toHash() @safe pure nothrow const @nogc scope {
        // A plain `return c0;` is a narrowing conversion that is rejected
        // when `size_t` is 32 bits wide, so fold the two halves there.
        static if (size_t.sizeof < ulong.sizeof)
            return cast(uint) c0 + cast(uint)(c0 >> 32);
        else
            return c0;
    }

    /// Two checksums are equal when their values are equal.
    bool opEquals(const typeof(this) s) @safe pure nothrow const @nogc scope {
        return c0 == s.c0;
    }

    import std.format : FormatSpec;

    /** Write the checksum to `w`.
     *
     * The value is written in hexadecimal when the format specifier is `x`
     * and with `%s` otherwise.
     */
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        import std.format : formattedWrite;

        if (fmt.spec == 'x')
            formattedWrite(w, "%x", c0);
        else
            formattedWrite(w, "%s", c0);
    }
}