sudachi/
hash.rs

1/*
2 *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
3 *
4 *  Licensed under the Apache License, Version 2.0 (the "License");
5 *  you may not use this file except in compliance with the License.
6 *  You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 *   Unless required by applicable law or agreed to in writing, software
11 *  distributed under the License is distributed on an "AS IS" BASIS,
12 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 *  See the License for the specific language governing permissions and
14 *  limitations under the License.
15 */
16
17use std::hash::{BuildHasher, Hasher};
18
19const MULTIPLIER: u64 = 0x6eed0e9da4d94a4fu64;
20const SEED: u64 = 0x16f11fe89b0d677cu64;
21
22/// Ro(tate) + Mu(ltiply) Hasher Factory
23pub struct RoMu {}
24
25impl RoMu {
26    pub fn new() -> RoMu {
27        RoMu {}
28    }
29}
30
31impl Default for RoMu {
32    fn default() -> Self {
33        RoMu::new()
34    }
35}
36
37impl BuildHasher for RoMu {
38    type Hasher = RoMuHash;
39
40    #[inline(always)]
41    fn build_hasher(&self) -> Self::Hasher {
42        RoMuHash::new()
43    }
44}
45
46pub struct RoMuHash {
47    state: u64,
48}
49
50// from https://github.com/ku-nlp/jumanpp/blob/master/src/util/fast_hash_rot.h
51// It is very fast (xor+mul+rot) for extremely small values (e.g. 1 field)
52impl RoMuHash {
53    #[inline(always)]
54    pub fn new() -> RoMuHash {
55        RoMuHash { state: SEED }
56    }
57
58    #[inline(always)]
59    fn consume(&mut self, value: u64) {
60        let data = self.state ^ value;
61        let data = data.wrapping_mul(MULTIPLIER);
62        self.state = data.rotate_left(32);
63    }
64}
65
66impl Hasher for RoMuHash {
67    #[inline(always)]
68    fn finish(&self) -> u64 {
69        self.state
70    }
71
72    #[inline(always)]
73    fn write(&mut self, _bytes: &[u8]) {
74        panic!("not supported for bytes")
75    }
76
77    #[inline(always)]
78    fn write_u8(&mut self, _: u8) {
79        panic!("not supported for u8")
80    }
81
82    #[inline(always)]
83    fn write_u16(&mut self, _: u16) {
84        panic!("not supported for u16")
85    }
86
87    #[inline(always)]
88    fn write_u32(&mut self, i: u32) {
89        self.consume(i as u64);
90    }
91
92    #[inline(always)]
93    fn write_u64(&mut self, i: u64) {
94        self.consume(i);
95    }
96
97    #[inline(always)]
98    fn write_u128(&mut self, _: u128) {
99        panic!("not supported for u128")
100    }
101
102    #[inline(always)]
103    fn write_usize(&mut self, i: usize) {
104        self.consume(i as u64);
105    }
106
107    #[inline(always)]
108    fn write_i8(&mut self, _: i8) {
109        panic!("not supported for i8")
110    }
111
112    #[inline(always)]
113    fn write_i16(&mut self, _: i16) {
114        panic!("not supported for i16")
115    }
116
117    #[inline(always)]
118    fn write_i32(&mut self, i: i32) {
119        self.consume(i as u64)
120    }
121
122    #[inline(always)]
123    fn write_i64(&mut self, i: i64) {
124        self.consume(i as u64)
125    }
126
127    #[inline(always)]
128    fn write_i128(&mut self, _: i128) {
129        panic!("not supported for i128")
130    }
131
132    #[inline(always)]
133    fn write_isize(&mut self, i: isize) {
134        self.consume(i as u64)
135    }
136}
137
138#[cfg(test)]
139mod test {
140    use crate::hash::RoMu;
141    use std::collections::HashMap;
142    use std::hash::{Hash, Hasher};
143
144    #[derive(Eq, PartialEq)]
145    struct Small(i32, i32);
146
147    impl Hash for Small {
148        fn hash<H: Hasher>(&self, state: &mut H) {
149            state.write_u64((self.0 as u64) << 32 | (self.1 as u64))
150        }
151    }
152
153    #[test]
154    fn works_in_hashmap() {
155        let mut map = HashMap::with_hasher(RoMu::new());
156        map.insert(Small(5, 6), "data");
157        map.insert(Small(6, 5), "data2");
158        assert_eq!(*map.get(&Small(5, 6)).unwrap(), "data");
159        assert!(!map.contains_key(&Small(0, 0)));
160    }
161}