sudachi/dic/lexicon/
word_id_table.rs

/*
 * Copyright (c) 2021-2024 Works Applications Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17use std::iter::FusedIterator;
18use std::ptr::NonNull;
19
/// Borrowed view of a word-id table that lives inside a larger byte buffer.
///
/// The table does not own its data; it only remembers where to look inside
/// `bytes` when `entries` is called.
pub struct WordIdTable<'a> {
    /// Buffer that the table entries are read from.
    bytes: &'a [u8],
    /// Size value supplied at construction; `storage_size` reports it plus 4.
    size: u32,
    /// Added to every index passed to `entries` to obtain the position in `bytes`.
    offset: usize,
}
25
26impl<'a> WordIdTable<'a> {
27    pub fn new(bytes: &'a [u8], size: u32, offset: usize) -> WordIdTable {
28        WordIdTable {
29            bytes,
30            size,
31            offset,
32        }
33    }
34
35    pub fn storage_size(&self) -> usize {
36        4 + self.size as usize
37    }
38
39    #[inline]
40    pub fn entries(&self, index: usize) -> WordIdIter {
41        debug_assert!(index < self.bytes.len());
42        let ptr = unsafe { self.bytes.as_ptr().add(index + self.offset) };
43        let cnt = unsafe { ptr.read() } as usize;
44        let data_ptr = unsafe { ptr.offset(1) } as *const u32;
45        debug_assert!(index + cnt * std::mem::size_of::<u32>() < self.bytes.len());
46        WordIdIter {
47            data: unsafe { NonNull::new_unchecked(data_ptr as _) },
48            remaining: cnt,
49        }
50    }
51}
52
/// Iterator over the `u32` word ids of a single table entry.
pub struct WordIdIter {
    /// Position of the next id to yield.
    ///
    /// The pointer carries no alignment guarantee, so it must only ever be
    /// dereferenced through unaligned reads. Storing it as `NonNull` keeps
    /// `Option<Self>` the same size as `Self`.
    data: NonNull<u32>,
    /// How many ids are still to be yielded.
    remaining: usize,
}
60
61impl Iterator for WordIdIter {
62    type Item = u32;
63
64    #[inline]
65    fn next(&mut self) -> Option<Self::Item> {
66        if self.remaining == 0 {
67            return None;
68        }
69        let ptr = self.data.as_ptr();
70
71        let val = unsafe { ptr.read_unaligned() };
72        self.data = unsafe { NonNull::new_unchecked(ptr.offset(1)) };
73        self.remaining -= 1;
74        Some(val)
75    }
76}
77
78impl FusedIterator for WordIdIter {}