sudachi/dic/
connect.rs

1/*
2 *  Copyright (c) 2021 Works Applications Co., Ltd.
3 *
4 *  Licensed under the Apache License, Version 2.0 (the "License");
5 *  you may not use this file except in compliance with the License.
6 *  You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 *   Unless required by applicable law or agreed to in writing, software
11 *  distributed under the License is distributed on an "AS IS" BASIS,
12 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 *  See the License for the specific language governing permissions and
14 *  limitations under the License.
15 */
16
17use crate::error::{SudachiError, SudachiResult};
18use crate::util::cow_array::CowArray;
19
20pub struct ConnectionMatrix<'a> {
21    data: CowArray<'a, i16>,
22    num_left: usize,
23    num_right: usize,
24}
25
26impl<'a> ConnectionMatrix<'a> {
27    pub fn from_offset_size(
28        data: &'a [u8],
29        offset: usize,
30        num_left: usize,
31        num_right: usize,
32    ) -> SudachiResult<ConnectionMatrix<'a>> {
33        let size = num_left * num_right;
34
35        let end = offset + size;
36        if end > data.len() {
37            return Err(SudachiError::InvalidDictionaryGrammar.with_context("connection matrix"));
38        }
39
40        Ok(ConnectionMatrix {
41            data: CowArray::from_bytes(data, offset, size),
42            num_left,
43            num_right,
44        })
45    }
46
47    #[inline(always)]
48    fn index(&self, left: u16, right: u16) -> usize {
49        let uleft = left as usize;
50        let uright = right as usize;
51        debug_assert!(uleft < self.num_left);
52        debug_assert!(uright < self.num_right);
53        let index = uright * self.num_left + uleft;
54        debug_assert!(index < self.data.len());
55        index
56    }
57
58    /// Gets the value of the connection matrix
59    ///
60    /// It is performance critical that this function
61    /// 1. Has no branches
62    /// 2. Is inlined to the caller
63    ///
64    /// This is UB if index is out of bounds, but that can't happen
65    /// except in the case if the binary dictionary was tampered with.
66    /// It is OK to make usage of tampered binary dictionaries UB.
67    #[inline(always)]
68    pub fn cost(&self, left: u16, right: u16) -> i16 {
69        let index = self.index(left, right);
70        *unsafe { self.data.get_unchecked(index) }
71    }
72
73    pub fn update(&mut self, left: u16, right: u16, value: i16) {
74        let index = self.index(left, right);
75        self.data.set(index, value);
76    }
77
78    /// Returns maximum number of left connection ID
79    pub fn num_left(&self) -> usize {
80        self.num_left
81    }
82
83    /// Returns maximum number of right connection ID
84    pub fn num_right(&self) -> usize {
85        self.num_right
86    }
87}