sudachi/dic/
subset.rs

1/*
2 *  Copyright (c) 2021 Works Applications Co., Ltd.
3 *
4 *  Licensed under the Apache License, Version 2.0 (the "License");
5 *  you may not use this file except in compliance with the License.
6 *  You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 *   Unless required by applicable law or agreed to in writing, software
11 *  distributed under the License is distributed on an "AS IS" BASIS,
12 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 *  See the License for the specific language governing permissions and
14 *  limitations under the License.
15 */
16
17use bitflags::bitflags;
18
19bitflags! {
20    #[repr(transparent)]
21    #[derive(Copy, Clone, Eq, PartialEq, Debug)]
22    pub struct InfoSubset: u32 {
23        const SURFACE = (1 << 0);
24        const HEAD_WORD_LENGTH = (1 << 1);
25        const POS_ID = (1 << 2);
26        const NORMALIZED_FORM = (1 << 3);
27        const DIC_FORM_WORD_ID = (1 << 4);
28        const READING_FORM = (1 << 5);
29        const SPLIT_A = (1 << 6);
30        const SPLIT_B = (1 << 7);
31        const WORD_STRUCTURE = (1 << 8);
32        const SYNONYM_GROUP_ID = (1 << 9);
33    }
34}
35
36impl Default for InfoSubset {
37    fn default() -> Self {
38        Self::all()
39    }
40}
41
42impl InfoSubset {
43    pub fn normalize(mut self) -> Self {
44        // need to read surface if reading any of one of these forms
45        if self.intersects(InfoSubset::READING_FORM | InfoSubset::NORMALIZED_FORM) {
46            self |= InfoSubset::SURFACE
47        }
48
49        // need to have head word length when splitting
50        if self.intersects(InfoSubset::SPLIT_A | InfoSubset::SPLIT_B) {
51            self |= InfoSubset::HEAD_WORD_LENGTH;
52        }
53
54        self
55    }
56}