sudachi/dic/
subset.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
 *  Copyright (c) 2021 Works Applications Co., Ltd.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

use bitflags::bitflags;

bitflags! {
    #[repr(transparent)]
    #[derive(Copy, Clone, Eq, PartialEq, Debug)]
    pub struct InfoSubset: u32 {
        const SURFACE = (1 << 0);
        const HEAD_WORD_LENGTH = (1 << 1);
        const POS_ID = (1 << 2);
        const NORMALIZED_FORM = (1 << 3);
        const DIC_FORM_WORD_ID = (1 << 4);
        const READING_FORM = (1 << 5);
        const SPLIT_A = (1 << 6);
        const SPLIT_B = (1 << 7);
        const WORD_STRUCTURE = (1 << 8);
        const SYNONYM_GROUP_ID = (1 << 9);
    }
}

impl Default for InfoSubset {
    fn default() -> Self {
        Self::all()
    }
}

impl InfoSubset {
    pub fn normalize(mut self) -> Self {
        // need to read surface if reading any of one of these forms
        if self.intersects(InfoSubset::READING_FORM | InfoSubset::NORMALIZED_FORM) {
            self |= InfoSubset::SURFACE
        }

        // need to have head word length when splitting
        if self.intersects(InfoSubset::SPLIT_A | InfoSubset::SPLIT_B) {
            self |= InfoSubset::HEAD_WORD_LENGTH;
        }

        self
    }
}