sudachi/dic/subset.rs
1/*
2 * Copyright (c) 2021 Works Applications Co., Ltd.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use bitflags::bitflags;
18
19bitflags! {
20 #[repr(transparent)]
21 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
22 pub struct InfoSubset: u32 {
23 const SURFACE = (1 << 0);
24 const HEAD_WORD_LENGTH = (1 << 1);
25 const POS_ID = (1 << 2);
26 const NORMALIZED_FORM = (1 << 3);
27 const DIC_FORM_WORD_ID = (1 << 4);
28 const READING_FORM = (1 << 5);
29 const SPLIT_A = (1 << 6);
30 const SPLIT_B = (1 << 7);
31 const WORD_STRUCTURE = (1 << 8);
32 const SYNONYM_GROUP_ID = (1 << 9);
33 }
34}
35
36impl Default for InfoSubset {
37 fn default() -> Self {
38 Self::all()
39 }
40}
41
42impl InfoSubset {
43 pub fn normalize(mut self) -> Self {
44 // need to read surface if reading any of one of these forms
45 if self.intersects(InfoSubset::READING_FORM | InfoSubset::NORMALIZED_FORM) {
46 self |= InfoSubset::SURFACE
47 }
48
49 // need to have head word length when splitting
50 if self.intersects(InfoSubset::SPLIT_A | InfoSubset::SPLIT_B) {
51 self |= InfoSubset::HEAD_WORD_LENGTH;
52 }
53
54 self
55 }
56}