sudachi/dic/read/
word_info.rs

1/*
2 *  Copyright (c) 2021 Works Applications Co., Ltd.
3 *
4 *  Licensed under the Apache License, Version 2.0 (the "License");
5 *  you may not use this file except in compliance with the License.
6 *  You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 *   Unless required by applicable law or agreed to in writing, software
11 *  distributed under the License is distributed on an "AS IS" BASIS,
12 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 *  See the License for the specific language governing permissions and
14 *  limitations under the License.
15 */
16
17use crate::dic::lexicon::word_infos::WordInfoData;
18use crate::dic::read::u16str::*;
19use crate::dic::read::{skip_u32_array, skip_wid_array, u32_array_parser, u32_wid_array_parser};
20use crate::dic::subset::InfoSubset;
21use crate::error::SudachiResult;
22use nom::number::complete::{le_i32, le_u16};
23
24pub struct WordInfoParser {
25    info: WordInfoData,
26    flds: InfoSubset,
27}
28
/// Reads one field of the WordInfo binary representation and advances the input.
///
/// The expansion must sit inside a function that returns `Result<_, _>` whose
/// `Ok` type is the type of `$root.info`. It first returns `Ok($root.info)` if
/// `$root.flds` is already empty; otherwise it shadows `$data` with the input
/// that remains after the field.
///
/// Arguments, in order:
/// * `$root` - value that owns the `info` and `flds` fields;
/// * `$data` - identifier of the input variable (rebound by the expansion);
/// * `$name` - field of `$root.info` that receives the parsed value;
/// * `$field` - flag identifying the field inside `$root.flds`;
/// * `$tfn` - "true" function which actually parses the data;
/// * `$ffn` - (six-argument form only) "false" function which skips the data
///   and only advances the parser position.
///
/// The six-argument form parses the field only when it is requested and skips
/// it otherwise. Use it for "heavy" fields which require memory allocation.
///
/// The five-argument form takes a single function and unconditionally writes
/// the value from the binary form into the structure. Use it for "light" fields.
macro_rules! parse_field {
    // Heavy field: skip it unless it was requested.
    ($root:expr, $data:ident, $name:tt, $field:expr, $tfn:tt, $ffn:tt) => {
        if $root.flds.is_empty() {
            return Ok($root.info);
        }
        // The rebound input may never be read when this is the last field.
        #[allow(unused)]
        let $data = if !$root.flds.contains($field) {
            let (tail, _) = $ffn($data)?;
            tail
        } else {
            let (tail, value) = $tfn($data)?;
            $root.info.$name = value;
            $root.flds -= $field;
            tail
        };
    };
    // Light field: always parsed and stored.
    ($root:expr, $data:ident, $name:tt, $field:expr, $tfn:tt) => {
        if $root.flds.is_empty() {
            return Ok($root.info);
        }
        $root.flds -= $field;
        // The rebound input may never be read when this is the last field.
        #[allow(unused)]
        let $data = {
            let (tail, value) = $tfn($data)?;
            $root.info.$name = value;
            tail
        };
    };
}
68
69impl Default for WordInfoParser {
70    #[inline]
71    fn default() -> Self {
72        Self::subset(InfoSubset::all())
73    }
74}
75
76impl WordInfoParser {
77    #[inline]
78    pub fn subset(flds: InfoSubset) -> WordInfoParser {
79        Self {
80            info: Default::default(),
81            flds,
82        }
83    }
84
85    #[inline]
86    pub fn parse(mut self, data: &[u8]) -> SudachiResult<WordInfoData> {
87        parse_field!(
88            self,
89            data,
90            surface,
91            InfoSubset::SURFACE,
92            utf16_string_parser,
93            skip_u16_string
94        );
95        parse_field!(
96            self,
97            data,
98            head_word_length,
99            InfoSubset::HEAD_WORD_LENGTH,
100            string_length_parser
101        );
102        parse_field!(self, data, pos_id, InfoSubset::POS_ID, le_u16);
103        parse_field!(
104            self,
105            data,
106            normalized_form,
107            InfoSubset::NORMALIZED_FORM,
108            utf16_string_parser,
109            skip_u16_string
110        );
111        parse_field!(
112            self,
113            data,
114            dictionary_form_word_id,
115            InfoSubset::DIC_FORM_WORD_ID,
116            le_i32
117        );
118        parse_field!(
119            self,
120            data,
121            reading_form,
122            InfoSubset::READING_FORM,
123            utf16_string_parser,
124            skip_u16_string
125        );
126        parse_field!(
127            self,
128            data,
129            a_unit_split,
130            InfoSubset::SPLIT_A,
131            u32_wid_array_parser,
132            skip_wid_array
133        );
134        parse_field!(
135            self,
136            data,
137            b_unit_split,
138            InfoSubset::SPLIT_B,
139            u32_wid_array_parser,
140            skip_wid_array
141        );
142        parse_field!(
143            self,
144            data,
145            word_structure,
146            InfoSubset::WORD_STRUCTURE,
147            u32_wid_array_parser,
148            skip_wid_array
149        );
150        parse_field!(
151            self,
152            data,
153            synonym_group_ids,
154            InfoSubset::SYNONYM_GROUP_ID,
155            u32_array_parser,
156            skip_u32_array
157        );
158        Ok(self.info)
159    }
160}