sudachi/dic/read/
word_info.rs1use crate::dic::lexicon::word_infos::WordInfoData;
18use crate::dic::read::u16str::*;
19use crate::dic::read::{skip_u32_array, skip_wid_array, u32_array_parser, u32_wid_array_parser};
20use crate::dic::subset::InfoSubset;
21use crate::error::SudachiResult;
22use nom::number::complete::{le_i32, le_u16};
23
24pub struct WordInfoParser {
25 info: WordInfoData,
26 flds: InfoSubset,
27}
28
/// Handles one serialized field inside a function that returns
/// `Result<_, _>` whose `Ok` value is `$root.info`.
///
/// Arguments:
/// * `$root`  - value exposing `info` (the result being filled) and `flds`
///   (the set of fields still wanted),
/// * `$data`  - identifier bound to the unread input; it is re-bound
///   (shadowed) to the remainder after the field,
/// * `$name`  - name of the member of `$root.info` that receives the value,
/// * `$field` - flag identifying this field inside `$root.flds`,
/// * `$tfn`   - parser producing `(remainder, value)`,
/// * `$ffn`   - optional parser that only advances past the field.
///
/// Both forms return early with `Ok($root.info)` when no wanted fields
/// remain, and propagate parser failures with `?`.
macro_rules! parse_field {
    // Field with a dedicated skipper: decode it only when it is wanted.
    ($root: expr, $data: ident, $name:tt, $field:expr, $tfn:tt, $ffn:tt) => {
        if $root.flds.is_empty() {
            return Ok($root.info);
        }
        #[allow(unused)]
        let $data = if !$root.flds.contains($field) {
            // not wanted: step over the field without storing anything
            let (rest, _) = $ffn($data)?;
            rest
        } else {
            let (rest, value) = $tfn($data)?;
            $root.info.$name = value;
            $root.flds -= $field;
            rest
        };
    };
    // Field without a skipper: it is always decoded and stored, whether or
    // not it was asked for.
    ($root: expr, $data: ident, $name:tt, $field:expr, $tfn:tt) => {
        if $root.flds.is_empty() {
            return Ok($root.info);
        }
        $root.flds -= $field;
        #[allow(unused)]
        let ($data, value) = $tfn($data)?;
        $root.info.$name = value;
    };
}
68
69impl Default for WordInfoParser {
70 #[inline]
71 fn default() -> Self {
72 Self::subset(InfoSubset::all())
73 }
74}
75
76impl WordInfoParser {
77 #[inline]
78 pub fn subset(flds: InfoSubset) -> WordInfoParser {
79 Self {
80 info: Default::default(),
81 flds,
82 }
83 }
84
85 #[inline]
86 pub fn parse(mut self, data: &[u8]) -> SudachiResult<WordInfoData> {
87 parse_field!(
88 self,
89 data,
90 surface,
91 InfoSubset::SURFACE,
92 utf16_string_parser,
93 skip_u16_string
94 );
95 parse_field!(
96 self,
97 data,
98 head_word_length,
99 InfoSubset::HEAD_WORD_LENGTH,
100 string_length_parser
101 );
102 parse_field!(self, data, pos_id, InfoSubset::POS_ID, le_u16);
103 parse_field!(
104 self,
105 data,
106 normalized_form,
107 InfoSubset::NORMALIZED_FORM,
108 utf16_string_parser,
109 skip_u16_string
110 );
111 parse_field!(
112 self,
113 data,
114 dictionary_form_word_id,
115 InfoSubset::DIC_FORM_WORD_ID,
116 le_i32
117 );
118 parse_field!(
119 self,
120 data,
121 reading_form,
122 InfoSubset::READING_FORM,
123 utf16_string_parser,
124 skip_u16_string
125 );
126 parse_field!(
127 self,
128 data,
129 a_unit_split,
130 InfoSubset::SPLIT_A,
131 u32_wid_array_parser,
132 skip_wid_array
133 );
134 parse_field!(
135 self,
136 data,
137 b_unit_split,
138 InfoSubset::SPLIT_B,
139 u32_wid_array_parser,
140 skip_wid_array
141 );
142 parse_field!(
143 self,
144 data,
145 word_structure,
146 InfoSubset::WORD_STRUCTURE,
147 u32_wid_array_parser,
148 skip_wid_array
149 );
150 parse_field!(
151 self,
152 data,
153 synonym_group_ids,
154 InfoSubset::SYNONYM_GROUP_ID,
155 u32_array_parser,
156 skip_u32_array
157 );
158 Ok(self.info)
159 }
160}