sudachi/dic/build/
primitives.rs1use crate::dic::build::error::BuildFailure::InvalidSize;
18use crate::dic::build::error::DicWriteResult;
19use crate::dic::build::lexicon::SplitUnit;
20use crate::dic::word_id::WordId;
21use std::io::Write;
22
23pub struct Utf16Writer {
24 buffer: Vec<u8>,
25}
26
27impl Utf16Writer {
28 pub fn new() -> Self {
29 Utf16Writer {
30 buffer: Vec::with_capacity(256),
31 }
32 }
33
34 pub fn write_len<W: Write>(&self, w: &mut W, length: usize) -> DicWriteResult<usize> {
35 if length > i16::MAX as _ {
36 return Err(InvalidSize {
37 actual: length,
38 expected: i16::MAX as _,
39 });
40 }
41
42 let length = length as u16;
43
44 let prefix = if length < 127 {
45 w.write_all(&[length as u8])?;
46 1
47 } else {
48 let b0 = length as u8;
49 let b1 = ((length >> 8) as u8) | 0x80;
50 w.write_all(&[b1, b0])?;
51 2
52 };
53
54 Ok(prefix)
55 }
56
57 pub fn write<W: Write, T: AsRef<str>>(&mut self, w: &mut W, data: T) -> DicWriteResult<usize> {
58 let str_data: &str = data.as_ref();
59 if str_data.len() > 4 * 64 * 1024 {
60 return Err(InvalidSize {
61 actual: str_data.len(),
62 expected: 4 * 64 * 1024,
63 });
64 }
65
66 let mut scratch: [u16; 2] = [0; 2];
67 let mut length: usize = 0;
68 self.buffer.clear();
69
70 for c in str_data.chars() {
71 for u16c in c.encode_utf16(&mut scratch) {
72 self.buffer.extend_from_slice(&u16c.to_le_bytes());
73 length += 1;
74 }
75 }
76
77 let prefix = self.write_len(w, length)?;
78 w.write_all(&self.buffer)?;
79 Ok(prefix + self.buffer.len())
80 }
81
82 pub fn write_empty_if_equal<W, T1, T2>(
83 &mut self,
84 w: &mut W,
85 data: T1,
86 other: T2,
87 ) -> DicWriteResult<usize>
88 where
89 W: Write,
90 T1: AsRef<str> + PartialEq<T2>,
91 {
92 if data == other {
93 self.write(w, "")
94 } else {
95 self.write(w, data)
96 }
97 }
98}
99
/// Types that can be represented as a single `u32`.
///
/// `write_u32_array` relies on this to turn each element into the value it
/// serializes.
pub(crate) trait ToU32 {
    /// Returns the `u32` representation of `self`.
    fn to_u32(&self) -> u32;
}
103
104impl ToU32 for u32 {
105 fn to_u32(&self) -> u32 {
106 *self
107 }
108}
109
110impl ToU32 for i32 {
111 fn to_u32(&self) -> u32 {
112 *self as u32
113 }
114}
115
116impl ToU32 for WordId {
117 fn to_u32(&self) -> u32 {
118 self.as_raw()
119 }
120}
121
122impl ToU32 for SplitUnit {
123 fn to_u32(&self) -> u32 {
124 match self {
125 SplitUnit::Ref(w) => w.to_u32(),
126 SplitUnit::Inline { .. } => panic!("splits must be resolved before writing"),
127 }
128 }
129}
130
131pub(crate) fn write_u32_array<W: Write, T: ToU32>(w: &mut W, data: &[T]) -> DicWriteResult<usize> {
132 let len = data.len();
133 if len > 127 {
134 return Err(InvalidSize {
135 expected: 127,
136 actual: len,
137 });
138 }
139 w.write_all(&[len as u8])?;
140 let mut written = 1;
141
142 for o in data {
143 let i = o.to_u32();
144 w.write_all(&i.to_le_bytes())?;
145 written += 4;
146 }
147
148 Ok(written)
149}
150
#[cfg(test)]
mod test {
    use crate::dic::build::error::DicWriteResult;
    use crate::dic::build::primitives::{write_u32_array, Utf16Writer};
    use crate::dic::read::u16str::utf16_string_parser;
    use crate::dic::read::u32_array_parser;
    use claim::assert_matches;

    /// A single string survives a write/parse round trip with nothing left over.
    #[test]
    fn write_utf16() {
        let text = "これはテスト文です";
        let mut bytes: Vec<u8> = Vec::new();
        let mut writer = Utf16Writer::new();
        writer.write(&mut bytes, text).expect("success");

        let (rest, decoded) = utf16_string_parser(&bytes).expect("parsed");
        assert_eq!(0, rest.len());
        assert_eq!(text, decoded);
    }

    /// Several strings written back to back are parsed back in order, and the
    /// reported byte counts always match the size of the output.
    #[test]
    fn write_strings() -> DicWriteResult<()> {
        let empty = "";
        // The second character lies outside the BMP, so it takes two code units.
        let with_surrogates = "あ𠮟";
        // 150 code units.
        let long_ascii = "0123456789".repeat(15);

        let mut writer = Utf16Writer::new();
        let mut bytes: Vec<u8> = Vec::new();

        let mut total = writer.write(&mut bytes, empty)?;
        assert_eq!(bytes.len(), total);
        total += writer.write(&mut bytes, with_surrogates)?;
        assert_eq!(bytes.len(), total);
        total += writer.write(&mut bytes, &long_ascii)?;
        assert_eq!(bytes.len(), total);

        let (rest, first) = utf16_string_parser(&bytes).expect("ok");
        assert_eq!(first, empty);
        let (rest, second) = utf16_string_parser(rest).expect("ok");
        assert_eq!(second, with_surrogates);
        let (rest, third) = utf16_string_parser(rest).expect("ok");
        assert_eq!(third, long_ascii);
        assert_eq!(rest.len(), 0);

        Ok(())
    }

    /// An empty array is written as just the zero count byte.
    #[test]
    fn write_ints_empty() {
        let mut bytes: Vec<u8> = Vec::new();
        let count = write_u32_array(&mut bytes, &[0u32; 0]).expect("ok");
        assert_eq!(count, 1);
        assert_eq!(bytes, b"\0");
    }

    /// Values, including both `u32` extremes, survive a write/parse round trip.
    #[test]
    fn write_ints_full() {
        let values = [0, 5, u32::MAX, u32::MIN];
        let mut bytes: Vec<u8> = Vec::new();
        let count = write_u32_array(&mut bytes, &values).expect("ok");

        let (rest, decoded) = u32_array_parser(&bytes).expect("ok");
        assert_eq!(rest, b"");
        assert_eq!(decoded, values);
        assert_eq!(count, 4 * 4 + 1);
    }

    /// An array longer than the writer accepts is rejected.
    #[test]
    fn write_ints_over_length() {
        let values = [0u32; 130];
        let mut bytes: Vec<u8> = Vec::new();
        let outcome = write_u32_array(&mut bytes, &values);
        assert_matches!(outcome, Err(_));
    }
}