sudachi/dic/read/
u16str.rs1use crate::error::{SudachiNomError, SudachiNomResult};
18use nom::number::complete::le_u8;
19use std::iter::FusedIterator;
20
21pub fn utf16_string_parser(input: &[u8]) -> SudachiNomResult<&[u8], String> {
22 utf16_string_data(input).and_then(|(rest, data)| {
23 if data.is_empty() {
24 Ok((rest, String::new()))
25 } else {
26 let capacity = (data.len() + 1) * 3 / 2;
28 let mut result = String::with_capacity(capacity);
29 let iter = U16CodeUnits::new(data);
30 for c in char::decode_utf16(iter) {
31 match c {
32 Err(_) => return Err(nom::Err::Failure(SudachiNomError::Utf16String)),
33 Ok(c) => result.push(c),
34 }
35 }
36 Ok((rest, result))
37 }
38 })
39}
40
41pub fn skip_u16_string(input: &[u8]) -> SudachiNomResult<&[u8], String> {
42 utf16_string_data(input).map(|(rest, _)| (rest, String::new()))
43}
44
45#[inline]
46pub fn utf16_string_data(input: &[u8]) -> SudachiNomResult<&[u8], &[u8]> {
47 let (rest, length) = string_length_parser(input)?;
48 if length == 0 {
49 return Ok((rest, &[]));
50 }
51 let num_bytes = (length * 2) as usize;
52 if rest.len() < num_bytes {
53 return Err(nom::Err::Failure(SudachiNomError::Utf16String));
54 }
55
56 let (data, rest) = rest.split_at(num_bytes);
57
58 Ok((rest, data))
59}
60
61pub fn string_length_parser(input: &[u8]) -> SudachiNomResult<&[u8], u16> {
62 let (rest, length) = le_u8(input)?;
63 let (rest, opt_low) = nom::combinator::cond(length >= 128, le_u8)(rest)?;
65 Ok((
66 rest,
67 match opt_low {
68 Some(low) => ((length as u16 & 0x7F) << 8) | low as u16,
69 None => length as u16,
70 },
71 ))
72}
73
/// Iterator over the UTF-16 code units stored little-endian in a byte slice.
///
/// Each item is built from two consecutive bytes. If the slice has an odd
/// length, the trailing byte cannot form a code unit and is ignored.
pub struct U16CodeUnits<'a> {
    /// Raw bytes holding the little-endian code units.
    data: &'a [u8],
    /// Byte offset of the next code unit to yield.
    offset: usize,
}

impl<'a> U16CodeUnits<'a> {
    /// Creates an iterator positioned at the start of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        U16CodeUnits { data, offset: 0 }
    }
}

impl Iterator for U16CodeUnits<'_> {
    type Item = u16;

    /// Yields the next code unit, or `None` once fewer than two bytes remain.
    fn next(&mut self) -> Option<Self::Item> {
        let end = self.offset + 2;
        // Checked sub-slicing: a dangling odd byte ends iteration instead of
        // panicking on an out-of-bounds index.
        let pair = self.data.get(self.offset..end)?;
        self.offset = end;
        Some(u16::from_le_bytes([pair[0], pair[1]]))
    }

    /// Reports the exact number of code units (not bytes) still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.data.len().saturating_sub(self.offset) / 2;
        (remaining, Some(remaining))
    }
}

// Once `next` returns `None` the offset no longer advances, so every later
// call returns `None` as well.
impl FusedIterator for U16CodeUnits<'_> {}