sudachi/dic/build/
conn.rs

1/*
2 *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
3 *
4 *  Licensed under the Apache License, Version 2.0 (the "License");
5 *  you may not use this file except in compliance with the License.
6 *  You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 *   Unless required by applicable law or agreed to in writing, software
11 *  distributed under the License is distributed on an "AS IS" BASIS,
12 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 *  See the License for the specific language governing permissions and
14 *  limitations under the License.
15 */
16
17use std::fs::File;
18use std::io::{BufReader, Write};
19use std::path::Path;
20
21use lazy_static::lazy_static;
22use regex::Regex;
23
24use crate::dic::build::error::{BuildFailure, DicBuildError, DicCompilationCtx, DicWriteResult};
25use crate::dic::build::parse::{it_next, parse_i16};
26use crate::error::SudachiResult;
27
28pub struct ConnBuffer {
29    matrix: Vec<u8>,
30    ctx: DicCompilationCtx,
31    line: String,
32    num_left: i16,
33    num_right: i16,
34}
35
36lazy_static! {
37    static ref SPLIT_REGEX: Regex = Regex::new(r"\s+").unwrap();
38    static ref EMPTY_LINE: Regex = Regex::new(r"^\s*$").unwrap();
39}
40
41impl ConnBuffer {
42    pub fn new() -> Self {
43        Self {
44            matrix: Vec::new(),
45            ctx: DicCompilationCtx::default(),
46            line: String::new(),
47            num_left: 0,
48            num_right: 0,
49        }
50    }
51
52    #[allow(unused)]
53    pub fn matrix(&self) -> &[u8] {
54        &self.matrix
55    }
56
57    #[allow(unused)]
58    pub fn left(&self) -> i16 {
59        self.num_left
60    }
61
62    #[allow(unused)]
63    pub fn right(&self) -> i16 {
64        self.num_right
65    }
66
67    pub fn write_to<W: Write>(&self, writer: &mut W) -> SudachiResult<usize> {
68        if self.num_left < 0 {
69            return num_error("left", self.num_left);
70        }
71
72        if self.num_right < 0 {
73            return num_error("right", self.num_right);
74        }
75
76        writer.write_all(&i16::to_le_bytes(self.num_left))?;
77        writer.write_all(&i16::to_le_bytes(self.num_right))?;
78        writer.write_all(&self.matrix)?;
79        Ok(4 + self.matrix.len())
80    }
81
82    pub fn read_file(&mut self, path: &Path) -> SudachiResult<()> {
83        let file = File::open(path)?;
84        let bufrd = BufReader::with_capacity(32 * 1024, file);
85        let filename = path.to_str().unwrap_or("unknown").to_owned();
86        let old = self.ctx.set_filename(filename);
87        let status = self.read(bufrd);
88        self.ctx.set_filename(old);
89        status
90    }
91
92    pub fn read<R: std::io::BufRead>(&mut self, mut reader: R) -> SudachiResult<()> {
93        self.ctx.set_line(0);
94        loop {
95            let nread = reader.read_line(&mut self.line)?;
96            if nread == 0 {
97                todo!()
98            }
99            self.ctx.add_line(1);
100            if !EMPTY_LINE.is_match(&self.line) {
101                break;
102            }
103        }
104
105        let result = self.parse_header();
106        let (left, right) = self.ctx.transform(result)?;
107        if left < 0 {
108            return num_error("left", left);
109        }
110
111        if right < 0 {
112            return num_error("right", right);
113        }
114
115        let size = left as usize * right as usize * 2;
116        self.matrix.resize(size, 0);
117        self.num_left = left;
118        self.num_right = right;
119
120        loop {
121            self.line.clear();
122            let nread = reader.read_line(&mut self.line)?;
123            if nread == 0 {
124                break;
125            }
126            self.ctx.add_line(1);
127
128            if EMPTY_LINE.is_match(&self.line) {
129                continue;
130            }
131
132            // borrow checker complains when written as a single line
133            let status = self.parse_line();
134            self.ctx.transform(status)?;
135        }
136
137        Ok(())
138    }
139
140    fn parse_header(&mut self) -> DicWriteResult<(i16, i16)> {
141        let mut items = SPLIT_REGEX.splitn(self.line.trim(), 2);
142        // TODO: fix get_next error message
143        let left = it_next(&self.line, &mut items, "left_num", parse_i16)?;
144        let right = it_next(&self.line, &mut items, "right_num", parse_i16)?;
145        Ok((left, right))
146    }
147
148    fn parse_line(&mut self) -> DicWriteResult<()> {
149        let mut items = SPLIT_REGEX.splitn(self.line.trim(), 3);
150        let left = it_next(&self.line, &mut items, "left", parse_i16)?;
151        let right = it_next(&self.line, &mut items, "right", parse_i16)?;
152        let cost = it_next(&self.line, &mut items, "cost", parse_i16)?;
153        self.write_elem(left, right, cost)
154    }
155
156    fn write_elem(&mut self, left: i16, right: i16, cost: i16) -> DicWriteResult<()> {
157        let index = right as usize * self.num_left as usize + left as usize;
158        let index = index * 2;
159        let bytes = cost.to_le_bytes();
160        self.matrix[index] = bytes[0];
161        self.matrix[index + 1] = bytes[1];
162        Ok(())
163    }
164}
165
166fn num_error<T>(part: &'static str, value: i16) -> SudachiResult<T> {
167    Err(DicBuildError {
168        file: "<connection>".to_owned(),
169        line: 0,
170        cause: BuildFailure::InvalidConnSize(part, value),
171    }
172    .into())
173}
174
#[cfg(test)]
mod test {
    use crate::dic::build::conn::ConnBuffer;
    use crate::dic::connect::ConnectionMatrix;

    /// Parses a 2x2 matrix (with a leading blank line) and reads every cell back.
    #[test]
    fn parse_simple2x2() {
        let text = "
        2 2
        0 0 0
        0 1 1
        1 0 2
        1 1 3";
        let mut buffer = ConnBuffer::new();
        buffer.read(text.as_bytes()).unwrap();

        let matrix = ConnectionMatrix::from_offset_size(
            buffer.matrix(),
            0,
            buffer.left() as _,
            buffer.right() as _,
        )
        .unwrap();

        // (first id, second id, expected cost), in the same order as the input lines
        for &(first, second, expected) in &[(0, 0, 0), (0, 1, 1), (1, 0, 2), (1, 1, 3)] {
            assert_eq!(matrix.cost(first, second), expected);
        }
    }
}