sudachi/dic/build/
resolve.rs1use crate::analysis::stateless_tokenizer::DictionaryAccess;
18use crate::dic::build::lexicon::{RawLexiconEntry, SplitUnitResolver};
19use crate::dic::lexicon::word_infos::WordInfoData;
20use crate::dic::subset::InfoSubset;
21use crate::dic::word_id::WordId;
22use crate::error::SudachiResult;
23use crate::util::fxhash::FxBuildHasher;
24use std::collections::HashMap;
25
26type ResolutionCandidateMap<T> = HashMap<T, Vec<(u16, Option<T>, WordId)>, FxBuildHasher>;
28
29pub struct BinDictResolver {
32 index: ResolutionCandidateMap<String>,
33}
34
35impl BinDictResolver {
36 pub fn new<D: DictionaryAccess>(dict: D) -> SudachiResult<Self> {
37 let lex = dict.lexicon();
38 let size = lex.size();
39 let mut index: ResolutionCandidateMap<String> = HashMap::default();
40 for id in 0..size {
41 let wid = WordId::new(0, id);
42 let winfo: WordInfoData = lex
43 .get_word_info_subset(
44 wid,
45 InfoSubset::SURFACE | InfoSubset::READING_FORM | InfoSubset::POS_ID,
46 )?
47 .into();
48 let surface = winfo.surface;
49 let reading = winfo.reading_form;
50 let pos_id = winfo.pos_id;
51
52 let rdfield = if reading.is_empty() || surface == reading {
53 None
54 } else {
55 Some(reading)
56 };
57
58 index
59 .entry(surface)
60 .or_default()
61 .push((pos_id, rdfield, wid));
62 }
63
64 Ok(Self { index })
65 }
66}
67
68impl SplitUnitResolver for BinDictResolver {
69 fn resolve_inline(&self, surface: &str, pos: u16, reading: Option<&str>) -> Option<WordId> {
70 self.index.get(surface).and_then(|v| {
71 for (p, rd, wid) in v {
72 if *p == pos && reading.eq(&rd.as_deref()) {
73 return Some(*wid);
74 }
75 }
76 None
77 })
78 }
79}
80
81pub struct RawDictResolver<'a> {
82 data: ResolutionCandidateMap<&'a str>,
83}
84
85impl<'a> RawDictResolver<'a> {
86 pub(crate) fn new(entries: &'a [RawLexiconEntry], user: bool) -> Self {
87 let mut data: ResolutionCandidateMap<&'a str> = HashMap::default();
88
89 let dic_id = if user { 1 } else { 0 };
90
91 for (i, e) in entries.iter().enumerate() {
92 let surface: &'a str = e.surface();
93 let reading: &'a str = e.reading();
94 let wid = WordId::new(dic_id, i as u32);
95
96 let read_opt = if surface == reading {
97 None
98 } else {
99 Some(reading)
100 };
101
102 data.entry(surface)
103 .or_default()
104 .push((e.pos, read_opt, wid));
105 }
106
107 Self { data }
108 }
109}
110
111impl SplitUnitResolver for RawDictResolver<'_> {
112 fn resolve_inline(&self, surface: &str, pos: u16, reading: Option<&str>) -> Option<WordId> {
113 self.data.get(surface).and_then(|data| {
114 for (p, rd, wid) in data {
115 if *p == pos && *rd == reading {
116 return Some(*wid);
117 }
118 }
119 None
120 })
121 }
122}
123
/// Pair of resolvers that are consulted one after the other.
pub(crate) struct ChainedResolver<A, B> {
    /// Resolver that is asked first.
    a: A,
    /// Resolver that is asked only when `a` finds nothing.
    b: B,
}
128
129impl<A: SplitUnitResolver, B: SplitUnitResolver> ChainedResolver<A, B> {
130 pub(crate) fn new(a: A, b: B) -> Self {
131 Self { a, b }
132 }
133}
134
135impl<A: SplitUnitResolver, B: SplitUnitResolver> SplitUnitResolver for ChainedResolver<A, B> {
136 fn resolve_inline(&self, surface: &str, pos: u16, reading: Option<&str>) -> Option<WordId> {
137 self.a
138 .resolve_inline(surface, pos, reading)
139 .or_else(|| self.b.resolve_inline(surface, pos, reading))
140 }
141}