1use crate::dic::lexicon_set::LexiconSetError;
18use crate::error::{SudachiError, SudachiResult};
19use std::fmt::{Debug, Display, Formatter};
20
/// Identifier of a word, stored as one packed `u32`.
///
/// The accessors in this file treat the top 4 bits as a dictionary id and the
/// remaining 28 bits as a word index inside that dictionary.
/// `#[repr(transparent)]` keeps the layout identical to a bare `u32`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct WordId {
    /// Packed value: dictionary id in the high nibble, word index below it.
    raw: u32,
}
31
32impl Debug for WordId {
33 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
34 Display::fmt(self, f)
35 }
36}
37
38impl Display for WordId {
39 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
40 let fmtdic = if self.is_oov() { -1 } else { self.dic() as i32 };
41 write!(f, "({}, {})", fmtdic, self.word())
42 }
43}
44
/// Mask selecting the low 28 bits of a raw id (the word part).
const WORD_MASK: u32 = (1 << 28) - 1;
46
47impl WordId {
48 pub const fn from_raw(raw: u32) -> WordId {
50 WordId { raw }
51 }
52
53 pub fn new(dic: u8, word: u32) -> WordId {
55 debug_assert_eq!(word & (!WORD_MASK), 0);
56 debug_assert_eq!(dic & (!0xf), 0);
57 let dic_part = ((dic & 0xf) as u32) << 28;
58 let word_part = word & WORD_MASK;
59 let raw = dic_part | word_part;
60 Self::from_raw(raw)
61 }
62
63 pub fn checked(dic: u8, word: u32) -> SudachiResult<WordId> {
65 if dic & !0xf != 0 {
66 return Err(SudachiError::LexiconSetError(
67 LexiconSetError::TooLargeDictionaryId(dic as usize),
68 ));
69 }
70
71 if word & !WORD_MASK != 0 {
72 return Err(SudachiError::LexiconSetError(
73 LexiconSetError::TooLargeWordId(word, WORD_MASK as usize),
74 ));
75 }
76
77 Ok(Self::new(dic, word))
78 }
79
80 pub fn oov(pos_id: u32) -> WordId {
82 Self::new(0xf, pos_id)
83 }
84
85 pub fn dic(&self) -> u8 {
87 (self.raw >> 28) as u8
88 }
89
90 pub fn word(&self) -> u32 {
92 self.raw & WORD_MASK
93 }
94
95 pub fn is_system(&self) -> bool {
97 self.dic() == 0
98 }
99
100 pub fn is_user(&self) -> bool {
102 !matches!(self.dic(), 0 | 0xf)
103 }
104
105 pub fn as_raw(&self) -> u32 {
106 self.raw
107 }
108
109 pub fn is_oov(&self) -> bool {
112 self.dic() == 0xf
113 }
114
115 pub fn is_special(&self) -> bool {
117 self >= &Self::EOS && self < &Self::INVALID
118 }
119
120 pub const INVALID: WordId = WordId::from_raw(0xffff_ffff);
121 pub const BOS: WordId = WordId::from_raw(0xffff_fffe);
122 pub const EOS: WordId = WordId::from_raw(0xffff_fffd);
123 pub const MAX_WORD: u32 = 0x0fff_ffff;
124}
125
#[cfg(test)]
mod test {
    use super::*;

    /// Packs `(dic, word)` and checks that both accessors return the inputs.
    fn assert_create(dic: u8, word: u32) {
        let built = WordId::new(dic, word);
        assert_eq!(dic, built.dic());
        assert_eq!(word, built.word());
    }

    #[test]
    fn create() {
        let cases: [(u8, u32); 9] = [
            (0, 0),
            (0, 1),
            (0, 0x0fffffff),
            (14, 0x0fffffff),
            (1, 0),
            (1, 0x0fffffff),
            (15, 3121),
            (15, 0),
            (15, 0x0fffffff),
        ];
        for &(dic, word) in cases.iter() {
            assert_create(dic, word);
        }
    }

    #[test]
    fn display() {
        let id1 = WordId::new(0, 521321);
        assert_eq!("(0, 521321)", id1.to_string());
    }

    #[test]
    fn debug() {
        let id1 = WordId::new(0, 521321);
        let rendered = format!("{:?}", id1);
        assert_eq!("(0, 521321)", rendered);
    }

    #[test]
    fn is_system() {
        // (dictionary id, expected result)
        let expectations: [(u8, bool); 4] = [(0, true), (1, false), (14, false), (15, false)];
        for &(dic, expected) in expectations.iter() {
            assert_eq!(expected, WordId::new(dic, 0).is_system());
        }
    }

    #[test]
    fn is_user() {
        // (dictionary id, expected result)
        let expectations: [(u8, bool); 4] = [(0, false), (1, true), (14, true), (15, false)];
        for &(dic, expected) in expectations.iter() {
            assert_eq!(expected, WordId::new(dic, 0).is_user());
        }
    }

    #[test]
    fn is_oov() {
        // (dictionary id, expected result)
        let expectations: [(u8, bool); 4] = [(0, false), (1, false), (14, false), (15, true)];
        for &(dic, expected) in expectations.iter() {
            assert_eq!(expected, WordId::new(dic, 0).is_oov());
        }
    }

    #[test]
    fn is_special() {
        // The two markers inside the special range.
        for marker in [WordId::EOS, WordId::BOS].iter() {
            assert!(marker.is_special());
        }
        // The sentinel and an ordinary id fall outside it.
        for other in [WordId::INVALID, WordId::new(0, 0)].iter() {
            assert!(!other.is_special());
        }
    }
}