1use crate::dic::lexicon_set::LexiconSetError;
18use crate::error::{SudachiError, SudachiResult};
19use std::fmt::{Debug, Display, Formatter};
20
/// Compact identifier of a word, stored as a single packed `u32`.
///
/// The top 4 bits of the raw value hold a dictionary id and the low 28 bits
/// hold the word id inside that dictionary. Ordering, equality and hashing
/// are all derived from the raw value.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct WordId {
    /// Packed representation: `(dic << 28) | word`.
    raw: u32,
}
31
32impl Debug for WordId {
33    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
34        Display::fmt(self, f)
35    }
36}
37
38impl Display for WordId {
39    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
40        let fmtdic = if self.is_oov() { -1 } else { self.dic() as i32 };
41        write!(f, "({}, {})", fmtdic, self.word())
42    }
43}
44
// Selects the low 28 bits of a raw id, i.e. the word part; the remaining
// top 4 bits carry the dictionary id.
const WORD_MASK: u32 = 0x0fff_ffff;
46
47impl WordId {
48    pub const fn from_raw(raw: u32) -> WordId {
50        WordId { raw }
51    }
52
53    pub fn new(dic: u8, word: u32) -> WordId {
55        debug_assert_eq!(word & (!WORD_MASK), 0);
56        debug_assert_eq!(dic & (!0xf), 0);
57        let dic_part = ((dic & 0xf) as u32) << 28;
58        let word_part = word & WORD_MASK;
59        let raw = dic_part | word_part;
60        Self::from_raw(raw)
61    }
62
63    pub fn checked(dic: u8, word: u32) -> SudachiResult<WordId> {
65        if dic & !0xf != 0 {
66            return Err(SudachiError::LexiconSetError(
67                LexiconSetError::TooLargeDictionaryId(dic as usize),
68            ));
69        }
70
71        if word & !WORD_MASK != 0 {
72            return Err(SudachiError::LexiconSetError(
73                LexiconSetError::TooLargeWordId(word, WORD_MASK as usize),
74            ));
75        }
76
77        Ok(Self::new(dic, word))
78    }
79
80    pub fn oov(pos_id: u32) -> WordId {
82        Self::new(0xf, pos_id)
83    }
84
85    pub fn dic(&self) -> u8 {
87        (self.raw >> 28) as u8
88    }
89
90    pub fn word(&self) -> u32 {
92        self.raw & WORD_MASK
93    }
94
95    pub fn is_system(&self) -> bool {
97        self.dic() == 0
98    }
99
100    pub fn is_user(&self) -> bool {
102        !matches!(self.dic(), 0 | 0xf)
103    }
104
105    pub fn as_raw(&self) -> u32 {
106        self.raw
107    }
108
109    pub fn is_oov(&self) -> bool {
112        self.dic() == 0xf
113    }
114
115    pub fn is_special(&self) -> bool {
117        self >= &Self::EOS && self < &Self::INVALID
118    }
119
120    pub const INVALID: WordId = WordId::from_raw(0xffff_ffff);
121    pub const BOS: WordId = WordId::from_raw(0xffff_fffe);
122    pub const EOS: WordId = WordId::from_raw(0xffff_fffd);
123    pub const MAX_WORD: u32 = 0x0fff_ffff;
124}
125
#[cfg(test)]
mod test {
    use super::*;

    /// Dictionary ids that are neither system (0) nor OOV (15) in these tests.
    const USER_DICS: [u8; 2] = [1, 14];

    /// Builds an id from its parts and checks that both parts read back unchanged.
    fn assert_create(dic: u8, word: u32) {
        let built = WordId::new(dic, word);
        assert_eq!(dic, built.dic());
        assert_eq!(word, built.word());
    }

    #[test]
    fn create() {
        let cases: [(u8, u32); 9] = [
            (0, 0),
            (0, 1),
            (0, 0x0fffffff),
            (14, 0x0fffffff),
            (1, 0),
            (1, 0x0fffffff),
            (15, 3121),
            (15, 0),
            (15, 0x0fffffff),
        ];
        for &(dic, word) in cases.iter() {
            assert_create(dic, word);
        }
    }

    #[test]
    fn display() {
        let id1 = WordId::new(0, 521321);
        let rendered = format!("{}", id1);
        assert_eq!("(0, 521321)", rendered);
    }

    #[test]
    fn debug() {
        let id1 = WordId::new(0, 521321);
        let rendered = format!("{:?}", id1);
        assert_eq!("(0, 521321)", rendered);
    }

    #[test]
    fn is_system() {
        assert!(WordId::new(0, 0).is_system());
        for &dic in USER_DICS.iter() {
            assert!(!WordId::new(dic, 0).is_system());
        }
        assert!(!WordId::new(15, 0).is_system());
    }

    #[test]
    fn is_user() {
        assert!(!WordId::new(0, 0).is_user());
        for &dic in USER_DICS.iter() {
            assert!(WordId::new(dic, 0).is_user());
        }
        assert!(!WordId::new(15, 0).is_user());
    }

    #[test]
    fn is_oov() {
        assert!(!WordId::new(0, 0).is_oov());
        for &dic in USER_DICS.iter() {
            assert!(!WordId::new(dic, 0).is_oov());
        }
        assert!(WordId::new(15, 0).is_oov());
    }

    #[test]
    fn is_special() {
        // The two sentinel markers are special; INVALID and ordinary ids are not.
        for special in [WordId::EOS, WordId::BOS].iter() {
            assert!(special.is_special());
        }
        assert!(!WordId::INVALID.is_special());
        assert!(!WordId::new(0, 0).is_special());
    }
}