206 lines
6.9 KiB
Rust
206 lines
6.9 KiB
Rust
use std::mem::replace;
|
|
|
|
/// Byte-level state used for RFC 5545-style line unfolding: a newline is
/// held back until the next byte shows whether the line is continued.
#[derive(Clone, Copy, PartialEq, Debug)]
enum ByteState {
    /// Plain character stream; bytes pass straight through.
    Char,
    /// A '\n' was seen and is being held back; a following ' ' means the
    /// physical line is folded and both bytes are dropped.
    Newline,
}

/// Position within a content line of the form "KEY;PROP=VAL:VALUE".
#[derive(Clone, Copy, PartialEq, Debug)]
enum LineState {
    Key,
    PropName,
    PropValue,
    Value,
    /// Inside a value, immediately after a '\\' escape character.
    ValueEscape,
}

/// A lexical token produced by [`Tokenizer::feed`].
#[derive(Debug, PartialEq)]
pub enum Token {
    Key(String),
    PropName(String),
    PropValue(String),
    Value(String),
}

/// Streaming tokenizer for iCalendar-style "KEY;PROP=VAL:VALUE" lines.
///
/// Input may be fed in arbitrary chunks; all state is kept across calls.
#[derive(Debug)]
pub struct Tokenizer {
    byte_state: ByteState,
    line_state: LineState,
    /// Bytes of the token currently being accumulated.
    buffer: Vec<u8>,
}

impl Default for Tokenizer {
    fn default() -> Self {
        Tokenizer::new()
    }
}

impl Tokenizer {
    /// Creates a tokenizer at the start of input, expecting a key.
    pub fn new() -> Self {
        Tokenizer {
            byte_state: ByteState::Char,
            line_state: LineState::Key,
            buffer: vec![],
        }
    }

    /// Drains the accumulated bytes, decodes them as UTF-8 and hands the
    /// resulting token (built with `make`) to `f`.
    ///
    /// NOTE: invalid UTF-8 is only logged to stdout and the bytes are
    /// dropped — the error is not propagated to the caller.
    fn flush<F>(&mut self, f: &mut F, make: fn(String) -> Token)
    where
        F: FnMut(Token),
    {
        let buffer = replace(&mut self.buffer, vec![]);
        match String::from_utf8(buffer) {
            Ok(s) => f(make(s)),
            Err(e) => println!("UTF8 error: {:?}", e),
        }
    }

    /// Feeds a chunk of raw bytes, invoking `f` once per completed token.
    ///
    /// Handles CR stripping, line unfolding (newline followed by a space
    /// joins two physical lines) and backslash escapes inside values
    /// (`\n` -> LF, `\r` -> CR, anything else -> the character itself).
    pub fn feed<F>(&mut self, input: &'_ [u8], mut f: F)
    where
        F: FnMut(Token),
    {
        for b in input {
            // Unfolding layer: decide which (0 to 2) bytes reach the
            // line-level state machine for this input byte.
            let bs = match (self.byte_state, *b as char) {
                // Raw CRs never reach the line parser.
                (_, '\r') => [None; 2],
                (ByteState::Char, '\n') => {
                    // Hold the newline back until we know whether the next
                    // physical line is a folded continuation.
                    self.byte_state = ByteState::Newline;
                    [None; 2]
                }
                (ByteState::Char, _) => [Some(*b), None],
                (ByteState::Newline, ' ') => {
                    // Folded line: swallow both the newline and the space.
                    self.byte_state = ByteState::Char;
                    [None; 2]
                }
                (ByteState::Newline, _) => {
                    // Real line break: release the held newline, then the byte.
                    self.byte_state = ByteState::Char;
                    [Some(b'\n'), Some(*b)]
                }
            };

            for b in bs.iter().copied().flatten() {
                match (self.line_state, b as char) {
                    (LineState::Key, ':') => {
                        self.flush(&mut f, Token::Key);
                        self.line_state = LineState::Value;
                    }
                    (LineState::Key, '\n') => {
                        // A line that ended without ':' has no value; report
                        // and discard whatever key text was collected.
                        if !self.buffer.is_empty() {
                            println!("Key without value: {:?}", self.buffer);
                            self.buffer = vec![];
                        }
                        self.line_state = LineState::Key;
                    }
                    (LineState::Key, ';') => {
                        self.flush(&mut f, Token::Key);
                        self.line_state = LineState::PropName;
                    }
                    (LineState::PropName, '=') => {
                        self.flush(&mut f, Token::PropName);
                        self.line_state = LineState::PropValue;
                    }
                    (LineState::PropName, ':') => {
                        self.flush(&mut f, Token::PropName);
                        self.line_state = LineState::Value;
                    }
                    (LineState::PropValue, ':') => {
                        self.flush(&mut f, Token::PropValue);
                        self.line_state = LineState::Value;
                    }
                    (LineState::Value, '\n') => {
                        self.flush(&mut f, Token::Value);
                        self.line_state = LineState::Key;
                    }
                    (LineState::Value, '\\') => {
                        self.line_state = LineState::ValueEscape;
                    }
                    (LineState::ValueEscape, 'n') => {
                        self.buffer.push(b'\n');
                        self.line_state = LineState::Value;
                    }
                    (LineState::ValueEscape, 'r') => {
                        // BUGFIX: this arm previously pushed b'\n' (copy-paste
                        // from the 'n' arm); a "\r" escape must decode to CR.
                        self.buffer.push(b'\r');
                        self.line_state = LineState::Value;
                    }
                    (LineState::ValueEscape, _) => {
                        // Unknown escape: take the character literally.
                        self.buffer.push(b);
                        self.line_state = LineState::Value;
                    }
                    (_, _) => self.buffer.push(b),
                }
            }
        }
    }
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// Runs a fresh tokenizer over `input` and collects every emitted token.
    fn collect_tokens(input: &[u8]) -> Vec<Token> {
        let mut tokenizer = Tokenizer::new();
        let mut collected = vec![];
        tokenizer.feed(input, |token| collected.push(token));
        collected
    }

    #[test]
    fn tokenize_prop() {
        // A key with one property: KEY;PROP=VAL:VALUE
        let tokens = collect_tokens(b"DTSTART;TZID=Europe/Berlin:20191121T150000\n\n");
        assert_eq!(
            tokens,
            vec![
                Token::Key("DTSTART".to_owned()),
                Token::PropName("TZID".to_owned()),
                Token::PropValue("Europe/Berlin".to_owned()),
                Token::Value("20191121T150000".to_owned()),
            ]
        );
    }

    #[test]
    fn tokenize_event() {
        // Several plain KEY:VALUE lines in sequence.
        let tokens = collect_tokens(
            b"BEGIN:VEVENT\nSUMMARY:Test event\nDTSTART:19700101\nEND:VEVENT\n\n",
        );
        assert_eq!(
            tokens,
            vec![
                Token::Key("BEGIN".to_owned()), Token::Value("VEVENT".to_owned()),
                Token::Key("SUMMARY".to_owned()), Token::Value("Test event".to_owned()),
                Token::Key("DTSTART".to_owned()), Token::Value("19700101".to_owned()),
                Token::Key("END".to_owned()), Token::Value("VEVENT".to_owned()),
            ]
        );
    }

    #[test]
    fn tokenize_multiline() {
        // A newline followed by a space folds two physical lines into one
        // logical line; both the newline and the space disappear.
        let tokens = collect_tokens(b"SUMMARY:Hello\n World\n\n");
        assert_eq!(
            tokens,
            vec![
                Token::Key("SUMMARY".to_owned()), Token::Value("HelloWorld".to_owned()),
            ]
        );
    }
}
|