ticker/src/ics/tokenizer.rs

use std::mem::replace;
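
/// Byte-level state used to undo ICS line folding: a newline followed by a
/// space continues the previous content line.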
#[derive(Clone, Copy, PartialEq, Debug)]
enum ByteState {
    Char,
    Newline,
}
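
/// Position within the current content line: the key, an optional property
/// parameter (name and value), or the value, including escape handling.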
#[derive(Clone, Copy, PartialEq, Debug)]
enum LineState {
    Key,
    PropName,
    PropValue,
    Value,
    ValueEscape,
}
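
/// Tokens emitted for each ICS content line.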
#[derive(Debug, PartialEq)]
pub enum Token {
    Key(String),
    PropName(String),
    PropValue(String),
    Value(String),
}
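
/// Streaming tokenizer for iCalendar (ICS) data. It unfolds wrapped lines and
/// splits each content line into key, property parameter and value tokens.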
#[derive(Debug)]
pub struct Tokenizer {
    byte_state: ByteState,
    line_state: LineState,
    buffer: Vec<u8>,
}

impl Tokenizer {
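    /// Creates a tokenizer in its initial state (start of a new content line).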
    pub fn new() -> Self {
        Tokenizer {
            byte_state: ByteState::Char,
            line_state: LineState::Key,
            buffer: vec![],
        }
    }
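
    /// Feeds raw ICS bytes into the tokenizer, calling `f` once per completed
    /// token. Input may be split across calls at arbitrary byte boundaries;
    /// state is kept between calls. Note that a line's final `Value` token is
    /// only emitted once the byte *after* its terminating newline has been
    /// seen, because that byte decides whether the line is folded.
    ///
    /// A minimal usage sketch (kept out of doctests with `ignore`, since the
    /// public module path is not shown in this file):
    ///
    /// ```ignore
    /// let mut t = Tokenizer::new();
    /// let mut tokens = vec![];
    /// t.feed(b"SUMMARY:Hello\r\nDTSTART:19700101\r\n", |token| tokens.push(token));
    /// // tokens now begins with Key("SUMMARY") followed by Value("Hello").
    /// ```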
    pub fn feed<F>(&mut self, input: &[u8], mut f: F)
    where
        F: FnMut(Token),
    {
        for b in input {
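            // First pass: undo ICS line folding at the byte level. A newline
            // is held back until the next byte shows whether the line
            // continues (leading space) or a new content line starts.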
            let bs = match (self.byte_state, *b as char) {
                (_, '\r') => [None; 2],
                (ByteState::Char, '\n') => {
                    self.byte_state = ByteState::Newline;
                    [None; 2]
                }
                (ByteState::Char, _) => [Some(*b), None],
                (ByteState::Newline, ' ') => {
                    self.byte_state = ByteState::Char;
                    [None; 2]
                }
                (ByteState::Newline, _) => {
                    self.byte_state = ByteState::Char;
                    [Some(b'\n'), Some(*b)]
                }
            };
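
            // Second pass: run the unfolded bytes through the line-level state
            // machine that splits KEY[;PARAM=VALUE]:VALUE content lines.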
            for b in bs.iter().filter_map(|b| *b) {
                match (self.line_state, b as char) {
                    (_, '\r') => {}
                    (LineState::Key, ':') => {
                        let buffer = replace(&mut self.buffer, vec![]);
                        match String::from_utf8(buffer) {
                            Ok(s) => f(Token::Key(s)),
                            Err(e) => println!("UTF8 error: {:?}", e),
                        }
                        self.line_state = LineState::Value;
                    }
                    (LineState::Key, '\n') => {
                        if !self.buffer.is_empty() {
                            println!("Key without value: {:?}", self.buffer);
                            self.buffer = vec![];
                        }
                        self.line_state = LineState::Key;
                    }
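                    // Property parameters: KEY;NAME=VALUE[:...] produces
                    // PropName/PropValue tokens before the final Value.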
                    (LineState::Key, ';') => {
                        let buffer = replace(&mut self.buffer, vec![]);
                        match String::from_utf8(buffer) {
                            Ok(s) => f(Token::Key(s)),
                            Err(e) => println!("UTF8 error: {:?}", e),
                        }
                        self.line_state = LineState::PropName;
                    }
                    (LineState::PropName, '=') => {
                        let buffer = replace(&mut self.buffer, vec![]);
                        match String::from_utf8(buffer) {
                            Ok(s) => f(Token::PropName(s)),
                            Err(e) => println!("UTF8 error: {:?}", e),
                        }
                        self.line_state = LineState::PropValue;
                    }
                    (LineState::PropName, ':') => {
                        let buffer = replace(&mut self.buffer, vec![]);
                        match String::from_utf8(buffer) {
                            Ok(s) => f(Token::PropName(s)),
                            Err(e) => println!("UTF8 error: {:?}", e),
                        }
                        self.line_state = LineState::Value;
                    }
                    (LineState::PropValue, ':') => {
                        let buffer = replace(&mut self.buffer, vec![]);
                        match String::from_utf8(buffer) {
                            Ok(s) => f(Token::PropValue(s)),
                            Err(e) => println!("UTF8 error: {:?}", e),
                        }
                        self.line_state = LineState::Value;
                    }
                    (LineState::Value, '\n') => {
                        let buffer = replace(&mut self.buffer, vec![]);
                        match String::from_utf8(buffer) {
                            Ok(s) => f(Token::Value(s)),
                            Err(e) => println!("UTF8 error: {:?}", e),
                        }
                        self.line_state = LineState::Key;
                    }
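                    // Backslash escapes inside values: both \n and \r are
                    // decoded to a line feed; any other escaped byte is kept
                    // as-is.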
                    (LineState::Value, '\\') => {
                        self.line_state = LineState::ValueEscape;
                    }
                    (LineState::ValueEscape, 'n') => {
                        self.buffer.push(b'\n');
                        self.line_state = LineState::Value;
                    }
                    (LineState::ValueEscape, 'r') => {
                        self.buffer.push(b'\n');
                        self.line_state = LineState::Value;
                    }
                    (LineState::ValueEscape, _) => {
                        self.buffer.push(b);
                        self.line_state = LineState::Value;
                    }
                    (_, _) => self.buffer.push(b),
                }
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
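
    // A property with a parameter: KEY;PARAM=value:value yields Key,
    // PropName, PropValue and Value tokens in order.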
    #[test]
    fn tokenize_prop() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        // The trailing blank line flushes the held-back final newline so the
        // last Value token is emitted before feed() returns.
        t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000

", |token| tokens.push(token));
        assert_eq!(tokens, vec![
            Token::Key("DTSTART".to_owned()),
            Token::PropName("TZID".to_owned()),
            Token::PropValue("Europe/Berlin".to_owned()),
            Token::Value("20191121T150000".to_owned()),
        ]);
    }
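
    // A whole VEVENT block tokenizes into alternating Key/Value pairs.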
    #[test]
    fn tokenize_event() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(b"BEGIN:VEVENT
SUMMARY:Test event
DTSTART:19700101
END:VEVENT

", |token| tokens.push(token));
        assert_eq!(tokens, vec![
            Token::Key("BEGIN".to_owned()), Token::Value("VEVENT".to_owned()),
            Token::Key("SUMMARY".to_owned()), Token::Value("Test event".to_owned()),
            Token::Key("DTSTART".to_owned()), Token::Value("19700101".to_owned()),
            Token::Key("END".to_owned()), Token::Value("VEVENT".to_owned()),
        ]);
    }
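
    // A folded continuation line (starting with a space) is joined to the
    // previous line with the leading space removed.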
    #[test]
    fn tokenize_multiline() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(b"SUMMARY:Hello
 World

", |token| tokens.push(token));
        assert_eq!(tokens, vec![
            Token::Key("SUMMARY".to_owned()), Token::Value("HelloWorld".to_owned()),
        ]);
    }
}