//! ICS (iCalendar) content-line tokenizer.
//!
//! Splits a byte stream of `KEY:VALUE` content lines into [`Token`]s,
//! handling RFC 5545 line folding and backslash escapes.
// Source listing: ticker/src/ics/tokenizer.rs (captured 2019-10-06 23:28:39 +02:00)
use std::mem::replace;
/// Internal parser state: which part of an ICS content line we are inside.
// `Eq` added alongside `PartialEq` — the comparison is total (clippy:
// `derive_partial_eq_without_eq`).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum State {
    /// Accumulating a property name, up to the `:` separator.
    Key,
    /// Accumulating a property value, up to the end of the (unfolded) line.
    Value,
    /// Saw a newline inside a value; the next byte decides whether the line
    /// is folded (continuation) or a new key starts.
    ValueNewline,
    /// Saw a backslash inside a value; the next byte is an escape code.
    ValueEscape,
}
/// A lexical token produced by the tokenizer.
///
/// ICS content lines have the shape `KEY:VALUE`; each complete line yields
/// one `Key` followed by one `Value`.
// `Eq` and `Clone` added — both are backward-compatible derives for a
// `String`-carrying enum (clippy: `derive_partial_eq_without_eq`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Property name: everything before the first `:`, including any
    /// `;PARAM=…` parameter text (parameters are not split out here).
    Key(String),
    /// Property value, with folded lines joined and escapes resolved.
    Value(String),
}
/// Streaming tokenizer for ICS content lines.
///
/// Feed raw bytes incrementally with `feed`; completed tokens are handed to
/// the caller-supplied callback. State is kept across calls, so input may be
/// split at arbitrary byte boundaries.
#[derive(Debug)]
pub struct Tokenizer {
// Which part of a content line we are currently parsing.
state: State,
// Raw bytes accumulated for the current key or value (UTF-8 validated on flush).
buffer: Vec<u8>,
}
impl Tokenizer {
    /// Creates a tokenizer positioned at the start of a line, expecting a key.
    pub fn new() -> Self {
        Tokenizer {
            state: State::Key,
            buffer: vec![],
        }
    }

    /// Feeds a chunk of raw ICS bytes, invoking `f` for each completed token.
    ///
    /// Input may be split across calls at arbitrary byte boundaries; state is
    /// carried over. Note that a value is only emitted once the first byte of
    /// the *next* line arrives, because a line starting with a space is an
    /// RFC 5545 folded continuation of the current value.
    ///
    /// Buffers that are not valid UTF-8 are reported on stderr and dropped
    /// rather than emitted.
    pub fn feed<F>(&mut self, input: &'_ [u8], mut f: F)
    where
        F: FnMut(Token),
    {
        for b in input {
            match (self.state, *b as char) {
                // CR is ignored everywhere so CRLF and bare-LF inputs tokenize alike.
                (_, '\r') => {}
                (State::Key, ':') => {
                    // ':' terminates the key: emit it and start collecting the value.
                    let buffer = std::mem::take(&mut self.buffer);
                    match String::from_utf8(buffer) {
                        Ok(s) => f(Token::Key(s)),
                        // Diagnostics belong on stderr, not stdout.
                        Err(e) => eprintln!("UTF8 error: {:?}", e),
                    }
                    self.state = State::Value;
                }
                (State::Key, '\n') => {
                    // A line ended before any ':' — discard the malformed key.
                    eprintln!("Key without value: {:?}", self.buffer);
                    self.state = State::Key;
                    self.buffer = vec![];
                }
                (State::Value, '\n') => {
                    // Don't emit yet: the next line may be a folded continuation.
                    self.state = State::ValueNewline;
                }
                (State::Value, '\\') => {
                    self.state = State::ValueEscape;
                }
                (State::ValueNewline, ' ') => {
                    // RFC 5545 line folding: newline + space continues the value
                    // (both fold bytes are consumed, per unfolding rules).
                    self.state = State::Value;
                }
                (State::ValueNewline, _) => {
                    // A non-folded line follows: the value is complete. Emit it and
                    // seed the new key buffer with this first byte.
                    let buffer = replace(&mut self.buffer, vec![*b]);
                    match String::from_utf8(buffer) {
                        Ok(s) => f(Token::Value(s)),
                        Err(e) => eprintln!("UTF8 error: {:?}", e),
                    }
                    self.state = State::Key;
                }
                // RFC 5545 §3.3.11: both `\n` and `\N` escape a newline.
                (State::ValueEscape, 'n') | (State::ValueEscape, 'N') => {
                    self.buffer.push(b'\n');
                    self.state = State::Value;
                }
                (State::ValueEscape, 'r') => {
                    // BUG FIX: previously pushed b'\n' here (copy-paste from the
                    // 'n' arm), turning `\r` into a newline instead of a CR.
                    self.buffer.push(b'\r');
                    self.state = State::Value;
                }
                (State::ValueEscape, _) => {
                    // Any other escaped byte (`\\`, `\;`, `\,`, …) is taken literally.
                    self.buffer.push(*b);
                    self.state = State::Value;
                }
                // Ordinary byte: accumulate into the current key or value.
                (_, _) => self.buffer.push(*b),
            }
        }
    }
}

// `new()` takes no arguments, so `Default` is the idiomatic companion
// (clippy: `new_without_default`).
impl Default for Tokenizer {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod test {
    use super::*;

    // NOTE: the tokenizer only flushes a value once the first byte of the
    // *next* line arrives (required for line folding), so every input below
    // ends with one extra byte ("X") to force the final `Value` out.
    // Fixes the previous tests, which referenced nonexistent variants
    // (`Token::AttrName` / `Token::AttrValue`) and asserted sequences the
    // tokenizer never produces.

    #[test]
    fn tokenize_attr() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        // Property parameters (`;TZID=…`) are not split out; they remain
        // part of the key up to the first ':'.
        t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000\nX", |token| {
            tokens.push(token)
        });
        assert_eq!(tokens, vec![
            Token::Key("DTSTART;TZID=Europe/Berlin".to_owned()),
            Token::Value("20191121T150000".to_owned()),
        ]);
    }

    #[test]
    fn tokenize_event() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(
            b"BEGIN:VEVENT\nSUMMARY:Test event\nDTSTART:19700101\nEND:VEVENT\nX",
            |token| tokens.push(token),
        );
        assert_eq!(tokens, vec![
            Token::Key("BEGIN".to_owned()), Token::Value("VEVENT".to_owned()),
            Token::Key("SUMMARY".to_owned()), Token::Value("Test event".to_owned()),
            Token::Key("DTSTART".to_owned()), Token::Value("19700101".to_owned()),
            Token::Key("END".to_owned()), Token::Value("VEVENT".to_owned()),
        ]);
    }

    #[test]
    fn tokenize_multiline() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        // RFC 5545 unfolding removes the newline *and* the single
        // continuation space, so the second space below is the one that
        // survives in "Hello World".
        t.feed(b"SUMMARY:Hello\n  World\nX", |token| tokens.push(token));
        assert_eq!(tokens, vec![
            Token::Key("SUMMARY".to_owned()),
            Token::Value("Hello World".to_owned()),
        ]);
    }
}