140 lines
3.9 KiB
Rust
140 lines
3.9 KiB
Rust
use std::mem::replace;
|
|
|
|
/// Parser state for the byte currently being consumed.
#[derive(Clone, Copy, PartialEq, Debug)]
enum State {
    /// Accumulating a property name, up to the `:` separator.
    Key,
    /// Accumulating a property value, up to the line break.
    Value,
    /// Saw a newline while in `Value`; the next byte decides whether the
    /// line is folded (continuation starts with a space) or finished.
    ValueNewline,
    /// Saw a backslash while in `Value`; the next byte is an escape code.
    ValueEscape,
}

/// A lexical token produced by [`Tokenizer::feed`].
#[derive(Debug, PartialEq)]
pub enum Token {
    /// Property name: the text before the `:` of a content line.
    Key(String),
    /// Property value: the text after the `:`, unfolded and unescaped.
    Value(String),
}

/// Streaming tokenizer for `KEY:VALUE` content lines (iCalendar-style),
/// handling line unfolding (newline + one leading whitespace) and the
/// `\n`/`\r` backslash escapes.
///
/// State is kept across calls to [`Tokenizer::feed`], so input may arrive in
/// arbitrary chunks. Trailing bytes without a terminating newline remain
/// buffered and are not emitted.
#[derive(Debug)]
pub struct Tokenizer {
    state: State,
    buffer: Vec<u8>,
}

impl Tokenizer {
    /// Creates a tokenizer positioned at the start of a key.
    pub fn new() -> Self {
        Tokenizer {
            state: State::Key,
            buffer: vec![],
        }
    }

    /// Swaps the accumulated bytes for `next` and decodes them as UTF-8.
    /// Invalid UTF-8 is reported on stdout and the token is dropped.
    fn take_string(&mut self, next: Vec<u8>) -> Option<String> {
        let buffer = replace(&mut self.buffer, next);
        match String::from_utf8(buffer) {
            Ok(s) => Some(s),
            Err(e) => {
                println!("UTF8 error: {:?}", e);
                None
            }
        }
    }

    /// Feeds a chunk of input bytes, invoking `f` for every completed token.
    ///
    /// `\r` bytes are skipped unconditionally, so LF and CRLF line endings
    /// behave identically. A value is only emitted once the byte *after* its
    /// newline is seen, because a leading space there means the line was
    /// folded and the value continues.
    pub fn feed<F>(&mut self, input: &'_ [u8], mut f: F)
    where
        F: FnMut(Token),
    {
        for b in input {
            match (self.state, *b as char) {
                // Ignore carriage returns entirely (CRLF support).
                (_, '\r') => {}
                (State::Key, ':') => {
                    if let Some(s) = self.take_string(vec![]) {
                        f(Token::Key(s));
                    }
                    self.state = State::Value;
                }
                (State::Key, '\n') => {
                    println!("Key without value: {:?}", self.buffer);
                    self.state = State::Key;
                    self.buffer = vec![];
                }
                (State::Value, '\n') => {
                    self.state = State::ValueNewline;
                }
                (State::Value, '\\') => {
                    self.state = State::ValueEscape;
                }
                // Folded line: drop the newline + single whitespace and keep
                // accumulating the same value.
                (State::ValueNewline, ' ') => {
                    self.state = State::Value;
                }
                // Blank line: the value ended and no new key has started yet.
                // BUG FIX: previously fell into the arm below, which pushed
                // the '\n' into the next key's buffer, corrupting the key
                // that follows a blank line.
                (State::ValueNewline, '\n') => {
                    if let Some(s) = self.take_string(vec![]) {
                        f(Token::Value(s));
                    }
                    self.state = State::Key;
                }
                // Line really ended; emit the value and start the next key
                // with the byte we just read.
                (State::ValueNewline, _) => {
                    if let Some(s) = self.take_string(vec![*b]) {
                        f(Token::Value(s));
                    }
                    self.state = State::Key;
                }
                (State::ValueEscape, 'n') => {
                    self.buffer.push(b'\n');
                    self.state = State::Value;
                }
                (State::ValueEscape, 'r') => {
                    // BUG FIX: `\r` used to decode to LF; it must decode to CR.
                    self.buffer.push(b'\r');
                    self.state = State::Value;
                }
                // Any other escaped byte (e.g. `\\`) stands for itself.
                (State::ValueEscape, _) => {
                    self.buffer.push(*b);
                    self.state = State::Value;
                }
                // Default: accumulate the byte into the current key/value.
                (_, _) => self.buffer.push(*b),
            }
        }
    }
}

impl Default for Tokenizer {
    fn default() -> Self {
        Tokenizer::new()
    }
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// NOTE(review): property parameters (`;TZID=...`) are not tokenized
    /// separately — the tokenizer has no `AttrName`/`AttrValue` variants, so
    /// everything before the `:` is reported as a single `Key`. The previous
    /// expectations referenced nonexistent `Token::AttrName`/`Token::AttrValue`
    /// variants and did not compile.
    #[test]
    fn tokenize_attr() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(
            b"DTSTART;TZID=Europe/Berlin:20191121T150000\n\n",
            |token| tokens.push(token),
        );
        assert_eq!(tokens, vec![
            Token::Key("DTSTART;TZID=Europe/Berlin".to_owned()),
            Token::Value("20191121T150000".to_owned()),
        ]);
    }

    /// A small VEVENT should produce alternating key/value tokens; the
    /// trailing blank line flushes the final value.
    #[test]
    fn tokenize_event() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(
            b"BEGIN:VEVENT\nSUMMARY:Test event\nDTSTART:19700101\nEND:VEVENT\n\n",
            |token| tokens.push(token),
        );
        assert_eq!(tokens, vec![
            Token::Key("BEGIN".to_owned()), Token::Value("VEVENT".to_owned()),
            Token::Key("SUMMARY".to_owned()), Token::Value("Test event".to_owned()),
            Token::Key("DTSTART".to_owned()), Token::Value("19700101".to_owned()),
            Token::Key("END".to_owned()), Token::Value("VEVENT".to_owned()),
        ]);
    }

    /// Unfolding consumes the newline plus exactly one whitespace character
    /// (RFC 5545 §3.1), so the folded form of "Hello World" carries two
    /// spaces: the fold marker and the literal space.
    #[test]
    fn tokenize_multiline() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(b"SUMMARY:Hello\n  World\n\n", |token| tokens.push(token));
        assert_eq!(tokens, vec![
            Token::Key("SUMMARY".to_owned()), Token::Value("Hello World".to_owned()),
        ]);
    }
}
|