140 lines
3.9 KiB
Rust
140 lines
3.9 KiB
Rust
|
use std::mem::replace;
|
||
|
|
||
|
/// Parser states for the line-oriented `KEY:VALUE` tokenizer.
#[derive(Clone, Copy, PartialEq, Debug)]
enum State {
    /// Accumulating a key; terminated by ':'.
    Key,
    /// Accumulating a value; terminated by a newline (subject to unfolding).
    Value,
    /// Saw the newline after a value; the next byte decides whether the
    /// value continues (folded line), the record ends (blank line), or a
    /// new key starts.
    ValueNewline,
    /// Saw a backslash inside a value; the next byte is an escape code.
    ValueEscape,
}

/// A token produced by [`Tokenizer::feed`].
#[derive(Debug, PartialEq)]
pub enum Token {
    /// The text of a line before the first ':' (parameters after ';' are
    /// not split out — they remain part of the key text).
    Key(String),
    /// The unfolded, unescaped text after the ':'.
    Value(String),
}

/// Streaming tokenizer for `KEY:VALUE` lines (iCalendar-style), with
/// RFC 5545 line unfolding (newline + leading space) and backslash
/// escapes. State is kept across calls to `feed`, so input may arrive
/// in arbitrary chunks.
#[derive(Debug)]
pub struct Tokenizer {
    state: State,
    buffer: Vec<u8>,
}

impl Tokenizer {
    /// Creates a tokenizer positioned at the start of a key.
    pub fn new() -> Self {
        Tokenizer {
            state: State::Key,
            buffer: vec![],
        }
    }

    /// Feeds `input` bytes, invoking `f` once for every completed token.
    ///
    /// May be called repeatedly with chunks of a larger stream. A finished
    /// token that is not valid UTF-8 is reported on stdout and dropped.
    pub fn feed<F>(&mut self, input: &[u8], mut f: F)
    where
        F: FnMut(Token),
    {
        for b in input {
            match (self.state, *b as char) {
                // CR is ignored everywhere, so CRLF and LF behave alike.
                (_, '\r') => {}
                (State::Key, ':') => {
                    let buffer = replace(&mut self.buffer, vec![]);
                    match String::from_utf8(buffer) {
                        Ok(s) => f(Token::Key(s)),
                        Err(e) => println!("UTF8 error: {:?}", e),
                    }
                    self.state = State::Value;
                }
                (State::Key, '\n') => {
                    println!("Key without value: {:?}", self.buffer);
                    self.state = State::Key;
                    self.buffer = vec![];
                }
                (State::Value, '\n') => {
                    self.state = State::ValueNewline;
                }
                (State::Value, '\\') => {
                    self.state = State::ValueEscape;
                }
                // Folded line (RFC 5545 §3.1): a newline followed by a
                // single space continues the value; both bytes are dropped.
                (State::ValueNewline, ' ') => {
                    self.state = State::Value;
                }
                // Blank line: finish the value. Handled explicitly so the
                // extra newline does not leak into the next key's buffer
                // (the catch-all below would seed the buffer with `\n`).
                (State::ValueNewline, '\n') => {
                    let buffer = replace(&mut self.buffer, vec![]);
                    match String::from_utf8(buffer) {
                        Ok(s) => f(Token::Value(s)),
                        Err(e) => println!("UTF8 error: {:?}", e),
                    }
                    self.state = State::Key;
                }
                // Any other byte finishes the value and starts the next key.
                (State::ValueNewline, _) => {
                    let buffer = replace(&mut self.buffer, vec![*b]);
                    match String::from_utf8(buffer) {
                        Ok(s) => f(Token::Value(s)),
                        Err(e) => println!("UTF8 error: {:?}", e),
                    }
                    self.state = State::Key;
                }
                // RFC 5545 §3.3.11: both "\n" and "\N" mean newline.
                (State::ValueEscape, 'n') | (State::ValueEscape, 'N') => {
                    self.buffer.push(b'\n');
                    self.state = State::Value;
                }
                // Fixed: "\r" now yields CR (it previously pushed LF,
                // a copy-paste of the "\n" arm).
                (State::ValueEscape, 'r') => {
                    self.buffer.push(b'\r');
                    self.state = State::Value;
                }
                // Unknown escapes keep the escaped byte verbatim
                // (covers "\\", "\;", "\,").
                (State::ValueEscape, _) => {
                    self.buffer.push(*b);
                    self.state = State::Value;
                }
                // Default: accumulate into the current key/value buffer.
                (_, _) => self.buffer.push(*b),
            }
        }
    }
}
|
||
|
|
||
|
#[cfg(test)]
mod test {
    use super::*;

    /// Property parameters after ';' are not split out by this tokenizer:
    /// everything up to the first ':' is reported as the key.
    ///
    /// (The original test expected `Token::AttrName` / `Token::AttrValue`
    /// variants that do not exist on `Token`, so it did not compile.)
    #[test]
    fn tokenize_attr() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(
            b"DTSTART;TZID=Europe/Berlin:20191121T150000\n\n",
            |token| tokens.push(token),
        );
        assert_eq!(tokens, vec![
            Token::Key("DTSTART;TZID=Europe/Berlin".to_owned()),
            Token::Value("20191121T150000".to_owned()),
        ]);
    }

    /// Several KEY:VALUE lines produce alternating Key/Value tokens.
    #[test]
    fn tokenize_event() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(
            b"BEGIN:VEVENT\nSUMMARY:Test event\nDTSTART:19700101\nEND:VEVENT\n\n",
            |token| tokens.push(token),
        );
        assert_eq!(tokens, vec![
            Token::Key("BEGIN".to_owned()), Token::Value("VEVENT".to_owned()),
            Token::Key("SUMMARY".to_owned()), Token::Value("Test event".to_owned()),
            Token::Key("DTSTART".to_owned()), Token::Value("19700101".to_owned()),
            Token::Key("END".to_owned()), Token::Value("VEVENT".to_owned()),
        ]);
    }

    /// RFC 5545 §3.1 unfolding removes the newline AND the single leading
    /// whitespace of a folded line, so no separator remains between the
    /// folded parts ("Hello" + "World", not "Hello World").
    #[test]
    fn tokenize_multiline() {
        let mut t = Tokenizer::new();
        let mut tokens = vec![];
        t.feed(b"SUMMARY:Hello\n World\n\n", |token| tokens.push(token));
        assert_eq!(tokens, vec![
            Token::Key("SUMMARY".to_owned()),
            Token::Value("HelloWorld".to_owned()),
        ]);
    }
}
|