diff --git a/src/ics/mod.rs b/src/ics/mod.rs index cedba71..5177647 100644 --- a/src/ics/mod.rs +++ b/src/ics/mod.rs @@ -4,8 +4,17 @@ mod tokenizer; mod parser; pub use parser::Parser; +pub type Props = Vec<(String, String)>; + #[derive(Debug, PartialEq)] pub struct Object { pub name: String, - pub content: HashMap, + pub content: HashMap, +} + +impl Object { + pub fn get(&self, key: &'_ str) -> Option<&str> { + self.content.get(key) + .map(|(props, value)| value.as_ref()) + } } diff --git a/src/ics/parser.rs b/src/ics/parser.rs index c666f56..3ca89ba 100644 --- a/src/ics/parser.rs +++ b/src/ics/parser.rs @@ -1,13 +1,15 @@ use std::mem::replace; use std::collections::HashMap; -use super::Object; +use super::{Object, Props}; use super::tokenizer::{Tokenizer, Token}; pub struct Parser { tokenizer: Tokenizer, object_name: Option, current_key: Option, - content: HashMap, + current_prop: Option, + props: Props, + content: HashMap, } impl Parser { @@ -16,6 +18,8 @@ impl Parser { tokenizer: Tokenizer::new(), object_name: None, current_key: None, + current_prop: None, + props: vec![], content: HashMap::new(), } } @@ -25,11 +29,19 @@ impl Parser { F: FnMut(Object), { let current_key = &mut self.current_key; + let current_prop = &mut self.current_prop; + let props = &mut self.props; let object_name = &mut self.object_name; let content = &mut self.content; self.tokenizer.feed(input, |token| { match token { Token::Key(key) => *current_key = Some(key), + Token::PropName(name) => *current_prop = Some(name), + Token::PropValue(value) => { + current_prop.take().map(|name| { + props.push((name, value)); + }); + } Token::Value(value) => { fn compare(s1: &Option, s2: &str) -> bool { s1.as_ref().map(|s1| s1 == s2).unwrap_or(s2.len() == 0) @@ -43,8 +55,9 @@ impl Parser { let content = replace(content, HashMap::new()); object_name.map(|name| f(Object { name, content })); } else { + let props = replace(props, vec![]); let key = replace(current_key, None); - key.map(|key| content.insert(key, value)); + key.map(|key| content.insert(key, (props, value))); } } } @@ -75,7 +88,7 @@ END:VEVENT ("DTSTART", "19700101")] .iter() .cloned() - .map(|(k, v)| (k.to_owned(), v.to_owned())) + .map(|(k, v)| (k.to_owned(), (vec![], v.to_owned()))) .collect(), })); } diff --git a/src/ics/tokenizer.rs b/src/ics/tokenizer.rs index 512fd22..4fadc2f 100644 --- a/src/ics/tokenizer.rs +++ b/src/ics/tokenizer.rs @@ -1,29 +1,40 @@ use std::mem::replace; #[derive(Clone, Copy, PartialEq, Debug)] -enum State { +enum ByteState { + Char, + Newline, +} + +#[derive(Clone, Copy, PartialEq, Debug)] +enum LineState { Key, + PropName, + PropValue, Value, - ValueNewline, ValueEscape, } #[derive(Debug, PartialEq)] pub enum Token { Key(String), + PropName(String), + PropValue(String), Value(String), } #[derive(Debug)] pub struct Tokenizer { - state: State, + byte_state: ByteState, + line_state: LineState, buffer: Vec, } impl Tokenizer { pub fn new() -> Self { Tokenizer { - state: State::Key, + byte_state: ByteState::Char, + line_state: LineState::Key, buffer: vec![], } } @@ -33,55 +44,112 @@ impl Tokenizer { F: FnMut(Token), { for b in input { - match (self.state, *b as char) { - (_, '\r') => {} - (State::Key, ':') => { - let buffer = replace(&mut self.buffer, vec![]); - match String::from_utf8(buffer) { - Ok(s) => - f(Token::Key(s)), - Err(e) => - println!("UTF8 error: {:?}", e), + let bs = match (self.byte_state, *b as char) { + (_, '\r') => [None; 2], + (ByteState::Char, '\n') => { + self.byte_state = ByteState::Newline; + [None; 2] + } + (ByteState::Char, _) => [Some(*b), None], + (ByteState::Newline, ' ') => { + self.byte_state = ByteState::Char; + [None; 2] + } + (ByteState::Newline, ' ') => { + self.byte_state = ByteState::Char; + [None; 2] + } + (ByteState::Newline, _) => { + self.byte_state = ByteState::Char; + [Some('\n' as u8), Some(*b)] + } + }; + + for b in bs.iter().filter_map(|b| *b) { + match (self.line_state, b as char) { + (_, '\r') => {} + (LineState::Key, ':') => { + let buffer = replace(&mut self.buffer, vec![]); + match String::from_utf8(buffer) { + Ok(s) => + f(Token::Key(s)), + Err(e) => + println!("UTF8 error: {:?}", e), + } + self.line_state = LineState::Value; } - self.state = State::Value; - } - (State::Key, '\n') => { - println!("Key without value: {:?}", self.buffer); - self.state = State::Key; - self.buffer = vec![]; - } - (State::Value, '\n') => { - self.state = State::ValueNewline; - } - (State::Value, '\\') => { - self.state = State::ValueEscape; - } - (State::ValueNewline, ' ') => { - self.state = State::Value; - } - (State::ValueNewline, _) => { - let buffer = replace(&mut self.buffer, vec![*b]); - match String::from_utf8(buffer) { - Ok(s) => - f(Token::Value(s)), - Err(e) => - println!("UTF8 error: {:?}", e), + (LineState::Key, '\n') => { + println!("Key without value: {:?}", self.buffer); + self.line_state = LineState::Key; + self.buffer = vec![]; } - self.state = State::Key; + (LineState::Key, ';') => { + let buffer = replace(&mut self.buffer, vec![]); + match String::from_utf8(buffer) { + Ok(s) => + f(Token::Key(s)), + Err(e) => + println!("UTF8 error: {:?}", e), + } + self.line_state = LineState::PropName; + } + (LineState::PropName, '=') => { + let buffer = replace(&mut self.buffer, vec![]); + match String::from_utf8(buffer) { + Ok(s) => + f(Token::PropName(s)), + Err(e) => + println!("UTF8 error: {:?}", e), + } + self.line_state = LineState::PropValue; + } + (LineState::PropName, ':') => { + let buffer = replace(&mut self.buffer, vec![]); + match String::from_utf8(buffer) { + Ok(s) => + f(Token::PropName(s)), + Err(e) => + println!("UTF8 error: {:?}", e), + } + self.line_state = LineState::Value; + } + (LineState::PropValue, ':') => { + let buffer = replace(&mut self.buffer, vec![]); + match String::from_utf8(buffer) { + Ok(s) => + f(Token::PropValue(s)), + Err(e) => + println!("UTF8 error: {:?}", e), + } + self.line_state = LineState::Value; + } + (LineState::Value, '\n') => { + let buffer = replace(&mut self.buffer, vec![]); + match String::from_utf8(buffer) { + Ok(s) => + f(Token::Value(s)), + Err(e) => + println!("UTF8 error: {:?}", e), + } + self.line_state = LineState::Key; + } + (LineState::Value, '\\') => { + self.line_state = LineState::ValueEscape; + } + (LineState::ValueEscape, 'n') => { + self.buffer.push('\n' as u8); + self.line_state = LineState::Value; + } + (LineState::ValueEscape, 'r') => { + self.buffer.push('\n' as u8); + self.line_state = LineState::Value; + } + (LineState::ValueEscape, _) => { + self.buffer.push(b); + self.line_state = LineState::Value; + } + (_, _) => self.buffer.push(b), } - (State::ValueEscape, 'n') => { - self.buffer.push('\n' as u8); - self.state = State::Value; - } - (State::ValueEscape, 'r') => { - self.buffer.push('\n' as u8); - self.state = State::Value; - } - (State::ValueEscape, _) => { - self.buffer.push(*b); - self.state = State::Value; - } - (_, _) => self.buffer.push(*b), } } } @@ -92,7 +160,7 @@ mod test { use super::*; #[test] - fn tokenize_attr() { + fn tokenize_prop() { let mut t = Tokenizer::new(); let mut tokens = vec![]; t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000 @@ -100,8 +168,8 @@ mod test { ", |token| tokens.push(token)); assert_eq!(tokens, vec![ Token::Key("DTSTART".to_owned()), - Token::AttrName("TZID".to_owned()), - Token::AttrValue("Europe/Berlin".to_owned()), + Token::PropName("TZID".to_owned()), + Token::PropValue("Europe/Berlin".to_owned()), Token::Value("20191121T150000".to_owned()), ]); } @@ -133,7 +201,7 @@ END:VEVENT ", |token| tokens.push(token)); assert_eq!(tokens, vec![ - Token::Key("SUMMARY".to_owned()), Token::Value("Hello World".to_owned()), + Token::Key("SUMMARY".to_owned()), Token::Value("HelloWorld".to_owned()), ]); } } diff --git a/src/main.rs b/src/main.rs index 2fc49d9..9a6bb46 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,8 +16,8 @@ fn fetch(client: &reqwest::Client, url: &str) -> Result<(), Box 0 => { let data = &buf[..len]; p.feed(data, |obj| { - println!("{} {}", obj.content.get("DTSTART").unwrap_or(&"?".to_owned()), obj.content.get("SUMMARY").unwrap_or(&"?".to_owned())); - println!("{}", obj.content.get("LOCATION").unwrap_or(&"?".to_owned())); + println!("{} {}", obj.get("DTSTART").unwrap_or("?"), obj.get("SUMMARY").unwrap_or("?")); + println!("{}", obj.get("LOCATION").unwrap_or("?")); }); } _ => break,