parse props too

This commit is contained in:
Astro 2019-10-07 00:12:31 +02:00
parent f42d6a2603
commit 7a3935987e
4 changed files with 151 additions and 61 deletions

View File

@ -4,8 +4,17 @@ mod tokenizer;
mod parser; mod parser;
pub use parser::Parser; pub use parser::Parser;
pub type Props = Vec<(String, String)>;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Object { pub struct Object {
pub name: String, pub name: String,
pub content: HashMap<String, String>, pub content: HashMap<String, (Props, String)>,
}
impl Object {
pub fn get(&self, key: &'_ str) -> Option<&str> {
self.content.get(key)
.map(|(props, value)| value.as_ref())
}
} }

View File

@ -1,13 +1,15 @@
use std::mem::replace; use std::mem::replace;
use std::collections::HashMap; use std::collections::HashMap;
use super::Object; use super::{Object, Props};
use super::tokenizer::{Tokenizer, Token}; use super::tokenizer::{Tokenizer, Token};
pub struct Parser { pub struct Parser {
tokenizer: Tokenizer, tokenizer: Tokenizer,
object_name: Option<String>, object_name: Option<String>,
current_key: Option<String>, current_key: Option<String>,
content: HashMap<String, String>, current_prop: Option<String>,
props: Props,
content: HashMap<String, (Props, String)>,
} }
impl Parser { impl Parser {
@ -16,6 +18,8 @@ impl Parser {
tokenizer: Tokenizer::new(), tokenizer: Tokenizer::new(),
object_name: None, object_name: None,
current_key: None, current_key: None,
current_prop: None,
props: vec![],
content: HashMap::new(), content: HashMap::new(),
} }
} }
@ -25,11 +29,19 @@ impl Parser {
F: FnMut(Object), F: FnMut(Object),
{ {
let current_key = &mut self.current_key; let current_key = &mut self.current_key;
let current_prop = &mut self.current_prop;
let props = &mut self.props;
let object_name = &mut self.object_name; let object_name = &mut self.object_name;
let content = &mut self.content; let content = &mut self.content;
self.tokenizer.feed(input, |token| { self.tokenizer.feed(input, |token| {
match token { match token {
Token::Key(key) => *current_key = Some(key), Token::Key(key) => *current_key = Some(key),
Token::PropName(name) => *current_prop = Some(name),
Token::PropValue(value) => {
current_prop.take().map(|name| {
props.push((name, value));
});
}
Token::Value(value) => { Token::Value(value) => {
fn compare(s1: &Option<String>, s2: &str) -> bool { fn compare(s1: &Option<String>, s2: &str) -> bool {
s1.as_ref().map(|s1| s1 == s2).unwrap_or(s2.len() == 0) s1.as_ref().map(|s1| s1 == s2).unwrap_or(s2.len() == 0)
@ -43,8 +55,9 @@ impl Parser {
let content = replace(content, HashMap::new()); let content = replace(content, HashMap::new());
object_name.map(|name| f(Object { name, content })); object_name.map(|name| f(Object { name, content }));
} else { } else {
let props = replace(props, vec![]);
let key = replace(current_key, None); let key = replace(current_key, None);
key.map(|key| content.insert(key, value)); key.map(|key| content.insert(key, (props, value)));
} }
} }
} }
@ -75,7 +88,7 @@ END:VEVENT
("DTSTART", "19700101")] ("DTSTART", "19700101")]
.iter() .iter()
.cloned() .cloned()
.map(|(k, v)| (k.to_owned(), v.to_owned())) .map(|(k, v)| (k.to_owned(), (vec![], v.to_owned())))
.collect(), .collect(),
})); }));
} }

View File

@ -1,29 +1,40 @@
use std::mem::replace; use std::mem::replace;
#[derive(Clone, Copy, PartialEq, Debug)] #[derive(Clone, Copy, PartialEq, Debug)]
enum State { enum ByteState {
Char,
Newline,
}
#[derive(Clone, Copy, PartialEq, Debug)]
enum LineState {
Key, Key,
PropName,
PropValue,
Value, Value,
ValueNewline,
ValueEscape, ValueEscape,
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Token { pub enum Token {
Key(String), Key(String),
PropName(String),
PropValue(String),
Value(String), Value(String),
} }
#[derive(Debug)] #[derive(Debug)]
pub struct Tokenizer { pub struct Tokenizer {
state: State, byte_state: ByteState,
line_state: LineState,
buffer: Vec<u8>, buffer: Vec<u8>,
} }
impl Tokenizer { impl Tokenizer {
pub fn new() -> Self { pub fn new() -> Self {
Tokenizer { Tokenizer {
state: State::Key, byte_state: ByteState::Char,
line_state: LineState::Key,
buffer: vec![], buffer: vec![],
} }
} }
@ -33,55 +44,112 @@ impl Tokenizer {
F: FnMut(Token), F: FnMut(Token),
{ {
for b in input { for b in input {
match (self.state, *b as char) { let bs = match (self.byte_state, *b as char) {
(_, '\r') => {} (_, '\r') => [None; 2],
(State::Key, ':') => { (ByteState::Char, '\n') => {
let buffer = replace(&mut self.buffer, vec![]); self.byte_state = ByteState::Newline;
match String::from_utf8(buffer) { [None; 2]
Ok(s) => }
f(Token::Key(s)), (ByteState::Char, _) => [Some(*b), None],
Err(e) => (ByteState::Newline, ' ') => {
println!("UTF8 error: {:?}", e), self.byte_state = ByteState::Char;
[None; 2]
}
(ByteState::Newline, ' ') => {
self.byte_state = ByteState::Char;
[None; 2]
}
(ByteState::Newline, _) => {
self.byte_state = ByteState::Char;
[Some('\n' as u8), Some(*b)]
}
};
for b in bs.iter().filter_map(|b| *b) {
match (self.line_state, b as char) {
(_, '\r') => {}
(LineState::Key, ':') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Key(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Value;
} }
self.state = State::Value; (LineState::Key, '\n') => {
} println!("Key without value: {:?}", self.buffer);
(State::Key, '\n') => { self.line_state = LineState::Key;
println!("Key without value: {:?}", self.buffer); self.buffer = vec![];
self.state = State::Key;
self.buffer = vec![];
}
(State::Value, '\n') => {
self.state = State::ValueNewline;
}
(State::Value, '\\') => {
self.state = State::ValueEscape;
}
(State::ValueNewline, ' ') => {
self.state = State::Value;
}
(State::ValueNewline, _) => {
let buffer = replace(&mut self.buffer, vec![*b]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Value(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
} }
self.state = State::Key; (LineState::Key, ';') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Key(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::PropName;
}
(LineState::PropName, '=') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::PropName(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::PropValue;
}
(LineState::PropName, ':') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::PropName(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Value;
}
(LineState::PropValue, ':') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::PropValue(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Value;
}
(LineState::Value, '\n') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Value(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Key;
}
(LineState::Value, '\\') => {
self.line_state = LineState::ValueEscape;
}
(LineState::ValueEscape, 'n') => {
self.buffer.push('\n' as u8);
self.line_state = LineState::Value;
}
(LineState::ValueEscape, 'r') => {
self.buffer.push('\n' as u8);
self.line_state = LineState::Value;
}
(LineState::ValueEscape, _) => {
self.buffer.push(b);
self.line_state = LineState::Value;
}
(_, _) => self.buffer.push(b),
} }
(State::ValueEscape, 'n') => {
self.buffer.push('\n' as u8);
self.state = State::Value;
}
(State::ValueEscape, 'r') => {
self.buffer.push('\n' as u8);
self.state = State::Value;
}
(State::ValueEscape, _) => {
self.buffer.push(*b);
self.state = State::Value;
}
(_, _) => self.buffer.push(*b),
} }
} }
} }
@ -92,7 +160,7 @@ mod test {
use super::*; use super::*;
#[test] #[test]
fn tokenize_attr() { fn tokenize_prop() {
let mut t = Tokenizer::new(); let mut t = Tokenizer::new();
let mut tokens = vec![]; let mut tokens = vec![];
t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000 t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000
@ -100,8 +168,8 @@ mod test {
", |token| tokens.push(token)); ", |token| tokens.push(token));
assert_eq!(tokens, vec![ assert_eq!(tokens, vec![
Token::Key("DTSTART".to_owned()), Token::Key("DTSTART".to_owned()),
Token::AttrName("TZID".to_owned()), Token::PropName("TZID".to_owned()),
Token::AttrValue("Europe/Berlin".to_owned()), Token::PropValue("Europe/Berlin".to_owned()),
Token::Value("20191121T150000".to_owned()), Token::Value("20191121T150000".to_owned()),
]); ]);
} }
@ -133,7 +201,7 @@ END:VEVENT
", |token| tokens.push(token)); ", |token| tokens.push(token));
assert_eq!(tokens, vec![ assert_eq!(tokens, vec![
Token::Key("SUMMARY".to_owned()), Token::Value("Hello World".to_owned()), Token::Key("SUMMARY".to_owned()), Token::Value("HelloWorld".to_owned()),
]); ]);
} }
} }

View File

@ -16,8 +16,8 @@ fn fetch(client: &reqwest::Client, url: &str) -> Result<(), Box<dyn std::error::
len if len > 0 => { len if len > 0 => {
let data = &buf[..len]; let data = &buf[..len];
p.feed(data, |obj| { p.feed(data, |obj| {
println!("{} {}", obj.content.get("DTSTART").unwrap_or(&"?".to_owned()), obj.content.get("SUMMARY").unwrap_or(&"?".to_owned())); println!("{} {}", obj.get("DTSTART").unwrap_or("?"), obj.get("SUMMARY").unwrap_or("?"));
println!("{}", obj.content.get("LOCATION").unwrap_or(&"?".to_owned())); println!("{}", obj.get("LOCATION").unwrap_or("?"));
}); });
} }
_ => break, _ => break,