parse props too

Astro 2019-10-07 00:12:31 +02:00
parent f42d6a2603
commit 7a3935987e
4 changed files with 151 additions and 61 deletions

View File

@@ -4,8 +4,17 @@ mod tokenizer;
mod parser;
pub use parser::Parser;
pub type Props = Vec<(String, String)>;
#[derive(Debug, PartialEq)]
pub struct Object {
pub name: String,
pub content: HashMap<String, String>,
pub content: HashMap<String, (Props, String)>,
}
impl Object {
pub fn get(&self, key: &'_ str) -> Option<&str> {
self.content.get(key)
.map(|(props, value)| value.as_ref())
}
}
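
With this change the map values carry their property parameters: `content` becomes `HashMap<String, (Props, String)>`, and the new `Object::get` hides the tuple from callers that only want the value. A minimal standalone sketch of that accessor pattern (the types here are re-declared locally for illustration, not imported from the crate):

```rust
use std::collections::HashMap;

type Props = Vec<(String, String)>;

struct Object {
    name: String,
    // Each content line keeps its property parameters alongside the value.
    content: HashMap<String, (Props, String)>,
}

impl Object {
    // Callers that only care about the value still get a plain &str.
    fn get(&self, key: &str) -> Option<&str> {
        self.content.get(key).map(|(_props, value)| value.as_str())
    }
}

fn main() {
    let mut content = HashMap::new();
    content.insert(
        "DTSTART".to_owned(),
        (vec![("TZID".to_owned(), "Europe/Berlin".to_owned())],
         "20191121T150000".to_owned()),
    );
    let obj = Object { name: "VEVENT".to_owned(), content };
    assert_eq!(obj.get("DTSTART"), Some("20191121T150000"));
    assert_eq!(obj.get("SUMMARY"), None);
    println!("{}: {:?}", obj.name, obj.get("DTSTART"));
}
```

Keeping `get` on `Object` means existing call sites that only want the value stay as simple as before, while code that needs the parameters can still read the tuple directly.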

View File

@@ -1,13 +1,15 @@
use std::mem::replace;
use std::collections::HashMap;
use super::Object;
use super::{Object, Props};
use super::tokenizer::{Tokenizer, Token};
pub struct Parser {
tokenizer: Tokenizer,
object_name: Option<String>,
current_key: Option<String>,
content: HashMap<String, String>,
current_prop: Option<String>,
props: Props,
content: HashMap<String, (Props, String)>,
}
impl Parser {
@@ -16,6 +18,8 @@ impl Parser {
tokenizer: Tokenizer::new(),
object_name: None,
current_key: None,
current_prop: None,
props: vec![],
content: HashMap::new(),
}
}
@@ -25,11 +29,19 @@ impl Parser {
F: FnMut(Object),
{
let current_key = &mut self.current_key;
let current_prop = &mut self.current_prop;
let props = &mut self.props;
let object_name = &mut self.object_name;
let content = &mut self.content;
self.tokenizer.feed(input, |token| {
match token {
Token::Key(key) => *current_key = Some(key),
Token::PropName(name) => *current_prop = Some(name),
Token::PropValue(value) => {
current_prop.take().map(|name| {
props.push((name, value));
});
}
Token::Value(value) => {
fn compare(s1: &Option<String>, s2: &str) -> bool {
s1.as_ref().map(|s1| s1 == s2).unwrap_or(s2.len() == 0)
@@ -43,8 +55,9 @@ impl Parser {
let content = replace(content, HashMap::new());
object_name.map(|name| f(Object { name, content }));
} else {
let props = replace(props, vec![]);
let key = replace(current_key, None);
key.map(|key| content.insert(key, value));
key.map(|key| content.insert(key, (props, value)));
}
}
}
@@ -75,7 +88,7 @@ END:VEVENT
("DTSTART", "19700101")]
.iter()
.cloned()
.map(|(k, v)| (k.to_owned(), v.to_owned()))
.map(|(k, v)| (k.to_owned(), (vec![], v.to_owned())))
.collect(),
}));
}
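
The parser now buffers `PropName`/`PropValue` pairs in `props` and attaches the accumulated list to the entry once the line's `Value` token arrives. A simplified, self-contained sketch of that accumulation pattern, with a local `Token` enum and `collect` function standing in for the crate's actual items:

```rust
use std::collections::HashMap;
use std::mem::replace;

type Props = Vec<(String, String)>;

enum Token {
    Key(String),
    PropName(String),
    PropValue(String),
    Value(String),
}

// Fold a token stream into a key -> (props, value) map, mirroring the
// parser's bookkeeping: props are collected until the value closes the line.
fn collect(tokens: Vec<Token>) -> HashMap<String, (Props, String)> {
    let mut content = HashMap::new();
    let mut current_key: Option<String> = None;
    let mut current_prop: Option<String> = None;
    let mut props: Props = vec![];

    for token in tokens {
        match token {
            Token::Key(key) => current_key = Some(key),
            Token::PropName(name) => current_prop = Some(name),
            Token::PropValue(value) => {
                if let Some(name) = current_prop.take() {
                    props.push((name, value));
                }
            }
            Token::Value(value) => {
                // Hand the collected props over and start fresh for the next line.
                let props = replace(&mut props, vec![]);
                if let Some(key) = current_key.take() {
                    content.insert(key, (props, value));
                }
            }
        }
    }
    content
}

fn main() {
    let tokens = vec![
        Token::Key("DTSTART".to_owned()),
        Token::PropName("TZID".to_owned()),
        Token::PropValue("Europe/Berlin".to_owned()),
        Token::Value("20191121T150000".to_owned()),
    ];
    let content = collect(tokens);
    let (props, value) = &content["DTSTART"];
    assert_eq!(value, "20191121T150000");
    assert_eq!(props[0], ("TZID".to_owned(), "Europe/Berlin".to_owned()));
}
```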

View File

@@ -1,29 +1,40 @@
use std::mem::replace;
#[derive(Clone, Copy, PartialEq, Debug)]
enum State {
enum ByteState {
Char,
Newline,
}
#[derive(Clone, Copy, PartialEq, Debug)]
enum LineState {
Key,
PropName,
PropValue,
Value,
ValueNewline,
ValueEscape,
}
#[derive(Debug, PartialEq)]
pub enum Token {
Key(String),
PropName(String),
PropValue(String),
Value(String),
}
#[derive(Debug)]
pub struct Tokenizer {
state: State,
byte_state: ByteState,
line_state: LineState,
buffer: Vec<u8>,
}
impl Tokenizer {
pub fn new() -> Self {
Tokenizer {
state: State::Key,
byte_state: ByteState::Char,
line_state: LineState::Key,
buffer: vec![],
}
}
@@ -33,55 +44,112 @@ impl Tokenizer {
F: FnMut(Token),
{
for b in input {
match (self.state, *b as char) {
(_, '\r') => {}
(State::Key, ':') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Key(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
let bs = match (self.byte_state, *b as char) {
(_, '\r') => [None; 2],
(ByteState::Char, '\n') => {
self.byte_state = ByteState::Newline;
[None; 2]
}
(ByteState::Char, _) => [Some(*b), None],
(ByteState::Newline, ' ') => {
self.byte_state = ByteState::Char;
[None; 2]
}
(ByteState::Newline, _) => {
self.byte_state = ByteState::Char;
[Some('\n' as u8), Some(*b)]
}
};
for b in bs.iter().filter_map(|b| *b) {
match (self.line_state, b as char) {
(_, '\r') => {}
(LineState::Key, ':') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Key(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Value;
}
self.state = State::Value;
}
(State::Key, '\n') => {
println!("Key without value: {:?}", self.buffer);
self.state = State::Key;
self.buffer = vec![];
}
(State::Value, '\n') => {
self.state = State::ValueNewline;
}
(State::Value, '\\') => {
self.state = State::ValueEscape;
}
(State::ValueNewline, ' ') => {
self.state = State::Value;
}
(State::ValueNewline, _) => {
let buffer = replace(&mut self.buffer, vec![*b]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Value(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
(LineState::Key, '\n') => {
println!("Key without value: {:?}", self.buffer);
self.line_state = LineState::Key;
self.buffer = vec![];
}
self.state = State::Key;
(LineState::Key, ';') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Key(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::PropName;
}
(LineState::PropName, '=') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::PropName(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::PropValue;
}
(LineState::PropName, ':') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::PropName(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Value;
}
(LineState::PropValue, ':') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::PropValue(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Value;
}
(LineState::Value, '\n') => {
let buffer = replace(&mut self.buffer, vec![]);
match String::from_utf8(buffer) {
Ok(s) =>
f(Token::Value(s)),
Err(e) =>
println!("UTF8 error: {:?}", e),
}
self.line_state = LineState::Key;
}
(LineState::Value, '\\') => {
self.line_state = LineState::ValueEscape;
}
(LineState::ValueEscape, 'n') => {
self.buffer.push('\n' as u8);
self.line_state = LineState::Value;
}
(LineState::ValueEscape, 'r') => {
self.buffer.push('\n' as u8);
self.line_state = LineState::Value;
}
(LineState::ValueEscape, _) => {
self.buffer.push(b);
self.line_state = LineState::Value;
}
(_, _) => self.buffer.push(b),
}
(State::ValueEscape, 'n') => {
self.buffer.push('\n' as u8);
self.state = State::Value;
}
(State::ValueEscape, 'r') => {
self.buffer.push('\n' as u8);
self.state = State::Value;
}
(State::ValueEscape, _) => {
self.buffer.push(*b);
self.state = State::Value;
}
(_, _) => self.buffer.push(*b),
}
}
}
@@ -92,7 +160,7 @@ mod test {
use super::*;
#[test]
fn tokenize_attr() {
fn tokenize_prop() {
let mut t = Tokenizer::new();
let mut tokens = vec![];
t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000
@@ -100,8 +168,8 @@ mod test {
", |token| tokens.push(token));
assert_eq!(tokens, vec![
Token::Key("DTSTART".to_owned()),
Token::AttrName("TZID".to_owned()),
Token::AttrValue("Europe/Berlin".to_owned()),
Token::PropName("TZID".to_owned()),
Token::PropValue("Europe/Berlin".to_owned()),
Token::Value("20191121T150000".to_owned()),
]);
}
@@ -133,7 +201,7 @@ END:VEVENT
", |token| tokens.push(token));
assert_eq!(tokens, vec![
Token::Key("SUMMARY".to_owned()), Token::Value("Hello World".to_owned()),
Token::Key("SUMMARY".to_owned()), Token::Value("HelloWorld".to_owned()),
]);
}
}
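
The tokenizer now runs two state machines: `ByteState` un-folds physical lines (a newline followed by a space marks a continuation), and `LineState` splits each logical line into the key, optional `;NAME=VALUE` parameters, and the `:`-separated value. A rough standalone sketch of the same splitting on an already-unfolded line, using plain string handling instead of the per-byte state machine (`split_content_line` is a hypothetical helper, not part of the crate):

```rust
// Split one unfolded content line into (key, params, value); the real
// tokenizer does this incrementally, byte by byte, as input arrives.
fn split_content_line(line: &str) -> Option<(String, Vec<(String, String)>, String)> {
    // Everything after the first ':' is the value.
    let (head, value) = line.split_once(':')?;
    // The head is the key plus zero or more ';'-separated NAME=VALUE parameters.
    let mut parts = head.split(';');
    let key = parts.next()?.to_owned();
    let mut props = Vec::new();
    for part in parts {
        if let Some((name, val)) = part.split_once('=') {
            props.push((name.to_owned(), val.to_owned()));
        }
    }
    Some((key, props, value.to_owned()))
}

fn main() {
    // Unfolding first: a "\r\n " (or "\n ") sequence inside a line is removed.
    let folded = "DTSTART;TZID=Europe/Ber\r\n lin:20191121T150000";
    let unfolded = folded.replace("\r\n ", "").replace("\n ", "");

    let (key, props, value) = split_content_line(&unfolded).unwrap();
    assert_eq!(key, "DTSTART");
    assert_eq!(props, vec![("TZID".to_owned(), "Europe/Berlin".to_owned())]);
    assert_eq!(value, "20191121T150000");
    println!("{} {:?} {}", key, props, value);
}
```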

View File

@@ -16,8 +16,8 @@ fn fetch(client: &reqwest::Client, url: &str) -> Result<(), Box<dyn std::error::
len if len > 0 => {
let data = &buf[..len];
p.feed(data, |obj| {
println!("{} {}", obj.content.get("DTSTART").unwrap_or(&"?".to_owned()), obj.content.get("SUMMARY").unwrap_or(&"?".to_owned()));
println!("{}", obj.content.get("LOCATION").unwrap_or(&"?".to_owned()));
println!("{} {}", obj.get("DTSTART").unwrap_or("?"), obj.get("SUMMARY").unwrap_or("?"));
println!("{}", obj.get("LOCATION").unwrap_or("?"));
});
}
_ => break,
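
The call sites shrink because `Object::get` already returns `Option<&str>`, so a string literal works as the fallback instead of borrowing a temporary owned `String`. A small illustration of that difference, using a plain map as a stand-in for the real `Object`:

```rust
use std::collections::HashMap;

fn main() {
    let content = HashMap::from([
        ("SUMMARY".to_owned(), "Hello World".to_owned()),
    ]);

    // Borrowing straight from the map yields Option<&String>, which forces an
    // awkward owned fallback like content.get("X").unwrap_or(&"?".to_owned()).
    // Converting to Option<&str> first lets a plain literal stand in.
    let summary: &str = content.get("SUMMARY").map(String::as_str).unwrap_or("?");
    let location: &str = content.get("LOCATION").map(String::as_str).unwrap_or("?");
    println!("{} {}", summary, location);
}
```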