parse props too
This commit is contained in:
parent
f42d6a2603
commit
7a3935987e
|
@ -4,8 +4,17 @@ mod tokenizer;
|
||||||
mod parser;
|
mod parser;
|
||||||
pub use parser::Parser;
|
pub use parser::Parser;
|
||||||
|
|
||||||
|
pub type Props = Vec<(String, String)>;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct Object {
|
pub struct Object {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub content: HashMap<String, String>,
|
pub content: HashMap<String, (Props, String)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Object {
|
||||||
|
pub fn get(&self, key: &'_ str) -> Option<&str> {
|
||||||
|
self.content.get(key)
|
||||||
|
.map(|(props, value)| value.as_ref())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
use std::mem::replace;
|
use std::mem::replace;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use super::Object;
|
use super::{Object, Props};
|
||||||
use super::tokenizer::{Tokenizer, Token};
|
use super::tokenizer::{Tokenizer, Token};
|
||||||
|
|
||||||
pub struct Parser {
|
pub struct Parser {
|
||||||
tokenizer: Tokenizer,
|
tokenizer: Tokenizer,
|
||||||
object_name: Option<String>,
|
object_name: Option<String>,
|
||||||
current_key: Option<String>,
|
current_key: Option<String>,
|
||||||
content: HashMap<String, String>,
|
current_prop: Option<String>,
|
||||||
|
props: Props,
|
||||||
|
content: HashMap<String, (Props, String)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parser {
|
impl Parser {
|
||||||
|
@ -16,6 +18,8 @@ impl Parser {
|
||||||
tokenizer: Tokenizer::new(),
|
tokenizer: Tokenizer::new(),
|
||||||
object_name: None,
|
object_name: None,
|
||||||
current_key: None,
|
current_key: None,
|
||||||
|
current_prop: None,
|
||||||
|
props: vec![],
|
||||||
content: HashMap::new(),
|
content: HashMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,11 +29,19 @@ impl Parser {
|
||||||
F: FnMut(Object),
|
F: FnMut(Object),
|
||||||
{
|
{
|
||||||
let current_key = &mut self.current_key;
|
let current_key = &mut self.current_key;
|
||||||
|
let current_prop = &mut self.current_prop;
|
||||||
|
let props = &mut self.props;
|
||||||
let object_name = &mut self.object_name;
|
let object_name = &mut self.object_name;
|
||||||
let content = &mut self.content;
|
let content = &mut self.content;
|
||||||
self.tokenizer.feed(input, |token| {
|
self.tokenizer.feed(input, |token| {
|
||||||
match token {
|
match token {
|
||||||
Token::Key(key) => *current_key = Some(key),
|
Token::Key(key) => *current_key = Some(key),
|
||||||
|
Token::PropName(name) => *current_prop = Some(name),
|
||||||
|
Token::PropValue(value) => {
|
||||||
|
current_prop.take().map(|name| {
|
||||||
|
props.push((name, value));
|
||||||
|
});
|
||||||
|
}
|
||||||
Token::Value(value) => {
|
Token::Value(value) => {
|
||||||
fn compare(s1: &Option<String>, s2: &str) -> bool {
|
fn compare(s1: &Option<String>, s2: &str) -> bool {
|
||||||
s1.as_ref().map(|s1| s1 == s2).unwrap_or(s2.len() == 0)
|
s1.as_ref().map(|s1| s1 == s2).unwrap_or(s2.len() == 0)
|
||||||
|
@ -43,8 +55,9 @@ impl Parser {
|
||||||
let content = replace(content, HashMap::new());
|
let content = replace(content, HashMap::new());
|
||||||
object_name.map(|name| f(Object { name, content }));
|
object_name.map(|name| f(Object { name, content }));
|
||||||
} else {
|
} else {
|
||||||
|
let props = replace(props, vec![]);
|
||||||
let key = replace(current_key, None);
|
let key = replace(current_key, None);
|
||||||
key.map(|key| content.insert(key, value));
|
key.map(|key| content.insert(key, (props, value)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -75,7 +88,7 @@ END:VEVENT
|
||||||
("DTSTART", "19700101")]
|
("DTSTART", "19700101")]
|
||||||
.iter()
|
.iter()
|
||||||
.cloned()
|
.cloned()
|
||||||
.map(|(k, v)| (k.to_owned(), v.to_owned()))
|
.map(|(k, v)| (k.to_owned(), (vec![], v.to_owned())))
|
||||||
.collect(),
|
.collect(),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,29 +1,40 @@
|
||||||
use std::mem::replace;
|
use std::mem::replace;
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, Debug)]
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
enum State {
|
enum ByteState {
|
||||||
|
Char,
|
||||||
|
Newline,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
|
enum LineState {
|
||||||
Key,
|
Key,
|
||||||
|
PropName,
|
||||||
|
PropValue,
|
||||||
Value,
|
Value,
|
||||||
ValueNewline,
|
|
||||||
ValueEscape,
|
ValueEscape,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
Key(String),
|
Key(String),
|
||||||
|
PropName(String),
|
||||||
|
PropValue(String),
|
||||||
Value(String),
|
Value(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Tokenizer {
|
pub struct Tokenizer {
|
||||||
state: State,
|
byte_state: ByteState,
|
||||||
|
line_state: LineState,
|
||||||
buffer: Vec<u8>,
|
buffer: Vec<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Tokenizer {
|
impl Tokenizer {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Tokenizer {
|
Tokenizer {
|
||||||
state: State::Key,
|
byte_state: ByteState::Char,
|
||||||
|
line_state: LineState::Key,
|
||||||
buffer: vec![],
|
buffer: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,55 +44,112 @@ impl Tokenizer {
|
||||||
F: FnMut(Token),
|
F: FnMut(Token),
|
||||||
{
|
{
|
||||||
for b in input {
|
for b in input {
|
||||||
match (self.state, *b as char) {
|
let bs = match (self.byte_state, *b as char) {
|
||||||
(_, '\r') => {}
|
(_, '\r') => [None; 2],
|
||||||
(State::Key, ':') => {
|
(ByteState::Char, '\n') => {
|
||||||
let buffer = replace(&mut self.buffer, vec![]);
|
self.byte_state = ByteState::Newline;
|
||||||
match String::from_utf8(buffer) {
|
[None; 2]
|
||||||
Ok(s) =>
|
}
|
||||||
f(Token::Key(s)),
|
(ByteState::Char, _) => [Some(*b), None],
|
||||||
Err(e) =>
|
(ByteState::Newline, ' ') => {
|
||||||
println!("UTF8 error: {:?}", e),
|
self.byte_state = ByteState::Char;
|
||||||
|
[None; 2]
|
||||||
|
}
|
||||||
|
(ByteState::Newline, ' ') => {
|
||||||
|
self.byte_state = ByteState::Char;
|
||||||
|
[None; 2]
|
||||||
|
}
|
||||||
|
(ByteState::Newline, _) => {
|
||||||
|
self.byte_state = ByteState::Char;
|
||||||
|
[Some('\n' as u8), Some(*b)]
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for b in bs.iter().filter_map(|b| *b) {
|
||||||
|
match (self.line_state, b as char) {
|
||||||
|
(_, '\r') => {}
|
||||||
|
(LineState::Key, ':') => {
|
||||||
|
let buffer = replace(&mut self.buffer, vec![]);
|
||||||
|
match String::from_utf8(buffer) {
|
||||||
|
Ok(s) =>
|
||||||
|
f(Token::Key(s)),
|
||||||
|
Err(e) =>
|
||||||
|
println!("UTF8 error: {:?}", e),
|
||||||
|
}
|
||||||
|
self.line_state = LineState::Value;
|
||||||
}
|
}
|
||||||
self.state = State::Value;
|
(LineState::Key, '\n') => {
|
||||||
}
|
println!("Key without value: {:?}", self.buffer);
|
||||||
(State::Key, '\n') => {
|
self.line_state = LineState::Key;
|
||||||
println!("Key without value: {:?}", self.buffer);
|
self.buffer = vec![];
|
||||||
self.state = State::Key;
|
|
||||||
self.buffer = vec![];
|
|
||||||
}
|
|
||||||
(State::Value, '\n') => {
|
|
||||||
self.state = State::ValueNewline;
|
|
||||||
}
|
|
||||||
(State::Value, '\\') => {
|
|
||||||
self.state = State::ValueEscape;
|
|
||||||
}
|
|
||||||
(State::ValueNewline, ' ') => {
|
|
||||||
self.state = State::Value;
|
|
||||||
}
|
|
||||||
(State::ValueNewline, _) => {
|
|
||||||
let buffer = replace(&mut self.buffer, vec![*b]);
|
|
||||||
match String::from_utf8(buffer) {
|
|
||||||
Ok(s) =>
|
|
||||||
f(Token::Value(s)),
|
|
||||||
Err(e) =>
|
|
||||||
println!("UTF8 error: {:?}", e),
|
|
||||||
}
|
}
|
||||||
self.state = State::Key;
|
(LineState::Key, ';') => {
|
||||||
|
let buffer = replace(&mut self.buffer, vec![]);
|
||||||
|
match String::from_utf8(buffer) {
|
||||||
|
Ok(s) =>
|
||||||
|
f(Token::Key(s)),
|
||||||
|
Err(e) =>
|
||||||
|
println!("UTF8 error: {:?}", e),
|
||||||
|
}
|
||||||
|
self.line_state = LineState::PropName;
|
||||||
|
}
|
||||||
|
(LineState::PropName, '=') => {
|
||||||
|
let buffer = replace(&mut self.buffer, vec![]);
|
||||||
|
match String::from_utf8(buffer) {
|
||||||
|
Ok(s) =>
|
||||||
|
f(Token::PropName(s)),
|
||||||
|
Err(e) =>
|
||||||
|
println!("UTF8 error: {:?}", e),
|
||||||
|
}
|
||||||
|
self.line_state = LineState::PropValue;
|
||||||
|
}
|
||||||
|
(LineState::PropName, ':') => {
|
||||||
|
let buffer = replace(&mut self.buffer, vec![]);
|
||||||
|
match String::from_utf8(buffer) {
|
||||||
|
Ok(s) =>
|
||||||
|
f(Token::PropName(s)),
|
||||||
|
Err(e) =>
|
||||||
|
println!("UTF8 error: {:?}", e),
|
||||||
|
}
|
||||||
|
self.line_state = LineState::Value;
|
||||||
|
}
|
||||||
|
(LineState::PropValue, ':') => {
|
||||||
|
let buffer = replace(&mut self.buffer, vec![]);
|
||||||
|
match String::from_utf8(buffer) {
|
||||||
|
Ok(s) =>
|
||||||
|
f(Token::PropValue(s)),
|
||||||
|
Err(e) =>
|
||||||
|
println!("UTF8 error: {:?}", e),
|
||||||
|
}
|
||||||
|
self.line_state = LineState::Value;
|
||||||
|
}
|
||||||
|
(LineState::Value, '\n') => {
|
||||||
|
let buffer = replace(&mut self.buffer, vec![]);
|
||||||
|
match String::from_utf8(buffer) {
|
||||||
|
Ok(s) =>
|
||||||
|
f(Token::Value(s)),
|
||||||
|
Err(e) =>
|
||||||
|
println!("UTF8 error: {:?}", e),
|
||||||
|
}
|
||||||
|
self.line_state = LineState::Key;
|
||||||
|
}
|
||||||
|
(LineState::Value, '\\') => {
|
||||||
|
self.line_state = LineState::ValueEscape;
|
||||||
|
}
|
||||||
|
(LineState::ValueEscape, 'n') => {
|
||||||
|
self.buffer.push('\n' as u8);
|
||||||
|
self.line_state = LineState::Value;
|
||||||
|
}
|
||||||
|
(LineState::ValueEscape, 'r') => {
|
||||||
|
self.buffer.push('\n' as u8);
|
||||||
|
self.line_state = LineState::Value;
|
||||||
|
}
|
||||||
|
(LineState::ValueEscape, _) => {
|
||||||
|
self.buffer.push(b);
|
||||||
|
self.line_state = LineState::Value;
|
||||||
|
}
|
||||||
|
(_, _) => self.buffer.push(b),
|
||||||
}
|
}
|
||||||
(State::ValueEscape, 'n') => {
|
|
||||||
self.buffer.push('\n' as u8);
|
|
||||||
self.state = State::Value;
|
|
||||||
}
|
|
||||||
(State::ValueEscape, 'r') => {
|
|
||||||
self.buffer.push('\n' as u8);
|
|
||||||
self.state = State::Value;
|
|
||||||
}
|
|
||||||
(State::ValueEscape, _) => {
|
|
||||||
self.buffer.push(*b);
|
|
||||||
self.state = State::Value;
|
|
||||||
}
|
|
||||||
(_, _) => self.buffer.push(*b),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -92,7 +160,7 @@ mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_attr() {
|
fn tokenize_prop() {
|
||||||
let mut t = Tokenizer::new();
|
let mut t = Tokenizer::new();
|
||||||
let mut tokens = vec![];
|
let mut tokens = vec![];
|
||||||
t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000
|
t.feed(b"DTSTART;TZID=Europe/Berlin:20191121T150000
|
||||||
|
@ -100,8 +168,8 @@ mod test {
|
||||||
", |token| tokens.push(token));
|
", |token| tokens.push(token));
|
||||||
assert_eq!(tokens, vec![
|
assert_eq!(tokens, vec![
|
||||||
Token::Key("DTSTART".to_owned()),
|
Token::Key("DTSTART".to_owned()),
|
||||||
Token::AttrName("TZID".to_owned()),
|
Token::PropName("TZID".to_owned()),
|
||||||
Token::AttrValue("Europe/Berlin".to_owned()),
|
Token::PropValue("Europe/Berlin".to_owned()),
|
||||||
Token::Value("20191121T150000".to_owned()),
|
Token::Value("20191121T150000".to_owned()),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
@ -133,7 +201,7 @@ END:VEVENT
|
||||||
|
|
||||||
", |token| tokens.push(token));
|
", |token| tokens.push(token));
|
||||||
assert_eq!(tokens, vec![
|
assert_eq!(tokens, vec![
|
||||||
Token::Key("SUMMARY".to_owned()), Token::Value("Hello World".to_owned()),
|
Token::Key("SUMMARY".to_owned()), Token::Value("HelloWorld".to_owned()),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,8 +16,8 @@ fn fetch(client: &reqwest::Client, url: &str) -> Result<(), Box<dyn std::error::
|
||||||
len if len > 0 => {
|
len if len > 0 => {
|
||||||
let data = &buf[..len];
|
let data = &buf[..len];
|
||||||
p.feed(data, |obj| {
|
p.feed(data, |obj| {
|
||||||
println!("{} {}", obj.content.get("DTSTART").unwrap_or(&"?".to_owned()), obj.content.get("SUMMARY").unwrap_or(&"?".to_owned()));
|
println!("{} {}", obj.get("DTSTART").unwrap_or("?"), obj.get("SUMMARY").unwrap_or("?"));
|
||||||
println!("{}", obj.content.get("LOCATION").unwrap_or(&"?".to_owned()));
|
println!("{}", obj.get("LOCATION").unwrap_or("?"));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
_ => break,
|
_ => break,
|
||||||
|
|
Loading…
Reference in New Issue