Example: Add jxmpp Jid corpus parser

Signed-off-by: Maxime “pep” Buquet <pep@bouah.net>
This commit is contained in:
Maxime “pep” Buquet 2023-03-01 18:30:34 +01:00
parent 216d9c4a8d
commit 1ad3459968
No known key found for this signature in database
GPG Key ID: DEDA74AEECA9D0F2
2 changed files with 254 additions and 0 deletions

View File

@ -23,5 +23,13 @@ icu = { version = "0.1", optional = true }
minidom = { version = "0.15", optional = true }
serde = { version = "1.0", features = ["derive"], optional = true }
[dev-dependencies]
jid = { version = "*", features = ["stringprep"] }
nom = "7.1.3"
[features]
stringprep = ["icu"]
[[example]]
name = "corpus"
required-features = ["stringprep"]

246
jid/examples/corpus.rs Normal file
View File

@ -0,0 +1,246 @@
// Copyright (c) 2023 Maxime “pep” Buquet <pep@bouah.net>
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Affero General Public License as published by the
// Free Software Foundation, either version 3 of the License, or (at your
// option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
// for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! Tests with a corpus file respecting the following formats:
//! - For valid JIDs: <https://github.com/igniterealtime/jxmpp/blob/master/jxmpp-strings-testframework/src/main/resources/xmpp-strings/jids/valid/main>
//! - For invalid JIDs: <https://github.com/igniterealtime/jxmpp/blob/master/jxmpp-strings-testframework/src/main/resources/xmpp-strings/jids/invalid/main>
use std::env::args;
use std::fs::File;
use std::io::{self, Read};
use std::path::Path;
use std::str::FromStr;
use jid::{Jid, FullJid, BareJid};
use nom::{
branch::alt,
bytes::complete::{tag, take_while},
combinator::opt,
multi::many0,
sequence::tuple,
IResult,
};
/// A JID string as written in a corpus file, borrowed from the parsed text.
pub type Input<'a> = &'a str;
/// The `(node, domain, resource)` parts read from the normalized line of a
/// valid corpus entry.
pub type Output<'a> = (&'a str, &'a str, &'a str);
/// One item recognised by the corpus parsers.
enum EntryOrComment<'a> {
    /// Entry of the valid corpus: the JID as written, plus its
    /// `(node, domain, resource)` parts from the normalized line.
    ValidJid((Input<'a>, Output<'a>)),
    /// Entry of the invalid corpus: the JID text listed under an
    /// `invalid jid:` header.
    InvalidJid(Option<Input<'a>>),
    /// Any other line, stored without its trailing newline.
    Comment(&'a str),
}
/// Inputs and expected outputs collected from a valid-JID corpus.
///
/// Both vectors are filled in lockstep by [`parse_valid_corpus`], so
/// `input[n]` and `output[n]` belong to the same corpus entry.
#[derive(Debug)]
pub struct ValidCorpus<'a> {
    /// JID strings as written in the corpus.
    pub input: Vec<Input<'a>>,
    /// `(node, domain, resource)` parts listed for each input.
    pub output: Vec<Output<'a>>,
}

impl<'a> ValidCorpus<'a> {
    /// Consumes the corpus and hands back `(inputs, outputs)`.
    pub fn into_inner(self) -> (Vec<Input<'a>>, Vec<Output<'a>>) {
        let Self { input, output } = self;
        (input, output)
    }
}
/// JID strings collected from an invalid-JID corpus.
///
/// Derives `Debug` like [`ValidCorpus`] so that parse results can be printed
/// with `{:?}` when reporting errors.
#[derive(Debug)]
pub struct InvalidCorpus<'a> {
    /// JID text of every `invalid jid:` entry, in file order.
    pub input: Vec<Option<&'a str>>,
}
// Valid Jid parsing
/// Takes the longest (possibly empty) prefix of `i` that contains no line
/// feed (U+000A), record separator (U+001E) or unit separator (U+001F).
fn parse_noctrlchr(i: &str) -> IResult<&str, &str> {
    take_while(|c: char| !matches!(c, '\u{000A}' | '\u{001E}' | '\u{001F}'))(i)
}
/// Parses one line that is free of the U+001E / U+001F separators, up to and
/// including its terminating `\n`, and returns it as a comment.
///
/// The stored text excludes the newline; an empty line yields `""`.
fn parse_commentline(i: &str) -> IResult<&str, EntryOrComment> {
    let (rest, text) = opt(parse_noctrlchr)(i)?;
    let (rest, _) = tag("\n")(rest)?;
    Ok((rest, EntryOrComment::Comment(text.unwrap_or(""))))
}
/// Consumes the `jid:` header line (including its newline) that opens an
/// entry of the valid corpus.
fn parse_jidheader(i: &str) -> IResult<&str, ()> {
    tag("jid:\n")(i).map(|(rest, _)| (rest, ()))
}
fn parse_unnormalized_jid(i: &str) -> IResult<&str, &str> {
let (i, (jid, _)) = tuple((parse_noctrlchr, tag("\u{001E}\n")))(i)?;
Ok((i, jid))
}
/// Parses the normalized line of a valid corpus entry.
///
/// The line has the shape `node<US>domain<US>resource<RS>\n`, where `<US>` is
/// the unit separator (U+001F) and `<RS>` the record separator (U+001E). Any
/// of the three parts may be empty.
///
/// Returns the `(node, domain, resource)` parts.
fn parse_normalized_jid(i: &str) -> IResult<&str, (&str, &str, &str)> {
    let (i, (node, _)) = tuple((parse_noctrlchr, tag("\u{001F}")))(i)?;
    let (i, (domain, _)) = tuple((parse_noctrlchr, tag("\u{001F}")))(i)?;
    let (i, (resource, _)) = tuple((parse_noctrlchr, tag("\u{001E}\n")))(i)?;
    Ok((i, (node, domain, resource)))
}
/// Parses one complete entry of the valid corpus: the `jid:` header, the JID
/// as written, then its normalized `(node, domain, resource)` line.
fn parse_valid_jid_entry(i: &str) -> IResult<&str, EntryOrComment> {
    let (i, ()) = parse_jidheader(i)?;
    let (i, input) = parse_unnormalized_jid(i)?;
    let (i, output) = parse_normalized_jid(i)?;
    Ok((i, EntryOrComment::ValidJid((input, output))))
}
/// Tries to read the next item of a valid corpus: a JID entry first, a
/// comment line otherwise.
///
/// Because of the `opt` wrapper, a recoverable mismatch is reported as
/// `Ok((i, None))` with the input left untouched rather than as an error.
fn parse_valid_entry(i: &str) -> IResult<&str, Option<EntryOrComment>> {
    opt(alt((parse_valid_jid_entry, parse_commentline)))(i)
}
pub fn parse_valid_corpus(i: &str) -> IResult<&str, ValidCorpus> {
let mut corp = ValidCorpus { input: vec![], output: vec![] };
let (i, entries) = many0(parse_valid_entry)(i)?;
for entry in entries {
match entry {
Some(EntryOrComment::ValidJid((input, output))) => {
corp.input.push(input);
corp.output.push(output);
},
_ => (),
}
}
Ok((i, corp))
}
// Invalid Jid parsing
/// Takes the longest (possibly empty) prefix of `i` that contains no line
/// feed (U+000A), record separator (U+001E) or unit separator (U+001F).
fn parse_norschr(i: &str) -> IResult<&str, &str> {
    take_while(|c: char| !['\u{000A}', '\u{001E}', '\u{001F}'].contains(&c))(i)
}
/// Consumes the `invalid jid:` header line (including its newline) that opens
/// an entry of the invalid corpus.
fn parse_invalid_jid_header(i: &str) -> IResult<&str, ()> {
    tag("invalid jid:\n")(i).map(|(rest, _)| (rest, ()))
}
/// Parses the JID text of an invalid corpus entry: separator-free text
/// terminated by a record separator (U+001E) and a newline.
fn parse_invalid_jid(i: &str) -> IResult<&str, Option<&str>> {
    let (rest, candidate) = opt(parse_norschr)(i)?;
    let (rest, _) = tag("\u{001E}\n")(rest)?;
    Ok((rest, candidate))
}
fn parse_invalid_jid_entry(i: &str) -> IResult<&str, EntryOrComment> {
let (i, (_, jid)) = tuple((
parse_invalid_jid_header,
parse_invalid_jid,
))(i)?;
Ok((i, EntryOrComment::InvalidJid(jid)))
}
/// Tries to read the next item of an invalid corpus: a JID entry first, a
/// comment line otherwise.
///
/// Because of the `opt` wrapper, a recoverable mismatch is reported as
/// `Ok((i, None))` with the input left untouched rather than as an error.
fn parse_invalid_entry(i: &str) -> IResult<&str, Option<EntryOrComment>> {
    opt(alt((parse_invalid_jid_entry, parse_commentline)))(i)
}
pub fn parse_invalid_corpus(i: &str) -> IResult<&str, InvalidCorpus> {
let mut corp = InvalidCorpus { input: vec![] };
let (i, entries) = many0(parse_invalid_entry)(i)?;
for entry in entries {
match entry {
Some(EntryOrComment::InvalidJid(input)) => {
corp.input.push(input);
},
_ => (),
}
}
Ok((i, corp))
}
fn main() -> io::Result<()> {
let args: Vec<String> = args().collect();
if args.len() != 3 {
println!("Usage: {} <valid-file-path> <invalid-file-path>", args[0]);
return Err(io::Error::new(
io::ErrorKind::Other,
"Invalid argument count",
));
}
let valid_path = Path::new(&args[1]);
let invalid_path = Path::new(&args[2]);
if !valid_path.exists() || !invalid_path.exists() {
return Err(io::Error::new(
io::ErrorKind::Other,
"At least one specified file doesn't exist",
));
}
let mut file = File::open(valid_path)?;
let mut buf = String::new();
file.read_to_string(&mut buf)?;
let parsed_corpus = parse_valid_corpus(&buf);
if let Ok((_, corpus)) = parsed_corpus {
let (inputs, outputs) = corpus.into_inner();
let iter = inputs.into_iter().zip(outputs.into_iter());
for (input, (onode, odomain, oresource)) in iter {
println!("INPUT: {:?}", input);
let mut success = true;
match Jid::from_str(input) {
Ok(Jid::Full(FullJid { node, domain, resource })) => {
if !node.as_ref().map(|s| s == onode).unwrap_or_else(|| onode.len() == 0) ||
domain != odomain ||
resource != oresource {
success = false;
}
},
Ok(Jid::Bare(BareJid { node, domain })) => {
if !node.as_ref().map(|s| s == onode).unwrap_or_else(|| onode.len() == 0) ||
domain != odomain {
success = false;
}
},
_ => success = false,
}
if success {
println!(": \x1b[32m OK\x1b[0m\n");
} else {
println!(": \x1b[31mERR\x1b[0m\n");
}
}
} else {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("Couldn't parse valid corpus file: {parsed_corpus:?}"),
));
}
Ok(())
}