1
0
mirror of https://gitlab.com/xmpp-rs/xmpp-rs.git synced 2024-06-02 14:29:20 +02:00

icu: Remove this obsolete attempt

This commit is contained in:
Emmanuel Gil Peyrot 2023-06-20 18:57:36 +02:00
parent cea9c04507
commit 5b6dcb6549
10 changed files with 0 additions and 636 deletions

View File

@ -1,6 +1,5 @@
[workspace]
members = [ # alphabetically sorted
"icu",
"jid",
"minidom",
"parsers",
@ -9,7 +8,6 @@ members = [ # alphabetically sorted
]
[patch.crates-io]
icu = { path = "icu" }
jid = { path = "jid" }
minidom = { path = "minidom" }
tokio-xmpp = { path = "tokio-xmpp" }

View File

@ -1,12 +0,0 @@
[package]
name = "icu"
version = "0.1.0"
authors = ["Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
[build-dependencies]
cc = "1"

View File

@ -1,5 +0,0 @@
/// Build script entry point: compiles the C shim and emits the link directives for ICU.
fn main() {
    // Build src/bindings.c into a library called "bindings".
    let mut shim = cc::Build::new();
    shim.file("src/bindings.c");
    shim.compile("bindings");

    // Ask Cargo to link dynamically against the two ICU libraries the shim calls into.
    println!("cargo:rustc-link-lib=dylib=icuuc");
    println!("cargo:rustc-link-lib=dylib=icui18n");
}

View File

@ -1,54 +0,0 @@
// This file is a trivial wrapper whose only purpose is to avoid the automated
// symbol suffixing libicu does in unicode/urename.h.
//
// By default libicu suffixes each of its symbols with "_65" (65 being the
// soname version), which completely breaks Rust's ability to bind to them.
#include <unicode/umachine.h>
#include <unicode/utypes.h>
#include <unicode/usprep.h>
#include <unicode/utrace.h>
#include <unicode/uidna.h>
#include <unicode/uspoof.h>
#include <unicode/ustring.h>
#include <string.h>
// Returns the result of u_errorName() for `code`, exported under an unsuffixed symbol name.
const char* icu_error_code_to_name(UErrorCode code) {
return u_errorName(code);
}
// Forwards `options` and the status pointer to uidna_openUTS46() and returns its result.
UIDNA* icu_idna_open(uint32_t options, UErrorCode* pErrorCode) {
return uidna_openUTS46(options, pErrorCode);
}
// Forwards every argument unchanged to uidna_nameToASCII_UTF8() and returns its result.
int32_t icu_idna_name_to_ascii(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
return uidna_nameToASCII_UTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
}
// Forwards every argument unchanged to uidna_nameToUnicodeUTF8() and returns its result.
int32_t icu_idna_name_to_unicode(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
return uidna_nameToUnicodeUTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
}
// Forwards the profile type and status pointer to usprep_openByType() and returns its result.
UStringPrepProfile* icu_stringprep_open(UStringPrepProfileType type, UErrorCode* status) {
return usprep_openByType(type, status);
}
// Forwards every argument unchanged to usprep_prepare() and returns its result.
int32_t icu_stringprep_prepare(const UStringPrepProfile* prep, const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UParseError* parseError, UErrorCode* status) {
return usprep_prepare(prep, src, srcLength, dest, destCapacity, options, parseError, status);
}
// Forwards `traceLevel` to utrace_setLevel(); returns nothing.
void icu_trace_set_level(UTraceLevel traceLevel) {
utrace_setLevel(traceLevel);
}
// Forwards the status pointer to uspoof_open() and returns its result.
USpoofChecker* icu_spoof_open(UErrorCode* status) {
return uspoof_open(status);
}
// Forwards the checker, the `checks` value and the status pointer to uspoof_setChecks().
void icu_spoof_set_checks(USpoofChecker* sc, int32_t checks, UErrorCode* status) {
uspoof_setChecks(sc, checks, status);
}
// Forwards every argument unchanged to uspoof_getSkeletonUTF8() and returns its result.
int32_t icu_spoof_get_skeleton(USpoofChecker* sc, uint32_t type, const char* id, int32_t length, char* dest, int32_t destCapacity, UErrorCode* status) {
return uspoof_getSkeletonUTF8(sc, type, id, length, dest, destCapacity, status);
}

View File

@ -1,149 +0,0 @@
//! Crate wrapping what we need from ICU's C API for JIDs.
//!
//! See <http://site.icu-project.org/>
use std::os::raw::c_char;
// From unicode/umachine.h
// 16-bit code unit type used for the source and destination buffers of the stringprep binding.
pub(crate) type UChar = u16;
// From unicode/utypes.h
// Status code written by the bound functions through their `*mut UErrorCode` argument.
// NOTE(review): declared as u32 here; the C side uses the `UErrorCode` type from ICU's
// headers — confirm the two representations agree for every value ICU can produce.
pub(crate) type UErrorCode = u32;
// Status value the wrappers in this crate compare against to detect "no error".
pub(crate) const U_ZERO_ERROR: UErrorCode = 0;
// Stand-in types: in this file they only appear behind raw pointers in the extern declarations.
pub(crate) type UStringPrepProfile = u32;
type UParseError = u32;
// From unicode/usprep.h
// Option values for the `options` argument of `icu_stringprep_prepare`.
pub(crate) const USPREP_DEFAULT: i32 = 0;
pub(crate) const USPREP_ALLOW_UNASSIGNED: i32 = 1;
// Profile selectors passed to `icu_stringprep_open`.
// NOTE(review): the numeric values presumably mirror the enum in usprep.h — confirm.
pub(crate) type UStringPrepProfileType = u32;
pub(crate) const USPREP_RFC3491_NAMEPREP: UStringPrepProfileType = 0;
pub(crate) const USPREP_RFC3920_NODEPREP: UStringPrepProfileType = 7;
pub(crate) const USPREP_RFC3920_RESOURCEPREP: UStringPrepProfileType = 8;
pub(crate) const USPREP_RFC4013_SASLPREP: UStringPrepProfileType = 10;
// From unicode/utrace.h
// Trace level type and the level value handed to `icu_trace_set_level`.
type UTraceLevel = i32;
pub(crate) const UTRACE_VERBOSE: UTraceLevel = 9;
// From unicode/uidna.h
/// Zero-sized stand-in for the C `UIDNA` type; in this file it is only used behind raw pointers.
#[repr(C)]
pub(crate) struct UIDNA {
_unused: [u8; 0],
}
/// Boolean type used by the C structures below, stored as a signed byte.
type UBool = i8;

/// C-layout structure handed to the IDNA functions through a `*mut UIDNAInfo` argument.
///
/// The first field holds the structure's own byte size; `errors` is read back through
/// [`UIDNAInfo::get_errors`].
#[repr(C)]
pub(crate) struct UIDNAInfo {
    size: i16,
    is_transitional_different: UBool,
    reserved_b3: UBool,
    errors: u32,
    reserved_i2: i32,
    reserved_i3: i32,
}

impl UIDNAInfo {
    /// Builds an info structure with every field zeroed except `size`, which is set to the
    /// byte size of the structure.
    ///
    /// # Panics
    /// Panics if the Rust-side layout is not exactly 16 bytes.
    pub(crate) fn new() -> UIDNAInfo {
        let byte_size = std::mem::size_of::<Self>();
        assert_eq!(byte_size, 16);
        Self {
            size: byte_size as i16,
            is_transitional_different: 0,
            reserved_b3: 0,
            errors: 0,
            reserved_i2: 0,
            reserved_i3: 0,
        }
    }

    // TODO: Return a String instead, or a custom error type, this is a bitflag (defined in
    // uidna.h) where multiple errors can be accumulated.
    /// Returns the raw contents of the `errors` field.
    pub(crate) fn get_errors(&self) -> u32 {
        let Self { errors, .. } = self;
        *errors
    }
}
// Option bits passed as `options` to `icu_idna_open`.
pub(crate) const UIDNA_DEFAULT: u32 = 0;
pub(crate) const UIDNA_USE_STD3_RULES: u32 = 2;
// Common signature of `icu_idna_name_to_ascii` and `icu_idna_name_to_unicode`, so that either
// can be passed around as a value. The arguments are, in order: the IDNA handle, the input
// bytes and their length, the destination buffer and its capacity, the info structure, and
// the status code (`*mut u32`, which is what `*mut UErrorCode` resolves to in this file).
pub(crate) type UIdnaFunction = unsafe extern "C" fn(
*const UIDNA,
*const u8,
i32,
*mut u8,
i32,
*mut UIDNAInfo,
*mut u32,
) -> i32;
// From unicode/uspoof.h
/// Zero-sized stand-in for the C `USpoofChecker` type; in this file it is only used behind
/// raw pointers.
#[repr(C)]
pub(crate) struct USpoofChecker {
_unused: [u8; 0],
}
// Value passed as `checks` to `icu_spoof_set_checks`.
// NOTE(review): presumably mirrors the constant of the same name in uspoof.h — confirm.
pub(crate) const USPOOF_CONFUSABLE: i32 = 7;
// Declarations of the functions exported by the C shim, which is linked in as the
// "bindings" library.
#[link(name = "bindings")]
extern "C" {
    // Error-name lookup.
    pub(crate) fn icu_error_code_to_name(code: UErrorCode) -> *const c_char;

    // From unicode/usprep.h
    pub(crate) fn icu_stringprep_open(
        type_: UStringPrepProfileType,
        status: *mut UErrorCode,
    ) -> *mut UStringPrepProfile;

    pub(crate) fn icu_stringprep_prepare(
        prep: *const UStringPrepProfile,
        src: *const UChar,
        src_length: i32,
        dest: *mut UChar,
        dest_capacity: i32,
        options: i32,
        parse_error: *mut UParseError,
        status: *mut UErrorCode,
    ) -> i32;

    // From unicode/utrace.h
    pub(crate) fn icu_trace_set_level(trace_level: UTraceLevel);

    // From unicode/uidna.h
    pub(crate) fn icu_idna_open(options: u32, error_code: *mut UErrorCode) -> *mut UIDNA;

    pub(crate) fn icu_idna_name_to_ascii(
        idna: *const UIDNA,
        name: *const u8,
        length: i32,
        dest: *mut u8,
        capacity: i32,
        info: *mut UIDNAInfo,
        error_code: *mut UErrorCode,
    ) -> i32;

    pub(crate) fn icu_idna_name_to_unicode(
        idna: *const UIDNA,
        name: *const u8,
        length: i32,
        dest: *mut u8,
        capacity: i32,
        info: *mut UIDNAInfo,
        error_code: *mut UErrorCode,
    ) -> i32;

    // From unicode/uspoof.h
    pub(crate) fn icu_spoof_open(status: *mut UErrorCode) -> *mut USpoofChecker;

    pub(crate) fn icu_spoof_set_checks(
        sc: *mut USpoofChecker,
        checks: i32,
        status: *mut UErrorCode,
    );

    pub(crate) fn icu_spoof_get_skeleton(
        sc: *const USpoofChecker,
        type_: u32,
        id: *const u8,
        length: i32,
        dest: *mut u8,
        dest_capacity: i32,
        status: *mut UErrorCode,
    ) -> i32;
}

View File

@ -1,51 +0,0 @@
//! Crate wrapping what we need from ICU's C API for JIDs.
//!
//! See <http://site.icu-project.org/>
use crate::bindings::{icu_error_code_to_name, UErrorCode};
use std::ffi::CStr;
/// Errors this library can produce.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// An error produced by one of the ICU functions, carrying the name ICU reported for its
    /// status code.
    Icu(String),
    /// An error produced by one of the IDNA2008 ICU functions, as the raw error bitflag.
    Idna(u32),
    /// Some ICU function didn't produce a valid UTF-8 string, should never happen.
    Utf8(std::string::FromUtf8Error),
    /// Some ICU function didn't produce a valid UTF-16 string, should never happen.
    Utf16(std::char::DecodeUtf16Error),
    /// Some string was too long for its profile in JID.
    TooLong,
}

impl std::fmt::Display for Error {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Icu(name) => write!(f, "ICU error: {}", name),
            Error::Idna(flags) => write!(f, "IDNA error flags: {:#x}", flags),
            Error::Utf8(err) => write!(f, "invalid UTF-8 returned by ICU: {}", err),
            Error::Utf16(err) => write!(f, "invalid UTF-16 returned by ICU: {}", err),
            Error::TooLong => f.write_str("string too long"),
        }
    }
}

impl std::error::Error for Error {
    /// Exposes the wrapped decoding error, when there is one, as the underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Utf8(err) => Some(err),
            Error::Utf16(err) => Some(err),
            Error::Icu(_) | Error::Idna(_) | Error::TooLong => None,
        }
    }
}
impl Error {
pub(crate) fn from_icu_code(err: UErrorCode) -> Error {
let ptr = unsafe { icu_error_code_to_name(err) };
let c_str = unsafe { CStr::from_ptr(ptr) };
Error::Icu(c_str.to_string_lossy().into_owned())
}
}
impl From<UErrorCode> for Error {
fn from(err: UErrorCode) -> Error {
Error::from_icu_code(err)
}
}
impl From<std::string::FromUtf8Error> for Error {
fn from(err: std::string::FromUtf8Error) -> Error {
Error::Utf8(err)
}
}
impl From<std::char::DecodeUtf16Error> for Error {
fn from(err: std::char::DecodeUtf16Error) -> Error {
Error::Utf16(err)
}
}

View File

@ -1,69 +0,0 @@
//! Crate wrapping what we need from ICU's C API for JIDs.
//!
//! See <http://site.icu-project.org/>
use crate::bindings::{
icu_idna_name_to_ascii, icu_idna_name_to_unicode, icu_idna_open, UErrorCode, UIDNAInfo,
UIdnaFunction, UIDNA, U_ZERO_ERROR,
};
use crate::error::Error;
/// TODO: IDNA2008 support.
///
/// Holds the raw `UIDNA` pointer obtained from `icu_idna_open`.
// NOTE(review): no `Drop` impl is visible for this type, so the handle is presumably never
// released — confirm.
pub struct Idna {
inner: *mut UIDNA,
}
impl Idna {
/// Create a new Idna struct.
pub fn new(options: u32) -> Result<Idna, UErrorCode> {
let mut err: UErrorCode = U_ZERO_ERROR;
let inner = unsafe { icu_idna_open(options, &mut err) };
match err {
U_ZERO_ERROR => Ok(Idna { inner }),
err => Err(err),
}
}
/// Converts a whole domain name into its ASCII form for DNS lookup.
pub fn to_ascii(&self, input: &str) -> Result<String, Error> {
self.idna(input, icu_idna_name_to_ascii)
}
/// Converts a whole domain name into its Unicode form for human-readable display.
pub fn to_unicode(&self, input: &str) -> Result<String, Error> {
self.idna(input, icu_idna_name_to_unicode)
}
fn idna(&self, input: &str, function: UIdnaFunction) -> Result<String, Error> {
if input.len() > 255 {
return Err(Error::TooLong);
}
let mut err: UErrorCode = U_ZERO_ERROR;
let mut dest: Vec<u8> = vec![0u8; 256];
let mut info = UIDNAInfo::new();
let len = unsafe {
function(
self.inner,
input.as_ptr(),
input.len() as i32,
dest.as_mut_ptr(),
dest.len() as i32,
&mut info,
&mut err,
)
};
if err != U_ZERO_ERROR {
return Err(Error::from_icu_code(err));
}
let errors = info.get_errors();
if errors != 0 {
return Err(Error::Idna(errors));
}
if len > 255 {
return Err(Error::TooLong);
}
dest.truncate(len as usize);
Ok(String::from_utf8(dest)?)
}
}

View File

@ -1,154 +0,0 @@
//! Crate wrapping what we need from ICU's C API for JIDs.
//!
//! See <http://site.icu-project.org/>
#![deny(missing_docs)]
mod bindings;
mod error;
mod idna2008;
mod spoof;
mod stringprep;
use crate::bindings::{
icu_trace_set_level, UIDNA_DEFAULT, UIDNA_USE_STD3_RULES, USPOOF_CONFUSABLE,
USPREP_RFC3491_NAMEPREP, USPREP_RFC3920_NODEPREP, USPREP_RFC3920_RESOURCEPREP,
USPREP_RFC4013_SASLPREP, UTRACE_VERBOSE,
};
pub use crate::error::Error;
pub use crate::idna2008::Idna;
pub use crate::spoof::SpoofChecker;
use crate::stringprep::Stringprep;
/// How unassigned codepoints should be handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Strict {
    /// All codepoints should be assigned, otherwise an error will be emitted.
    True,
    /// Codepoints can be unassigned.
    AllowUnassigned,
}
/// Main struct of this module, exposing the needed ICU functions to JID.
///
/// The four stringprep profiles are private and reached through the methods of the same
/// name; the IDNA and spoof-checking helpers are public fields.
pub struct Icu {
nameprep: Stringprep,
nodeprep: Stringprep,
resourceprep: Stringprep,
saslprep: Stringprep,
/// IDNA2008 support.
///
/// See [RFC5891](https://tools.ietf.org/html/rfc5891).
pub idna2008: Idna,
/// Spoof checker, used to compute skeletons of strings.
///
/// TODO: better doc.
pub spoofchecker: SpoofChecker,
}
impl Icu {
    /// Create a new ICU struct, initialising stringprep profiles, IDNA2008, as well as a spoof
    /// checker.
    ///
    /// The trace level is set to `UTRACE_VERBOSE` first; the components are then opened in
    /// field order and the first failure is returned.
    pub fn new() -> Result<Icu, Error> {
        unsafe { icu_trace_set_level(UTRACE_VERBOSE) };

        let idna_options = UIDNA_DEFAULT | UIDNA_USE_STD3_RULES;

        // Field initialisers run in the order written, so the profiles are still opened
        // before the IDNA handle and the spoof checker.
        Ok(Icu {
            nameprep: Stringprep::new(USPREP_RFC3491_NAMEPREP)?,
            nodeprep: Stringprep::new(USPREP_RFC3920_NODEPREP)?,
            resourceprep: Stringprep::new(USPREP_RFC3920_RESOURCEPREP)?,
            saslprep: Stringprep::new(USPREP_RFC4013_SASLPREP)?,
            idna2008: Idna::new(idna_options)?,
            spoofchecker: SpoofChecker::new(USPOOF_CONFUSABLE)?,
        })
    }

    /// Perform stringprep using the Nameprep profile.
    ///
    /// See [RFC3491](https://tools.ietf.org/html/rfc3491).
    pub fn nameprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
        let profile = &self.nameprep;
        profile.stringprep(string, strict)
    }

    /// Perform stringprep using the Nodeprep profile.
    ///
    /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
    pub fn nodeprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
        let profile = &self.nodeprep;
        profile.stringprep(string, strict)
    }

    /// Perform stringprep using the Resourceprep profile.
    ///
    /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
    pub fn resourceprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
        let profile = &self.resourceprep;
        profile.stringprep(string, strict)
    }

    /// Perform stringprep using the Saslprep profile.
    ///
    /// See [RFC4013](https://tools.ietf.org/html/rfc4013).
    pub fn saslprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
        let profile = &self.saslprep;
        profile.stringprep(string, strict)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises the Nameprep profile through the public method.
    #[test]
    fn nameprep() {
        let icu = Icu::new().unwrap();
        let name = icu.nameprep("Link", Strict::True).unwrap();
        assert_eq!(name, "link");
    }

    // This assertion used to live in the test called `nameprep`, although it runs the
    // Nodeprep profile.
    #[test]
    fn nodeprep() {
        let icu = Icu::new().unwrap();
        let name = icu.nodeprep("Link", Strict::True).unwrap();
        assert_eq!(name, "link");
    }

    #[test]
    fn resourceprep() {
        let icu = Icu::new().unwrap();
        let name = icu
            .resourceprep("Test™", Strict::AllowUnassigned)
            .unwrap();
        assert_eq!(name, "TestTM");
    }

    #[test]
    fn idna() {
        let icu = Icu::new().unwrap();

        let ascii = icu.idna2008.to_ascii("☃.coM").unwrap();
        assert_eq!(ascii, "xn--n3h.com");

        let unicode = icu.idna2008.to_unicode("xn--N3H.com").unwrap();
        assert_eq!(unicode, "☃.com");
    }

    #[test]
    fn spoof() {
        let icu = Icu::new().unwrap();

        // Non-breakable and narrow non-breakable spaces spoofing. The two spaces are written
        // as escapes so they stay visible and survive editors that normalise whitespace.
        let skeleton = icu
            .spoofchecker
            .get_skeleton("foo\u{a0}bar\u{202f}baz")
            .unwrap();
        assert_eq!(skeleton, "foo bar baz");

        // Cyrillic spoofing.
        let skeleton = icu.spoofchecker.get_skeleton("Неllо wоrld").unwrap();
        assert_eq!(skeleton, "Hello world");
    }
}

View File

@ -1,52 +0,0 @@
//! Crate wrapping what we need from ICU's C API for JIDs.
//!
//! See <http://site.icu-project.org/>
use crate::bindings::{
icu_spoof_get_skeleton, icu_spoof_open, icu_spoof_set_checks, UErrorCode, USpoofChecker,
U_ZERO_ERROR,
};
use crate::error::Error;
/// TODO: spoof checker.
///
/// Holds the raw `USpoofChecker` pointer obtained from `icu_spoof_open`.
// NOTE(review): no `Drop` impl is visible for this type, so the handle is presumably never
// released — confirm.
pub struct SpoofChecker {
inner: *mut USpoofChecker,
}
impl SpoofChecker {
/// Create a new SpoofChecker.
pub fn new(checks: i32) -> Result<SpoofChecker, UErrorCode> {
let mut err: UErrorCode = U_ZERO_ERROR;
let inner = unsafe { icu_spoof_open(&mut err) };
if err != U_ZERO_ERROR {
return Err(err);
}
unsafe { icu_spoof_set_checks(inner, checks, &mut err) };
if err != U_ZERO_ERROR {
return Err(err);
}
Ok(SpoofChecker { inner })
}
/// Transform a string into a skeleton for matching it with other potentially similar strings.
pub fn get_skeleton(&self, input: &str) -> Result<String, Error> {
let mut err: UErrorCode = U_ZERO_ERROR;
let mut dest: Vec<u8> = vec![0u8; 256];
let len = unsafe {
icu_spoof_get_skeleton(
self.inner,
0,
input.as_ptr(),
input.len() as i32,
dest.as_mut_ptr(),
dest.len() as i32,
&mut err,
)
};
if err != U_ZERO_ERROR {
return Err(Error::from_icu_code(err));
}
dest.truncate(len as usize);
Ok(String::from_utf8(dest)?)
}
}

View File

@ -1,88 +0,0 @@
//! Crate wrapping what we need from ICU's C API for JIDs.
//!
//! See <http://site.icu-project.org/>
use crate::bindings::{
icu_stringprep_open, icu_stringprep_prepare, UChar, UErrorCode, UStringPrepProfile,
UStringPrepProfileType, USPREP_ALLOW_UNASSIGNED, USPREP_DEFAULT, U_ZERO_ERROR,
};
use crate::error::Error;
use crate::Strict;
use std::ptr::null_mut;
/// Struct representing a given stringprep profile.
///
/// Holds the raw `UStringPrepProfile` pointer obtained from `icu_stringprep_open`.
// NOTE(review): no `Drop` impl is visible for this type, so the profile is presumably never
// released — confirm.
pub(crate) struct Stringprep {
inner: *mut UStringPrepProfile,
}
impl Stringprep {
    /// Create a new Stringprep struct for the given profile.
    ///
    /// Any status other than `U_ZERO_ERROR` is returned as the error value.
    pub(crate) fn new(profile: UStringPrepProfileType) -> Result<Stringprep, UErrorCode> {
        let mut err: UErrorCode = U_ZERO_ERROR;
        let inner = unsafe { icu_stringprep_open(profile, &mut err) };
        match err {
            U_ZERO_ERROR => Ok(Stringprep { inner }),
            err => Err(err),
        }
    }

    /// Perform a stringprep operation using this profile.
    ///
    /// # Errors
    /// - `Error::TooLong` if the input or the prepared output is longer than 1023 bytes of
    ///   UTF-8.
    /// - `Error::Icu` if the status code is not `U_ZERO_ERROR`.
    /// - `Error::Utf16` if ICU doesn't return a valid UTF-16 string, which should never
    ///   happen.
    pub(crate) fn stringprep(&self, input: &str, strict: Strict) -> Result<String, Error> {
        if input.len() > 1023 {
            return Err(Error::TooLong);
        }

        // ICU works on UTF-16 data, so convert it first.
        let unprepped: Vec<UChar> = input.encode_utf16().collect();

        // Now do the actual stringprep operation.
        let mut prepped: Vec<UChar> = vec![0u16; 1024];
        let flags = match strict {
            Strict::True => USPREP_DEFAULT,
            Strict::AllowUnassigned => USPREP_ALLOW_UNASSIGNED,
        };
        self.prepare(&unprepped, &mut prepped, flags)?;

        // And then convert it back to UTF-8, stopping at the first invalid code unit.
        let output = std::char::decode_utf16(prepped).collect::<Result<String, _>>()?;
        if output.len() > 1023 {
            return Err(Error::TooLong);
        }
        Ok(output)
    }

    /// Runs the profile on `input`, writing into `buf` and truncating `buf` to the length
    /// reported back.
    ///
    /// `buf` must already be sized to the desired capacity, since its current length is what
    /// gets passed as the destination capacity. Any status other than `U_ZERO_ERROR` is
    /// returned as the error value and leaves `buf` at its original length.
    fn prepare(&self, input: &[UChar], buf: &mut Vec<UChar>, flags: i32) -> Result<(), UErrorCode> {
        let mut err: UErrorCode = U_ZERO_ERROR;
        let prepped_len = unsafe {
            icu_stringprep_prepare(
                self.inner,
                input.as_ptr(),
                input.len() as i32,
                buf.as_mut_ptr(),
                buf.len() as i32,
                flags,
                // No parse-error details are requested.
                null_mut(),
                &mut err,
            )
        };
        if err != U_ZERO_ERROR {
            return Err(err);
        }
        buf.truncate(prepped_len as usize);
        Ok(())
    }
}