2022-11-11 19:00:37 +01:00
|
|
|
use std::{collections::{HashMap, HashSet}, time::Duration};
|
2022-11-02 22:42:43 +01:00
|
|
|
use chrono::{DateTime, FixedOffset};
|
2022-11-11 21:52:52 +01:00
|
|
|
use futures::{Stream, StreamExt};
|
2022-11-11 19:00:37 +01:00
|
|
|
use eventsource_stream::Eventsource;
|
2022-11-02 22:42:43 +01:00
|
|
|
|
2022-11-15 00:45:02 +01:00
|
|
|
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
2022-11-02 21:12:16 +01:00
|
|
|
pub struct Account {
|
|
|
|
pub username: String,
|
|
|
|
pub display_name: String,
|
|
|
|
pub url: String,
|
|
|
|
pub bot: bool,
|
|
|
|
pub avatar: String,
|
|
|
|
pub header: String,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Account {
|
|
|
|
pub fn host(&self) -> Option<String> {
|
|
|
|
reqwest::Url::parse(&self.url)
|
|
|
|
.ok()
|
|
|
|
.and_then(|url| url.domain()
|
2022-11-03 17:37:06 +01:00
|
|
|
.map(|s| s.to_lowercase())
|
2022-11-02 21:12:16 +01:00
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-15 00:45:02 +01:00
|
|
|
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
2022-11-02 21:12:16 +01:00
|
|
|
pub struct Tag {
|
|
|
|
pub name: String,
|
|
|
|
}
|
|
|
|
|
2022-11-15 00:45:02 +01:00
|
|
|
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
2022-11-02 21:12:16 +01:00
|
|
|
pub struct Application {
|
|
|
|
pub name: String,
|
2022-11-02 22:42:20 +01:00
|
|
|
pub website: Option<String>,
|
2022-11-02 21:12:16 +01:00
|
|
|
}
|
|
|
|
|
2022-11-15 00:45:02 +01:00
|
|
|
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
2022-11-03 03:42:13 +01:00
|
|
|
pub struct Mention {
|
|
|
|
pub username: Option<String>,
|
|
|
|
pub url: String,
|
|
|
|
pub acct: Option<String>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Mention {
|
|
|
|
pub fn user_host(&self) -> Option<String> {
|
|
|
|
reqwest::Url::parse(&self.url)
|
|
|
|
.ok()
|
|
|
|
.and_then(|url| url.domain()
|
2022-11-03 17:37:06 +01:00
|
|
|
.map(|host| host.to_lowercase())
|
2022-11-03 03:42:13 +01:00
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-25 02:43:28 +01:00
|
|
|
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
|
|
|
pub struct MediaAttachment {
|
|
|
|
#[serde(rename = "type")]
|
|
|
|
pub media_type: String,
|
|
|
|
pub remote_url: Option<String>,
|
|
|
|
}
|
|
|
|
|
2022-11-15 00:45:02 +01:00
|
|
|
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
2022-11-02 21:12:16 +01:00
|
|
|
pub struct Post {
|
|
|
|
pub created_at: String,
|
2022-11-04 15:50:00 +01:00
|
|
|
pub uri: String,
|
2022-11-25 02:43:28 +01:00
|
|
|
#[serde(default = "String::new")]
|
2022-11-02 21:12:16 +01:00
|
|
|
pub content: String,
|
|
|
|
pub account: Account,
|
2022-11-25 02:43:28 +01:00
|
|
|
#[serde(default)]
|
2022-11-02 21:12:16 +01:00
|
|
|
pub tags: Vec<Tag>,
|
|
|
|
pub application: Option<Application>,
|
|
|
|
pub sensitive: Option<bool>,
|
2022-11-25 02:43:28 +01:00
|
|
|
#[serde(default)]
|
2022-11-03 03:42:13 +01:00
|
|
|
pub mentions: Vec<Mention>,
|
2022-11-03 17:13:03 +01:00
|
|
|
pub language: Option<String>,
|
2022-11-25 02:43:28 +01:00
|
|
|
#[serde(default)]
|
|
|
|
pub media_attachments: Vec<MediaAttachment>,
|
2022-11-02 21:12:16 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Post {
|
2022-11-04 15:50:00 +01:00
|
|
|
pub fn uri_host(&self) -> Option<String> {
|
|
|
|
reqwest::Url::parse(&self.uri)
|
2022-11-02 21:49:37 +01:00
|
|
|
.ok()
|
2022-11-04 15:50:00 +01:00
|
|
|
.and_then(|uri| uri.domain()
|
2022-11-03 16:17:04 +01:00
|
|
|
.map(|host| host.to_owned())
|
|
|
|
)
|
2022-11-02 21:49:37 +01:00
|
|
|
}
|
2022-11-02 22:42:43 +01:00
|
|
|
|
2022-11-14 01:13:49 +01:00
|
|
|
pub fn user_id(&self) -> Option<String> {
|
|
|
|
let username = self.account.username.to_lowercase();
|
|
|
|
let host = self.uri_host()?;
|
|
|
|
Some(format!("{}@{}", username, host))
|
|
|
|
}
|
|
|
|
|
2022-11-02 22:42:43 +01:00
|
|
|
pub fn timestamp(&self) -> Option<DateTime<FixedOffset>> {
|
|
|
|
DateTime::parse_from_rfc3339(&self.created_at)
|
|
|
|
.ok()
|
|
|
|
}
|
2022-11-08 00:43:46 +01:00
|
|
|
|
|
|
|
/// clip "en-us" to "en"
|
|
|
|
pub fn lang(&self) -> Option<String> {
|
|
|
|
let language = match &self.language {
|
|
|
|
Some(language) => language,
|
|
|
|
None => return None,
|
|
|
|
};
|
|
|
|
|
|
|
|
if language.len() < 2 {
|
|
|
|
None
|
|
|
|
} else if language.len() == 2 {
|
|
|
|
Some(language.to_lowercase())
|
|
|
|
} else {
|
|
|
|
Some(language[..2].to_lowercase())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn tags_set(&self) -> HashMap<String, HashSet<String>> {
|
|
|
|
let mut result: HashMap<String, HashSet<String>> = HashMap::with_capacity(self.tags.len());
|
|
|
|
for tag in &self.tags {
|
|
|
|
let name = tag.name.to_lowercase();
|
|
|
|
if name.contains(char::is_whitespace) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
match result.entry(name) {
|
|
|
|
std::collections::hash_map::Entry::Vacant(entry) => {
|
|
|
|
let mut r = HashSet::new();
|
|
|
|
r.insert(tag.name.clone());
|
|
|
|
entry.insert(r);
|
|
|
|
}
|
|
|
|
std::collections::hash_map::Entry::Occupied(mut entry) => {
|
|
|
|
entry.get_mut().insert(tag.name.clone());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result
|
|
|
|
}
|
2022-11-02 21:12:16 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct Feed {
|
|
|
|
pub posts: Vec<Post>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Feed {
|
2022-11-11 19:00:37 +01:00
|
|
|
/// Analyze time intervals between posts to estimate when to fetch
|
|
|
|
/// next
|
|
|
|
pub fn mean_post_interval(&self) -> Option<Duration> {
|
|
|
|
let mut timestamps = self.posts.iter()
|
|
|
|
.filter_map(|post| post.timestamp())
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
timestamps.sort();
|
|
|
|
|
|
|
|
if timestamps.len() > 2 {
|
|
|
|
Some(
|
|
|
|
((*timestamps.last().unwrap() - timestamps[0]) / (timestamps.len() as i32 - 1)
|
|
|
|
).to_std().unwrap()
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-02 21:12:16 +01:00
|
|
|
pub async fn fetch(client: &reqwest::Client, url: &str) -> Result<Self, reqwest::Error> {
|
|
|
|
let posts: Vec<Post> = client.get(url)
|
|
|
|
.send()
|
|
|
|
.await?
|
|
|
|
.json()
|
|
|
|
.await?;
|
2022-11-03 20:59:36 +01:00
|
|
|
log::trace!("{} {} posts", url, posts.len());
|
2022-11-02 21:12:16 +01:00
|
|
|
Ok(Feed { posts })
|
|
|
|
}
|
2022-11-11 19:00:37 +01:00
|
|
|
|
2022-11-11 21:52:52 +01:00
|
|
|
pub async fn stream(client: &reqwest::Client, url: &str) -> Result<impl Stream<Item = Post>, String> {
|
2022-11-11 19:00:37 +01:00
|
|
|
let res = client.get(url)
|
2022-11-11 21:52:52 +01:00
|
|
|
.timeout(Duration::MAX)
|
2022-11-11 19:00:37 +01:00
|
|
|
.send()
|
2022-11-11 21:52:52 +01:00
|
|
|
.await
|
|
|
|
.map_err(|e| format!("{}", e))?;
|
|
|
|
if res.status() != 200 {
|
|
|
|
return Err(format!("HTTP {}", res.status()));
|
|
|
|
}
|
|
|
|
let ct = res.headers().get("content-type")
|
|
|
|
.and_then(|c| c.to_str().ok());
|
|
|
|
if ct.map_or(true, |ct| ct != "text/event-stream") {
|
|
|
|
return Err(format!("Invalid Content-Type: {:?}", ct));
|
|
|
|
}
|
|
|
|
|
|
|
|
let src = res.bytes_stream().eventsource()
|
|
|
|
.filter_map(|result| async {
|
|
|
|
let result = result.ok()?;
|
|
|
|
if result.event == "update" {
|
|
|
|
Some(result)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.filter_map(|event| async move {
|
|
|
|
match serde_json::from_str(&event.data) {
|
2022-11-17 00:09:02 +01:00
|
|
|
Ok(post) => {
|
|
|
|
drop(event);
|
|
|
|
Some(post)
|
|
|
|
},
|
2022-11-11 21:52:52 +01:00
|
|
|
Err(e) => {
|
|
|
|
log::error!("Error decoding stream data: {}", e);
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
2022-11-11 19:00:37 +01:00
|
|
|
Ok(src)
|
|
|
|
}
|
2022-11-02 21:12:16 +01:00
|
|
|
}
|