implement next_interval heuristics

This commit is contained in:
Astro 2022-11-02 22:42:43 +01:00
parent deaa232716
commit 6439eb611d
4 changed files with 215 additions and 3 deletions

180
Cargo.lock generated
View File

@ -8,6 +8,15 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "async-compression"
version = "0.3.15"
@ -55,6 +64,7 @@ checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
name = "caveman"
version = "0.0.0"
dependencies = [
"chrono",
"reqwest",
"serde",
"serde_yaml",
@ -73,6 +83,31 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
dependencies = [
"iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"time",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "core-foundation"
version = "0.9.3"
@ -98,6 +133,50 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "cxx"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b7d4e43b25d3c994662706a1d4fcfc32aaa6afd287502c111b237093bb23f3a"
dependencies = [
"cc",
"cxxbridge-flags",
"cxxbridge-macro",
"link-cplusplus",
]
[[package]]
name = "cxx-build"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84f8829ddc213e2c1368e51a2564c552b65a8cb6a28f31e576270ac81d5e5827"
dependencies = [
"cc",
"codespan-reporting",
"once_cell",
"proc-macro2",
"quote",
"scratch",
"syn",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e72537424b474af1460806647c41d4b6d35d09ef7fe031c5c2fa5766047cc56a"
[[package]]
name = "cxxbridge-macro"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "309e4fb93eed90e1e14bea0da16b209f81813ba9fc7830c20ed151dd7bc0a4d7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "encoding_rs"
version = "0.8.31"
@ -300,6 +379,30 @@ dependencies = [
"tokio-native-tls",
]
[[package]]
name = "iana-time-zone"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca"
dependencies = [
"cxx",
"cxx-build",
]
[[package]]
name = "idna"
version = "0.3.0"
@ -362,6 +465,15 @@ version = "0.2.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89"
[[package]]
name = "link-cplusplus"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369"
dependencies = [
"cc",
]
[[package]]
name = "lock_api"
version = "0.4.9"
@ -410,7 +522,7 @@ checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de"
dependencies = [
"libc",
"log",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.42.0",
]
@ -432,6 +544,25 @@ dependencies = [
"tempfile",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.13.1"
@ -637,6 +768,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scratch"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898"
[[package]]
name = "security-framework"
version = "2.7.0"
@ -775,6 +912,26 @@ dependencies = [
"winapi",
]
[[package]]
name = "termcolor"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
dependencies = [
"winapi-util",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
@ -898,6 +1055,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unsafe-libyaml"
version = "0.2.4"
@ -931,6 +1094,12 @@ dependencies = [
"try-lock",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
@ -1029,6 +1198,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"

View File

@ -8,3 +8,4 @@ tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "deflate", "gzip"] }
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9"
chrono = "0.4"

View File

@ -1,3 +1,5 @@
use chrono::{DateTime, FixedOffset};
#[derive(Debug, serde::Deserialize)]
pub struct Account {
pub username: String,
@ -49,6 +51,11 @@ impl Post {
.and_then(|url| url.domain()
.map(|s| s.to_owned()))
}
pub fn timestamp(&self) -> Option<DateTime<FixedOffset>> {
DateTime::parse_from_rfc3339(&self.created_at)
.ok()
}
}
#[derive(Debug)]

View File

@ -2,6 +2,8 @@ use std::collections::HashSet;
use std::time::Duration;
use crate::feed;
const DEFAULT_INTERVAL: Duration = Duration::from_secs(3600);
#[derive(Debug)]
pub enum Message {
Fetched { host: String, next_interval: Duration },
@ -19,7 +21,31 @@ pub fn fetch_and_process(
tokio::spawn(async move {
match feed::Feed::fetch(&client, &url).await {
Ok(feed) => {
// Analyze
// Analyze time intervals between posts to estimate when to fetch next
let mut timestamps = feed.posts.iter()
.filter_map(|post| post.timestamp())
.collect::<Vec<_>>();
timestamps.sort();
let mut intervals = Vec::with_capacity(feed.posts.len().saturating_sub(1));
let mut last_timestamp = None;
for timestamp in timestamps.into_iter() {
if let Some(last_timestamp) = last_timestamp {
intervals.push(((timestamp - last_timestamp) as chrono::Duration).to_std().unwrap());
}
last_timestamp = Some(timestamp);
}
let intervals_len = intervals.len() as u32;
let next_interval = if intervals.len() > 0 {
(intervals.into_iter()
.fold(Duration::ZERO, |sum, interval| sum + interval)
/ intervals_len
).min(DEFAULT_INTERVAL)
} else {
DEFAULT_INTERVAL
};
dbg!(next_interval);
// introduce new hosts, validate posts
let mut posts = Vec::with_capacity(feed.posts.len());
let mut hosts = HashSet::new();
for post in feed.posts.into_iter() {
@ -36,7 +62,7 @@ pub fn fetch_and_process(
tx.send(Message::Fetched {
host: host.clone(),
next_interval: Duration::from_secs(10),
next_interval,
}).unwrap();
}
Err(e) => {