caveman/hunter/src/scheduler.rs

164 lines
4.9 KiB
Rust

use std::collections::{HashMap, BTreeMap, hash_map};
use std::sync::Arc;
use std::time::Duration;
use cave::feed::{Mention, Account};
use rand::{thread_rng, Rng};
use tokio::time::Instant;
use cave::block_list::BlockList;
/// Lower bound for the delay before a host is fetched again (30 seconds).
const MIN_INTERVAL: Duration = Duration::from_secs(30);
/// Upper bound for the delay before a host is fetched again (6 hours).
const MAX_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
/// Delay used when neither a mean post interval nor a previous fetch time is
/// available to derive one from (10 minutes).
const DEFAULT_INTERVAL: Duration = Duration::from_secs(10 * 60);
/// Identifier of an instance host. Wrapped in an `Arc` so one string can be
/// shared cheaply, e.g. between the scheduler's instance map and its queue.
pub type Host = Arc<String>;
/// A host handed to or returned by the scheduler, optionally paired with one
/// user name that was seen on that host.
#[derive(Debug)]
pub struct InstanceHost {
    /// Shared identifier of the host.
    pub host: Host,
    /// A user name seen on `host`, when whoever produced this value had one.
    pub known_user: Option<String>,
}

impl InstanceHost {
    /// Wraps a bare host name, with no known user attached.
    pub fn just_host(host: String) -> Self {
        let host = Arc::new(host);
        Self {
            host,
            known_user: None,
        }
    }
}
/// Builds an [`InstanceHost`] from a feed [`Account`], keeping the account's
/// user name as the known user on that host.
impl TryFrom<&Account> for InstanceHost {
    /// Carries no detail: the only failure is an account that yields no host.
    type Error = ();

    /// Returns `Err(())` when `account.host()` is `None`.
    fn try_from(account: &Account) -> Result<Self, Self::Error> {
        match account.host() {
            Some(host) => Ok(InstanceHost {
                host: Arc::new(host),
                known_user: Some(account.username.clone()),
            }),
            None => Err(()),
        }
    }
}
/// Builds an [`InstanceHost`] from a feed [`Mention`], keeping the mentioned
/// user name as the known user on that host.
impl TryFrom<&Mention> for InstanceHost {
    /// Carries no detail: the only failure is a mention that yields no host.
    type Error = ();

    /// Returns `Err(())` when `mention.host()` is `None`.
    fn try_from(mention: &Mention) -> Result<Self, Self::Error> {
        match mention.host() {
            Some(host) => Ok(InstanceHost {
                host: Arc::new(host),
                known_user: Some(mention.username.clone()),
            }),
            None => Err(()),
        }
    }
}
/// Per-host state the scheduler keeps for every host it has accepted.
pub struct Instance {
/// Time of the most recent `Scheduler::reenqueue` call for this host;
/// `None` for a host that was introduced but has not been re-enqueued yet.
last_fetch: Option<Instant>,
/// Error flag. The code visible here only ever writes `false` to it and never
/// reads it — NOTE(review): confirm whether anything else relies on it.
error: bool,
/// User name supplied together with the host (`InstanceHost::known_user`),
/// if any was ever provided.
known_user: Option<String>,
}
/// Scheduler
pub struct Scheduler {
instances: HashMap<Host, Instance>,
queue: BTreeMap<Instant, Host>,
block_list: BlockList,
}
impl Scheduler {
pub fn new(block_list: BlockList) -> Self {
Scheduler {
instances: HashMap::new(),
queue: BTreeMap::new(),
block_list,
}
}
pub fn size(&self) -> usize {
self.instances.len()
}
pub fn queue_len(&self) -> usize {
self.queue.len()
}
pub async fn introduce(&mut self, introduce_host: InstanceHost) -> bool {
if self.block_list.is_blocked(&introduce_host.host).await {
return false;
}
let now = Instant::now();
let host = introduce_host.host;
match self.instances.entry(host.clone()) {
hash_map::Entry::Vacant(entry) => {
entry.insert(Instance {
last_fetch: None,
error: false,
known_user: introduce_host.known_user,
});
self.queue.insert(now, host);
}
hash_map::Entry::Occupied(ref mut entry) => {
if let Some(known_user) = &introduce_host.known_user {
entry.get_mut().known_user = Some(known_user.clone());
}
}
}
true
}
pub fn reenqueue(&mut self, host: Host, new_post_ratio: Option<f64>, mean_interval: Option<Duration>) {
let now = Instant::now();
let instance = self.instances.get_mut(&host).expect("Scheduler::reenqueue()");
let last_interval = instance.last_fetch.map(|last_fetch| now - last_fetch);
instance.last_fetch = Some(now);
instance.error = false;
let next_interval = match (new_post_ratio, mean_interval, last_interval) {
(Some(new_post_ratio), Some(mean_interval), _) if new_post_ratio > 0. =>
mean_interval,
(_, _, Some(last_interval)) => {
let a = thread_rng().gen_range(2. .. 3.);
last_interval.mul_f64(a)
}
_ =>
DEFAULT_INTERVAL,
}.max(MIN_INTERVAL).min(MAX_INTERVAL);
let mut next = now + next_interval;
let mut d = 1;
// avoid timestamp collision in self.queue
while self.queue.get(&next).is_some() {
d *= 2;
next += Duration::from_micros(d);
}
self.queue.insert(next, host);
}
pub fn dequeue(&mut self) -> Result<InstanceHost, Duration> {
let now = Instant::now();
if let Some(time) = self.queue.keys().next().copied() {
if time <= now {
self.queue.remove(&time)
.ok_or(Duration::from_secs(1))
.map(|host| {
let instance = self.instances.get(&host);
if let Some(last_fetch) = instance.and_then(|i| i.last_fetch) {
tracing::debug!("Fetch {} - last before {:.0?}", host, now - last_fetch);
} else {
tracing::debug!("Fetch {} - NEW", host);
}
InstanceHost {
host,
known_user: instance.and_then(|instance| instance.known_user.clone()),
}
})
} else {
Err(time - now)
}
} else {
tracing::warn!("empty queue");
Err(Duration::from_secs(60))
}
}
}