caveman/hunter/src/scheduler.rs

use std::collections::{HashMap, BTreeMap};
use std::sync::Arc;
use std::time::Duration;
use rand::{thread_rng, Rng};
use tokio::time::Instant;
use crate::block_list::BlockList;
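
// Clamp bounds and fallback for the per-host fetch interval chosen in `reenqueue`.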
const MIN_INTERVAL: Duration = Duration::from_secs(30);
const MAX_INTERVAL: Duration = Duration::from_secs(7200);
const DEFAULT_INTERVAL: Duration = Duration::from_secs(120);
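
/// Host name, shared by reference between the instance map and the queue.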
pub type Host = Arc<String>;
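
/// Per-host state tracked by the scheduler.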
pub struct Instance {
    last_fetch: Option<Instant>,
    error: bool,
}

/// Scheduler: keeps the set of known instances and decides which host to fetch next, ordered by due time.
pub struct Scheduler {
    instances: HashMap<Host, Instance>,
    queue: BTreeMap<Instant, Host>,
    block_list: BlockList,
}

impl Scheduler {
    pub fn new(block_list: BlockList) -> Self {
        Scheduler {
            instances: HashMap::new(),
            queue: BTreeMap::new(),
            block_list,
        }
    }

    pub fn size(&self) -> usize {
        self.instances.len()
    }

    pub fn queue_len(&self) -> usize {
        self.queue.len()
    }
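
    /// Register a newly discovered host unless it is on the block list.
    /// Returns `false` for blocked hosts; hosts that are already known are left
    /// untouched and still return `true`.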
    pub async fn introduce(&mut self, host: String) -> bool {
        if self.block_list.is_blocked(&host).await {
            return false;
        }

        let now = Instant::now();
        let host = Arc::new(host);
        if let std::collections::hash_map::Entry::Vacant(entry) = self.instances.entry(host.clone()) {
            entry.insert(Instance {
                last_fetch: None,
                error: false,
            });
            // Schedule the first fetch right away.
            self.queue.insert(now, host);
        }
        true
    }
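
    /// Put a fetched host back into the queue and decide when to fetch it next:
    /// the reported mean posting interval if new posts were seen, otherwise a
    /// randomized backoff of 2-3x the previous interval, clamped to
    /// `MIN_INTERVAL..=MAX_INTERVAL`.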
    pub fn reenqueue(&mut self, host: Host, new_post_ratio: Option<f64>, mean_interval: Option<Duration>) {
        let now = Instant::now();
        let instance = self.instances.get_mut(&host).unwrap();
        let last_interval = instance.last_fetch.map(|last_fetch| now - last_fetch);
        instance.last_fetch = Some(now);
        instance.error = false;

        let next_interval = match (new_post_ratio, mean_interval, last_interval) {
            // New posts were seen: fetch again after the observed mean posting interval.
            (Some(new_post_ratio), Some(mean_interval), _) if new_post_ratio > 0. =>
                mean_interval,
            // Nothing new: back off from the previous interval by a random factor of 2..3.
            (_, _, Some(last_interval)) => {
                let a = thread_rng().gen_range(2. .. 3.);
                last_interval.mul_f64(a)
            }
            _ =>
                DEFAULT_INTERVAL,
        }.max(MIN_INTERVAL).min(MAX_INTERVAL);
        let mut next = now + next_interval;

        let mut d = 1;
        // avoid timestamp collision in self.queue
        while self.queue.get(&next).is_some() {
            d *= 2;
            next += Duration::from_micros(d);
        }
        self.queue.insert(next, host);
    }
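
    /// Pop the next host that is due for a fetch. Returns `Err` with the time to
    /// wait when no host is due yet, or 60 seconds when the queue is empty.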
    pub fn dequeue(&mut self) -> Result<Host, Duration> {
        let now = Instant::now();
        if let Some(time) = self.queue.keys().next().cloned() {
            if time <= now {
                self.queue.remove(&time)
                    .ok_or(Duration::from_secs(1))
                    .map(|host| {
                        if let Some(last_fetch) = self.instances.get(&host).and_then(|i| i.last_fetch) {
                            tracing::debug!("Fetch {} - last before {:.0?}", host, now - last_fetch);
                        } else {
                            tracing::debug!("Fetch {} - NEW", host);
                        }
                        host
                    })
            } else {
                Err(time - now)
            }
        } else {
            tracing::warn!("empty queue");
            Err(Duration::from_secs(60))
        }
    }
}