caveman/hunter/src/main.rs

144 lines
4.9 KiB
Rust
Raw Normal View History

2022-11-02 21:12:16 +01:00
use std::time::Duration;
2022-11-12 01:02:44 +01:00
use futures::{StreamExt, pin_mut};
2022-12-26 03:44:42 +01:00
use metrics_util::MetricKindMask;
use metrics_exporter_prometheus::PrometheusBuilder;
2022-11-02 22:06:43 +01:00
use tokio::time::timeout;
2023-10-18 02:03:41 +02:00
use cave::{
block_list::BlockList,
config::LoadConfig,
};
2022-11-02 21:12:16 +01:00
2022-11-02 22:06:43 +01:00
mod config;
2022-11-03 15:40:20 +01:00
mod scheduler;
2022-11-02 21:12:16 +01:00
mod worker;
mod webfinger;
2022-11-02 21:12:16 +01:00
use scheduler::InstanceHost;
2022-11-02 21:12:16 +01:00
use worker::Message;
2022-12-03 01:47:44 +01:00
#[global_allocator]
2023-10-01 23:31:17 +02:00
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
2022-12-03 01:47:44 +01:00
2023-04-24 19:35:19 +02:00
/// Process entry point.
///
/// Drives [`run`] to completion on the `tokio_uring` runtime and then
/// reports success; `run` itself has no return value.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let entry = async {
        run().await;
        Ok::<(), Box<dyn std::error::Error>>(())
    };
    tokio_uring::start(entry)
}
async fn run() {
cave::systemd::extend_timeout(100_000);
2022-11-05 20:51:18 +01:00
cave::init::exit_on_panic();
2022-12-01 01:39:38 +01:00
cave::init::init_logger(5555);
2022-11-03 17:22:21 +01:00
2022-11-05 20:51:18 +01:00
let config = config::Config::load();
2022-11-02 21:12:16 +01:00
2022-12-26 03:44:42 +01:00
PrometheusBuilder::new()
.with_http_listener(([0; 8], config.prometheus_port))
.add_global_label("application", env!("CARGO_PKG_NAME"))
.idle_timeout(MetricKindMask::ALL, Some(Duration::from_secs(6 * 3600)))
2022-12-26 03:44:42 +01:00
.install()
.unwrap();
2023-08-08 18:42:34 +02:00
let db = cave::db::Database::connect(&config.database).await;
let mut store = cave::store::Store::new(16, config.redis, config.redis_password_file).await;
2023-10-12 23:28:48 +02:00
let posts_cache = cave::posts_cache::PostsCache::new(65536);
2022-11-08 00:43:46 +01:00
2023-10-18 02:03:41 +02:00
let block_list = BlockList::new(&config.blocklist).await;
2022-11-05 20:51:18 +01:00
cave::systemd::status("Starting scheduler");
let mut scheduler = scheduler::Scheduler::new(block_list.clone());
2022-11-07 03:30:12 +01:00
cave::systemd::status("Loading known hosts from config");
2022-11-02 22:06:43 +01:00
for host in config.hosts.into_iter() {
scheduler.introduce(InstanceHost::just_host(host)).await;
2022-11-03 18:58:37 +01:00
}
#[cfg(not(dev))]
{
2022-11-12 01:02:44 +01:00
cave::systemd::status("Loading known hosts from redis");
let mut n = 1;
let mut store_ = store.clone();
let hosts = store_.get_hosts()
2022-11-12 01:02:44 +01:00
.await.expect("get_hosts");
pin_mut!(hosts);
while let Some(host) = hosts.next().await {
if scheduler.introduce(InstanceHost::just_host(host.clone())).await == false {
2022-12-01 01:39:38 +01:00
tracing::debug!("Remove host {}", host);
store.remove_host(&host).await.expect("remove_host");
}
n += 1;
if n > 1000 {
cave::systemd::extend_timeout(10_000_000);
n = 0;
}
2022-11-12 01:02:44 +01:00
}
2022-11-02 22:06:43 +01:00
}
2022-11-03 16:50:40 +01:00
2022-11-05 20:51:18 +01:00
cave::systemd::status("Starting HTTP client");
2022-11-02 21:12:16 +01:00
let client = reqwest::Client::builder()
2022-11-03 02:54:28 +01:00
.timeout(Duration::from_secs(30))
.tcp_keepalive(Duration::from_secs(300))
2022-11-03 20:48:36 +01:00
.pool_max_idle_per_host(0)
.redirect(reqwest::redirect::Policy::limited(2))
2023-04-24 19:38:29 +02:00
.user_agent(
format!("{}/{} (+https://fedi.buzz/)", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
)
2022-11-02 21:12:16 +01:00
.deflate(true)
.gzip(true)
.build()
.expect("reqwest::Client");
cave::systemd::ready();
2022-11-03 17:34:29 +01:00
2022-11-02 22:06:43 +01:00
let mut workers_active = 0usize;
2022-11-03 01:21:53 +01:00
let (message_tx, mut message_rx) = tokio::sync::mpsc::unbounded_channel();
2022-11-02 21:12:16 +01:00
loop {
2022-12-01 01:39:38 +01:00
tracing::trace!("{} workers active, queued {} of {}", workers_active, scheduler.queue_len(), scheduler.size());
2022-11-05 20:51:18 +01:00
cave::systemd::status(&format!("{} workers active, queued {} of {}", workers_active, scheduler.queue_len(), scheduler.size()));
2022-12-26 03:44:42 +01:00
metrics::gauge!("hunter_workers", workers_active as f64, "worker" => "active");
metrics::gauge!("hunter_workers", scheduler.queue_len() as f64, "worker" => "queued");
metrics::gauge!("hunter_workers", scheduler.size() as f64, "worker" => "total");
2022-11-02 22:06:43 +01:00
let next_task = if workers_active < config.max_workers {
2022-11-03 15:40:20 +01:00
scheduler.dequeue()
2022-11-02 22:06:43 +01:00
} else {
2022-11-03 00:27:16 +01:00
Err(Duration::from_secs(5))
2022-11-02 22:06:43 +01:00
};
match next_task {
Err(duration) => {
2022-11-02 21:49:37 +01:00
let _ = timeout(duration, async {
2022-11-03 01:21:53 +01:00
let message = message_rx.recv().await.unwrap();
2022-11-02 21:49:37 +01:00
match message {
2022-11-11 21:52:52 +01:00
Message::WorkerDone => {
2022-11-02 22:06:43 +01:00
workers_active -= 1;
2022-11-11 21:52:52 +01:00
}
Message::Fetched { host, mean_interval, new_post_ratio } => {
2022-11-07 00:58:28 +01:00
scheduler.reenqueue(host, new_post_ratio, mean_interval);
2022-11-02 22:06:43 +01:00
}
Message::IntroduceHost(introduce_host) => {
scheduler.introduce(introduce_host).await;
2022-11-02 21:12:16 +01:00
}
}
}).await;
}
Ok(host) => {
2022-11-02 22:06:43 +01:00
workers_active += 1;
2022-12-01 01:39:38 +01:00
tokio::task::Builder::new()
.name(&format!("{} worker", host.host))
2022-12-01 01:39:38 +01:00
.spawn(worker::run(
message_tx.clone(),
store.clone(),
2023-08-08 18:42:34 +02:00
db.clone(),
2022-12-01 01:39:38 +01:00
posts_cache.clone(),
block_list.clone(),
2022-12-01 01:39:38 +01:00
client.clone(),
host
)).unwrap();
metrics::counter!("hunter_worker_starts", 1);
cave::systemd::watchdog();
2022-11-02 21:12:16 +01:00
}
}
}
}