Compare commits

...

8 Commits

Author SHA1 Message Date
Astro 7c7b75537e hunter: add URL in User-Agent 2023-04-24 19:38:44 +02:00
Astro 2847bdc69e hunter: try out tokio-uring 2023-04-24 19:35:19 +02:00
Astro 33c910ff7a hunter: redesign hosts introduction in worker to add scanning reblog mentions 2023-04-24 19:27:13 +02:00
Astro 0bef428894 hunter: remove unnecessary cloning 2023-04-24 19:21:15 +02:00
Astro 18de86dde2 hunter: avoid double lookup in Scheduler::introduce() 2023-04-24 18:48:31 +02:00
Astro daed3f3280 Cargo.lock: update 2023-04-24 18:40:50 +02:00
Astro fa4dabfc11 flake.lock: Update
Flake lock file updates:

• Updated input 'fenix':
    'github:nix-community/fenix/cbcb84751afdfd52d3dd3dc9caf328b59250052c' (2023-03-31)
  → 'github:nix-community/fenix/4ffd0701b8872a8f81ca9492a21fe1c4af17abcf' (2023-04-24)
• Updated input 'fenix/rust-analyzer-src':
    'github:rust-lang/rust-analyzer/42d671fcb7173f1bdb4e785d732e6fb31b5bea0b' (2023-03-30)
  → 'github:rust-lang/rust-analyzer/bc78ebd9d83d614562f0a9280bdedf91a3841a73' (2023-04-23)
• Updated input 'nixpkgs':
    'path:/nix/store/m13y8w5rcz9zd01pfkj4k4dv17lahhdi-source?lastModified=1679966490&narHash=sha256-k0jV+y1jawE6w4ZvKgXDNg4+O9NNtcaWwzw8gufv0b4=&rev=5b7cd5c39befee629be284970415b6eb3b0ff000' (2023-03-28)
  → 'path:/nix/store/40w1i37kf1s1ljj3a22ngi5b66bsd225-source?lastModified=1680865339&narHash=sha256-H6rmJ1CyJ3Q5ZyoLMYq%2fUEYMS9Q1orJjRpWiQ47HudE=&rev=0040164e473509b4aee6aedb3b923e400d6df10b' (2023-04-07)
• Updated input 'utils':
    'github:numtide/flake-utils/93a2b84fc4b70d9e089d029deacc3583435c2ed6' (2023-03-15)
  → 'github:numtide/flake-utils/cfacdce06f30d2b68473a46042957675eebb3401' (2023-04-11)
• Added input 'utils/systems':
    'github:nix-systems/default/da67096a3b9bf56a91d16901293e51ba5b49a27e' (2023-04-09)
2023-04-24 18:34:04 +02:00
Astro 29d5086872 flake.lock: Update
Flake lock file updates:

• Updated input 'fenix':
    'github:nix-community/fenix/1c9e4995fc7022258817f9c9b02c7c4b3d09ca65' (2023-03-11)
  → 'github:nix-community/fenix/cbcb84751afdfd52d3dd3dc9caf328b59250052c' (2023-03-31)
• Updated input 'fenix/rust-analyzer-src':
    'github:rust-lang/rust-analyzer/9fca0a4afefead3daf8f66fd357999d7cd520880' (2023-03-10)
  → 'github:rust-lang/rust-analyzer/42d671fcb7173f1bdb4e785d732e6fb31b5bea0b' (2023-03-30)
• Updated input 'naersk':
    'github:nmattia/naersk/d998160d6a076cfe8f9741e56aeec7e267e3e114' (2022-12-15)
  → 'github:nmattia/naersk/88cd22380154a2c36799fe8098888f0f59861a15' (2023-03-23)
• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/2ce9b9842b5e63884dfc3dea6689769e2a1ea309' (2023-03-11)
  → 'path:/nix/store/m13y8w5rcz9zd01pfkj4k4dv17lahhdi-source?lastModified=1679966490&narHash=sha256-k0jV+y1jawE6w4ZvKgXDNg4+O9NNtcaWwzw8gufv0b4=&rev=5b7cd5c39befee629be284970415b6eb3b0ff000' (2023-03-28)
• Updated input 'utils':
    'github:numtide/flake-utils/3db36a8b464d0c4532ba1c7dda728f4576d6d073' (2023-02-13)
  → 'github:numtide/flake-utils/93a2b84fc4b70d9e089d029deacc3583435c2ed6' (2023-03-15)
2023-03-31 20:31:14 +02:00
6 changed files with 382 additions and 214 deletions

486
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -8,11 +8,11 @@
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1678515726,
"narHash": "sha256-+WQZ+yfpbyX37fTOPTWrOXAWIjNC65/3SHrSGpbeMUk=",
"lastModified": 1682317371,
"narHash": "sha256-szJscjPsOR+pFri8eZ6c6PCGw9Ge3llJ2rkz7dyxKwM=",
"owner": "nix-community",
"repo": "fenix",
"rev": "1c9e4995fc7022258817f9c9b02c7c4b3d09ca65",
"rev": "4ffd0701b8872a8f81ca9492a21fe1c4af17abcf",
"type": "github"
},
"original": {
@@ -28,11 +28,11 @@
]
},
"locked": {
"lastModified": 1671096816,
"narHash": "sha256-ezQCsNgmpUHdZANDCILm3RvtO1xH8uujk/+EqNvzIOg=",
"lastModified": 1679567394,
"narHash": "sha256-ZvLuzPeARDLiQUt6zSZFGOs+HZmE+3g4QURc8mkBsfM=",
"owner": "nmattia",
"repo": "naersk",
"rev": "d998160d6a076cfe8f9741e56aeec7e267e3e114",
"rev": "88cd22380154a2c36799fe8098888f0f59861a15",
"type": "github"
},
"original": {
@@ -43,12 +43,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1678500213,
"narHash": "sha256-A5s2rXawJ+dCThkMXoMuYW8dgyUmkElcyfVJUot/Vr0=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "2ce9b9842b5e63884dfc3dea6689769e2a1ea309",
"type": "github"
"lastModified": 1680865339,
"narHash": "sha256-H6rmJ1CyJ3Q5ZyoLMYq/UEYMS9Q1orJjRpWiQ47HudE=",
"path": "/nix/store/40w1i37kf1s1ljj3a22ngi5b66bsd225-source",
"rev": "0040164e473509b4aee6aedb3b923e400d6df10b",
"type": "path"
},
"original": {
"id": "nixpkgs",
@@ -66,11 +65,11 @@
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1678443920,
"narHash": "sha256-If9ztMKOJG1jfSAVxbgqnfEqmZHQnfIH5reDvreyfwk=",
"lastModified": 1682284919,
"narHash": "sha256-Z07/byuJdxLK6E8Yb9qNvUMhUCOWEgYAriojU/wZHu8=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "9fca0a4afefead3daf8f66fd357999d7cd520880",
"rev": "bc78ebd9d83d614562f0a9280bdedf91a3841a73",
"type": "github"
},
"original": {
@@ -80,13 +79,31 @@
"type": "github"
}
},
"utils": {
"systems": {
"locked": {
"lastModified": 1676283394,
"narHash": "sha256-XX2f9c3iySLCw54rJ/CZs+ZK6IQy7GXNY4nSOyu2QG4=",
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1681202837,
"narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "3db36a8b464d0c4532ba1c7dda728f4576d6d073",
"rev": "cfacdce06f30d2b68473a46042957675eebb3401",
"type": "github"
},
"original": {

View File

@@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
futures = "0.3"
tokio = { version = "1", features = ["full", "tracing"] }
tokio-uring = "0.4"
reqwest = { version = "0.11", features = ["json", "deflate", "gzip", "trust-dns"] }
serde = { version = "1", features = ["derive"] }
# serde_yaml = "0.9"

View File

@@ -17,8 +17,14 @@ use worker::Message;
#[global_allocator]
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;
#[tokio::main]
async fn main() {
/// Program entry point: starts the tokio-uring runtime and blocks on `run()`.
///
/// Returns `Ok(())` once `run()` has completed. The result of `run().await`
/// is discarded here, so no error from it is propagated through `main`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// The async block's `Ok(())` becomes the return value of `main`.
tokio_uring::start(async {
run().await;
Ok(())
})
}
async fn run() {
cave::systemd::extend_timeout(100_000);
cave::init::exit_on_panic();
@@ -72,11 +78,9 @@ async fn main() {
.timeout(Duration::from_secs(30))
.tcp_keepalive(Duration::from_secs(300))
.pool_max_idle_per_host(0)
.user_agent(concat!(
env!("CARGO_PKG_NAME"),
"/",
env!("CARGO_PKG_VERSION"),
))
.user_agent(
format!("{}/{} (+https://fedi.buzz/)", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
)
.deflate(true)
.gzip(true)
.trust_dns(true)

View File

@@ -49,8 +49,8 @@ impl Scheduler {
let now = Instant::now();
let host = Arc::new(host);
if self.instances.get(&host).is_none() {
self.instances.insert(host.clone(), Instance {
if let std::collections::hash_map::Entry::Vacant(entry) = self.instances.entry(host.clone()) {
entry.insert(Instance {
last_fetch: None,
error: false,
});

View File

@@ -2,6 +2,7 @@ use std::collections::HashSet;
use std::future::Future;
use std::sync::Arc;
use std::time::{Duration, Instant};
use cave::feed::Post;
use cave::{
feed::{Feed, EncodablePost},
store::Store,
@@ -192,25 +193,28 @@ async fn process_posts(
if ! posts_cache.insert(post.uri.clone()) {
let t1 = Instant::now();
// introduce instances from reblog authors
if let Some(reblog_account_host) = post.reblog.as_ref().and_then(|reblog| reblog.account.host()) {
introduce_hosts.insert(reblog_account_host);
}
// introduce instances from mentions
for mention in &post.mentions {
if let Some(user_host) = mention.user_host() {
introduce_hosts.insert(user_host);
/// Collects the hosts referenced by `post` into `introduce_hosts`.
///
/// Inserts the host of the post's account when `post.account.host()` yields
/// one, and the host of every entry in `post.mentions` for which
/// `user_host()` yields one. Entries without a host are skipped silently.
/// Does not look into `post.reblog`; the caller invokes this separately for
/// the reblogged post.
fn scan_for_hosts(introduce_hosts: &mut HashSet<String>, post: &Post) {
// introduce instances from accounts
if let Some(account_host) = post.account.host() {
introduce_hosts.insert(account_host);
}
// introduce instances from mentions
for mention in &post.mentions {
if let Some(user_host) = mention.user_host() {
introduce_hosts.insert(user_host);
}
}
}
scan_for_hosts(&mut introduce_hosts, &post);
if let Some(reblog) = &post.reblog {
scan_for_hosts(&mut introduce_hosts, &reblog);
}
// check if it's an actual post
if let Some(account_host) = post.account.host() {
if let Some(_account_host) = post.account.host() {
// send away to redis
if store.save_post(post).await == Ok(true) {
new_posts += 1;
}
// introduce instances from accounts
introduce_hosts.insert(account_host);
} else {
tracing::warn!("drop repost ({:?} on {})", post.account.host(), host);
}
@@ -230,10 +234,6 @@ async fn process_posts(
None
};
let introduce_hosts = introduce_hosts.into_iter()
.map(|host| host.to_owned())
.collect();
(new_post_ratio, introduce_hosts)
}