caveman/gatherer/src/tag.rs

158 lines
4.4 KiB
Rust
Raw Normal View History

2022-11-06 02:20:36 +01:00
use std::collections::BTreeSet;
2022-11-06 01:29:58 +01:00
use std::sync::Arc;
2022-11-05 20:04:31 +01:00
use redis::{
aio::ConnectionManager,
RedisError,
};
/// Minimum number of mentions per unit of the scoring period that a tag
/// must reach inside the scored window; `Tag::score` multiplies this by
/// `period` and returns `0.` for tags that stay below the product.
const MIN_AFTER_MENTIONS: usize = 3;
/// A tag together with the counters read from its Redis hash.
#[derive(Debug)]
pub struct Tag {
    /// Tag name, i.e. the Redis key with its `l:<language>:` or `g:`
    /// prefix removed.
    pub name: Arc<String>,
    /// `(hour, mentions)` pairs parsed from the hash fields named
    /// `t:<hour>`.
    by_hour: Vec<(u64, usize)>,
    /// All remaining hash fields as `(field, value)` pairs. Fields starting
    /// with `s:` are read as spelling counts and fields starting with `h:`
    /// as host counts.
    other: Vec<(String, String)>,
}
impl Tag {
/// Scans Redis for every tag hash under one key prefix and hands each
/// decoded `Tag` to `f`.
///
/// Keys are matched against `l:<language>:*` when `language` is `Some`,
/// and against `g:*` otherwise. The prefix is removed from the key to form
/// the tag's name.
///
/// # Errors
///
/// Returns the first `RedisError` raised by a `SCAN` call or by a
/// pipelined `HGETALL` batch. Tags already passed to `f` before the
/// failure stay delivered.
pub async fn for_each<F: FnMut(Self)>(
    redis_man: &mut ConnectionManager,
    language: Option<String>,
    mut f: F,
) -> Result<(), RedisError> {
    let prefix = match language {
        Some(language) => format!("l:{}:", language),
        None => "g:".to_string(),
    };
    // The MATCH pattern is the same for every SCAN call, so build it once.
    let pattern = format!("{}*", prefix);

    // `None` means "not started yet"; the loop ends once the server hands
    // back cursor 0.
    let mut cursor = None;
    while cursor != Some(0) {
        let mut cmd = redis::cmd("SCAN");
        cmd.cursor_arg(cursor.unwrap_or(0))
            .arg("MATCH").arg(pattern.as_str())
            .arg("COUNT").arg(1000);
        let (next_cursor, keys) =
            cmd.query_async::<_, (u64, Vec<String>)>(redis_man)
            .await?;

        // Fetch the hashes of the whole batch in one pipelined request.
        let mut pipe = redis::pipe();
        for key in &keys {
            pipe.hgetall(key);
        }
        let hashes =
            pipe.query_async::<_, Vec<Vec<String>>>(redis_man)
            .await?;

        for (key, hash_values) in keys.iter().zip(hashes) {
            // MATCH is a glob: metacharacters inside `language` could make
            // it return keys that do not literally start with `prefix`.
            // Skip those instead of slicing at a bogus offset, which could
            // panic.
            if let Some(name) = key.strip_prefix(prefix.as_str()) {
                f(Self::from_hash(name.to_string(), hash_values));
            }
        }

        cursor = Some(next_cursor);
    }

    Ok(())
}
fn from_hash(name: String, hash_values: Vec<String>) -> Self {
let mut by_hour = Vec::with_capacity(hash_values.len() / 2);
let mut other = Vec::with_capacity(hash_values.len() / 2);
let mut key: Option<String> = None;
for value in hash_values.into_iter() {
if let Some(key) = key.take() {
if &key[..2] == "t:" {
if let (Ok(hour), Ok(value)) = (str::parse(&key[2..]), str::parse(&value)) {
by_hour.push((hour, value));
}
} else if let Ok(value) = str::parse(&value) {
other.push((key, value));
}
} else {
key = Some(value);
}
}
#[cfg(debug)]
by_hour.sort();
Tag {
2022-11-06 01:29:58 +01:00
name: Arc::new(name),
2022-11-05 20:04:31 +01:00
by_hour,
other,
}
}
/// Rates how much the tag's mention rate inside a window exceeds its
/// earlier rate.
///
/// `period` and `until` use the same unit as the keys of `by_hour`. The
/// window covers every entry later than `until - period`; all other
/// entries count as "before".
///
/// Returns the mentions per unit of `period` inside the window divided by
/// the mentions per recorded entry before it. When there are no earlier
/// mentions a baseline of `0.1` is used as the divisor. Returns `0.` when
/// `period` is zero or when the window holds fewer than
/// `MIN_AFTER_MENTIONS * period` mentions.
pub fn score(&self, period: u64, until: u64) -> f64 {
    // A zero-length window has no rate; dividing by it further down would
    // yield infinity or NaN.
    if period == 0 {
        return 0.;
    }
    // Saturate so that a window reaching back past zero cannot underflow.
    let from = until.saturating_sub(period);

    let mut before_mentions = 0usize;
    let mut before_hours = 0usize;
    let mut after_mentions = 0usize;
    for &(hour, mentions) in &self.by_hour {
        if hour <= from {
            before_mentions += mentions;
            before_hours += 1;
        } else {
            after_mentions += mentions;
        }
    }

    // Compare in u64 with a saturating product so that a large `period`
    // can neither overflow nor be truncated by a cast to `usize`.
    let min_mentions = (MIN_AFTER_MENTIONS as u64).saturating_mul(period);
    if (after_mentions as u64) < min_mentions {
        return 0.;
    }

    let before = if before_hours > 0 && before_mentions > 0 {
        (before_mentions as f64) / (before_hours as f64)
    } else {
        // No usable history: fall back to a small baseline instead of
        // dividing by zero.
        0.1
    };
    let after = (after_mentions as f64) / (period as f64);
    after / before
}
2022-11-06 02:20:36 +01:00
/// Iterates over the recorded spellings of this tag as
/// `(count, spelling)` pairs.
///
/// Only `other` fields named `s:<spelling>` whose value parses as an
/// integer are yielded; everything else is skipped.
fn spellings(&self) -> impl Iterator<Item = (usize, &str)> {
    self.other.iter().filter_map(|(key, value)| {
        // `strip_prefix` rather than `&key[..2]`: slicing panics on field
        // names shorter than two bytes or with a multi-byte character
        // straddling offset 2.
        let spelling = key.strip_prefix("s:")?;
        let count = value.parse::<usize>().ok()?;
        Some((count, spelling))
    })
}
/// Picks the spelling to display for this tag.
///
/// Each recorded spelling is weighted by its count, multiplied by ten when
/// the spelling contains at least one uppercase character. The spelling
/// with the greatest `(weight, spelling)` pair is returned; without any
/// recorded spelling the tag's `name` is returned.
pub fn spelling(&self) -> &str {
    let weighted = self.spellings().map(|(count, spelling)| {
        // Capitalized spellings are preferred, so they get a tenfold weight.
        let has_upper = spelling.chars().any(char::is_uppercase);
        let factor = if has_upper { 10 } else { 1 };
        (factor * count, spelling)
    });

    match weighted.max() {
        Some((_weight, spelling)) => spelling,
        None => self.name.as_str(),
    }
}
/// Collects the hosts recorded for this tag as an ordered set of
/// `(count, host)` pairs.
///
/// Only `other` fields named `h:<host>` whose value parses as an integer
/// are included; everything else is skipped.
pub fn hosts(&self) -> BTreeSet<(usize, &str)> {
    self.other
        .iter()
        .filter_map(|(key, value)| {
            // `strip_prefix` rather than `&key[..2]`: slicing panics on
            // field names shorter than two bytes or with a multi-byte
            // character straddling offset 2.
            let host = key.strip_prefix("h:")?;
            let count = value.parse::<usize>().ok()?;
            Some((count, host))
        })
        .collect()
}
2022-11-05 20:04:31 +01:00
}