caveman/cave/src/trend_tag.rs

138 lines
3.8 KiB
Rust

use std::collections::BTreeSet;
use crate::PERIOD_COMPARE_WINDOW;
/// Minimum recent activity a tag needs before `TrendTag::score` gives it a
/// non-zero score, as `(min_period, min_after_mentions)` pairs.
///
/// Every pair whose `min_period` is less than or equal to the scored period
/// applies, so longer periods have to clear all of the smaller thresholds too.
/// Periods are expressed in the same unit as the `hour_users` timestamps
/// (hours, judging by the field name: 24 = one day, 168 = one week).
const MIN_AFTER_MENTIONS: &[(u64, usize)] = &[(4, 9), (24, 17), (168, 37)];
#[derive(Debug)]
pub struct TrendTag {
pub name: String,
pub hour_users: Vec<(u64, usize)>,
pub other: Vec<(String, String)>,
}
impl TrendTag {
pub(crate) fn from_hash(name: String, hash_values: Vec<String>, hour_users: Vec<(u64, usize)>) -> Self {
let mut other = Vec::with_capacity(hash_values.len() / 2);
let mut key: Option<String> = None;
for value in hash_values {
if let Some(key) = key.take() {
if let Ok(value) = str::parse(&value) {
other.push((key, value));
}
} else {
key = Some(value);
}
}
TrendTag {
name,
hour_users,
other,
}
}
#[must_use] pub fn score(&self, period: u64, until: u64) -> f64 {
// ignore spam that comes from only 1 instance
if self.hosts().nth(1).is_none() {
return -1.;
}
let from = until - period;
let not_before = from - PERIOD_COMPARE_WINDOW * period;
let mut before_mentions = 0;
let mut before_hours = 0;
let mut after_mentions = 0;
for (hour, mentions) in self.hour_users.iter().copied() {
if hour > from {
if mentions > 1 {
after_mentions += mentions;
}
} else if hour > not_before {
before_mentions += mentions;
before_hours += 1;
}
}
for (min_period, min_after_mentions) in MIN_AFTER_MENTIONS {
if period >= *min_period && after_mentions < *min_after_mentions {
return 0.;
}
}
let before = if before_hours > 0 && before_mentions > 0 {
(before_mentions as f64) / f64::from(before_hours)
} else { 0.1 };
let after = (after_mentions as f64) / (period as f64);
after / before
}
#[must_use] pub fn hour_scores_data(&self, period: u64) -> String {
let offset = self.hour_users.len().saturating_sub(period as usize);
self.hour_users[offset..]
.iter()
.map(|(_, count)| *count)
.enumerate()
.map(|(i, count)| if i == 0 {
format!("{count}")
} else {
format!(" {count}")
})
.collect()
}
fn spellings(&self) -> impl Iterator<Item = (usize, &str)> {
self.other.iter()
.filter_map(|(key, value)| {
if &key[..2] != "s:" {
return None;
}
if let Ok(count) = str::parse(value) {
return Some((count, &key[2..]));
}
None
})
}
#[must_use] pub fn spelling(&self) -> &str {
self.spellings()
.map(|(count, spelling)| {
if spelling.chars().any(char::is_uppercase) {
// favor captialized spelling
(10 * count, spelling)
} else {
(count, spelling)
}
})
.max()
.map_or(&self.name, |(_count, spelling)| spelling)
}
pub fn hosts(&self) -> impl Iterator<Item = (usize, &str)> {
self.other.iter()
.filter_map(|(key, value)| {
if &key[..2] != "h:" {
return None;
}
if let Ok(count) = str::parse(value) {
return Some((count, &key[2..]));
}
None
})
}
/// ordered by count
#[must_use] pub fn hosts_set(&self) -> BTreeSet<(usize, &str)> {
self.hosts().collect()
}
}