use std::collections::BTreeSet; use crate::PERIOD_COMPARE_WINDOW; const MIN_AFTER_MENTIONS: &[(u64, usize)] = &[ (4, 7), (24, 17), (168, 57) ]; #[derive(Debug)] pub struct TrendTag { pub name: String, pub hour_users: Vec<(u64, usize)>, pub other: Vec<(String, String)>, } impl TrendTag { pub(crate) fn from_hash(name: String, hash_values: Vec, hour_users: Vec<(u64, usize)>) -> Self { let mut other = Vec::with_capacity(hash_values.len() / 2); let mut key: Option = None; for value in hash_values.into_iter() { if let Some(key) = key.take() { if let Ok(value) = str::parse(&value) { other.push((key, value)); } } else { key = Some(value); } } TrendTag { name, hour_users, other, } } pub fn score(&self, period: u64, until: u64) -> f64 { // ignore spam that comes from only 1 instance if self.hosts().skip(1).next().is_none() { return -1.; } let from = until - period; let not_before = from - PERIOD_COMPARE_WINDOW * period; let mut before_mentions = 0; let mut before_hours = 0; let mut after_mentions = 0; for (hour, mentions) in self.hour_users.iter().cloned() { if hour > from { after_mentions += mentions; } else if hour > not_before { before_mentions += mentions; before_hours += 1; } } for (min_period, min_after_mentions) in MIN_AFTER_MENTIONS { if period >= *min_period && after_mentions < *min_after_mentions { return 0.; } } let before = if before_hours > 0 && before_mentions > 0 { (before_mentions as f64) / (before_hours as f64) } else { 0.1 }; let after = (after_mentions as f64) / (period as f64); after / before } pub fn hour_scores_data(&self, period: u64) -> String { let offset = self.hour_users.len().saturating_sub(period as usize); self.hour_users[offset..] .iter() .map(|(_, count)| *count) .enumerate() .map(|(i, count)| if i == 0 { format!("{}", count) } else { format!(" {}", count) }) .collect() } fn spellings(&self) -> impl Iterator { self.other.iter() .filter_map(|(key, value)| { if &key[..2] != "s:" { return None; } if let Ok(count) = str::parse(value) { return Some((count, &key[2..])); } None }) } pub fn spelling(&self) -> &str { self.spellings() .map(|(count, spelling)| { if spelling.chars().any(|c| c.is_uppercase()) { // favor captialized spelling (10 * count, spelling) } else { (count, spelling) } }) .max() .map(|(_count, spelling)| spelling) .unwrap_or(&self.name) } pub fn hosts(&self) -> impl Iterator { self.other.iter() .filter_map(|(key, value)| { if &key[..2] != "h:" { return None; } if let Ok(count) = str::parse(value) { return Some((count, &key[2..])); } None }) } /// ordered by count pub fn hosts_set(&self) -> BTreeSet<(usize, &str)> { self.hosts().collect() } }