2022-11-14 22:44:16 +01:00
|
|
|
use std::collections::BTreeSet;
|
2022-11-14 03:14:12 +01:00
|
|
|
use crate::PERIOD_COMPARE_WINDOW;
|
2022-11-05 20:04:31 +01:00
|
|
|
|
2023-10-30 00:35:23 +01:00
|
|
|
/// Activity thresholds as `(minimum period, minimum recent mentions)` pairs.
///
/// `TrendTag::score` returns `0.` as soon as one pair matches, i.e. the
/// requested period is at least the first value while the mentions counted
/// in the recent window stay below the second value.
// NOTE(review): the period values are presumably hours (4h / 1 day / 1 week) — confirm against callers.
const MIN_AFTER_MENTIONS: &[(u64, usize)] = &[(4, 9), (24, 17), (168, 37)];
|
2022-11-05 20:04:31 +01:00
|
|
|
|
|
|
|
/// A tag together with the raw data used to rank and display it.
#[derive(Debug)]
pub struct TrendTag {
    /// Tag name; `spelling` falls back to it when no spelling is recorded.
    pub name: String,
    /// `(hour, count)` samples. `score` buckets them by hour relative to the
    /// requested window; `hour_scores_data` renders the trailing counts.
    pub hour_users: Vec<(u64, usize)>,
    /// Remaining key/value pairs. Keys starting with `s:` are read as
    /// spellings and keys starting with `h:` as hosts, each with a numeric
    /// count as its value.
    pub other: Vec<(String, String)>,
}
|
|
|
|
|
2022-11-08 00:43:46 +01:00
|
|
|
impl TrendTag {
|
2022-11-14 03:14:12 +01:00
|
|
|
pub(crate) fn from_hash(name: String, hash_values: Vec<String>, hour_users: Vec<(u64, usize)>) -> Self {
|
2022-11-05 20:04:31 +01:00
|
|
|
let mut other = Vec::with_capacity(hash_values.len() / 2);
|
|
|
|
|
|
|
|
let mut key: Option<String> = None;
|
|
|
|
for value in hash_values.into_iter() {
|
|
|
|
if let Some(key) = key.take() {
|
2022-11-14 03:14:12 +01:00
|
|
|
if let Ok(value) = str::parse(&value) {
|
2022-11-05 20:04:31 +01:00
|
|
|
other.push((key, value));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
key = Some(value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-08 00:43:46 +01:00
|
|
|
TrendTag {
|
2022-11-09 18:11:02 +01:00
|
|
|
name,
|
2022-11-14 03:14:12 +01:00
|
|
|
hour_users,
|
2022-11-05 20:04:31 +01:00
|
|
|
other,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn score(&self, period: u64, until: u64) -> f64 {
|
2022-11-09 19:05:40 +01:00
|
|
|
// ignore spam that comes from only 1 instance
|
|
|
|
if self.hosts().skip(1).next().is_none() {
|
2022-11-10 15:23:02 +01:00
|
|
|
return -1.;
|
2022-11-09 19:05:40 +01:00
|
|
|
}
|
|
|
|
|
2022-11-05 20:04:31 +01:00
|
|
|
let from = until - period;
|
2022-11-14 03:14:12 +01:00
|
|
|
let not_before = from - PERIOD_COMPARE_WINDOW * period;
|
2022-11-05 20:04:31 +01:00
|
|
|
let mut before_mentions = 0;
|
|
|
|
let mut before_hours = 0;
|
|
|
|
let mut after_mentions = 0;
|
|
|
|
|
2023-11-10 23:32:12 +01:00
|
|
|
for (hour, mut mentions) in self.hour_users.iter().cloned() {
|
2022-11-10 03:28:20 +01:00
|
|
|
if hour > from {
|
2023-11-10 23:32:12 +01:00
|
|
|
if mentions > 1 {
|
|
|
|
after_mentions += mentions;
|
|
|
|
}
|
2022-11-10 03:28:20 +01:00
|
|
|
} else if hour > not_before {
|
2022-11-05 20:04:31 +01:00
|
|
|
before_mentions += mentions;
|
|
|
|
before_hours += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-30 00:35:23 +01:00
|
|
|
for (min_period, min_after_mentions) in MIN_AFTER_MENTIONS {
|
|
|
|
if period >= *min_period && after_mentions < *min_after_mentions {
|
|
|
|
return 0.;
|
|
|
|
}
|
2022-11-05 20:04:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
let before = if before_hours > 0 && before_mentions > 0 {
|
|
|
|
(before_mentions as f64) / (before_hours as f64)
|
|
|
|
} else { 0.1 };
|
|
|
|
let after = (after_mentions as f64) / (period as f64);
|
|
|
|
after / before
|
|
|
|
}
|
2022-11-06 02:20:36 +01:00
|
|
|
|
2022-11-14 22:44:16 +01:00
|
|
|
pub fn hour_scores_data(&self, period: u64) -> String {
|
|
|
|
let offset = self.hour_users.len().saturating_sub(period as usize);
|
|
|
|
self.hour_users[offset..]
|
2022-11-11 16:45:43 +01:00
|
|
|
.iter()
|
2022-11-14 22:44:16 +01:00
|
|
|
.map(|(_, count)| *count)
|
|
|
|
.enumerate()
|
|
|
|
.map(|(i, count)| if i == 0 {
|
|
|
|
format!("{}", count)
|
|
|
|
} else {
|
|
|
|
format!(" {}", count)
|
|
|
|
})
|
2022-11-11 16:45:43 +01:00
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
2022-11-06 02:20:36 +01:00
|
|
|
fn spellings(&self) -> impl Iterator<Item = (usize, &str)> {
|
|
|
|
self.other.iter()
|
|
|
|
.filter_map(|(key, value)| {
|
|
|
|
if &key[..2] != "s:" {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Ok(count) = str::parse(value) {
|
|
|
|
return Some((count, &key[2..]));
|
|
|
|
}
|
|
|
|
|
|
|
|
None
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn spelling(&self) -> &str {
|
|
|
|
self.spellings()
|
|
|
|
.map(|(count, spelling)| {
|
|
|
|
if spelling.chars().any(|c| c.is_uppercase()) {
|
|
|
|
// favor captialized spelling
|
|
|
|
(10 * count, spelling)
|
|
|
|
} else {
|
|
|
|
(count, spelling)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.max()
|
|
|
|
.map(|(_count, spelling)| spelling)
|
|
|
|
.unwrap_or(&self.name)
|
|
|
|
}
|
|
|
|
|
2022-11-06 23:49:42 +01:00
|
|
|
pub fn hosts(&self) -> impl Iterator<Item = (usize, &str)> {
|
2022-11-06 02:20:36 +01:00
|
|
|
self.other.iter()
|
|
|
|
.filter_map(|(key, value)| {
|
|
|
|
if &key[..2] != "h:" {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Ok(count) = str::parse(value) {
|
|
|
|
return Some((count, &key[2..]));
|
|
|
|
}
|
|
|
|
|
|
|
|
None
|
|
|
|
})
|
2022-11-06 23:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// ordered by count
|
|
|
|
pub fn hosts_set(&self) -> BTreeSet<(usize, &str)> {
|
|
|
|
self.hosts().collect()
|
2022-11-06 02:20:36 +01:00
|
|
|
}
|
2022-11-05 20:04:31 +01:00
|
|
|
}
|