use std::{cmp::Ordering, collections::HashSet}; use std::collections::BTreeMap; use std::sync::Arc; use std::time::Instant; use redis::{ RedisError, }; use cave::current_hour; use cave::store::Store; use cave::trend_tag::TrendTag; pub type TrendsResults = Vec<(u64, u64, Vec<(f64, Arc)>)>; #[derive(Debug, Clone, PartialEq, PartialOrd)] pub struct ScoreKey { score: f64, tag: Arc, } impl Eq for ScoreKey {} impl Ord for ScoreKey { fn cmp(&self, other: &Self) -> Ordering { if self.score == other.score { self.tag.as_ref().cmp(other.tag.as_ref()) } else if self.score < other.score { Ordering::Less } else { Ordering::Greater } } } #[derive(Debug)] pub struct TrendAnalyzer { /// in hours period: u64, /// *now* in hours until: u64, size: usize, /// key contains name to avoid collision by just score pub result: BTreeMap>, score_threshold: Option, } impl TrendAnalyzer { pub async fn run( store: &mut Store, size: usize, periods: &[u64], language: Option, ) -> Result { let lang = if language.is_some() { "some" } else { "any" }; let until = current_hour(); let mut analyzers: Vec = periods.iter() .copied() .map(|period| TrendAnalyzer { period, until, size, result: BTreeMap::new(), score_threshold: None, }).collect(); let t1 = Instant::now(); let tags = store.get_trend_pools(&language, periods).await? .into_iter() .flat_map(|(_period, tags)| tags.into_iter()) .collect::>(); let tags_len = tags.len(); let t2 = Instant::now(); let trend_tags = store.get_trend_tags(&language, tags.into_iter()).await?; let t3 = Instant::now(); metrics::histogram!("trends_page_time", t2 - t1, "step" => "get_trend_pools", "lang" => lang); metrics::histogram!("trends_page_time", t3 - t2, "step" => "get_trend_tags", "lang" => lang); metrics::histogram!("trends_page_tags", tags_len as f64, "lang" => lang); for trend_tag in trend_tags { let trend_tag = Arc::new(trend_tag); let name = Arc::new(trend_tag.name.clone()); for analyzer in &mut analyzers { analyzer.process_tag(&name, &trend_tag); } } let results = analyzers.into_iter() .map(|analyzer| { let result = analyzer.result.iter() .rev() .map(|(key, tag)| (key.score, tag.clone())) .collect(); (analyzer.until, analyzer.period, result) }) .collect(); let t4 = Instant::now(); metrics::histogram!("trends_page_time", t4 - t3, "step" => "analyze", "lang" => lang); Ok(results) } pub fn process_tag(&mut self, name: &Arc, tag: &Arc) { let score = tag.score(self.period, self.until); if score <= 0. { return; } if self.result.len() >= self.size && self.score_threshold.map_or(false, |score_threshold| score < score_threshold) { // score is below self.result[..self.size].score return; } self.result.insert(ScoreKey { score, tag: name.clone(), }, tag.clone()); let mut least = self.result.keys().next().cloned().unwrap(); if self.result.len() > self.size { self.result.remove(&least); least = self.result.keys().next().cloned().unwrap().clone(); } self.score_threshold = Some(least.score); } }