use std::{cmp::Ordering, collections::HashSet};
|
|
use std::collections::BTreeMap;
|
|
use std::sync::Arc;
|
|
use std::time::Instant;
|
|
use redis::{
|
|
RedisError,
|
|
};
|
|
use cave::current_hour;
|
|
use cave::store::Store;
|
|
use cave::trend_tag::TrendTag;
|
|
|
|
/// One entry per requested period: `(until_hour, period_hours, tags)`,
/// where `tags` is a list of `(score, trend_tag)` pairs in descending
/// score order.
pub type TrendsResults = Vec<(u64, u64, Vec<(f64, Arc<TrendTag>)>)>;
|
/// Ordering key for ranked trend results: ordered by score, with the tag
/// name as a tie-breaker so distinct tags with equal scores don't collide
/// in the `BTreeMap` they key.
///
/// NOTE(review): `PartialEq`/`PartialOrd` are derived while `Eq`/`Ord`
/// are implemented manually below. The derived impls compare fields in
/// declaration order (score first, then tag), which matches the manual
/// `Ord` — keep the fields in this order.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct ScoreKey {
    // Trend score for the tag over the analyzer's period.
    score: f64,
    // Shared tag name; `Arc` keeps cloning this key cheap.
    tag: Arc<String>,
}
|
// Marker impl so `ScoreKey` can be used as a `BTreeMap` key; equality is
// the derived `PartialEq`. NOTE(review): this asserts total equality over
// an `f64` field — assumes scores are never NaN (confirm in `TrendTag::score`).
impl Eq for ScoreKey {}
|
impl Ord for ScoreKey {
|
|
fn cmp(&self, other: &Self) -> Ordering {
|
|
if self.score == other.score {
|
|
self.tag.as_ref().cmp(other.tag.as_ref())
|
|
} else if self.score < other.score {
|
|
Ordering::Less
|
|
} else {
|
|
Ordering::Greater
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Accumulates the top-`size` trending tags for one `(period, until)` window.
#[derive(Debug)]
pub struct TrendAnalyzer {
    /// in hours
    period: u64,
    /// *now* in hours
    until: u64,
    /// Maximum number of entries retained in `result`.
    size: usize,
    /// key contains name to avoid collision by just score
    pub result: BTreeMap<ScoreKey, Arc<TrendTag>>,
    /// Smallest score currently retained; once `result` is full, scores
    /// below this are rejected without touching the map.
    score_threshold: Option<f64>,
}
|
|
|
impl TrendAnalyzer {
|
|
pub async fn run(
|
|
store: &mut Store,
|
|
size: usize,
|
|
periods: &[u64],
|
|
language: Option<String>,
|
|
) -> Result<TrendsResults, RedisError> {
|
|
let lang = if language.is_some() { "some" } else { "any" };
|
|
|
|
let until = current_hour();
|
|
let mut analyzers: Vec<TrendAnalyzer> = periods.iter()
|
|
.copied()
|
|
.map(|period| TrendAnalyzer {
|
|
period,
|
|
until,
|
|
size,
|
|
result: BTreeMap::new(),
|
|
score_threshold: None,
|
|
}).collect();
|
|
|
|
let t1 = Instant::now();
|
|
let tags = store.get_trend_pools(&language, periods).await?
|
|
.into_iter()
|
|
.flat_map(|(_period, tags)| tags.into_iter())
|
|
.collect::<HashSet<String>>();
|
|
let tags_len = tags.len();
|
|
let t2 = Instant::now();
|
|
let trend_tags = store.get_trend_tags(&language, tags.into_iter()).await?;
|
|
let t3 = Instant::now();
|
|
metrics::histogram!("trends_page_time", t2 - t1, "step" => "get_trend_pools", "lang" => lang);
|
|
metrics::histogram!("trends_page_time", t3 - t2, "step" => "get_trend_tags", "lang" => lang);
|
|
metrics::histogram!("trends_page_tags", tags_len as f64, "lang" => lang);
|
|
for trend_tag in trend_tags {
|
|
let trend_tag = Arc::new(trend_tag);
|
|
let name = Arc::new(trend_tag.name.clone());
|
|
for analyzer in &mut analyzers {
|
|
analyzer.process_tag(&name, &trend_tag);
|
|
}
|
|
}
|
|
|
|
let results = analyzers.into_iter()
|
|
.map(|analyzer| {
|
|
let result = analyzer.result.iter()
|
|
.rev()
|
|
.map(|(key, tag)| (key.score, tag.clone()))
|
|
.collect();
|
|
(analyzer.until, analyzer.period, result)
|
|
})
|
|
.collect();
|
|
let t4 = Instant::now();
|
|
metrics::histogram!("trends_page_time", t4 - t3, "step" => "analyze", "lang" => lang);
|
|
Ok(results)
|
|
}
|
|
|
|
pub fn process_tag(&mut self, name: &Arc<String>, tag: &Arc<TrendTag>) {
|
|
let score = tag.score(self.period, self.until);
|
|
if score <= 0. {
|
|
return;
|
|
}
|
|
|
|
if self.result.len() >= self.size &&
|
|
self.score_threshold.map_or(false, |score_threshold| score < score_threshold) {
|
|
// score is below self.result[..self.size].score
|
|
return;
|
|
}
|
|
|
|
self.result.insert(ScoreKey { score, tag: name.clone(), }, tag.clone());
|
|
|
|
let mut least = self.result.keys().next().cloned().unwrap();
|
|
if self.result.len() > self.size {
|
|
self.result.remove(&least);
|
|
least = self.result.keys().next().cloned().unwrap().clone();
|
|
}
|
|
|
|
self.score_threshold = Some(least.score);
|
|
}
|
|
}
|