gatherer: ignore trends from <2 hosts

This commit is contained in:
Astro 2022-11-06 23:49:42 +01:00
parent 52c0200da9
commit c218cae316
3 changed files with 14 additions and 4 deletions

View File

@ -139,7 +139,7 @@ impl Tag {
.unwrap_or(&self.name)
}
pub fn hosts(&self) -> BTreeSet<(usize, &str)> {
pub fn hosts(&self) -> impl Iterator<Item = (usize, &str)> {
self.other.iter()
.filter_map(|(key, value)| {
if &key[..2] != "h:" {
@ -152,6 +152,10 @@ impl Tag {
None
})
.collect()
}
/// Gathers the `(count, host)` pairs yielded by [`Self::hosts`] into a sorted set.
///
/// Tuples compare by their first field before their second, so ascending
/// iteration goes from the lowest count to the highest, with ties broken by
/// host name. Iterate in reverse to get the hosts with the highest counts first.
pub fn hosts_set(&self) -> BTreeSet<(usize, &str)> {
    let mut by_count = BTreeSet::new();
    by_count.extend(self.hosts());
    by_count
}
}

View File

@ -78,6 +78,11 @@ impl TrendAnalyzer {
}
pub fn process_tag(&mut self, tag: &Arc<Tag>) {
if tag.hosts().skip(1).next().is_none() {
// ignore tags seen on fewer than 2 hosts: likely spam posted from a single host
return;
}
let score = tag.score(self.period, self.until);
if score <= 1.0 {
@ -97,7 +102,7 @@ impl TrendAnalyzer {
self.result.remove(&least);
least = self.result.keys().cloned().next().unwrap().clone();
}
self.score_threshold = Some(least.score);
}
}

View File

@ -27,11 +27,12 @@
<h3>#{{ tag.spelling() }}</h3>
<p class="score">{{ format!("{:.0}%", 100. * score) }}</p>
<ul class="hosts">
{% for (count, host) in tag.hosts().into_iter().rev().take(5) %}
{% for (count, host) in tag.hosts_set().into_iter().rev().take(5) %}
<li>
<a href="https://{{ host }}/tags/{{ tag.name }}">
{{ host }}
</a>
({{ count }})
</li>
{% endfor %}
</ul>