491 lines
15 KiB
Rust
491 lines
15 KiB
Rust
use std::pin::Pin;
|
|
|
|
use bb8::ManageConnection;
|
|
use futures::{Future, Stream, stream::unfold, StreamExt};
|
|
use redis::{Value, RedisError, aio::ConnectionLike, FromRedisValue};
|
|
use url::Url;
|
|
use crate::{
|
|
feed::{EncodablePost, Post},
|
|
trend_tag::TrendTag,
|
|
PERIOD_COMPARE_WINDOW,
|
|
current_hour,
|
|
PERIODS,
|
|
};
|
|
|
|
/// TTL of post de-duplication keys ("p:<url>"), in seconds (1 day).
const POST_EXPIRE: usize = 86400;
/// Retention window for tag statistics, in HOURS (30 days).
/// NOTE: unlike the other *_EXPIRE constants this is hours, not seconds;
/// call sites multiply by 3600 before passing it to EXPIRE.
const TAG_EXPIRE: u64 = 30 * 24;
/// TTL of known-host keys ("h:<host>"), in seconds (30 days).
const HOST_EXPIRE: usize = 30 * 86400;

/// Number of tags kept per trend pool.
pub const TREND_POOL_SIZE: usize = 20;
/// Maximum number of preview images retained per tag ("i:<tag>" sets).
pub const IMAGES_PER_TAG: usize = 8;

/// This module surfaces raw redis errors directly.
pub type Error = RedisError;
|
|
|
|
/// wrapper so we can impl `ManageConnection`
///
/// Holds the (password-bearing) connection URL; bb8 calls back into this
/// to open new connections on demand.
struct RedisPool {
    // Full redis URL, including the password set in `Store::new`.
    redis_url: Url,
}
|
|
|
|
impl ManageConnection for RedisPool {
|
|
type Connection = redis::aio::ConnectionManager;
|
|
type Error = Error;
|
|
|
|
fn connect<'life0, 'async_trait>(
|
|
&'life0 self
|
|
) -> Pin<Box<dyn Future<Output = Result<Self::Connection, Self::Error>> + Send + 'async_trait>>
|
|
where
|
|
'life0: 'async_trait,
|
|
Self: 'async_trait
|
|
{
|
|
Box::pin(async {
|
|
let client = redis::Client::open(self.redis_url.clone())
|
|
.expect("redis::Client");
|
|
let manager = redis::aio::ConnectionManager::new(client)
|
|
.await
|
|
.expect("redis::Client");
|
|
Ok(manager)
|
|
})
|
|
}
|
|
|
|
fn is_valid<'life0, 'life1, 'async_trait>(
|
|
&'life0 self,
|
|
_conn: &'life1 mut Self::Connection
|
|
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send + 'async_trait>>
|
|
where
|
|
'life0: 'async_trait,
|
|
'life1: 'async_trait,
|
|
Self: 'async_trait
|
|
{
|
|
Box::pin(async {
|
|
Ok(())
|
|
})
|
|
}
|
|
|
|
fn has_broken(&self, _conn: &mut Self::Connection) -> bool {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Redis-backed storage handle.
///
/// Cloning is cheap: clones share the same underlying bb8 connection pool.
#[derive(Clone)]
pub struct Store {
    // Shared pool of `ConnectionManager`s created via `RedisPool`.
    pool: bb8::Pool<RedisPool>,
}
|
|
|
|
impl ConnectionLike for Store {
|
|
fn req_packed_command<'a>(
|
|
&'a mut self,
|
|
cmd: &'a redis::Cmd
|
|
) -> redis::RedisFuture<'a, Value> {
|
|
Box::pin(async move {
|
|
let mut conn = self.pool.get().await.unwrap();
|
|
conn.req_packed_command(cmd).await
|
|
})
|
|
}
|
|
|
|
fn req_packed_commands<'a>(
|
|
&'a mut self,
|
|
cmd: &'a redis::Pipeline,
|
|
offset: usize,
|
|
count: usize
|
|
) -> redis::RedisFuture<'a, Vec<Value>> {
|
|
Box::pin(async move {
|
|
let mut conn = self.pool.get().await.unwrap();
|
|
conn.req_packed_commands(cmd, offset, count).await
|
|
})
|
|
}
|
|
|
|
fn get_db(&self) -> i64 {
|
|
// wrong ;-)
|
|
0
|
|
}
|
|
}
|
|
|
|
impl Store {
|
|
pub async fn new(pool_max_size: u32, redis_url: String, redis_password_file: String) -> Self {
|
|
crate::systemd::status("Starting redis client");
|
|
let redis_password = std::fs::read_to_string(redis_password_file)
|
|
.expect("redis_password_file");
|
|
let mut redis_url = Url::parse(&redis_url)
|
|
.expect("redis_url");
|
|
redis_url.set_password(Some(&redis_password)).unwrap();
|
|
|
|
let pool = bb8::Pool::builder()
|
|
.max_size(pool_max_size)
|
|
.build(RedisPool { redis_url })
|
|
.await
|
|
.unwrap();
|
|
Self { pool }
|
|
}
|
|
|
|
/// Record a post, returning `Ok(true)` if it has not been seen before.
///
/// De-duplication uses GETSET on "p:<url>" (expiring after
/// `POST_EXPIRE` seconds): a Nil previous value means the key was
/// absent, i.e. the post is new. New posts are published on the
/// "firehose" pub/sub channel framed as `<event_type>\0<encoded_post>`.
pub async fn save_post(&mut self, mut post: EncodablePost) -> Result<bool, RedisError> {
    let post_key = format!("p:{}", post.url);
    // Pipeline: GETSET returns the previous value; the EXPIRE reply is
    // ignored, so `check` is a one-element bulk reply.
    let check = redis::pipe()
        .getset(&post_key, "1")
        .expire(post_key, POST_EXPIRE)
        .ignore()
        .query_async::<_, Value>(self)
        .await?;
    if check != Value::Bulk(vec![Value::Nil]) {
        // post is not new
        return Ok(false);
    }

    tracing::info!("New post ({}{} tags): {}",
        if post.account.bot { "bot, " } else { "" },
        post.tags.len(), post.url);

    match post.encode() {
        Ok(encoded_post) => {
            // Frame: event type, NUL separator, encoded payload.
            let mut data = Vec::<u8>::with_capacity(
                post.event_type.len() + 1 + encoded_post.len()
            );
            data.extend(post.event_type.as_bytes());
            data.extend(&[0]);
            data.extend(&encoded_post);
            redis::Cmd::publish("firehose", data)
                .query_async::<_, Value>(self)
                .await?;
        }
        Err(e) =>
            // Encoding failure is logged but does not fail the call;
            // the dedup marker is already set at this point.
            tracing::error!("cannot encode post: {:?}", e),
    }

    // post was new
    Ok(true)
}
|
|
|
|
/// Update tag statistics for a post: per-spelling and per-instance
/// counters, per-hour user sets (globally and per language), and up to
/// two preview images per tag.
///
/// Best-effort: redis errors are logged, never returned. Posts from
/// bots, without tags, without a usable URL host or timestamp, or whose
/// hour falls outside the `TAG_EXPIRE`-hour retention window are
/// skipped.
pub async fn save_post_tags(&mut self, post: &Post, tagged_unsafe: bool) {
    if post.account.bot || post.tags.is_empty() {
        // irrelevant
        return;
    }

    let Some(host) = post.url_host() else {
        tracing::warn!("no url_host");
        return;
    };

    let Some(timestamp) = post.timestamp() else {
        tracing::warn!("no timestamp");
        return;
    };
    // Bucket by whole UTC hours since the epoch.
    let hour = timestamp.to_utc().timestamp() as u64 / 3600;
    let until = current_hour();
    if hour > until {
        tracing::warn!("future post from {}", timestamp);
        return;
    }
    let from = until - TAG_EXPIRE;
    if hour < from {
        tracing::warn!("ancient post from {}", timestamp);
        return;
    }

    let user_id = post.user_id();
    // clip "en-us" to "en"
    let language = post.lang();

    let mut cmd = redis::pipe();
    // Queues the stats updates for one tag under the given hash/set keys;
    // called once for the global keys and once per language below.
    let store_tags = |cmd: &mut redis::Pipeline, spellings, tag_key, user_key| {
        // by spelling
        for spelling in spellings {
            cmd.hincr(
                &tag_key,
                format!("s:{spelling}"),
                1
            ).ignore();
        }
        // by instance
        cmd.hincr(
            tag_key,
            format!("h:{host}"),
            1
        ).ignore();
        if let Some(user_id) = &user_id {
            // users by tag/hour (TAG_EXPIRE is hours; EXPIRE wants seconds)
            cmd.sadd(&user_key, user_id).ignore()
                .expire(&user_key, TAG_EXPIRE as usize * 3600)
                .ignore();
        }
    };
    let tags = post.tags_set();
    // Collect preview images only from posts that are not tagged unsafe
    // and carry fewer than 3 tags (many-tag posts are likely spam).
    let images = if !tagged_unsafe && tags.len() < 3 {
        post.media_attachments.iter()
            .filter(|a| a.media_type == "image")
            .filter_map(|a| a.remote_url.as_ref())
            .filter(|url| !url.contains(char::is_whitespace))
            .take(2)
            .collect::<Vec<&String>>()
    } else {
        // NOTE(review): this also fires for safe posts with >= 3 tags,
        // so the "unsafe" wording can be misleading in the logs.
        tracing::warn!("unsafe: {:?}/{:?}", post.sensitive, tags.keys());
        // ignore disturbing porn images from sensitive posts
        vec![]
    };
    let mut image_keys = vec![];
    for (name, spellings) in tags {
        // global
        store_tags(&mut cmd,
            spellings.clone(),
            format!("g:{name}"),
            format!("u::{hour}:{name}"),
        );
        // by language
        if let Some(language) = &language {
            store_tags(&mut cmd,
                spellings,
                format!("l:{language}:{name}"),
                format!("u:{language}:{hour}:{name}"),
            );
        }

        for image in &images {
            // SCARD is the only non-ignored reply per image, so the
            // pipeline result below is one set size per image_keys entry.
            let image_key = format!("i:{name}");
            cmd.sadd(&image_key, image)
                .ignore()
                .expire(&image_key, TAG_EXPIRE as usize * 3600)
                .ignore()
                .scard(&image_key);
            image_keys.push(image_key);
        }
    }

    match cmd.query_async::<_, Vec<usize>>(self).await {
        Ok(image_key_sizes) => {
            assert_eq!(image_keys.len(), image_key_sizes.len());
            // Second pipeline: trim each image set back down to
            // IMAGES_PER_TAG by popping random excess members.
            let mut cmd = redis::pipe();
            for (image_key, size) in image_keys.into_iter().zip(image_key_sizes.into_iter()) {
                let excess = size.saturating_sub(IMAGES_PER_TAG);
                if excess > 0 {
                    // SPOP <key> <count>
                    cmd.spop(image_key).arg(excess)
                        .ignore();
                }
            }
            // Best-effort trim; errors are deliberately dropped.
            let _ = cmd.query_async::<_, ()>(self).await;
        }
        Err(e) => {
            tracing::error!("redis error: {:?}", e);
        }
    }
}
|
|
|
|
pub async fn save_host(&mut self, host: &str) -> Result<(), RedisError> {
|
|
let key = format!("h:{host}");
|
|
redis::pipe()
|
|
.set(&key, "1")
|
|
.ignore()
|
|
.expire(&key, HOST_EXPIRE)
|
|
.ignore()
|
|
.query_async::<_, ()>(self)
|
|
.await
|
|
}
|
|
|
|
pub async fn remove_host(&mut self, host: &str) -> Result<(), RedisError> {
|
|
redis::Cmd::del(format!("h:{host}"))
|
|
.query_async::<_, ()>(self)
|
|
.await
|
|
}
|
|
|
|
/// Stream all known instance hosts (keys under "h:", prefix stripped).
pub async fn get_hosts(&mut self) -> Result<impl Stream<Item = String> + '_, RedisError> {
    let hosts = self.scan_prefix("h:").await?;
    Ok(hosts)
}
|
|
|
|
pub async fn get_languages(&mut self) -> Result<Vec<String>, RedisError> {
|
|
redis::Cmd::hkeys("r")
|
|
.query_async(self)
|
|
.await
|
|
}
|
|
|
|
pub async fn get_tag_images(&mut self, tag: &str) -> Result<Vec<String>, RedisError> {
|
|
redis::Cmd::smembers(format!("i:{tag}"))
|
|
.query_async(self)
|
|
.await
|
|
}
|
|
|
|
/// Stream all globally tracked tag names (keys "g:<name>", prefix
/// stripped).
pub async fn get_tags_global(&mut self) -> Result<impl Stream<Item = String> + '_, RedisError> {
    // The former `.map(|tag| tag)` was an identity adapter with no
    // effect (clippy::map_identity); delegate directly.
    self.scan_prefix("g:").await
}
|
|
|
|
/// Stream `(language, tag)` pairs from the per-language stats keys
/// "l:<language>:<tag>". Keys without a second ':' are skipped.
pub async fn get_tags_by_language(&mut self) -> Result<impl Stream<Item = (Option<String>, String)> + '_, RedisError> {
    // BUG FIX: the SCAN MATCH pattern needs a wildcard. A bare "l:"
    // only matches a key literally named "l:", so this stream was
    // always empty; "l:*" matches the real per-language keys (compare
    // get_tags_global, which goes through scan_prefix -> "g:*").
    let by_language = self.scan("l:*")
        .await?
        .filter_map(|key| async move {
            // Strip the "l:" prefix, then split "<language>:<tag>".
            let rest = &key[2..];
            if let Some((language, tag)) = rest.split_once(':') {
                Some((Some(language.to_string()), tag.to_string()))
            } else {
                None
            }
        });
    Ok(by_language)
}
|
|
|
|
/// SCAN all keys starting with `prefix`, yielding each key with the
/// prefix removed.
pub async fn scan_prefix<'a>(&'a mut self, prefix: &'a str) -> Result<impl Stream<Item = String> + '_, RedisError> {
    let pattern = format!("{prefix}*");
    let stripped = self.scan(&pattern)
        .await?
        .map(|key| key[prefix.len()..].to_string());
    Ok(stripped)
}
|
|
|
|
/// Stream all keys matching the glob `pattern` via SCAN with COUNT 10000.
///
/// NOTE(review): SCAN may repeat keys while the keyspace changes under
/// it; callers here appear to tolerate duplicates — confirm if that
/// ever matters.
pub async fn scan(&mut self, pattern: &str) -> Result<impl Stream<Item = String> + '_, RedisError> {
    let mut cmd = redis::cmd("SCAN");
    // cursor_arg(0) tells iter_async to drive the SCAN cursor itself.
    cmd.cursor_arg(0)
        .arg("MATCH").arg(pattern)
        .arg("COUNT").arg(10000);
    let iter = cmd.iter_async::<String>(self)
        .await?;

    // Adapt redis's AsyncIter into a futures Stream: each step yields
    // the next item plus the iterator for the following step.
    let stream = unfold(iter, |mut iter| async move {
        iter.next_item().await
            .map(|tag| (tag, iter))
    });
    Ok(stream)
}
|
|
|
|
pub async fn clean_trend_tag(&self, language: &Option<String>, tag: &TrendTag) -> Result<(), RedisError> {
|
|
if ! tag.other.iter().any(|(name, _)| &name[..2] == "t:") {
|
|
return Ok(());
|
|
}
|
|
|
|
let mut cmd = redis::pipe();
|
|
for (name, _) in &tag.other {
|
|
if &name[..2] == "t:" {
|
|
cmd.hdel(tag_key(language, &tag.name), name)
|
|
.ignore();
|
|
}
|
|
}
|
|
cmd.query_async(&mut self.clone()).await
|
|
}
|
|
|
|
/// Fetch `TrendTag` statistics for each of `names`.
///
/// A single pipeline issues, per tag: HGETALL of its stats hash, then
/// one SCARD per hour of the comparison window
/// (`PERIODS.last() * (1 + PERIOD_COMPARE_WINDOW)` hours back from now).
/// The replies are decoded positionally in the same order.
///
/// Panics if redis returns a reply shape other than the one this
/// pipeline produces — that would indicate a programming error here.
pub async fn get_trend_tags(
    &self,
    language: &Option<String>,
    names: impl Iterator<Item = String>,
) -> Result<Vec<TrendTag>, RedisError> {
    let until = current_hour();
    let from = until - PERIODS.last().unwrap() * (1 + PERIOD_COMPARE_WINDOW);

    let mut cmd = redis::pipe();
    // Queue the commands while collecting names for the decode pass.
    let names = names.map(|name| {
        cmd.hgetall(tag_key(language, &name));
        for hour in from..=until {
            // Per-hour user sets: "u:<language-or-empty>:<hour>:<name>".
            cmd.scard(format!("u:{}:{}:{}", language.as_ref().map_or("", |l| l), hour, name));
        }
        name
    }).collect::<Vec<String>>();
    let mut values = cmd.query_async::<_, Vec<Value>>(&mut self.clone()).await?
        .into_iter();

    // Decode: one HGETALL bulk reply, then (until - from + 1) SCARD
    // replies, repeated per name.
    let mut results = Vec::with_capacity(names.len());
    for name in names {
        let Some(Value::Bulk(hash_values)) = values.next() else {
            panic!("hash_values");
        };
        let hash_values = hash_values.iter()
            .map(|value| String::from_redis_value(value).expect("hash_values value"))
            .collect();
        let hour_users = (from..=until).map(|hour| {
            let users = usize::from_redis_value(&values.next().unwrap()).expect("hour_users");
            (hour, users)
        }).collect();
        results.push(TrendTag::from_hash(name.to_string(), hash_values, hour_users));
    }
    Ok(results)
}
|
|
|
|
pub async fn get_trend_pools(
|
|
&mut self,
|
|
language: &Option<String>,
|
|
periods: &[u64],
|
|
) -> Result<Vec<(u64, Vec<String>)>, RedisError> {
|
|
let mut cmd = redis::pipe();
|
|
for period in periods {
|
|
cmd.smembers(pool_key(language, *period));
|
|
}
|
|
let sets: Vec<Vec<String>> = cmd.query_async(self)
|
|
.await?;
|
|
let results = periods.iter().copied()
|
|
.zip(sets.into_iter())
|
|
.collect();
|
|
Ok(results)
|
|
}
|
|
|
|
/// Apply a trend-pool update in one pipeline: remove dropped tags, add
/// newly trending ones, and keep the "r" hash of active languages in
/// sync.
///
/// `remove`, `add` and `pool_sizes` are keyed by period (in hours);
/// each touched pool set gets its TTL refreshed to `period` hours.
pub async fn update_trend_pools(
    &mut self,
    language: &Option<String>,
    remove: impl Iterator<Item = (u64, Vec<&str>)>,
    add: impl Iterator<Item = (u64, Vec<&str>)>,
    pool_sizes: impl Iterator<Item = (u64, usize)>,
) -> Result<(), RedisError> {
    let mut cmd = redis::pipe();
    for (period, tags) in remove {
        if ! tags.is_empty() {
            let pool_key = pool_key(language, period);
            // period is hours; EXPIRE takes seconds.
            cmd.srem(&pool_key, tags)
                .ignore()
                .expire(pool_key, period as usize * 3600)
                .ignore();
        }
    }
    for (period, tags) in add {
        if ! tags.is_empty() {
            let pool_key = pool_key(language, period);
            cmd.sadd(&pool_key, tags)
                .ignore()
                .expire(pool_key, period as usize * 3600)
                .ignore();
        }
    }
    if let Some(language) = language {
        // "r" maps language -> largest pool size so that
        // get_languages() can enumerate languages with active trends;
        // an empty pool removes the language again.
        let max_pool_size = pool_sizes.map(|(_, pool_size)| pool_size)
            .max()
            .unwrap_or(0);
        if max_pool_size > 0 {
            cmd.hset("r", language, max_pool_size)
                .ignore();
        } else {
            cmd.hdel("r", language)
                .ignore();
        }
    }
    cmd.query_async(self)
        .await?;
    Ok(())
}
|
|
|
|
pub async fn delete_tag(
|
|
&mut self,
|
|
language: &Option<String>,
|
|
tag: &str,
|
|
) -> Result<(), RedisError> {
|
|
let key = match language {
|
|
Some(language) => format!("l:{language}:{tag}"),
|
|
None => format!("g:{tag}"),
|
|
};
|
|
redis::Cmd::del(key)
|
|
.query_async(self)
|
|
.await
|
|
}
|
|
}
|
|
|
|
/// Key of the stats hash for tag `name`: "l:<language>:<name>" when a
/// language is given, otherwise the global "g:<name>".
fn tag_key(language: &Option<String>, name: &str) -> String {
    if let Some(language) = language {
        format!("l:{language}:{name}")
    } else {
        format!("g:{name}")
    }
}
|
|
|
|
/// Key of the trend-pool set for a `period` of hours, optionally scoped
/// to a language: "q:<period>" or "q:<period>:<language>".
fn pool_key(language: &Option<String>, period: u64) -> String {
    let mut key = format!("q:{period}");
    if let Some(language) = language {
        key.push(':');
        key.push_str(language);
    }
    key
}
|