butcher: implement reloadable profanity list
This commit is contained in:
parent
2776e007c3
commit
1e247b4768
|
@ -1,2 +1,3 @@
|
|||
#redis: redis://10.233.12.2:6379/
|
||||
redis: redis://127.0.0.1:6378/
|
||||
profanity: ../profanity.txt
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
/// Runtime settings, deserialized with serde from the service's config file.
#[derive(Debug, serde::Deserialize)]
|
||||
pub struct Config {
|
||||
/// Redis connection URL (the sample config uses `redis://127.0.0.1:6378/`).
pub redis: String,
|
||||
/// Path of the profanity word list file; `main` passes it to `WordList::new`.
pub profanity: String,
|
||||
}
|
||||
|
|
|
@ -1,7 +1,13 @@
|
|||
use std::{
|
||||
sync::Arc,
|
||||
ops::Deref,
|
||||
};
|
||||
use futures::StreamExt;
|
||||
use cave::{
|
||||
config::LoadConfig,
|
||||
feed::Post,
|
||||
firehose::FirehoseFactory,
|
||||
word_list::WordList,
|
||||
};
|
||||
use trend_setter::UpdateSet;
|
||||
|
||||
|
@ -9,12 +15,28 @@ mod config;
|
|||
mod trend_setter;
|
||||
mod tag_trimmer;
|
||||
|
||||
/// Decides whether `post` has to be treated as profane.
///
/// A post is profane when its `sensitive` flag is `Some(true)`, or when at
/// least one tag name from `post.tags_set()` is contained in `profanity`.
/// Tags are looked up one after another and the scan stops at the first hit.
async fn is_profane(profanity: &WordList, post: &Post) -> bool {
    // An explicit sensitivity marker settles the question without any lookup.
    if matches!(post.sensitive, Some(true)) {
        return true;
    }

    let tags_set = post.tags_set();
    for (tag, _spellings) in tags_set.iter() {
        // `contains` is async because the word list sits behind an async lock.
        if profanity.contains(tag).await {
            return true;
        }
    }

    false
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
cave::init::exit_on_panic();
|
||||
cave::init::init_logger(5555);
|
||||
|
||||
let config = config::Config::load();
|
||||
let profanity = WordList::new(&config.profanity).await;
|
||||
|
||||
let store = cave::store::Store::new(16, config.redis.clone()).await;
|
||||
|
||||
|
@ -34,6 +56,7 @@ async fn main() {
|
|||
firehose.for_each(move |data| {
|
||||
let trend_setter_tx = trend_setter_tx.clone();
|
||||
let mut store = store.clone();
|
||||
let profanity = profanity.clone();
|
||||
tokio::spawn(async move {
|
||||
let post = match serde_json::from_slice(&data) {
|
||||
Ok(post) =>
|
||||
|
@ -43,9 +66,10 @@ async fn main() {
|
|||
return;
|
||||
},
|
||||
};
|
||||
store.save_post_tags(&post).await;
|
||||
|
||||
let update_set = UpdateSet::from(&post);
|
||||
let post = Arc::new(post);
|
||||
store.save_post_tags(&post, is_profane(&profanity, &post).await).await;
|
||||
|
||||
let update_set = UpdateSet::from(post.deref());
|
||||
if ! update_set.is_empty() {
|
||||
trend_setter_tx.send(update_set).await.unwrap();
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ pub mod store;
|
|||
pub mod trend_tag;
|
||||
pub mod firehose;
|
||||
pub mod live_file;
|
||||
pub mod word_list;
|
||||
|
||||
pub const PERIODS: &[u64] = &[4, 24, 7 * 24];
|
||||
|
||||
|
|
|
@ -18,100 +18,6 @@ const HOST_EXPIRE: usize = 30 * 86400;
|
|||
pub const TREND_POOL_SIZE: usize = 20;
|
||||
pub const IMAGES_PER_TAG: usize = 8;
|
||||
|
||||
/// Hard-coded tag names that cause a post to be treated as unsafe.
///
/// Code inside `impl Store` looks every entry up in the post's tag set
/// (`tags_set.contains_key`) when it computes `tagged_unsafe`. This diff
/// removes the constant; the same words reappear in `profanity.txt`.
pub const UNSAFE_TAGS: &[&str] = &[
|
||||
"bigdick",
|
||||
"gayporn",
|
||||
"porn",
|
||||
"p0rn",
|
||||
"pr0n",
|
||||
"lolita",
|
||||
"lolitas",
|
||||
"boob",
|
||||
"b00b",
|
||||
"tit",
|
||||
"tits",
|
||||
"breast",
|
||||
"breasts",
|
||||
"fuck",
|
||||
"fucked",
|
||||
"fucking",
|
||||
"sex",
|
||||
"sexy",
|
||||
"anal",
|
||||
"adult",
|
||||
"penis",
|
||||
"dick",
|
||||
"cock",
|
||||
"c0ck",
|
||||
"lewd",
|
||||
"hentai",
|
||||
"transselfie",
|
||||
"femdom",
|
||||
"kink",
|
||||
"kinky",
|
||||
"erotic",
|
||||
"erotica",
|
||||
"nude",
|
||||
"nudism",
|
||||
"nudist",
|
||||
"nakt",
|
||||
"naked",
|
||||
"exhibitionism",
|
||||
"flashing",
|
||||
"piss",
|
||||
"pee",
|
||||
"poop",
|
||||
"shit",
|
||||
"dogshit",
|
||||
"gore",
|
||||
"nsfw",
|
||||
"nsfwart",
|
||||
"pussy",
|
||||
"pussies",
|
||||
"vagina",
|
||||
"ass",
|
||||
"asses",
|
||||
"arsch",
|
||||
"ärsche",
|
||||
"heinie",
|
||||
"butt",
|
||||
"butts",
|
||||
"bukkake",
|
||||
"cumshot",
|
||||
"domsub",
|
||||
"cw",
|
||||
"bigpenis",
|
||||
"pokephilia",
|
||||
"pokeporn",
|
||||
"tentacle",
|
||||
"yiff",
|
||||
"semen",
|
||||
"rule34",
|
||||
"r34",
|
||||
"yaoi",
|
||||
"swastika",
|
||||
"hardcore",
|
||||
"shota",
|
||||
"dildo",
|
||||
"nutte",
|
||||
"nutten",
|
||||
"whore",
|
||||
"whores",
|
||||
"hoe",
|
||||
"hoes",
|
||||
"prostitute",
|
||||
"prostitutes",
|
||||
"prostitution",
|
||||
"adultcartoon",
|
||||
"adultcartoons",
|
||||
"cartoonporn",
|
||||
"bigtit",
|
||||
"bigtits",
|
||||
"bigboobs",
|
||||
"blowjob",
|
||||
"topless",
|
||||
|
||||
];
|
||||
|
||||
pub type Error = RedisError;
|
||||
|
||||
/// wrapper so we can impl ManageConnection
|
||||
|
@ -237,7 +143,7 @@ impl Store {
|
|||
Ok(true)
|
||||
}
|
||||
|
||||
pub async fn save_post_tags(&mut self, post: &Post) {
|
||||
pub async fn save_post_tags(&mut self, post: &Post, tagged_unsafe: bool) {
|
||||
if post.account.bot || post.tags.is_empty() {
|
||||
// irrelevant
|
||||
return;
|
||||
|
@ -297,11 +203,6 @@ impl Store {
|
|||
.ignore();
|
||||
}
|
||||
};
|
||||
let tags_set = post.tags_set();
|
||||
let tagged_unsafe = post.sensitive != Some(false) ||
|
||||
UNSAFE_TAGS.iter().any(|unsafe_tag|
|
||||
tags_set.contains_key(&unsafe_tag[..])
|
||||
);
|
||||
let images = if !tagged_unsafe {
|
||||
post.media_attachments.iter()
|
||||
.filter(|a| a.media_type == "image")
|
||||
|
@ -315,7 +216,7 @@ impl Store {
|
|||
vec![]
|
||||
};
|
||||
let mut image_keys = vec![];
|
||||
for (name, spellings) in tags_set {
|
||||
for (name, spellings) in post.tags_set() {
|
||||
// global
|
||||
store_tags(&mut cmd,
|
||||
spellings.clone(),
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
use std::{sync::Arc, collections::HashSet};
|
||||
|
||||
use tokio::{
|
||||
io::{BufReader, AsyncBufReadExt},
|
||||
sync::RwLock,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WordList {
|
||||
list: Arc<RwLock<HashSet<String>>>,
|
||||
}
|
||||
|
||||
impl WordList {
|
||||
pub async fn new(path: &str) -> WordList {
|
||||
let list = crate::live_file::load(path, |file| async move {
|
||||
let mut list = HashSet::new();
|
||||
let mut file = BufReader::new(file);
|
||||
let mut line = String::new();
|
||||
while let Ok(_) = file.read_line(&mut line).await {
|
||||
if line == "" {
|
||||
break
|
||||
}
|
||||
|
||||
list.insert(line.trim_end().to_string());
|
||||
|
||||
line = String::new();
|
||||
}
|
||||
list
|
||||
}).await.unwrap();
|
||||
|
||||
|
||||
WordList { list }
|
||||
}
|
||||
|
||||
pub async fn contains(&self, word: &str) -> bool {
|
||||
self.list.read().await
|
||||
.contains(word)
|
||||
}
|
||||
}
|
|
@ -4,6 +4,7 @@ let
|
|||
cfg = config.services.caveman;
|
||||
|
||||
blocklistPath = "/etc/caveman.blocklist";
|
||||
profanityPath = "/etc/caveman.profanity";
|
||||
|
||||
hunterDefaultSettings = {
|
||||
redis = "redis://127.0.0.1:${toString cfg.redis.port}/";
|
||||
|
@ -21,6 +22,7 @@ let
|
|||
|
||||
# Default butcher settings; values from cfg.butcher.settings are merged over
# these with lib.recursiveUpdate when butcherSettings is built.
butcherDefaultSettings = {
|
||||
redis = "redis://127.0.0.1:${toString cfg.redis.port}/";
|
||||
# Path handed to butcher as its `profanity` setting.
profanity = profanityPath;
|
||||
};
|
||||
|
||||
butcherSettings = lib.recursiveUpdate butcherDefaultSettings cfg.butcher.settings;
|
||||
|
@ -127,6 +129,10 @@ in
|
|||
hunterSettings.prometheus_port
|
||||
];
|
||||
|
||||
# Links the packaged profanity.txt to profanityPath.
# NOTE(review): tmpfiles type `L` only creates the symlink when nothing exists
# at the path yet; confirm whether `L+` is wanted so that an updated list
# replaces an older link, or whether keeping a local override is intended.
systemd.tmpfiles.rules = [
|
||||
"L ${profanityPath} - - - - ${./profanity.txt}"
|
||||
];
|
||||
|
||||
services.redis.servers.caveman = lib.mkIf cfg.hunter.enable {
|
||||
enable = true;
|
||||
port = cfg.redis.port;
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
bigdick
|
||||
gayporn
|
||||
porn
|
||||
p0rn
|
||||
pr0n
|
||||
lolita
|
||||
lolitas
|
||||
loli
|
||||
lolicon
|
||||
boob
|
||||
b00b
|
||||
tit
|
||||
tits
|
||||
breast
|
||||
breasts
|
||||
fuck
|
||||
fucked
|
||||
fucking
|
||||
sex
|
||||
sexy
|
||||
anal
|
||||
adult
|
||||
penis
|
||||
dick
|
||||
cock
|
||||
c0ck
|
||||
lewd
|
||||
hentai
|
||||
transselfie
|
||||
femdom
|
||||
kink
|
||||
kinky
|
||||
erotic
|
||||
erotica
|
||||
nude
|
||||
nudism
|
||||
nudist
|
||||
nakt
|
||||
naked
|
||||
exhibitionism
|
||||
flashing
|
||||
piss
|
||||
pee
|
||||
poop
|
||||
shit
|
||||
dogshit
|
||||
gore
|
||||
nsfw
|
||||
nsfwart
|
||||
pussy
|
||||
pussies
|
||||
vagina
|
||||
ass
|
||||
asses
|
||||
arsch
|
||||
ärsche
|
||||
heinie
|
||||
butt
|
||||
butts
|
||||
bukkake
|
||||
cumshot
|
||||
domsub
|
||||
cw
|
||||
bigpenis
|
||||
pokephilia
|
||||
pokeporn
|
||||
tentacle
|
||||
yiff
|
||||
semen
|
||||
rule34
|
||||
r34
|
||||
yaoi
|
||||
swastika
|
||||
hardcore
|
||||
shota
|
||||
dildo
|
||||
nutte
|
||||
nutten
|
||||
whore
|
||||
whores
|
||||
hoe
|
||||
hoes
|
||||
prostitute
|
||||
prostitutes
|
||||
prostitution
|
||||
adultcartoon
|
||||
adultcartoons
|
||||
cartoonporn
|
||||
bigtit
|
||||
bigtits
|
||||
bigboobs
|
||||
blowjob
|
||||
topless
|
||||
masturbate
|
||||
masturbation
|
||||
shemale
|
||||
shemales
|
||||
beautifulgirls
|
||||
stripchat
|
Loading…
Reference in New Issue