From cf67ec439e589e95109e0e8a50653f34a0f66dc5 Mon Sep 17 00:00:00 2001 From: Finn Bear Date: Sat, 4 May 2024 11:05:10 -0700 Subject: [PATCH] Improve wordlist, replacements. --- Cargo.toml | 7 ++- README.md | 2 +- src/dictionary_blacklist.txt | 1 + src/dictionary_extra.txt | 3 ++ src/false_positives.txt | 72 +++++++++++++++++++++++++++- src/pii.rs | 21 +++++---- src/profanity.csv | 91 ++++++++++++++++++++++++++++++++---- src/replacements.csv | 75 ++++++++++++++++++++++++++++- src/replacements_extra.csv | 26 ++++++++++- src/test_positive.txt | 3 +- 10 files changed, 276 insertions(+), 25 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0377e7c..89705da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rustrict" authors = ["Finn Bear"] -version = "0.7.24" +version = "0.7.25" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/finnbear/rustrict/" @@ -48,6 +48,9 @@ serde = ["dep:serde", "arrayvec/serde"] [package.metadata.docs.rs] features = ["censor", "context", "customize", "width"] +[profile.release] +panic = 'abort' + [dependencies] arrayvec = {version = "0.7", optional = true} finl_unicode = "1.2" @@ -73,7 +76,7 @@ serde = {version = "1", features=["derive"], optional = true} rand = "0.8" csv = "1.1" censor_crate = {package = "censor", version = "0.3.0"} -rustrict_old = {package = "rustrict", version = "0.7.21"} +rustrict_old = {package = "rustrict", version = "0.7.24"} serial_test = "0.5" bincode = "1.3.3" serde_json = "1" \ No newline at end of file diff --git a/README.md b/README.md index 7b157da..7d5d6af 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ is used as a dataset. Positive accuracy is the percentage of profanity detected | Crate | Accuracy | Positive Accuracy | Negative Accuracy | Time | |-------|----------|-------------------|-------------------|------| -| [rustrict](https://crates.io/crates/rustrict) | 79.81% | 93.99% | 76.27% | 9s | +| [rustrict](https://crates.io/crates/rustrict) | 79.74% | 94.00% | 76.18% | 9s | | [censor](https://crates.io/crates/censor) | 76.16% | 72.76% | 77.01% | 23s | ## Development diff --git a/src/dictionary_blacklist.txt b/src/dictionary_blacklist.txt index 99f50ec..a50e864 100644 --- a/src/dictionary_blacklist.txt +++ b/src/dictionary_blacklist.txt @@ -329,6 +329,7 @@ h hand job handjobs hang yourself +hate negro hater haters hates diff --git a/src/dictionary_extra.txt b/src/dictionary_extra.txt index e0e825e..4222075 100644 --- a/src/dictionary_extra.txt +++ b/src/dictionary_extra.txt @@ -1,4 +1,5 @@ #8 +# of 2 secs 3 secs 4 secs @@ -155,11 +156,13 @@ make a hole minigame mini game n't eat +negativly ngad nigth of agitation omg opps +outgaminged pegging the plss plsss diff --git a/src/false_positives.txt b/src/false_positives.txt index 4b2f488..94c37c4 100644 --- a/src/false_positives.txt +++ b/src/false_positives.txt @@ -1,3 +1,4 @@ +# of #8 0 secs 2 secs @@ -953,6 +954,7 @@ antiromance antisex antislavery antispastic +antonio as antonio or ants cumulative ants ext @@ -3344,6 +3346,9 @@ bundles bunga burgh little burgundies +burn china +burn israel +burn palestine burst fu burst its burst texts @@ -4979,7 +4984,6 @@ deboned deboners debug ger debuggers -debut hole decadic decaspermal decimosexto @@ -7244,6 +7248,10 @@ gns hit gns lut gns perm gns seeks +go died +go diego +go dies +go diet go ok go vary go vulnerability @@ -7357,6 +7365,7 @@ graphs hit graphs lut graphs perm graphs seeks +grapiest graping grappling grass cocktail @@ -9182,6 +9191,7 @@ inns perm inns seeks inns semina ino life +ino parents inohymenitic ins cumulative ins eminem @@ -9361,6 +9371,7 @@ it wat it wats it watts italiano life +italiano parents italic cocktail italic commission italic cook @@ -9752,6 +9763,7 @@ kennedy ker kennedy kevin kennedy key keno life +keno parents kers cumulative kers ext kers hilt @@ -11353,6 +11365,7 @@ miss seeks missionary mistful mitchell +mitchell hole mitchell illinois mix linge mixer da @@ -11484,6 +11497,7 @@ moments hit moments lut moments perm moments seeks +moms milk monaco jones monaco om monaco on @@ -11717,6 +11731,7 @@ n't eat nabobish nabobs naggar +nail ger nail zimb nail zinc nake da @@ -11750,6 +11765,7 @@ nances lut nances perm nances seeks nano life +nano parents nanocephalus nap anti nap peru @@ -11838,6 +11854,7 @@ negativing negativism negativist negativity +negativly negaton negator negatron @@ -11990,6 +12007,38 @@ nigrosin nigrous nigth nigua +nike er +nike exercise +nike rabbi +nike race +nike rach +nike racial +nike racing +nike rack +nike rad +nike rag +nike raid +nike rail +nike rain +nike rais +nike rale +nike rall +nike ralph +nike ran +nike rap +nike rare +nike rat +nike ray +nike re +nike refrig +nike republic +nike rh +nike ri +nike ro +nike ru +nike rw +nike rya +nike xerox nilgai nilgau nilghai @@ -12334,6 +12383,7 @@ ones lut ones perm ones seeks ono life +ono parents ont its ont texts ont thick @@ -12390,6 +12440,7 @@ or appeal or appear or append or jewish +or phantom ora appeal ora appear ora append @@ -12443,6 +12494,17 @@ organize men orgyia ornithocephalus oroanal +orphanages +orphancy +orphandom +orphaned +orphange +orphanhood +orphaning +orphanism +orphanize +orphanry +orphans orra appeal orra appear orra append @@ -12521,6 +12583,7 @@ outers hit outers lut outers perm outers seeks +outgaminged outligger outromance outsuck @@ -13190,6 +13253,7 @@ phys lut phys perm phys seeks piano life +piano parents pic cocktail pic commission pic cook @@ -15031,6 +15095,7 @@ reno observation reno observe reno obtain reno obv +reno parents rents cumulative rents ext rents hilt @@ -15723,6 +15788,7 @@ scopulate scouriness scrap scrapling +scrappiest screens cumulative screens ext screens hilt @@ -16148,6 +16214,7 @@ sheets perm sheets seeks sheiklike shell +shell hole shell illinois shellcracker sheth @@ -17605,6 +17672,7 @@ tech linking tech links tech little techno life +techno parents teens cumulative teens ext teens hilt @@ -18392,6 +18460,7 @@ toshiba arizona toshiba arling toshiba arri totanus +touch children towcock towers cumulative towers ext @@ -18520,6 +18589,7 @@ trapezoid trapezophora trapezophoron trapezophozophora +trappiest trasses travesti dies travesti it diff --git a/src/pii.rs b/src/pii.rs index 7ef1041..ac68c80 100644 --- a/src/pii.rs +++ b/src/pii.rs @@ -6,15 +6,15 @@ lazy_static! { static ref PHONE : Regex = Regex::new(r#"(\+\d{1,2})?\s*\(?\d{3}\)?[\s\.-]*\d{3}[\s\.-]*\d{4}"#).unwrap(); static ref IP_ADDRESS : Regex = Regex::new(r#"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"#).unwrap(); static ref EMAIL_ADDRESS : Regex = Regex::new(r#"(?i)[a-z0-9_\-]{3,}\s*(@|[\[\(\s]at[\s\)\]])\s*[a-z0-9_\-]{5,}\s*(\.|dot)\s*[a-z]{2,3}"#).unwrap(); - static ref ADDRESS : Regex = Regex::new(r#"(?i)\d+[ ](?:[A-Za-z0-9\.-]+ )+(?:Avenue|Lane|Road|Boulevard|Drive|Street|Ave|Dr|Rd|Blvd|Ln|St)\.?(\s+#[0-9]{1,5})?"#).unwrap(); + //static ref ADDRESS : Regex = Regex::new(r#"(?i)\d+[ ](?:[A-Za-z0-9\.-]+ )+(?:Avenue|Lane|Road|Boulevard|Drive|Street|Ave|Dr|Rd|Blvd|Ln|St)\.?(\s+#[0-9]{1,5})?"#).unwrap(); static ref NAME : Regex = Regex::new(r#"(?i)(real\s)?name\s+is:?\s[a-zA-Z]+(\s[a-zA-z]+)?"#).unwrap(); - static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]+\.[a-zA-Z]{2,3}"#).unwrap(); + static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]{4,}\.[a-zA-Z]{2,3}"#).unwrap(); } /// Returns [`s`] with personally-identifiable information censored out, and a `true` if /// anything was censored. /// - phone numbers -/// - physical addresses +/// - physical addresses (disabled for now, due to excessive false positives) /// - ip addresses /// - email addresses /// - self-described full names @@ -28,8 +28,9 @@ pub fn censor_and_analyze_pii(s: &str) -> (String, bool) { censored |= matches!(ret, Cow::Owned(_)); let ret = EMAIL_ADDRESS.replace_all(&ret, "****@*****.***"); censored |= matches!(ret, Cow::Owned(_)); - let ret = ADDRESS.replace_all(&ret, "***** **** Ave #***"); - censored |= matches!(ret, Cow::Owned(_)); + // too many false positives + //let ret = ADDRESS.replace_all(&ret, "***** **** Ave #***"); + //censored |= matches!(ret, Cow::Owned(_)); let ret = NAME.replace_all(&ret, "name is ***** *****"); censored |= matches!(ret, Cow::Owned(_)); let ret = URL.replace_all(&ret, "******.***"); @@ -51,6 +52,10 @@ mod tests { #[test] fn pii() { + /* + 12345 SW 54th ST #150 + go to 1234 Main Street for free candy + */ let pii = r#" hello@gmail.com hello f00 @ gmail.com @@ -71,8 +76,6 @@ mod tests { 123.123.123.123 8.8.8.8 999.999.999.999 - 12345 SW 54th ST #150 - go to 1234 Main Street for free candy my name is: ALEX Smith my real name is Alex smith his name is alex smith @@ -80,8 +83,8 @@ mod tests { my name is alex. smith hello.com http://hello.com - https://foo.com - bar.com + https://foooo.com + barrr.com example.org twitch.tv http:/chat.dev diff --git a/src/profanity.csv b/src/profanity.csv index 4089a8b..c7faaa2 100644 --- a/src/profanity.csv +++ b/src/profanity.csv @@ -14,17 +14,21 @@ word,profane,offensive,sexual,mean,evasive 🍆🍑,1,0,3,0,2 🍆+🍑,1,0,3,0,2 🍑💦,1,0,0,0,1 +💦🍑,1,0,1,0,1 💋🍆,0,0,3,0,2 +🍆💋,0,0,3,0,2 👉👌,0,0,2,0,0 𓂺,0,0,2,0,3 _l_,1,0,2,0,1 °မ°,1,0,1,0,2 (.)(.),0,0,1,0,1 +(⏝!⏝),0,0,1,0,1 2g1c,0,0,2,0,1 2girls1cup,0,0,2,0,1 3==D,0,0,2,0,1 3==o,0,0,2,0,1 8==>,0,0,2,0,1 + 8==),0,0,2,0,1 8==D,0,0,2,0,1 8==o,0,0,2,0,1 9/11,0,1,0,0,0 @@ -38,6 +42,8 @@ a negro,0,1,0,0,0 a latex,0,0,2,0,2 adlof,0,1,0,0,1 adolf,0,2,0,0,0 +adolphfitler,0,2,0,0,2 +adolphfitlr,0,2,0,0,2 africans,0,1,0,0,0 agina,0,0,1,0,0 ahole,2,0,1,2,0 @@ -102,6 +108,7 @@ ballicker,0,0,1,0,0 ball licking,0,0,2,0,0 balls,0,0,1,0,0 ballsaque,0,0,2,0,2 +balltickler,0,0,2,0,1 ballz,0,0,1,0,1 bals,0,0,1,0,1 balsack,0,0,3,0,0 @@ -231,10 +238,15 @@ bumfuck,2,0,0,2,0 bumhole,1,0,1,0,0 bungabunga,0,0,1,0,0 bunghole,2,0,1,2,1 +burnchina,0,2,0,0,1 +burngaza,0,2,0,0,1 +burnisrael,0,2,0,0,1 +burnpalestine,0,2,0,0,1 butchbabe,0,1,2,0,1 butchdike,0,1,2,0,1 butchdyke,0,1,3,0,0 -but hole,1,0,0,0,0 + but hole,1,0,1,0,1 + but sniffer,1,0,1,0,1 butt,1,0,0,0,0 buttcrack,2,0,1,0,0 buttock,1,0,0,0,0 @@ -265,7 +277,9 @@ cervix,0,0,1,0,0 cheap lay,0,0,2,0,0 cheep lay,0,0,2,0,0 chesticle,0,0,2,0,1 +chickeneater,0,1,0,0,1 chickenshit,2,0,0,1,0 +childraper,0,0,2,0,1 childstew,0,1,0,0,0 chinaflu,0,2,0,0,0 chinaman,0,1,0,0,0 @@ -284,6 +298,7 @@ chode,0,0,1,0,0 chupalo,0,0,2,2,0 chupamela,0,0,2,2,0 chynk,0,2,0,0,2 + cigbock,1,0,1,0,2 clambamer,1,0,2,0,2 clamslamer,1,0,2,0,2 clevelandsteamer,0,0,2,0,1 @@ -300,10 +315,11 @@ cojones,0,0,1,0,0 cok,1,0,2,0,1 coglione,2,0,1,2,0 comebucket,0,1,1,0,1 +comebuster,0,0,1,0,1 comeinyou,0,0,2,0,2 commie,0,2,0,0,0 commis,0,2,0,0,0 -commit die,1,2,0,3,0,2 +commit die,1,2,0,3,2 coming on u,0,0,1,0,1 comingonyou,0,0,1,0,1 comitsuicide,1,2,0,3,0 @@ -382,6 +398,7 @@ dicker,2,0,2,0,1 dickhead,2,1,2,2,0 dieinahole,0,1,0,3,0 digbick,0,1,1,0,2 +diggernick,2,2,1,0,2 dik,2,0,2,1,1 dildo,0,0,3,0,0 diligaf,1,0,0,0,1 @@ -490,6 +507,7 @@ faygot,0,3,1,0,1 faqing,2,0,2,0,2 faqot,2,0,2,0,2 faq u,1,0,0,1,1 +faq you,1,0,0,1,1 fauck,2,0,2,0,2 faucked,2,0,2,0,2 faucker,2,0,2,0,2 @@ -715,6 +733,7 @@ gnagbang,0,2,0,0,2 godamn,1,1,0,0,0 goddam,1,1,0,0,0 godamit,1,1,0,0,0 + go die,0,1,0,0,1 go farm cotton,0,1,0,0,0 goingtobang,0,0,1,0,0 gokkun,0,0,1,0,0 @@ -735,11 +754,13 @@ grostulation,0,0,1,0,0 gun to school,3,0,0,0,0 gun at school,3,0,0,0,0 gunt,0,2,2,0,1 + gyat,0,0,1,0,2 gyatlove,0,0,2,0,2 gyatt,0,0,1,0,1 gyppie,0,1,0,0,1 gypo,0,1,0,0,1 gyppy,0,1,0,0,1 + hamas,0,1,0,0,1 handjob,0,0,3,0,0 hangyourself,0,2,0,3,0 h8,0,0,0,2,1 @@ -755,6 +776,7 @@ hatechinese,0,2,0,2,0 hategingers,0,2,0,0,0 hate mexi,0,1,0,1,0 hatemexican,0,1,0,1,0 +hatenegro,0,2,0,0,1 hator,0,0,0,2,2 havesex,0,0,2,0,0 hayt,0,0,0,2,2 @@ -763,6 +785,7 @@ heil hit,0,1,0,0,0 heil hitler,0,3,0,0,0 heil hyt,0,1,0,0,1 hell,1,0,0,0,0 +hellhole,1,0,0,0,0 hentai,0,0,3,0,0 her ashes,0,1,0,0,0 her bob,0,0,3,0,0 @@ -847,18 +870,23 @@ hytlar,0,1,0,0,1 hytle,0,1,0,0,1 hytler,0,2,0,0,1 hytlr,0,2,0,0,2 +hyuejass,2,0,1,0,2 gnikcuf,1,0,1,0,1 gooch,0,0,2,0,0 i climax,0,0,1,0,0 idiot,0,0,0,1,0 idolfhatler,0,2,0,0,2 +idrugkids,0,1,0,0,1 ifkdurmum,2,2,3,2,2 igger,3,3,0,0,0 +has a black hawk,1,0,1,0,2 +have a black hawk,1,0,1,0,2 i have ED,0,0,1,0,1 i hope your father dies,0,1,0,2,1 i hope your mother dies,0,1,0,2,1 ima negro,0,1,0,0,1 imbecil,0,0,0,2,0 +imma tuch u,0,0,1,0,1 impregnate,0,0,1,0,0 incel,0,1,2,1,0 incest,0,0,3,0,0 @@ -874,6 +902,7 @@ intercourse,0,0,2,0,1 internmentcamp,0,1,0,0,1 irection,0,0,1,0,1 isgay,0,2,0,0,0 +israeldid911,0,2,0,0,1 jackass,1,0,0,1,0 jackoff,0,0,2,0,0 jackshit,2,0,0,0,0 @@ -914,6 +943,7 @@ kafir,0,1,0,0,0 khunt,0,1,1,0,1 kiddiefidl,0,0,3,0,1 kiddyfidl,0,0,3,0,1 +kidraper,0,0,2,0,1 kidsinmy,0,1,1,0,2 kike,0,2,0,0,0 kilthyself,0,2,0,3,2 @@ -966,6 +996,7 @@ killyourfamily,0,2,0,3,0 kinbaku,0,0,1,0,0 kinkster,0,0,2,0,0 kissass,2,0,0,2,0 +kkclan,0,2,0,0,1 kkk,0,3,0,0,0 kkklan,0,3,0,0,0 kkkmember,0,3,0,0,0 @@ -976,6 +1007,7 @@ klanswoman,0,3,0,0,0 klanswomen,0,3,0,0,0 kluklux,0,2,0,0,1 kneega,1,1,0,0,1 + knigas,1,1,0,0,1 knobend,1,1,0,2,1 knobhead,1,1,0,2,1 knobbing,0,0,2,0,1 @@ -1012,6 +1044,7 @@ leccami,0,0,1,0,0 lech,0,0,1,0,1 lemonparty,0,0,1,0,1 lenin,0,1,0,0,0 +lenispicker,0,0,2,0,2 leper,0,1,0,1,0 lesbain,0,0,1,0,1 lesbayn,0,0,1,0,1 @@ -1104,6 +1137,7 @@ milfhunter,0,0,2,0,0 milkyourmother,0,0,1,2,0 minesinches,1,0,1,0,1 minge,0,0,3,0,1 +miqqer,2,3,0,0,2 misionary,0,0,1,0,0 misionaryposition,0,0,2,0,0 moanforme,0,0,1,0,1 @@ -1117,7 +1151,10 @@ molest,0,0,1,0,0 molestation,0,0,1,0,0 molester,0,0,1,0,0 molestor,0,0,1,0,0 +mommamilk,0,0,1,0,1 +mommasmilk,0,0,1,0,1 mommy,0,0,1,0,0 +momsmilk,0,0,1,0,1 monspubis,0,0,1,0,0 monsvenus,0,0,1,0,0 mooncricket,0,1,0,0,0 @@ -1159,6 +1196,7 @@ myseamen,0,0,1,0,2 naked,0,0,2,0,0 nagga,2,3,0,0,2 nagger,3,5,0,0,2 +naiger,1,2,0,0,2 naizi,0,1,0,0,1 nastyho,0,1,1,1,0 naughty,0,0,1,0,0 @@ -1227,12 +1265,15 @@ nihgar,3,5,0,0,2 nihger,3,5,0,0,2 nijja,2,3,0,0,2 nijjer,3,5,0,0,2 + niker,1,1,0,0,2 nikher,1,2,0,0,2 nimpho,0,0,1,0,1 nimphomania,0,0,2,0,1 nippas,2,3,0,0,2 nipple,0,0,1,0,0 nips,0,0,2,0,0 + niqa,1,2,0,0,3 +niqas,1,2,0,0,3 niqer,2,3,0,0,2 niqga,2,3,0,0,2 niqger,3,5,0,0,2 @@ -1245,6 +1286,7 @@ nobjokey,0,0,1,0,1 noggers,3,5,0,0,2 nolife,0,0,0,2,0 no nut nov,0,0,2,0,1 + no parents,0,0,0,1,1 nonutnovember,0,0,2,0,1 nood,0,0,2,0,2 nooky,0,0,1,0,1 @@ -1255,6 +1297,7 @@ nude,0,0,2,0,0 nudes,0,0,3,0,0 nuger,2,3,0,1,1 nudity,0,0,1,0,0 +nukeyourhouse,0,1,0,0,1 numb digger,2,3,0,0,2 numbnuts,0,0,0,2,0 nutcase,0,0,0,1,0 @@ -1273,18 +1316,25 @@ nutsack,0,0,2,0,0 nuts ack,0,0,2,0,1 nutte,0,1,1,0,0 nuzi,0,2,0,0,2 -nyger,3,5,0,0,3 +nygar,1,2,0,0,3 +nyger,3,5,0,0,2 nygga,2,3,0,0,3 +nygher,2,3,0,0,3 nympho,0,0,1,0,0 nymphomania,0,0,2,0,0 nziger,3,5,0,0,3 nzis,0,1,0,0,1 offmyself,2,0,0,0,1 +oilupkids,0,0,1,0,1 omorashi,0,0,1,0,0 onecuptwogirls,0,0,1,0,1 oneguyonejar,0,0,1,0,1 onemanonejar,0,0,1,0,1 onlyfans,0,0,2,0,0 +only2genders,0,2,0,0,1 +only2gerders,0,2,0,0,2 +onlytwogenders,0,2,0,0,1 +onlytwogerders,0,2,0,0,2 on the rag,0,0,1,0,0 oraface,0,0,1,0,1 orafice,0,0,1,0,1 @@ -1299,6 +1349,8 @@ orgies,0,0,1,0,0 oriface,0,0,1,0,0 orifice,0,0,1,0,0 orifis,0,0,1,0,1 +orphan,0,0,0,1,0 +orphanage,0,0,0,2,1 osama,0,1,0,0,0 osamabin,0,1,0,0,0 osamabinladen,0,1,0,0,0 @@ -1310,6 +1362,8 @@ padophil,0,0,2,0,0 paedo,0,0,1,0,1 paedofile,0,0,2,0,2 paedophile,0,0,2,0,2 +paenis,1,0,2,0,2 +paenus,1,0,2,0,3 paki,0,1,0,0,0 pakis,0,1,0,0,0 panooch,0,0,2,0,1 @@ -1318,6 +1372,8 @@ pantie,0,0,1,0,0 panties,0,0,1,0,0 panty,0,0,1,0,0 pay gorn,1,0,2,0,2 + pdfile,0,0,1,0,2 + pdofile,0,0,1,0,2 peado,0,0,1,0,1 peadofile,0,0,2,0,2 peadophile,0,0,2,0,1 @@ -1337,6 +1393,7 @@ pecker,1,0,0,0,1 pecorina,0,0,2,0,0 pedafile,0,0,2,0,2 pedaphile,0,0,2,0,1 + pedfile,0,0,1,0,1 pedo,0,0,1,0,0 pedofile,0,0,2,0,1 pedophile,0,0,2,0,0 @@ -1393,6 +1450,7 @@ poopy,1,0,0,0,0 poostabber,1,0,3,0,1 pormhub,0,0,1,0,1 porn,0,0,3,0,0 +p()rn,0,0,2,0,2 pornhub,0,0,3,0,0 pornographic,0,0,2,0,0 pornography,0,0,3,0,0 @@ -1428,6 +1486,7 @@ pussee,0,2,2,2,2 pussey,0,2,2,2,1 pussi,0,1,1,1,1 pussy,0,2,2,2,0 +put it in deeply,0,0,1,0,2 puttana,1,0,1,0,0 puta madre,2,0,1,0,0 eresputa,0,0,2,2,0 @@ -1444,6 +1503,7 @@ racist little,0,1,0,2,0 raghead,0,1,0,0,0 rape,0,0,3,0,0 rapeyou,0,2,3,0,0 +rapiest,0,0,1,0,1 raping,0,0,3,0,0 rapist,0,0,2,0,0 rascist,0,0,0,1,1 @@ -1454,6 +1514,7 @@ raunchy,0,0,1,0,0 rectum,0,0,2,0,0 recktum,0,0,2,0,2 redskins,0,1,0,0,0 +refrainfrombreathing,0,1,0,0,1 reggin,2,2,0,0,3 reich,0,1,0,0,0 renob,0,0,1,0,3 @@ -1511,6 +1572,7 @@ secs,0,0,2,0,2 seex,0,0,2,0,0 seggs,0,0,1,0,1 selfharm,1,0,0,0,1 +sellkiddrugs,0,1,0,0,1 semen,0,0,3,0,0 sendfeet,0,0,1,0,1 sendnude,0,0,3,0,0 @@ -1641,6 +1703,8 @@ strangerbanger,0,0,1,0,1 strappado,0,1,0,0,0 stripclub,0,0,2,0,0 striper,0,0,2,0,0 +stroking her,0,0,1,0,0 +stroking his,0,0,1,0,0 stronzo,2,0,1,0,0 stupid,0,0,0,1,0 squaw,0,1,0,0,0 @@ -1700,6 +1764,8 @@ ticklmypickl,0,0,2,0,2 ticklyourpickl,0,0,2,0,2 tiddy,0,0,2,0,1 tidies,0,0,2,0,1 +tigbides,0,0,1,0,2 +tigbidies,0,0,2,0,2 tigbities,0,0,2,0,2 tit,0,0,2,0,1 tits,0,0,2,0,1 @@ -1713,14 +1779,17 @@ tonto del culo,1,0,0,2,0 topless,0,0,1,0,0 tosser,2,0,0,0,0 tossur,1,0,0,0,1 -touchkids,0,0,1,0,0 -touched a kid,0,0,1,0,0 -touched kids,0,0,1,0,0 -touching a kid,0,0,1,0,0 -touching kids,0,0,1,0,0 +touchchildren,0,0,1,0,1 +touchkids,0,0,1,0,1 +touched a kid,0,0,1,0,1 +touched kids,0,0,1,0,1 +touching a kid,0,0,1,0,1 +touching kids,0,0,1,0,1 touchherprivate,0,0,1,0,1 touchhisprivate,0,0,1,0,0 touchmyprivate,0,0,1,0,1 +touchsmallchildren,0,0,1,0,1 +touchsmallkids,0,0,1,0,1 touchyourprivate,0,0,1,0,1 towelhead,0,1,0,0,0 trailertrash,0,1,0,2,0 @@ -1752,8 +1821,10 @@ twogirlsonecup,0,0,3,0,0 twunts,0,1,0,1,0 u mf,2,1,1,2,2 u motherfu,2,1,1,2,2 +uadidcrocus,0,2,0,0,2 ufuk,2,0,2,0,2 ugly,0,0,0,2,0 +ukrainedidcrocus,0,2,0,0,2 unalive,0,1,0,0,0 uncletom,0,2,0,2,0 underskirt,0,0,2,0,0 @@ -1785,6 +1856,7 @@ vajayjay,0,0,3,0,2 vajj,0,0,2,0,2 vajina,0,0,3,0,1 valjina,0,0,3,0,2 + vegina,0,0,1,0,1 verfickt,1,0,1,0,0 verga,1,0,1,0,0 vete al demonio,1,0,0,2,0 @@ -1834,8 +1906,11 @@ wiglestick,1,0,2,0,2 wigga,2,3,0,0,0 wigger,2,3,0,0,0 willies,0,0,1,0,0 +wilmadicfit,0,0,3,0,1 +wilmadiqfit,0,0,3,0,1 wichser,0,0,1,1,0 wiseass,1,0,0,1,0 + with autism,0,1,0,1,1 wives,0,0,1,0,0 wnker,0,0,1,0,1 wuss,0,0,0,1,0 diff --git a/src/replacements.csv b/src/replacements.csv index b995099..ada834e 100644 --- a/src/replacements.csv +++ b/src/replacements.csv @@ -190,7 +190,7 @@ z,sz Ţ,ţ Ť,ť Ŧ,Ttŧ -ŧ,t +ŧ,it Ũ,ũ Ū,ū Ŭ,ŭ @@ -2526,7 +2526,7 @@ z,sz ⌀,Oo ⌂,Aa ⌃,n -⌄,v +⌄,uv ⌅,n ⌆,n ⌇,l @@ -2781,12 +2781,75 @@ z,sz ⓩ,z │,il ╳,x╳ +▢,Oo +◉,Oo +○,Oo +◌,Oo +◍,Oo +◎,Oo +◦,o +◯,Oo +◴,Oo +◵,Oo +◶,Oo +◷,Oo +◻,Oo ♍,m ♎,no ♏,m ⛎,Uu ❗,il ⟙,t⟙ +⠇,Iil +⠏,ILil +⠓,Ll +⠛,o +⠜,Jj +⠝,Jj +⠟,Pp +⠣,l +⠦,Ll +⠧,Ll +⠪,Cc +⠯,Cc +⠶,o +⠷,b +⠸,ILil +⠻,q +⠼,Ll +⠽,Cc +⠾,b +⡅,i +⡆,Ii +⡇,ILil +⡟,Pp +⡪,Ss +⡫,Ss +⡯,Ff +⡱,CDcd +⡷,Dd +⡸,Jj +⡹,Jj +⡺,t +⢗,t +⢨,i +⢸,ILil +⢻,q +⢽,Ff +⢾,Dd +⣇,Ll +⣏,Cc +⣒,o +⣖,c +⣗,t +⣤,o +⣪,Ss +⣫,Ss +⣲,c +⣸,Ll +⣹,Cc +⣺,t +⣼,b ⤫,x⤫ ⤬,x⤬ ⨯,x⨯ @@ -2978,6 +3041,10 @@ z,sz ⷕ,n ⸁,f 〇,o〇 +「,c +」,i +『,c +』,i 〹,Hh え,h く,Vcv @@ -3558,6 +3625,10 @@ z,sz ﮫ,oﮫ ﮬ,oﮬ ﮭ,oﮭ +︽,n +︾,Vuv +︿,n +﹀,Vuv ﺍ,lﺍ ﺎ,lﺎ ﻩ,oﻩ diff --git a/src/replacements_extra.csv b/src/replacements_extra.csv index 0804609..c432f96 100644 --- a/src/replacements_extra.csv +++ b/src/replacements_extra.csv @@ -2985,7 +2985,7 @@ B,Bb ⌀,O ⌂,A ⌃,n -⌄,v +⌄,uv ⌅,n ⌆,n ⌇,l @@ -3211,4 +3211,28 @@ B,Bb ⣺,t ⣼,b +﹀,uV +︾,uV +︿,n +︽,n +「,c +『,c +」,i +』,i +ŧ,ti +○,O +◯,O +◻,O +◻,O +◷,O +◶,O +◵,O +◴,O +◦,o +◎,O +◍,O +◌,O +◉,O +▢,O + \u{0627},li diff --git a/src/test_positive.txt b/src/test_positive.txt index eedc9ed..dd92680 100644 --- a/src/test_positive.txt +++ b/src/test_positive.txt @@ -2716,4 +2716,5 @@ Mein Kampf ima negro gabe_itch ᶰᵃᶻᶤ -🅝🅔ggir \ No newline at end of file +🅝🅔ggir +m1qq3r \ No newline at end of file