def main(components=None):
    """Enumerate generated gibberish words and report the profane ones.

    Every word is built as initial + vowel + final + vowel + final. When the
    three-part prefix is itself profane, the whole family of words sharing
    that prefix is counted as profane in one step instead of being expanded.

    Args:
        components: Optional 5-tuple ``(initials, vowels, finals, repeat_cnt,
            total_cnt)``. When falsy, ``gibberish_components()`` supplies it.
            ``repeat_cnt`` is used as the number of words per prefix and
            ``total_cnt`` as the progress-bar total.
    """
    if not components:
        components = gibberish_components()
    initials, vowels, finals, repeat_cnt, total_cnt = components

    checker = ProfanityFilter()
    seen = 0
    flagged = 0
    with alive_bar(total_cnt) as bar:
        for head in initials:
            for mid in vowels:
                for tail in finals:
                    prefix = ''.join((head, mid, tail))
                    if checker.is_profane(prefix):
                        # The prefix alone is profane: account for every
                        # word that starts with it without generating them.
                        print(
                            seen,
                            'All %s words beginning with "%s..."' %
                            (repeat_cnt, prefix))
                        seen += repeat_cnt
                        flagged += repeat_cnt
                        bar(incr=repeat_cnt)
                    else:
                        for vowel2 in vowels:
                            for final2 in finals:
                                seen += 1
                                candidate = ''.join((prefix, vowel2, final2))
                                if checker.is_profane(candidate):
                                    flagged += 1
                                    print(seen, candidate)
                                bar()
    print('Done! Found %s profane words in %s total' % (flagged, seen))
def main():
    """Command-line entry point: check or censor text for profanity.

    The text comes from ``-t/--text`` or from the file named by ``-f/--file``
    (the two are mutually exclusive); with neither, an empty string is used.
    ``-l/--languages`` is a comma-separated language list (default ``en``).
    ``-o/--output`` writes the censored text to a file and ``--show`` prints
    it. When neither ``--show`` nor ``--output`` is given, a clean / not-clean
    verdict is printed instead.
    """
    parser = argparse.ArgumentParser(
        description='Profanity filter console utility')
    source = parser.add_mutually_exclusive_group()
    source.add_argument('-t', '--text', dest='text',
                        help='Test the given text for profanity')
    source.add_argument('-f', '--file', dest='path',
                        help='Test the given file for profanity')
    parser.add_argument(
        '-l', '--languages', dest='languages', default='en',
        help='Test for profanity using specified languages (comma separated)')
    parser.add_argument('-o', '--output', dest='output_file',
                        help='Write the censored output to a file')
    parser.add_argument('--show', action='store_true',
                        help='Print the censored text')
    args = parser.parse_args()

    # No manual "both --text and --file" check is needed: the mutually
    # exclusive group makes parse_args() reject that combination itself, so
    # the former check could never be reached.
    if args.text:
        text = args.text
    elif args.path:
        with open(args.path) as f:
            text = f.read()
    else:
        text = ''

    pf = ProfanityFilter(languages=args.languages.split(','))
    censored_text = pf.censor(text)

    if args.output_file:
        with open(args.output_file, 'w') as f:
            f.write(censored_text)
        print("Censored text written to output file at: " + args.output_file)

    if args.show:
        print("Censored text:\n")
        print(censored_text)

    # The verdict is only printed when no censored output was requested.
    if args.show or args.output_file:
        return

    if pf.is_clean(text):
        print("This text is clean.")
    else:
        print("This text is not clean!")
def main():
    """Run the query-log processing pipeline stage by stage.

    Stages, in order: parser, de-duplicator, profanity filter, keyword
    dictionary builder, SymSpell checker, and a second de-duplication pass.
    All file locations and limits are read from ``ConfigLoader``. The output
    directory is created when missing, and the total run time in minutes is
    printed at the end.
    """
    started_at = time()

    print("Running Basic Setup Steps....")
    cfg = ConfigLoader()
    out_dir = cfg.get_base_path() + cfg.get_output_directory_name()
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    query_parser = Parser(cfg)
    profanity_stage = ProfanityFilter(cfg, query_parser)
    dedup_stage = DeDuplicator(query_parser)
    dictionary_stage = KeywordDictionaryBuilder(query_parser)
    spelling_stage = SymSpellChecker(cfg, query_parser)

    print("Running Parser....")
    query_parser.parse(
        cfg.get_query_logs_file_path(),
        cfg.get_frequency_file_path(),
        cfg.get_max_total_queries(),
    )

    # The frequency file is passed as both of the first two arguments.
    print("Running De-duplicator....")
    dedup_stage.remove_duplicates(
        cfg.get_frequency_file_path(),
        cfg.get_frequency_file_path(),
        cfg.get_de_duplicated_keyword_ordered_1_file_path(),
        cfg.get_de_duplicated_missing_space_1_file_path(),
        cfg.get_de_duplicated_synonyms_1_file_path(),
    )

    print("Running Profanity Filter....")
    profanity_stage.remove_profane_queries(
        cfg.get_frequency_file_path(),
        cfg.get_frequency_file_path(),
        cfg.get_filtered_profane_queries_file_path(),
    )

    print("Running Keyword Dictionary Builder....")
    dictionary_stage.build_dictionary_file_from_frequency_file(
        cfg.get_frequency_file_path(),
        cfg.get_dictionary_file_path(),
    )

    print("Running SymSpell Checker....")
    spelling_stage.run_sym_spell(
        cfg.get_sym_spell_iterations(),
        cfg.get_frequency_file_path(),
        cfg.get_dictionary_file_path(),
        cfg.get_dictionary_file_path(),
    )

    # Second pass: same call shape, this time on the dictionary file.
    print("Running De-duplicator....")
    dedup_stage.remove_duplicates(
        cfg.get_dictionary_file_path(),
        cfg.get_dictionary_file_path(),
        cfg.get_de_duplicated_keyword_ordered_2_file_path(),
        cfg.get_de_duplicated_missing_space_2_file_path(),
        cfg.get_de_duplicated_synonyms_2_file_path(),
    )

    print("Completed!!!")
    elapsed_minutes = (time() - started_at) / 60
    print("Total time taken: ", elapsed_minutes, " minutes")
def get_profanities(words, custom_profanities=None):
    """Return the profane entries found in *words*, in input order.

    Args:
        words: Iterable of words; each is passed to ``censor_word``.
        custom_profanities: Optional collection installed as the filter's
            custom ``'en'`` profane-word dictionary before checking.

    Returns:
        A list with the ``original_profane_word`` of every word whose
        censoring result is flagged as profane.
    """
    checker = ProfanityFilter()
    if custom_profanities is not None:
        checker.custom_profane_word_dictionaries = {'en': custom_profanities}
    # map() is lazy, so words are still censored one at a time, in order.
    censored = map(checker.censor_word, words)
    return [item.original_profane_word for item in censored if item.is_profane]
def __init__(self, profane_words_filepath: str):
    """Load the profane word list and create the filter and recognizer.

    Args:
        profane_words_filepath: Path to a UTF-8 text file read line by
            line; each stripped line becomes one dictionary entry.
    """
    vocabulary = []
    with open(profane_words_filepath, encoding='utf8') as source:
        for raw_line in source:
            entry = raw_line.strip()
            vocabulary.append(entry)
            # Also register the spelling with every 'ё' written as 'е'.
            if 'ё' in entry:
                vocabulary.append(entry.replace('ё', 'е'))
    self._ru_words = vocabulary
    self._ru_pf = ProfanityFilter()
    # The word list is installed under the 'en' dictionary key.
    self._ru_pf.custom_profane_word_dictionaries = {'en': vocabulary}
    self._r = sr.Recognizer()
def applyProfanityFilter():
    """Append a masked row to the transcript CSV for every unclean sentence.

    Reads ``media/recording1/transcript.csv`` as a CSV with a header row and
    checks the ``sentence`` column of each record. For every record that is
    not clean, one row of asterisk placeholders is written to the same file.

    Raises:
        KeyError: If a record has no ``sentence`` column.
    """
    pf = ProfanityFilter()
    pf.censor_char = '@'
    # 'a+' instead of 'w+': 'w+' truncated the transcript before it could be
    # read, so no row was ever examined. 'a+' keeps the existing content,
    # still creates the file when it is missing, and sends every write to the
    # end of the file. newline='' is what the csv module expects.
    with open('media/recording1/transcript.csv', mode='a+',
              newline='') as csv_file:
        # Append mode starts positioned at the end; rewind before reading.
        csv_file.seek(0)
        # Read everything first so the rows written below are never re-read.
        rows = list(csv.DictReader(csv_file))
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        for row in rows:
            if not pf.is_clean(row['sentence']):
                csv_writer.writerow(['***', '****', '****', '*****', '*****'])
    # The with-block closes the file; the former bare `csv_file.close`
    # (missing call parentheses) did nothing and has been removed.
def Predict(texts):
    """Label each text as acceptable (1) or not (0).

    A text is labelled 0 when the profanity filter flags it, or when its
    sentiment ``compound`` score is at or below -0.05; otherwise it is
    labelled 1. Sentiment is only computed for texts that are not profane.

    Args:
        texts: Iterable of strings.

    Returns:
        A list of 0/1 labels in the same order as *texts*.
    """
    profanity = ProfanityFilter()
    analyzer = SentimentIntensityAnalyzer()

    def label_for(text):
        if profanity.is_profane(text):
            return 0
        compound = analyzer.polarity_scores(text)['compound']
        return 0 if compound <= -0.05 else 1

    return [label_for(text) for text in texts]
class CommentForm(forms.Form):
    """Comment form with name, email and content fields.

    Validation rejects names longer than 80 characters, content that the
    profanity filter does not consider clean, and content in which the HTML
    parser finds any markup.
    """

    name = forms.CharField(label='Your Name', max_length=100)
    email = forms.EmailField(label='Your Email')
    content = forms.CharField(label='Comment', widget=forms.Textarea)

    def __init__(self, *args, **kwargs):
        """Attach a profanity filter, then initialise the form as usual."""
        self.pf = ProfanityFilter()
        super().__init__(*args, **kwargs)

    def clean_name(self):
        """Return the name, rejecting values longer than 80 characters."""
        value = self.cleaned_data['name']
        if len(value) <= 80:
            return value
        raise ValidationError('The name cannot be longer than 80 characters')

    def clean_content(self):
        """Return the comment text if it is clean and free of HTML markup."""
        body = self.cleaned_data['content']

        # Profanity check comes first, exactly as before.
        if not self.pf.is_clean(body):
            raise ValidationError('Profanity is not allowed in the comments!')

        # TODO: Alternative is to use a html sanitizer
        # Any element found by the parser counts as markup.
        markup = BeautifulSoup(body, 'html.parser')
        if markup.find():
            raise ValidationError('No html markup allowed in the content of a comment! Please understand that '
                                  'permitting html markup in comments is risky and vulnerable to attacks.')
        return body
def _url_profanity_candidates(url):
    """Break *url* into the unique substrings that should be checked.

    Returns:
        ``(candidates, only_short)`` where ``candidates`` is the
        de-duplicated, order-preserving list of substrings and
        ``only_short`` is True when no URL segment is longer than 5
        characters (the precondition for the three-letter-word check).

    Raises:
        InvalidURLError: If the decoded URL has neither a host nor a path.
    """
    parts = urlparse(get_decodedurl(url))
    if not (parts.path or parts.netloc):
        raise InvalidURLError(
            "Badly formatted URL passed to is_url_profane")

    # all the URL-safe characters, escaped
    splitters = r"\.|\/|\_|\-|\~|\$|\+|\!|\*|\(|\)|\,"
    segments = []
    for component in (parts.netloc, parts.path, parts.query):
        if component:
            segments += re.split(splitters, component)

    only_short = True
    substrings = []
    for segment in segments:
        if len(segment) > 0:
            if len(segment) > 5:
                only_short = False
            for substring in get_all_substrings(segment, 2):
                if len(substring) > 0:
                    substrings.append(substring)

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(substrings)), only_short


def is_profane(url):
    """Return True when any part of *url* is considered profane.

    Two independent checks are controlled by settings (both default to
    enabled): ``ENABLE_FAST_PROFANITY_CHECKING`` runs the three-letter-word
    lookup and ``PredictProfanity``; ``ENABLE_DEEP_PROFANITY_CHECKING`` runs
    ``ProfanityFilter`` on every candidate substring.

    The URL is now tokenised whenever at least one check is enabled.
    Previously the candidate list was only built inside the fast check, so
    enabling the deep check alone failed on an unbound variable.

    Args:
        url: The URL string; anything shorter than 3 characters is clean.

    Raises:
        InvalidURLError: If the URL has neither a host nor a path
            (only when at least one check is enabled).
    """
    if len(url) < 3:
        return False

    fast = getattr(settings, "ENABLE_FAST_PROFANITY_CHECKING", True)
    deep = getattr(settings, "ENABLE_DEEP_PROFANITY_CHECKING", True)
    if not (fast or deep):
        return False

    candidates, only_short = _url_profanity_candidates(url)
    if not candidates:
        # Nothing to examine, so nothing can be profane; this also avoids
        # handing an empty list to the predictor.
        return False

    if fast:
        # speed optimization: the word-list lookup is only meaningful when
        # every URL segment is short.
        if only_short:
            for part in candidates:
                if part in BAD_THREE_LETTER_WORDS:
                    return True
        score = PredictProfanity(candidates)
        if score.any() == 1:
            return True

    if deep:
        pf = ProfanityFilter()
        for part in candidates:
            if pf.is_profane(part):
                return True

    return False
def list(self, request):
    """Censor a comment and report whether it is clean.

    Expects the query parameters ``comment``, ``deep_flag`` and ``lang``.
    When any of them is missing, a 400 response with an error message is
    returned. Otherwise the response carries the censored comment and an
    ``approved`` flag taken from the filter's clean check.
    """
    params = request.query_params
    required = ('comment', 'deep_flag', 'lang')
    if not all(key in params for key in required):
        return Response({'error_message': 'All params are required'},
                        status=status.HTTP_400_BAD_REQUEST)

    comment = params['comment']
    deep_flag = util.strtobool(params['deep_flag'])
    lang = params['lang']
    pf = ProfanityFilter(censor_whole_words=False,
                         deep_analysis=deep_flag,
                         languages=[lang])
    payload = {
        'comment': pf.censor(comment),
        'approved': pf.is_clean(comment)
    }
    return Response(payload)
def chat():
    """Build the bot's reply to an incoming chat message.

    ``!start`` yields a greeting plus an icebreaker question, ``!icebreaker``
    yields just a question, and any other message is answered only when the
    profanity filter does not consider it clean. In every other case the
    reply body stays empty.

    Returns:
        The messaging response rendered as a string.
    """
    incoming_msg = request.values.get('Body', '')
    resp = MessagingResponse()
    msg = resp.message()
    msg.body("")

    reply = None
    if "!start" in incoming_msg:
        reply = (
            "Greetings! I am ModBot, here to watch over this chat. \n\nNow that you're all here, feel free to introduce yourselves. To break the ice, answer the following question: "
            + choose_icebreaker())
    elif "!icebreaker" in incoming_msg:
        reply = "Answer the question: " + choose_icebreaker()
    elif not ProfanityFilter().is_clean(incoming_msg):
        reply = "Please refrain from using inappropriate language. This is meant to be a safe space."

    if reply is not None:
        msg.body(reply)
    return str(resp)
def check_profanity_filter_text():
    """Print the contents of a text file with profanity censored.

    The whole file is read, passed through ``ProfanityFilter.censor`` and
    the result is printed. According to the original author's note,
    offensive words come out marked with "*".
    """
    pf = ProfanityFilter()
    # Point this path at the file to check. When the text file sits next to
    # the program, a bare name such as 'profanity.txt' can be used instead.
    # The with-block guarantees the handle is closed; it used to be left
    # open.
    with open('/yourfilelocation/filename.txt') as text_file:
        content_of_file = text_file.read()
    text = pf.censor(content_of_file)
    print(text)
def process(self, message, **kwargs):
    """Attach a profanity entity to *message* when its text is profane.

    This is where the component's custom behaviour is defined. The message
    text is split on single spaces; the last token flagged as profane is
    reported with a confidence of 100. For example, "This is shit." would
    be inspected token by token. Nothing is set when no token is flagged.
    """
    profanity = ProfanityFilter()
    utterance = message.text
    # "na" is the sentinel meaning "no profane token found".
    value, confidence = "na", 0

    if profanity.is_profane(utterance):
        for token in utterance.split(" "):
            # A profane word gets a confidence score of 100.
            if profanity.is_profane(token):
                value, confidence = token, 100

    if value == 'na':
        return
    entity = self.convert_to_rasa(value, confidence)
    message.set("entities", [entity], add_to_output=True)
def process(self, message, **kwargs):
    """Tag *message* with the last profane token found in its text.

    The text is first checked as a whole; only when it is profane are its
    space-separated tokens checked one by one. The last profane token is
    converted with ``convert_to_rasa`` (confidence 100) and stored under
    the ``entities`` key. Otherwise the message is left untouched.
    """
    detector = ProfanityFilter()
    text = message.text
    # 'na' is the sentinel for "nothing found".
    hit = 'na'
    score = 0

    if not detector.is_profane(text):
        return

    for word in text.split(" "):
        if not detector.is_profane(word):
            continue
        hit = word
        score = 100

    if hit != 'na':
        entity = self.convert_to_rasa(hit, score)
        message.set("entities", [entity], add_to_output=True)
def test2(channel):
    """Time the import and construction of a ``ProfanityFilter``.

    Args:
        channel: Not used by the current body (it was only referenced by
            code that is disabled).

    Returns:
        The elapsed wall-clock seconds, formatted with ``str``.
    """
    import time
    started = time.time()
    # Both the import and the constructor call fall inside the timed window.
    from profanity_filter import ProfanityFilter
    _filter = ProfanityFilter()
    finished = time.time()
    return str(finished - started)
class Administration(commands.Cog): """Commands for server admins.""" def __init__(self, bot: commands.Bot): self.bot = bot self.check_actions.start() self.pf = ProfanityFilter() @property def db(self): return self.bot.get_cog("Database") async def check_message(self, ctx: commands.Context): ignore = False delete = False if not self.pf.is_clean(ctx.message.content) and not ( ctx.message.channel.id == 728830756071276665 ): ignore = True delete = True return ignore, delete async def mute_member(self, member: discord.Member, duration: timedelta = None): role = next(filter(lambda x: x.name == "Muted", member.guild.roles)) if role in member.roles: raise AlreadyDoneError() if duration is None: self.db.update_member(member, muted=True) await member.add_roles(role) else: self.db.create_temp_action(member, "mute", duration) self.db.update_member(member, muted=True) await member.add_roles(role) async def unmute_member(self, member: discord.Member): if ( role := next(filter(lambda x: x.name == "Muted", member.guild.roles)) ) in member.roles: models.TempAction.objects(member=self.db.fetch_member(member)).delete() self.db.update_member(member, muted=False) await member.remove_roles(role) else:
def profanity_filter():
    """Return a new ``ProfanityFilter`` created with no arguments."""
    return ProfanityFilter()
import nltk from nltk.tokenize import word_tokenize from nltk.corpus import stopwords from textblob import Word, TextBlob from profanity_check import predict, predict_prob from nltk.sentiment.vader import SentimentIntensityAnalyzer import pickle from profanity_filter import ProfanityFilter from tensorflow.keras.preprocessing.image import ImageDataGenerator from keras.preprocessing.sequence import pad_sequences import time #---------------------------------------------------------- # Config .... sid = SentimentIntensityAnalyzer() nlp = spacy.load("en_core_web_sm") pf = ProfanityFilter(nlps={'en': nlp}) nlp.add_pipe(pf.spacy_component, last=True) stop = stopwords.words('english') special_char = [ '~', '@', '$', '#', '%', '^', '&', '*', '(', ')', '-', '_', ',', ';', '/', '\\', '>', '<', '|', '[', ']', '}', '{', '"', '\'', '`', '?', '!', '...' ] path_dir = '/home/bassem/DataDriven_HatfulMemes/data/' print('----------------------------------------------------------------------') # Get features from text : def getsentiment(text): # remove stopp words: text = text.replace('[^\w\s]', '') text = " ".join(x for x in text.split() if x not in stop)
def __init__(self, *args, **kwargs):
    """Create the form and give it its own profanity filter.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initialiser.
    """
    # The filter is attached before the parent initialiser runs.
    self.pf = ProfanityFilter()
    super(CommentForm, self).__init__(*args, **kwargs)
import discord from profanity_filter import ProfanityFilter client = discord.Client() pf = ProfanityFilter() pf.extra_profane_word_dictionaries = { 'en': { 'dumbass', 'MOTHERFUCKERS', 'motherfuckers', 'benchod', 'madrachod', 'BENCHOD', 'MADRACHOD' } } # not case insensitive; should I add dhigger (the n word for Indians)... nah @client.event async def on_ready(): print('We have logged in as {0.user}'.format(client)) await client.change_presence(activity=discord.Activity( type=discord.ActivityType.listening, name="52 Stories by Omar Waseem on Spotify and iTunes", title="52 Stories", color=discord.Color.green())) # selmshots left the podcast :( #await client.change_presence(activity=discord.Spotify(type=discord.ActivityType.listening, name="Spotify", title="52 Stories")) #await client.change_presence(activity=discord.Spotify(title="52 Stories")) @client.event async def on_message(message): if message.author == client.user: return
import argparse
import json
import pickle
import os
import random
import subprocess
import torch
import time
import tqdm
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from style_paraphrase.inference_utils import GPT2Generator
from profanity_filter import ProfanityFilter

# Module-level profanity filter, created once at import time.
pf = ProfanityFilter()

# Command-line options are parsed as soon as the module is executed.
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=34, help='Random seed to use for selecting inputs.')
args = parser.parse_args()

# Runtime configuration is read from config.json in the working directory.
with open("config.json", "r") as f:
    configuration = json.loads(f.read())

# Base directory under which the model folder is looked up below.
OUTPUT_DIR = configuration["output_dir"]

# The paraphraser is constructed with CUDA device 0 selected.
with torch.cuda.device(0):
    print("Loading paraphraser....")
    paraphraser = GPT2Generator(OUTPUT_DIR + "/models/paraphraser_gpt2_large")
import discord from profanity_filter import ProfanityFilter client = discord.Client() pf = ProfanityFilter() @client.event async def on_ready(): print('We have logged in as {0.user}'.format(client)) await client.change_presence(activity=discord.Activity( type=discord.ActivityType.listening, name="52 Stories by Omar Waseem on Spotify and iTunes", title="52 Stories", color=discord.Color.green())) # selmshots left the podcast :( @client.event async def on_message(message): if message.author == client.user: return if message.content.startswith('sultanim spam'): i = 1 while (i <= 3): await message.channel.send( "SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! 
SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! SPAM !!! " ) i = i + 1 if message.content.startswith('sultanim anti-fbi'):
from profanity_filter import ProfanityFilter

# Script: ask for a file name, censor that file's text and print the result.
pf = ProfanityFilter()
# The prompt is shown on stdin/stdout; the answer is used directly as the path.
with open(input("Enter the name of Your File"), "r") as myFile:
    # Read the whole file into memory.
    j = myFile.read()
# Censor the text and print the censored version.
filtered=pf.censor(j)
print(filtered)
def test():
    """Censor a fixed sample sentence and return the censored text.

    The filter is imported inside the function and created with default
    settings on every call.
    """
    from profanity_filter import ProfanityFilter
    sample = "That's bullshit!"
    return ProfanityFilter().censor(sample)
import logging import configparser import sys import json import os.path from telegram import Update from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackContext from profanity_filter import ProfanityFilter from forex_python.converter import CurrencyRates from forex_python.bitcoin import BtcConverter config = configparser.ConfigParser() config.read('bot_config.ini') pf = ProfanityFilter(languages=['ru', 'en']) token = config['DEFAULT']['BotToken'] updater = Updater(token=token, use_context=True) dispatcher = updater.dispatcher logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) class ChatMemberCensorRepository: repository_file = 'cencored_users' censored_users = set() def __init__(self):
import os from pycorenlp.corenlp import StanfordCoreNLP from profanity_filter import ProfanityFilter from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer import rpy2.robjects as robjects from rpy2.robjects.packages import importr ## INIT GLOBAL VARS chunk_size = 5000 num_chunks = 0 ## INIT REDDIT INSTANCE reddit = praw.Reddit() ## INIT PROFANITY FILTER pf = ProfanityFilter() pf.censor_whole_words = False ## INIT VADER SENTIMENT vader = SentimentIntensityAnalyzer() ## INIT TEXTCLEAN R textclean = importr('textclean', lib_loc="C:/Users/Ben/Documents/R/win-library/3.6") importr('stringi', lib_loc="C:/Users/Ben/Documents/R/win-library/3.6") ## coreNLP sent. analysis def getSentiment(text): ## connect to CoreNLP server host = "http://localhost"
class ProfanityDetector:
    """Detect profanity in recorded speech, checked in Russian and English.

    Each audio file is transcribed twice with Google speech recognition
    (default language and ``ru``). Russian alternatives are lemmatised and
    matched against a custom word list; English alternatives are scored
    with ``predict_prob``. A transcript containing ``*`` is treated as
    profane straight away.
    """

    def __init__(self, profane_words_filepath: str):
        """Load the word list and create the filter and recognizer.

        Args:
            profane_words_filepath: Path to a UTF-8 text file read line by
                line; each stripped line becomes one dictionary entry.
        """
        words = []
        with open(profane_words_filepath, encoding='utf8') as f:
            for line in f:
                word = line.strip()
                words.append(word)
                # Also register the spelling with every 'ё' written as 'е'.
                if 'ё' in word:
                    words.append(word.replace('ё', 'е'))
        self._ru_words = words
        self._ru_pf = ProfanityFilter()
        # The word list is installed under the 'en' dictionary key.
        self._ru_pf.custom_profane_word_dictionaries = {'en': words}
        self._r = sr.Recognizer()
        # Morphological analyzer, created on first use by _lemmatize_ru().
        self._morph = None

    def _lemmatize_ru(self, text: str) -> str:
        """Lower-case, strip punctuation/digits and lemmatise *text*."""
        text = " ".join(word.lower() for word in text.split())
        text = re.sub(r'-\s\r\n\s+|-\s\r\n|\r\n', '', text)
        text = re.sub(
            r'[.,:;%©?*!@#$^&()\d]|[+=]|[\[]|[\]]|[/]|"|\s{2,}|-', ' ', text)
        # One analyzer is built lazily and reused; it used to be constructed
        # again for every single word, which is needlessly expensive.
        morph = getattr(self, '_morph', None)
        if morph is None:
            morph = self._morph = pymorphy2.MorphAnalyzer()
        return " ".join(
            morph.parse(str(word))[0].normal_form for word in text.split())

    def _check_russian(self, file: str, alternatives: list) -> dict:
        """Check Russian alternatives, print the verdict, return the row."""
        is_profane = False
        text = ""
        for text_alt in alternatives:
            text = text_alt['transcript']
            # A '*' in the transcript is taken as an already-masked word.
            if '*' in text:
                is_profane = True
                break
            text = self._lemmatize_ru(text)
            is_profane = self._ru_pf.is_profane(text)
            if is_profane:
                break

        best = alternatives[0]['transcript']
        if is_profane:
            print(file, "RU PROFANE", text, "best:", best)
        else:
            print(file, "RU NOT PROFANE", best)
        return {
            "filename": file,
            "lang": "ru",
            "is_profane": is_profane,
            "text_best_recogn": best,
            "text_profane": text if is_profane else "",
            "prob": 1 if is_profane else 0,
        }

    def _check_english(self, file: str, alternatives: list) -> dict:
        """Check English alternatives, print the verdict, return the row."""
        is_profane = False
        # NOTE: prob keeps the score of the last alternative that was
        # actually scored, so it stays 0 when the first transcript already
        # contains '*'.
        prob = 0
        text = ""
        for text_alt in alternatives:
            text = text_alt['transcript']
            if '*' in text:
                is_profane = True
                break
            text = " ".join(word.lower() for word in text.split())
            prob = predict_prob([text])[0]
            is_profane = prob > 0.5
            if is_profane:
                break

        best = alternatives[0]['transcript']
        if is_profane:
            print(file, "ENG PROFANE", text, "best:", best)
        else:
            print(file, "ENG NOT PROFANE", best)
        return {
            "filename": file,
            "lang": "eng",
            "is_profane": is_profane,
            "text_best_recogn": best,
            "text_profane": text if is_profane else "",
            "prob": prob,
        }

    def get_profanity(self, voice_path: str) -> list:
        """Transcribe every file under *voice_path* and check for profanity.

        Args:
            voice_path: Directory walked recursively; every file found is
                opened as an audio file.

        Returns:
            A list of result dicts with the keys ``filename``, ``lang``,
            ``is_profane``, ``text_best_recogn``, ``text_profane`` and
            ``prob``. A file contributes one row per language for which
            recognition returned a result (Russian first, then English).
            Recognition errors are printed and the file is skipped.
        """
        data = []
        for root, _dirs, files in os.walk(voice_path):
            for file in files:
                with sr.AudioFile(os.path.join(root, file)) as source:
                    audio = self._r.record(source)
                try:
                    res = self._r.recognize_google(audio, show_all=True)
                    res_ru = self._r.recognize_google(audio,
                                                      language="ru",
                                                      show_all=True)
                    if res_ru:
                        data.append(
                            self._check_russian(file, res_ru['alternative']))
                    if res:
                        data.append(
                            self._check_english(file, res['alternative']))
                except sr.UnknownValueError:
                    print(
                        "Google Speech Recognition could not understand audio"
                    )
                except sr.RequestError as e:
                    print(
                        "Could not request results from Google Speech Recognition service; {0}"
                        .format(e))
        return data
def post(self, request):
    """Handle a form POST: either load an uploaded file or mark up text.

    When the request carries an uploaded ``doc`` file, its text is placed in
    the template context under ``doc`` and the page is rendered immediately.
    Otherwise the posted ``document`` text is processed line by line and
    word by word: words or phrases found in the ``Words`` table and words
    flagged by the profanity filter are passed through ``format_word``; all
    other words are kept as they are. The joined result is rendered under
    ``results`` together with the original ``document``.
    """
    nlp = en_core_web_sm.load()
    pf = ProfanityFilter(nlps={'en': nlp})
    # pf.custom_profane_word_dictionaries = {'en': {'sold down the river', 'dog'}}
    # pf.extra_profane_word_dictionaries = {'en': {'sold', 'orange'}}
    wordlist = []
    context = {}
    # FILE UPLOADED
    if 'doc' in request.FILES:
        doc = request.FILES['doc']
        if doc.name.endswith(".docx"):
            docx = docx2python(doc, extract_image=False)
            context['doc'] = docx.text
        elif doc.name.endswith(".txt"):
            print("This is a test")
            # NOTE(review): if doc.read() returns bytes, str() produces the
            # "b'...'" representation rather than decoded text - confirm.
            mytext = str(doc.read())
            context['doc'] = mytext
        # Other file extensions fall through with an empty context.
        return render(request, 'index.html', context=context)
    # RETRIEVE WORDS AND SPLIT
    document = request.POST['document']
    word_lines = document.splitlines()
    # CHECK EACH WORD IF PROFANITY
    for line in word_lines:
        if line == '':
            # An empty line is recorded as the two characters backslash + n.
            wordlist.append(r'\n')
            # NO LINE BREAK CONTINUE HERE
        else:
            words = line.split()
            # temp_list holds the cleaned words of the phrase being matched;
            # original_list holds the same words as they were typed.
            temp_list = []
            original_list = []
            # LOOP THROUGH EACH WORD.
            for word in words:
                clean_word = clear_punctuation(word).lower()
                in_db = Words.objects.all().filter(
                    word__icontains=clean_word)
                # WORD IS IN DATABASE
                if in_db:
                    temp_list.append(clean_word)
                    temp_word = " ".join(temp_list)
                    starting_phrase = Words.objects.all().filter(
                        word__istartswith=temp_word)
                    # CURRENT WORD IS THE START OF THE PHRASE
                    if starting_phrase:
                        original_list.append(word)
                        completed = Words.objects.all().filter(
                            word__iexact=temp_word)
                        # CURRENT PHRASE IS COMPLETED
                        if completed:
                            original = " ".join(original_list)
                            original_list.clear()
                            new_word = format_word(original)
                            wordlist.append(new_word)
                            temp_list.clear()
                        # # TEMP WORD DID NOT COMPLETE THE PHRASE
                        # else:
                        #     print('now we here bish')
                        #     original = " ".join(original_list)
                        #     original_list.clear()
                        #     wordlist.append(original)
                        #     temp_list.clear()
                    # NOT START OF PHRASE KEEP GOING
                    else:
                        wordlist.append(word)
                        temp_list.clear()
                        original_list.clear()
                # WORD IS A PROFANITY
                elif pf._is_profane_word('en', clean_word):
                    # Flush whatever partial phrase was collected, then add
                    # the formatted profane word.
                    temp_word = " ".join(temp_list)
                    wordlist.append(temp_word)
                    new_word = format_word(word)
                    wordlist.append(new_word)
                    temp_list.clear()
                # JUST A REGULAR WORD
                else:
                    temp_word = " ".join(temp_list)
                    wordlist.append(temp_word)
                    wordlist.append(word)
                    temp_list.clear()
    context["results"] = " ".join(wordlist)
    context['document'] = document
    return render(request, 'index.html', context=context)
def answer(carlAsked, userAnswered, allowProfanity):
    """Record the user's answer and pick the next phrase to say.

    The channel's JSON storage holds ``phrases`` (a list of strings) and
    ``links`` (for each phrase, the indices of phrases that followed it).
    The answer is linked to the phrase that was asked, added as a new phrase
    when it is unknown (and clean, unless profanity is allowed), and the
    storage file is rewritten.

    Args:
        carlAsked: The phrase the bot said last.
        userAnswered: The user's reply; bracket/slash characters are
            removed, a final '.' is added when it has no closing
            punctuation, and it is cut to 250 characters.
        allowProfanity: Selects the "E2" channel and skips the profanity
            check when true; otherwise the "default" channel is used.

    Returns:
        The phrase chosen as the bot's next line.
    """
    channel = "E2" if allowProfanity else "default"
    from profanity_filter import ProfanityFilter
    storageFile = ROOT_DIR + "/channels/" + channel + ".json"
    if os.path.isfile(storageFile):
        # Context manager so the handle is closed right after loading.
        with open(storageFile, 'r') as storageIn:
            storage = json.load(storageIn)
    else:
        storage = {
            'phrases': [],
            'links': [],
        }

    illegalChars = ('{', '}', '[', ']', '(', ')', '|', '\\', '<', '>', '/')
    for illegalChar in illegalChars:
        carlAsked = carlAsked.replace(illegalChar, "")
        userAnswered = userAnswered.replace(illegalChar, "")

    phrases = storage['phrases']  # a list of phrases
    links = storage['links']  # a list of links to other phrases from each phrase

    if len(userAnswered) == 0 or userAnswered[-1] not in ('.', '!', '?', '"',
                                                          "'"):
        userAnswered += '.'
    if len(userAnswered) > 250:
        userAnswered = userAnswered[:250]

    if carlAsked in phrases:
        askIdx = phrases.index(carlAsked)
    else:
        askIdx = -1

    futureAskIdx = -1
    if userAnswered in phrases:
        # Known answer: follow one of its links, or fall back to the least
        # used phrase.
        answerIdx = phrases.index(userAnswered)
        if len(links[answerIdx]) > 0:
            futureAskIdx = random.choice(links[answerIdx])
        else:
            futureAskIdx = getLeastUsed(links, answerIdx)  # exclude answerIdx
        if askIdx != -1:
            links[askIdx].append(answerIdx)
    else:
        bestIdx, best = spellcheckPhrase(userAnswered, phrases)
        if best > 0.6:
            # Close enough to a known phrase: treat it as that phrase.
            if len(links[bestIdx]) > 0:
                futureAskIdx = random.choice(links[bestIdx])
            else:
                futureAskIdx = getLeastUsed(links, bestIdx)  # exclude answerIdx
            if askIdx != -1:
                links[askIdx].append(bestIdx)
        else:
            futureAskIdx = getLeastUsed(links, bestIdx)  # exclude answerIdx
            # The filter is only built when it is actually consulted, i.e.
            # for a new phrase on a channel that disallows profanity.
            if allowProfanity or ProfanityFilter().is_clean(userAnswered):
                if askIdx != -1:
                    links[askIdx].append(len(phrases))
                links.append([])
                phrases.append(userAnswered)

    # Context manager so the data is flushed and the handle closed before
    # returning.
    with open(storageFile, 'w') as storageOut:
        json.dump(storage, storageOut)
    return phrases[futureAskIdx]
def profanity_filter_ru_en():
    """Return a new ``ProfanityFilter`` configured with languages 'ru' and 'en'."""
    return ProfanityFilter(languages=['ru', 'en'])