def check_profanity(user_message):
    debug_log('in check_profanity, user_message: ' + user_message)
    user_message_words = re.compile(r"\W+").split(user_message)
    actual_words_in_user_message = [
        word for word in user_message_words if len(word) > 0
    ]
    if len(actual_words_in_user_message) < 1:
        return False
    else:
        debug_log('in check_profanity, user_message_words: ' +
                  str(actual_words_in_user_message))
        overall_profanity = sum(predict(actual_words_in_user_message))
        if overall_profanity > 0:
            return True
        debug_log('in check_profanity, overall_profanity: ' +
                  str(overall_profanity))
        overall_profanity_average = overall_profanity / len(
            actual_words_in_user_message)
        debug_log('in check_profanity, overall_profanity_average: ' +
                  str(overall_profanity_average))
        debug_log('in check_profanity, profanity probability: ' +
                  str(predict_prob(actual_words_in_user_message)))
        debug_log('in check_profanity, profanity probability: ' + str(
            sum(predict_prob(actual_words_in_user_message)) /
            len(actual_words_in_user_message)))
        if overall_profanity_average > 0.5:
            return True
        return False
async def check(self, ctx, *, message):
    """Checks a message for profanity."""
    from profanity_check import predict_prob

    p = predict_prob([message])[0]
    judgments = [
        "est gentil",
        "est cool",
        "est sympathique",
        "est pas fou",
        "est moyen",
        "est pas cool",
        "est insultant",
        "est vulgaire",
        "est violent",
        "est inacceptable",
        "mérite un ban",
    ]
    i = min(int(p * len(judgments)), len(judgments) - 1)
    judgment = judgments[i]
    title = f"`{int(p * 100)}% vulgaire`"
    # Shade from green (clean) to red (profane); the blue channel stays at 0.
    r = int(255 * p)
    g = int(255 * (1 - p))
    color = 256**2 * r + 256 * g
    color = discord.Color(color)
    description = f"Ce message {judgment}!"
    embed = discord.Embed(title=title, description=description, color=color)
    await ctx.send(embed=embed)
def sent(message: str):
    binPred = predict([message])
    probPred = predict_prob([message])
    dct = {"bin": str(binPred[0]), "prob": str(probPred[0])}
    jsonData = jsonable_encoder(dct)
    return JSONResponse(jsonData)
def sentence_profanity_prob(string):
    """Return the probability that a string is profanity."""
    try:
        result = predict_prob([string])
        return result.item()
    except Exception:
        return 0.
def profanity_score_min_max(*args):
    profanity_score_list_ = []
    for artist in set(artists_list(*args)):
        for song in artist_s_cleaned_songs_list(artist, *args):
            words_of_lyrics = []
            raw_text = ""
            with open(dir_given(*args) + '/Cleaned_Songs/' + song, 'rb') as f:
                for line in f.readlines():
                    this_line_wordlist = line.decode('utf-8').split()
                    for word in this_line_wordlist:
                        words_of_lyrics.append(word)
            for word_ in words_of_lyrics:
                raw_text += word_ + " "
            song_lyrics_for_profanity_check_ = [raw_text]
            profanity_check = predict_prob(song_lyrics_for_profanity_check_)
            profanity_score = 1 - float(' '.join(map(str, profanity_check)))
            profanity_score_list_.append(profanity_score)
            # print(profanity_score)
    min_profanity_score_list_ = round(min(profanity_score_list_), 2)
    max_profanity_score_list_ = round(max(profanity_score_list_), 2)
    return min_profanity_score_list_, max_profanity_score_list_
def main(argv):
    min_prob = float(sys.argv[1])
    ret = 0
    with open(sys.argv[2], 'r') as f:
        ignoreds = f.readlines()
    ignoreds = [v.strip() for v in ignoreds]
    print(ignoreds)
    summary = ''
    for fn in sys.argv[3:]:
        with open(fn, 'r') as f:
            lines = f.readlines()
        ps = predict_prob(lines)
        for i in range(len(lines)):
            if ps[i] >= min_prob:
                keysrc = "{}\n{}".format(fn, lines[i]).encode()
                key = '{} {}'.format(hashlib.sha256(keysrc).hexdigest(), fn)
                summary += key + '\n'
                if key not in ignoreds:
                    ret = ret + 1
                    print("\n❌ L{} of {}:\n{}".format(
                        i + 1, fn, lines[i][:-1]), file=sys.stderr)
                    print("ℹ️ Add '{}' to {} to ignore this issue".format(
                        key, sys.argv[2]), file=sys.stderr)
    print('\n\n\nSummary to ignore everything using {}:\n{}'.format(
        sys.argv[2], summary))
    return ret
def main(argv):
    min_prob = float(sys.argv[1])
    ret = 0
    with open(sys.argv[2], 'r') as f:
        ignoreds = f.readlines()
    ignoreds = [v.strip() for v in ignoreds]
    print(ignoreds)
    for fn in sys.argv[3:]:
        with open(fn, 'r') as f:
            lines = f.readlines()
        ps = predict_prob(lines)
        for i in range(len(lines)):
            if ps[i] >= min_prob:
                keysrc = "{}\n{}".format(fn, lines[i]).encode()
                key = '{} {}'.format(hashlib.sha256(keysrc).hexdigest(), fn)
                if key not in ignoreds:
                    ret = ret + 1
                    print("\n\n\n❌ L{} of {}:\n{}".format(
                        i + 1, fn, lines[i]), file=sys.stderr)
                    print("ℹ️ Add '{}' to {} to ignore this issue".format(
                        key, sys.argv[2]), file=sys.stderr)
                else:
                    print("\n\n\nℹ️ IGNORED: L{} of {}:\n{}".format(
                        i + 1, fn, lines[i]))
                    print("ℹ️ Remove '{}' from {} to stop ignoring this issue"
                          .format(key, sys.argv[2]))
    return ret
def Make_Features(df):
    df['Freq'] = df['text'].map(df['text'].value_counts())
    df['word_count'] = df['text'].swifter.apply(lambda x: len(str(x).split(" ")))
    df['char_count'] = df['text'].str.len()
    df['stpw_count'] = df['text'].swifter.apply(
        lambda x: len([x for x in x.split() if x in stop]))
    df['spchar_count'] = df['text'].swifter.apply(
        lambda x: len([x for x in list(x) if x in special_char]))

    # prepare for sentiment analysis:
    # 1- remove the punctuation:
    df['text_modif'] = df['text'].str.replace(r'[^\w\s]', '', regex=True)
    # 2- remove stop words:
    df['text_modif'] = df['text_modif'].swifter.apply(
        lambda x: " ".join(x for x in x.split() if x not in stop))
    df['text_modif'] = df['text_modif'].swifter.apply(
        lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
    df['sentiment'] = df['text_modif'].swifter.apply(
        lambda x: TextBlob(x).sentiment[0])

    # finally add profanity_check
    df['profane_modf'] = predict_prob(df['text_modif'])
    # print(df[['word_count','char_count','stpw_count','spchar_count', 'profane', 'sentiment']].head())
    df['profane_pfilter'] = df['text'].swifter.apply(lambda x: nlp(x)._.is_profane)

    # spaCy to identify people, organizations, etc.
    df['org'] = df['text'].swifter.apply(
        lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'ORG']))
    df['Money'] = df['text'].swifter.apply(
        lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'MONEY']))
    df['tDate'] = df['text'].swifter.apply(
        lambda x: len([y for y in nlp(x).ents
                       if str(y.label_) == 'DATE' or str(y.label_) == 'TIME']))
    df['Pers'] = df['text'].swifter.apply(
        lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'PERSON']))
    df['GPE'] = df['text'].swifter.apply(
        lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'GPE']))

    print(df.describe())
    return df
def profanity_score(song_to_be_scored, profanity_score_min,
                    profanity_score_max, *args):
    words_of_lyrics_of_song_to_be_scored = []
    raw_text = ""
    with open(dir_given(*args) + '/Cleaned_Songs/' + song_to_be_scored, 'rb') as f:
        for line in f.readlines():
            this_line_wordlist = line.decode('utf-8').split()
            for word in this_line_wordlist:
                words_of_lyrics_of_song_to_be_scored.append(word)
    for word_ in words_of_lyrics_of_song_to_be_scored:
        raw_text += word_ + " "
    song_lyrics_for_profanity_check_of_song_to_be_scored = [raw_text]
    profanity_check = predict_prob(
        song_lyrics_for_profanity_check_of_song_to_be_scored)
    profanity_score_of_song_to_be_scored = 1 - \
        float(' '.join(map(str, profanity_check)))
    regularization_step = (profanity_score_of_song_to_be_scored -
                           profanity_score_min) / (profanity_score_max -
                                                   profanity_score_min)
    profanity_score_of_song_to_be_scored_regularized = 1 * \
        regularization_step + 0 * (1 - regularization_step)
    return round(profanity_score_of_song_to_be_scored_regularized, 2)
def _check_for_profanity(self, message):
    """
    Checks message content to see if it contains a blacklisted word

    :param message: content to be checked against blacklist
    :return: True if content is flagged, False otherwise
    """
    # predict_prob expects a list of documents, so wrap the message string
    return predict_prob([message])[0] > THRESHOLD
def censor(word):
    new_words_list = ''
    for i, item in enumerate(word.split()):
        # flag the word if the classifier scores it >= 0.3 or it is in the custom blacklist
        if predict_prob([item])[0] >= 0.3 or item in chkchk(word):
            # keep the first letter and star out the rest
            item = item[0] + '*' * len(item[1:])
        new_words_list += item + " "
    return new_words_list
def get_prob(message):
    '''
    Parameter (Discord Message): the Discord message the bot has received
    Returns (float): the probability that the message contains profanity
    '''
    # use .content to get the string in the Discord message and wrap it in a list;
    # take the first value of the numpy array and convert it to a native Python float with .item()
    return predict_prob([message.content])[0].item()
def profanityCheck(text):
    """
    Initial profanity check using profanity_check

    :param text: The text to analyse
    :return: The probability of the text containing profanity
    """
    return predict_prob([text])[0]
async def process_message(self, message):
    """Checks a message for profanity. Currently only reacts; takes no moderation action."""
    if message.author != self.bot.user:
        prof_percent = predict_prob([message.clean_content])
        if prof_percent[0] >= 0.75:
            await message.add_reaction('😡')  # Pretty strong mute candidate
        elif prof_percent[0] >= 0.5:
            await message.add_reaction('😠')
def doc_calc(self, article):
    """Compute readability, profanity, and sentiment metrics for an article."""
    flesch_ease = textstat.flesch_reading_ease(article)
    flesch_grade = textstat.flesch_kincaid_grade(article)
    gunning = textstat.gunning_fog(article)
    profanity = predict_prob([article])[0]
    polarity = TextBlob(article).sentiment.polarity
    return pd.Series([flesch_ease, flesch_grade, gunning, profanity, polarity])
async def read_root(query: str):
    prediction = predict_prob([query])[0]
    leetSp33k = profanity.contains_profanity(query)
    if prediction > .5:
        return True
    elif leetSp33k:
        return True
    else:
        return False
async def rating(request):
    key = request.match_info.get("key", "")
    if key not in ALLOWED_API_KEYS:
        return web.Response(status=401)
    data = request.match_info.get("b64", "")
    data = b64decode(data)
    return web.Response(body=str(predict_prob([data])[0]).encode("utf-8"))
async def on_message(self, message):
    if message.channel.id != channels["system"]:
        swearing = any(predict([message.content]))
        if swearing:
            probability = round(predict_prob([message.content])[0], 2) * 100
            if probability > 85:
                await message.guild.get_channel(channels["system"]).send(
                    f'{message.author.mention} swore in {message.channel.mention}: "{message.content}" ({probability}%)'
                )
def offensiveness(sentence):
    """
    Compute and return the probability that the given sentence is offensive.

    Args:
        sentence: The sentence to check

    Returns:
        The probability that the given sentence is offensive as a float p
        (1 = offensive, 0 = nice, 0 <= p <= 1)
    """
    profane_prob = predict_prob([sentence])
    return profane_prob[0]
async def action(self, channel, sender, message):
    quick_chats = [
        "OMG!", "Wow!", "Okay.", "Savage!", "Thanks!", "Holy cow!"
    ]
    profanity = predict_prob([message])
    if (profanity[0] > self.profanity_threshold
            and random.random() < self.random_response_chance):
        say = Message().set_target(channel)
        say.add_field(name="", value=random.choice(quick_chats))
        await self.connector.send_message(say)
async def on_message(message):
    if message.author == dscClient.user:
        return
    print(str(predict_prob([message.content])[0]) + " " + str(message.guild))
    if predict_prob([message.content])[0] > .79 or messageContainsTriggerWord(message):
        await deleteBlacklistedMessage(message)
    if message.channel.name == "catras-diary":
        archiveChannel = dscClient.get_channel(738415449582075924)
        await archiveChannel.send(message.author.name + ": " + message.content)
        # sleep asynchronously so the event loop is not blocked (requires `import asyncio`)
        await asyncio.sleep(calculateDelayTime(message.content))
        try:
            await message.delete()
        except discord.errors.NotFound:
            await log(
                "Someone deleted a message before me. Sneaky. System may reboot, this is normal behavior."
            )
    elif message.content.lower().startswith(
            "lighthope") or message.content.lower().endswith("lighthope"):
        response = witClient.message(msg=message.content)
        await handle_message(response, message.channel)
def profanityAnalysis(text_content):
    start_index = 0
    text_content_arr = text_content.split()
    end_index = len(text_content_arr)
    profanity_baseline = predict_prob([text_content])[0] / (end_index - start_index)
    profanity_split_1 = predict_prob([
        ' '.join(text_content_arr[start_index:min(end_index // 2 + 1, end_index - 1)])
    ])[0] / (min(end_index // 2 + 1, end_index - 1) - start_index)
    profanity_split_2 = predict_prob([
        ' '.join(text_content_arr[max(1, end_index // 2 - 1):end_index])
    ])[0] / (end_index - max(1, end_index // 2 - 1))
    while ((end_index - start_index) > 1
           and (profanity_baseline <= profanity_split_1
                or profanity_baseline <= profanity_split_2)):
        if profanity_split_1 >= profanity_split_2:
            # descend into the first half, which scored higher
            profanity_baseline = profanity_split_1
            end_index = min((start_index + end_index) // 2, end_index - 1)
        else:
            # descend into the second half, which scored higher
            profanity_baseline = profanity_split_2
            start_index = max(1, (start_index + end_index) // 2)
        profanity_split_1 = predict_prob([
            ' '.join(text_content_arr[start_index:min(
                (start_index + end_index) // 2, end_index - 1)])
        ])[0] / max(1, (min(
            (start_index + end_index) // 2, end_index - 1) - start_index))
        profanity_split_2 = predict_prob([
            ' '.join(text_content_arr[max(1, (start_index + end_index) // 2):end_index])
        ])[0] / max(1, (end_index - max(1, (start_index + end_index) // 2)))
    return (start_index, end_index)
def feature_profanity(dataset_filename_pkl):
    feature_filename = 'features/' + os.path.basename(
        dataset_filename_pkl)[:-4] + '_feature_profanity.pkl'
    if not os.path.isfile(feature_filename):
        author_data = utils.load_feature(dataset_filename_pkl)
        author_profanity = {}
        for author in tqdm(author_data):
            single_text = ''.join(author_data[author])
            profanity_rate = predict_prob([single_text])
            author_profanity[author] = profanity_rate[0]
        with open(feature_filename, 'wb') as profanity_file:
            pickle.dump(author_profanity, profanity_file)
    return feature_filename
def censoring(input_list):
    '''
    INPUT:
        input_list : (list of sentences) list of sentences to check
    OUTPUT:
        predictions : (list of booleans) True/False for whether each sentence
        contains offensiveness or profanity
    '''
    threshold = 0.15
    predictions = [prob > threshold for prob in predict_prob(input_list)]
    return predictions
def censor_profanity(comment_text, profanity_threshold=0.9):
    """
    Replaces profanity using a (probably) inefficient use of alt-profanity-check.

    Input: comment_text (str)
    Output: comment text with profane words censored
    """
    # keep the first letter of each flagged word and star out the rest
    repl_text = [
        (x, f"{x[0]}{'*' * (len(x) - 1)}")
        for x in comment_text.split(' ')
        if predict_prob([x])[0] > profanity_threshold
    ]
    comment_text_censored = comment_text
    for text in repl_text:
        comment_text_censored = comment_text_censored.replace(text[0], text[1])
    return comment_text_censored
def get_profanity(txt, prob=False):
    if not isinstance(txt, str):
        raise Exception('txt has to be a string')
    if len(txt.strip()) == 0:
        return False
    blob = TextBlob(txt)
    lang = blob.detect_language()
    result = None
    # if blob.detect_language() != 'en':
    #     raise Exception('Only english text can be verified for profanity!')
    if lang == 'en':
        if not prob:
            result = bool(predict([txt])[0] == 1)
        else:
            result = predict_prob([txt])[0]
    # apparently the equality above does not return a Python bool, so we have to convert it
    return {'result': result, 'lang': lang}
def test_accuracy():
    texts = [
        'Hello there, how are you',
        'Lorem Ipsum is simply dummy text of the printing and typesetting industry.',
        '!!!! Click this now!!! -> https://example.com',
        'f**k you',
        'f**K u',
        'GO TO hElL, you dirty scum',
    ]
    assert list(predict(texts)) == [0, 0, 0, 1, 1, 1]

    probs = predict_prob(texts)
    for i in range(len(probs)):
        if i < 3:
            assert probs[i] <= 0.5
        else:
            assert probs[i] >= 0.5
def add_tweet():
    user = request.json['user']
    description = request.json['description']
    private = request.json['private']
    pic = request.json['pic']
    number = predict_prob([description])[0]
    if number <= 0.5:
        tweet = dict(user=user,
                     description=description,
                     private=private,
                     upvote=0,
                     date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     pic=pic,
                     _id=str(ObjectId()))
        tweets[tweet['_id']] = tweet
        if push_to_redis:
            rjjsonsetwrapper('ttwi-' + ssm() + '-' + tweet['_id'],
                             Path.rootPath(), tweet)
        return jsonify(tweet)
async def on_message(message):
    # SAO Easter Egg
    punctuations = '!()-[]{};:\'"\\,<>./?@#$%^&*_~'
    # remove punctuation from the string
    msg = ""
    for char in message.content.lower():
        if char not in punctuations:
            msg = msg + char
    # profanity check
    prob = predict_prob([msg])
    if prob[0] >= 0.8:
        em = discord.Embed(title="AI Analysis Results", color=0xC54B4F)
        em.add_field(name='PROFANITY DETECTED! ', value=str(prob[0]))
        await message.channel.send(embed=em)
    if msg.startswith("system call "):
        content = msg[12:].split(" ")
        if content[0].lower() == "inspect":
            if content[1].lower() == "entire":
                if content[2].lower() == "command":
                    if content[3].lower() == "list":
                        em = discord.Embed(title="🍢 SAO Command List", color=0x7400FF)
                        em.set_thumbnail(
                            url="https://cdn.discordapp.com/attachments/668816286784159763/674285661510959105/Kirito-Sao-Logo-1506655414__76221.1550241566.png")
                        em.add_field(
                            name='Commands',
                            value="generate xx element\ngenerate xx element xx shape\ninspect entire command list")
                        em.set_footer(text=f"{teapot.copyright()} | Code licensed under the MIT License")
                        await message.channel.send(embed=em)
        elif content[0].lower() == "generate":
            if content[-1].lower() == "element":
                em = discord.Embed(title=f"✏ Generated {content[1].lower()} element!", color=0xFF0000)
                await message.channel.send(embed=em)
            if content[-1].lower() == "shape":
                if content[2].lower() == "element":
                    em = discord.Embed(
                        title=f"✏ Generated {content[-2].lower()} shaped {content[1].lower()} element!",
                        color=0xFF0000)
                    await message.channel.send(embed=em)
    await bot.process_commands(message)
def api_offensive():
    filepath = 'C:/Users/dipta/OneDrive/Documents/MCS/CS 410/CourseProject/file-read-api/search.txt'
    response = []
    with open(filepath, errors='ignore') as f:
        for line in f:
            profanity = []
            profanity.append(line)
            det_val = predict(profanity)
            prob_val = predict_prob(profanity)
            val_list = list(det_val)
            prob_list = list(prob_val)
            prediction = int(val_list[0])
            prediction_prob = float(prob_list[0])
            data = {}
            data['tweet'] = line
            data['prediction'] = prediction
            data['prob'] = prediction_prob
            response.append(data)
    return json.dumps(response)