from hatesonar import Sonar


def hatedetect(hds):
    """Label each text in hds: 0 if hate/offensive wins, 1 if 'neither' wins."""
    ypred = []
    sonar = Sonar()
    for k in hds:
        # ping() already returns a dict, so no json.dumps/json.loads round trip is needed
        res_dict = sonar.ping(text=k)
        classes = res_dict["classes"]
        # pick the stronger of hate_speech (index 0) and offensive_language (index 1)
        if classes[0]["confidence"] > classes[1]["confidence"]:
            check, val = classes[0]["confidence"], 0
        else:
            check, val = classes[1]["confidence"], 1
        # 'neither' (index 2) wins if it beats the stronger of the other two
        if check <= classes[2]["confidence"]:
            val = 2
        yp = 0 if val in (0, 1) else 1
        ypred.append(yp)
    return ypred
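# Minimal usage sketch for hatedetect; the sample sentences are illustrative and the
# exact predictions depend on the model, so treat the expected output as a guess.
if __name__ == "__main__":
    sample = ["Have a nice day", "I can't stand you"]
    print(hatedetect(sample))  # e.g. [1, 0]: 0 = hate/offensive top class, 1 = neither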
def isHateSpeech(self, line):
    # using the open-source Hate Sonar API
    sonar = Sonar()
    response = sonar.ping(text=line)
    if response["top_class"] != "neither":
        # line is hate speech or offensive language
        return 1
    else:
        return 0
import os
import json_lines
from hatesonar import Sonar


def refine_jsonl_file(path, votes_threshold=10, hate_limit=0.4,
                      offensive_limit=0.7, general_limit=0.8):
    """Copy texts whose hate/offensive confidences stay under the limits to a refined file."""
    sonar = Sonar()
    name, _ = os.path.splitext(path)
    refined_name = "refined_{name}.txt".format(name=name)
    if os.path.exists(refined_name):
        os.remove(refined_name)
    # json_lines expects the input file in binary (rb) mode
    with open(path, 'rb') as f, open(refined_name, mode='w') as rf:
        for item in json_lines.reader(f):
            # note: votes_threshold is never used; the original compares against 0
            if int(item['votes']) > 0:
                text = item['text']
                classes = sonar.ping(text=text)['classes']  # one ping per text instead of two
                hate_confidence = classes[0]['confidence']
                offensive_confidence = classes[1]['confidence']
                if not ((hate_confidence > hate_limit)
                        or (offensive_confidence > offensive_limit)
                        or (hate_confidence + offensive_confidence > general_limit)):
                    try:
                        print(text, file=rf)
                    except Exception:
                        continue
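# Hypothetical invocation: refining "posts.jsonl" writes the surviving texts to
# "refined_posts.txt" (the file name is illustrative, not from the source).
# refine_jsonl_file("posts.jsonl")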
from hatesonar import Sonar


def classify_texts(texts):
    """Classify texts with Hate Sonar (classifier)."""
    # split the input into sentences; the final element after a trailing "." is empty
    texts = texts.split(".")[:-1]
    rating_map = {
        "neither": "green",
        "offensive_language": "orange",
        "hate_speech": "red"
    }
    sonar = Sonar()
    results = []
    # for each sentence, perform detection and build an output dictionary
    for i, sentence in enumerate(texts):
        sentence_res = sonar.ping(text=sentence)
        top_class = sentence_res["top_class"]
        results.append({
            "index": i,
            "sentence": sentence,
            "top_class": top_class,
            "rating": rating_map[top_class]
        })
    return results
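# Usage sketch: classify_texts expects one string of period-separated sentences
# (an assumption based on the split(".") above); the sample text is illustrative.
if __name__ == "__main__":
    for entry in classify_texts("Have a lovely day. I despise all of you."):
        print(entry["index"], entry["rating"], entry["sentence"])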
from hatesonar import Sonar


def isAbusiveComment(x):
    sonar = Sonar()
    a = sonar.ping(text=x)
    hateConfidence = 0.0
    offenseConfidence = 0.0
    neitherConfidence = 0.0
    # pull the per-class confidences out of the result
    for result in a['classes']:
        if result["class_name"] == "hate_speech":
            hateConfidence = result['confidence']
        if result["class_name"] == "offensive_language":
            offenseConfidence = result['confidence']
        if result["class_name"] == "neither":
            neitherConfidence = result['confidence']
    # order matters: the hate/offensive thresholds override the 'neither' threshold
    rez = False
    if neitherConfidence > 0.7:
        rez = False
    if hateConfidence > 0.6:
        rez = True
    if offenseConfidence > 0.55:
        rez = True
    return MessageScreenerResult(rez, hateConfidence, offenseConfidence, "No Tips, Sorry!")
def receive_tweet(request):
    tweet = request.GET.get('tweet')
    tweet = repr(tweet)
    sonar = Sonar()
    result = sonar.ping(text=tweet)
    # result is already a dict, so it can serve directly as the template context
    return render(request, 'result_api/page2.html', result)
def isHateSpeech(self, line):
    '''Assign a 'hate speech score' using the Sonar API.'''
    indices = {"hate_speech": 0, "offensive_language": 1, "neither": 2}
    sonar = Sonar()
    response = sonar.ping(text=line)
    indexOfLanguage = indices[response["top_class"]]
    if response["top_class"] != "neither":
        # return the confidence of the offending top class as the score
        return response['classes'][indexOfLanguage]['confidence']
    else:
        return 0
from hatesonar import Sonar


def hateSpeech(comments):
    sonar = Sonar()
    print('Inside hateSpeech')
    print('Comments len = ' + str(len(comments)))
    for i in range(len(comments)):
        x = sonar.ping(text=comments[i].text)
        if x['top_class'] == "hate_speech":
            comments[i].hateType = 'hate'
        elif x['top_class'] == "offensive_language":
            comments[i].hateType = 'offensive'
        else:
            comments[i].hateType = 'neutral'
    return comments
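# Illustrative driver, assuming each comment object exposes .text and a writable
# .hateType attribute; SimpleNamespace stands in for the real comment class here.
if __name__ == "__main__":
    from types import SimpleNamespace
    tagged = hateSpeech([SimpleNamespace(text="hello there", hateType=None)])
    print(tagged[0].hateType)  # e.g. 'neutral'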
def do_GET(self):
    query_components = parse_qs(urlparse(self.path).query)
    imsi = query_components["imsi"]  # parse_qs returns a list of values per key
    print(imsi)
    # Send headers
    self._set_headers()
    # Classify the query value and send the result back to the client
    sonar = Sonar()
    message = sonar.ping(text=str(imsi[0]))
    # Write content as utf-8 data
    self.wfile.write(bytes(json.dumps(message), "utf-8"))
    return message
def main(argv):
    global sonar
    # setting up the database
    db = MongoClient().gab
    # initialising the hateometer
    sonar = Sonar()
    # parsing all posts
    print("hateometing the posts...")
    # loop over all English posts
    for post in db.posts.find({'post.language': 'en'}, no_cursor_timeout=True):
        # run hate detection on the post body
        obj = hateometer(post['post']['body'])
        postid = post['_id']
        # store the results in MongoDB
        db.posts.update_one({'_id': postid}, {"$set": {'post.hateometer': obj}})
    print("hateometing the comments...")
    # do the same trick for the comments
    for comment in db.comments.find({'language': 'en'}, no_cursor_timeout=True):
        obj = hateometer(comment['body'])
        commentid = comment['_id']
        db.comments.update_one({'_id': commentid}, {"$set": {'hateometer': obj}})
    print("done!")
def getOffensiveness(self, string, output_a):
    """Append "class confidence%" triples, e.g. "hate_speech 3.21 offensive_language 85.40 neither 11.39"."""
    sonar = Sonar()
    result = sonar.ping(text=string)
    # iterate over the structured result instead of string-parsing str(sonar.ping(...)),
    # which was fragile and needed a special case for scientific-notation confidences
    parts = []
    for entry in result['classes']:
        # {:.2f} renders tiny confidences as 0.00, matching the old scientific-notation handling
        parts.append('{} {:.2f}'.format(entry['class_name'], entry['confidence'] * 100))
    output_a.append(' '.join(parts))
from hatesonar import Sonar
from tqdm import tqdm


def ping_file(dataset_path):
    """Run the ping function on each line in the chat log file.

    :param dataset_path: file path of the dataset (specify the directory name if the file is under a folder)
    """
    sonar = Sonar()
    with open(dataset_path, 'r', encoding="utf-8") as input_file:
        # read and remove the "\n" escape characters at the end of each chat message
        chat_lines = input_file.read().splitlines()
    # trim whitespaces before and after each chat message
    chat_lines = [each_line.strip() for each_line in chat_lines]
    # get only the message after the "[timestamp] <username> " prefix
    chat_lines = [each_line.partition("> ")[2] for each_line in chat_lines]
    return [
        sonar.ping(each_line) for each_line in tqdm(
            chat_lines, desc="Processing {} rows".format(len(chat_lines)))
    ]
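# Example invocation; the path and the "[timestamp] <user> message" log format are
# assumptions about the dataset, not guarantees.
# results = ping_file("logs/chat.log")
# print(results[0]["top_class"])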
def main():
    sonar = Sonar()
    textArray = readFile()
    Class = []
    hate = []
    offensive = []
    neither = []
    hate_speech_classifier(textArray, Class, hate, offensive, neither, sonar)
    displayResaults(hate, offensive, neither)  # (sic) name matches the helper defined elsewhere
import csv
import tweepy
from hatesonar import Sonar


def getTweet(api, hashTag, fileName, startDT, numTweet=1):
    hashTag = "\"#" + hashTag + "\""
    startDT = "\"" + startDT + "\""
    sonar = Sonar()
    with open(fileName, 'w') as csvFile:
        csvWriter = csv.writer(csvFile)
        try:
            for tweet in tweepy.Cursor(api.search, q=hashTag, count=numTweet,
                                       lang="en", since=startDT).items():
                if tweet.retweeted == False:
                    # was str(cleanTweet): that stringified the helper function, not the tweet
                    cleantweet = cleanTweet(tweet)
                    csvWriter.writerow([
                        tweet.created_at,
                        tweet.user.screen_name,
                        str(cleantweet),
                        # ping returns a dict, so "top_class" is a key, not an attribute
                        sonar.ping(text=str(cleantweet))["top_class"]
                    ])
        except Exception:
            print("Unable to connect to the Twitter API.")
from hatesonar import Sonar


def get_report(webpage_html):
    sonar = Sonar()
    report = sonar.ping(text=webpage_html)
    return report
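# Usage sketch: the classifier sees raw markup as-is, so stripping tags first
# (BeautifulSoup is an assumed dependency here, not part of the original) usually
# gives it cleaner input.
# from bs4 import BeautifulSoup
# text = BeautifulSoup(html, "html.parser").get_text()
# print(get_report(text)["top_class"])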
from hatesonar import Sonar
import json

# dir = "../graphics/bower_gabs/"
# images = os.listdir(dir)
#
# image = cv2.imread(images[0])
# gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# gabs = []
# for img in images:
#     text = pytesseract.image_to_string(Image.open(dir + img))
#     gabs.append(text)
#
# fp2 = open('case_study.json', 'w')
# json.dump(gabs, fp2)
# fp2.close()

with open('case_study.json', 'r') as fp:
    gabs = json.load(fp)

sonar = Sonar()
responses = []
for i in range(len(gabs)):
    responses.append(sonar.ping(text=gabs[i]))

with open('case_study_sonar.json', 'w') as fp2:
    json.dump(responses, fp2)
from hatesonar import Sonar
import json
import re
import nltk
from nltk.corpus import stopwords

sonar = Sonar()

with open('gabs999.json', 'r') as fp:
    gabs = json.load(fp)

text = ''
for gab in gabs:
    text = text + gab['post']['body']

text = re.sub(r"http\S+", "", text)  # strip URLs
word_list = re.sub(r"[^\w]", " ", text).split()

stop_words = list(stopwords.words('english'))
# lower-case every token and drop stopwords in one pass; the original built a
# words_to_remove list and called word_list.remove() per occurrence, with the same net effect
word_list = [word.lower() for word in word_list if word.lower() not in stop_words]
class MainCog(commands.Cog):
    def __init__(self, bot, mysql: MySQLWrapper):
        self.bot = bot
        self.mysql = mysql
        self.sonar = Sonar()
        self.discordForum = Forum("ECC-Discord", "ECC-Discord")
        self.discordForum.insert(self.mysql)
        self.discordAuthor = ForumAuthor("Discord", "-1")
        self.discordAuthor.insert(self.mysql)
        # self.generalThread = ForumThread("#general-chat", self.discordAuthor, self.discordForum.sqlID, hltvID = "709753463323754539")
        self.generalThread = ForumThread("#general-chat", self.discordAuthor,
                                         self.discordForum.sqlID, hltvID="456834448558653451")
        self.generalThread.insert(self.mysql)
        # self.shitpostingThread = ForumThread("#shitposting-and-media", self.discordAuthor, self.discordForum.sqlID, hltvID = "727107131416903750")
        self.shitpostingThread = ForumThread("#shitposting-and-media", self.discordAuthor,
                                             self.discordForum.sqlID, hltvID="468121733929369610")
        self.shitpostingThread.insert(self.mysql)
        self.mysql.db.commit()

    @commands.Cog.listener()
    async def on_message(self, message):
        # Checks if the message is in an observed channel and saves the thread ID
        channelID = str(message.channel.id)
        threadID = 0
        if channelID == self.generalThread.hltvID:
            threadID = self.generalThread.sqlID
        elif channelID == self.shitpostingThread.hltvID:
            threadID = self.shitpostingThread.sqlID
        else:
            return

        # Compiles message info
        messageID = message.id
        content = message.clean_content
        authorID = message.author.id
        authorName = message.author.name
        timestamp = message.created_at

        # Calculates the hate speech and offensive language rating
        rating = self.sonar.ping(f"{authorName}: {content}")
        hateRating = 0
        offRating = 0
        # Extracts confidence values for hate speech and offensive language from the result
        for ratingClass in rating['classes']:
            if ratingClass['class_name'] == 'hate_speech':
                hateRating = ratingClass['confidence']
            elif ratingClass['class_name'] == 'offensive_language':
                offRating = ratingClass['confidence']

        author = ForumAuthor(authorName, authorID)
        author.insert(self.mysql)
        post = ForumPost(messageID, threadID, -1, author, content,
                         timestamp, hateRating, offRating)
        post.insert(self.mysql)
        self.mysql.db.commit()
def check(self, txt):
    sonar = Sonar()
    res = sonar.ping(text=txt)
    return json.dumps(res)
# Cleaned up from a Spyder console session (Mon Jul 27 2020): classifying a
# speech-to-text transcript of an "every curse word" video with Hate Sonar.
from hatesonar import Sonar

text = "hey guys more here i'm now going to say every curse word ending in the english language so i don't oh i'm gonna do this for some reason america do is we have no idea what news but i'm gonna do anyway so effort to do let's get started i'm going to do it all enough about a quarter so enjoy little real quick anus arse our soul as ass hat as jabber as pirates as bag ass bandit ass bangor ass bite ass clown ass clock ass cracker asses ass face as f**k ass f****r ass goblin as hot as head as her haul ass hopper as jagger as slick as liquor asked monkey as much as mature as a nazi network as pirates as shit ass haul ass sucker ass wat ass wipe axe wound right movements a little b b babbitt bastard beater bitch bitch ass bitches bitch t**s bitchy blow job blog bollocks but boehner brother f****r bullshit bumble f**k butt plug but pirates but f****r but but f****r sees camel toe copied much her testicle chink showed clips clips face could f**k clusterfuck c**k c**k ascot bite caught burger caught face c**k f****r c**k head c**k jockey caught narco c**k master cut bungler cog mongrel caught monkey cotton cosmos caught nugget c**k shit c**k smith catch smoke pot smoker cuts the fur c**k sucker c**k waffle coochie coo coon cuter cracker come come double cum dumpster cum guzzler come jockey come foot contort cutty cut cunnilingus c**t contest confess con whole c**t licker cut rag consulate one two d's still won't load diego damn diego dick dick sneeze dick big dick beaters dick face dick f**k dick f****r dick head dick whole big juice dick milk dick monger dicks dicks slap dick sucker dick sucking dick tickler dick wad dick weasel dick weed dick wad dick dyke dodo dish it due to that douche bag dougie douche douche f*g douche waffle dumb as dumb f**k dumb shit moving on to the next letters though eastwood are the sort of f*g f*g big f*g f****r f****t f****t c**k fact hard for a lot theo felch flamer f**k f**k as f**k bang f**k boy f**k brain f**k but f**k butter f****d f****r f****r sucker f**k face f**k head f**k whole f****n f****n footnote f**k off f***s f**k stick for tarred f**k up f**k want to f**k wit fudge packer oh geez gay gas gamer gate okay f**k you f**k is gaylord gay target what god dammit god damn gooch gook gringo ogre guido please get offended these are just curse words i don't mean anything a word i say ages hand job hard on heave ho ho h**o h**o don't shit honky humping right wing onto litter i is the wars with the litter i would store with jay jack ass jag off jap jerk off jerk as gigolo jim is jack jingle bunny jungle bunny k kite huge crowds c**t cake aus lose lay mass large us lesbian les beaux lessee ends let's say uns mcfadden mic mange mother f****r mother f*****g mother f****r mother f*****g muff must driver munching ass go ends ends let's skip ends let's skip ends let's get bends enough to do ends is no owes its do p a citizen says skip ends the let's turn off which oughta know what that means on an associates in a house where no we just turn off i like if we were behind me i p's penny which pekar pick her head penis speed as bangor penis f****r penis puffer piss pissed off piss piss flaps per smoker pollack boon pannone bonetti poon tang porch monkey off prick brunetti pita pussies pussy pussy licking poodle like i said i don't know what half these words mean but apparently the curse words so as a seder it i accuse queen queer queer bait queer whole slowly ars were knob right rush job risky right esses esses shh long scroll shit shit us should bag should beggars should brain should breath shit canned shit c**t shit dick should face should faced s******d shit oh shit house chefs better shit stain shettar shittiest shitty shoes chestnut skiing ski skull f**k s**t s**t bags maggots snatch spic off splurge spook suck us a lot of aid from us t's tart testicle thunder c**t twat twat lips twat waffle no use at ease citizen tvs no these anita views rihanna see this any davies do i wink went job what back w***e w***e bag or face what exes know x's think guy why the y's z's no zs right so those were the curse words in the english language i obviously skipped some for obvious reasons hope you guys enjoyed for some reason if you are in this point of the video what the hell is wrong with you thank you for watching i'll see you next time piece"

print("****HATE/OFFENSIVE DETECTION*************")
sonar = Sonar()
hate_dict = sonar.ping(text)
print(hate_dict)  # the session's earlier `print hate_dict` was Python 2 syntax
from hatesonar import Sonar

sonar = Sonar()
sonar.ping(text="At least I'm not a nigger")
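# For reference, ping() returns a dict shaped like the following (the confidence
# numbers are illustrative, not real model output):
# {
#     "text": "...",
#     "top_class": "hate_speech",
#     "classes": [
#         {"class_name": "hate_speech", "confidence": 0.91},
#         {"class_name": "offensive_language", "confidence": 0.08},
#         {"class_name": "neither", "confidence": 0.01},
#     ],
# }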
# assumed imports, inferred from the calls below (Translator matches googletrans usage)
import http.client
import json
import pymongo
from googletrans import Translator
from hatesonar import Sonar


def geocode(place):
    conn = http.client.HTTPSConnection('us1.locationiq.com', 443)
    conn.request(
        'GET',
        '/v1/search.php?key=269d9376ef6d64&q=' + place + '&format=json&limit=1')
    r = json.loads(conn.getresponse().read().decode('utf-8'))
    return [r[0]['lon'], r[0]['lat']]


client = pymongo.MongoClient('mongodb://localhost:27017/')
db = client['MOB']
tweets = db['tweets']
posts = db['posts2']

texts, ids, data, data_media, cache = [], [], [], [], []
translator = Translator()
sonar = Sonar()

for i in tweets.find({}, {'_id': 0}).limit(20):
    flag = True
    tweets.delete_one({'id_str': i['id_str']})
    if i['lang'] in ['ur', 'bn', 'hi', 'te', 'en'] and i['text'] is not None:
        print('sentence = ' + i['text'])
        print('lang = ' + i['lang'])
        try:
            string = translator.translate(i['text'], dest='en')
            print(string.text)
            i['text'] = string.text
        except Exception:
            flag = False
            print('error')
from hatesonar import Sonar
import json

sonar = Sonar()

with open('gabs999.json', 'r') as fp:
    gabs = json.load(fp)

responses = []
for i in range(len(gabs)):
    responses.append(sonar.ping(text=gabs[i]['post']['body']))

with open('hates999.json', 'w') as fp2:
    json.dump(responses, fp2)
import sys
import json
import string
from hatesonar import Sonar  # hate speech classifier from the hatesonar Python library

sonar = Sonar()

# grab proper arguments
filterTypes = sys.argv[1]
processType = sys.argv[2]
fileName = sys.argv[3]
chosenFilter = sys.argv[4]
countFlag = sys.argv[5]

# open proper files
file = open('../../public/' + fileName, 'r')
writeFile = open('../../public/processed-' + fileName, 'w')
with open('../constants/slurs.json') as f:
    slurs = json.load(f)

filterTypes = filterTypes.split(",")

# grab chosen filters and add to filterTypes list
if chosenFilter != "none":
    slurs["c"] = chosenFilter.split(",")
    filterTypes.append("c")

# load codes for counts if count flag present
if countFlag == 'yes' and processType == 'word':
from api import Gab
import json
from hatesonar import Sonar

sonar = Sonar()
gab = Gab('dheerajpreddy', 'Test@123')

with open('username.json', 'r') as fp:
    people = json.load(fp)

for i in range(len(people)):
    flg1 = 0
    flg2 = 0
    flg = 0
    if people[i]['is_private'] is False:
        try:
            gabs = gab.getusertimeline(people[i]['username'], 100)
        except Exception:
            flg = 1
            print("ERROR for " + people[i]['username'])
        if flg:
            continue
        for data in gabs:
            response = sonar.ping(text=data['post']['body'])
            if response['top_class'] == 'hate_speech':
                flg1 = 1
            if response['top_class'] == 'offensive_language':
                flg2 = 1
        if flg1:
            people[i]['is_hate_speech'] = True
def __init__(self):
    self.sonar = Sonar()
import sys
from hatesonar import Sonar


def main():
    tweet = sys.argv[1]
    sonar = Sonar()
    print(sonar.ping(tweet)['classes'])
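# The snippet defines main() but never calls it, so a standard entry-point guard is
# added here; run as e.g. `python classify_tweet.py "some text"` (script name illustrative).
if __name__ == '__main__':
    main()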
# replace() returns a new string, so the result must be reassigned
data = data.replace('\r', '')
input = data.split('\n')
# split the input lines into 6 roughly equal chunks
splitInput = [
    input[x:x + math.ceil(len(input) / 6)]
    for x in range(0, len(input), math.ceil(len(input) / 6))
]

from hatesonar import Sonar
sonar = Sonar()


def f(splitInput, splitInputIndex):
    #offensiveCount = 0
    hatefulCount = 0
    for i in splitInput[splitInputIndex]:
        sonarEval = sonar.ping(i)
        if sonarEval['top_class'] == "neither":
            continue
        if sonarEval['top_class'] == "offensive_language":
            #os.system("cat {} | jq 'select(.body == \"{}\""")' >> offensive{}".format(args['input'], i, args['input']))
            #offensiveCount += 1
            continue
        if sonarEval['top_class'] == "hate_speech":
import json
import re
from nltk.corpus import stopwords
from hatesonar import Sonar
from wordcloud import WordCloud
import nltk
import matplotlib.pyplot as plt

with open('trending_gabs.json', 'r') as fp:
    gabs = json.load(fp)

sonar = Sonar()

stop_words = [word.lower() for word in stopwords.words('english')]

hate = 0
offensive = 0
neutral = 0
hate_text = ''
offensive_text = ''
neutral_text = ''

for i in range(360, len(gabs)):
    text = gabs[i]['post']['body']
    text = re.sub(r"http\S+", "", text)
    word_list = re.sub(r"[^\w]", " ", text).split()
    words_to_remove = []