Exemplo n.º 1
0
def hatedetect(hds):
    """Classify each text in *hds* with Hate Sonar.

    Returns a list with one entry per input text:
    0 when the highest-confidence class is hate speech or offensive
    language (indices 0 and 1), 1 when it is "neither" (index 2).

    NOTE(review): this assumes sonar.ping() always lists the classes in
    the order hate_speech, offensive_language, neither -- confirm against
    the hatesonar API before relying on positional indexing.
    """
    sonar = Sonar()
    ypred = []
    for text in hds:
        # sonar.ping() already returns a plain dict; the old
        # json.dumps/json.loads round-trip was a no-op and was removed.
        classes = sonar.ping(text=text)["classes"]
        # index of the class with the highest confidence; on exact ties the
        # later index wins, matching the original comparison chain
        _, top = max((classes[i]["confidence"], i) for i in range(3))
        # hate (0) or offensive (1) -> 0, "neither" (2) -> 1
        ypred.append(0 if top in (0, 1) else 1)
    return ypred
Exemplo n.º 2
0
 def isHateSpeech(self, line):  #using open source hate sonar api
     """Return 1 when Hate Sonar's top class flags *line*, else 0."""
     result = Sonar().ping(text=line)
     flagged = result["top_class"] != "neither"
     return 1 if flagged else 0
Exemplo n.º 3
0
    def __init__(self, bot, mysql: MySQLWrapper):
        """Store handles and register the forum, fallback author and the
        two observed channel threads in MySQL.

        :param bot: the Discord bot instance this cog belongs to
        :param mysql: wrapper around the MySQL connection used for inserts
        """
        self.bot = bot
        self.mysql = mysql
        self.sonar = Sonar()  # hate-speech classifier, reused per message

        # forum row representing this Discord server
        self.discordForum = Forum("ECC-Discord", "ECC-Discord")
        self.discordForum.insert(self.mysql)

        # fallback author row
        self.discordAuthor = ForumAuthor("Discord", "-1")
        self.discordAuthor.insert(self.mysql)

        # self.generalThread = ForumThread("#general-chat", self.discordAuthor, self.discordForum.sqlID, hltvID = "709753463323754539")
        # hltvID appears to hold the Discord channel id (the commented line
        # above is an older id) -- TODO confirm against the message handler
        self.generalThread = ForumThread("#general-chat",
                                         self.discordAuthor,
                                         self.discordForum.sqlID,
                                         hltvID="456834448558653451")
        self.generalThread.insert(self.mysql)

        # self.shitpostingThread = ForumThread("#shitposting-and-media", self.discordAuthor, self.discordForum.sqlID, hltvID = "727107131416903750")
        self.shitpostingThread = ForumThread("#shitposting-and-media",
                                             self.discordAuthor,
                                             self.discordForum.sqlID,
                                             hltvID="468121733929369610")
        self.shitpostingThread.insert(self.mysql)

        # persist all of the above inserts in a single commit
        self.mysql.db.commit()
Exemplo n.º 4
0
def refine_jsonl_file(path,
                      votes_threshold=10,
                      hate_limit=0.4,
                      offensive_limit=0.7,
                      general_limit=0.8):
    """Filter a .jsonl file of voted texts down to non-hateful lines.

    Reads every JSON line from *path*, keeps items whose vote count is at
    least *votes_threshold* and whose Hate Sonar confidences stay below
    all three limits, and writes the surviving texts (one per line) to
    "refined_<name>.txt".

    :param path: input .jsonl dataset path
    :param votes_threshold: minimum vote count for an item to be considered
    :param hate_limit: max allowed hate-speech confidence
    :param offensive_limit: max allowed offensive-language confidence
    :param general_limit: max allowed hate + offensive combined confidence
    """
    sonar = Sonar()
    name, _ = os.path.splitext(path)
    refined_name = "refined_{name}.txt".format(name=name)

    # mode='w' already truncates an existing file, so the explicit
    # exists/remove dance was unnecessary; the with-block closes both
    # files even on error (the old code also called close() redundantly).
    with open(path, 'rb') as f, open(refined_name, mode='w') as rf:
        for item in json_lines.reader(f):
            # BUG FIX: votes_threshold was previously ignored -- the code
            # compared against a hard-coded 0 instead of the parameter.
            if int(item['votes']) >= votes_threshold:
                text = item['text']
                # ping once per text and read both confidences from it
                # (the old code classified every text twice)
                classes = sonar.ping(text=text)['classes']
                hate_confidence = classes[0]['confidence']
                offensive_confidence = classes[1]['confidence']
                if not ((hate_confidence > hate_limit) or
                        (offensive_confidence > offensive_limit) or
                        (hate_confidence + offensive_confidence >
                         general_limit)):
                    try:
                        print(text, file=rf)
                    except UnicodeEncodeError:
                        # skip texts the output encoding cannot represent
                        continue
Exemplo n.º 5
0
def classify_texts(texts):
    """
    Classify texts with hate sonar (classifier).
    """
    # split on periods; the fragment after the final "." is dropped
    sentences = texts.split(".")[:-1]
    color_for_class = {
        "neither": "green",
        "offensive_language": "orange",
        "hate_speech": "red"
    }
    detector = Sonar()
    output = []
    # classify each sentence and package the result for the caller
    for idx, sentence in enumerate(sentences):
        top = detector.ping(text=sentence)["top_class"]
        output.append({
            "index": idx,
            "sentence": sentence,
            "top_class": top,
            "rating": color_for_class[top],
        })
    return output
def isAbusiveComment(x):
    """Screen a comment with Hate Sonar and report whether it is abusive.

    A comment is flagged when the hate-speech confidence exceeds 0.6 or
    the offensive-language confidence exceeds 0.55.

    :param x: the comment text to screen
    :return: MessageScreenerResult(flagged, hate_conf, offense_conf, tips)
    """
    result = Sonar().ping(text=x)

    hateConfidence = 0.0
    offenseConfidence = 0.0

    # pull the per-class confidences out of the result; class_name values
    # are mutually exclusive, so elif replaces the original if-chain
    for entry in result['classes']:
        name = entry["class_name"]
        if name == "hate_speech":
            hateConfidence = entry['confidence']
        elif name == "offensive_language":
            offenseConfidence = entry['confidence']

    # NOTE: the original "neitherConfidence > 0.7 -> rez = False" branch
    # was a no-op (rez already started as False) and has been removed.
    rez = hateConfidence > 0.6 or offenseConfidence > 0.55

    return MessageScreenerResult(rez, hateConfidence, offenseConfidence,
                                 "No Tips, Sorry!")
Exemplo n.º 7
0
def receive_tweet(request):
    """Django view: classify the 'tweet' GET parameter with Hate Sonar.

    Renders result_api/page2.html with the raw ping() result dict as the
    template context.
    """
    tweet = request.GET.get('tweet')
    print(tweet)
    # repr() keeps the quoted form the classifier was originally fed --
    # NOTE(review): this wraps the text in quotes; confirm it is intended
    tweet = repr(tweet)
    sonar = Sonar()
    result = sonar.ping(text=tweet)
    print(result)
    # the unused json.dumps(result) local was removed
    return render(request, 'result_api/page2.html', result)
Exemplo n.º 8
0
 def isHateSpeech(self, line):
     '''Assign a 'hatespeech score' using sonar api '''
     # positional index of each class in the ping() result
     class_index = {"hate_speech": 0, "offensive_language": 1, "neither": 2}
     result = Sonar().ping(text=line)
     top = result["top_class"]
     if top == "neither":
         return 0
     # confidence of whichever hateful class came out on top
     return result['classes'][class_index[top]]['confidence']
Exemplo n.º 9
0
def hateSpeech(comments):
    """Tag each comment with a hateType ('hate', 'offensive' or 'neutral').

    Mutates the objects in *comments* in place, setting .hateType from the
    Hate Sonar top class of .text, and returns the same list.
    """
    sonar = Sonar()
    print('Inside hatespeech')
    print('Comments len ='+str(len(comments)))
    # map classifier class names onto our labels; anything else (the
    # "neither" class) falls back to 'neutral'
    labels = {"hate_speech": 'hate', "offensive_language": 'offensive'}
    # iterate the comments directly instead of the range(len(...)) idiom
    for comment in comments:
        top = sonar.ping(text=comment.text)['top_class']
        comment.hateType = labels.get(top, 'neutral')
    return comments
Exemplo n.º 10
0
    def do_GET(self):
        """Handle GET: classify the 'imsi' query parameter with Hate Sonar
        and write the JSON-encoded result back to the client."""

        # parse_qs maps each query key to a *list* of values
        query_components = parse_qs(urlparse(self.path).query)
        imsi = query_components["imsi"]  # raises KeyError if param missing
        print(imsi)

        # Send headers
        self._set_headers()

        # Send message back to client
        sonar = Sonar()
        # classify the first value supplied for the parameter
        message = sonar.ping(text=str(imsi[0]))
        # Write content as utf-8 data
        self.wfile.write(bytes(json.dumps(message), "utf-8"))
        return message
Exemplo n.º 11
0
def main(argv):
  """Score every english gab post and comment with the hateometer and
  store the results back into MongoDB."""
  global sonar

  # connect to the local MongoDB "gab" database
  db = MongoClient().gab

  # the module-level Sonar instance is shared with hateometer()
  sonar = Sonar()

  print("hateometing the posts...")

  # score each english post body and write the result onto the document
  for post in db.posts.find({'post.language':'en'}, no_cursor_timeout=True):
    score = hateometer(post['post']['body'])
    db.posts.update_one({'_id':post['_id']},
                        {"$set":{'post.hateometer':score}})

  print("hateometing the comments...")

  # identical treatment for the english comments
  for comment in db.comments.find({'language':'en'}, no_cursor_timeout=True):
    score = hateometer(comment['body'])
    db.comments.update_one({'_id':comment['_id']},
                           {"$set":{'hateometer':score}})

  print("done!")
Exemplo n.º 12
0
 def getOffensiveness(self, string, output_a):
     """Append a "<class> <percent> ..." summary of sonar.ping(string)
     to *output_a*.

     Each confidence is rendered as a percentage with two decimals;
     values printed in scientific notation (i.e. vanishingly small)
     are shown as 0.00.
     """
     sonar = Sonar()
     # BUG FIX: read the classes straight out of the result dict instead
     # of string-parsing its repr(); this also restores the separator
     # space the old code dropped after every "0.00" entry.
     parts = []
     for entry in sonar.ping(string)['classes']:
         confidence = entry['confidence']
         if "e" in str(confidence):
             # scientific notation -> effectively zero
             rendered = "0.00"
         else:
             rendered = '{0:.2f}'.format(confidence * 100)
         parts.append('{} {}'.format(entry['class_name'], rendered))
     output_a.append(" ".join(parts))
Exemplo n.º 13
0
def ping_file(dataset_path):
    """Run ping function on each line in the chat log file
        :param dataset_path:    file path of the dataset
                                (specify directory name if the file is under a folder)
    """
    sonar = Sonar()

    # BUG FIX: the input handle was never closed; a with-block fixes that.
    with open(dataset_path, 'r', encoding="utf-8") as input_file:
        # read and drop the trailing "\n" of each chat message
        chat_lines = input_file.read().splitlines()

    # trim whitespace before and after each chat message
    chat_lines = [each_line.strip() for each_line in chat_lines]

    # keep only the message after the "[timestamp] <username>" prefix
    chat_lines = [each_line.partition("> ")[2] for each_line in chat_lines]

    return [
        sonar.ping(each_line) for each_line in tqdm(
            chat_lines, desc="Processing {} rows".format(len(chat_lines)))
    ]
Exemplo n.º 14
0
def main():
    """Read the input texts, classify them and display the results."""
    sonar = Sonar()
    texts = readFile()

    # output containers filled in place by the classifier
    classes, hate, offensive, neither = [], [], [], []

    hate_speech_classifier(texts, classes, hate, offensive, neither, sonar)
    displayResaults(hate, offensive, neither)
Exemplo n.º 15
0
def getTweet(api, hashTag, fileName, startDT, numTweet=1):
    """Search recent English tweets for #hashTag and log them to a CSV.

    Each non-retweet row holds: created_at, screen name, the cleaned
    tweet text and the Hate Sonar top class for that text.

    :param api: an authenticated tweepy API handle
    :param hashTag: hashtag to search (without the leading '#')
    :param fileName: output CSV path
    :param startDT: earliest date, in the format the search API expects
    :param numTweet: page size passed to the search cursor
    """
    hashTag = "\"#" + hashTag + "\""
    startDT = "\"" + startDT + "\""
    sonar = Sonar()
    # with-block guarantees the CSV is flushed and closed (the old code
    # leaked the handle)
    with open(fileName, 'w') as csvFile:
        csvWriter = csv.writer(csvFile)
        try:
            for tweet in tweepy.Cursor(api.search,
                                       q=hashTag,
                                       count=numTweet,
                                       lang="en",
                                       since=startDT).items():
                if tweet.retweeted == False:
                    # BUG FIX: the original stringified the cleanTweet
                    # *function* object and pinged the literal
                    # 'text="..."' wrapper string; pass the cleaned text
                    # itself and read 'top_class' from the result dict
                    # (ping() returns a dict, not an object).
                    text = str(cleanTweet(tweet))
                    csvWriter.writerow([
                        tweet.created_at, tweet.user.screen_name, text,
                        sonar.ping(text=text)['top_class']
                    ])
        except Exception:
            # best-effort: keep whatever was written before the failure
            print("Unable to connect Twitter API.")
Exemplo n.º 16
0
def get_report(webpage_html):
    """Return the raw Hate Sonar classification for *webpage_html*."""
    return Sonar().ping(text=webpage_html)
Exemplo n.º 17
0
import json

from hatesonar import Sonar

# NOTE: the one-off OCR step that built case_study.json (pytesseract over
# the images in ../graphics/bower_gabs/) was previously kept here as
# commented-out code; rerun it only to regenerate the input file.

# load the texts extracted from the gab screenshots
# (BUG FIX: json was used below but never imported in this snippet)
with open('case_study.json', 'r') as fp:
    gabs = json.load(fp)

sonar = Sonar()

# classify every gab and collect the raw sonar responses
responses = [sonar.ping(text=gab) for gab in gabs]

# persist the responses; the with-block closes the file
with open('case_study_sonar.json', 'w') as fp2:
    json.dump(responses, fp2)
Exemplo n.º 18
0
from hatesonar import Sonar
import json
import re
import nltk
from nltk.corpus import stopwords

sonar = Sonar()

# load the gab posts and concatenate their bodies into one blob
with open('gabs999.json', 'r') as fp:
    gabs = json.load(fp)
# (the redundant fp.close() after the with-block was removed)

text = ''.join(gab['post']['body'] for gab in gabs)

# strip URLs, then split on non-word characters (raw string for the regex)
text = re.sub(r"http\S+", "", text)
word_list = re.sub(r"[^\w]", " ", text).split()

# lower-case everything
word_list = [word.lower() for word in word_list]

# drop english stop words in one O(n) pass -- the original built a
# words_to_remove list and called list.remove() per word, which was O(n^2)
stop_words = set(stopwords.words('english'))
word_list = [word for word in word_list if word not in stop_words]
Exemplo n.º 19
0
class MainCog(commands.Cog):
    """Discord cog that mirrors two observed channels into MySQL forum
    tables, scoring every message for hate speech and offensive language
    with Hate Sonar."""

    def __init__(self, bot, mysql: MySQLWrapper):
        """Store handles and register the forum, fallback author and the
        two observed channel threads in MySQL.

        :param bot: the Discord bot this cog is attached to
        :param mysql: wrapper around the MySQL connection used for inserts
        """
        self.bot = bot
        self.mysql = mysql
        self.sonar = Sonar()  # classifier reused for every message

        # forum row representing this Discord server
        self.discordForum = Forum("ECC-Discord", "ECC-Discord")
        self.discordForum.insert(self.mysql)

        # fallback author row
        self.discordAuthor = ForumAuthor("Discord", "-1")
        self.discordAuthor.insert(self.mysql)

        # self.generalThread = ForumThread("#general-chat", self.discordAuthor, self.discordForum.sqlID, hltvID = "709753463323754539")
        # hltvID holds the Discord channel id (matched in on_message below;
        # the commented line above is an older id)
        self.generalThread = ForumThread("#general-chat",
                                         self.discordAuthor,
                                         self.discordForum.sqlID,
                                         hltvID="456834448558653451")
        self.generalThread.insert(self.mysql)

        # self.shitpostingThread = ForumThread("#shitposting-and-media", self.discordAuthor, self.discordForum.sqlID, hltvID = "727107131416903750")
        self.shitpostingThread = ForumThread("#shitposting-and-media",
                                             self.discordAuthor,
                                             self.discordForum.sqlID,
                                             hltvID="468121733929369610")
        self.shitpostingThread.insert(self.mysql)

        # persist all of the above inserts in a single commit
        self.mysql.db.commit()

    @commands.Cog.listener()
    async def on_message(self, message):
        """Persist every message from an observed channel together with
        its hate-speech and offensive-language confidence scores."""

        # Checks if the message is in an observed channel and saves thread ID
        channelID = str(message.channel.id)
        threadID = 0

        if channelID == self.generalThread.hltvID:
            threadID = self.generalThread.sqlID
        elif channelID == self.shitpostingThread.hltvID:
            threadID = self.shitpostingThread.sqlID
        else:
            return  # not an observed channel -- ignore silently

        # Compiles message info
        messageID = message.id
        content = message.clean_content
        authorID = message.author.id
        authorName = message.author.name
        timestamp = message.created_at

        # Calculates hate speech and offensive language rating
        rating = self.sonar.ping(f"{authorName}: {content}")
        hateRating = 0
        offRating = 0

        # Extracts confidence values for hate speech and offensive language from result
        for ratingClass in rating['classes']:
            if ratingClass['class_name'] == 'hate_speech':
                hateRating = ratingClass['confidence']
            elif ratingClass['class_name'] == 'offensive_language':
                offRating = ratingClass['confidence']

        author = ForumAuthor(authorName, authorID)
        author.insert(self.mysql)

        post = ForumPost(messageID, threadID, -1, author, content, timestamp,
                         hateRating, offRating)
        post.insert(self.mysql)

        self.mysql.db.commit()
Exemplo n.º 20
0
 def check(self, txt):
     """Classify *txt* with Hate Sonar and return the result as JSON."""
     classification = Sonar().ping(text=txt)
     return json.dumps(classification)
Exemplo n.º 21
0
## ---(Mon Jul 27 12:32:03 2020)---
debugfile('C:/Users/Nikhil Bhargava/.spyder-py3/vosk-apiexample.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')

## ---(Mon Jul 27 12:43:11 2020)---
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/vosk-apiexample.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')

## ---(Mon Jul 27 12:51:27 2020)---
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/vosk-apiexample.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')
import hateSonar
import hatesonar
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/vosk-apiexample.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')
text="hey guys more here i'm now going to say every curse word ending in the english language so i don't oh i'm gonna do this for some reason america do is we have no idea what news but i'm gonna do anyway so effort to do let's get started i'm going to do it all enough about a quarter so enjoy little real quick anus arse our soul as ass hat as jabber as pirates as bag ass bandit ass bangor ass bite ass clown ass clock ass cracker asses ass face as f**k ass f****r ass goblin as hot as head as her haul ass hopper as jagger as slick as liquor asked monkey as much as mature as a nazi network as pirates as shit ass haul ass sucker ass wat ass wipe axe wound right movements a little b b babbitt bastard beater bitch bitch ass bitches bitch t**s bitchy blow job blog bollocks but boehner brother f****r bullshit bumble f**k butt plug but pirates but f****r but but f****r sees camel toe copied much her testicle chink showed clips clips face could f**k clusterfuck c**k c**k ascot bite caught burger caught face c**k f****r c**k head c**k jockey caught narco c**k master cut bungler cog mongrel caught monkey cotton cosmos caught nugget c**k shit c**k smith catch smoke pot smoker cuts the fur c**k sucker c**k waffle coochie coo coon cuter cracker come come double cum dumpster cum guzzler come jockey come foot contort cutty cut cunnilingus c**t contest confess con whole c**t licker cut rag consulate one two d's still won't load diego damn diego dick dick sneeze dick big dick beaters dick face dick f**k dick f****r dick head dick whole big juice dick milk dick monger dicks dicks slap dick sucker dick sucking dick tickler dick wad dick weasel dick weed dick wad dick dyke dodo dish it due to that douche bag dougie douche douche f*g douche waffle dumb as dumb f**k dumb shit moving on to the next letters though eastwood are the sort of f*g f*g big f*g f****r f****t f****t c**k fact hard for a lot theo felch flamer f**k f**k as f**k bang f**k boy f**k brain f**k but f**k butter f****d 
f****r f****r sucker f**k face f**k head f**k whole f****n f****n footnote f**k off f***s f**k stick for tarred f**k up f**k want to f**k wit fudge packer oh geez gay gas gamer gate okay f**k you f**k is gaylord gay target what god dammit god damn gooch gook gringo ogre guido please get offended these are just curse words i don't mean anything a word i say ages hand job hard on heave ho ho h**o h**o don't shit honky humping right wing onto litter i is the wars with the litter i would store with jay jack ass jag off jap jerk off jerk as gigolo jim is jack jingle bunny jungle bunny k kite huge crowds c**t cake aus lose lay mass large us lesbian les beaux lessee ends let's say uns mcfadden mic mange mother f****r mother f*****g mother f****r mother f*****g muff must driver munching ass go ends ends let's skip ends let's skip ends let's get bends enough to do ends is no owes its do p a citizen says skip ends the let's turn off which oughta know what that means on an associates in a house where no we just turn off i like if we were behind me i p's penny which pekar pick her head penis speed as bangor penis f****r penis puffer piss pissed off piss piss flaps per smoker pollack boon pannone bonetti poon tang porch monkey off prick brunetti pita pussies pussy pussy licking poodle like i said i don't know what half these words mean but apparently the curse words so as a seder it i accuse queen queer queer bait queer whole slowly ars were knob right rush job risky right esses esses shh long scroll shit shit us should bag should beggars should brain should breath shit canned shit c**t shit dick should face should faced s******d shit oh shit house chefs better shit stain shettar shittiest shitty shoes chestnut skiing ski skull f**k s**t s**t bags maggots snatch spic off splurge spook suck us a lot of aid from us t's tart testicle thunder c**t twat twat lips twat waffle no use at ease citizen tvs no these anita views rihanna see this any davies do i wink went job what back 
w***e w***e bag or face what exes know x's think guy why the y's z's no zs right so those were the curse words in the english language i obviously skipped some for obvious reasons hope you guys enjoyed for some reason if you are in this point of the video what the hell is wrong with you thank you for watching i'll see you next time piece"
print("****HATE/OFFENSIVE DETECTION*************")
hate_dict={}
from hatesonar import Sonar
sonar = Sonar()
hate_dict=sonar.ping(text)
print hate_dict
print(hate_dict)
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/vosk-apiexample.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')
clar
clear
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/vosk-apiexample.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')

## ---(Mon Jul 27 15:27:50 2020)---
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/videoTranscribeGoogle.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')
clear
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/videoTranscribeGoogle.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')
clear
runfile('C:/Users/Nikhil Bhargava/.spyder-py3/videoTranscribeSphnix.py', wdir='C:/Users/Nikhil Bhargava/.spyder-py3')
clear
Exemplo n.º 22
0
from hatesonar import Sonar
# one-off demo: classify a single sample sentence with Hate Sonar
sonar = Sonar()
sonar.ping(text="At least I'm not a nigger")
Exemplo n.º 23
0
def geocode(place):
    """Geocode *place* via the LocationIQ search API.

    :param place: free-text place name
    :return: [longitude, latitude] of the best match, as returned
             by the API
    """
    # quote() the query so spaces/unicode in the place name cannot
    # produce a malformed request (the old code interpolated it raw)
    from urllib.parse import quote
    # NOTE(review): the API key is hard-coded; move it to configuration.
    conn = http.client.HTTPSConnection('us1.locationiq.com', 443)
    try:
        conn.request(
            'GET', '/v1/search.php?key=269d9376ef6d64&q=' + quote(place) +
            '&format=json&limit=1')
        r = json.loads(conn.getresponse().read().decode('utf-8'))
    finally:
        # BUG FIX: the connection was never closed
        conn.close()
    return [r[0]['lon'], r[0]['lat']]


client = pymongo.MongoClient('mongodb://localhost:27017/')
db = client['MOB']
tweets = db['tweets']
posts = db['posts2']
# working containers; most are presumably filled later in the file --
# only 'flag' and i['text'] are touched in this visible chunk
texts, ids, data, data_media, cache = [], [], [], [], []
translator = Translator()
sonar = Sonar()

# process (and consume) up to 20 stored tweets
for i in tweets.find({}, {'_id': 0}).limit(20):
    flag = True
    # NOTE(review): the tweet is deleted from the collection up front,
    # even if translation below fails -- confirm this is intended
    tweets.delete_one({'id_str': i['id_str']})
    if i['lang'] in ['ur', 'bn', 'hi', 'te', 'en'] and i['text'] != None:
        print('sentence = ' + i['text'])
        print('lang = ' + i['lang'])
        try:
            # translate supported-language tweets to english in place
            string = translator.translate(i['text'], dest='en')
            print(string.text)
            i['text'] = string.text
        except:
            # best-effort: mark the failure and keep the original text
            flag = False
            print('error')
Exemplo n.º 24
0
from hatesonar import Sonar
import json

sonar = Sonar()

# load the gab posts (the redundant fp.close() after the with was removed)
with open('gabs999.json', 'r') as fp:
    gabs = json.load(fp)

# classify each post body, iterating the posts directly instead of
# the range(len(...)) idiom
responses = [sonar.ping(text=gab['post']['body']) for gab in gabs]

# write all sonar responses out; the with-block closes the file
with open('hates999.json', 'w') as fp2:
    json.dump(responses, fp2)
import sys
import json
import string
from hatesonar import Sonar

# hate speech classifier from hatesonar python library
sonar = Sonar()

# grab proper arguments
filterTypes = sys.argv[1]
processType = sys.argv[2]
fileName = sys.argv[3]
chosenFilter = sys.argv[4]
countFlag = sys.argv[5]

# open proper files
file = open('../../public/' + fileName, 'r')
writeFile = open('../../public/processed-' + fileName, 'w')

with open('../constants/slurs.json') as f:
    slurs = json.load(f)

filterTypes = filterTypes.split(",")

# grab chosen filters and add to filterTypes list
if chosenFilter != "none":
    slurs["c"] = chosenFilter.split(",")
    filterTypes.append("c")

# load codes for counts if count flag present
if countFlag == 'yes' and processType == 'word':
from api import Gab
import json
from hatesonar import Sonar

sonar = Sonar()
gab = Gab('dheerajpreddy', 'Test@123')

with open('username.json', 'r') as fp:
    people = json.load(fp)
fp.close()

for i in range(len(people)):
    flg1 = 0
    flg2 = 0
    flg = 0
    if people[i]['is_private'] is False:
        try:
            gabs = gab.getusertimeline(people[i]['username'], 100)
        except:
            flg = 1
            print("ERROR for " + people[i]['username'])
        if flg:
            continue
        for data in gabs:
            response = sonar.ping(text=data['post']['body'])
            if response['top_class'] == 'hate_speech':
                flg1 = 1
            if response['top_class'] == 'offensive_language':
                flg2 = 1
        if flg1:
            people[i]['is_hate_speech'] = True
Exemplo n.º 27
0
 def __init__(self):
     # create the Hate Sonar classifier shared by this instance's methods
     self.sonar = Sonar()
Exemplo n.º 28
0
def main():
    """Classify the tweet given as the first CLI argument and print the
    per-class confidences."""
    tweet_text = sys.argv[1]
    result = Sonar().ping(tweet_text)
    print(result['classes'])
data.replace('\r', '')
input = data.split('\n')

splitInput = [
    input[x:x + math.ceil(len(input) / 6)]
    for x in range(0, len(input), math.ceil(len(input) / 6))
]
#print(chunks[3])

#def f(splitInput, splitInputIndex):
#print('{}: hello {} from {}'.format(
#   dt.datetime.now(), name, current_process().name))
#sys.stdout.flush()

from hatesonar import Sonar
sonar = Sonar()


def f(splitInput, splitInputIndex):
    #offensiveCount = 0
    hatefulCount = 0
    for i in splitInput[splitInputIndex]:
        #for i in input:
        sonarEval = sonar.ping(i)
        if sonarEval['top_class'] == "neither":
            continue
        if sonarEval['top_class'] == "offensive_language":
            #os.system("cat {} | jq 'select(.body == \"{}\""")' >> offensive{}".format(args['input'], i, args['input']))
            #offensiveCount += 1
            continue
        if sonarEval['top_class'] == "hate_speech":
Exemplo n.º 30
0
import json
import re
from nltk.corpus import stopwords
from hatesonar import Sonar
from wordcloud import WordCloud
import nltk
import matplotlib.pyplot as plt

fp = open('trending_gabs.json', 'r')
gabs = json.load(fp)
fp.close()

sonar = Sonar()

stop_words = list(stopwords.words('english'))
for i in range(0, len(stop_words)):
	stop_words[i] = stop_words[i].lower()

hate = 0
offensive = 0
neutral = 0
hate_text = ''
offensive_text = ''
neutral_text = ''

for i in range(360, len(gabs)):
	text = gabs[i]['post']['body']
	text = re.sub(r"http\S+", "", text)
	word_list = re.sub("[^\w]", " ", text).split()
	words_to_remove = []