def __init__(self, dictionary_paths=None):
    """Create the splitter, POS tagger and dictionary tagger collaborators.

    Args:
        dictionary_paths: Optional list of dictionary file paths handed to
            ``DictionaryTagger``. When omitted or ``None``, the original
            five hard-coded ``.yml`` files are used, so existing
            zero-argument callers behave exactly as before.
    """
    if dictionary_paths is None:
        # The defaults are absolute paths on the original author's machine;
        # pass ``dictionary_paths`` explicitly to run anywhere else.
        dictionary_paths = [
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml',
        ]
    self.splitter = Splitter()
    self.postagger = POSTagger()
    self.dicttagger = DictionaryTagger(dictionary_paths)
def main(keys='keys.ini', raw_tweets_file='twitter_data.txt', no_tweets=1000,
         tracked_words_file='tracks.csv',
         formatted_tweets_file='formatted_tweets.txt',
         dictionaries=None):
    """Perform an analysis to find sexist and rude words in tweets.

    This function employs every other module to perform a full analysis on
    data retrieved from the Twitter stream. First a TwitterMiner retrieves
    data and dumps it, then a TweetFormatter parses the data into a list of
    tweets that are lists of words. Then it uses the spaghetti tagger to
    POS-tag every word, yielding a list of tweets that are lists with
    elements of the form (word, [tags]). A DictionaryTagger adds our custom
    tags to the [tags] list. Finally a TagCounter performs a count of every
    tag found in the tweets.

    The function prints the number of matches for each of our custom tags;
    a tag that is missing from the count result is silently skipped.

    Args:
        keys: Passed to ``TwitterMiner`` (presumably the API credentials
            file -- confirm against ``TwitterMiner``).
        raw_tweets_file: File name given to both ``TwitterMiner`` and
            ``TweetFormatter``.
        no_tweets: Passed to ``TwitterMiner`` (presumably the number of
            tweets to collect -- confirm against ``TwitterMiner``).
        tracked_words_file: Passed to ``TwitterMiner.mine``.
        formatted_tweets_file: Passed to ``TweetFormatter.convert2text``.
        dictionaries: List of dictionary files for ``DictionaryTagger``.
            Defaults to the misogyny and curses dictionaries.
    """
    if dictionaries is None:
        # Build the default list per call: a list literal in the signature
        # would be one shared object that any callee could mutate for all
        # later calls.
        dictionaries = ['misoginy_dictionary.yml', 'curses_dictionary.yml']

    miner = TwitterMiner(keys, raw_tweets_file, no_tweets)
    miner.mine(tracked_words_file)

    formatter = TweetFormatter(raw_tweets_file)
    tweets = formatter.convert2json()
    tweets = formatter.convert2text(tweets, formatted_tweets_file)
    tweets = formatter.clean_tweets(tweets)
    tweets = [tweet.split() for tweet in tweets]

    tagger = DictionaryTagger(dictionaries)
    postagged_sents = spgt.pos_tag_sents(tweets)
    tagged_sents = tagger.tag(postagged_sents)

    counter = TagCounter(tagged_sents)
    res = counter.count()

    # (message template, tag key) pairs, reported in the original order.
    reports = (
        ("Palabras misóginas: {}", 'misóginia'),
        ("Palabras groseras: {}", 'grosería'),
    )
    for template, tag in reports:
        # Only the lookup is guarded, so a KeyError raised anywhere else is
        # not swallowed by accident.
        try:
            hits = res[tag]
        except KeyError:
            continue
        print(template.format(hits))
def __init__(self, dictionary_paths=None):
    """Create the splitter, POS tagger and dictionary tagger collaborators.

    Args:
        dictionary_paths: Optional list of dictionary file paths handed to
            ``DictionaryTagger``. When omitted or ``None``, the original
            five hard-coded ``.yml`` files are used, so existing
            zero-argument callers behave exactly as before.
    """
    if dictionary_paths is None:
        # The defaults are absolute paths on the original author's machine;
        # pass ``dictionary_paths`` explicitly to run anywhere else.
        dictionary_paths = [
            "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
            "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
            "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
            "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
            "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
        ]
    self.splitter = Splitter()
    self.postagger = POSTagger()
    self.dicttagger = DictionaryTagger(dictionary_paths)
class SentimentAnalyzingService(object):
    """Dictionary-based sentiment scorer.

    Text is split into sentences, POS-tagged, tagged again from the
    dictionary files, and then reduced to a single numeric score in which
    'positive' tags count +1 and 'negative' tags count -1, adjusted by the
    'inc' / 'dec' / 'inv' tags of the preceding token.
    """

    # Original hard-coded dictionary locations (absolute paths on the
    # author's machine). Kept as the default for backward compatibility.
    DEFAULT_DICTIONARY_PATHS = (
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml',
    )

    def __init__(self, dictionary_paths=None):
        """Create the splitter, POS tagger and dictionary tagger.

        Args:
            dictionary_paths: Optional list of dictionary file paths for
                ``DictionaryTagger``. Defaults to
                ``DEFAULT_DICTIONARY_PATHS`` so zero-argument construction
                behaves as before.
        """
        if dictionary_paths is None:
            # Hand over a fresh list, as the original code did.
            dictionary_paths = list(self.DEFAULT_DICTIONARY_PATHS)
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger(dictionary_paths)

    def valueOf(self, sentiment):
        """Return 1 for 'positive', -1 for 'negative', and 0 otherwise."""
        if sentiment == 'positive':
            return 1
        if sentiment == 'negative':
            return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Accumulate the sentiment score of one sentence.

        Args:
            sentence_tokens: Sequence of tokens; index 2 of each token must
                be its collection of tags (presumably tokens are
                (word, lemma, tags) triples -- confirm against the taggers).
            previous_token: Token preceding the first element of
                ``sentence_tokens``, or ``None`` when there is none.
            acum_score: Score accumulated so far; returned unchanged when
                ``sentence_tokens`` is empty or otherwise falsy.

        Returns:
            ``acum_score`` plus the adjusted score of every token.
        """
        if not sentence_tokens:
            return acum_score

        # Iterative on purpose: the former one-call-per-token recursion
        # copied the list tail at every step and raised RecursionError on
        # sentences longer than the interpreter's recursion limit.
        for current_token in sentence_tokens:
            token_score = sum(self.valueOf(tag) for tag in current_token[2])
            if previous_token is not None:
                previous_tags = previous_token[2]
                # Only the first matching modifier applies: inc, dec, inv.
                if 'inc' in previous_tags:
                    token_score *= 2.0
                elif 'dec' in previous_tags:
                    token_score /= 2.0
                elif 'inv' in previous_tags:
                    token_score *= -1.0
            acum_score = acum_score + token_score
            previous_token = current_token
        return acum_score

    def sentiment_score(self, dictTaggedSentences):
        """Return the sum of ``sentence_score`` over all given sentences."""
        return sum(
            self.sentence_score(sentence, None, 0.0)
            for sentence in dictTaggedSentences
        )

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Split, POS-tag and dictionary-tag the text, then return its score."""
        sentences = self.splitter.splitParagraphToListOfSentences(
            textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)
        return self.sentiment_score(dict_tagged_sentences)
class SentimentAnalyzingService(object):
    """Dictionary-based sentiment scorer.

    Text is split into sentences, POS-tagged, tagged again from the
    dictionary files, and then reduced to a single numeric score in which
    "positive" tags count +1 and "negative" tags count -1, adjusted by the
    "inc" / "dec" / "inv" tags of the preceding token.
    """

    # Original hard-coded dictionary locations (absolute paths on the
    # author's machine). Kept as the default for backward compatibility.
    DEFAULT_DICTIONARY_PATHS = (
        "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
        "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
        "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
        "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
        "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
    )

    def __init__(self, dictionary_paths=None):
        """Create the splitter, POS tagger and dictionary tagger.

        Args:
            dictionary_paths: Optional list of dictionary file paths for
                ``DictionaryTagger``. Defaults to
                ``DEFAULT_DICTIONARY_PATHS`` so zero-argument construction
                behaves as before.
        """
        if dictionary_paths is None:
            # Hand over a fresh list, as the original code did.
            dictionary_paths = list(self.DEFAULT_DICTIONARY_PATHS)
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger(dictionary_paths)

    def valueOf(self, sentiment):
        """Return 1 for "positive", -1 for "negative", and 0 otherwise."""
        if sentiment == "positive":
            return 1
        if sentiment == "negative":
            return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Accumulate the sentiment score of one sentence.

        Args:
            sentence_tokens: Sequence of tokens; index 2 of each token must
                be its collection of tags (presumably tokens are
                (word, lemma, tags) triples -- confirm against the taggers).
            previous_token: Token preceding the first element of
                ``sentence_tokens``, or ``None`` when there is none.
            acum_score: Score accumulated so far; returned unchanged when
                ``sentence_tokens`` is empty or otherwise falsy.

        Returns:
            ``acum_score`` plus the adjusted score of every token.
        """
        if not sentence_tokens:
            return acum_score

        # Iterative on purpose: the former one-call-per-token recursion
        # copied the list tail at every step and raised RecursionError on
        # sentences longer than the interpreter's recursion limit.
        for current_token in sentence_tokens:
            token_score = sum(self.valueOf(tag) for tag in current_token[2])
            if previous_token is not None:
                previous_tags = previous_token[2]
                # Only the first matching modifier applies: inc, dec, inv.
                if "inc" in previous_tags:
                    token_score *= 2.0
                elif "dec" in previous_tags:
                    token_score /= 2.0
                elif "inv" in previous_tags:
                    token_score *= -1.0
            acum_score = acum_score + token_score
            previous_token = current_token
        return acum_score

    def sentiment_score(self, dictTaggedSentences):
        """Return the sum of ``sentence_score`` over all given sentences."""
        return sum(
            self.sentence_score(sentence, None, 0.0)
            for sentence in dictTaggedSentences
        )

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Split, POS-tag and dictionary-tag the text, then return its score."""
        sentences = self.splitter.splitParagraphToListOfSentences(textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)
        return self.sentiment_score(dict_tagged_sentences)