# Module-level imports assumed by the two methods below. STOP_WORDS is taken
# from spaCy's Portuguese list to match the pt_core_news_sm model loaded in
# pre_processing; SentimentIntensityAnalyzer comes from the vaderSentiment
# package (it is also available via nltk.sentiment.vader). DataHandler and
# Processing are project-local helpers imported elsewhere.
import nltk
import pandas as pd
import spacy
from nltk.corpus import stopwords
from spacy.lang.pt.stop_words import STOP_WORDS
from unidecode import unidecode
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


def vader_sentiment(self):
    # Load the raw network dataset and drop rows without tweet text.
    handler = DataHandler(self.social_network, self.search_word)
    df_network = handler.read_network_dataset()
    df = df_network[df_network.tweet != '']

    prepross = Processing(self.social_network, self.search_word)
    analyzer = SentimentIntensityAnalyzer()

    predict_df = pd.DataFrame(
        columns=['date', 'hashtag', 'tweet', 'clean_tweet', 'sentiment'])

    # Score each cleaned tweet with VADER's compound polarity.
    for i, row in df.iterrows():
        clean_tweet = prepross.clean_text(row['tweet'])
        sentiment = analyzer.polarity_scores(clean_tweet)['compound']
        predict_df.loc[i] = [
            row['created_at'], row['hashtag'], row['tweet'],
            clean_tweet, sentiment
        ]

    predict_df.to_csv('data/output/dataset_predict.csv', sep=';', index=False)
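# For reference, a minimal sketch of how the 'compound' score used above
# behaves. Assumptions: the vaderSentiment package is installed (the same
# analyzer is also available from nltk.sentiment.vader after
# nltk.download('vader_lexicon')); the function name is hypothetical.
def demo_vader_scores():
    analyzer = SentimentIntensityAnalyzer()
    # 'compound' is normalized to [-1, 1]; a common reading is
    # >= 0.05 positive, <= -0.05 negative, otherwise neutral.
    for text in ['I love this!', 'This is terrible.', 'It is a thing.']:
        print(text, analyzer.polarity_scores(text)['compound'])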
def pre_processing(self):
    # Load the raw network dataset and drop rows without tweet text.
    handler = DataHandler(self.social_network, self.search_word)
    df_network = handler.read_network_dataset()
    df = df_network[df_network.tweets != '']

    nlp = spacy.load('pt_core_news_sm')
    # nlp = spacy.load('en_core_web_sm')

    # Fetch the NLTK resources once, then build the stop-word list
    # (spaCy + NLTK), stripping accents and lowercasing so it matches
    # the cleaned text.
    nltk.download('stopwords')
    nltk.download('punkt')
    nltk.download('rslp')
    stop_words_ = STOP_WORDS.union(stopwords.words('english'))
    stop_words = [unidecode(stop).lower() for stop in stop_words_]

    # Vocabulary over the whole dataset, for single words and for n-grams.
    all_words, all_words_n_gram = Processing.words_dataset(
        df['tweets'], stop_words, nlp)

    bag_words = []
    bag_words_n_gram = []
    n_grams = []
    clean_tweets = []
    for sentence in df['tweets']:
        # Clean, lemmatize, and rejoin each post.
        clean = Processing.clean_text(sentence, stop_words)
        tokens = Processing.lemma(clean.split(), nlp)
        concat = ' '.join(tokens)
        # Build the n-grams once and reuse them for both representations.
        ngram = Processing.n_gram(concat)
        n_grams.append(ngram)
        bag_words_n_gram.append(
            Processing.bag_of_words(ngram, all_words_n_gram))
        bag_words.append(Processing.bag_of_words(concat.split(), all_words))
        clean_tweets.append(concat)

    Processing.word_cloud(clean_tweets)

    dataset = pd.DataFrame({
        'Posts': clean_tweets,
        'BOW': bag_words,
        'N-gram': n_grams,
        'BOW-N': bag_words_n_gram
    })
    handler.store_processed_dataset(dataset)
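# Illustrative sketch of the representations built above, using plain Python
# in place of the project's Processing helpers (whose exact semantics are
# assumed here): bag_of_words marks which vocabulary terms occur in a post,
# and n_gram pairs adjacent tokens (bigrams). The function name and sample
# vocabulary are hypothetical.
def demo_bow_and_ngrams():
    vocabulary = ['vacina', 'segura', 'funciona', 'medo']
    tokens = 'vacina funciona'.split()
    bow = [1 if term in tokens else 0 for term in vocabulary]  # [1, 0, 1, 0]
    bigrams = list(zip(tokens, tokens[1:]))  # [('vacina', 'funciona')]
    print(bow, bigrams)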