def _analyze_company_source(self, company_id, source_type, from_date, to_date, write_to_filename):
    # Get documents.
    documents = getattr(self.dbmodel, 'get_' + source_type)(company_id, from_date, to_date)

    # Create a final list.
    docs_list = []
    for doc in documents:
        # Process text.
        if source_type == 'articles':
            text = TextProcessing.process_article_text(doc['text'])
        else:
            text = TextProcessing.process_facebook_text(doc['text'])

        # Skip empty documents.
        if len(text) == 0:
            continue

        # Get sentiment values of the text.
        if source_type == 'articles':
            sentiment_number = self.s_analyzer.calculate_vader_sentiment('custom_dict_orig', text, True)
        else:
            sentiment_number = self.s_analyzer.calculate_vader_sentiment('custom_dict_orig', text, False)
        sentiment_polarity = self.s_analyzer.format_sentiment_value(sentiment_number)

        # Save data.
        doc_date = self._get_doc_date(source_type, doc)
        docs_list.append([company_id, doc_date, sentiment_number, sentiment_polarity])

    # Write to file.
    self.text_writer.write_file(docs_list, write_to_filename, 'csv', ',', 'a')

def _analyze_documents_by_source(self, company_id, source_type, from_date, to_date, write_to_filename=False):
    # Get documents.
    documents = getattr(self.dbmodel, 'get_' + source_type)(company_id, from_date, to_date)

    # Create a final list.
    docs_list = []
    for doc in documents:
        # Process text.
        if source_type == 'articles':
            text = TextProcessing.process_article_text(doc['text'])
        else:
            text = TextProcessing.process_facebook_text(doc['text'])

        # Skip empty documents.
        if len(text) == 0:
            continue

        # Get sentiment values of the text.
        sent_sum, sent_division = self.s_analyzer.calculate_vader_sentiment_values('vader', text)

        # Add this to the list.
        docs_list.append([sent_sum, sent_division, text])

    # Prepare header.
    header = ['sentiment_sum', 'sentiment_division', 'text']
    docs_list.insert(0, header)

    file_name = '%d_%s' % (company_id, source_type)
    self.text_writer.write_file(docs_list, file_name, 'csv', '\t', 'w')

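# Example (hypothetical): how the two export helpers above might be driven.
# The wrapper object `analyzer`, its construction, and the argument values are
# assumptions for illustration only; the method signatures come from the code
# above. Note that any source_type other than 'articles' is processed as
# Facebook-style text.
#
#   analyzer._analyze_company_source(1, 'articles', '2015-01-01', '2015-12-31', 'articles_sentiment')
#   analyzer._analyze_documents_by_source(1, 'articles', '2015-01-01', '2015-12-31')
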
def _process_tweets(self, company_id, examined_date, used_dict_name='vader'):
    # Select all tweets for the examined day.
    tweets = self.dbmodel.get_daily_tweets(company_id, examined_date)
    counter = {'pos': 0, 'neu': 0, 'neg': 0}

    # Calculate sentiment for all tweets.
    for tw in tweets:
        tw_text = TextProcessing.process_facebook_text(tw['text'])
        if len(tw_text) == 0:
            continue  # skip empty tweets

        sent_value = self.s_analyzer.calculate_vader_sentiment(used_dict_name, tw_text, False)
        polarity = self.s_analyzer.format_sentiment_value(sent_value)
        counter[polarity] += 1

    # Result.
    return counter

def _process_yahoo(self, company_id, examined_date, used_dict_name='vader'):
    # Select all Yahoo Finance articles for the examined day.
    articles = self.dbmodel.get_daily_articles(company_id, examined_date)
    counter = {'pos': 0, 'neu': 0, 'neg': 0}

    # Calculate sentiment for all articles.
    for art in articles:
        art_text = TextProcessing.process_article_text(art['text'])
        if len(art_text) == 0:
            continue  # skip empty articles

        sent_value = self.s_analyzer.calculate_vader_sentiment(used_dict_name, art_text, True)
        polarity = self.s_analyzer.format_sentiment_value(sent_value)
        counter[polarity] += 1

    # Result.
    return counter

def _process_fb_comments(self, company_id, examined_date, used_dict_name='vader'):
    # Select all Facebook comments for the examined day.
    comments = self.dbmodel.get_daily_fb_comments(company_id, examined_date)
    counter = {'pos': 0, 'neu': 0, 'neg': 0}

    # Calculate sentiment for all comments.
    for com in comments:
        com_text = TextProcessing.process_facebook_text(com['text'])
        if len(com_text) == 0:
            continue  # skip empty comments

        sent_value = self.s_analyzer.calculate_vader_sentiment(used_dict_name, com_text, False)
        polarity = self.s_analyzer.format_sentiment_value(sent_value)
        counter[polarity] += 1

    # Result.
    return counter

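# Example (hypothetical): combining the three daily counters into a single
# summary for one company and day. Only the three _process_* helpers above are
# from this module; the wrapper method name and the merging logic are
# assumptions, sketched here for illustration.
#
#   def _daily_sentiment_summary(self, company_id, examined_date):
#       totals = {'pos': 0, 'neu': 0, 'neg': 0}
#       for counter in (self._process_tweets(company_id, examined_date),
#                       self._process_yahoo(company_id, examined_date),
#                       self._process_fb_comments(company_id, examined_date)):
#           for polarity, count in counter.items():
#               totals[polarity] += count
#       return totals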