def adj_patterns_table():
    """
    Compute and write the adjective pattern table of each *uci* and
    *corrected* file.

    Files whose path matches ``.*/uci/.+`` are handled first, with the default
    language of ``pattern_recognition.adj_pattern_table``. Files whose path
    matches ``.*/corrected/.+`` are handled next, with ``lang='es'``. Each
    table is passed to ``file_writer.write`` with the directory
    *RESULT_PATH/context_tables/adj/<directory name of the file>* and the
    filename of the file.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    # One entry per corpus: (path regex, keyword arguments given to
    # adj_pattern_table).
    corpora = (
        (r'.*/uci/.+', {}),  # English files
        (r'.*/corrected/.+', {'lang': 'es'}),  # Spanish files
    )
    output_root = os.path.join(RESULT_PATH, 'context_tables', 'adj')

    for path_regex, table_kwargs in corpora:
        id_files = file_load.get_id_files_by_file_paths([path_regex])
        for loaded in file_load.load_database(id_files=id_files):
            pattern_table = pattern_recognition.adj_pattern_table(
                loaded.sentence_list(), **table_kwargs)
            target_dir = os.path.join(output_root,
                                      loaded.get_directory_name())
            file_writer.write(pattern_table, target_dir,
                              loaded.get_filename())
def write():
    """
    Write, for each file of the database, a tab-separated table of its words
    and their synsets.

    Files are loaded with ``load_deptrees=False``. The words of a file are
    sorted by word form, and each row holds the word id, the synset id and the
    synset name (``None`` twice when the word has no synset), followed by the
    lemma after a ``#``. The text is passed to ``file_writer.write`` with a
    directory built under *../../data/disambiguated/* and the ``.txt`` name
    extracted from the file name.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for loaded in file_load.load_database(load_deptrees=False):
        raw_subpath = re.findall(r'../../data/raw/(.+)', loaded.file_name)
        txt_names = re.findall(r'.+/(.+\.txt)', loaded.file_name)

        tokens = []
        for review in loaded.reviews:
            for sentence in review.sentences:
                tokens.extend(sentence.words)
        tokens.sort(key=lambda token: token.word)

        rows = ['ID_Word\tID_Synset\tSynset\n']
        for token in tokens:
            if token.synset is None:
                synset_columns = str(None) + '\t' + str(None)
            else:
                synset_columns = (str(token.synset.id_synset) + '\t'
                                  + token.synset.synset_name)
            rows.append(str(token.id_word) + '\t' + synset_columns
                        + '\t# ' + token.lemma + '\n')

        file_writer.write(
            ''.join(rows),
            os.path.join('../../data/disambiguated/', raw_subpath[0]),
            txt_names[0])
def test_count():
    """
    Print a table with the word, lemma and synset counts of each *uci* and
    *corrected* file.

    Files are loaded with ``load_deptrees=False`` and counts are computed with
    ``loacore.analysis.vocabularies``.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.vocabularies as voc
    from prettytable import PrettyTable

    id_files = file_load.get_id_files_by_file_paths(
        [r'.*/uci/.+', r'.*/corrected/.+'])
    files = file_load.load_database(id_files=id_files, load_deptrees=False)

    # Counts are indexed by ID_File, one kind of count at a time.
    word_count = {f.id_file: voc.word_count(f) for f in files}
    lemma_count = {f.id_file: voc.lemma_count(f) for f in files}
    synset_count = {f.id_file: voc.synset_count(f) for f in files}

    print("Sizes of vocabularies")
    table = PrettyTable(['File', 'Word Count', 'Lemma Count', 'Synset Count'])
    for f in files:
        row = [f.get_filename()]
        row.extend(counts[f.id_file]
                   for counts in (word_count, lemma_count, synset_count))
        table.add_row(row)
    print(table)
def test_multiprocessed_load():
    """
    Load the file with ID_File 1 using ``workers=4`` and print the dependency
    tree string of each of its sentences, with ``colored_polarity=True``.
    """
    import loacore.load.file_load as file_load

    loaded_files = file_load.load_database(id_files=[1], workers=4)
    # Lazily walk files -> reviews -> sentences.
    all_sentences = (sentence
                     for loaded in loaded_files
                     for review in loaded.reviews
                     for sentence in review.sentences)
    for sentence in all_sentences:
        print(sentence.dep_tree.dep_tree_str(colored_polarity=True))
def test_tokens():
    """
    Print the result of ``word2vec.get_tokens_list`` for the *uci* files,
    loaded with ``load_deptrees=False``.
    """
    import loacore.load.file_load as file_load
    import loacore.learning.word2vec as we

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids,
                                        load_deptrees=False)
    tokens = we.get_tokens_list(uci_files)
    print(tokens)
def test_check_polarities():
    """
    Run ``polarity_check.check_polarity`` on the *uci* files, loaded with
    ``load_sentences=False``.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.polarity_check as polarity_check

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids,
                                        load_sentences=False)
    polarity_check.check_polarity(uci_files)
def test_full_process():
    """
    Run ``svm.full_process`` on the files whose path matches
    ``.*/uci/yelp_labelled.txt``, loaded with ``load_deptrees=False``.
    """
    import loacore.learning.svm as svm
    import loacore.load.file_load as file_load

    yelp_ids = file_load.get_id_files_by_file_paths(
        [r'.*/uci/yelp_labelled.txt'])
    yelp_files = file_load.load_database(id_files=yelp_ids,
                                         load_deptrees=False)
    svm.full_process(yelp_files)
def test_colored_reviews():
    """
    Print each review of the *uci* files as returned by ``review_str`` with
    ``colored_polarity=True`` and ``analysis=['label']``.

    Files are loaded with ``load_deptrees=False``.
    """
    import loacore.load.file_load as file_load

    # Files are selected with the list-based get_id_files_by_file_paths.
    ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    files = file_load.load_database(id_files=ids, load_deptrees=False)
    for file in files:
        for review in file.reviews:
            print(review.review_str(colored_polarity=True,
                                    analysis=['label']))
def test_vectors():
    """
    Compute word vectors from the *uci* files with ``word2vec.word_2_vec`` and
    print the vector of the word 'movie', followed by its length.

    Files are loaded with ``load_deptrees=False``.
    """
    import loacore.load.file_load as file_load
    import loacore.learning.word2vec as we

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids,
                                        load_deptrees=False)
    word_vectors = we.word_2_vec(uci_files)
    for show in (lambda vector: vector, len):
        print(show(word_vectors['movie']))
def test_print_colored_deptree():
    """
    Print the dependency tree string of each sentence of the file with
    ID_File 1, with ``colored_polarity=True``.
    """
    import loacore.load.file_load as file_load

    files = file_load.load_database(id_files=[1])
    for file in files:
        for review in file.reviews:
            for sentence in review.sentences:
                print(sentence.dep_tree.dep_tree_str(colored_polarity=True))
def test_load_polarities():
    """
    Print each review of the *uci* files followed by the positive, negative
    and objective scores of its "label" polarity.

    Files are loaded with ``load_sentences=False``.
    """
    import loacore.load.file_load as file_load
    import itertools

    # Files are selected with the list-based get_id_files_by_file_paths.
    ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    files = file_load.load_database(id_files=ids, load_sentences=False)
    reviews = itertools.chain.from_iterable(f.reviews for f in files)
    for review in reviews:
        label_polarity = review.polarities["label"]
        print(review.review, " : ",
              label_polarity.pos_score, ", ",
              label_polarity.neg_score, ", ",
              label_polarity.obj_score)
def test_pos_tag_pattern():
    """
    Recognize the ``['RN']`` PoS tag pattern in the sentences of the files
    with ID_File 31 and 33, and print the patterns found with
    ``pattern_recognition.print_patterns``.
    """
    # Imported here so that the function does not depend on a module-level
    # import of file_load.
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    files = file_load.load_database(id_files=[31, 33])
    sentences = []
    for file in files:
        for review in file.reviews:
            sentences += review.sentences
    patterns = pattern_recognition.pos_tag_patterns_recognition(
        sentences, ['RN'])
    pattern_recognition.print_patterns(patterns)
def test_polarity_pie_charts():
    """
    Compute the simple polarity of the *uci* files and save the corresponding
    pie charts.

    Polarities are computed with
    ``sentiment_analysis.compute_simple_files_polarity`` and passed to
    ``plot_polarities.save_polarity_pie_charts`` with the directory
    *RESULT_PATH/sentiment_analysis/simple/uci* and the file name
    *uci_polarity_pie_charts.pdf*.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.sentiment_analysis as sentiment_analysis
    import loacore.utils.plot_polarities as plot_polarities
    import os
    from loacore.conf import RESULT_PATH

    # The leading '.*' lets the pattern cover whatever precedes the uci
    # directory in the file path, however the pattern is anchored.
    ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    files = file_load.load_database(id_files=ids)
    polarities = sentiment_analysis.compute_simple_files_polarity(files)
    plot_polarities.save_polarity_pie_charts(
        polarities,
        file_path=os.path.join(RESULT_PATH, 'sentiment_analysis', 'simple',
                               'uci'),
        file_name='uci_polarity_pie_charts.pdf')
def test_write_polarity_check():
    """
    Run ``polarity_check.write_polarity_check`` on the *uci* files, loaded
    with ``load_deptrees=False``.

    The 'simple' and 'pattern_adj_cc' analyses are checked, with
    ``colored_polarity=True`` and ``select='false_negative'``. The directory
    given is *RESULT_PATH/sentiment_analysis/check/uci*.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.polarity_check as polarity_check
    from loacore.conf import RESULT_PATH
    import os

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids,
                                        load_deptrees=False)

    check_options = {
        'analysis_to_check': ['simple', 'pattern_adj_cc'],
        'colored_polarity': True,
        'select': 'false_negative',
        'directory_path': os.path.join(RESULT_PATH, 'sentiment_analysis',
                                       'check', 'uci'),
    }
    polarity_check.write_polarity_check(uci_files, **check_options)
def test_review_vectors():
    """
    Compute the review vectors of the *uci* files and print the first three
    reviews, each followed by its vector.

    Word vectors come from ``word2vec.word_2_vec`` and review vectors from
    ``word2vec.reviews_2_vec``. Files are loaded with ``load_deptrees=False``.
    """
    import loacore.load.file_load as file_load
    import loacore.learning.word2vec as we

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids,
                                        load_deptrees=False)
    word_vectors = we.word_2_vec(uci_files)

    # Flatten the reviews of all files into a single list.
    all_reviews = []
    for review_list in [f.reviews for f in uci_files]:
        all_reviews.extend(review_list)

    review_vectors = we.reviews_2_vec(all_reviews, word_vectors)
    for position in (0, 1, 2):
        print(all_reviews[position].review)
        print(review_vectors[position])
def test_label_pattern():
    """
    Recognize the ``['neg']`` label pattern in the sentences of the files with
    ID_File 31 and 33, and print the word and label of each node of each
    pattern found.
    """
    # Imported here so that the function does not depend on a module-level
    # import of file_load.
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    files = file_load.load_database(id_files=[31, 33])
    sentences = []
    for file in files:
        for review in file.reviews:
            sentences += review.sentences
    patterns = pattern_recognition.label_patterns_recognition(
        sentences, ['neg'])
    for pattern in patterns:
        for node in pattern:
            print(node.word.word + " : " + node.label)
def verb_patterns_tables():
    """
    Compute and write the verb context table of each *uci* and *corrected*
    file.

    Files are loaded with ``load_deptrees=False``. Each table is passed to
    ``file_writer.write`` with the directory
    *RESULT_PATH/context_tables/verbs/<directory name of the file>* and the
    filename of the file.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    id_files = file_load.get_id_files_by_file_paths(
        [r'.*/uci/.+', r'.*/corrected/.+'])
    output_root = os.path.join(RESULT_PATH, 'context_tables', 'verbs')

    for loaded in file_load.load_database(id_files=id_files,
                                          load_deptrees=False):
        context_table = pattern_recognition.verb_context_table(
            loaded.sentence_list())
        target_dir = os.path.join(output_root, loaded.get_directory_name())
        file_writer.write(context_table, target_dir, loaded.get_filename())
def test_load_db():
    """
    Load the whole database and print its content.

    For each review, its id and text are printed, then for each of its
    sentences, the word form, lemma and synset name (or ``None``) of each
    word.
    """
    # Imported here so that the function does not depend on a module-level
    # import of file_load.
    import loacore.load.file_load as file_load

    files = file_load.load_database()
    for file in files:
        for review in file.reviews:
            print("Review : ")
            print(str(review.id_review) + " : " + review.review)
            for sentence in review.sentences:
                print(" Sentence : ")
                for word in sentence.words:
                    print(" Word : " + word.word + " : " + word.lemma)
                    print(" Lemma : " + word.lemma)
                    if word.synset is not None:
                        print(" Synset : " + word.synset.synset_name)
                    else:
                        print(" Synset : None")
def write():
    """
    Write, for each file of the database, its reviews as lines of the form
    ``<file index>\\t<review>``.

    Files are loaded with ``load_deptrees=False``. The text is passed to
    ``file_writer.write`` with a directory built under
    *../../data/normalized/* and the ``.txt`` name extracted from the file
    name.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for loaded in file_load.load_database(load_deptrees=False):
        raw_subpath = re.findall(r'../../data/raw/(.+)', loaded.file_name)
        txt_names = re.findall(r'.+/(.+\.txt)', loaded.file_name)

        review_lines = [str(review.file_index) + '\t' + review.review + "\n"
                        for review in loaded.reviews]

        file_writer.write(
            ''.join(review_lines),
            os.path.join('../../data/normalized/', raw_subpath[0]),
            txt_names[0])
def test_pos_tag_frequencies():
    """
    Compute the PoS tag frequencies of the *corrected* files, then write them
    and save the corresponding bar chart.

    Files are loaded with ``load_reviews=False``. Frequencies are written
    with the path
    *RESULT_PATH/frequencies/simple_pos_tag_frequencies/table/corrected*. The
    bar chart is saved (not plotted) with ``val_number=60`` as
    *corrected_simple_pos_tag_frequencies.pdf* with the path
    *RESULT_PATH/frequencies/simple_pos_tag_frequencies/charts/corrected*.
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.frequencies as frequencies
    import loacore.utils.plot_frequencies as plot_frequencies
    import os
    from loacore.conf import RESULT_PATH

    corrected_ids = file_load.get_id_files_by_file_paths(
        [r'.*/corrected/.+'])
    corrected_files = file_load.load_database(id_files=corrected_ids,
                                              load_reviews=False)
    labels, freq = frequencies.pos_tag_frequencies(corrected_files)

    result_root = os.path.join(RESULT_PATH, 'frequencies',
                               'simple_pos_tag_frequencies')
    table_dir = os.path.join(result_root, 'table', 'corrected')
    plot_frequencies.write_frequencies(freq, file_path=table_dir)

    charts_dir = os.path.join(result_root, 'charts', 'corrected')
    plot_frequencies.frequencies_bar_chart(
        freq,
        plot=False,
        save=True,
        file_path=charts_dir,
        file_name="corrected_simple_pos_tag_frequencies.pdf",
        val_number=60)
def print_polarity_table(file_score_dict):
    """
    Print a table with columns File, Pos_Score, Neg_Score and Obj_Score.

    The File column holds the ``.txt`` name extracted from the file name of
    each file. Notice that displayed scores are rounded to 3 decimals.

    :param file_score_dict:
        A :obj:`dict` that maps ID_Files to a score tuple
        (pos_score, neg_score, obj_score).
    :type file_score_dict: :obj:`dict` of :obj:`int` : :obj:`tuple`
    :raises KeyError: if an ID_File of *file_score_dict* is not among the
        loaded files.
    """
    import re
    from prettytable import PrettyTable
    import loacore.load.file_load as file_load

    # Only file level data is loaded: names are all that is needed here.
    # IDs are passed as a list, like in the other calls of load_database.
    files = file_load.load_database(id_files=list(file_score_dict),
                                    load_reviews=False,
                                    load_sentences=False,
                                    load_words=False,
                                    load_deptrees=False)
    file_names = {f.id_file: re.findall(r'.+/(.+\.txt)', f.file_name)[0]
                  for f in files}

    table = PrettyTable(['File', 'Pos_Score', 'Neg_Score', 'Obj_Score'])
    for id_file, scores in file_score_dict.items():
        table.add_row([file_names[id_file],
                       "%.3f" % scores[0],
                       "%.3f" % scores[1],
                       "%.3f" % scores[2]])
    print(table)
def test_general_pattern():
    """
    Recognize the pattern ``[['V'], ['cc', 'cd', 'ci']]``, matched on
    ``['pos_tag', 'label']``, in the sentences of the files with ID_File 31
    and 33, and print each pattern found.

    Each pattern is printed on one line, between parentheses, as a comma
    separated list of "word : PoS tag : label" nodes.
    """
    # Imported here so that the function does not depend on a module-level
    # import of file_load.
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    files = file_load.load_database(id_files=[31, 33])
    sentences = []
    for file in files:
        for review in file.reviews:
            sentences += review.sentences
    patterns = pattern_recognition.general_pattern_recognition(
        sentences, [['V'], ['cc', 'cd', 'ci']], ['pos_tag', 'label'])
    for pattern in patterns:
        print("( ", end='')
        # All nodes but the last are followed by a comma.
        for node in pattern[:-1]:
            print(node.word.word, " : ", node.word.PoS_tag, " : ",
                  node.label, end=", ")
        # The last node closes the parenthesis and ends the line.
        print(pattern[-1].word.word, " : ", pattern[-1].word.PoS_tag, " : ",
              pattern[-1].label, " )")
def write():
    """
    Write, for each file of the database, a tab-separated table of its
    tokens.

    Files are loaded with ``load_deptrees=False``. The words of a file are
    sorted by word form. The table starts with the header line
    ``ID_Word\\ttoken``, followed by one ``<word id>\\t<word form>`` line per
    word. The text is passed to ``file_writer.write`` with a directory built
    under *../../data/tokenized/* and the ``.txt`` name extracted from the
    file name.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    files = file_load.load_database(load_deptrees=False)
    for file in files:
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)

        words = []
        for review in file.reviews:
            for sentence in review.sentences:
                words.extend(sentence.words)
        words.sort(key=lambda w: w.word)

        # The header ends with a newline, so that the first token starts on
        # its own line instead of being glued to the header.
        lines = ['ID_Word\ttoken\n']
        lines.extend(str(word.id_word) + '\t' + word.word + '\n'
                     for word in words)

        file_writer.write(''.join(lines),
                          os.path.join('../../data/tokenized/', directory[0]),
                          filename[0])
def write():
    """
    Write, for each file of the database, the dependency tree string of each
    of its sentences.

    Each sentence is introduced by a framed header holding the file index of
    its review and its words joined by spaces, followed by the string
    returned by ``dep_tree_str(print_dep_tree=False)`` and two newlines. The
    text is passed to ``file_writer.write`` with a directory built under
    *../../data/dep_trees/* and the ``.txt`` name extracted from the file
    name.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for loaded in file_load.load_database():
        raw_subpath = re.findall(r'../../data/raw/(.+)', loaded.file_name)
        txt_names = re.findall(r'.+/(.+\.txt)', loaded.file_name)

        chunks = []
        for review in loaded.reviews:
            for sentence in review.sentences:
                chunks.append("+-------------------------------------------------------------------------------------------\n")
                chunks.append("| File Index : " + str(review.file_index) + "\n")
                chunks.append("| Sentence : " + " ".join(
                    [w.word for w in sentence.words]) + "\n")
                chunks.append("+-------------------------------------------------------------------------------------------\n")
                chunks.append(
                    sentence.dep_tree.dep_tree_str(print_dep_tree=False))
                chunks.append("\n\n")

        file_writer.write("".join(chunks),
                          os.path.join('../../data/dep_trees/',
                                       raw_subpath[0]),
                          txt_names[0])
def test_polarity_pos_tags(polarity):
    """
    Compute the PoS tag frequencies of the words of the given polarity in the
    *uci* files, then write them and save the corresponding bar chart.

    Files are loaded with ``load_reviews=False``. Results go under
    *RESULT_PATH/frequencies/polarity/<polarity>/<polarity>_pos_tag_frequencies*:
    frequencies are written with the sub path *table/uci*, and the bar chart
    is saved (not plotted) with ``val_number=60`` as
    *uci_<polarity>_pos_tag_frequencies.pdf* with the sub path *charts/uci*.

    :param polarity:
        Polarity passed to ``frequencies.polarity_word_pos_tag_frequencies``,
        also used to build result paths and file names.
    :type polarity: string
    """
    import loacore.load.file_load as file_load
    import loacore.analysis.frequencies as frequencies
    import loacore.utils.plot_frequencies as plot_frequencies
    import os
    from loacore.conf import RESULT_PATH

    uci_ids = file_load.get_id_files_by_file_paths([r'.*/uci/.+'])
    uci_files = file_load.load_database(id_files=uci_ids, load_reviews=False)
    labels, freq = frequencies.polarity_word_pos_tag_frequencies(uci_files,
                                                                 polarity)

    result_root = os.path.join(RESULT_PATH, 'frequencies', 'polarity',
                               polarity, polarity + '_pos_tag_frequencies')
    table_dir = os.path.join(result_root, 'table', 'uci')
    plot_frequencies.write_frequencies(freq, file_path=table_dir)

    charts_dir = os.path.join(result_root, 'charts', 'uci')
    chart_name = "uci_" + polarity + "_pos_tag_frequencies.pdf"
    plot_frequencies.frequencies_bar_chart(
        freq,
        plot=False,
        save=True,
        file_path=charts_dir,
        file_name=chart_name,
        val_number=60)
def save_polarity_pie_charts(file_score_dict, gui=False,
                             file_path=os.path.join(RESULT_PATH,
                                                    'sentiment_analysis'),
                             file_name='polarity_pie_charts.pdf'):
    """
    Plot polarity pie charts using Matplotlib, and save them into a .pdf file.

    One pie is drawn per entry of *file_score_dict*, on a grid of at most 4
    columns. Each pie is titled with the ``.txt`` name of its file, or with
    its ID_File if that file is not among the loaded files. With an empty
    :obj:`dict`, the saved page only holds the legend and the title. The
    figure is closed before returning.

    :param file_score_dict:
        Data to plot and save. The :obj:`dict` maps ID_Files to a polarity
        tuple (pos_score, neg_score, obj_score).
    :type file_score_dict: :obj:`dict` of :obj:`int` : :obj:`tuple`
    :param gui:
        Specify if a gui should be used to save file. If the dialog is
        cancelled, nothing is saved.
    :type gui: boolean
    :param file_path:
        If gui is not called : path of the directory in which plots will be
        saved. If directory doesn't exist, will be created.
        Default is set to *RESULT_PATH/sentiment_analysis/*
    :type file_path: |path-like-object|
    :param file_name: Name of the saved file.
    :type file_name: string
    """
    import loacore.load.file_load as file_load
    from matplotlib.backends.backend_pdf import PdfPages

    # Only file level data is loaded: names are all that is needed here.
    files = file_load.load_database(id_files=list(file_score_dict),
                                    load_reviews=False,
                                    load_sentences=False,
                                    load_words=False,
                                    load_deptrees=False)

    # Titles are resolved by ID_File rather than by position, so that a pie
    # never gets the name of another file when the loaded files do not come
    # in the order of the dict.
    titles_by_id = {f.id_file: re.findall(r'.+/(.+\.txt)', f.file_name)[0]
                    for f in files}
    pies = [(titles_by_id.get(id_file, str(id_file)), scores)
            for id_file, scores in file_score_dict.items()]

    # Grid of at most 4 columns, with just enough rows to hold every pie
    # (ceiling division), and at least one cell so that the figure can
    # always be created.
    max_columns = 4
    pie_count = len(pies)
    num_column = max(1, min(pie_count, max_columns))
    num_row = max(1, (pie_count + max_columns - 1) // max_columns)

    # squeeze=False: axes is always a 2D array, whatever the grid shape.
    fig, axes = plt.subplots(num_row, num_column, squeeze=False,
                             constrained_layout=False)
    try:
        fig.set_figheight(5 * num_row)
        fig.set_figwidth(5 * num_column)

        colors = ['green', 'red', 'skyblue']
        cells = axes.flatten()
        for cell, (title, scores) in zip(cells, pies):
            cell.pie(scores, colors=colors, autopct='%1.1f%%')
            cell.set_title(title, size='x-small')
        # Hide the grid cells left without a pie.
        for cell in cells[pie_count:]:
            cell.axis("off")

        pos_patch = mpatches.Patch(color='green', label='Positive')
        neg_patch = mpatches.Patch(color='red', label='Negative')
        obj_patch = mpatches.Patch(color='skyblue', label='Objective')
        fig.legend(handles=[pos_patch, neg_patch, obj_patch])
        fig.suptitle("Polarity computation results", size='xx-large')

        # Save PDF
        if gui:
            from tkinter import filedialog
            from tkinter import Tk
            root = Tk()
            root.withdraw()
            pdf_path = filedialog.asksaveasfilename(
                initialdir="/", title="Select file",
                filetypes=[("PDF", "*.pdf")])
            root.destroy()
            if not pdf_path:
                # Dialog cancelled: there is no file to write to.
                return
        else:
            os.makedirs(file_path, exist_ok=True)
            pdf_path = os.path.join(file_path, file_name)

        # The context manager closes the PDF even if savefig fails.
        with PdfPages(pdf_path) as pdf:
            pdf.savefig(fig)
    finally:
        # Release the figure, so that repeated calls do not pile up figures.
        plt.close(fig)
def load_uci():
    """
    Load the file with ID_File 2 from the database.

    :return: The result of ``file_load.load_database(id_files=[2])``.
    """
    from loacore.load import file_load

    uci_files = file_load.load_database(id_files=[2])
    return uci_files
def pattern_test():
    """
    Load the files with ID_File 1, 2 and 3, and run
    ``sentiment_analysis.compute_pattern_files_polarity`` on them.
    """
    import loacore.load.file_load as file_load

    loaded_files = file_load.load_database(id_files=[1, 2, 3])

    # Imported only once the files are loaded.
    import loacore.analysis.sentiment_analysis as sentiment_analysis

    sentiment_analysis.compute_pattern_files_polarity(loaded_files)