Exemplo n.º 1
0
def write():
    """
    Write the disambiguation table of every file loaded from the database.

    For each file, the words of all the sentences of all its reviews are
    gathered and sorted by word form. The table starts with the header
    ``ID_Word / ID_Synset / Synset`` and holds one tab-separated row per
    word, followed by a ``# lemma`` comment. Words without a synset get
    ``None`` in both synset columns.

    The result is written under ``../../data/disambiguated/``, in the
    sub-path captured from the original file name.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for loaded_file in file_load.load_database(load_deptrees=False):
        # Sub-path following the raw data folder, and bare .txt file name.
        sub_paths = re.findall(r'../../data/raw/(.+)', loaded_file.file_name)
        txt_names = re.findall(r'.+/(.+\.txt)', loaded_file.file_name)

        tokens = [token
                  for review in loaded_file.reviews
                  for sentence in review.sentences
                  for token in sentence.words]
        tokens.sort(key=lambda token: token.word)

        rows = ['ID_Word\tID_Synset\tSynset\n']
        for token in tokens:
            synset = token.synset
            if synset is None:
                synset_id, synset_name = str(None), str(None)
            else:
                synset_id = str(synset.id_synset)
                synset_name = synset.synset_name
            rows.append(str(token.id_word) + '\t' + synset_id + '\t' +
                        synset_name + '\t# ' + token.lemma + '\n')

        target_dir = os.path.join('../../data/disambiguated/', sub_paths[0])
        file_writer.write(''.join(rows), target_dir, txt_names[0])
Exemplo n.º 2
0
def adj_patterns_table():
    """
    Compute and write the adjective pattern table of each English and
    Spanish file.

    Files are selected by path pattern: ``.*/uci/.+`` for the English ones,
    ``.*/corrected/.+`` for the Spanish ones. Each table is written in
    ``RESULT_PATH/context_tables/adj/<file directory name>``, under the
    name of the processed file.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    # (file path pattern, extra keyword arguments of adj_pattern_table).
    # English files are processed without an explicit ``lang`` argument,
    # Spanish files with lang='es'.
    corpora = (
        (r'.*/uci/.+', {}),
        (r'.*/corrected/.+', {'lang': 'es'}),
    )

    for path_pattern, table_kwargs in corpora:
        id_files = file_load.get_id_files_by_file_paths([path_pattern])
        for loaded_file in file_load.load_database(id_files=id_files):
            table = pattern_recognition.adj_pattern_table(
                loaded_file.sentence_list(), **table_kwargs)
            out_dir = os.path.join(RESULT_PATH, 'context_tables', 'adj',
                                   loaded_file.get_directory_name())
            file_writer.write(table, out_dir, loaded_file.get_filename())
Exemplo n.º 3
0
def verb_patterns_tables():
    """
    Compute and write the verb context table of each file whose path
    matches ``.*/uci/.+`` or ``.*/corrected/.+``.

    Files are loaded without their dependency trees. Each table is written
    in ``RESULT_PATH/context_tables/verbs/<file directory name>``, under the
    name of the processed file.
    """
    import os
    from loacore.conf import RESULT_PATH
    import loacore.utils.file_writer as file_writer
    import loacore.load.file_load as file_load
    import loacore.analysis.pattern_recognition as pattern_recognition

    path_patterns = [r'.*/uci/.+', r'.*/corrected/.+']
    id_files = file_load.get_id_files_by_file_paths(path_patterns)

    for loaded_file in file_load.load_database(id_files=id_files,
                                               load_deptrees=False):
        sentences = loaded_file.sentence_list()
        table = pattern_recognition.verb_context_table(sentences)
        out_dir = os.path.join(RESULT_PATH, 'context_tables', 'verbs',
                               loaded_file.get_directory_name())
        file_writer.write(table, out_dir, loaded_file.get_filename())
Exemplo n.º 4
0
def write():
    """
    Write the normalized reviews of every file loaded from the database.

    Each output line holds the file index of a review and the review text,
    separated by a tab. The result is written under
    ``../../data/normalized/``, in the sub-path captured from the original
    file name.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    for loaded_file in file_load.load_database(load_deptrees=False):
        # Sub-path following the raw data folder, and bare .txt file name.
        sub_paths = re.findall(r'../../data/raw/(.+)', loaded_file.file_name)
        txt_names = re.findall(r'.+/(.+\.txt)', loaded_file.file_name)

        lines = [str(review.file_index) + '\t' + review.review + "\n"
                 for review in loaded_file.reviews]

        target_dir = os.path.join('../../data/normalized/', sub_paths[0])
        file_writer.write(''.join(lines), target_dir, txt_names[0])
Exemplo n.º 5
0
def write_frequencies(files_frequencies,
                      file_path=os.path.join(RESULT_PATH, 'frequencies', 'tables')):
    """
    Write one frequency table per file into the *file_path* folder.

    Each key of *files_frequencies* is used as the name of the written file.
    The table holds one ``label<TAB>frequency`` line per entry of the
    associated dictionary.

    :param files_frequencies: Dictionary that maps file names to frequencies.
    :type files_frequencies: :obj:`dict` of :obj:`str` : :obj:`dict` of label : :obj:`float` .
    :param file_path: Directory path
    :type file_path: |path-like-object|
    """

    import loacore.utils.file_writer as file_writer

    for file_name, frequencies in files_frequencies.items():
        rows = [str(label) + '\t' + str(frequency) + '\n'
                for label, frequency in frequencies.items()]
        file_writer.write(''.join(rows), file_path, file_name)
Exemplo n.º 6
0
def write():
    """
    Write the dependency trees of every file loaded from the database.

    For each sentence of each review, a framed header with the file index of
    the review and the space-joined words of the sentence is written,
    followed by the string representation of the sentence dependency tree
    and two line breaks. The result is written under
    ``../../data/dep_trees/``, in the sub-path captured from the original
    file name.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    # Horizontal rule framing each sentence header.
    rule = "+-------------------------------------------------------------------------------------------\n"

    for loaded_file in file_load.load_database():
        # Sub-path following the raw data folder, and bare .txt file name.
        sub_paths = re.findall(r'../../data/raw/(.+)', loaded_file.file_name)
        txt_names = re.findall(r'.+/(.+\.txt)', loaded_file.file_name)

        chunks = []
        for review in loaded_file.reviews:
            for sentence in review.sentences:
                index_line = "| File Index : " + str(review.file_index) + "\n"
                sentence_line = "| Sentence : " + " ".join(
                    [token.word for token in sentence.words]) + "\n"
                tree_str = sentence.dep_tree.dep_tree_str(
                    print_dep_tree=False)
                chunks.extend((rule, index_line, sentence_line, rule,
                               tree_str, "\n\n"))

        target_dir = os.path.join('../../data/dep_trees/', sub_paths[0])
        file_writer.write(''.join(chunks), target_dir, txt_names[0])
Exemplo n.º 7
0
def write():
    """
    Write the token table of every file loaded from the database.

    For each file, the words of all the sentences of all its reviews are
    gathered and sorted by word form. The table starts with the header line
    ``ID_Word<TAB>token`` and holds one ``id_word<TAB>word`` line per word.
    The result is written under ``../../data/tokenized/``, in the sub-path
    captured from the original file name.

    :raises IndexError:
        If the name of a loaded file does not match the expected
        ``../../data/raw/<sub-path>/<name>.txt`` layout.
    """
    import loacore.load.file_load as file_load
    import loacore.utils.file_writer as file_writer

    files = file_load.load_database(load_deptrees=False)

    for file in files:
        # Sub-path following the raw data folder, and bare .txt file name.
        directory = re.findall(r'../../data/raw/(.+)', file.file_name)
        filename = re.findall(r'.+/(.+\.txt)', file.file_name)

        words = sorted(
            (word
             for review in file.reviews
             for sentence in review.sentences
             for word in sentence.words),
            key=lambda w: w.word)

        # The header needs its own line terminator, otherwise the first
        # word row ends up glued to it on the same line.
        rows = ['ID_Word\ttoken\n']
        rows.extend(str(word.id_word) + '\t' + word.word + '\n'
                    for word in words)

        file_writer.write(''.join(rows),
                          os.path.join('../../data/tokenized/', directory[0]),
                          filename[0])
Exemplo n.º 8
0
def write_polarity_check(files,
                         analysis_to_check=("simple", "optimistic",
                                            "pessimistic", "pattern_adj_cc",
                                            "pattern_adj", "pattern_cc"),
                         ref="label",
                         select="all",
                         terminal_print=True,
                         colored_polarity=True,
                         directory_path=os.path.join(RESULT_PATH,
                                                     'sentiment_analysis',
                                                     'check')):
    """
    Write polarities in .txt files. If *select* is set to *all*, each review
    is written with the polarities corresponding to *ref* and
    *analysis_to_check*. If *select* is set to *false_positive* or
    *false_negative*, only the reviews with a false positive or a false
    negative for at least one analysis of *analysis_to_check* are written,
    with the corresponding polarities.

    One file named ``check_<select>_<file name>`` is written per processed
    file, in the ``colored`` or ``uncolored`` sub-folder of *directory_path*
    depending on *colored_polarity*. The path of each written file is
    printed.

    :param files: Files to process.
    :type files: :obj:`list` of |File|
    :param analysis_to_check: Analysis to compare with *ref*
    :type analysis_to_check: :obj:`sequence` of :obj:`str`
    :param ref: Reference analysis
    :type ref: string
    :param select:
        Select option. With any value other than the three listed ones, no
        review is selected and empty files are written.
    :type select: :obj:`str` : {'all', 'false_positive', 'false_negative'}
    :param terminal_print: If True, print results in the terminal.
    :type terminal_print: boolean
    :param colored_polarity:
        If True, write words with colored polarity. Notice that colors are
        not displayed in most of the .txt editors.
        For example, in Linux, use

            .. code-block:: console

                cat file_name.txt

        to show the colored file in the terminal.
    :type colored_polarity: boolean
    :param directory_path:
        Path of the directory in which to write files.
        Default value computed using os.path.join(RESULT_PATH, 'sentiment_analysis', 'check')
    :type directory_path: |path-like-object|
    """
    import loacore.utils.file_writer as file_writer

    # Colored and uncolored outputs are kept in separate sub-folders.
    output_directory = os.path.join(
        directory_path, "colored" if colored_polarity else "uncolored")

    for file in files:
        if select == "all":
            # *ref* is a single analysis name while *analysis_to_check* is a
            # sequence: build one list of names, as done for the selected
            # analysis below (str + tuple would raise a TypeError).
            all_analysis = [ref] + list(analysis_to_check)
            check_str = '\n'.join(
                [r.review_str(colored_polarity=colored_polarity,
                              analysis=all_analysis)
                 for r in file.reviews])
        else:
            false_str = []
            for review in file.reviews:
                # The reference always comes first, followed by the analysis
                # that disagree with it in the selected way.
                selected_analysis = [ref]
                for analysis in analysis_to_check:
                    if (select == "false_positive"
                            and is_false_positive(review, ref, analysis)) \
                            or (select == "false_negative"
                                and is_false_negative(review, ref, analysis)):
                        selected_analysis.append(analysis)

                # Reviews without any faulty analysis are skipped.
                if len(selected_analysis) > 1:
                    false_str.append(
                        review.review_str(colored_polarity=colored_polarity,
                                          analysis=selected_analysis))
            check_str = '\n'.join(false_str)

        if terminal_print:
            print(check_str)

        output_name = "check_" + select + "_" + file.get_filename()
        file_writer.write(check_str, output_directory, output_name)
        print("File write : " + str(
            os.path.join(output_directory, output_name)))