示例#1
0
def main(args):
    """Print an article with its annotations applied, followed by footnotes.

    Reads ``args.spans_file``, ``args.article_file``,
    ``args.propaganda_techniques_list_file`` and ``args.debug_on_std``.
    Unless ``debug_on_std`` is truthy, the "propaganda_scorer" logger is
    limited to ERROR level. The article text is passed through
    ``tag_text_with_annotations`` and the two returned values are printed.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    techniques_path = args.propaganda_techniques_list_file
    quiet = not bool(args.debug_on_std)

    if quiet:
        logging.getLogger("propaganda_scorer").setLevel(logging.ERROR)

    technique_list = pt.Propaganda_Techniques(techniques_path)
    article_annotations = aa.Articles_annotations()
    # The technique list is stored on the class, not on the instance.
    aa.Articles_annotations.techniques = technique_list

    article_annotations.load_article_annotations_from_csv_file(spans_path)

    # codecs.open is kept on purpose: it does no newline translation, so
    # character positions in the text match the raw file content.
    with codecs.open(article_path, "r", encoding="utf8") as handle:
        text = handle.read()

    tagged_text, notes = article_annotations.tag_text_with_annotations(text)

    print(tagged_text)
    print(notes)
示例#2
0
def main(args):
    """Shift annotation spans, print the marked article, optionally save.

    Reads ``args.spans_file``, ``args.article_file``,
    ``args.add_line_numbers``, ``args.fix_from_char_index``, ``args.offset``
    and ``args.propaganda_techniques_file``. The loaded annotations are
    passed to ``shift_spans(fix_from_char_index, offset)``; the marked text,
    legend and footnotes are printed. When the offset is non-zero the
    annotations are written to ``<spans_file>.fix``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    with_line_numbers = bool(args.add_line_numbers)
    start_index = int(args.fix_from_char_index)
    shift = int(args.offset)
    techniques_path = args.propaganda_techniques_file

    article_annotations = aa.Articles_annotations()
    technique_list = pt.Propaganda_Techniques(filename=techniques_path)
    an.Annotation.set_propaganda_technique_list_obj(technique_list)

    article_annotations.load_article_annotations_from_csv_file(spans_path)
    article_annotations.shift_spans(start_index, shift)

    # codecs.open performs no newline translation, so offsets stay aligned
    # with the raw file content.
    with codecs.open(article_path, "r", encoding="utf8") as handle:
        text = handle.read()

    marked_text, notes, key = article_annotations.mark_text(
        text, with_line_numbers)

    print(marked_text)
    print(key)
    print(notes)

    # Nothing was moved, so there is nothing new to persist.
    if shift == 0:
        return

    article_annotations.save_annotations_to_file(spans_path + ".fix")
    print("Fixed annotations saved to file %s.fix" % (spans_path))
def main(args):
    """Check a user submission and, when gold data is given, score it.

    Reads ``args.submission``, ``args.gold``, ``args.log_file``,
    ``args.fragments_only``, ``args.merge_user_annotations``,
    ``args.per_article_evaluation`` and ``args.debug_on_std``.

    When ``args.gold`` is None only the format of the submission is checked
    and the function returns None implicitly. Otherwise the submission is
    checked against the gold folder and the result of ``compute_score_pr``
    is returned. If the span check on the submission fails, the process
    terminates through ``sys.exit()``.

    NOTE(review): ``techniques_names`` is used below but never assigned in
    this function (its only visible assignment is commented out). It is
    presumably a module-level name defined elsewhere in the file -- confirm,
    otherwise these calls raise NameError.
    """

    user_submission_file = args.submission
    gold_folder = args.gold
    output_log_file = args.log_file
    prop_vs_non_propaganda = bool(args.fragments_only)
    merge_user_annotations = bool(args.merge_user_annotations)
    per_article_evaluation = bool(args.per_article_evaluation)

    # `ch`, `logger` and `formatter` are not defined in this function; they
    # are expected to exist at module level.
    if args.debug_on_std:
        ch.setLevel(logging.DEBUG)

    # Optionally mirror the log to a file at DEBUG level.
    if output_log_file is not None:
        logger.info("Logging execution to file " + output_log_file)
        fileLogger = logging.FileHandler(output_log_file)
        fileLogger.setLevel(logging.DEBUG)
        fileLogger.setFormatter(formatter)
        logger.addHandler(fileLogger)

    # NOTE(review): `techniques` is assigned here but not referenced again in
    # this function; the calls below pass `techniques_names` instead.
    techniques = pt.Propaganda_Techniques()
    #techniques_names = pt.load_technique_names_from_file(args.techniques_file)  # load technique names
    submission_annotations = an.Annotations()
    submission_annotations.load_annotation_list_from_file(user_submission_file)
    if gold_folder is None:
        # no gold file provided, perform only some checks on the submission files
        logger.info('Checking format of user submitted file %s' % (user_submission_file))
        for article_id, annotations in submission_annotations.items():
            check_article_annotations_format(annotations, article_id, techniques_names)
        logger.warning("The format of the submitted file is ok. However, more checks, requiring the gold file, are needed "
                        "for the submission to be correct: the number of article and their ids must correspond to the "
                        "ones of the gold file, etc")
    else:
        logger.info('Checking user submitted file %s against gold folder %s' % (user_submission_file, gold_folder))
        gold_annotations = an.Annotations()
        gold_annotations.load_annotation_list_from_folder(gold_folder)
        #gold_annotations = load_annotation_list_from_folder(gold_folder, techniques_names)
        check_data_file_lists(submission_annotations, gold_annotations)
        # The two branches differ only in which span-checking function is
        # used. The gold annotations are always checked with True as second
        # argument (presumably the same merge flag as for the submission --
        # TODO confirm) and the result of that call is ignored.
        if prop_vs_non_propaganda:
            if not check_annotation_spans(submission_annotations, merge_user_annotations):
                logger.info("Error in file %s" %(user_submission_file))
                # NOTE(review): sys.exit() without an argument reports exit
                # status 0; confirm callers do not need a non-zero status.
                sys.exit()
            check_annotation_spans(gold_annotations, True)
        else:
            if not check_annotation_spans_with_category_matching(submission_annotations, merge_user_annotations):
                logger.info("Error in file %s" % (user_submission_file))
                sys.exit()
            check_annotation_spans_with_category_matching(gold_annotations, True)
        logger.info('Scoring user submitted file %s against gold files %s/*.labels' % (user_submission_file, gold_folder))

        #logger.info("Scoring the submission with max")
        #score_max = compute_score_max(submission_annotations, gold_annotations, techniques_names, prop_vs_non_propaganda)
        #logger.info("Scoring the submission with min")
        #score_min = compute_score_min(submission_annotations, gold_annotations, techniques_names, prop_vs_non_propaganda)
        logger.info("Scoring the submission with precision and recall")
        score_pr = compute_score_pr(submission_annotations, gold_annotations, techniques_names,
                                    prop_vs_non_propaganda, per_article_evaluation)
        return score_pr
def main(args):
    """Compare a user submission with gold annotations and report the score.

    Reads ``args.submission``, ``args.gold``, ``args.log_file``,
    ``args.propaganda_techniques_list_file``, ``args.output_for_script`` and
    ``args.debug_on_std``. Both annotation files are loaded and their spans
    sorted per article. If the two sets fail the comparison checks, an error
    is logged and the process terminates through ``sys.exit()``. Otherwise
    the score string is logged, and additionally printed when
    ``output_for_script`` is truthy.
    """
    submission_path = args.submission
    gold_path = args.gold
    log_path = args.log_file
    techniques_path = args.propaganda_techniques_list_file
    script_mode = bool(args.output_for_script)

    # The console handler is attached only when output is not for a script.
    if not script_mode:
        logger.addHandler(ch)

    if args.debug_on_std:
        ch.setLevel(logging.DEBUG)

    if log_path is not None:
        logger.info("Logging execution to file " + log_path)
        file_handler = logging.FileHandler(log_path)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    technique_list = pt.Propaganda_Techniques(techniques_path)
    an.Annotation.set_propaganda_technique_list_obj(technique_list)

    def _load_sorted(path):
        # Load one annotation file and sort the spans of every article.
        loaded = ans.Annotations()
        loaded.load_annotation_list_from_file(path)
        for article_id in loaded.get_article_id_list():
            loaded.get_article_annotations_obj(article_id).sort_spans()
        return loaded

    user_annotations = _load_sorted(submission_path)
    gold_annotations = _load_sorted(gold_path)

    logger.info("Checking format: User Predictions -- Gold Annotations")
    # `and` short-circuits: the second check runs only if the first passes.
    format_ok = (
        user_annotations.compare_annotations_identical_article_lists(
            gold_annotations)
        and user_annotations.compare_annotations_identical(gold_annotations)
    )
    if not format_ok:
        logger.error("wrong format, no scoring will be performed")
        sys.exit()
    logger.info("OK: submission file format appears to be correct")

    score_strings = user_annotations.TC_score_to_string(
        gold_annotations, script_mode)
    res_for_output, res_for_script = score_strings
    logger.info("Scoring submission" + res_for_output)
    if script_mode:
        print(res_for_script)
def main(args):
    """Print an article with its annotations marked, plus legend and notes.

    Reads ``args.spans_file``, ``args.article_file`` and
    ``args.add_line_numbers``. The technique list is always loaded from the
    fixed path "data/propaganda-techniques-names-semeval2020task11.txt" and
    stored as ``an.techniques``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    with_line_numbers = bool(args.add_line_numbers)

    techniques_path = "data/propaganda-techniques-names-semeval2020task11.txt"
    an.techniques = pt.Propaganda_Techniques(filename=techniques_path)

    article_annotations = an.Articles_annotations()
    article_annotations.load_article_annotations_from_csv_file(spans_path)

    # codecs.open performs no newline translation, so offsets stay aligned
    # with the raw file content.
    with codecs.open(article_path, "r", encoding="utf8") as handle:
        text = handle.read()

    marked = article_annotations.mark_text(text, with_line_numbers)
    marked_text, notes, key = marked

    print(marked_text)
    print(key)
    print(notes)
def main(args):
    """Print an article with its annotations marked, followed by footnotes.

    Reads ``args.spans_file``, ``args.article_file`` and
    ``args.propaganda_techniques_list_file``. ``mark_text`` returns three
    values; only the marked text and the footnotes are printed, the third
    one is discarded.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    techniques_path = args.propaganda_techniques_list_file

    technique_list = pt.Propaganda_Techniques(techniques_path)
    article_annotations = aa.Articles_annotations()
    # The technique list is stored on the class, not on the instance.
    aa.Articles_annotations.techniques = technique_list

    article_annotations.load_article_annotations_from_csv_file(spans_path)

    # codecs.open performs no newline translation, so offsets stay aligned
    # with the raw file content.
    with codecs.open(article_path, "r", encoding="utf8") as handle:
        text = handle.read()

    marked_text, notes, _ = article_annotations.mark_text(text)

    print(marked_text)
    print(notes)
示例#7
0
import sys
sys.path.append("../")
import src.propaganda_techniques as pt
import src.annotation as an
import src.article_annotations as aa


def test_remove_annotation(artannotations):
    """Check that removing the first annotation drops its line from str().

    The string form of ``artannotations`` is captured before and after
    ``remove_annotation`` is called with its first element. The result must
    equal the earlier string with the first occurrence of the
    "[0, 59] -> Exaggeration,Minimisation" line removed.
    """
    rendered_before = str(artannotations)
    print("removing annotation: " + str(artannotations[0]))
    artannotations.remove_annotation(artannotations[0])
    rendered_after = str(artannotations)

    removed_line = "\n\t[0, 59] -> Exaggeration,Minimisation"
    expected = rendered_before.replace(removed_line, "", 1)
    assert rendered_after == expected


if __name__ == "__main__":
    # Input files, relative to the directory the script is run from.
    techniques_path = "../data/propaganda-techniques-names.txt"
    labels_path = "../data/article736757214.task-FLC.labels"

    # Install the technique list on the Annotation class, then load labels.
    propaganda_techniques = pt.Propaganda_Techniques(filename=techniques_path)
    an.Annotation.set_propaganda_technique_list_obj(propaganda_techniques)

    artannotations = aa.Articles_annotations()
    artannotations.load_article_annotations_from_csv_file(labels_path)

    test_remove_annotation(artannotations)