def main(args):
    """Print an article with its annotations tagged, followed by the footnotes.

    Reads ``spans_file``, ``article_file``, ``propaganda_techniques_list_file``
    and ``debug_on_std`` from ``args``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    techniques_path = args.propaganda_techniques_list_file
    verbose = bool(args.debug_on_std)

    # Unless debugging on stdout was requested, keep the scorer logger quiet.
    if not verbose:
        logging.getLogger("propaganda_scorer").setLevel(logging.ERROR)

    technique_list = pt.Propaganda_Techniques(techniques_path)
    article_annotations = aa.Articles_annotations()
    # The technique list is attached to the class, not to the instance.
    aa.Articles_annotations.techniques = technique_list
    article_annotations.load_article_annotations_from_csv_file(spans_path)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        text = article_handle.read()

    # tag_text_with_annotations adds html tags (per the original inline note).
    tagged_text, notes = article_annotations.tag_text_with_annotations(text)
    print(tagged_text)
    print(notes)
def main(args):
    """Print an annotated article, optionally shifting and re-saving its spans.

    Reads ``spans_file``, ``article_file``, ``add_line_numbers``,
    ``fix_from_char_index``, ``offset`` and ``propaganda_techniques_file``
    from ``args``. When the offset is non-zero, the shifted annotations are
    written to ``<spans_file>.fix``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    with_line_numbers = bool(args.add_line_numbers)
    first_char_to_fix = int(args.fix_from_char_index)
    shift = int(args.offset)
    techniques_path = args.propaganda_techniques_file

    article_annotations = aa.Articles_annotations()
    technique_list = pt.Propaganda_Techniques(filename=techniques_path)
    an.Annotation.set_propaganda_technique_list_obj(technique_list)
    article_annotations.load_article_annotations_from_csv_file(spans_path)
    article_annotations.shift_spans(first_char_to_fix, shift)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        text = article_handle.read()

    marked_text, notes, key = article_annotations.mark_text(text, with_line_numbers)
    for section in (marked_text, key, notes):
        print(section)

    # Nothing was shifted, so there is nothing to save.
    if shift == 0:
        return

    article_annotations.save_annotations_to_file(spans_path + ".fix")
    print("Fixed annotations saved to file %s.fix"%(spans_path))
def main(args):
    """Check a submission file and, when gold data is given, score it.

    Reads ``submission``, ``gold``, ``log_file``, ``fragments_only``,
    ``merge_user_annotations``, ``per_article_evaluation`` and
    ``debug_on_std`` from ``args``.

    Returns:
        The value returned by ``compute_score_pr`` when ``args.gold`` is set;
        ``None`` when only the format check is performed.

    Raises:
        SystemExit: with status 1 when the submission fails the span checks.
    """
    user_submission_file = args.submission
    gold_folder = args.gold
    output_log_file = args.log_file
    prop_vs_non_propaganda = bool(args.fragments_only)
    merge_user_annotations = bool(args.merge_user_annotations)
    per_article_evaluation = bool(args.per_article_evaluation)

    if args.debug_on_std:
        ch.setLevel(logging.DEBUG)

    if output_log_file is not None:
        logger.info("Logging execution to file " + output_log_file)
        fileLogger = logging.FileHandler(output_log_file)
        fileLogger.setLevel(logging.DEBUG)
        fileLogger.setFormatter(formatter)
        logger.addHandler(fileLogger)

    # NOTE(review): `techniques` is built here but never used below, while
    # `techniques_names` is used without being assigned in this function (its
    # former assignment via pt.load_technique_names_from_file is disabled).
    # Unless `techniques_names` exists at module level this raises NameError;
    # confirm which object the callees expect before changing the arguments.
    techniques = pt.Propaganda_Techniques()

    submission_annotations = an.Annotations()
    submission_annotations.load_annotation_list_from_file(user_submission_file)

    if gold_folder is None:
        # No gold file provided: perform only some checks on the submission.
        logger.info('Checking format of user submitted file %s' % (user_submission_file))
        for article_id, annotations in submission_annotations.items():
            check_article_annotations_format(annotations, article_id, techniques_names)
        logger.warning("The format of the submitted file is ok. However, more checks, requiring the gold file, are needed "
                       "for the submission to be correct: the number of article and their ids must correspond to the "
                       "ones of the gold file, etc")
        return None

    logger.info('Checking user submitted file %s against gold folder %s' % (user_submission_file, gold_folder))
    gold_annotations = an.Annotations()
    gold_annotations.load_annotation_list_from_folder(gold_folder)
    check_data_file_lists(submission_annotations, gold_annotations)

    if prop_vs_non_propaganda:
        span_check = check_annotation_spans
    else:
        span_check = check_annotation_spans_with_category_matching

    if not span_check(submission_annotations, merge_user_annotations):
        # A failed check is an error: log it as one and exit with a non-zero
        # status so calling scripts do not mistake the abort for success.
        logger.error("Error in file %s" % (user_submission_file))
        sys.exit(1)
    span_check(gold_annotations, True)

    logger.info('Scoring user submitted file %s against gold files %s/*.labels' % (user_submission_file, gold_folder))
    logger.info("Scoring the submission with precision and recall")
    score_pr = compute_score_pr(submission_annotations, gold_annotations, techniques_names,
                                prop_vs_non_propaganda, per_article_evaluation)
    return score_pr
def _load_sorted_annotations(annotations_file):
    """Load annotations from a file and sort the spans of every article."""
    loaded = ans.Annotations()
    loaded.load_annotation_list_from_file(annotations_file)
    for article in loaded.get_article_id_list():
        loaded.get_article_annotations_obj(article).sort_spans()
    return loaded


def main(args):
    """Check a submission against the gold file and report its score.

    Reads ``submission``, ``gold``, ``log_file``,
    ``propaganda_techniques_list_file``, ``output_for_script`` and
    ``debug_on_std`` from ``args``. When ``output_for_script`` is set, the
    console handler is not attached and the script-oriented result string is
    printed instead.

    Raises:
        SystemExit: with status 1 when the submission and gold annotations
            do not pass the comparison checks.
    """
    user_submission_file = args.submission
    gold_file = args.gold
    output_log_file = args.log_file
    propaganda_techniques_list_file = args.propaganda_techniques_list_file
    output_for_script = bool(args.output_for_script)

    if not output_for_script:
        logger.addHandler(ch)

    if args.debug_on_std:
        ch.setLevel(logging.DEBUG)

    if output_log_file is not None:
        logger.info("Logging execution to file " + output_log_file)
        fileLogger = logging.FileHandler(output_log_file)
        fileLogger.setLevel(logging.DEBUG)
        fileLogger.setFormatter(formatter)
        logger.addHandler(fileLogger)

    propaganda_techniques = pt.Propaganda_Techniques(propaganda_techniques_list_file)
    an.Annotation.set_propaganda_technique_list_obj(propaganda_techniques)

    user_annotations = _load_sorted_annotations(user_submission_file)
    gold_annotations = _load_sorted_annotations(gold_file)

    logger.info("Checking format: User Predictions -- Gold Annotations")
    same_articles = user_annotations.compare_annotations_identical_article_lists(gold_annotations)
    # The second comparison runs only when the article lists already match.
    if not same_articles or not user_annotations.compare_annotations_identical(gold_annotations):
        logger.error("wrong format, no scoring will be performed")
        # Exit with a non-zero status so callers can detect the failure;
        # a bare sys.exit() would report success.
        sys.exit(1)
    logger.info("OK: submission file format appears to be correct")

    res_for_output, res_for_script = user_annotations.TC_score_to_string(
        gold_annotations, output_for_script)
    logger.info("Scoring submission" + res_for_output)
    if output_for_script:
        print(res_for_script)
def main(args):
    """Print an article with its annotations marked, then the legend and footnotes.

    Reads ``spans_file``, ``article_file`` and ``add_line_numbers`` from
    ``args``. The technique list is loaded from a fixed path under ``data/``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    with_line_numbers = bool(args.add_line_numbers)

    # The technique list is stored as an attribute of the `an` module.
    an.techniques = pt.Propaganda_Techniques(filename="data/propaganda-techniques-names-semeval2020task11.txt")
    article_annotations = an.Articles_annotations()
    article_annotations.load_article_annotations_from_csv_file(spans_path)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        text = article_handle.read()

    marked_text, notes, key = article_annotations.mark_text(text, with_line_numbers)
    for section in (marked_text, key, notes):
        print(section)
def main(args):
    """Print an article with its annotations marked, followed by the footnotes.

    Reads ``spans_file``, ``article_file`` and
    ``propaganda_techniques_list_file`` from ``args``.
    """
    spans_path = args.spans_file
    article_path = args.article_file
    techniques_path = args.propaganda_techniques_list_file

    technique_list = pt.Propaganda_Techniques(techniques_path)
    article_annotations = aa.Articles_annotations()
    # The technique list is attached to the class, not to the instance.
    aa.Articles_annotations.techniques = technique_list
    article_annotations.load_article_annotations_from_csv_file(spans_path)

    with codecs.open(article_path, "r", encoding="utf8") as article_handle:
        text = article_handle.read()

    # mark_text also returns a legend, which this entry point does not print.
    marked_text, notes, _legend = article_annotations.mark_text(text)
    print(marked_text)
    print(notes)
import sys
sys.path.append("../")
import src.propaganda_techniques as pt
import src.annotation as an
import src.article_annotations as aa


def test_remove_annotation(artannotations):
    """Remove the first annotation and check the string form shrinks accordingly.

    The string form after removal must equal the string form before removal
    with the first occurrence of the expected annotation line deleted.
    """
    snapshot_before = str(artannotations)
    target = artannotations[0]
    print("removing annotation: " + str(target))
    artannotations.remove_annotation(target)
    snapshot_after = str(artannotations)
    expected = snapshot_before.replace("\n\t[0, 59] -> Exaggeration,Minimisation", "", 1)
    assert snapshot_after == expected


if __name__ == "__main__":

    # The technique list must be registered before annotations are loaded.
    propaganda_techniques = pt.Propaganda_Techniques(
        filename="../data/propaganda-techniques-names.txt")
    an.Annotation.set_propaganda_technique_list_obj(propaganda_techniques)

    artannotations = aa.Articles_annotations()
    artannotations.load_article_annotations_from_csv_file(
        "../data/article736757214.task-FLC.labels")
    test_remove_annotation(artannotations)