def score_submission(args):
    """Build the scoring inputs from ``args``, print the scores, and exit.

    Constructs the logger, ontology/slot/document mappings, the per-modality
    boundary objects, the gold and system ``ResponseSet`` objects, the cluster
    alignment and the cluster self-similarities, hands them to
    ``ScoresManager`` and calls ``print_scores(args.scores)`` on the result.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``slot_mappings``, ``parent_children``, ``encodings``,
            ``core_documents``, ``sentence_boundaries``, ``image_boundaries``,
            ``video_boundaries``, ``keyframe_boundaries``, ``gold``,
            ``system``, ``runid``, ``alignment``, ``similarities``,
            ``separator`` and ``scores``.

    Raises:
        SystemExit: Always, with ``ALLOK_EXIT_CODE``, once scores are printed.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    ontology_type_mappings = OntologyTypeMappings(
        logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(
        logger,
        args.parent_children,
        Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))

    # Boundary objects are built in the original order (text, image, video,
    # keyframe) before being keyed by modality.
    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries,
    }

    # The gold set is labelled with the literal run id 'gold'; the system set
    # uses the run id supplied on the command line.
    gold_responses = ResponseSet(
        logger, ontology_type_mappings, slot_mappings, document_mappings,
        document_boundaries, args.gold, 'gold')
    system_responses = ResponseSet(
        logger, ontology_type_mappings, slot_mappings, document_mappings,
        document_boundaries, args.system, args.runid)

    cluster_alignment = ClusterAlignment(logger, args.alignment)
    cluster_self_similarities = ClusterSelfSimilarities(
        logger, args.similarities)

    scores = ScoresManager(
        logger, gold_responses, system_responses, cluster_alignment,
        cluster_self_similarities, args.separator)
    scores.print_scores(args.scores)

    # sys.exit rather than the site-provided exit(): the latter is only
    # injected for interactive use and is absent under `python -S`.
    sys.exit(ALLOK_EXIT_CODE)
def filter_responses(args):
    """Filter a response set against annotated regions and write the result.

    Loads the responses found at ``args.input``, runs
    ``run_filter_on_all_responses`` over them, then mirrors every input file
    into ``args.output``: the output path is the input path with
    ``responses.get('path')`` replaced by ``args.output``. Each output file
    receives the header line taken from the first entry, followed by every
    valid entry whose ``'passes_filter'`` value is truthy. Entries that are
    not valid are reported through the logger as ``EXPECTING_VALID_ENTRY``
    and skipped.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``slot_mappings``, ``parent_children``, ``encodings``,
            ``core_documents``, ``sentence_boundaries``, ``image_boundaries``,
            ``video_boundaries``, ``keyframe_boundaries``, ``input``,
            ``runid``, ``regions`` and ``output``.

    Raises:
        FileExistsError: If ``args.output`` already exists (``os.mkdir``).
        SystemExit: Always on success, with ``ALLOK_EXIT_CODE``.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    ontology_type_mappings = OntologyTypeMappings(
        logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(
        logger,
        args.parent_children,
        Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries,
    }

    responses = ResponseSet(
        logger, ontology_type_mappings, slot_mappings, document_mappings,
        document_boundaries, args.input, args.runid)
    # NOTE(review): align_clusters in this file passes ontology_type_mappings
    # as the second AnnotatedRegions argument; this call omits it. Confirm
    # which signature is current.
    annotated_regions = AnnotatedRegions(
        logger, document_mappings, document_boundaries, args.regions)
    run_filter_on_all_responses(
        responses, annotated_regions, document_mappings, document_boundaries)

    # Deliberately mkdir (not makedirs): a pre-existing output directory is
    # an error, so earlier results are never silently overwritten.
    os.mkdir(args.output)

    for input_filename in responses:
        output_filename = input_filename.replace(
            responses.get('path'), args.output)
        dirname = os.path.dirname(output_filename)
        if dirname:
            # makedirs also creates missing intermediate directories, which
            # a single os.mkdir cannot do for nested input layouts.
            os.makedirs(dirname, exist_ok=True)

        entries = responses.get(input_filename)
        # The context manager closes the file even if an entry lookup or a
        # write raises part-way through.
        with open(output_filename, 'w') as output_fh:
            header_printed = False
            for linenum in sorted(entries, key=int):
                entry = entries.get(str(linenum))
                if not header_printed:
                    # The header is emitted once, from the first entry seen,
                    # whether or not that entry turns out to be valid.
                    output_fh.write(
                        '{}\n'.format(entry.get('header').get('line')))
                    header_printed = True
                if not entry.get('valid'):
                    logger.record_event(
                        'EXPECTING_VALID_ENTRY', entry.get('where'))
                    continue
                if entry.get('passes_filter'):
                    output_fh.write(str(entry))

    # sys.exit rather than the site-provided exit(): the latter is only
    # injected for interactive use and is absent under `python -S`.
    sys.exit(ALLOK_EXIT_CODE)
def validate_responses(args):
    """Load the responses at ``args.input`` and write out the valid ones.

    Constructs the logger, the ontology/slot/document mappings and the
    per-modality boundary objects, builds a ``ResponseSet`` from
    ``args.input`` and calls ``write_valid_responses(args.output)`` on it.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``slot_mappings``, ``parent_children``, ``encodings``,
            ``core_documents``, ``sentence_boundaries``, ``image_boundaries``,
            ``video_boundaries``, ``keyframe_boundaries``, ``input``,
            ``runid`` and ``output``.

    Raises:
        SystemExit: Always on success, with ``ALLOK_EXIT_CODE``.
    """
    logger = Logger(args.log, args.log_specifications, sys.argv)
    ontology_type_mappings = OntologyTypeMappings(
        logger, args.ontology_type_mappings)
    slot_mappings = SlotMappings(logger, args.slot_mappings)
    document_mappings = DocumentMappings(
        logger,
        args.parent_children,
        Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries,
    }

    responses = ResponseSet(
        logger, ontology_type_mappings, slot_mappings, document_mappings,
        document_boundaries, args.input, args.runid)
    responses.write_valid_responses(args.output)

    # sys.exit rather than the site-provided exit(): the latter is only
    # injected for interactive use and is absent under `python -S`.
    sys.exit(ALLOK_EXIT_CODE)
def align_clusters(args):
    """Align gold and system clusters for every core document and exit.

    Creates the ``args.similarities`` and ``args.alignment`` directories,
    then walks the entries of ``args.gold`` in sorted order. For each
    sub-directory whose name ends in ``.ttl`` and whose document is flagged
    ``'is_core'`` in the document mappings, a ``Clusters`` object is built
    from the gold/system mention and edge files (``None`` is passed for any
    file that does not exist) and asked to print its similarities and
    alignment to ``<args.similarities>/<document_id>.tab`` and
    ``<args.alignment>/<document_id>.tab`` respectively.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``log``,
            ``log_specifications``, ``ontology_type_mappings``,
            ``parent_children``, ``encodings``, ``core_documents``,
            ``sentence_boundaries``, ``image_boundaries``,
            ``video_boundaries``, ``keyframe_boundaries``, ``regions``,
            ``eng_iou_threshold``, ``spa_iou_threshold``,
            ``rus_iou_threshold``, ``image_iou_threshold``,
            ``video_iou_threshold``, ``similarities``, ``alignment``,
            ``gold`` and ``system``.

    Raises:
        FileExistsError: If ``args.similarities`` or ``args.alignment``
            already exists (``os.mkdir``).
        SystemExit: Always on success, with ``ALLOK_EXIT_CODE``.
    """
    kb_suffix = '.ttl'
    mentions_filename = 'AIDA_P2_TA1_CM_A0001.rq.tsv'
    edges_filename = 'AIDA_P2_TA1_AM_A0001.rq.tsv'

    def existing_path_or_none(root, kb_name, filename):
        # Build '<root>/<kb_name>/<filename>'; None signals a missing file.
        path = '{}/{}/{}'.format(root, kb_name, filename)
        return path if os.path.exists(path) else None

    logger = Logger(args.log, args.log_specifications, sys.argv)
    ontology_type_mappings = OntologyTypeMappings(
        logger, args.ontology_type_mappings)
    document_mappings = DocumentMappings(
        logger,
        args.parent_children,
        Encodings(logger, args.encodings),
        CoreDocuments(logger, args.core_documents))

    text_boundaries = TextBoundaries(logger, args.sentence_boundaries)
    image_boundaries = ImageBoundaries(logger, args.image_boundaries)
    video_boundaries = VideoBoundaries(logger, args.video_boundaries)
    keyframe_boundaries = KeyFrameBoundaries(logger, args.keyframe_boundaries)
    document_boundaries = {
        'text': text_boundaries,
        'image': image_boundaries,
        'keyframe': keyframe_boundaries,
        'video': video_boundaries,
    }

    annotated_regions = AnnotatedRegions(
        logger, ontology_type_mappings, document_mappings,
        document_boundaries, args.regions)

    thresholds = {
        'ENG': args.eng_iou_threshold,
        'SPA': args.spa_iou_threshold,
        'RUS': args.rus_iou_threshold,
        'image': args.image_iou_threshold,
        'video': args.video_iou_threshold,
    }

    # Deliberately mkdir (not makedirs): pre-existing output directories are
    # an error, so earlier results are never silently overwritten.
    os.mkdir(args.similarities)
    os.mkdir(args.alignment)

    for entry in sorted(os.scandir(args.gold), key=str):
        if not (entry.is_dir() and entry.name.endswith(kb_suffix)):
            continue

        kb = entry.name
        # Strip only the trailing suffix; str.replace would also remove any
        # '.ttl' occurring earlier in the directory name.
        document_id = kb[:-len(kb_suffix)]
        if not document_mappings.get('documents').get(document_id).get(
                'is_core'):
            continue

        message = 'aligning clusters in {}'.format(entry.name)
        logger.record_event('DEFAULT_INFO', message)
        print('At {}: {}'.format(
            time.strftime("%m/%d/%Y %H:%M:%S", time.localtime()), message))

        gold_mentions = existing_path_or_none(args.gold, kb, mentions_filename)
        gold_edges = existing_path_or_none(args.gold, kb, edges_filename)
        system_mentions = existing_path_or_none(
            args.system, kb, mentions_filename)
        system_edges = existing_path_or_none(args.system, kb, edges_filename)

        similarities = '{}/{}.tab'.format(args.similarities, document_id)
        alignment = '{}/{}.tab'.format(args.alignment, document_id)
        check_for_paths_non_existance([similarities, alignment])

        clusters = Clusters(
            logger, document_mappings, document_boundaries,
            annotated_regions, gold_mentions, gold_edges, system_mentions,
            system_edges, thresholds)
        clusters.print_similarities(similarities)
        clusters.print_alignment(alignment)

    # sys.exit rather than the site-provided exit(): the latter is only
    # injected for interactive use and is absent under `python -S`.
    sys.exit(ALLOK_EXIT_CODE)