def remove_ill_formed_drss(drss, signature_file):
    '''Remove ill-formed DRSs from a set of DRSs'''
    # The signature lists the legal clause operators used by the checker
    signature = get_signature(signature_file)
    kept, dropped_idxs = [], []
    for position, drs in enumerate(drss):
        clauses = [tuple(clause) for clause in drs_string_to_list(drs)]
        if is_well_formed_drs(clauses, signature):
            kept.append(drs)
        else:
            # DRS invalid: remember its index so the caller can prune parallel lists
            dropped_idxs.append(position)
    return kept, dropped_idxs
def remove_ill_formed_drss(drss, signature_file):
    '''Remove ill-formed DRSs from a set of DRSs'''
    # The signature lists the legal clause operators used by the referee check
    signature = get_signature(signature_file)
    kept, dropped_idxs = [], []
    for position, drs in enumerate(drss):
        # Remove comments and split into per-clause token lists
        clause_lists = drs_string_to_list(drs)
        try:
            # check_clf raises RuntimeError for an ill-formed DRS (v=0: silent)
            check_clf([tuple(clause) for clause in clause_lists], signature, v=0)
        except RuntimeError:
            # DRS invalid: record its index so callers can prune parallel lists
            dropped_idxs.append(position)
        else:
            kept.append(drs)
    return kept, dropped_idxs
def __init__(self, fix_senses, sig_file, input_file, vocab, min_tokens, do_json,
             remove_clauses, remove_roles_op, remove_concepts, sep, no_sep, var,
             baseline, fix, fix_disc, no_referee):
    '''Set up the postprocessor: load senses/signature/input lines and store the
    repair settings. Also initialises the bookkeeping dict that counts each
    repair type per DRS.'''
    # Explanation of the error types:
    # unknown: number of clauses removed that contained the UNKNOWN token
    # remove: number of clauses removed because --remove_clauses was used
    # frequency-rolesop: number of clauses removed because --remove_roles_op was used
    # frequency-name: number of clauses removed because --remove_concepts was used (incl names)
    # frequency-conc: number of clauses removed because --remove_concepts was used
    # variables: # of times a variables made an impossible reference (e.g. @3 for last var)
    # wrong arity: # of times a clause had a wrong (ill-formed) arity and was therefore ignored
    # no-sense: # of clauses that contained a concept, but no sense, insert default n.01 sense
    # no-ref: disc var was not introduced, add REF
    # spurious-ref: disc var was introduced by REF but never used, remove REF clause
    # sense: number of senses we fixed when using --fix_senses
    # sub-loop: number of times we fixed the subordinate relation has a loop problem (--fix)
    # boxes disconnected: number of times we fixed the disconnected boxes problem (--fix_disc)
    self.possible_repairs = ["unknown", "remove", "frequency-rolesop", "frequency-name",
                             "double", "frequency-conc", "variables", "wrong arity",
                             "no-sense", "no-ref", "spurious-ref", "sense", "sub loop",
                             "boxes disconnected"]
    self.dummies = ["dummies-pp", "dummies-ref"]
    # One list of occurrences per repair/dummy type
    self.pp_dict = {key: [] for key in self.possible_repairs + self.dummies}
    # Other settings
    # BUGFIX: the original mixed the passed-in parameters with the module-level
    # global ``args`` (args.fix_senses, args.sig_file, args.input_file). Use the
    # parameters consistently so this class does not depend on a global, and
    # close the files we open instead of leaking the handles.
    if fix_senses and os.path.isfile(fix_senses):
        with open(fix_senses, 'r') as sense_file:
            self.senses = [x.split() for x in sense_file]
    else:
        self.senses = None
    self.signature = get_signature(sig_file) if sig_file and os.path.isfile(sig_file) \
        else None
    if do_json:
        self.lines = read_allennlp_json_predictions(input_file, vocab, min_tokens)
    else:
        with open(input_file, 'r') as in_file:
            self.lines = [x.strip() for x in in_file]
    self.rm_clauses = remove_clauses
    self.rm_roles_op = remove_roles_op
    self.rm_concepts = remove_concepts
    self.sep = sep
    self.no_sep = no_sep
    self.var = var
    self.baseline = baseline
    self.fix = fix
    self.fix_disc = fix_disc
    self.no_referee = no_referee
    self.box_id = 'b'
    self.var_id = 'x'
    # Counter that we use to keep track of current DRS idx
    self.cur_idx = -1
def extensive_format_check(drss_fixed, sig_file):
    '''Do a more extensive semantic format check (referee).

    Each DRS (a list of clause strings) is validated with check_clf; invalid
    DRSs are replaced by a dummy DRS so the output stays aligned with the input.
    Returns (final DRS list, summary string of error counts, total error count).'''
    drss_final = []
    signature = get_signature(sig_file)
    error_counter = Counter()
    for clf in drss_fixed:
        try:
            _ = check_clf([tuple(c.split()) for c in clf], signature, v=1)
            drss_final.append(clf)
        except RuntimeError as err_message:
            # DRS invalid, replace by dummy
            # BUGFIX: exceptions are not subscriptable in Python 3, so
            # err_message[0] raised a TypeError here; .args[0] is the
            # equivalent that works on both Python 2 and 3.
            error_counter.update([err_message.args[0]])
            drss_final.append([" ".join(x) for x in dummy_drs()])
    error_total = sum(error_counter.values())
    # BUGFIX: guard against an empty input list (avoid ZeroDivisionError)
    perc_wrong = error_total * 100 / float(len(drss_fixed)) if drss_fixed else 0.0
    print_str = "#wrong = {} ({:.2f}%)".format(error_total, perc_wrong)
    for (err, c) in error_counter.most_common():
        print_str += str(c) + ' ' + err + ' '
    return drss_final, print_str, error_total
graph, con_pars['prov']) SPLIT.write(json.dumps(dict_drg) + '\n', ensure_ascii=False) else: warning( "one of the files doesn't exist: {}, {}".format( clf_file, raw_file)) return error_counter ############################################################################## ################################ Main function ################################ if __name__ == '__main__': args = parse_arguments() # read a signature file sig = clfref.get_signature(args.sig) # conversion parameters con_pars = { 'pmb2': args.pmb2, 'keep-refs': args.keep_refs, 'c': args.concept, 'noarg': args.noarg, 'rmid': args.rmid, 'rle': args.rle, 'in': args.in_box, 'bm': args.bm, 'prov': args.prov } # get main mrp graph's meta info feats = {k: vars(args)[k] for k in 'frwk mrpv flvr prov'.split()} # the directory I/O mode
def main(args):
    '''Main function of counter score calculation.

    Reads gold (args.f2) and produced (args.f1) clausal forms, matches their
    clauses (possibly over several runs and in parallel), and prints/saves
    precision, recall and F-score plus optional statistics.'''
    start = time.time()
    # Read in English sense dict for rewriting concepts to synset ID
    # This is read from a Python import to speed things up (twice as fast as loading JSON every time)
    from wordnet_dict_en import en_sense_dict
    signature = get_signature(args.sig_file)  # Get signature
    res = []
    # Get all the clauses and check if they are valid
    clauses_gold_list, original_gold = get_clauses(args.f2, signature, args.ill)
    clauses_prod_list, original_prod = get_clauses(args.f1, signature, args.ill)
    # Count ill-DRSs in the system output
    global ill_drs_ids
    if args.codalab and args.ill == 'dummy':
        # A dummy DRS is short (< 3 clauses) and marked with an 'alwayswrong' token;
        # ids are 1-based because of start=1
        ill_drs_ids = [
            i for (i, x) in enumerate(clauses_prod_list, start=1)
            if len(x) < 3 and next((
                cl for cl in x
                if len(cl) > 1 and cl[1].startswith('alwayswrong')), False)
        ]
    # Don't print the results each time if we do multiple runs
    no_print = True if args.runs > 1 else False
    single = True if len(
        clauses_gold_list) == 1 else False  # true if we are doing a single DRS
    # Check if correct input (number of instances, baseline, etc)
    original_prod, clauses_prod_list = check_input(
        clauses_prod_list, original_prod, original_gold, clauses_gold_list,
        args.baseline, args.f1, args.max_clauses, single)
    # Processing clauses
    for _ in range(
            args.runs
    ):  # for experiments we may want more runs so we can average later
        arg_list = []
        for count, (prod_t, gold_t) in enumerate(
                zip(clauses_prod_list, clauses_gold_list)):
            arg_list.append([
                prod_t, gold_t, args, single, original_prod[count],
                original_gold[count], en_sense_dict, signature
            ])
        # Parallel processing here
        if args.parallel == 1:  # no need for parallelization for p=1
            all_results = []
            for num_count, arguments in enumerate(arg_list):
                all_results.append(get_matching_clauses(arguments))
        else:
            all_results = multiprocessing.Pool(args.parallel).map(
                get_matching_clauses, arg_list)
        # If we find results, print them in a nice way
        if all_results == ['skip']:  # skip result
            pass
        elif all_results and all_results[0]:
            # all_clauses/all_vars are used further below by the stats branches;
            # they are only (re)bound when matching actually produced results
            all_clauses = [x[1] for x in all_results]
            all_vars = [x[7] for x in all_results]
            if not args.stats:
                res.append(
                    print_results(all_results, no_print, start, single, args))
        else:
            raise ValueError('No results found')
    # If multiple runs, print averages
    if res and args.runs > 1 and not args.stats:
        print('Average scores over {0} runs:\n'.format(args.runs))
        print('Precision: {0}'.format(
            round(
                float(sum([x[0] for x in res])) / float(args.runs),
                args.significant)))
        print('Recall   : {0}'.format(
            round(
                float(sum([x[1] for x in res])) / float(args.runs),
                args.significant)))
        print('F-score  : {0}'.format(
            round(
                float(sum([x[2] for x in res])) / float(args.runs),
                args.significant)))
    # print scores in scores.txt file for codalab usage
    if len(res) == 1 and args.runs == 1 and args.codalab:
        with codecs.open(args.codalab + '.txt', 'w',
                         encoding='UTF-8') as scores_file:
            # res holds exactly one (precision, recall, f-score) triple here
            [[score_p, score_r, score_f]] = res
            scores_file.write(
                "Precision: {}\nRecall: {}\nF-score: {}".format(*res[0]))
    # Sometimes we are also interested in (saving and printing) some statistics, do that here
    if args.stats and args.runs <= 1:
        save_stats(all_clauses, all_vars, args.stats)
    # We might also want to save all individual F-scores, usually for the sake of doing statistics. We print them to a file here
    if args.ms_file:
        with open(args.ms_file, 'w') as out_f:
            for items in all_results:
                # items[0..2] are match/prod/gold counts for one DRS pair
                _, _, f_score = compute_f(items[0], items[1], items[2],
                                          args.significant, False)
                if args.all_idv:
                    print_line = " ".join(
                        [str(x) for x in [items[0], items[1], items[2]]])
                    out_f.write(print_line + '\n')
                else:
                    out_f.write(str(f_score) + '\n')
        # NOTE(review): redundant — the with-statement already closed the file
        out_f.close()
    # We might want to output statistics about individual types of clauses
    if args.detailed_stats > 0:
        save_detailed_stats([x[9] for x in all_results], args)
warning('Both ENTAILMENT and CONTRADICTION') return 'NEUTRAL', 'Mixed answers' if Contradicts: return 'CONTRADICTION', 'Definite answer' return 'NEUTRAL', 'Definite answer' ################################ ############## MAIN ############ if __name__ == '__main__': args = parse_arguments() # Read SICK problems as a lsit of tuples (id, label, premise, hypothesis) problems = read_sick_problems(args.sick) # Get mapping from SICK problem IDs to a pair of PMB documents for the premise and hypothesis sick2pd = sick2pd(problems, mapfile=args.sick2pd) # Read the signature of clausal forms as a dictionary signature = get_signature(args.clf_sig) # read axioms from the knowledge base if file is specified kb = read_kb(args.kb) if args.kb else {} # keep track of statuses of the theorem proving and predicted labels count = Counter() gold_labs, pred_labs, prob_ids = [], [], [] # go through each SICK problem and try to solve it with theorem proving for p in problems: if args.pids and p[0] not in args.pids: continue # skip the rest of the for-loop debug("\n\n{:=^100}".format("SICK-"+p[0])) info("\nSICK-{0} [{1}]\n\tPrem {4}: {2}\n\tHypo {5}: {3}".format(\ *p, *sick2pd[p[0]])) # get FOL formulas for the corresponding PMB documents (done via building a DRS from a clausal form) prem_fol, hypo_fol = [ pmb2fol(args.pmb, pd, sig=signature, drawDRS=args.draw_DRS)\ for pd in sick2pd[p[0]] ]
dnf_ops = args.op_keywords dnf_tks = get_anaphoric_pronouns( out='dnf') if args.pronouns else args.token_keywords if not (dnf_ops or dnf_tks) or (dnf_ops and dnf_tks): raise RuntimeError( "Exactly one of Operator or Token filtering-DNF should be specified" ) # get CLFs from the file src_clf_dict = file_to_clf_dict(args.src, v=args.v) trg_clf_dict = file_to_clf_dict(args.trg, v=args.v) # check number of retrieved clfs src_total = len(src_clf_dict) trg_total = len(trg_clf_dict) print "{} src and {} trg clfs retrieved".format(src_total, trg_total) # get info about signature as dictionary signature = get_signature(args.sig_file, v=args.v) # define counters trg_err_counter = Counter() src_err_counter = Counter() # contrast CLFs sen_ids = [] for sid in trg_clf_dict: # read raw and CLFs (raw, trg_clf) = trg_clf_dict[sid] #pr_clf(trg_clf, pr=True, inline=False) (src_raw, src_clf) = src_clf_dict[sid] #print raw, src_raw #assert raw == src_raw or src_raw is None # check validity of Gold CLF. If it is invalid, report and go to next CLF try: check_clf(trg_clf, signature, v=args.v)