# For the case when use "argument" parser = ArgumentParser() parser.add_argument('--source_file', help='source file for the prediction', default=path_source) parser.add_argument('--db_file', help='source database for the prediction', default=path_db) parser.add_argument('--pred_file', help='predictions by the model', default=path_pred) parser.add_argument('--ordered', action='store_true', help='whether the exact match should consider the order of conditions') args = parser.parse_args() args.ordered = ordered engine = DBEngine(args.db_file) exact_match = [] with open(args.source_file) as fs, open(args.pred_file) as fp: grades = [] for ls, lp in tqdm(zip(fs, fp), total=count_lines(args.source_file)): eg = json.loads(ls) ep = json.loads(lp) qg = Query.from_dict(eg['sql'], ordered=args.ordered) gold = engine.execute_query(eg['table_id'], qg, lower=True) pred = ep.get('error', None) qp = None if not ep.get('error', None): try: qp = Query.from_dict(ep['query'], ordered=args.ordered) pred = engine.execute_query(eg['table_id'], qp, lower=True) except Exception as e: pred = repr(e) correct = pred == gold match = qp == qg grades.append(correct)
if not os.path.isdir(args.dout): os.makedirs(args.dout) # for split in ['train', 'dev', 'test']: for split in ['train', 'dev', 'test']: fsplit = os.path.join(args.din, split) + '.jsonl' ftable = os.path.join(args.din, split) + '.tables.jsonl' fout = os.path.join(args.dout, split) + '_tok.jsonl' print('annotating {}'.format(fsplit)) with open(fsplit) as fs, open(ftable) as ft, open(fout, 'wt') as fo: print('loading tables') # ws: Construct table dict with table_id as a key. tables = {} for line in tqdm(ft, total=count_lines(ftable)): d = json.loads(line) tables[d['id']] = d print('loading examples') n_written = 0 cnt = -1 for line in tqdm(fs, total=count_lines(fsplit)): cnt += 1 d = json.loads(line) # a = annotate_example(d, tables[d['table_id']]) a = annotate_example_ws(d, tables[d['table_id']]) fo.write(json.dumps(a) + '\n') n_written += 1 if answer_toy: if cnt > toy_size:
if not os.path.isdir(args.dout): os.makedirs(args.dout) # for split in ['train', 'dev', 'test']: for split in args.split.split(','): fsplit = os.path.join(args.din, split) + '.jsonl' ftable = os.path.join(args.din, split) + '.tables.jsonl' fout = os.path.join(args.dout, split) + '_tok.jsonl' print('annotating {}'.format(fsplit)) with open(fsplit) as fs, open(ftable) as ft, open(fout, 'wt') as fo: # print('loading tables') # ws: Construct table dict with table_id as a key. tables = {} for line in tqdm(ft, total=count_lines(ftable),ncols = 60, desc = 'Loading Tables'): d = json.loads(line) tables[d['id']] = d # print('loading examples') n_written = 0 cnt = -1 for line in tqdm(fs, total=count_lines(fsplit),ncols = 60, desc = 'Loading Examples'): cnt += 1 d = json.loads(line) # a = annotate_example(d, tables[d['table_id']]) a = annotate_example_ws(d, tables[d['table_id']]) fo.write(json.dumps(a) + '\n') n_written += 1 if answer_toy: if cnt > toy_size:
# Evaluation harness (variant): like the other evaluation fragment, but
# --pred_file has no default and the progress total is taken from the
# prediction file rather than the source file.
# NOTE(review): collapsed fragment — `parser` is created before this chunk;
# path_db, `ordered`, DBEngine, Query, tqdm, count_lines and json are defined
# elsewhere in the file; indentation below is reconstructed.
parser.add_argument('--db_file', help='source database for the prediction', default=path_db)
parser.add_argument('--pred_file', help='predictions by the model')
parser.add_argument(
    '--ordered', action='store_true', help='whether the exact match should consider the order of conditions')
args = parser.parse_args()
# Overrides the parsed CLI flag with a module-level `ordered` value —
# presumably for interactive/notebook use; confirm against the caller.
args.ordered = ordered
engine = DBEngine(args.db_file)
exact_match = []
# Source examples and predictions are read in lockstep, one JSON object per line.
with open(args.source_file) as fs, open(args.pred_file) as fp:
    grades = []
    for ls, lp in tqdm(zip(fs, fp), total=count_lines(args.pred_file)):
        eg = json.loads(ls)
        ep = json.loads(lp)
        # Gold query and its execution result on the example's table.
        qg = Query.from_dict(eg['sql'], ordered=args.ordered)
        gold = engine.execute_query(eg['table_id'], qg, lower=True)
        # Default `pred` to the model's reported error (None when no error).
        pred = ep.get('error', None)
        qp = None
        if not ep.get('error', None):
            try:
                qp = Query.from_dict(ep['query'], ordered=args.ordered)
                pred = engine.execute_query(eg['table_id'], qp, lower=True)
            except Exception as e:
                # An execution failure becomes a (never-matching) error string.
                pred = repr(e)
        # Execution accuracy: result sets are equal.
        correct = pred == gold
        # Logical-form match; presumably consumed past this fragment
        # (e.g. appended to exact_match) — chunk ends just below.
        match = qp == qg
        grades.append(correct)