Example #1
def main(args):
    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]

    gold_relations = [json.loads(x) for x in open('%s/relations.json' % input_dataset)]
    predicted_relations = [json.loads(x) for x in open('%s/output.json' % input_run)]

    language = identify_language(gold_relations)
    all_correct = validate_relation_list(predicted_relations, language)
    if not all_correct:
        exit(1)

    output_file = open('%s/evaluation.prototext' % output_dir, 'w')
    print 'Evaluation for all discourse relations'
    write_results('All', evaluate(gold_relations, predicted_relations), output_file)

    print 'Evaluation for explicit discourse relations only'
    explicit_gold_relations = [x for x in gold_relations if x['Type'] == 'Explicit']
    explicit_predicted_relations = [x for x in predicted_relations if x['Type'] == 'Explicit']
    write_results('Explicit only', \
        evaluate(explicit_gold_relations, explicit_predicted_relations), output_file)

    print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
    non_explicit_gold_relations = [x for x in gold_relations if x['Type'] != 'Explicit']
    non_explicit_predicted_relations = [x for x in predicted_relations if x['Type'] != 'Explicit']
    write_results('Non-explicit only', \
        evaluate(non_explicit_gold_relations, non_explicit_predicted_relations), output_file)

    output_file.close()
Example #2
def evaluate_and_visualize(config_name,
                           model_path,
                           output_path,
                           gene_variant=None):
    # Prepare tokenizer, dataset, and model
    configs = get_configs(config_name, verbose=False)
    if configs['use_gene_features']:
        assert gene_variant is not None
        configs['gene_variant'] = gene_variant
    tokenizer = BertTokenizer.from_pretrained(configs['transformer'],
                                              do_basic_tokenize=False)
    train_set, dev_set, test_set = load_oneie_dataset(
        configs['base_dataset_path'], tokenizer)
    model = BasicCorefModel(configs)

    # Reload the model and evaluate
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    print('Evaluation on the dev set', flush=True)
    evaluate(model, dev_set, configs)
    print('Evaluation on the test set', flush=True)
    evaluate(model, test_set, configs)

    # Generate visualizations (for the test set)
    generate_coref_preds(model, test_set, '_predictions.json')
    generate_visualizations('_predictions.json', output_path)
    os.remove('_predictions.json')
Example #3
def evaluate(scorer_dataset_file, feature_file, **kwargs):
    scorer_dataset = joblib.load(scorer_dataset_file)
    if feature_file:
        feature_list = json.load(feature_file)['features']
    else:
        feature_list = None

    scorer.evaluate(scorer_dataset, feature_list, **kwargs)
Example #4
def main(args):
    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]

    gold_relations = [
        json.loads(x) for x in open('%s/relations.json' % input_dataset)
    ]
    predicted_relations = [
        json.loads(x) for x in open('%s/output.json' % input_run)
    ]
    if len(gold_relations) != len(predicted_relations):
        err_message = 'Gold standard has %s instances; predicted %s instances' % \
                (len(gold_relations), len(predicted_relations))
        print >> sys.stderr, err_message
        exit(1)

    language = identify_language(gold_relations)
    all_correct = validate_relation_list(predicted_relations, language)
    if not all_correct:
        print >> sys.stderr, 'Invalid format'
        exit(1)

    gold_relations = sorted(gold_relations, key=lambda x: x['ID'])
    predicted_relations = sorted(predicted_relations, key=lambda x: x['ID'])
    use_gold_standard_types(gold_relations, predicted_relations)

    output_file = open('%s/evaluation.prototext' % output_dir, 'w')
    print 'Evaluation for all discourse relations'
    write_results('All', evaluate(gold_relations, predicted_relations),
                  output_file)

    print 'Evaluation for explicit discourse relations only'
    explicit_gold_relations = [
        x for x in gold_relations if x['Type'] == 'Explicit'
    ]
    explicit_predicted_relations = [
        x for x in predicted_relations if x['Type'] == 'Explicit'
    ]
    write_results('Explicit only', \
        evaluate(explicit_gold_relations, explicit_predicted_relations), output_file)

    print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
    non_explicit_gold_relations = [
        x for x in gold_relations if x['Type'] != 'Explicit'
    ]
    non_explicit_predicted_relations = [
        x for x in predicted_relations if x['Type'] != 'Explicit'
    ]
    write_results('Non-explicit only', \
        evaluate(non_explicit_gold_relations, non_explicit_predicted_relations), output_file)

    output_file.close()
Example #5
def main(args):
    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]

    gold_relations = [
        json.loads(x) for x in open('%s/relations.json' % input_dataset)
    ]
    predicted_relations = [
        json.loads(x) for x in open('%s/output.json' % input_run)
    ]

    language = identify_language(gold_relations)
    all_correct = validate_relation_list(predicted_relations, language)
    if not all_correct:
        exit(1)

    output_file = open('%s/evaluation.prototext' % output_dir, 'w')
    print 'Evaluation for all discourse relations'
    write_results('All', evaluate(gold_relations, predicted_relations),
                  output_file)

    print 'Evaluation for explicit discourse relations only'
    explicit_gold_relations = [
        x for x in gold_relations if x['Type'] == 'Explicit'
    ]
    explicit_predicted_relations = [
        x for x in predicted_relations if x['Type'] == 'Explicit'
    ]
    write_results('Explicit only', \
        evaluate(explicit_gold_relations, explicit_predicted_relations), output_file)

    print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
    non_explicit_gold_relations = [
        x for x in gold_relations if x['Type'] != 'Explicit'
    ]
    non_explicit_predicted_relations = [
        x for x in predicted_relations if x['Type'] != 'Explicit'
    ]
    write_results('Non-explicit only', \
        evaluate(non_explicit_gold_relations, non_explicit_predicted_relations), output_file)

    print '\nPartial Evaluation for all discourse relations'
    write_partial_match_results('All (partial match)', \
        partial_evaluate(gold_relations, predicted_relations, 0.7), output_file)
    print '\nPartial Evaluation for explicit discourse relations'
    write_partial_match_results('Explicit only (partial match)', \
        partial_evaluate(explicit_gold_relations, explicit_predicted_relations, 0.7), output_file)
    print '\nPartial Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
    write_partial_match_results('Non-explicit only (partial match)', \
        partial_evaluate(non_explicit_gold_relations, non_explicit_predicted_relations, 0.7), output_file)

    output_file.close()
Example #6
def main(args):
    input_dataset = args[1]
    input_run = args[2]
    output_dir = args[3]

    relation_file = '%s/relations.json' % input_dataset
    gold_relations = []
    file_line = 0
    for x in open(relation_file):
        try:
            gold_relations.append(json.loads(x[x.index('{'):]))
        except ValueError:
            print "Error reading json file on line %s" % file_line
            print x
        file_line = file_line + 1
    # gold_relations = [json.loads(x) for x in open('%s/relations.json' % input_dataset)]

    predicted_relations = [json.loads(x) for x in open('%s/output.json' % input_run)]
    if len(gold_relations) != len(predicted_relations):
        err_message = 'Gold standard has %s instances; predicted %s instances' % \
                (len(gold_relations), len(predicted_relations))
        print >> sys.stderr, err_message
        exit(1)

    language = identify_language(gold_relations)
    all_correct = validate_relation_list(predicted_relations, language)
    if not all_correct:
        print >> sys.stderr, 'Invalid format'
        exit(1)

    gold_relations = sorted(gold_relations, key=lambda x: x['ID'])
    predicted_relations = sorted(predicted_relations, key=lambda x: x['ID'])
    use_gold_standard_types(gold_relations, predicted_relations)

    output_file = open('%s/evaluation.prototext' % output_dir, 'w')
    print 'Evaluation for all discourse relations'
    write_results('All', evaluate(gold_relations, predicted_relations), output_file)

    print 'Evaluation for explicit discourse relations only'
    explicit_gold_relations = [x for x in gold_relations if x['Type'] == 'Explicit']
    explicit_predicted_relations = [x for x in predicted_relations if x['Type'] == 'Explicit']
    write_results('Explicit only', \
        evaluate(explicit_gold_relations, explicit_predicted_relations), output_file)

    print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
    non_explicit_gold_relations = [x for x in gold_relations if x['Type'] != 'Explicit']
    non_explicit_predicted_relations = [x for x in predicted_relations if x['Type'] != 'Explicit']
    write_results('Non-explicit only', \
        evaluate(non_explicit_gold_relations, non_explicit_predicted_relations), output_file)

    output_file.close()
Example #7
def main():
	"""Test the scorer

	There are 29 gold relations.
	We corrupt 5 relations and remove 1. 
	Precision = (29 - 6) / 28 = 0.8214
	Recall = (29 - 6) / 29 = 0.7931
	F1 = 2 * (0.8214 * 0.7931) / (0.8214 + 0.7931) = 0.8070
	"""
	relations = [json.loads(x) for x in open('tutorial/pdtb_trial_data.json')]
	output_relations = [convert_to_output(x) for x in relations]
	output_relations[1]['Connective']['TokenList'] = [0]
	output_relations[3]['Arg1']['TokenList'].pop(4)
	output_relations[4]['Arg2']['TokenList'].pop(4)
	output_relations[5]['Arg2']['TokenList'].pop(4)
	output_relations[6]['Sense'] = [u'Contingency.Condition'] # This will hurt sense recall
	output_relations.pop(0) # This will hurt all precision
	scorer.evaluate(relations, output_relations)
	return output_relations
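
As a quick standalone sanity check (not part of the scorer), the figures quoted in the docstring above can be reproduced from its counts: 29 gold relations, 28 predicted after one is removed, and 23 exact matches.

# Sanity check of the precision/recall/F1 figures quoted in the docstring above.
gold_total = 29
predicted_total = 28
correct = gold_total - 6  # 5 corrupted relations plus the 1 removed one do not match

precision = correct / float(predicted_total)        # ~0.8214
recall = correct / float(gold_total)                # ~0.7931
f1 = 2 * precision * recall / (precision + recall)  # ~0.8070
print('precision=%.4f  recall=%.4f  f1=%.4f' % (precision, recall, f1))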
Example #8
def main():
    """Test the scorer

	There are 29 gold relations.
	We corrupt 5 relations and remove 1. 
	Precision = (29 - 6) / 28 = 0.8214
	Recall = (29 - 6) / 29 = 0.7931
	F1 = 2 * (0.8214 * 0.7931) / (0.8214 + 0.7931) = 0.8070
	"""
    relations = [json.loads(x) for x in open('tutorial/pdtb_trial_data.json')]
    output_relations = [convert_to_output(x) for x in relations]
    output_relations[1]['Connective']['TokenList'] = [0]
    output_relations[3]['Arg1']['TokenList'].pop(4)
    output_relations[4]['Arg2']['TokenList'].pop(4)
    output_relations[5]['Arg2']['TokenList'].pop(4)
    output_relations[6]['Sense'] = [u'Contingency.Condition']  # This will hurt sense recall
    output_relations.pop(0)  # This will hurt all precision
    scorer.evaluate(relations, output_relations)
    return output_relations
Example #9
def tester(world_name,punter_names,fout):

    global simple_gui
    global FPS_FACT
    
    if MAC_MUSIC:
        music_proc = subprocess.Popen([MUSIC_CMD, MUSIC[music_id],'-I','rc'])
    
    N=len(punter_names)
    
    
    ###  START OF PROGRAM
    world_test = world.World(world_name)



    pod_list=[]
    zombies=[]    
    cnt=0
    default_dir=os.getcwd()    
    
    for name in punter_names:
        pod   = pods.CarPod(world_test)
        pod_list.append(pod)
        pod.score=0.0
        pod.stat="-"
        pod.name=name
        pod.mess="Uninitialized"
        
        try:
            punters_path='punters_test/'+name
            os.chdir(punters_path)

            plug=importlib.import_module('punters_test.'+name+'.plugin')

            # call the plugin to equip the car 
            # set the current path to the punters directory
            plug.equip_car(pod)
            os.chdir(default_dir)
            
            pod.controller=plug.controller


            hue=(360.0*cnt)/N
            col=pygame.Color(0)
            col.hsla=(hue,100,50,0)
            pod.col=(col.r,col.g,col.b)
            cnt+=1
        except:
            print name
            print "Unexpected error:", sys.exc_info()
#            fout.write(name+" Error "+ str(sys.exc_info()[0]))
            traceback.print_tb(sys.exc_info()[2])
            pod.mess="Loading Error: "+str(sys.exc_info()[0])
            pod.score=0.0
            pod.stat="E"
            zombies.append(pod)
            os.chdir(default_dir)
            
            
    runners=copy.copy(pod_list)

    # remove zombies      
    for pod in zombies:
        runners.remove(pod)
    
    if GUI:
        simple_gui=gui.SimpleGui(frames_per_sec=int(FPS_FACT/world_test.dt),world=world_test,pods=runners,back_ground=(5,5,5))
    
    
    # use a control to activate the car.
    control=pods.Control()
    
    while runners:
    
        zombies=[]
        
        for pod in runners:
            try:
                pod.controller(pod)
                pod.step()
                score,kill,mess=scorer.evaluate(pod)
                pod.score=max(score,0)
                pod.mess=mess
        
            except:
                   
                print pod.name+": Unexpected error:", sys.exc_info()
                traceback.print_tb(sys.exc_info()[2])                
                pod.score=0
                pod.mess="RunError ->"+str(sys.exc_info())
                kill=True
                pod.stat="e"
                
            if kill:
                zombies.append(pod)
          
        # remove crashed      
        for pod in zombies:
            runners.remove(pod)

            
        ranked = sorted(pod_list, key = lambda x:x.score,reverse=True)
        
        
        if GUI:
            disp=""
            pos=[0,10]
            simple_gui.clear()
            
            for pod in ranked:
                col=pod.col
            
                gui_base.draw_string(simple_gui.screen,pod.stat+":"+pod.name,pos,col,FONT_SIZE,'Courier New') 
            
                pos[1]+=FONT_SIZE
                
            simple_gui.display(clear=False,fps=int(FPS_FACT/world_test.dt))
            
            if simple_gui.check_for_quit():
                sys.exit(0)
            
            if simple_gui.get_pressed()[gui.keys.K_p]:
                pause=True
                
            if simple_gui.get_pressed()[gui.keys.K_EQUALS]:
                FPS_FACT = min(FPS_FACT*2,200)
                print FPS_FACT
          
            if simple_gui.get_pressed()[gui.keys.K_MINUS]:
                FPS_FACT = max(int(FPS_FACT/2),1)
                print FPS_FACT
                 
                  
            if simple_gui.get_pressed()[gui.keys.K_s]:
                pause=False
            
            

    ranked=sorted(pod_list, key = lambda x:x.score,reverse=True)

    for pod in ranked:
        buff="%15s %6.3f %s" %   (pod.name+":",pod.score, ":"+pod.mess+"\n")
        fout.write(buff)
 
    if MAC_MUSIC:
        music_proc.terminate()

    return pod_list
Example #10
def train(config_name, gene_variant=None):
    # Prepare tokenizer, dataset, and model
    configs = get_configs(config_name, verbose=False)
    if configs['use_gene_features']:
        assert gene_variant is not None
        configs['gene_variant'] = gene_variant
    tokenizer = BertTokenizer.from_pretrained(configs['transformer'], do_basic_tokenize=False)
    train_set, dev_set, test_set = load_oneie_dataset(configs['base_dataset_path'], tokenizer)
    model = BasicCorefModel(configs)

    # Initialize the optimizer
    num_train_docs = len(train_set)
    epoch_steps = int(math.ceil(num_train_docs / configs['batch_size']))
    num_train_steps = int(epoch_steps * configs['epochs'])
    num_warmup_steps = int(num_train_steps * 0.1)
    optimizer = model.get_optimizer(num_warmup_steps, num_train_steps)
    print('Initialized optimizer')

    # Main training loop
    best_dev_score, iters, batch_loss = 0.0, 0, 0
    for epoch in range(configs['epochs']):
        #print('Epoch: {}'.format(epoch))
        print('\n')
        progress = tqdm.tqdm(total=epoch_steps, ncols=80,
                             desc='Train {}'.format(epoch))
        accumulated_loss = RunningAverage()

        train_indices = list(range(num_train_docs))
        random.shuffle(train_indices)
        for train_idx in train_indices:
            iters += 1
            inst = train_set[train_idx]
            iter_loss = model(inst, is_training=True)[0]
            iter_loss /= configs['batch_size']
            iter_loss.backward()
            batch_loss += iter_loss.data.item()
            if iters % configs['batch_size'] == 0:
                accumulated_loss.update(batch_loss)
                torch.nn.utils.clip_grad_norm_(model.parameters(), configs['max_grad_norm'])
                optimizer.step()
                optimizer.zero_grad()
                batch_loss = 0
                # Update progress bar
                progress.update(1)
                progress.set_postfix_str('Average Train Loss: {}'.format(accumulated_loss()))
        progress.close()

        # Evaluation after each epoch
        print('Evaluation on the dev set', flush=True)
        dev_score = evaluate(model, dev_set, configs)['avg']

        # Save model if it has better dev score
        if dev_score > best_dev_score:
            best_dev_score = dev_score
            # Evaluation on the test set
            print('Evaluation on the test set', flush=True)
            evaluate(model, test_set, configs)
            # Save the model
            save_path = os.path.join(configs['saved_path'], 'model.pt')
            torch.save({'model_state_dict': model.state_dict()}, save_path)
            print('Saved the model', flush=True)
Example #11
    print "Unexpected error:", sys.exc_info()
    traceback.print_tb(sys.exc_info()[2])
    pod.mess = "Loading Error: " + str(sys.exc_info()[0])
    os.chdir(default_dir)
    sys.exit(0)

os.chdir(default_dir)

if GUI:
    simple_gui = gui.SimpleGui(frames_per_sec=int(FPS_FACT / track.dt),
                               world=track,
                               pods=[pod],
                               back_ground=(5, 5, 5))

while True:

    pod.controller(pod)
    pod.step()

    if GUI:
        simple_gui.set_message(str(pod.state))
        simple_gui.display()
        if simple_gui.check_for_quit():
            sys.exit(0)

    score, kill, mess = scorer.evaluate(pod)
    if kill:
        print " mess=", mess
        print " score=", score
        break
Example #12
"""CONLL Shared Task 2015 Scorer

"""
import argparse
import json

from scorer import evaluate

if __name__ == '__main__':
	parser = argparse.ArgumentParser(description="Evaluate system's output against the gold standard")
	parser.add_argument('gold', help='Gold standard file')
	parser.add_argument('predicted', help='System output file')
	args = parser.parse_args()
	gold_list = [json.loads(x) for x in open(args.gold)]
	predicted_list = [json.loads(x) for x in open(args.predicted)]

	print 'Evaluation for all discourse relations'
	evaluate(gold_list, predicted_list)

	print 'Evaluation for explicit discourse relations only'
	explicit_gold_list = [x for x in gold_list if x['Type'] == 'Explicit']
	explicit_predicted_list = [x for x in predicted_list if x['Type'] == 'Explicit']
	evaluate(explicit_gold_list, explicit_predicted_list)

	print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
	non_explicit_gold_list = [x for x in gold_list if x['Type'] != 'Explicit']
	non_explicit_predicted_list = [x for x in predicted_list if x['Type'] != 'Explicit']
	evaluate(non_explicit_gold_list, non_explicit_predicted_list)

Example #13
def run_test(images, base_dir):
    zoom_prefix = str(zoom_level) + 'x/' if zoom_level > 1 else ''

    for image in images:
        # Set the current image for the evaluation scorer
        scorer.set_current_image(image)

        # if not image.startswith('009-NW'):
        #   continue

        print('Processing: ' + image)

        # Get OCR data from the oxford API
        data = oxford_api.get_json_data(image, base_dir, zoom_level, img_pref)

        ai2_zoom_level = 3
        # ai2_data = ai2_api.get_json_data(image, base_dir, ai2_zoom_level, img_pref);
        # ai2_boxes = ai2_api.convert_to_boxes(ai2_data, ai2_zoom_level)

        # Extract lines from the image
        lines = liner.get_lines(image, base_dir)

        # Extract hierarchical contours
        h_boxes, hierarchy = hallucinator.get_contours(
            image, base_dir, img_pref + 'box_hallucinations/' + image)

        # Here we could filter out top level boxes to get rid
        # of legends, etc.

        root_boxes = hallucinator.get_root_contours(h_boxes, hierarchy)
        #     import pdb;pdb.set_trace()
        #     best_root = hallucinator.get_most_nested(root_boxes, hierarchy, h_boxes)
        #     if best_root is None:
        best_rects = h_boxes
        base_box = get_full_box(image, base_dir)
        #     else:
        #       best_rects = hallucinator.get_rects(best_root[1], h_boxes)
        #       base_box = hallucinator.contour_to_box(best_root[0][1])
        child_boxes = hallucinator.contours_to_boxes(
            hallucinator.get_child_contours(best_rects, hierarchy))

        # gt_boxes = get_gt_boxes(image, img_pref)

        margins = spacer.get_whitespace(image, base_dir)

        ocr_boxes, raw_boxes = boxer.get_boxes(
            data, zoom_level, lines, img_pref + 'combos/' + image + '.txt',
            child_boxes, margins, img_pref + 'google_cache/' + zoom_prefix,
            base_dir + '/' + zoom_prefix, image)

        # box_points = get_v_points(raw_boxes)
        # voronoi.process_image_points(image, base_dir, img_pref + 'voronoi/', box_points)

        # ocr_boxes, raw_boxes = boxer.get_boxes(ai2_data, zoom_level, lines, img_pref + 'combos/' + image + '.txt', child_boxes)

        # Merge the oxford ocr boxes with the ai2 boxes
        # boxer.merge_ocr_boxes(ocr_boxes, ai2_boxes)

        merged_boxes = boxer.merge_box_groups(child_boxes, ocr_boxes, 0.9,
                                              base_box)
        # merged_boxes = gt_boxes

        merged_labels = boxer.merge_ocr_boxes(raw_boxes, [])  # ai2_boxes)

        # TODO: Ensure that this is sorted right
        # boxes = boxer.add_labels(merged_boxes, merged_labels, 0.9)
        # boxes = cloud_api.add_labels(merged_boxes, base_dir + '/', image, img_pref + 'google_cache/', 1)
        boxes = cloud_api.add_labels(merged_boxes,
                                     base_dir + '/' + zoom_prefix, image,
                                     img_pref + 'google_cache/' + zoom_prefix,
                                     zoom_level)

        scores = liner.rate_lines(lines, boxes)

        filtered_lines = liner.filter_lines(lines, boxes, scores)

        new_lines = liner.remove_lines(lines, filtered_lines, scores)

        rows, cols = score_rows.get_structure(boxes, new_lines)

        # predicted_boxes = boxer.predict_missing_boxes(rows, cols, boxes)

        scorer.evaluate_cells(image, img_pref, boxes)  # + predicted_boxes)

        # import pdb;pdb.set_trace()

        if verbose:
            print_structure(rows, 'Rows')
            print_structure(cols, 'Cols')

        # draw_lines(base_dir + '/' + image, lines, img_pref + 'table_labeling/' + image + '_orig.jpg')
        # draw_lines(base_dir + '/' + image, new_lines, img_pref + 'table_labeling/' + image)

        # draw_structure(translate_box_paradigm(raw_boxes), base_dir + '/' + image, img_pref + 'table_structure/' + image + '_oxford_ocr.jpg')
        # draw_structure(translate_box_paradigm(boxes), base_dir + '/' + image, img_pref + 'table_structure/' + image + '_merged_boxes.jpg')
        # draw_structure(translate_box_paradigm(merged_labels), base_dir + '/' + image, img_pref + 'table_structure/' + image + '_merged_ocr.jpg')
        # draw_structure(translate_box_paradigm(ai2_boxes), base_dir + '/' + image, img_pref + 'table_structure/' + image + '_ai2_ocr.jpg')
        # draw_structure(rows, base_dir + '/' + image, img_pref + 'table_structure/' + image + '_rows.jpg')
        # draw_structure(cols, base_dir + '/' + image, img_pref + 'table_structure/' + image + '_cols.jpg')
        spreadsheeter.output(
            rows, cols, boxes,
            img_pref + xlsx_path + '/' + zoom_prefix + image + '.xlsx',
            img_pref + json_out_path + '/' + zoom_prefix + image + '.json')

        if verbose:
            print('Estimating (' + str(len(new_lines[0]) - 1) + ' x ' +
                  str(len(new_lines[1]) - 1) + ')')

            print()

        if sleep_delay > 0:
            time.sleep(sleep_delay)

    scorer.score_cells_overall()
    scorer.evaluate()
Example #14
def scores_compute(gold_json, systems):
    """Verify and compute scores of all system outputs."""
    def to_percent(vals):
        return [v * 100.0 for v in vals]

    gold_list = [json.loads(x) for x in open(gold_json)]

    scores = {}
    for system_name, system_json in systems:
        log.debug("- validating system '{}' ('{}')...".format(
            system_name, system_json))
        if system_json != gold_json and not validator.validate_file(
                system_json):
            log.error("Invalid system output format in '{}' ('{}')!".format(
                system_name, system_json))
            exit(-1)

        log.debug("- scoring system '{}' ('{}')...".format(
            system_name, system_json))
        if system_json != gold_json:
            predicted_list = [json.loads(x) for x in open(system_json)]
        else:  # gold standard as system output
            import copy
            predicted_list = conv_gold_to_output(copy.deepcopy(gold_list))
        connective_cm, arg1_cm, arg2_cm, rel_arg_cm, sense_cm, precision, recall, f1 = scorer.evaluate(
            gold_list, predicted_list)

        scores[system_name] = {
            'conn': to_percent(connective_cm.get_prf('yes')),
            'arg1': to_percent(arg1_cm.get_prf('yes')),
            'arg2': to_percent(arg2_cm.get_prf('yes')),
            'comb': to_percent(rel_arg_cm.get_prf('yes')),
            'sense': to_percent(cm_avg_prf(sense_cm)),
            'overall': to_percent((precision, recall, f1)),
        }
    return scores
Example #15
def scores_compute(gold_json, systems):
    """Verify and compute scores of all system outputs."""

    def to_percent(vals):
        return [v * 100.0 for v in vals]

    gold_list = [ json.loads(x) for x in open(gold_json) ]

    scores = {}
    for system_name, system_json in systems:
        log.debug("- validating system '{}' ('{}')...".format(system_name, system_json))
        if system_json != gold_json and not validator.validate_file(system_json):
            log.error("Invalid system output format in '{}' ('{}')!".format(system_name, system_json))
            exit(-1)

        log.debug("- scoring system '{}' ('{}')...".format(system_name, system_json))
        if system_json != gold_json:
            predicted_list = [ json.loads(x) for x in open(system_json) ]
        else:  # gold standard as system output
            import copy
            predicted_list = conv_gold_to_output(copy.deepcopy(gold_list))
        connective_cm, arg1_cm, arg2_cm, rel_arg_cm, sense_cm, precision, recall, f1 = scorer.evaluate(gold_list, predicted_list)

        scores[system_name] = {
            'conn': to_percent(connective_cm.get_prf('yes')),
            'arg1': to_percent(arg1_cm.get_prf('yes')),
            'arg2': to_percent(arg2_cm.get_prf('yes')),
            'comb': to_percent(rel_arg_cm.get_prf('yes')),
            'sense': to_percent(cm_avg_prf(sense_cm)),
            'overall': to_percent((precision, recall, f1)),
        }
    return scores
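
For reference, the dict returned by scores_compute maps each system name to [precision, recall, F1] triples, already converted to percent, under the keys built above. The sketch below prints that dict as a simple table; the helper print_scores and its formatting are illustrative additions, not part of the original code.

def print_scores(scores):
    # Illustrative helper: dump the dict returned by scores_compute.
    # Each entry is [precision, recall, f1] in percent, keyed by the
    # metric names used above.
    fields = ['conn', 'arg1', 'arg2', 'comb', 'sense', 'overall']
    for system_name in sorted(scores):
        print('%s' % system_name)
        for field in fields:
            p, r, f1 = scores[system_name][field]
            print('  %-8s P=%6.2f  R=%6.2f  F1=%6.2f' % (field, p, r, f1))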
Example #16
    input_run = sys.argv[2]
    output_dir = sys.argv[3]

    gold_relations = [
        json.loads(x) for x in open('%s/pdtb-data.json' % input_dataset)
    ]
    predicted_relations = [
        json.loads(x) for x in open('%s/output.json' % input_run)
    ]
    all_correct = validate_relation_list(predicted_relations)
    if not all_correct:
        exit(1)

    output_file = open('%s/evaluation.prototext' % output_dir, 'w')
    print 'Evaluation for all discourse relations'
    write_results('All', evaluate(gold_relations, predicted_relations),
                  output_file)

    print 'Evaluation for explicit discourse relations only'
    explicit_gold_relations = [
        x for x in gold_relations if x['Type'] == 'Explicit'
    ]
    explicit_predicted_relations = [
        x for x in predicted_relations if x['Type'] == 'Explicit'
    ]
    write_results(
        'Explicit only',
        evaluate(explicit_gold_relations, explicit_predicted_relations),
        output_file)

    print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
Example #17
	write_proto_text('%s Sense recall' % prefix, r, output_file)
	write_proto_text('%s Sense f1' % prefix, f, output_file)

if __name__ == '__main__':
	input_dataset = sys.argv[1]
	input_run = sys.argv[2]
	output_dir = sys.argv[3]

	gold_relations = [json.loads(x) for x in open('%s/pdtb-data.json' % input_dataset)]
	predicted_relations = [json.loads(x) for x in open('%s/output.json' % input_run)]
	all_correct = validate_relation_list(predicted_relations)
	if not all_correct:
		exit(1)

	output_file = open('%s/evaluation.prototext' % output_dir, 'w')
	print 'Evaluation for all discourse relations'
	write_results('All', evaluate(gold_relations, predicted_relations), output_file)

	print 'Evaluation for explicit discourse relations only'
	explicit_gold_relations = [x for x in gold_relations if x['Type'] == 'Explicit']
	explicit_predicted_relations = [x for x in predicted_relations if x['Type'] == 'Explicit']
	write_results('Explicit only', evaluate(explicit_gold_relations, explicit_predicted_relations), output_file)

	print 'Evaluation for non-explicit discourse relations only (Implicit, EntRel, AltLex)'
	non_explicit_gold_relations = [x for x in gold_relations if x['Type'] != 'Explicit']
	non_explicit_predicted_relations = [x for x in predicted_relations if x['Type'] != 'Explicit']
	write_results('Non-explicit only', evaluate(non_explicit_gold_relations, non_explicit_predicted_relations), output_file)
	
	output_file.close()

Example #18

os.chdir(default_dir)

if GUI:
    simple_gui=gui.SimpleGui(frames_per_sec=int(FPS_FACT/track.dt),world=track,pods=[pod],back_ground=(5,5,5))

while True:

    pod.controller(pod)
    pod.step()

    if GUI:
        simple_gui.set_message(str(pod.state))
        simple_gui.display()
        if simple_gui.check_for_quit():
                sys.exit(0)

    score,kill,mess=scorer.evaluate(pod)
    if kill:
        print " mess=",mess
        print " score=",score
        break
Example #19
def write_proto_text(key, value, f):
	f.write('measure {\n key: "%s" \n value: "%s"\n}\n' % (key, round(value, 4)))

if __name__ == '__main__':
	input_dataset = sys.argv[1]
	input_run = sys.argv[2]
	output_dir = sys.argv[3]

	gold_relations = [json.loads(x) for x in open('%s/pdtb-data.json' % input_dataset)]
	predicted_relations = [json.loads(x) for x in open('%s/output.json' % input_run)]

	all_correct = validate_relation_list(predicted_relations)
	if not all_correct:
		exit(1)
	connective_cm, arg1_cm, arg2_cm, rel_arg_cm, sense_cm, precision, recall, f1 = \
			evaluate(gold_relations, predicted_relations)
	output_file = open('%s/evaluation.prototext' % output_dir, 'w')

	write_proto_text('Parser precision', precision, output_file)
	write_proto_text('Parser recall', recall, output_file)
	write_proto_text('Parser f1', f1, output_file)

	p, r, f = connective_cm.get_prf('yes')
	write_proto_text('Explicit connective precision', p, output_file)
	write_proto_text('Explicit connective recall', r, output_file)
	write_proto_text('Explicit connective f1', f, output_file)

	p, r, f = arg1_cm.get_prf('yes')
	write_proto_text('Arg1 extraction precision', p, output_file)
	write_proto_text('Arg1 extraction recall', r, output_file)
	write_proto_text('Arg1 extraction f1', f, output_file)
Example #20
    input_dataset = sys.argv[1]
    input_run = sys.argv[2]
    output_dir = sys.argv[3]

    gold_relations = [
        json.loads(x) for x in open('%s/pdtb-data.json' % input_dataset)
    ]
    predicted_relations = [
        json.loads(x) for x in open('%s/output.json' % input_run)
    ]

    all_correct = validate_relation_list(predicted_relations)
    if not all_correct:
        exit(1)
    connective_cm, arg1_cm, arg2_cm, rel_arg_cm, sense_cm, precision, recall, f1 = \
      evaluate(gold_relations, predicted_relations)
    output_file = open('%s/evaluation.prototext' % output_dir, 'w')

    write_proto_text('Parser precision', precision, output_file)
    write_proto_text('Parser recall', recall, output_file)
    write_proto_text('Parser f1', f1, output_file)

    p, r, f = connective_cm.get_prf('yes')
    write_proto_text('Explicit connective precision', p, output_file)
    write_proto_text('Explicit connective recall', r, output_file)
    write_proto_text('Explicit connective f1', f, output_file)

    p, r, f = arg1_cm.get_prf('yes')
    write_proto_text('Arg1 extraction precision', p, output_file)
    write_proto_text('Arg1 extraction recall', r, output_file)
    write_proto_text('Arg1 extraction f1', f, output_file)