示例#1
0
def perform_eval(eid, data):
    """Rank the target object of one episode from its tagged utterance.

    Each entry of ``data['speech']`` is handed to ``prepare_word``, which is
    expected to fill a fresh dict with the entries this function then reads:

    * ``'t'``  -- its first item updates the target distribution,
    * ``'l'``  -- its first item updates the landmark distribution,
    * ``'r'``  -- its first item is appended to the relation name
      (parts are joined with ``'_'``),
    * ``'r-'`` -- as ``'r'``, and additionally marks the relation as negated.

    Args:
        eid: Episode identifier. Not used by this function; kept so the
            call signature stays unchanged.
        data: Mapping with the keys ``'objects'``, ``'target'`` and
            ``'speech'``; every speech entry provides ``'word'`` and
            ``'tags'``.

    Returns:
        Whatever ``rank(target)`` returns: taken from the relation-combined
        distribution when at least one relation word was seen, otherwise
        from the normalised target distribution.

    Reads the module-level ``word_classifiers``.
    """
    objects = data['objects']
    target_dist = Distribution(objects)
    landmark_dist = Distribution(objects)
    target = data['target']

    relation = None
    relation_is_negated = False
    # Bound before the loop so that an episode without any speech falls
    # through to the target-only ranking instead of raising
    # UnboundLocalError at the final check.
    relation_dist = None

    for entry in data['speech']:
        utt = {}
        prepare_word(utt, entry['word'], entry['tags'])

        if 't' in utt:
            target_dist.update(
                logreg.classify(utt['t'][0], word_classifiers, objects))
        if 'l' in utt:
            landmark_dist.update(
                logreg.classify(utt['l'][0], word_classifiers, objects))

        # 'r' and 'r-' both extend the relation name; only 'r-' negates it.
        for key in ('r', 'r-'):
            if key not in utt:
                continue
            word = utt[key][0]
            relation = word if relation is None else relation + '_' + word
            if key == 'r-':
                relation_is_negated = True

        # Kept inside the loop so the relation is re-applied after every
        # word (incremental processing). Only the result computed for the
        # last word is used for the returned rank.
        if relation is not None:
            relation_dist = apply_relation(
                target_dist.copy(), landmark_dist.copy(),
                relation, relation_is_negated, objects)

    if relation_dist is not None:
        return relation_dist.rank(target)

    target_dist.normalise()
    return target_dist.rank(target)
示例#2
0
def apply_relation(target_dist, landmark_dist, relation, negated, objects):
    """Combine target and landmark distributions through a relation classifier.

    For every ordered pair of distinct keys ``(t, l)`` of ``objects`` the
    pair is scored as ``target_dist.get(t) * landmark_dist.get(l) * p``,
    where ``p`` is the output of the classifier for ``relation`` applied to
    the pair's relational features. When ``negated`` is true the two objects
    are passed to ``get_relational_features`` in swapped order.

    A relation without a trained classifier is replaced by ``"UNK_REL"`` and
    is then never treated as negated.

    Args:
        target_dist: Distribution queried with ``get(t)`` for each target key.
        landmark_dist: Distribution queried with ``get(l)`` for each landmark key.
        relation: Relation name, looked up in the module-level
            ``relation_word_classifiers``.
        negated: Whether the relation was marked as negated.
        objects: Mapping from object key to the object passed to
            ``get_relational_features``.

    Returns:
        The new ``Distribution`` holding one entry per pair (keyed by
        ``make_id(t, l)``), after ``marginalise()`` has been called on it.
    """
    classifiers = relation_word_classifiers
    if relation not in classifiers:
        relation, negated = "UNK_REL", False

    joint = Distribution()
    for tgt_id in objects:
        for lm_id in objects:
            if tgt_id != lm_id:
                # Negation flips which object comes first in the features.
                first, second = (lm_id, tgt_id) if negated else (tgt_id, lm_id)
                feats = get_relational_features(objects[first], objects[second])
                prob = logreg.classify_obj(relation, classifiers, feats.values())
                joint.add(
                    make_id(tgt_id, lm_id),
                    target_dist.get(tgt_id) * landmark_dist.get(lm_id) * prob)

    joint.marginalise()
    return joint
示例#3
0
    # One cross-validation pass: for each fold, rebuild the training
    # examples, train the word and relation classifiers, then rank the
    # target of every held-out episode.
    print('iteration', itr)
    iter_results = []
    # NOTE(review): folds are indexed 1..num_folds, so the first
    # `fold_size` keys are never used for evaluation and the last fold may
    # be short or empty depending on how fold_size is derived -- confirm
    # this is intended.
    for i in range(1, num_folds + 1):
        fold_start = i * fold_size
        fold_stop = (i + 1) * fold_size
        eval_data = data_keys[fold_start:][:fold_size]
        training_data = data_keys[:fold_start] + data_keys[fold_stop:]

        # Reset the per-fold example stores; presumably prepare_training
        # fills them through module state -- verify against its definition.
        words_list = {}
        relation_words_list = {}

        # gather training data
        for eid in training_data:
            prepare_training(data[eid], max_negs=2, r_only=True)

        # train word classifiers
        word_classifiers = {}
        for word, examples in words_list.items():
            word_classifiers[word] = logreg.train(examples)

        # train relation classifiers, pipe low-count relations into UNK
        unk_rel = []
        relation_word_classifiers = {}
        for word, examples in relation_words_list.items():
            if len(examples) <= 4:
                unk_rel += examples
            else:
                relation_word_classifiers[word] = logreg.train(examples)
        if unk_rel:
            relation_word_classifiers['UNK_REL'] = logreg.train(unk_rel)

        # evaluate
        for eid in eval_data:
            current_rank = perform_eval(eid, data[eid])
            # This line was indented with a tab plus spaces; it is now
            # spaces only, at the column Python 2 resolved it to (inside
            # this loop), so one rank is recorded per evaluated episode.
            iter_results.append(current_rank)
        # NOTE(review): this runs once per fold and appends the same list
        # object each time -- confirm it should not sit after the fold loop.
        results.append(iter_results)