def perform_eval(eid, data):
    """Evaluate one episode: fuse per-word classifier evidence into target and
    landmark distributions, apply any accumulated spatial relation, and return
    the rank of the true target object (lower is better).

    Args:
        eid: episode identifier (kept for interface compatibility; unused here).
        data: episode dict with keys 'objects', 'target', and 'speech' (a list
            of {'word': ..., 'tags': ...} entries).

    Returns:
        Rank of ``data['target']`` under the final distribution.
    """
    global word_classifiers
    global relation_word_classifiers

    target_dist = Distribution(data['objects'])
    landmark_dist = Distribution(data['objects'])
    target = data['target']
    objects = data['objects']  # loop-invariant; hoisted out of the word loop
    relation = None
    relation_is_negated = False
    # Initialised up-front so an episode with empty speech cannot NameError
    # at the final `if relation_dist is not None` check.
    relation_dist = None

    for w, tags in [(w['word'], w['tags']) for w in data['speech']]:
        utt = {}
        prepare_word(utt, w, tags)
        if 't' in utt:
            # Target-describing word: fold its classifier output into target_dist.
            word = utt['t'][0]
            target_dist.update(logreg.classify(word, word_classifiers, objects))
        if 'l' in utt:
            # Landmark-describing word: fold its classifier output into landmark_dist.
            word = utt['l'][0]
            landmark_dist.update(logreg.classify(word, word_classifiers, objects))
        if 'r' in utt:
            # Relation word: multi-word relations accumulate joined by '_'.
            word = utt['r'][0]
            if relation is not None:
                relation += '_' + word
            else:
                relation = word
        if 'r-' in utt:
            # Negated relation word: same accumulation, but flips the negation flag.
            word = utt['r-'][0]
            if relation is not None:
                relation += '_' + word
            else:
                relation = word
            relation_is_negated = True

    if relation is not None:
        # NOTE: indent this block into the loop above to make evaluation incremental.
        tdist = target_dist.copy()
        ldist = landmark_dist.copy()
        relation_dist = apply_relation(tdist, ldist, relation,
                                       relation_is_negated, objects)

    if relation_dist is not None:
        return relation_dist.rank(target)
    else:
        target_dist.normalise()
        return target_dist.rank(target)
def apply_relation(target_dist, landmark_dist, relation, negated, objects):
    """Combine target and landmark distributions through a relation classifier.

    Scores every ordered pair of distinct objects (target candidate, landmark
    candidate) with the relation's classifier, weights each pair by both input
    distributions, then marginalises the pairwise distribution before returning.
    """
    global relation_word_classifiers

    # Unseen relations fall back to the pooled UNK_REL model; the fallback is
    # never treated as negated.
    if relation not in relation_word_classifiers:
        relation = "UNK_REL"
        negated = False

    pairwise = Distribution()
    for cand_t in objects:
        for cand_l in objects:
            if cand_t == cand_l:
                continue
            # A negated relation swaps the argument order of the feature extractor.
            first, second = (cand_l, cand_t) if negated else (cand_t, cand_l)
            feats = get_relational_features(objects[first], objects[second])
            score = logreg.classify_obj(relation, relation_word_classifiers,
                                        feats.values())
            weight = target_dist.get(cand_t) * landmark_dist.get(cand_l) * score
            pairwise.add(make_id(cand_t, cand_l), weight)
    pairwise.marginalise()
    return pairwise
# --- one k-fold cross-validation pass for the current iteration ---
print('iteration', itr)
iter_results = []
# Fold index runs 0..num_folds-1 so every fold — including the first
# fold_size keys — is held out exactly once. The previous
# range(1, num_folds + 1) skipped fold 0 entirely and evaluated an
# empty (or remainder) slice on its last pass.
for i in range(num_folds):  # number of folds
    eval_data = data_keys[i * fold_size:][:fold_size]
    training_data = data_keys[:i * fold_size] + data_keys[(i + 1) * fold_size:]

    # Populated as a side effect of prepare_training() via module-level state.
    words_list = {}
    relation_words_list = {}
    # gather training data
    for eid in training_data:
        prepare_training(data[eid], max_negs=2, r_only=True)

    # train word classifiers: one logistic-regression model per word
    word_classifiers = {}
    for word in words_list:
        word_classifiers[word] = logreg.train(words_list[word])

    # train relation classifiers; relations with too few examples (<= 4)
    # are pooled into a single UNK_REL fallback model
    unk_rel = []
    relation_word_classifiers = {}
    for word in relation_words_list:
        if len(relation_words_list[word]) <= 4:
            unk_rel += relation_words_list[word]
        else:
            relation_word_classifiers[word] = logreg.train(relation_words_list[word])
    if len(unk_rel) > 0:
        relation_word_classifiers['UNK_REL'] = logreg.train(unk_rel)

    # evaluate the held-out fold
    for eid in eval_data:
        current_rank = perform_eval(eid, data[eid])
        iter_results.append(current_rank)
results.append(iter_results)