def eval_instances():
    """Filter the self-reveal user pool by model score.

    Reads tab-separated ``user_id<TAB>target`` lines from
    ``twitter/self_reveal/user_pool0.csv`` under ``DATA``, fetches each
    user's context with ``getTrTWContext`` and scores the
    ``(context, target)`` pair with the model returned by ``getModel``.
    Lines whose score is strictly greater than 0.25 are written, in the
    same ``user_id<TAB>target`` format, to
    ``twitter/self_reveal/user_pool2.csv`` under ``DATA``.  Users whose
    context is ``None`` are skipped.

    Raises:
        ValueError: if an input line does not split into exactly two
            tab-separated fields.
    """
    instance_file = os.path.join(DATA, 'twitter/self_reveal/user_pool0.csv')
    filtered_file = os.path.join(DATA, 'twitter/self_reveal/user_pool2.csv')
    first_model = getModel()

    # Context managers guarantee both handles are closed (and the output
    # flushed) even when context lookup, scoring or line parsing raises.
    # The output file is opened first, as before, so it is truncated even
    # if the input file turns out to be missing.
    with open(filtered_file, 'w') as fout, open(instance_file) as fin:
        for line in fin:
            user_id, target = line.rstrip('\n').split('\t')
            context = getTrTWContext(user_id)
            if context is None:
                continue
            score = first_model.eval(context, target)
            if score > .25:
                fout.write(user_id + '\t' + target + '\n')
if context is None: continue weight = 1.0 predictions = model.eval_all(context) for label, score in predictions: all_buckets[int(label)][int(20 * score)] += 1 if label == target: correct_buckets[int(label)][int(20 * score)] += 1 print 'correct buckets' print correct_buckets print 'all buckets' print all_buckets print 'caliplot' print correct_buckets / all_buckets if __name__ == "__main__": maxent.set_verbose(1) test_file = '../data/semi/ver2.8-hardLabel.csv' distant_model = getModel() caliplot(test_file, distant_model)