def main():
    """Load the SemEval twitter splits, build the SVM model and run it.

    The train/dev/test TSV files are read through
    ``get_final_semeval_data`` restricted to the combined
    ``POS | NEU | NEG`` class mask.  ``run`` is then invoked once for each
    of the tokens ``'km'``, ``'af'`` and ``'cl'`` with ``mode=['filter']``
    and ``retrain=5``, always on the train and dev splits.  The test split
    is loaded but not used here.
    """
    # Labelled data: one path per split, passed positionally in
    # train / dev / test order.
    label_mask = POS | NEU | NEG
    split_paths = (
        root+'Data/twitterData/train_alternative.tsv',
        root+'Data/twitterData/dev_alternative.tsv',
        root+'Data/twitterData/test_alternative.tsv',
    )
    train, dev, test = get_final_semeval_data(label_mask, *split_paths)

    # Model under evaluation.
    classifier = svm_pipeline()

    # Main routine: same model and data for every token.
    for run_token in ('km', 'af', 'cl'):
        run(
            classifier,
            train[0],
            train[1],
            dev[0],
            dev[1],
            mode=['filter'],
            retrain=5,
            token=run_token,
        )
def evaluate_mutator(mutator, threshold, min_percent, latex=True):
    """Yield one precision data point per sentiment label for *mutator*.

    The dev split is loaded via ``get_final_semeval_data`` and, for each of
    ``POS``, ``NEU`` and ``NEG``, every dev tweet is passed to
    ``mutator.apply_filter(tweet, label)``.  A truthy result counts as a
    prediction of that label; anything else is recorded as ``-1``.

    Args:
        mutator: object exposing ``apply_filter(tweet, label)``.
        threshold: number formatted as the first coordinate of each point.
        min_percent: minimum fraction of dev tweets that must be predicted
            as the label; below it the second coordinate is the literal
            ``0.0`` and ``precision`` is not called.
        latex: accepted for interface compatibility; not read in this
            function.

    Yields:
        ``(str(label), point)`` tuples, where ``point`` is a string of the
        form ``' (<threshold>,<value>)'``.
        NOTE(review): the format looks like a plot coordinate (e.g. for a
        LaTeX/pgfplots figure) - confirm against the caller.
    """
    labels = (POS, NEU, NEG)
    split_paths = (
        root+'Data/twitterData/train_alternative.tsv',
        root+'Data/twitterData/dev_alternative.tsv',
        root+'Data/twitterData/test_alternative.tsv',
    )
    # Combine the individual labels into one class mask for the loader.
    label_mask = reduce(lambda acc, lab: acc|lab, labels)
    train, dev, test = get_final_semeval_data(label_mask, *split_paths)
    dev_x, dev_y = dev

    for label in labels:
        # -1 marks "filter did not fire" for this tweet.
        pred_y = [
            label if mutator.apply_filter(tweet, label) else -1
            for tweet in dev_x
        ]
        label_name = str(label)
        fired = pred_y.count(label)
        if fired < min_percent*len(pred_y):
            # Too few predictions for this label: report a fixed 0.0.
            point = ' (%.3f,0.0)' % threshold
        else:
            point = ' (%.3f,%.4f)' % (
                threshold,
                precision(dev_y, pred_y, label),
            )
        yield label_name, point