예제 #1
0
def main():
    """Load the labelled twitter splits and launch ``run`` once per token.

    The three ``*_alternative.tsv`` splits are loaded through
    ``get_final_semeval_data`` with the OR-combination of ``POS``, ``NEU``
    and ``NEG``. A single ``svm_pipeline()`` model is then handed to
    ``run`` for each of the tokens ``'km'``, ``'af'`` and ``'cl'``, always
    with ``mode=['filter']`` and ``retrain=5``.
    """
    # Labelled data: one mask covering all three classes, three split files.
    label_mask = POS | NEU | NEG
    train_path = root + 'Data/twitterData/train_alternative.tsv'
    dev_path = root + 'Data/twitterData/dev_alternative.tsv'
    test_path = root + 'Data/twitterData/test_alternative.tsv'
    # The test split is loaded alongside the others but not used below.
    train_set, dev_set, _test_set = get_final_semeval_data(
        label_mask, train_path, dev_path, test_path)

    # The same model object is shared by every run below.
    classifier = svm_pipeline()

    # Main routine: element 0 and element 1 of each split are passed on
    # separately (presumably inputs and gold labels -- confirm against
    # get_final_semeval_data).
    for token_name in ('km', 'af', 'cl'):
        run(
            classifier,
            train_set[0],
            train_set[1],
            dev_set[0],
            dev_set[1],
            mode=['filter'],
            retrain=5,
            token=token_name,
        )
예제 #2
0
def evaluate_mutator(mutator, threshold, min_percent, latex=True):
    """Yield one ``(label, point)`` pair per label for ``mutator`` on the dev split.

    This is a generator, so the data is loaded only once iteration starts.
    For each of ``POS``, ``NEU`` and ``NEG`` every dev tweet is passed to
    ``mutator.apply_filter(tweet, label)``; a truthy result is recorded as
    ``label`` and anything else as ``-1``.

    Args:
        mutator: object exposing ``apply_filter(tweet, label)``.
        threshold: number formatted as the first coordinate of each point.
        min_percent: minimum fraction of dev tweets that must be recorded
            as ``label``; below it the second coordinate is the literal
            ``0.0`` and ``precision`` is not called.
        latex: accepted for the caller's benefit; not read by this function.

    Yields:
        ``(str(label), ' (<threshold>,<value>)')`` tuples, where the value
        is ``precision(dev_y, predictions, label)`` formatted with four
        decimals, or ``0.0`` when too few tweets passed the filter.
    """
    def _either(left, right):
        # Pairwise OR used to fold the labels into a single mask.
        return left | right

    labels = (POS, NEU, NEG)
    train_path = root + 'Data/twitterData/train_alternative.tsv'
    dev_path = root + 'Data/twitterData/dev_alternative.tsv'
    test_path = root + 'Data/twitterData/test_alternative.tsv'
    # Only the dev split is consumed; train and test are loaded with it.
    _train_set, dev_set, _test_set = get_final_semeval_data(
        reduce(_either, labels), train_path, dev_path, test_path)
    dev_tweets, dev_gold = dev_set

    for label in labels:
        # -1 marks tweets for which the filter did not fire for this label.
        predictions = [
            label if mutator.apply_filter(tweet, label) else -1
            for tweet in dev_tweets
        ]
        too_few = predictions.count(label) < min_percent * len(predictions)
        tag = str(label)
        if too_few:
            point = ' (%.3f,0.0)' % threshold
        else:
            point = ' (%.3f,%.4f)' % (
                threshold, precision(dev_gold, predictions, label))
        yield tag, point