Exemplo n.º 1
0
def test_max_lines():
    """Train a model, then predict with ``--max-lines 500`` through the CLI.

    The test passes when both commands run and both output files
    (``t.model`` and ``t.json``) have been written.  The generated files
    are removed even when a step fails, so a failure does not leave
    artifacts behind in the working directory.
    """
    produced = ('t.model', 't.json')
    try:
        sys.argv = ['EvoMSA', '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}', '-ot.model', '-n2', TWEETS]
        train()
        sys.argv = ['EvoMSA', '-mt.model', '--max-lines', '500', '-ot.json', TWEETS]
        predict()
        # Check the outputs explicitly instead of relying on os.unlink
        # raising when a file is missing.
        for path in produced:
            assert os.path.isfile(path)
    finally:
        # Guarded removal: a step may have failed before writing its file,
        # and a FileNotFoundError here would mask the real failure.
        for path in produced:
            if os.path.isfile(path):
                os.unlink(path)
Exemplo n.º 2
0
def test_decision_function():
    """Check the size of the decision function written by ``predict``.

    A model is trained through the command line (the training file is
    passed twice), then ``predict`` is run with ``--decision-function``.
    The first record of the output must carry a ``decision_function``
    entry of length 4.
    """
    evodag_kw = '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}'
    sys.argv = ['EvoMSA', evodag_kw, '-ot.model', '-n2', TWEETS, TWEETS]
    train(output=True)

    sys.argv = ['EvoMSA', '--decision-function', '-mt.model', '-ot1.json', TWEETS]
    predict()

    # Read every record so a missing key anywhere in the output is noticed.
    decisions = []
    for record in tweet_iterator('t1.json'):
        decisions.append(record['decision_function'])
    assert len(decisions[0]) == 4

    for leftover in ('t1.json', 't.model'):
        os.unlink(leftover)
Exemplo n.º 3
0
def test_raw_outputs():
    """Check the size of the output written by ``predict --raw-outputs``.

    A model is trained through the command line with the stacked method
    arguments given below, then ``predict`` is run with ``--raw-outputs``.
    The first record's ``decision_function`` must have ``30 * 4`` entries.
    """
    stacked_kw = '--kw={"stacked_method_args": {"popsize": 10, "early_stopping_rounds": 10, "time_limit": 60, "n_estimators": 30}}'
    sys.argv = ['EvoMSA', stacked_kw, '-ot.model', '-n2', TWEETS]
    train(output=True)

    sys.argv = ['EvoMSA', '--raw-outputs', '-mt.model', '-ot1.json', TWEETS]
    predict()

    raw = []
    for record in tweet_iterator('t1.json'):
        raw.append(record['decision_function'])
    # NOTE(review): 30 presumably mirrors "n_estimators" above and 4 the
    # per-estimator output size -- confirm against the model.
    assert len(raw[0]) == 30 * 4

    for leftover in ('t1.json', 't.model'):
        os.unlink(leftover)
Exemplo n.º 4
0
def test_predict():
    """Train and predict through the CLI and check the training accuracy.

    The model is trained on ``TWEETS`` (passed twice) and then used to
    predict the same file.  Every prediction record must contain a
    ``decision_function`` entry, and the accuracy of the predicted
    ``klass`` against the labels in ``TWEETS`` must be in ``(0.8, 1]``.
    The generated files are removed even when a step or assertion fails.
    """
    import numpy as np

    try:
        sys.argv = ['EvoMSA', '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}',
                    '-ot.model', '-n2', TWEETS, TWEETS]
        train(output=True)
        sys.argv = ['EvoMSA', '-mt.model', '-ot1.json', TWEETS]
        predict()
        # Parse the predictions once and reuse them for both checks.
        predictions = list(tweet_iterator('t1.json'))
        hy = np.array([x['klass'] for x in predictions])
        # Explicit check replacing a list that was built only to be thrown
        # away (its sole effect was a KeyError on a missing key).
        assert all('decision_function' in x for x in predictions)
        y = np.array([x['klass'] for x in tweet_iterator(TWEETS)])
        acc = (y == hy).mean()
        print(acc)
        assert 0.8 < acc <= 1
    finally:
        # Guarded removal so a failure before a file is written does not
        # get masked by a FileNotFoundError raised during cleanup.
        for path in ('t1.json', 't.model'):
            if os.path.isfile(path):
                os.unlink(path)
Exemplo n.º 5
0
def test_predict_numbers():
    """Train on integer-encoded labels and predict through the CLI.

    The ``klass`` of every record in ``TWEETS`` is replaced by its
    ``LabelEncoder`` integer code and the records are written, one JSON
    document per line, to ``ex.json``.  A model is trained on that file
    and used to predict ``TWEETS``; the prediction file ``t1.json`` must
    exist afterwards.  All generated files are removed when the test
    ends, whether it passed or failed.
    """
    from sklearn.preprocessing import LabelEncoder
    import json

    produced = ('ex.json', 't.model', 't1.json')
    try:
        d = list(tweet_iterator(TWEETS))
        labels = [x['klass'] for x in d]
        le = LabelEncoder().fit(labels)
        y = le.transform(labels).tolist()
        with open('ex.json', 'w') as fpt:
            for x, y0 in zip(d, y):
                x['klass'] = y0
                fpt.write(json.dumps(x) + '\n')
        sys.argv = ['EvoMSA', '--kw={"stacked_method_args": {"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}, "models": [["EvoMSA.model.Corpus", "EvoMSA.model.Bernoulli"]], "TR": false}',
                    '-ot.model', '-n1', 'ex.json']
        train(output=True)
        sys.argv = ['EvoMSA', '-mt.model', '-ot1.json', TWEETS]
        predict()
        # Give the test an observable outcome: predict must write its output.
        assert os.path.isfile('t1.json')
    finally:
        # Guarded removal: a step may fail before its file is created.
        for path in produced:
            if os.path.isfile(path):
                os.unlink(path)
Exemplo n.º 6
0
def test_predict_NearestCentroid():
    """Train with ``sklearn.neighbors.NearestCentroid`` as ``evodag_class``.

    The ``klass`` of every record in ``TWEETS`` is replaced by its
    ``LabelEncoder`` integer code and the records are written, one JSON
    document per line, to ``ex.json``.  A model is trained on that file
    and used to predict ``TWEETS``; the prediction file ``t1.json`` must
    exist afterwards.  All generated files are removed when the test
    ends, whether it passed or failed.
    """
    from sklearn.preprocessing import LabelEncoder
    import json

    produced = ('ex.json', 't.model', 't1.json')
    try:
        d = list(tweet_iterator(TWEETS))
        labels = [x['klass'] for x in d]
        le = LabelEncoder().fit(labels)
        y = le.transform(labels).tolist()
        with open('ex.json', 'w') as fpt:
            for x, y0 in zip(d, y):
                x['klass'] = y0
                fpt.write(json.dumps(x) + '\n')
        # NOTE(review): the model name is spelled "Bernulli" here and is
        # passed through verbatim -- confirm it matches the class name
        # exported by EvoMSA.model in the targeted version.
        sys.argv = ['EvoMSA',
                    '--kw={"evodag_class": "sklearn.neighbors.NearestCentroid", "TR": false, "models": [["EvoMSA.model.Corpus", "EvoMSA.model.Bernulli"]]}',
                    '-ot.model', '-n1', 'ex.json']
        train(output=True)
        sys.argv = ['EvoMSA', '-mt.model', '-ot1.json', TWEETS]
        predict()
        # Give the test an observable outcome: predict must write its output.
        assert os.path.isfile('t1.json')
    finally:
        # Guarded removal: a step may fail before its file is created.
        for path in produced:
            if os.path.isfile(path):
                os.unlink(path)
Exemplo n.º 7
0
def test_performance_public_set():
    """Run the ``performance`` command over two groups of predictions.

    Four models (seeds 0-3) are trained and used to predict ``TWEETS``.
    A model or prediction file that already exists on disk is reused
    rather than regenerated.  For each score, ``performance`` is invoked
    with predictions 0-1 and 2-3 separated by ``'-'``, and the returned
    object's ``_p`` attribute must have length 2.
    """
    import os
    from EvoMSA.command_line import performance

    evodag_kw = '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 3}'

    # Train only the models that are not already on disk.
    for seed in range(4):
        if not os.path.isfile('t-%s.model' % seed):
            sys.argv = ['EvoMSA', evodag_kw,
                        '--kw={"seed": %s}' % seed, '-ot-%s.model' % seed, '-n1', TWEETS]
            train(output=True)

    # Likewise, predict only where no prediction file exists yet.
    for seed in range(4):
        if not os.path.isfile('t-%s.predict' % seed):
            sys.argv = ['EvoMSA', '-mt-%s.model' % seed, '-ot-%s.predict' % seed, TWEETS]
            predict()

    first_group = ['t-%s.predict' % seed for seed in range(2)]
    second_group = ['t-%s.predict' % seed for seed in range(2, 4)]
    for score in ['macroF1', 'macroRecall', 'macroPrecision', 'accuracy']:
        base_args = ['EvoMSA', '--score', score, '-n2', '-y', TWEETS]
        sys.argv = base_args + first_group + ['-'] + second_group
        m = performance(output=True)
        assert len(m._p) == 2