示例#1
0
def test_numeric_klass():
    """Run params, train and predict on a dataset with integer ``klass`` values.

    The ``klass`` field of every record in ``text.json`` is replaced by the
    integer code produced by ``LabelEncoder`` and the records are written to
    a temporary JSON-lines file. The three command-line entry points are
    then driven through ``sys.argv``.
    """
    from microtc.utils import tweet_iterator
    from microtc.command_line import params, train, predict
    from sklearn.preprocessing import LabelEncoder
    import os
    import json
    import tempfile
    import sys
    numeric = tempfile.mktemp() + '.json'
    output = tempfile.mktemp()
    fname = os.path.dirname(__file__) + '/text.json'
    D = list(tweet_iterator(fname))
    labels = [x['klass'] for x in D]
    encoder = LabelEncoder().fit(labels)
    y = encoder.transform(labels)
    for x, k in zip(D, y):
        # int() turns the encoder's output into a plain, JSON-serialisable int.
        x['klass'] = int(k)
    with open(numeric, 'w') as fpt:
        for x in D:
            fpt.write(json.dumps(x) + '\n')
    # The entry points are called without arguments, so their options are
    # supplied through sys.argv; keep the original list so it can be put back
    # even when one of the steps fails.
    saved_argv = sys.argv
    try:
        sys.argv = ['microtc', '-o', output, '-k', '2', numeric, '-s', '2']
        params()
        sys.argv = ['microtc', '-m', output, numeric, '-o', output]
        train()
        output2 = tempfile.mktemp()
        sys.argv = ['microtc', '-m', output, fname, '-o', output2]
        predict()
    finally:
        sys.argv = saved_argv
    os.unlink(numeric)
    os.unlink(output)
    os.unlink(output2)
示例#2
0
def test_numeric_klass():
    """Run params, train and predict on a dataset with integer ``klass`` values.

    Same scenario as the argv-driven variant, but the options are passed
    directly to the entry points and ``dist_vector`` is pinned to
    ``Fixed("entropy+0+1")`` for the parameter search.
    """
    import json
    import os
    import sys
    import tempfile
    from sklearn.preprocessing import LabelEncoder
    from microtc.command_line import params, train, predict
    from microtc.params import DefaultParams, Fixed
    from microtc.utils import tweet_iterator

    numeric = tempfile.mktemp() + '.json'
    output = tempfile.mktemp()
    fname = os.path.dirname(__file__) + '/text.json'

    records = list(tweet_iterator(fname))
    encoder = LabelEncoder().fit([rec['klass'] for rec in records])
    codes = encoder.transform([rec['klass'] for rec in records])
    for rec, code in zip(records, codes):
        # Store a plain int so json.dumps can serialise the record.
        rec['klass'] = int(code)

    with open(numeric, 'w') as fpt:
        for rec in records:
            fpt.write(json.dumps(rec) + '\n')

    overrides = DefaultParams.copy()
    overrides["dist_vector"] = Fixed("entropy+0+1")
    params('-o', output, '-k', '2', numeric, '-s', '2', **overrides)
    train('-m', output, numeric, '-o', output)
    output2 = tempfile.mktemp()
    predict('-m', output, fname, '-o', output2)

    # Remove every file produced by the run.
    for path in (numeric, output, output2):
        os.unlink(path)
示例#3
0
def test_main():
    """Smoke test: call ``params`` with its options given via ``args``.

    The output file is removed afterwards, so the test fails if ``params``
    did not create it.
    """
    import os
    import tempfile
    from microtc.command_line import params

    dataset = os.path.dirname(__file__) + '/text.json'
    params_path = tempfile.mktemp()
    cli_args = ['-o', params_path, '-k', '2', dataset]
    params(args=cli_args)
    os.unlink(params_path)
示例#4
0
def test_output():
    """Check that ``params`` creates the file named by ``-o``."""
    import os
    import tempfile
    from microtc.command_line import params

    dataset = os.path.dirname(__file__) + '/text.json'
    params_path = tempfile.mktemp()
    cli_args = ('-o', params_path, '-k', '2', dataset)
    params(*cli_args)
    assert os.path.isfile(params_path)
    os.unlink(params_path)
示例#5
0
def test_train2():
    """Run ``params`` then ``train``, writing the model to a separate file."""
    import os
    import tempfile
    from microtc.command_line import train, params

    dataset = os.path.dirname(__file__) + '/text.json'
    params_path = tempfile.mktemp()
    params(*['-o', params_path, '-k', '2', dataset, '-s', '2'])
    assert os.path.isfile(params_path)

    model_path = tempfile.mktemp()
    train(*['-m', params_path, dataset, '-o', model_path])

    # Clean up both generated files.
    for path in (params_path, model_path):
        os.unlink(path)
示例#6
0
def test_score():
    """Check that ``-S avgf1:POS:NEG`` selects the value stored as ``_score``.

    The first entry of the JSON written by ``params`` must carry the same
    value under ``_score`` and ``_avgf1:POS:NEG``.
    """
    import json
    import os
    import sys
    import tempfile
    from microtc.command_line import params

    dataset = os.path.dirname(__file__) + '/text.json'
    params_path = tempfile.mktemp()
    params('-o', params_path, '-k', '2', '-s', '2',
           '-S', 'avgf1:POS:NEG', dataset)
    with open(params_path) as fpt:
        configurations = json.load(fpt)
    first = configurations[0]
    assert first['_score'] == first['_avgf1:POS:NEG']
    os.unlink(params_path)
示例#7
0
def test_seed():
    """Check that ``--seed 1`` makes ``params`` call ``np.random.seed(1)`` once.

    ``np.random.seed`` is temporarily replaced by a ``MagicMock`` so the call
    can be inspected; the real function is always put back, even when
    ``params`` raises or the assertion fails.
    """
    try:
        from mock import MagicMock
    except ImportError:
        from unittest.mock import MagicMock
    from microtc.command_line import params
    import os
    # Imported locally so the test does not rely on a module-level ``np``.
    import numpy as np
    fname = os.path.dirname(__file__) + '/text.json'
    seed = np.random.seed
    np.random.seed = MagicMock()
    try:
        params('-s', '2', '--seed', '1', '-k', '2', '-o', fname + ".params", fname)
        os.unlink(fname + ".params")
        np.random.seed.assert_called_once_with(1)
    finally:
        # Undo the monkey-patch so later tests see the real seeding function.
        np.random.seed = seed
示例#8
0
def test_train():
    """Run ``params`` and ``train`` while setting a custom ``sys.argv[0]``.

    The generated parameter file and the return value of ``train`` are
    printed. ``sys.argv[0]`` is restored when the test finishes, whether it
    passes or fails.
    """
    from microtc.command_line import params, train
    import os
    import sys
    import tempfile
    output = tempfile.mktemp()
    fname = os.path.dirname(__file__) + '/text.json'
    saved_prog = sys.argv[0]
    try:
        sys.argv[0] = 'test-train:microtc-params'
        params('-o', output, '-k', '2', fname, '-s', '2')
        assert os.path.isfile(output)
        with open(output) as fpt:
            print(fpt.read())

        sys.argv[0] = 'test-train:microtc-train'
        print(train('-m', output, '-o', output + ".model", fname))
    finally:
        # Do not leak the fake program name into other tests.
        sys.argv[0] = saved_prog
    os.unlink(output)
    os.unlink(output + '.model')
示例#9
0
def test_textmodel():
    """Check that the first record written by ``textmodel`` has a ``klass`` key.

    ``params`` and ``train`` prepare the model (``train`` overwrites the
    parameter file with the model), then ``textmodel`` writes its output to a
    second temporary file whose first line is parsed as JSON.
    """
    from microtc.command_line import params, train, textmodel
    import os
    import json
    import tempfile
    output = tempfile.mktemp()
    fname = os.path.dirname(__file__) + '/text.json'
    params('-o', output, '-k', '2', fname, '-s', '2')
    train('-m', output, fname, '-o', output)
    output2 = tempfile.mktemp()
    textmodel('-m', output, fname, '-o', output2)
    os.unlink(output)
    # Close the file before deleting it instead of leaking the handle.
    with open(output2) as fpt:
        a = fpt.readline()
    os.unlink(output2)
    a = json.loads(a)
    assert 'klass' in a
示例#10
0
def test_test():
    """Run params (twice), train and predict, then read the dataset labels.

    ``params`` is first called with ``-k 0.5:0.5`` and then with ``-k 2``,
    both writing to the same file. The final assertion only checks that
    ``read_data_labels`` returns a non-empty label sequence for the input
    dataset.
    """
    import os
    import sys
    import tempfile
    from microtc.command_line import params, train, predict
    from microtc.utils import read_data_labels

    dataset = os.path.dirname(__file__) + '/text.json'
    params_path = tempfile.mktemp()
    model_path = params_path + '.model'
    predicted_path = params_path + '.predicted'

    for k_value in ('0.5:0.5', '2'):
        params('-o', params_path, '-k', k_value, dataset, '-s', '2')
    train('-o', model_path, '-m', params_path, dataset)
    predict('-m', model_path, dataset, '-o', predicted_path)

    X, labels = read_data_labels(dataset)
    print(labels)
    for path in (params_path, model_path, predicted_path):
        os.unlink(path)
    assert len(labels)
示例#11
0
def test_train_regression():
    """Run params with the ``r2`` score, train with ``-R`` and predict.

    The parameter file is echoed to stdout and every item returned by
    ``predict`` is echoed to stderr; all generated files are removed at the
    end.
    """
    import os
    import sys
    import tempfile
    from microtc.command_line import params, train, predict

    dataset = os.path.dirname(__file__) + '/text.json'
    output = tempfile.mktemp()
    model_path = output + '.model'
    predicted_path = output + '.predicted'

    params('-o', output, '-k', '2', '-s', '2', '-S', 'r2', dataset)
    with open(output) as fpt:
        print("XXXXX>", output, fpt.read())

    train('-o', model_path, '-R', '-m', output, dataset)
    predictions = predict('-m', model_path, dataset, '-o', predicted_path)
    for item in predictions:
        print(item, file=sys.stderr)

    for path in (output, model_path, predicted_path):
        os.unlink(path)
示例#12
0
def test_test():
    """Run params, train and predict via ``sys.argv`` and read the predictions.

    The entry points are called without arguments, so their options come
    from ``sys.argv``. The prediction file is loaded with
    ``read_data_labels`` and must yield a non-empty label sequence.
    """
    from microtc.command_line import params, train, predict
    from microtc.utils import read_data_labels
    import os
    import sys
    import tempfile
    output = tempfile.mktemp()
    fname = os.path.dirname(__file__) + '/text.json'
    # Keep the real argument list so it can be restored even on failure.
    saved_argv = sys.argv
    try:
        sys.argv = ['microtc', '-o', output, '-k', '2', fname, '-s', '2']
        params()
        sys.argv = ['microtc', '-m', output, fname, '-o', output]
        train()
        output2 = tempfile.mktemp()
        sys.argv = ['microtc', '-m', output, fname, '-o', output2]
        predict()
    finally:
        sys.argv = saved_argv
    X, y = read_data_labels(output2)
    print(y)
    os.unlink(output)
    os.unlink(output2)
    assert len(y)
示例#13
0
def test_kfolds():
    """Check the ``kfolds`` output and that ``--update-klass`` is rejected.

    The first record written by ``kfolds`` must contain a
    ``decision_function`` key. A second call with ``--update-klass`` is
    expected to raise ``AssertionError``; note that the parameter file has
    already been deleted when that call is made.
    """
    from microtc.command_line import params, kfolds
    import os
    import json
    import tempfile
    output = tempfile.mktemp()
    fname = os.path.dirname(__file__) + '/text.json'
    params('-o', output, '-k', '2', fname, '-s', '2')
    output2 = tempfile.mktemp()
    kfolds('-m', output, fname, '-o', output2)
    os.unlink(output)
    # Close the file before deleting it instead of leaking the handle.
    with open(output2) as fpt:
        a = fpt.readline()
    os.unlink(output2)
    a = json.loads(a)
    assert 'decision_function' in a
    try:
        kfolds('--update-klass', '-m', output, fname, '-o', output2)
    except AssertionError:
        return
    # An explicit raise is not stripped under ``python -O``, unlike
    # ``assert False``.
    raise AssertionError("kfolds with --update-klass did not raise AssertionError")
示例#14
0
def test_nparams():
    """Smoke test: run ``params`` with ``-s 11`` and remove its output file."""
    import os
    from microtc.command_line import params

    dataset = os.path.dirname(__file__) + '/text.json'
    result_path = dataset + ".tmp"
    cli_args = ['-k', '2', '-s', '11', dataset, '-o', result_path]
    params(args=cli_args)
    os.unlink(result_path)
示例#15
0
def test_pool():
    """Smoke test: run ``params`` with ``-n 2`` and remove its output file."""
    import os
    from microtc.command_line import params

    dataset = os.path.dirname(__file__) + '/text.json'
    result_path = dataset + ".params"
    cli_args = ('-k', '2', '-s', '11', '-n', '2', '-o', result_path, dataset)
    params(*cli_args)
    os.unlink(result_path)