def test_numeric_klass():
    """Run params, train and predict on a copy of the data with integer labels.

    The 'klass' field of every record in text.json is replaced by the integer
    produced by a LabelEncoder, the records are written to a temporary JSON
    file, and the three command-line entry points are driven through
    ``sys.argv``.
    """
    # NOTE(review): a later definition with the same name in this file
    # shadows this one, so pytest will only collect the later version --
    # confirm which one is intended.
    from microtc.utils import tweet_iterator
    from microtc.command_line import params, train, predict
    from sklearn.preprocessing import LabelEncoder
    import os
    import json
    import shutil
    import tempfile
    import sys

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    numeric = os.path.join(workdir, 'numeric.json')
    output = os.path.join(workdir, 'output')
    output2 = os.path.join(workdir, 'output2')
    saved_argv = sys.argv
    try:
        D = list(tweet_iterator(fname))
        labels = [x['klass'] for x in D]
        encoder = LabelEncoder().fit(labels)
        y = encoder.transform(labels)
        for x, k in zip(D, y):
            # int() turns the encoder output into a plain JSON-serialisable int.
            x['klass'] = int(k)
        with open(numeric, 'w') as fpt:
            fpt.writelines(json.dumps(x) + '\n' for x in D)
        sys.argv = ['microtc', '-o', output, '-k', '2', numeric, '-s', '2']
        params()
        # The model is written over the parameters file (same path).
        sys.argv = ['microtc', '-m', output, numeric, '-o', output]
        train()
        sys.argv = ['microtc', '-m', output, fname, '-o', output2]
        predict()
        # The original os.unlink() calls failed when these files were
        # missing; keep that check explicit.
        assert os.path.isfile(output)
        assert os.path.isfile(output2)
    finally:
        # Do not leak the patched argv or the temporary files into other tests.
        sys.argv = saved_argv
        shutil.rmtree(workdir, ignore_errors=True)
def test_numeric_klass():
    """Run params, train and predict on a copy of the data with integer labels.

    The 'klass' field of every record in text.json is replaced by the integer
    produced by a LabelEncoder and the records are written to a temporary
    JSON file. ``params`` is called with a copy of ``DefaultParams`` whose
    "dist_vector" entry is overridden with ``Fixed("entropy+0+1")``.
    """
    from microtc.utils import tweet_iterator
    from microtc.params import DefaultParams, Fixed
    from microtc.command_line import params, train, predict
    from sklearn.preprocessing import LabelEncoder
    import os
    import json
    import shutil
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    numeric = os.path.join(workdir, 'numeric.json')
    output = os.path.join(workdir, 'output')
    output2 = os.path.join(workdir, 'output2')
    try:
        D = list(tweet_iterator(fname))
        labels = [x['klass'] for x in D]
        encoder = LabelEncoder().fit(labels)
        y = encoder.transform(labels)
        for x, k in zip(D, y):
            # int() turns the encoder output into a plain JSON-serialisable int.
            x['klass'] = int(k)
        with open(numeric, 'w') as fpt:
            fpt.writelines(json.dumps(x) + '\n' for x in D)
        # Work on a copy so the shared DefaultParams mapping is not modified.
        P = DefaultParams.copy()
        P["dist_vector"] = Fixed("entropy+0+1")
        params('-o', output, '-k', '2', numeric, '-s', '2', **P)
        # The model is written over the parameters file (same path).
        train('-m', output, numeric, '-o', output)
        predict('-m', output, fname, '-o', output2)
        # The original os.unlink() calls failed when these files were
        # missing; keep that check explicit.
        assert os.path.isfile(output)
        assert os.path.isfile(output2)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_train2():
    """Check that params writes its output file and that train accepts it."""
    # NOTE(review): a later definition with the same name in this file
    # shadows this one, so pytest will only collect the later version --
    # confirm which one is intended.
    from microtc.command_line import train, params
    import os
    import shutil
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    output = os.path.join(workdir, 'output')
    output2 = os.path.join(workdir, 'output2')
    try:
        params('-o', output, '-k', '2', fname, '-s', '2')
        assert os.path.isfile(output)
        train('-m', output, fname, '-o', output2)
        # The original os.unlink(output2) failed when the file was missing;
        # keep that check explicit.
        assert os.path.isfile(output2)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_train2():
    """Check that CommandLine.main writes its output and that train accepts it.

    Both entry points receive their arguments through the ``args`` keyword.
    """
    from microtc.command_line import CommandLine, train
    import os
    import shutil
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    output = os.path.join(workdir, 'output')
    output2 = os.path.join(workdir, 'output2')
    try:
        c = CommandLine()
        c.main(args=['-o', output, '-k', '2', fname, '-s', '2'])
        assert os.path.isfile(output)
        train(args=['-m', output, fname, '-o', output2])
        # The original os.unlink(output2) failed when the file was missing;
        # keep that check explicit.
        assert os.path.isfile(output2)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_textmodel():
    """Check that the first line written by textmodel is JSON with a 'klass' key.

    params and train are run first; train writes its model over the
    parameters file, and textmodel is then pointed at that file.
    """
    from microtc.command_line import params, train, textmodel
    import os
    import json
    import shutil
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    output = os.path.join(workdir, 'output')
    output2 = os.path.join(workdir, 'output2')
    try:
        params('-o', output, '-k', '2', fname, '-s', '2')
        train('-m', output, fname, '-o', output)
        textmodel('-m', output, fname, '-o', output2)
        # The original os.unlink(output) failed when the file was missing;
        # keep that check explicit.
        assert os.path.isfile(output)
        # Context manager so the handle is closed even if readline() raises.
        with open(output2) as fpt:
            first_line = fpt.readline()
        record = json.loads(first_line)
        assert 'klass' in record
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_test():
    """Run params (with two different '-k' values), train and predict.

    params is invoked twice on the same output path, first with
    '-k' '0.5:0.5' and then with '-k' '2'; the second run's file is the one
    handed to train.
    """
    # NOTE(review): a later definition with the same name in this file
    # shadows this one, so pytest will only collect the later version --
    # confirm which one is intended.
    from microtc.command_line import params, train, predict
    from microtc.utils import read_data_labels
    import os
    import shutil
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    output = os.path.join(workdir, 'output')
    model = output + '.model'
    predicted = output + '.predicted'
    try:
        params('-o', output, '-k', '0.5:0.5', fname, '-s', '2')
        params('-o', output, '-k', '2', fname, '-s', '2')
        train('-o', model, '-m', output, fname)
        predict('-m', model, fname, '-o', predicted)
        # NOTE(review): the labels are read from the input file, not from
        # the prediction file, so the final assert does not inspect what
        # predict wrote -- confirm this is intended.
        X, y = read_data_labels(fname)
        print(y)
        # The original os.unlink() calls failed when these files were
        # missing; keep that check explicit.
        assert os.path.isfile(output)
        assert os.path.isfile(model)
        assert os.path.isfile(predicted)
        assert len(y)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_train_regression():
    """Run params with '-S' 'r2', train with '-R', then iterate predict's result.

    The parameters file is echoed to stdout and every item yielded by
    predict is echoed to stderr to help debugging.
    """
    from microtc.command_line import params, train, predict
    import os
    import shutil
    import sys
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    output = os.path.join(workdir, 'output')
    model = output + '.model'
    predicted = output + '.predicted'
    try:
        params('-o', output, '-k', '2', '-s', '2', '-S', 'r2', fname)
        with open(output) as fpt:
            print("XXXXX>", output, fpt.read())
        train('-o', model, '-R', '-m', output, fname)
        for x in predict('-m', model, fname, '-o', predicted):
            print(x, file=sys.stderr)
        # The original os.unlink() calls failed when these files were
        # missing; keep that check explicit.
        assert os.path.isfile(output)
        assert os.path.isfile(model)
        assert os.path.isfile(predicted)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_test():
    """Run params, train and predict through sys.argv and read the predictions.

    The file written by predict is loaded with read_data_labels and the test
    passes when the returned label sequence is non-empty.
    """
    from microtc.command_line import params, train, predict
    from microtc.utils import read_data_labels
    import os
    import shutil
    import sys
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    output = os.path.join(workdir, 'output')
    output2 = os.path.join(workdir, 'output2')
    saved_argv = sys.argv
    try:
        sys.argv = ['microtc', '-o', output, '-k', '2', fname, '-s', '2']
        params()
        # The model is written over the parameters file (same path).
        sys.argv = ['microtc', '-m', output, fname, '-o', output]
        train()
        sys.argv = ['microtc', '-m', output, fname, '-o', output2]
        predict()
        X, y = read_data_labels(output2)
        print(y)
        # The original os.unlink(output) failed when the file was missing;
        # keep that check explicit.
        assert os.path.isfile(output)
        assert len(y)
    finally:
        # Do not leak the patched argv or the temporary files into other tests.
        sys.argv = saved_argv
        shutil.rmtree(workdir, ignore_errors=True)
def test_train():
    """Check that params writes its output file and that train runs on it.

    ``sys.argv[0]`` is set to a descriptive program name before each entry
    point is called; the parameters file and train's return value are echoed
    to stdout to help debugging.
    """
    from microtc.command_line import params, train
    import os
    import shutil
    # sys is used below but was never imported inside this function.
    import sys
    import tempfile

    # os.path.join keeps the path relative when dirname() is empty instead
    # of silently pointing at '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # A private directory replaces the race-prone tempfile.mktemp() names and
    # lets a single rmtree() clean up even when a step fails.
    workdir = tempfile.mkdtemp()
    output = os.path.join(workdir, 'output')
    model = output + ".model"
    saved_prog = sys.argv[0]
    try:
        sys.argv[0] = 'test-train:microtc-params'
        params('-o', output, '-k', '2', fname, '-s', '2')
        assert os.path.isfile(output)
        with open(output) as fpt:
            print(fpt.read())
        sys.argv[0] = 'test-train:microtc-train'
        print(train('-m', output, '-o', model, fname))
        # The original os.unlink(output + '.model') failed when the file was
        # missing; keep that check explicit.
        assert os.path.isfile(model)
    finally:
        # Do not leak the patched program name or the temporary files.
        sys.argv[0] = saved_prog
        shutil.rmtree(workdir, ignore_errors=True)