""" import numpy as np from util import read_file import ffs import tags from logistic_regression import LogisticRegression """Import a small sample dataset and run calcgis. Export the output to a csv.""" data_sample, labels_sample = read_file('sample') lr = LogisticRegression(method="collins", max_iters=1) labels_proc = lr.preproclabels(labels_sample) i = int(np.random.rand() * len(data_sample)) n = len(data_sample[i]) ws = np.random.rand(ffs.numJ) x = data_sample[i] y = labels_proc[i] #lr.calcgis(ws, x, n) print data_sample[i] print labels_sample[i],y print ws lr.calcAs(x, n) print "As",lr.As
def importData():
    """Load the training and test corpora from disk.

    NOTE(review): the ``def`` line was missing from the collapsed source; the
    name and the empty parameter list are reconstructed from the
    ``importData()`` call in the ``__main__`` block -- confirm against the
    original file.

    Returns:
        The tuple ``(data_train, data_test, labels_train, labels_test)``,
        exactly as produced by ``read_file('training')`` and
        ``read_file('test')``.
    """
    # The previous comment mentioned a training/validation split, but no
    # split happens here: the two sets are simply read as they are.
    data_train, labels_train = read_file('training')
    # Disabled sanity checks, kept for reference.
    #assert len(data_train[0]) == len(labels_train[0])
    #assert len(data_train[200]) == len(labels_train[200])
    data_test, labels_test = read_file('test')
    # NOTE(review): these two compare data_test with itself, so they could
    # never fail; labels_test was probably intended on the right-hand side.
    #assert len(data_test[0]) == len(data_test[0])
    #assert len(data_test[200]) == len(data_test[200])
    return data_train, data_test, labels_train, labels_test


def runML(meth, itrs, data_train, data_test, labels_train, labels_test):
    """Train one LogisticRegression model, score it on the test set, report.

    Timestamps are printed before training, after training and after
    scoring, so the duration of each phase can be read from the output.

    Args:
        meth: Value forwarded as ``method=`` to ``LogisticRegression``.
        itrs: Value forwarded as ``max_iters=`` to ``LogisticRegression``.
        data_train: Training inputs handed to ``model.fit``.
        data_test: Test inputs handed to ``model.predict``.
        labels_train: Training labels handed to ``model.fit``.
        labels_test: Reference labels compared against the predictions by
            ``LogisticRegression.tagAccuracy``.

    Returns:
        None. All results are written to standard output.
    """
    # Imported locally so the timing output does not depend on a file-level
    # import that is not visible in this part of the file.
    from datetime import datetime

    # Single-argument print(...) calls produce the same text under Python 2
    # as the old print statements and are also valid Python 3.
    print("{} {}".format(meth, datetime.now().time()))
    model = LogisticRegression(method=meth, max_iters=itrs)
    model.fit(data_train, labels_train)
    print(datetime.now().time())

    prediction = model.predict(data_test)
    tagscores = LogisticRegression.tagAccuracy(labels_test, prediction)
    score = np.mean(tagscores)
    print(" score tags: mean: {}, max: {}, min: {}".format(
        score, max(tagscores), min(tagscores)))
    print(" error rate: {}".format(1 - score))
    print(datetime.now().time())


if __name__ == "__main__":
    data_train, data_test, labels_train, labels_test = importData()
    # The test labels are preprocessed once and reused by both runs.
    labels_test = LogisticRegression.preproclabels(labels_test)
    # Same data, same iteration budget, two training methods.
    runML("collins", 10, data_train, data_test, labels_train, labels_test)
    runML("cd", 10, data_train, data_test, labels_train, labels_test)