def metric(self):
    totalTimer = Timer()
    with totalTimer:
        model = mlpy.Perceptron(**self.build_opts)
        model.learn(self.data_split[0], self.data_split[1])
        if len(self.data) >= 2:
            predictions = model.pred(self.data[1])

    metric = {}
    metric["runtime"] = totalTimer.ElapsedTime()

    if len(self.data) == 3:
        confusionMatrix = Metrics.ConfusionMatrix(self.data[2], predictions)
        metric['ACC'] = Metrics.AverageAccuracy(confusionMatrix)
        metric['MCC'] = Metrics.MCCMultiClass(confusionMatrix)
        metric['Precision'] = Metrics.AvgPrecision(confusionMatrix)
        metric['Recall'] = Metrics.AvgRecall(confusionMatrix)
        metric['MSE'] = Metrics.SimpleMeanSquaredError(self.data[2], predictions)

    return metric
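The `with totalTimer:` block above relies on a context-manager Timer supplied by the benchmark harness, whose implementation is not shown here. A minimal sketch of such a timer (a hypothetical reimplementation with names matched to the usage above, not the harness's actual code):

import time

class Timer(object):
    # Hypothetical context-manager timer matching the usage above.
    def __enter__(self):
        self._start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._end = time.time()

    def ElapsedTime(self):
        # Wall-clock seconds spent inside the `with` block.
        return self._end - self._start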
        printStats(model.pred(xTest), yTest, modelName)
        # TODO: model predicted XX% automation error XX% classifier error
    except:
        print "Error in", modelName

directory = raw_input("What directory are the XML files located:\n")
regexParse = raw_input("How would you like to parse the words, leave it blank if you would like to parse by whitespace:\n")
if regexParse == "":
    regexParse = None

[vocab, indexToWord, fullDataPoints] = parseDataPoints(directory, regexParse)
[X, Y] = packageData(fullDataPoints, regexParse, vocab, indexToWord)

testModel(mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000), X, Y, "Perceptron")
testModel(mlpy.ElasticNetC(lmb=0.01, eps=0.001), X, Y, "ElasticNet")
testModel(mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1), X, Y, "LibLinear")
testModel(mlpy.DLDA(delta=0.1), X, Y, "DLDA")
testModel(mlpy.Golub(), X, Y, "Golub")
testModel(mlpy.Parzen(), X, Y, "Parzen")
testModel(mlpy.KNN(2), X, Y, "KNN")
testModel(mlpy.ClassTree(), X, Y, "Classification Tree")
testModel(mlpy.MaximumLikelihoodC(), X, Y, "Maximum Likelihood Classifier")
def BuildModel(self, data, responses):
    # Create and train the classifier.
    model = mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=self.iterations)
    model.learn(data, responses)
    return model
def BuildModel(self, data, responses):
    # Create and train the classifier.
    model = mlpy.Perceptron(**self.opts)
    model.learn(data, responses)
    return model
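For orientation, here is a minimal, self-contained sketch of exercising a BuildModel helper like the two above on toy data; the Gaussian-blob data and labels are illustrative assumptions, not benchmark inputs:

import numpy as np
import mlpy

np.random.seed(0)
# Two separable Gaussian blobs standing in for real benchmark data.
data = np.vstack([np.random.randn(20, 2) + 2, np.random.randn(20, 2) - 2])
responses = np.array([1] * 20 + [-1] * 20)

model = mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000)
model.learn(data, responses)
predictions = model.pred(data)  # mlpy classifiers predict via pred()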
print 'training sample: ', len(train)
x, y, fnames = prepareData(train)
print 'control sample: ', len(control)
xcontrol, ycontrol, fnames = prepareData(control)

print '\ntest algorithms:'

ld = mlpy.LDAC()
ld.learn(x, y)
test = ld.pred(xcontrol)  # test points
# float() avoids Python 2 integer division when computing the percentage.
print 'LDAC: %.1f percent predicted' % (100 * len(test[test == ycontrol]) / float(len(test)))
dic['ld'].append(100 * len(test[test == ycontrol]) / float(len(test)))

perc = mlpy.Perceptron()
perc.learn(x, y)
test = perc.pred(xcontrol)  # test points
print 'Perceptron: %.1f percent predicted' % (100 * len(test[test == ycontrol]) / float(len(test)))
dic['perc'].append(100 * len(test[test == ycontrol]) / float(len(test)))

elnet = mlpy.ElasticNetC(lmb=0.01, eps=0.001)
elnet.learn(x, y)
test = elnet.pred(xcontrol)  # test points
print 'Elastic Net: %.1f percent predicted' % (100 * len(test[test == ycontrol]) / float(len(test)))
dic['elnet'].append(100 * len(test[test == ycontrol]) / float(len(test)))

da = mlpy.DLDA(delta=0.1)
da.learn(x, y)
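The accuracy expression is repeated verbatim for every classifier; one way to tighten this (the `accuracy` helper name is mine, not from the original script) would be:

def accuracy(pred, truth):
    # Percentage of control points predicted correctly; 100.0 forces
    # float division under Python 2.
    return 100.0 * len(pred[pred == truth]) / len(pred)

Each block then reduces to the likes of dic['ld'].append(accuracy(test, ycontrol)).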
featData = []
pairs_feat = []
nonepairs_feat = []
pairs_category = []
nonepairs_category = []

# Gold pairings: line i of the English file matches line i of the foreign file.
truePairings = [(e.strip(), f.strip())
                for (e, f) in zip(open(opts.english_pairs, "r"),
                                  open(opts.foreign_pairs, "r"))]

with open(opts.fileName, "r") as pairs:
    i = 1
    for line in pairs:
        (e, f, features) = line.split("|||")
        features = [float(feat) for feat in features.strip().split(" ")]
        if (e.strip(), f.strip()) in truePairings:
            pairs_feat.append(features)
            pairs_category.append(1)
        else:
            # Downsample negatives: keep only every 350th non-pairing.
            if i % 350 == 0:
                nonepairs_feat.append(features)
                nonepairs_category.append(-1)
            i += 1

x = np.concatenate((pairs_feat, nonepairs_feat), axis=0)
y = np.concatenate((pairs_category, nonepairs_category))

p = mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=100)  # basic perceptron
p.learn(x, y)
w = p.w()
print "\n".join([str(weights) for weights in w])
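Beyond w(), the mlpy documentation lists further accessors on a fitted Perceptron (bias(), err() for the training error, iters() for the iteration count); a short sketch of dumping the rest of the fitted state, assuming those accessors:

print "bias:", p.bias()             # learned bias term
print "training error:", p.err()    # training error as reported by mlpy
print "iterations run:", p.iters()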