class LDA(BinaryClassifier):
    """
    Compute a linear discriminant classifier

    >>> from csv import DictReader
    >>> X = []
    >>> Y = []
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     X.append([float(row['Sepal.Length']),
    ...               float(row['Sepal.Width']),
    ...               float(row['Petal.Length']),
    ...               float(row['Petal.Width'])])
    ...     Y.append(row['Species'])
    >>> L = LDA(X, Y, 'versicolor')
    >>> cm = L.leave_one_out(X, Y)
    >>> round(cm.accuracy, 2)
    0.96
    >>> round(AUC(LDA, X, Y), 2)
    1.0
    """

    def __repr__(self):
        return "{}(weights=[{}], {})".format(
            self.__class__.__name__,
            ", ".join("{: 02.3f}".format(w) for w in self.W),
            self.thresh)

    def train(self, X, Y):
        # construct a table of feature vectors for each class
        table = {self.hit: [], self.miss: []}
        for (x, y) in zip(X, Y):
            table[y].append(x)
        # transpose each table so rows are features, columns are observations
        # (materialize as a list so numpy can consume it under Python 3)
        for col in (self.hit, self.miss):
            table[col] = list(zip(*table[col]))
        # compute weights: pooled-covariance inverse times the mean difference
        delta = mean(table[self.hit], 1) - mean(table[self.miss], 1)
        covar = asmatrix(cov(table[self.hit]) + cov(table[self.miss]))
        self.W = ravel(covar.I.dot(delta))
        # compute threshold on the scores projected with this weight vector
        self.thresh = Threshold((self.score(x) for x in X), Y, self.hit)

    def score(self, x):
        return sum(w * f for w, f in zip(self.W, x))

    def classify(self, x):
        return self.hit if self.thresh.is_hit(self.score(x)) else self.miss
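# Note on LDA.train: the weight vector computed above is Fisher's two-class
# linear discriminant. Writing mu_hit, mu_miss for the per-class feature means
# and S_hit, S_miss for the per-class covariance matrices (pooled here by
# simple addition), the direction is
#
#     w = (S_hit + S_miss)^-1 (mu_hit - mu_miss)
#
# and score(x) = w . x projects each observation onto that direction before
# the Threshold is applied.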
class Stump(BinaryClassifier):
    """
    Compute a classifier which makes a single "cut" in a continuous
    predictor vector X that splits the outcomes into "hit" and "miss"
    so as to maximize the number of correct classifications

    >>> from csv import DictReader
    >>> X = []
    >>> Y = []
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     X.append([float(row['Petal.Width'])])
    ...     Y.append(row['Species'])
    >>> s = Stump(X, Y, 'versicolor')
    >>> s.leave_one_out(X, Y)
    >>> round(s.accuracy(), 2)
    0.88
    >>> round(s.AUC(X, Y), 2)
    0.99
    """

    def __repr__(self):
        lower = self.miss
        upper = self.hit
        if not self.thresh.hit_upper:  # swap
            (lower, upper) = (upper, lower)
        return 'Stump({} < {: 02.3f} < {})'.format(lower, self.thresh.split, upper)

    def train(self, X, Y):
        """ Find the optimal split point """
        self.thresh = Threshold([x[0] for x in X], [y == self.hit for y in Y])

    def score(self, x):
        return x[0]

    def classify(self, x):
        return self.hit if self.thresh.is_hit(self.score(x)) else self.miss
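# Note: Stump.train delegates the actual split search to the Threshold class,
# which is defined elsewhere in this module and not shown here. The helper
# below is only a minimal sketch of that kind of search, under the assumption
# (stated in Stump's docstring) that the cut is chosen to maximize training
# accuracy; it is illustrative and is not used by Stump.

def _best_split_sketch(values, is_hit):
    """
    Hypothetical helper: exhaustively try every observed value as a cut
    point, in both orientations, and keep the cut that classifies the most
    training points correctly. Returns (split, hit_upper).
    """
    best = (-1, None, None)  # (number correct, split, hit_upper)
    for split in sorted(set(values)):
        for hit_upper in (True, False):
            correct = sum((v > split) == (h if hit_upper else not h)
                          for (v, h) in zip(values, is_hit))
            if correct > best[0]:
                best = (correct, split, hit_upper)
    return best[1], best[2]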