def estimateGrad(fun, x, delta):
    """Estimate the gradient of a real-valued function by forward differences.

    For every key of ``x`` the partial derivative is approximated as
    ``(fun(x + delta * e) - fun(x)) / delta``, where ``e`` is a vector whose
    only entry is ``1.0`` at that key.

    Args:
        fun: Callable that accepts a vector like ``x`` and returns a number.
        x: Point at which the gradient is estimated. Iterating over it must
            yield its keys and it must support ``x + delta * e``.
        delta: Step size used for the finite difference; must be non-zero.

    Returns:
        A SparseVector with one estimated partial derivative per key of ``x``.
    """
    grad = SparseVector({})
    # fun(x) does not depend on the key, so evaluate it once instead of once
    # per coordinate.
    base = fun(x)
    for key in x:
        e = SparseVector({})
        e[key] = 1.0
        grad[key] = (fun(x + delta * e) - base) / delta
    return grad
def __iter__(self):
    """Iterate over the records stored in the file named ``self.fname``.

    Each record is read as three consecutive items: a marshalled ``history``,
    a vector read with ``SparseVector.load`` and a marshalled ``total``.
    Iteration stops at the first ``EOFError`` raised while reading a record.

    Yields:
        ``(history, (values, total))`` tuples, in file order.

    Raises:
        AssertionError: if ``self.file`` is not ``None``.
    """
    assert self.file is None
    # The context manager closes the file even when the consumer stops
    # iterating early or a read fails with something other than EOFError.
    with open(self.fname, 'rb') as stream:
        while True:
            try:
                history = marshal.load(stream)
                values = SparseVector.load(stream)
                total = marshal.load(stream)
            except EOFError:
                break
            # Yield outside the try block so only read errors are treated as
            # end-of-data.
            yield history, (values, total)
def test_gradTotalLoss(self):
    """Call ParallelLogisticRegression.gradTotalLossRDD on a six-element RDD.

    Every element pairs a SparseVector over keys 'a' and 'b' (both set to the
    same value) with an integer of +1 or -1 (presumably the class label --
    confirm against gradTotalLossRDD). The result is computed but not
    asserted on in this block.
    """
    # (value shared by 'a' and 'b', second tuple element), in original order.
    specs = [(-1, -1), (-1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]
    samples = []
    for value, target in specs:
        vector = SparseVector.SparseVector({'a': value, 'b': value})
        samples.append((vector, target))
    rdd = self.sc.parallelize(samples)
    beta = SparseVector.SparseVector({'a': 2, 'b': 3})
    grad_total = ParallelLogisticRegression.gradTotalLossRDD(rdd, beta)
def test_test(self):
    """Call LogisticRegression.test on a six-element in-memory data set.

    Every element pairs a SparseVector over keys 'a' and 'b' (both set to the
    same value) with an integer of +1 or -1 (presumably the class label --
    confirm against LogisticRegression.test). The returned scores are not
    asserted on in this block.
    """
    make_vector = SparseVector.SparseVector
    # (value shared by 'a' and 'b', second tuple element), in original order.
    layout = ((-1, -1), (-1, 1), (1, 1), (1, 1), (1, 1), (1, 1))
    data = [
        (make_vector({'a': value, 'b': value}), target)
        for value, target in layout
    ]
    beta = make_vector({'a': 2, 'b': 2})
    scores = LogisticRegression.test(data, beta)
def test_test(self):
    """Call ParallelLogisticRegression.test on a six-element RDD and print it.

    Every element pairs a SparseVector over keys 'a' and 'b' (both set to the
    same value) with an integer of +1 or -1 (presumably the class label --
    confirm against ParallelLogisticRegression.test). The result is printed;
    nothing is asserted in this block.
    """
    rdd = self.sc.parallelize([
        (SparseVector.SparseVector({'a': -1, 'b': -1}), -1),
        (SparseVector.SparseVector({'a': -1, 'b': -1}), 1),
        (SparseVector.SparseVector({'a': 1, 'b': 1}), 1),
        (SparseVector.SparseVector({'a': 1, 'b': 1}), 1),
        (SparseVector.SparseVector({'a': 1, 'b': 1}), 1),
        (SparseVector.SparseVector({'a': 1, 'b': 1}), 1),
    ])
    beta = SparseVector.SparseVector({'a': 2, 'b': 3})
    kek = ParallelLogisticRegression.test(rdd, beta)
    # Parenthesised form prints the same text under the Python 2 print
    # statement and is also valid as the Python 3 print function.
    print(kek)
def add(self, history, values, total):
    """Write one record to ``self.file``.

    The record is written as three consecutive items: ``history`` via
    ``marshal.dump``, ``values`` via ``SparseVector.dump`` and ``total`` via
    ``marshal.dump``.
    """
    out = self.file
    marshal.dump(history, out)
    SparseVector.dump(values, out)
    marshal.dump(total, out)
def setUp(self):
    """Create the fixtures stored on the test instance.

    Sets ``self.x`` to a SparseVector with a=1, b=1, ``self.y`` to 1 and
    ``self.beta`` to a SparseVector with a=3, b=3.
    """
    make_vector = SparseVector.SparseVector
    self.x = make_vector({'a': 1, 'b': 1})
    self.y = 1
    self.beta = make_vector({'a': 3, 'b': 3})
def test_p_norm(self):
    """Build the SparseVector fixture with a=2, b=2.

    NOTE(review): nothing is computed from or asserted on the vector in this
    block -- confirm whether the norm check is missing.
    """
    entries = dict(a=2, b=2)
    vector = SparseVector.SparseVector(entries)
def add(self, word, dimension, freq=1):
    """Add ``freq`` to ``dimension`` in the vector stored under ``word``.

    If ``word`` has no entry yet, an empty ``SparseVector.SparseVector`` is
    stored for it first; the update is then delegated to that vector's
    ``add(dimension, freq)``.

    Args:
        word: Key of the vector to update.
        dimension: Dimension passed through to the vector's ``add``.
        freq: Amount passed through to the vector's ``add`` (default 1).
    """
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if word not in self:
        self[word] = SparseVector.SparseVector()
    self[word].add(dimension, freq)