# Assumed imports for these Spartan examples: numpy plus Spartan's lazy
# expression and utility modules. The helper mappers referenced below
# (_sum_instance_by_label_mapper, _naive_bayes_mapper, margin_mapper,
# _lda_mapper, _lda_doc_topic_mapper) are defined elsewhere in the examples
# and are left as-is.
import numpy as np
from spartan import expr, util


def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.

  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.
  '''
  labels = expr.force(labels)

  # calc document freq
  df = expr.reduce(data, axis=0,
                   dtype_fn=lambda input: input.dtype,
                   local_reduce_fn=lambda ex, data, axis: (data > 0).sum(axis),
                   accumulate_fn=np.add,
                   tile_hint=(data.shape[1],))

  idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1

  # Normalized frequency for a feature in a document is calculated by dividing
  # the feature frequency by the root mean square of feature frequencies in
  # that document.
  square_sum = expr.reduce(data, axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
                           accumulate_fn=np.add,
                           tile_hint=(data.shape[0],))

  rms = expr.sqrt(square_sum * 1.0 / data.shape[1])

  # calculate weight normalized Tf-Idf
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))

  # add up all the feature vectors with the same labels
  sum_instance_by_label = expr.ndarray((label_size, data.shape[1]),
                                       dtype=np.float64,
                                       reduce_fn=np.add,
                                       tile_hint=(label_size / len(labels.tiles), data.shape[1]))
  sum_instance_by_label = expr.shuffle(data,
                                       _sum_instance_by_label_mapper,
                                       target=sum_instance_by_label,
                                       kw={'labels': labels, 'label_size': label_size})

  # sum up all the weights for each label from the previous step
  weights_per_label = expr.sum(sum_instance_by_label, axis=1, tile_hint=(label_size,))

  # generate naive bayes per_label_and_feature weights
  weights_per_label_and_feature = expr.shuffle(sum_instance_by_label,
                                               _naive_bayes_mapper,
                                               kw={'weights_per_label': weights_per_label,
                                                   'alpha': alpha})

  return {'scores_per_label_and_feature': weights_per_label_and_feature.force(),
          'scores_per_label': weights_per_label.force(),
          }

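# The reduces above build an RMS-normalized TF-IDF weighting out of distributed
# primitives. For reference, a minimal local NumPy sketch of the same
# arithmetic (an illustration added here, not part of the Spartan example):
def _tfidf_rms_sketch(counts):
  '''Local NumPy equivalent of the document-frequency / RMS-normalization step.'''
  df = (counts > 0).sum(axis=0)                       # document frequency per feature
  idf = np.log(counts.shape[0] * 1.0 / (df + 1)) + 1  # smoothed inverse document frequency
  rms = np.sqrt(np.square(counts).sum(axis=1) * 1.0 / counts.shape[1])
  return counts / rms[:, None] * idf[None, :]         # weight-normalized Tf-Idf
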
def train_smo_1998(self, data, labels):
  '''
  Train an SVM model using the SMO (1998) algorithm.

  Args:
    data(Expr): points to be trained
    labels(Expr): the correct labels of the training data
  '''
  N = data.shape[0]  # Number of instances
  D = data.shape[1]  # Number of features

  self.b = 0.0
  self.alpha = expr.zeros((N, 1), dtype=np.float64,
                          tile_hint=[N / self.ctx.num_workers, 1]).force()

  # linear kernel
  kernel_results = expr.dot(data, expr.transpose(data),
                            tile_hint=[N / self.ctx.num_workers, N])

  labels = expr.force(labels)
  self.E = expr.zeros((N, 1), dtype=np.float64,
                      tile_hint=[N / self.ctx.num_workers, 1]).force()
  for i in xrange(N):
    self.E[i, 0] = self.b + expr.reduce(self.alpha, axis=None,
                                        dtype_fn=lambda input: input.dtype,
                                        local_reduce_fn=margin_mapper,
                                        accumulate_fn=np.add,
                                        fn_kw=dict(label=labels,
                                                   data=kernel_results[:, i].force())).glom() - labels[i, 0]

  util.log_info("Starting SMO")
  it = 0
  num_changed = 0
  examine_all = True
  while (num_changed > 0 or examine_all) and (it < self.maxiter):
    util.log_info("Iteration:%d", it)

    num_changed = 0
    if examine_all:
      for i in xrange(N):
        num_changed += self.examine_example(i, N, labels, kernel_results)
    else:
      for i in xrange(N):
        if self.alpha[i, 0] > 0 and self.alpha[i, 0] < self.C:
          num_changed += self.examine_example(i, N, labels, kernel_results)

    it += 1

    if examine_all:
      examine_all = False
    elif num_changed == 0:
      examine_all = True

  self.w = expr.zeros((D, 1), dtype=np.float64).force()
  for i in xrange(D):
    self.w[i, 0] = expr.reduce(self.alpha, axis=None,
                               dtype_fn=lambda input: input.dtype,
                               local_reduce_fn=margin_mapper,
                               accumulate_fn=np.add,
                               fn_kw=dict(label=labels,
                                          data=expr.force(data[:, i]))).glom()

  self.usew_ = True
  print 'iteration finish:', it
  print 'b:', self.b
  print 'w:', self.w.glom()

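# margin_mapper is referenced above but not defined in this snippet; the reduce
# presumably accumulates sum_j alpha_j * y_j * K[j, i], so each entry of the
# error cache is E_i = b + sum_j alpha_j * y_j * K(x_j, x_i) - y_i. A local
# NumPy sketch of that error cache under this assumption (an illustration, not
# the Spartan implementation):
def _error_cache_sketch(alpha, y, K, b):
  '''E[i] = b + sum_j alpha[j] * y[j] * K[j, i] - y[i] for every instance i.'''
  return b + K.T.dot(alpha * y) - y
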
def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.

  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.
  '''
  # calc document freq
  df = expr.reduce(data, axis=0,
                   dtype_fn=lambda input: input.dtype,
                   local_reduce_fn=lambda ex, data, axis: (data > 0).sum(axis),
                   accumulate_fn=np.add)

  idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1

  # Normalized frequency for a feature in a document is calculated by dividing
  # the feature frequency by the root mean square of feature frequencies in
  # that document.
  square_sum = expr.reduce(data, axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
                           accumulate_fn=np.add)

  rms = expr.sqrt(square_sum * 1.0 / data.shape[1])

  # calculate weight normalized Tf-Idf
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))

  # add up all the feature vectors with the same labels
  #weights_per_label_and_feature = expr.ndarray((label_size, data.shape[1]), dtype=np.float64)
  #for i in range(label_size):
  #  i_mask = (labels == i)
  #  weights_per_label_and_feature = expr.assign(weights_per_label_and_feature, np.s_[i, :],
  #                                              expr.sum(data[i_mask, :], axis=0))
  weights_per_label_and_feature = expr.shuffle(
      expr.retile(data, tile_hint=util.calc_tile_hint(data, axis=0)),
      _sum_instance_by_label_mapper,
      target=expr.ndarray((label_size, data.shape[1]), dtype=np.float64, reduce_fn=np.add),
      kw={'labels': labels, 'label_size': label_size},
      cost_hint={hash(labels): {'00': 0, '01': np.prod(labels.shape)}})

  # sum up all the weights for each label from the previous step
  weights_per_label = expr.sum(weights_per_label_and_feature, axis=1)

  # generate naive bayes per_label_and_feature weights
  weights_per_label_and_feature = expr.log(
      (weights_per_label_and_feature + alpha) /
      (weights_per_label.reshape((weights_per_label.shape[0], 1)) +
       alpha * weights_per_label_and_feature.shape[1]))

  return {'scores_per_label_and_feature': weights_per_label_and_feature.optimized().force(),
          'scores_per_label': weights_per_label.optimized().force(),
          }

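# The final expression above is a Laplace-smoothed, log-scaled estimate of
# P(feature | label). A minimal local NumPy sketch of that last step (an
# illustration added here, not part of the Spartan example):
def _nb_log_weights_sketch(label_feature_sums, alpha=1.0):
  '''log((count + alpha) / (label_total + alpha * num_features)) per label and feature.'''
  label_totals = label_feature_sums.sum(axis=1).reshape((-1, 1))
  return np.log((label_feature_sums + alpha) /
                (label_totals + alpha * label_feature_sums.shape[1]))
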
def learn_topics(terms_docs_matrix, k_topics, alpha=0.1, eta=0.1, max_iter=10, max_iter_per_doc=1):
  """
  Using Collapsed Variational Bayes method (Mahout implementation) to train LDA topic model.

  Args:
    terms_docs_matrix(Expr or DistArray): the count of each term in each document.
    k_topics: the number of topics we need to find.
    alpha(float): parameter of LDA model.
    eta(float): parameter of LDA model.
    max_iter(int): the max iterations to train LDA topic model.
    max_iter_per_doc: the max iterations to train each document.
  """
  num_terms = terms_docs_matrix.shape[0]
  num_docs = terms_docs_matrix.shape[1]

  topic_term_counts = expr.rand(k_topics, num_terms)
  for i in range(max_iter):
    #topic_term_counts = expr.shuffle(expr.retile(terms_docs_matrix,
    #                                             tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
    #                                 _lda_mapper,
    #                                 target=expr.ndarray((k_topics, num_terms), dtype=np.float64, reduce_fn=np.add),
    #                                 kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
    #                                     'max_iter_per_doc': max_iter_per_doc,
    #                                     'topic_term_counts': topic_term_counts}).optimized()
    topic_term_counts = expr.outer(
        (terms_docs_matrix, topic_term_counts), (1, None),
        fn=_lda_mapper,
        fn_kw={"k_topics": k_topics, "alpha": alpha, "eta": eta,
               "max_iter_per_doc": max_iter_per_doc},
        shape=(k_topics, num_terms),
        dtype=np.float64,
        reducer=np.add)

    # calculate the doc-topic inference
    #doc_topics = expr.shuffle(expr.retile(terms_docs_matrix,
    #                                      tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
    #                          _lda_doc_topic_mapper,
    #                          kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
    #                              'max_iter_per_doc': max_iter_per_doc,
    #                              'topic_term_counts': topic_term_counts},
    #                          shape_hint=(num_docs, k_topics)).optimized()
    doc_topics = expr.outer(
        (terms_docs_matrix, topic_term_counts), (1, None),
        fn=_lda_doc_topic_mapper,
        fn_kw={"k_topics": k_topics, "alpha": alpha, "eta": eta,
               "max_iter_per_doc": max_iter_per_doc},
        shape=(num_docs, k_topics),
        dtype=np.float64)

    # normalize the topic-term distribution
    norm_val = expr.reduce(topic_term_counts, axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.abs(data).sum(axis),
                           accumulate_fn=np.add)
    topic_term_counts = topic_term_counts / norm_val.reshape((k_topics, 1))
    topic_term_counts = topic_term_counts.optimized()

  return doc_topics, topic_term_counts

def learn_topics(terms_docs_matrix, k_topics, alpha=0.1, eta=0.1, max_iter=10, max_iter_per_doc=1):
  '''
  Using Collapsed Variational Bayes method (Mahout implementation) to train LDA topic model.

  Args:
    terms_docs_matrix(Expr or DistArray): the count of each term in each document.
    k_topics: the number of topics we need to find.
    alpha(float): parameter of LDA model.
    eta(float): parameter of LDA model.
    max_iter(int): the max iterations to train LDA topic model.
    max_iter_per_doc: the max iterations to train each document.
  '''
  topic_term_counts = expr.rand(k_topics, terms_docs_matrix.shape[0],
                                tile_hint=(k_topics, terms_docs_matrix.shape[0]))
  for i in range(max_iter):
    new_topic_term_counts = expr.ndarray((k_topics, terms_docs_matrix.shape[0]),
                                         dtype=np.float64,
                                         reduce_fn=np.add,
                                         tile_hint=(k_topics, terms_docs_matrix.shape[0]))
    topic_term_counts = expr.shuffle(terms_docs_matrix,
                                     _lda_mapper,
                                     target=new_topic_term_counts,
                                     kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
                                         'max_iter_per_doc': max_iter_per_doc,
                                         'topic_term_counts': topic_term_counts})

    # calculate the doc-topic inference
    doc_topics = expr.shuffle(terms_docs_matrix,
                              _lda_doc_topic_mapper,
                              kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
                                  'max_iter_per_doc': max_iter_per_doc,
                                  'topic_term_counts': topic_term_counts})

    # normalize the topic-term distribution
    norm_val = expr.reduce(topic_term_counts, axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.abs(data).sum(axis),
                           accumulate_fn=np.add)
    topic_term_counts = topic_term_counts / norm_val.reshape((topic_term_counts.shape[0], 1))

  return doc_topics, topic_term_counts

def learn_topics(terms_docs_matrix, k_topics, alpha=0.1, eta=0.1, max_iter=10, max_iter_per_doc=1):
  '''
  Using Collapsed Variational Bayes method (Mahout implementation) to train LDA topic model.

  Args:
    terms_docs_matrix(Expr or DistArray): the count of each term in each document.
    k_topics: the number of topics we need to find.
    alpha(float): parameter of LDA model.
    eta(float): parameter of LDA model.
    max_iter(int): the max iterations to train LDA topic model.
    max_iter_per_doc: the max iterations to train each document.
  '''
  num_terms = terms_docs_matrix.shape[0]
  num_docs = terms_docs_matrix.shape[1]

  topic_term_counts = expr.rand(k_topics, num_terms)
  for i in range(max_iter):
    #topic_term_counts = expr.shuffle(expr.retile(terms_docs_matrix,
    #                                             tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
    #                                 _lda_mapper,
    #                                 target=expr.ndarray((k_topics, num_terms), dtype=np.float64, reduce_fn=np.add),
    #                                 kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
    #                                     'max_iter_per_doc': max_iter_per_doc,
    #                                     'topic_term_counts': topic_term_counts}).optimized()
    topic_term_counts = expr.outer(
        (terms_docs_matrix, topic_term_counts), (1, None),
        fn=_lda_mapper,
        fn_kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
               'max_iter_per_doc': max_iter_per_doc},
        shape=(k_topics, num_terms),
        dtype=np.float64,
        reducer=np.add)

    # calculate the doc-topic inference
    #doc_topics = expr.shuffle(expr.retile(terms_docs_matrix,
    #                                      tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
    #                          _lda_doc_topic_mapper,
    #                          kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
    #                              'max_iter_per_doc': max_iter_per_doc,
    #                              'topic_term_counts': topic_term_counts},
    #                          shape_hint=(num_docs, k_topics)).optimized()
    doc_topics = expr.outer(
        (terms_docs_matrix, topic_term_counts), (1, None),
        fn=_lda_doc_topic_mapper,
        fn_kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta,
               'max_iter_per_doc': max_iter_per_doc},
        shape=(num_docs, k_topics),
        dtype=np.float64)

    # normalize the topic-term distribution
    norm_val = expr.reduce(topic_term_counts, axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.abs(data).sum(axis),
                           accumulate_fn=np.add)
    topic_term_counts = topic_term_counts / norm_val.reshape((k_topics, 1))

  return doc_topics, topic_term_counts

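# Hypothetical usage sketch for learn_topics (assumes a Spartan context has
# been initialized and the _lda_mapper / _lda_doc_topic_mapper helpers are
# available; the matrix sizes below are arbitrary stand-ins and expr.rand is a
# random stand-in for a real term-by-document count matrix):
def _learn_topics_usage_sketch():
  terms_docs = expr.rand(10000, 2000)   # rows: terms, columns: documents
  doc_topics, topic_term_counts = learn_topics(terms_docs, k_topics=20, max_iter=5)
  print doc_topics.glom().shape         # (num_docs, k_topics) == (2000, 20)
  print topic_term_counts.glom().shape  # (k_topics, num_terms) == (20, 10000)
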
def train_smo_2005(self, data, labels):
  '''
  Train an SVM model using the SMO (2005) algorithm.

  Args:
    data(Expr): points to be trained
    labels(Expr): the correct labels of the training data
  '''
  N = data.shape[0]  # Number of instances
  D = data.shape[1]  # Number of features

  self.b = 0.0
  alpha = expr.zeros((N, 1), dtype=np.float64,
                     tile_hint=[N / self.ctx.num_workers, 1]).force()

  # linear kernel
  kernel_results = expr.dot(data, expr.transpose(data),
                            tile_hint=[N / self.ctx.num_workers, N])

  gradient = expr.ones((N, 1), dtype=np.float64,
                       tile_hint=[N / self.ctx.num_workers, 1]) * -1.0

  expr_labels = expr.lazify(labels)

  util.log_info("Starting SMO")
  pv1 = pv2 = -1
  it = 0
  while it < self.maxiter:
    util.log_info("Iteration:%d", it)

    minObj = 1e100
    expr_alpha = expr.lazify(alpha)
    G = expr.multiply(labels, gradient) * -1.0

    v1_mask = ((expr_labels > self.tol) * (expr_alpha < self.C) +
               (expr_labels < -self.tol) * (expr_alpha > self.tol))
    v1 = expr.argmax(G[v1_mask - True]).glom().item()
    maxG = G[v1, 0].glom()
    print 'maxv1:', v1, 'maxG:', maxG

    v2_mask = ((expr_labels > self.tol) * (expr_alpha > self.tol) +
               (expr_labels < -self.tol) * (expr_alpha < self.C))
    min_v2 = expr.argmin(G[v2_mask - True]).glom().item()
    minG = G[min_v2, 0].glom()
    #print 'minv2:', min_v2, 'minG:', minG

    set_v2 = v2_mask.glom().nonzero()[0]
    #print 'actives:', set_v2.shape[0]
    v2 = -1
    for v in set_v2:
      b = maxG - G[v, 0].glom()
      if b > self.tol:
        na = (kernel_results[v1, v1] + kernel_results[v, v] -
              2 * kernel_results[v1, v]).glom()[0][0]
        if na < self.tol: na = 1e12

        obj = -(b * b) / na
        if obj <= minObj and v1 != pv1 or v != pv2:
          v2 = v
          a = na
          minObj = obj

    if v2 == -1: break
    if maxG - minG < self.tol: break

    print 'opt v1:', v1, 'v2:', v2

    pv1 = v1
    pv2 = v2

    y1 = labels[v1, 0]
    y2 = labels[v2, 0]
    oldA1 = alpha[v1, 0]
    oldA2 = alpha[v2, 0]

    # Calculate new alpha values, to reduce the objective function...
    b = y2 * expr.glom(gradient[v2, 0]) - y1 * expr.glom(gradient[v1, 0])
    if y1 != y2:
      a += 4 * kernel_results[v1, v2].glom()
    newA1 = oldA1 + y1 * b / a
    newA2 = oldA2 - y2 * b / a

    # Correct for alpha being out of range...
    sum = y1 * oldA1 + y2 * oldA2
    if newA1 < self.tol:
      newA1 = 0.0
    elif newA1 > self.C:
      newA1 = self.C
    newA2 = y2 * (sum - y1 * newA1)

    if newA2 < self.tol:
      newA2 = 0.0
    elif newA2 > self.C:
      newA2 = self.C
    newA1 = y1 * (sum - y2 * newA2)

    # Update the gradient...
    dA1 = newA1 - oldA1
    dA2 = newA2 - oldA2
    gradient += (expr.multiply(labels, kernel_results[:, v1]) * y1 * dA1 +
                 expr.multiply(labels, kernel_results[:, v2]) * y2 * dA2)

    alpha[v1, 0] = newA1
    alpha[v2, 0] = newA2
    #print 'alpha:', alpha.glom().T

    it += 1
    #print 'gradient:', gradient.glom().T

  self.w = expr.zeros((D, 1), dtype=np.float64).force()
  for i in xrange(D):
    self.w[i, 0] = expr.reduce(alpha, axis=None,
                               dtype_fn=lambda input: input.dtype,
                               local_reduce_fn=margin_mapper,
                               accumulate_fn=np.add,
                               fn_kw=dict(label=labels,
                                          data=expr.force(data[:, i]))).glom()

  self.b = 0.0
  E = (labels - self.margins(data)).force()
  minB = -1e100
  maxB = 1e100
  actualB = 0.0
  numActualB = 0
  for i in xrange(N):
    ai = alpha[i, 0]
    yi = labels[i, 0]
    Ei = E[i, 0]
    if ai < 1e-3:
      if yi < self.tol:
        maxB = min((maxB, Ei))
      else:
        minB = max((minB, Ei))
    elif ai > self.C - 1e-3:
      if yi < self.tol:
        minB = max((minB, Ei))
      else:
        maxB = min((maxB, Ei))
    else:
      numActualB += 1
      actualB += (Ei - actualB) / float(numActualB)

  if numActualB > 0:
    self.b = actualB
  else:
    self.b = 0.5 * (minB + maxB)

  self.usew_ = True
  print 'iteration finish:', it
  print 'b:', self.b
  print 'w:', self.w.glom()

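# The masks above implement working-set selection over the lazy expression
# graph: v1 is the maximal violator in the "up" set, and v2 is then chosen from
# the "low" set using the second-order score -(b*b)/a. A simplified local NumPy
# sketch of the first-order (maximal-violating-pair) part of that selection,
# added as an illustration rather than a rendering of the distributed code:
def _select_violating_pair_sketch(y, alpha, grad, C, tol=1e-3):
  '''Return (i, j) for the maximal violating pair, or (-1, -1) if converged.'''
  G = -y * grad
  up = ((y > tol) & (alpha < C)) | ((y < -tol) & (alpha > tol))
  low = ((y > tol) & (alpha > tol)) | ((y < -tol) & (alpha < C))
  i = np.where(up)[0][np.argmax(G[up])]
  j = np.where(low)[0][np.argmin(G[low])]
  if G[i] - G[j] < tol:   # all KKT violations are within tolerance
    return -1, -1
  return i, j
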
def take_step(self, i, j, N, labels, kernel_results):
  '''
  Perform one optimization step: update the model with respect to training
  examples i and j, and modify self.E to reflect the error of the new model
  on the labels.

  Args:
    i(int): index of the first alpha to be optimized
    j(int): index of the second alpha to be optimized
    N(int): the number of training instances
    labels(Expr): the labels of the training data
    kernel_results(Expr): the result of the kernel function on the training data
  '''
  if i == j: return 0

  ai = self.alpha[i, 0]
  aj = self.alpha[j, 0]
  Ei = self.E[i, 0]
  Ej = self.E[j, 0]
  yi = labels[i, 0]
  yj = labels[j, 0]
  kii = kernel_results[i, i].glom()
  kjj = kernel_results[j, j].glom()
  kij = kernel_results[i, j].glom()
  s = yi * yj

  # bounds [L, H] that keep both alphas within [0, C]
  L = 0
  H = self.C
  if yi == yj:
    L = max(0, ai + aj - self.C)
    H = min(self.C, ai + aj)
  else:
    L = max(0, aj - ai)
    H = min(self.C, self.C + aj - ai)

  if abs(L - H) < self.tol:
    return 0

  # eta: curvature of the objective along the constraint line
  eta = kii + kjj - 2 * kij
  if eta > self.tol:
    newaj = aj + yj * (Ei - Ej) / eta
    if newaj > H: newaj = H
    if newaj < L: newaj = L
  else:
    return 0

  if abs(aj - newaj) < self.tol * (aj + newaj + self.tol):
    return 0

  newai = ai + s * (aj - newaj)

  # update the threshold b
  b1 = self.b - Ei - yi * (newai - ai) * kii - yj * (newaj - aj) * kij
  b2 = self.b - Ej - yi * (newai - ai) * kij - yj * (newaj - aj) * kjj
  self.b = 0.5 * (b1 + b2)
  if (newai > self.tol) and (newai < self.C):
    self.b = b1
  if (newaj > self.tol) and (newaj < self.C):
    self.b = b2
  self.b = self.b[0, 0]

  self.alpha[i, 0] = newai
  self.alpha[j, 0] = newaj

  # refresh the error cache for the updated model
  for i in xrange(N):
    self.E[i, 0] = self.b + expr.reduce(self.alpha, axis=None,
                                        dtype_fn=lambda input: input.dtype,
                                        local_reduce_fn=margin_mapper,
                                        accumulate_fn=np.add,
                                        fn_kw=dict(label=labels,
                                                   data=kernel_results[:, i].force())).glom() - labels[i, 0]
  #print 'b', self.b
  #print 'alpha', self.alpha.glom().T
  #print 'E', self.E.glom().T
  print 'success opt i, j:', i, j
  return 1

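# Stripped of the distributed-array machinery, the core of take_step is Platt's
# clipped pairwise update: move alpha_j along the constraint line by
# y_j * (E_i - E_j) / eta, clip it to [L, H], and adjust alpha_i so that
# y_i*alpha_i + y_j*alpha_j stays constant. A minimal local sketch (an
# illustration, not the method above):
def _pairwise_update_sketch(ai, aj, yi, yj, Ei, Ej, kii, kjj, kij, C):
  '''Return (new_ai, new_aj), or None if no progress can be made.'''
  if yi == yj:
    L, H = max(0.0, ai + aj - C), min(C, ai + aj)
  else:
    L, H = max(0.0, aj - ai), min(C, C + aj - ai)
  eta = kii + kjj - 2.0 * kij       # curvature along the constraint line
  if L >= H or eta <= 0:
    return None
  new_aj = min(H, max(L, aj + yj * (Ei - Ej) / eta))
  new_ai = ai + yi * yj * (aj - new_aj)
  return new_ai, new_aj
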