Example #1
def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.
 
  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.
  '''
  labels = expr.force(labels)
  
  # calc document freq
  df = expr.reduce(data,
                   axis=0,
                   dtype_fn=lambda input: input.dtype,
                   local_reduce_fn=lambda ex, data, axis: (data > 0).sum(axis),
                   accumulate_fn=np.add,
                   tile_hint=(data.shape[1],))
  
  idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1
   
  # The normalized frequency of a feature in a document is its frequency divided
  # by the root mean square of the feature frequencies in that document.
  square_sum = expr.reduce(data,
                           axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
                           accumulate_fn=np.add,
                           tile_hint=(data.shape[0],))
  
  rms = expr.sqrt(square_sum * 1.0 / data.shape[1])
  
  # calculate weight normalized Tf-Idf
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))
  
  # add up all the feature vectors with the same labels
  sum_instance_by_label = expr.ndarray((label_size, data.shape[1]),
                                       dtype=np.float64, 
                                       reduce_fn=np.add,
                                       tile_hint=(label_size / len(labels.tiles), data.shape[1]))
  sum_instance_by_label = expr.shuffle(data,
                                       _sum_instance_by_label_mapper,
                                       target=sum_instance_by_label,
                                       kw={'labels': labels, 'label_size': label_size})

  # sum up all the weights for each label from the previous step
  weights_per_label = expr.sum(sum_instance_by_label, axis=1, tile_hint=(label_size,))
  
  # generate naive bayes per_label_and_feature weights
  weights_per_label_and_feature = expr.shuffle(sum_instance_by_label,
                                               _naive_bayes_mapper,
                                               kw={'weights_per_label': weights_per_label, 
                                                   'alpha':alpha})
  
  return {'scores_per_label_and_feature': weights_per_label_and_feature.force(),
          'scores_per_label': weights_per_label.force(),
          }
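
For reference, the two reduces above compute ordinary document frequencies and per-document root-mean-square norms, and the shuffle accumulates one weighted row per label. A minimal single-machine sketch of the same computation, assuming a dense NumPy term-count matrix and a flat integer label array (names here are illustrative, not part of the library):

import numpy as np

def tfidf_weight_local(data):
    # data: (num_docs, num_features) dense term-count matrix (hypothetical input)
    num_docs, num_features = data.shape
    df = (data > 0).sum(axis=0)                      # document frequency per feature
    idf = np.log(num_docs * 1.0 / (df + 1)) + 1
    rms = np.sqrt(np.square(data).sum(axis=1) * 1.0 / num_features)
    return data / rms.reshape(num_docs, 1) * idf.reshape(1, num_features)

def sum_by_label_local(weighted, labels, label_size):
    # aggregate result the labeled shuffle is assumed to produce:
    # row c holds the sum of all weighted documents whose label is c
    sums = np.zeros((label_size, weighted.shape[1]))
    for c in range(label_size):
        sums[c] = weighted[labels == c].sum(axis=0)
    return sums
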
Example #2
  def train_smo_1998(self, data, labels):
    '''
    Train an SVM model using the SMO (1998) algorithm.
   
    Args:
      data(Expr): points to be trained
      labels(Expr): the correct labels of the training data
    '''
    
    N = data.shape[0] # Number of instances
    D = data.shape[1]  # Number of features

    self.b = 0.0
    self.alpha = expr.zeros((N,1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]).force()
    
    # linear kernel
    kernel_results = expr.dot(data, expr.transpose(data), tile_hint=[N/self.ctx.num_workers, N])   
    
    labels = expr.force(labels)
    self.E = expr.zeros((N,1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]).force()
    for i in xrange(N):
      self.E[i, 0] = self.b + expr.reduce(self.alpha, axis=None, dtype_fn=lambda input: input.dtype,
                                          local_reduce_fn=margin_mapper,
                                          accumulate_fn=np.add, 
                                          fn_kw=dict(label=labels, data=kernel_results[:,i].force())).glom() - labels[i, 0]
    
    util.log_info("Starting SMO")
    it = 0
    num_changed = 0
    examine_all = True
    while (num_changed > 0 or examine_all) and (it < self.maxiter):
      util.log_info("Iteration:%d", it)

      num_changed = 0
      
      if examine_all:
        for i in xrange(N): 
          num_changed += self.examine_example(i, N, labels, kernel_results)
      else:
        for i in xrange(N):
          if self.alpha[i, 0] > 0 and self.alpha[i, 0] < self.C:
            num_changed += self.examine_example(i, N, labels, kernel_results)

      it += 1

      if examine_all: examine_all = False
      elif num_changed == 0: examine_all = True
    
    self.w = expr.zeros((D, 1), dtype=np.float64).force()
    for i in xrange(D): 
      self.w[i,0] = expr.reduce(self.alpha, axis=None, dtype_fn=lambda input: input.dtype,
                              local_reduce_fn=margin_mapper,
                              accumulate_fn=np.add, 
                              fn_kw=dict(label=labels, data=expr.force(data[:,i]))).glom()
    self.usew_ = True
    print 'iteration finish:', it
    print 'b:', self.b
    print 'w:', self.w.glom()
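
The expr.reduce with margin_mapper inside the loop fills the usual SMO error cache. Assuming margin_mapper sums alpha[j] * labels[j] * K[j, i] over a tile (the mapper itself is not shown in this listing), the loop is equivalent to the following NumPy sketch:

import numpy as np

def error_cache_local(alpha, labels, K, b):
    # alpha, labels: (N, 1) column vectors; K: (N, N) kernel matrix; b: scalar bias
    # E[i] = b + sum_j alpha[j] * labels[j] * K[j, i] - labels[i]
    margins = (alpha * labels * K).sum(axis=0).reshape(-1, 1)
    return b + margins - labels
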
Example #3
def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.
 
  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.
  '''
  # calc document freq
  df = expr.reduce(data,
                   axis=0,
                   dtype_fn=lambda input: input.dtype,
                   local_reduce_fn=lambda ex, data, axis: (data > 0).sum(axis),
                   accumulate_fn=np.add)
  
  idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1
   
  # The normalized frequency of a feature in a document is its frequency divided
  # by the root mean square of the feature frequencies in that document.
  square_sum = expr.reduce(data,
                           axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
                           accumulate_fn=np.add)
  
  rms = expr.sqrt(square_sum * 1.0 / data.shape[1])
  
  # calculate weight normalized Tf-Idf
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))
  
  # add up all the feature vectors with the same labels
  #weights_per_label_and_feature = expr.ndarray((label_size, data.shape[1]), dtype=np.float64)
  #for i in range(label_size):
  #  i_mask = (labels == i)
  #  weights_per_label_and_feature = expr.assign(weights_per_label_and_feature, np.s_[i, :], expr.sum(data[i_mask, :], axis=0))
  weights_per_label_and_feature = expr.shuffle(expr.retile(data, tile_hint=util.calc_tile_hint(data, axis=0)),
                                               _sum_instance_by_label_mapper,
                                               target=expr.ndarray((label_size, data.shape[1]), dtype=np.float64, reduce_fn=np.add),
                                               kw={'labels': labels, 'label_size': label_size},
                                               cost_hint={hash(labels):{'00':0, '01':np.prod(labels.shape)}})

  # sum up all the weights for each label from the previous step
  weights_per_label = expr.sum(weights_per_label_and_feature, axis=1)
  
  # generate naive bayes per_label_and_feature weights
  weights_per_label_and_feature = expr.log((weights_per_label_and_feature + alpha) / 
                                           (weights_per_label.reshape((weights_per_label.shape[0], 1)) + 
                                            alpha * weights_per_label_and_feature.shape[1]))

  return {'scores_per_label_and_feature': weights_per_label_and_feature.optimized().force(),
          'scores_per_label': weights_per_label.optimized().force(),
          }
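
The final step is the standard Laplace-smoothed multinomial naive Bayes weighting: log((count + alpha) / (label_total + alpha * num_features)). A single-machine sketch of the same formula (illustrative names, not the distributed API):

import numpy as np

def naive_bayes_log_weights(sum_by_label, alpha=1.0):
    # sum_by_label: (label_size, num_features) per-label feature totals
    label_totals = sum_by_label.sum(axis=1).reshape(-1, 1)
    num_features = sum_by_label.shape[1]
    return np.log((sum_by_label + alpha) /
                  (label_totals + alpha * num_features))
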
Example #4
def learn_topics(terms_docs_matrix, k_topics, alpha=0.1, eta=0.1, max_iter=10, max_iter_per_doc=1):
    """
  Train an LDA topic model using the Collapsed Variational Bayes method (Mahout implementation).

  Args:
    terms_docs_matrix(Expr or DistArray): the count of each term in each document.
    k_topics(int): the number of topics to find.
    alpha(float): parameter of the LDA model.
    eta(float): parameter of the LDA model.
    max_iter(int): the max number of iterations to train the LDA topic model.
    max_iter_per_doc(int): the max number of iterations to train each document.
  """
    num_terms = terms_docs_matrix.shape[0]
    num_docs = terms_docs_matrix.shape[1]

    topic_term_counts = expr.rand(k_topics, num_terms)
    for i in range(max_iter):
        # topic_term_counts = expr.shuffle(expr.retile(terms_docs_matrix, tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
        # _lda_mapper,
        # target=expr.ndarray((k_topics, num_terms), dtype=np.float64, reduce_fn=np.add),
        # kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta, 'max_iter_per_doc': max_iter_per_doc,
        #'topic_term_counts': topic_term_counts}).optimized()
        topic_term_counts = expr.outer(
            (terms_docs_matrix, topic_term_counts),
            (1, None),
            fn=_lda_mapper,
            fn_kw={"k_topics": k_topics, "alpha": alpha, "eta": eta, "max_iter_per_doc": max_iter_per_doc},
            shape=(k_topics, num_terms),
            dtype=np.float64,
            reducer=np.add,
        )
    # calculate the doc-topic inference
    # doc_topics = expr.shuffle(expr.retile(terms_docs_matrix, tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
    # _lda_doc_topic_mapper,
    # kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta, 'max_iter_per_doc': max_iter_per_doc,
    #'topic_term_counts': topic_term_counts},
    # shape_hint=(num_docs, k_topics)).optimized()
    doc_topics = expr.outer(
        (terms_docs_matrix, topic_term_counts),
        (1, None),
        fn=_lda_doc_topic_mapper,
        fn_kw={"k_topics": k_topics, "alpha": alpha, "eta": eta, "max_iter_per_doc": max_iter_per_doc},
        shape=(num_docs, k_topics),
        dtype=np.float64,
    )

    # normalize the topic-term distribution
    norm_val = expr.reduce(
        topic_term_counts,
        axis=1,
        dtype_fn=lambda input: input.dtype,
        local_reduce_fn=lambda ex, data, axis: np.abs(data).sum(axis),
        accumulate_fn=np.add,
    )
    topic_term_counts = topic_term_counts / norm_val.reshape((k_topics, 1))
    topic_term_counts = topic_term_counts.optimized()
    return doc_topics, topic_term_counts
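
_lda_mapper itself is not shown in this listing. Broadly, a collapsed-variational mapper of this kind computes soft topic assignments for the terms of each document and contributes them back into the (k_topics, num_terms) count matrix that the np.add reducer merges. The following is only a rough, simplified single-document sketch of such an update; the Mahout-style CVB0 update differs in detail:

import numpy as np

def cvb0_doc_update(doc_counts, topic_term_counts, alpha, eta, max_iter_per_doc):
    # doc_counts: (num_terms,) term counts of one document (hypothetical input)
    # topic_term_counts: (k_topics, num_terms) current global topic-term counts
    k_topics, num_terms = topic_term_counts.shape
    topic_totals = topic_term_counts.sum(axis=1)           # (k_topics,)
    doc_topic = np.ones(k_topics) / k_topics                # flat initial doc-topic mix
    nonzero = np.flatnonzero(doc_counts)
    contribution = np.zeros_like(topic_term_counts)
    for _ in range(max_iter_per_doc):
        contribution[:] = 0.0
        new_doc_topic = np.zeros(k_topics)
        for w in nonzero:
            # responsibility of each topic for term w in this document
            gamma = (topic_term_counts[:, w] + eta) / (topic_totals + eta * num_terms)
            gamma *= doc_topic + alpha
            gamma /= gamma.sum()
            contribution[:, w] += doc_counts[w] * gamma
            new_doc_topic += doc_counts[w] * gamma
        doc_topic = new_doc_topic / max(doc_counts.sum(), 1.0)
    return contribution   # this document's contribution to the new topic-term counts
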
Example #5
def learn_topics(terms_docs_matrix, k_topics, alpha=0.1, eta=0.1, max_iter=10, max_iter_per_doc=1):
  '''
  Train an LDA topic model using the Collapsed Variational Bayes method (Mahout implementation).

  Args:
    terms_docs_matrix(Expr or DistArray): the count of each term in each document.
    k_topics(int): the number of topics to find.
    alpha(float): parameter of the LDA model.
    eta(float): parameter of the LDA model.
    max_iter(int): the max number of iterations to train the LDA topic model.
    max_iter_per_doc(int): the max number of iterations to train each document.
  '''
  topic_term_counts = expr.rand(k_topics, terms_docs_matrix.shape[0], 
                                tile_hint=(k_topics, terms_docs_matrix.shape[0]))

  for i in range(max_iter):
    new_topic_term_counts = expr.ndarray((k_topics, terms_docs_matrix.shape[0]), 
                                         dtype=np.float64, 
                                         reduce_fn=np.add, 
                                         tile_hint=(k_topics, terms_docs_matrix.shape[0]))
    topic_term_counts = expr.shuffle(terms_docs_matrix, _lda_mapper, target=new_topic_term_counts, 
                                     kw={'k_topics': k_topics, 'alpha': alpha, 'eta':eta, 
                                         'max_iter_per_doc': max_iter_per_doc, 
                                         'topic_term_counts': topic_term_counts})
    
  # calculate the doc-topic inference
  doc_topics = expr.shuffle(terms_docs_matrix, _lda_doc_topic_mapper, 
                            kw={'k_topics': k_topics, 'alpha': alpha, 'eta':eta, 
                                'max_iter_per_doc': max_iter_per_doc, 
                                'topic_term_counts': topic_term_counts})
  
  # normalize the topic-term distribution  
  norm_val = expr.reduce(topic_term_counts, axis=1, 
                         dtype_fn=lambda input: input.dtype, 
                         local_reduce_fn=lambda ex, data, axis:np.abs(data).sum(axis), 
                         accumulate_fn=np.add)
  topic_term_counts = topic_term_counts / norm_val.reshape((topic_term_counts.shape[0], 1))

  return doc_topics, topic_term_counts
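
The closing reduce is just a per-topic L1 normalization of the topic-term counts; in plain NumPy it amounts to:

import numpy as np

def normalize_topic_terms(topic_term_counts):
    # divide each topic's row by its L1 norm so the term weights sum to 1
    norm = np.abs(topic_term_counts).sum(axis=1, keepdims=True)
    return topic_term_counts / norm
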
Example #6
def learn_topics(terms_docs_matrix,
                 k_topics,
                 alpha=0.1,
                 eta=0.1,
                 max_iter=10,
                 max_iter_per_doc=1):
    '''
  Train an LDA topic model using the Collapsed Variational Bayes method (Mahout implementation).

  Args:
    terms_docs_matrix(Expr or DistArray): the count of each term in each document.
    k_topics(int): the number of topics to find.
    alpha(float): parameter of the LDA model.
    eta(float): parameter of the LDA model.
    max_iter(int): the max number of iterations to train the LDA topic model.
    max_iter_per_doc(int): the max number of iterations to train each document.
  '''
    num_terms = terms_docs_matrix.shape[0]
    num_docs = terms_docs_matrix.shape[1]

    topic_term_counts = expr.rand(k_topics, num_terms)
    for i in range(max_iter):
        #topic_term_counts = expr.shuffle(expr.retile(terms_docs_matrix, tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
        #_lda_mapper,
        #target=expr.ndarray((k_topics, num_terms), dtype=np.float64, reduce_fn=np.add),
        #kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta, 'max_iter_per_doc': max_iter_per_doc,
        #'topic_term_counts': topic_term_counts}).optimized()
        topic_term_counts = expr.outer(
            (terms_docs_matrix, topic_term_counts), (1, None),
            fn=_lda_mapper,
            fn_kw={
                'k_topics': k_topics,
                'alpha': alpha,
                'eta': eta,
                'max_iter_per_doc': max_iter_per_doc
            },
            shape=(k_topics, num_terms),
            dtype=np.float64,
            reducer=np.add)
    # calculate the doc-topic inference
    #doc_topics = expr.shuffle(expr.retile(terms_docs_matrix, tile_hint=util.calc_tile_hint(terms_docs_matrix, axis=1)),
    #_lda_doc_topic_mapper,
    #kw={'k_topics': k_topics, 'alpha': alpha, 'eta': eta, 'max_iter_per_doc': max_iter_per_doc,
    #'topic_term_counts': topic_term_counts},
    #shape_hint=(num_docs, k_topics)).optimized()
    doc_topics = expr.outer(
        (terms_docs_matrix, topic_term_counts), (1, None),
        fn=_lda_doc_topic_mapper,
        fn_kw={
            'k_topics': k_topics,
            'alpha': alpha,
            'eta': eta,
            'max_iter_per_doc': max_iter_per_doc
        },
        shape=(num_docs, k_topics),
        dtype=np.float64)

    # normalize the topic-term distribution
    norm_val = expr.reduce(
        topic_term_counts,
        axis=1,
        dtype_fn=lambda input: input.dtype,
        local_reduce_fn=lambda ex, data, axis: np.abs(data).sum(axis),
        accumulate_fn=np.add)
    topic_term_counts = topic_term_counts / norm_val.reshape((k_topics, 1))
    return doc_topics, topic_term_counts
Example #7
  def train_smo_2005(self, data, labels):
    '''
    Train an SVM model using the SMO (2005) algorithm.
   
    Args:
      data(Expr): points to be trained
      labels(Expr): the correct labels of the training data
    '''
    
    N = data.shape[0] # Number of instances
    D = data.shape[1]  # Number of features

    self.b = 0.0
    alpha = expr.zeros((N,1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]).force()
    
    # linear kernel
    kernel_results = expr.dot(data, expr.transpose(data), tile_hint=[N/self.ctx.num_workers, N])
    gradient = expr.ones((N, 1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]) * -1.0
    
    expr_labels = expr.lazify(labels)
    
    util.log_info("Starting SMO")
    pv1 = pv2 = -1
    it = 0
    while it < self.maxiter:
      util.log_info("Iteration:%d", it)
      
      minObj = 1e100
      
      expr_alpha = expr.lazify(alpha)
      G = expr.multiply(labels, gradient) * -1.0

      v1_mask = ((expr_labels > self.tol) * (expr_alpha < self.C) + (expr_labels < -self.tol) * (expr_alpha > self.tol))
      v1 = expr.argmax(G[v1_mask-True]).glom().item()
      maxG = G[v1,0].glom()
      print 'maxv1:', v1, 'maxG:', maxG

      v2_mask = ((expr_labels > self.tol) * (expr_alpha > self.tol) + (expr_labels < -self.tol) * (expr_alpha < self.C))     
      min_v2 = expr.argmin(G[v2_mask-True]).glom().item()
      minG = G[min_v2,0].glom()
      #print 'minv2:', min_v2, 'minG:', minG
      
      set_v2 = v2_mask.glom().nonzero()[0]
      #print 'actives:', set_v2.shape[0]
      v2 = -1
      for v in set_v2:
        b = maxG - G[v,0].glom()
        if b > self.tol:
          na = (kernel_results[v1,v1] + kernel_results[v,v] - 2*kernel_results[v1,v]).glom()[0][0]
          if na < self.tol: na = 1e12
          
          obj = -(b*b)/na
          if obj <= minObj and v1 != pv1 or v != pv2:
            v2 = v
            a = na
            minObj = obj
      
      if v2 == -1: break
      if maxG - minG < self.tol: break
      
      print 'opt v1:', v1, 'v2:', v2

      pv1 = v1
      pv2 = v2
    
      y1 = labels[v1,0]
      y2 = labels[v2,0]    
        
      oldA1 = alpha[v1,0]
      oldA2 = alpha[v2,0]
      
      # Calculate new alpha values, to reduce the objective function...
      b = y2*expr.glom(gradient[v2,0]) - y1*expr.glom(gradient[v1,0])
      if y1 != y2:
        a += 4 * kernel_results[v1,v2].glom()
      
      newA1 = oldA1 + y1*b/a
      newA2 = oldA2 - y2*b/a   

      # Correct for alpha being out of range...
      sum = y1*oldA1 + y2*oldA2
  
      if newA1 < self.tol: newA1 = 0.0
      elif newA1 > self.C: newA1 = self.C
     
      newA2 = y2 * (sum - y1 * newA1) 

      if newA2 < self.tol: newA2 = 0.0
      elif newA2 > self.C: newA2 = self.C
     
      newA1 = y1 * (sum - y2 * newA2)
  
      # Update the gradient...
      dA1 = newA1 - oldA1
      dA2 = newA2 - oldA2
  
      gradient += expr.multiply(labels, kernel_results[:,v1]) * y1 * dA1 + expr.multiply(labels, kernel_results[:,v2]) * y2 * dA2

      alpha[v1,0] = newA1
      alpha[v2,0] = newA2
 
      #print 'alpha:', alpha.glom().T
      
      it += 1
      #print 'gradient:', gradient.glom().T

    self.w = expr.zeros((D, 1), dtype=np.float64).force()
    for i in xrange(D): 
      self.w[i,0] = expr.reduce(alpha, axis=None, dtype_fn=lambda input: input.dtype,
                                local_reduce_fn=margin_mapper,
                                accumulate_fn=np.add, 
                                fn_kw=dict(label=labels, data=expr.force(data[:,i]))).glom()
    
    self.b = 0.0
    E = (labels - self.margins(data)).force()
    
    minB = -1e100
    maxB = 1e100
    actualB = 0.0
    numActualB = 0
    
    for i in xrange(N):
      ai = alpha[i,0]
      yi = labels[i,0]
      Ei = E[i,0]
      
      if ai < 1e-3:
        if yi < self.tol:
          maxB = min((maxB,Ei))
        else:
          minB = max((minB,Ei))
      elif ai > self.C - 1e-3:
        if yi < self.tol:
          minB = max((minB,Ei))
        else:
          maxB = min((maxB,Ei))
      else:
        numActualB += 1
        actualB += (Ei - actualB) / float(numActualB)
    if numActualB > 0:
      self.b = actualB
    else:
      self.b = 0.5*(minB + maxB)

    self.usew_ = True
    print 'iteration finish:', it
    print 'b:', self.b
    print 'w:', self.w.glom()
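
The pair-selection loop above is a second-order working-set search over the precomputed linear kernel: v1 is the most violating index of -y*gradient in the "up" set, and v2 is the partner that gives the largest estimated objective decrease -b^2/eta. A dense NumPy sketch of that selection, assuming +/-1 labels and omitting the previous-pair bookkeeping kept in pv1/pv2:

import numpy as np

def select_working_pair(alpha, labels, gradient, K, C, tol=1e-6):
    # alpha, labels, gradient: (N, 1) arrays; K: (N, N) precomputed linear kernel
    y = labels.ravel()
    a = alpha.ravel()
    G = -y * gradient.ravel()                      # same quantity as expr.multiply(labels, gradient) * -1.0
    up = ((y > 0) & (a < C)) | ((y < 0) & (a > 0))
    down = ((y > 0) & (a > 0)) | ((y < 0) & (a < C))
    v1 = np.flatnonzero(up)[np.argmax(G[up])]      # most violating index in the "up" set
    v2, best_obj = -1, np.inf
    for v in np.flatnonzero(down):
        b = G[v1] - G[v]
        if b > tol:
            eta = K[v1, v1] + K[v, v] - 2.0 * K[v1, v]
            if eta < tol:
                eta = 1e12                          # guard against non-positive curvature
            obj = -(b * b) / eta
            if obj < best_obj:                      # keep the largest estimated decrease
                v2, best_obj = v, obj
    return v1, v2
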
Example #8
  def take_step(self, i, j, N, labels, kernel_results):
    '''
    Perform one optimization step, updating the model with respect to training examples i and j.
    Modifies self.E to reflect the error of the new model on the labels.
    
    Args:
      i(int): index of the alpha to be optimized
      j(int): index of the alpha to be optimized
      N(int): the number of training instances
      labels(Expr): the labels of the training data
      kernel_results(Expr): the result of the kernel function on the training data
    '''
    
    if i == j: return 0
    
    ai = self.alpha[i,0]
    aj = self.alpha[j,0]
    Ei = self.E[i,0]
    Ej = self.E[j,0]
    yi = labels[i,0]
    yj = labels[j,0]
    kii = kernel_results[i,i].glom()
    kjj = kernel_results[j,j].glom()
    kij = kernel_results[i,j].glom()
    
    s = yi * yj
    
    L = 0
    H = self.C
    if yi == yj:
      L = max(0, ai+aj-self.C)
      H = min(self.C, ai+aj)
    else:
      L = max(0, aj-ai)
      H = min(self.C, self.C+aj-ai)

    if abs(L-H) < self.tol:
      return 0

    eta = kii + kjj - 2 * kij
    if eta > self.tol:
      newaj = aj + yj * (Ei-Ej) / eta
      if newaj > H: newaj = H
      if newaj < L: newaj = L
    else:
      return 0
    
    if abs(aj - newaj) < self.tol*(aj+newaj+self.tol):
      return 0
      
    newai = ai + s * (aj - newaj)
    
    b1 = self.b - Ei - yi * (newai-ai) * kii - yj * (newaj-aj) * kij
    b2 = self.b - Ej - yi * (newai-ai) * kij - yj * (newaj-aj) * kjj
    self.b = 0.5*(b1+b2)
    if (newai > self.tol) and (newai < self.C):
      self.b = b1
    if (newaj > self.tol) and (newaj < self.C):
      self.b = b2
    
    self.b = self.b[0,0]
    self.alpha[i,0] = newai
    self.alpha[j,0] = newaj 
    
    for i in xrange(N):
      self.E[i,0] = self.b + expr.reduce(self.alpha, axis=None, dtype_fn=lambda input: input.dtype,
                                         local_reduce_fn=margin_mapper,
                                         accumulate_fn=np.add, 
                                         fn_kw=dict(label=labels, data=kernel_results[:,i].force())).glom() - labels[i,0]
    #print 'b', self.b
    #print 'alpha', self.alpha.glom().T
    #print 'E', self.E.glom().T
    print 'success opt i, j:', i, j
    return 1
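
take_step is the classical SMO pair update: the new alpha_j is clipped to the feasible segment [L, H] implied by the box and equality constraints, and alpha_i moves to keep y_i*a_i + y_j*a_j constant. A scalar sketch of just that update (bias handling omitted):

import numpy as np

def smo_pair_update(ai, aj, yi, yj, Ei, Ej, kii, kjj, kij, C):
    # feasible segment [L, H] for the new aj, from the box and equality constraints
    if yi == yj:
        L, H = max(0.0, ai + aj - C), min(C, ai + aj)
    else:
        L, H = max(0.0, aj - ai), min(C, C + aj - ai)
    if L >= H:
        return None
    eta = kii + kjj - 2.0 * kij              # second derivative along the constraint line
    if eta <= 0:
        return None                          # this sketch skips the non-positive-curvature case
    new_aj = float(np.clip(aj + yj * (Ei - Ej) / eta, L, H))
    new_ai = ai + yi * yj * (aj - new_aj)    # keep yi*ai + yj*aj constant
    return new_ai, new_aj
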
Example #9
def fit(data, labels, label_size, alpha=1.0):
    '''
  Train standard naive bayes model.
 
  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.
  '''
    # calc document freq
    df = expr.reduce(data,
                     axis=0,
                     dtype_fn=lambda input: input.dtype,
                     local_reduce_fn=lambda ex, data, axis:
                     (data > 0).sum(axis),
                     accumulate_fn=np.add)

    idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1

    # The normalized frequency of a feature in a document is its frequency divided
    # by the root mean square of the feature frequencies in that document.
    square_sum = expr.reduce(
        data,
        axis=1,
        dtype_fn=lambda input: input.dtype,
        local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
        accumulate_fn=np.add)

    rms = expr.sqrt(square_sum * 1.0 / data.shape[1])

    # calculate weight normalized Tf-Idf
    data = data / rms.reshape((data.shape[0], 1)) * idf.reshape(
        (1, data.shape[1]))

    # add up all the feature vectors with the same labels
    #weights_per_label_and_feature = expr.ndarray((label_size, data.shape[1]), dtype=np.float64)
    #for i in range(label_size):
    #  i_mask = (labels == i)
    #  weights_per_label_and_feature = expr.assign(weights_per_label_and_feature, np.s_[i, :], expr.sum(data[i_mask, :], axis=0))
    weights_per_label_and_feature = expr.shuffle(
        expr.retile(data, tile_hint=util.calc_tile_hint(data, axis=0)),
        _sum_instance_by_label_mapper,
        target=expr.ndarray((label_size, data.shape[1]),
                            dtype=np.float64,
                            reduce_fn=np.add),
        kw={
            'labels': labels,
            'label_size': label_size
        },
        cost_hint={hash(labels): {
                       '00': 0,
                       '01': np.prod(labels.shape)
                   }})

    # sum up all the weights for each label from the previous step
    weights_per_label = expr.sum(weights_per_label_and_feature, axis=1)

    # generate naive bayes per_label_and_feature weights
    weights_per_label_and_feature = expr.log(
        (weights_per_label_and_feature + alpha) /
        (weights_per_label.reshape((weights_per_label.shape[0], 1)) +
         alpha * weights_per_label_and_feature.shape[1]))

    return {
        'scores_per_label_and_feature':
        weights_per_label_and_feature.optimized().force(),
        'scores_per_label':
        weights_per_label.optimized().force(),
    }
Example #10
    def train_smo_2005(self, data, labels):
        '''
    Train an SVM model using the SMO (2005) algorithm.
   
    Args:
      data(Expr): points to be trained
      labels(Expr): the correct labels of the training data
    '''

        N = data.shape[0]  # Number of instances
        D = data.shape[1]  # Number of features

        self.b = 0.0
        alpha = expr.zeros((N, 1),
                           dtype=np.float64,
                           tile_hint=[N / self.ctx.num_workers, 1]).force()

        # linear kernel
        kernel_results = expr.dot(data,
                                  expr.transpose(data),
                                  tile_hint=[N / self.ctx.num_workers, N])
        gradient = expr.ones(
            (N, 1), dtype=np.float64, tile_hint=[N / self.ctx.num_workers, 1
                                                 ]) * -1.0

        expr_labels = expr.lazify(labels)

        util.log_info("Starting SMO")
        pv1 = pv2 = -1
        it = 0
        while it < self.maxiter:
            util.log_info("Iteration:%d", it)

            minObj = 1e100

            expr_alpha = expr.lazify(alpha)
            G = expr.multiply(labels, gradient) * -1.0

            v1_mask = ((expr_labels > self.tol) * (expr_alpha < self.C) +
                       (expr_labels < -self.tol) * (expr_alpha > self.tol))
            v1 = expr.argmax(G[v1_mask - True]).glom().item()
            maxG = G[v1, 0].glom()
            print 'maxv1:', v1, 'maxG:', maxG

            v2_mask = ((expr_labels > self.tol) * (expr_alpha > self.tol) +
                       (expr_labels < -self.tol) * (expr_alpha < self.C))
            min_v2 = expr.argmin(G[v2_mask - True]).glom().item()
            minG = G[min_v2, 0].glom()
            #print 'minv2:', min_v2, 'minG:', minG

            set_v2 = v2_mask.glom().nonzero()[0]
            #print 'actives:', set_v2.shape[0]
            v2 = -1
            for v in set_v2:
                b = maxG - G[v, 0].glom()
                if b > self.tol:
                    na = (kernel_results[v1, v1] + kernel_results[v, v] -
                          2 * kernel_results[v1, v]).glom()[0][0]
                    if na < self.tol: na = 1e12

                    obj = -(b * b) / na
                    if obj <= minObj and v1 != pv1 or v != pv2:
                        v2 = v
                        a = na
                        minObj = obj

            if v2 == -1: break
            if maxG - minG < self.tol: break

            print 'opt v1:', v1, 'v2:', v2

            pv1 = v1
            pv2 = v2

            y1 = labels[v1, 0]
            y2 = labels[v2, 0]

            oldA1 = alpha[v1, 0]
            oldA2 = alpha[v2, 0]

            # Calculate new alpha values, to reduce the objective function...
            b = y2 * expr.glom(gradient[v2, 0]) - y1 * expr.glom(gradient[v1,
                                                                          0])
            if y1 != y2:
                a += 4 * kernel_results[v1, v2].glom()

            newA1 = oldA1 + y1 * b / a
            newA2 = oldA2 - y2 * b / a

            # Correct for alpha being out of range...
            sum = y1 * oldA1 + y2 * oldA2

            if newA1 < self.tol: newA1 = 0.0
            elif newA1 > self.C: newA1 = self.C

            newA2 = y2 * (sum - y1 * newA1)

            if newA2 < self.tol: newA2 = 0.0
            elif newA2 > self.C: newA2 = self.C

            newA1 = y1 * (sum - y2 * newA2)

            # Update the gradient...
            dA1 = newA1 - oldA1
            dA2 = newA2 - oldA2

            gradient += expr.multiply(
                labels, kernel_results[:, v1]) * y1 * dA1 + expr.multiply(
                    labels, kernel_results[:, v2]) * y2 * dA2

            alpha[v1, 0] = newA1
            alpha[v2, 0] = newA2

            #print 'alpha:', alpha.glom().T

            it += 1
            #print 'gradient:', gradient.glom().T

        self.w = expr.zeros((D, 1), dtype=np.float64).force()
        for i in xrange(D):
            self.w[i, 0] = expr.reduce(alpha,
                                       axis=None,
                                       dtype_fn=lambda input: input.dtype,
                                       local_reduce_fn=margin_mapper,
                                       accumulate_fn=np.add,
                                       fn_kw=dict(label=labels,
                                                  data=expr.force(
                                                      data[:, i]))).glom()

        self.b = 0.0
        E = (labels - self.margins(data)).force()

        minB = -1e100
        maxB = 1e100
        actualB = 0.0
        numActualB = 0

        for i in xrange(N):
            ai = alpha[i, 0]
            yi = labels[i, 0]
            Ei = E[i, 0]

            if ai < 1e-3:
                if yi < self.tol:
                    maxB = min((maxB, Ei))
                else:
                    minB = max((minB, Ei))
            elif ai > self.C - 1e-3:
                if yi < self.tol:
                    minB = max((minB, Ei))
                else:
                    maxB = min((maxB, Ei))
            else:
                numActualB += 1
                actualB += (Ei - actualB) / float(numActualB)
        if numActualB > 0:
            self.b = actualB
        else:
            self.b = 0.5 * (minB + maxB)

        self.usew_ = True
        print 'iteration finish:', it
        print 'b:', self.b
        print 'w:', self.w.glom()
Example #11
    def train_smo_1998(self, data, labels):
        '''
    Train an SVM model using the SMO (1998) algorithm.
   
    Args:
      data(Expr): points to be trained
      labels(Expr): the correct labels of the training data
    '''

        N = data.shape[0]  # Number of instances
        D = data.shape[1]  # Number of features

        self.b = 0.0
        self.alpha = expr.zeros((N, 1),
                                dtype=np.float64,
                                tile_hint=[N / self.ctx.num_workers,
                                           1]).force()

        # linear kernel
        kernel_results = expr.dot(data,
                                  expr.transpose(data),
                                  tile_hint=[N / self.ctx.num_workers, N])

        labels = expr.force(labels)
        self.E = expr.zeros((N, 1),
                            dtype=np.float64,
                            tile_hint=[N / self.ctx.num_workers, 1]).force()
        for i in xrange(N):
            self.E[i, 0] = self.b + expr.reduce(
                self.alpha,
                axis=None,
                dtype_fn=lambda input: input.dtype,
                local_reduce_fn=margin_mapper,
                accumulate_fn=np.add,
                fn_kw=dict(
                    label=labels,
                    data=kernel_results[:, i].force())).glom() - labels[i, 0]

        util.log_info("Starting SMO")
        it = 0
        num_changed = 0
        examine_all = True
        while (num_changed > 0 or examine_all) and (it < self.maxiter):
            util.log_info("Iteration:%d", it)

            num_changed = 0

            if examine_all:
                for i in xrange(N):
                    num_changed += self.examine_example(
                        i, N, labels, kernel_results)
            else:
                for i in xrange(N):
                    if self.alpha[i, 0] > 0 and self.alpha[i, 0] < self.C:
                        num_changed += self.examine_example(
                            i, N, labels, kernel_results)

            it += 1

            if examine_all: examine_all = False
            elif num_changed == 0: examine_all = True

        self.w = expr.zeros((D, 1), dtype=np.float64).force()
        for i in xrange(D):
            self.w[i, 0] = expr.reduce(self.alpha,
                                       axis=None,
                                       dtype_fn=lambda input: input.dtype,
                                       local_reduce_fn=margin_mapper,
                                       accumulate_fn=np.add,
                                       fn_kw=dict(label=labels,
                                                  data=expr.force(
                                                      data[:, i]))).glom()
        self.usew_ = True
        print 'iteration finish:', it
        print 'b:', self.b
        print 'w:', self.w.glom()
Example #12
    def take_step(self, i, j, N, labels, kernel_results):
        '''
    Perform one optimization step, updating the model with respect to training examples i and j.
    Modifies self.E to reflect the error of the new model on the labels.
    
    Args:
      i(int): index of the alpha to be optimized
      j(int): index of the alpha to be optimized
      N(int): the number of training instances
      labels(Expr): the labels of the training data
      kernel_results(Expr): the result of the kernel function on the training data
    '''

        if i == j: return 0

        ai = self.alpha[i, 0]
        aj = self.alpha[j, 0]
        Ei = self.E[i, 0]
        Ej = self.E[j, 0]
        yi = labels[i, 0]
        yj = labels[j, 0]
        kii = kernel_results[i, i].glom()
        kjj = kernel_results[j, j].glom()
        kij = kernel_results[i, j].glom()

        s = yi * yj

        L = 0
        H = self.C
        if yi == yj:
            L = max(0, ai + aj - self.C)
            H = min(self.C, ai + aj)
        else:
            L = max(0, aj - ai)
            H = min(self.C, self.C + aj - ai)

        if abs(L - H) < self.tol:
            return 0

        eta = kii + kjj - 2 * kij
        if eta > self.tol:
            newaj = aj + yj * (Ei - Ej) / eta
            if newaj > H: newaj = H
            if newaj < L: newaj = L
        else:
            return 0

        if abs(aj - newaj) < self.tol * (aj + newaj + self.tol):
            return 0

        newai = ai + s * (aj - newaj)

        b1 = self.b - Ei - yi * (newai - ai) * kii - yj * (newaj - aj) * kij
        b2 = self.b - Ej - yi * (newai - ai) * kij - yj * (newaj - aj) * kjj
        self.b = 0.5 * (b1 + b2)
        if (newai > self.tol) and (newai < self.C):
            self.b = b1
        if (newaj > self.tol) and (newaj < self.C):
            self.b = b2

        self.b = self.b[0, 0]
        self.alpha[i, 0] = newai
        self.alpha[j, 0] = newaj

        for i in xrange(N):
            self.E[i, 0] = self.b + expr.reduce(
                self.alpha,
                axis=None,
                dtype_fn=lambda input: input.dtype,
                local_reduce_fn=margin_mapper,
                accumulate_fn=np.add,
                fn_kw=dict(
                    label=labels,
                    data=kernel_results[:, i].force())).glom() - labels[i, 0]
        #print 'b', self.b
        #print 'alpha', self.alpha.glom().T
        #print 'E', self.E.glom().T
        print 'success opt i, j:', i, j
        return 1