示例#1
0
def check_inputs(x_unlabeled, x_labeled, y_labeled, y_true):
  """Validates the data inputs shared by train_step and predict.

  Inputs that were passed as None are replaced by empty stand-ins so the
  caller can treat all three returned values uniformly.

  Args:
    x_unlabeled: unlabeled input data, or None
    x_labeled: labeled input data, or None
    y_labeled: labels belonging to x_labeled, or None
    y_true: labels placeholder; only consulted when both x_labeled and
      y_labeled are None, to determine the shape of the empty label array

  Returns:
    the tuple (x_unlabeled, x_labeled, y_labeled) with None values replaced

  Raises:
    Exception: if neither x_unlabeled nor x_labeled is provided, or if exactly
      one of x_labeled and y_labeled is None
  """
  if x_unlabeled is None:
    if x_labeled is None:
      raise Exception('No data, labeled or unlabeled, passed to check_inputs!')
    # an empty slice keeps the container type (and trailing dims) of x_labeled
    x_unlabeled = x_labeled[0:0]

  has_points = x_labeled is not None
  has_labels = y_labeled is not None
  if has_points != has_labels:
    raise Exception('x_labeled and y_labeled must both be None or have a value')

  if not has_points:
    # no labeled data at all: build empty placeholders for points and labels
    x_labeled = x_unlabeled[0:0]
    label_dims = y_true.get_shape()[1:K.ndim(y_true)].as_list()
    y_labeled = np.empty([0] + label_dims)

  return x_unlabeled, x_labeled, y_labeled
示例#2
0
def check_inputs(x_unlabeled, x_labeled, y_labeled, y_true):
    """Check the data inputs of train_step / predict and fill in empties.

    Any of the three data arguments that is None is replaced by an empty
    stand-in, provided the combination of arguments is valid.

    x_unlabeled:    unlabeled input data, or None
    x_labeled:      labeled input data, or None
    y_labeled:      labels for x_labeled, or None
    y_true:         labels placeholder, used only to size the empty label
                    array when no labeled data is given

    returns:        (x_unlabeled, x_labeled, y_labeled) without None entries

    raises:         Exception when no data is given at all, or when only one
                    of x_labeled / y_labeled is given
    """
    if x_unlabeled is None and x_labeled is None:
        raise Exception(
            "No data, labeled or unlabeled, passed to check_inputs!")
    if x_unlabeled is None:
        # zero-length slice of the labeled data serves as "no unlabeled data"
        x_unlabeled = x_labeled[0:0]

    missing = (x_labeled is None, y_labeled is None)
    if missing == (True, True):
        # no labeled data: create empty points and an empty label array
        x_labeled = x_unlabeled[0:0]
        trailing_dims = y_true.get_shape()[1:K.ndim(y_true)].as_list()
        y_labeled = np.empty([0] + trailing_dims)
    elif missing != (False, False):
        raise Exception(
            "x_labeled and y_labeled must both be None or have a value")

    return x_unlabeled, x_labeled, y_labeled
示例#3
0
def squared_distance(input_x, input_y=None, weight=None):
  """Computes all pairwise squared Euclidean distances between two point sets.

  Args:
    input_x: n x d matrix
    input_y: m x d matrix; defaults to input_x when omitted
    weight: affinity n x m -- if provided, both inputs are divided by the
      square root of its row sums before the distances are taken

  Returns:
    n x m matrix of all pairwise squared Euclidean distances
  """
  if input_y is None:
    input_y = input_x

  # axes to reduce over, determined from the rank of input_x before a
  # broadcasting axis is inserted below
  reduce_axes = list(range(2, K.ndim(input_x) + 1))

  # insert an axis at position 1 so the subtraction broadcasts over all pairs
  input_x = K.expand_dims(input_x, axis=1)

  if weight is not None:
    # NOTE(review): the divisor is the square root of the row sums of weight
    # with an extra axis at position 1; confirm it broadcasts against both
    # operands as intended.
    row_sums = K.sum(weight, axis=1)
    scale = K.expand_dims(K.sqrt(row_sums), axis=1)
    input_x /= scale
    input_y /= scale

  return K.sum(K.square(input_x - input_y), axis=reduce_axes)
示例#4
0
def squared_distance(X, Y=None, W=None):
    """Pairwise squared Euclidean distance between the points in X and Y.

    X:          n x d matrix
    Y:          m x d matrix (X itself is used when Y is None)
    W:          affinity -- if provided, X and Y are divided by the square
                root of its row sums before the distances are computed

    returns:    n x m matrix of all pairwise squared Euclidean distances
    """
    rhs = X if Y is None else Y

    # reduction axes are derived from the rank of X before the extra axis
    # is inserted
    reduce_axes = list(range(2, K.ndim(X) + 1))

    # the extra axis at position 1 lets the difference broadcast over pairs
    lhs = K.expand_dims(X, axis=1)

    if W is not None:
        # NOTE(review): the divisor comes from the row sums of W with an axis
        # added at position 1; confirm the broadcast against both operands.
        degree_root = K.expand_dims(K.sqrt(K.sum(W, axis=1)), axis=1)
        lhs /= degree_root
        rhs /= degree_root

    return K.sum(K.square(lhs - rhs), axis=reduce_axes)
示例#5
0
def train_step(return_vars,
               updates,
               x_unlabeled,
               inputs,
               y_true,
               batch_sizes,
               x_labeled=None,
               y_labeled=None,
               batches_per_epoch=100):
    """Performs one training step of batches_per_epoch batches.

    For every batch, evaluates the tensors in return_vars and updates in the
    current Keras session (learning phase set to 1) and accumulates the values
    of the tensors in return_vars.

    Args:
        return_vars: list of tensors to evaluate and return
        updates: list of tensors to evaluate only
        x_unlabeled: unlabeled input data
        inputs: dictionary containing input_types and input_placeholders as
            key, value pairs, respectively; the input types handled here are
            'Labeled' and 'Unlabeled'
        y_true: true labels placeholder
        batch_sizes: dictionary containing input_types and batch_sizes as
            key, value pairs, respectively
        x_labeled: labeled input data
        y_labeled: labeled input labels
        batches_per_epoch: parameter updates per epoch*

    Returns:
        a numpy array of length len(return_vars) holding the evaluated result
        of every tensor in return_vars, summed over all batches of this step

    Raises:
        Exception: raised by check_inputs when no data is passed, or when
            only one of x_labeled and y_labeled is given

    *note: the term epoch is used loosely here, it does not necessarily
           refer to one iteration over the entire dataset. instead, it
           is just batches_per_epoch parameter updates.
    """
    x_unlabeled, x_labeled, y_labeled = check_inputs(x_unlabeled, x_labeled,
                                                     y_labeled, y_true)

    # combined data; only used below to build correctly shaped empty feeds
    x = np.concatenate((x_unlabeled, x_labeled), 0)

    # get shape of y_true
    y_shape = y_true.get_shape()[1:K.ndim(y_true)].as_list()

    # Use explicit lengths: the truth value of a numpy array is ambiguous
    # (ValueError for more than one element), so `if x_labeled:` is unsafe.
    num_labeled = len(x_labeled)
    num_unlabeled = len(x_unlabeled)

    # the list of tensors to run does not change between batches
    all_vars = return_vars + updates

    return_vars_ = np.zeros(shape=(len(return_vars)))
    # train batches_per_epoch batches
    for _ in range(batches_per_epoch):
        feed_dict = {K.learning_phase(): 1}

        # feed corresponding input for each input_type
        for input_type, input_placeholder in inputs.items():
            if input_type == 'Labeled':
                if num_labeled > 0:
                    batch_ids = np.random.choice(
                        num_labeled,
                        size=min(batch_sizes[input_type], num_labeled),
                        replace=False)
                    feed_dict[input_placeholder] = x_labeled[batch_ids]
                    feed_dict[y_true] = y_labeled[batch_ids]
                else:
                    # we have no labeled points, so feed an empty array
                    feed_dict[input_placeholder] = x[0:0]
                    feed_dict[y_true] = np.empty([0] + y_shape)
            elif input_type == 'Unlabeled':
                if num_unlabeled > 0:
                    # clamp like the labeled branch: sampling without
                    # replacement cannot draw more points than exist
                    batch_ids = np.random.choice(
                        num_unlabeled,
                        size=min(batch_sizes[input_type], num_unlabeled),
                        replace=False)
                    feed_dict[input_placeholder] = x_unlabeled[batch_ids]
                else:
                    # we have no unlabeled points, so feed an empty array
                    feed_dict[input_placeholder] = x[0:0]

        # run everything, but keep only the values of return_vars
        batch_values = K.get_session().run(all_vars, feed_dict=feed_dict)
        return_vars_ += np.asarray(batch_values[:len(return_vars)])

    return return_vars_
示例#6
0
def predict(predict_var,
            x_unlabeled,
            inputs,
            y_true,
            batch_sizes,
            x_labeled=None,
            y_labeled=None):
    """Evaluates predict_var, batchwise, over all points in x_unlabeled and x_labeled.

    The unlabeled and labeled points are concatenated and fed in consecutive
    batches through the 'Unlabeled' input with the learning phase set to 0.
    If a 'Labeled' input exists, it is fed a random sample of the labeled
    points (or empty arrays when there are none) for every batch.

    Args:
        predict_var: tensor(s) to evaluate and return
        x_unlabeled: unlabeled input data
        inputs: dictionary containing input_types and input_placeholders
            as key, value pairs, respectively
        y_true: true labels tensorflow placeholder
        batch_sizes: dictionary containing input_types and batch_sizes as
            key, value pairs, respectively; when both 'Labeled' and
            'Unlabeled' are present their batch sizes must be equal
        x_labeled: labeled input data
        y_labeled: labeled input labels

    Returns:
        the per-batch results of predict_var concatenated along the first
        axis (covering len(x_unlabeled) + len(x_labeled) points); if the
        per-batch result is zero-dimensional, the sum over all batches

    Raises:
        Exception: raised by check_inputs when no data is passed, or when
            only one of x_labeled and y_labeled is given
        AssertionError: if the 'Labeled' and 'Unlabeled' batch sizes differ
    """
    x_unlabeled, x_labeled, y_labeled = check_inputs(x_unlabeled, x_labeled,
                                                     y_labeled, y_true)

    # combined data
    x = np.concatenate((x_unlabeled, x_labeled), 0)
    # get shape of y_true
    y_shape = y_true.get_shape()[1:K.ndim(y_true)].as_list()

    # Use an explicit length: the truth value of a numpy array is ambiguous
    # (ValueError for more than one element), so `if x_labeled:` is unsafe.
    num_labeled = len(x_labeled)

    # calculate batches for predict loop
    unlabeled_batch_size = batch_sizes.get('Unlabeled', 0)
    labeled_batch_size = batch_sizes.get('Labeled', 0)
    if 'Labeled' in batch_sizes and 'Unlabeled' in batch_sizes:
        assert unlabeled_batch_size == labeled_batch_size
    batch_size = min(len(x), max(unlabeled_batch_size, labeled_batch_size))
    batches = make_batches(len(x), batch_size)

    y_preds = []
    # predict over all points
    for batch_start, batch_end in batches:
        feed_dict = {K.learning_phase(): 0}

        # feed corresponding input for each input_type
        for input_type, input_placeholder in inputs.items():
            if input_type == 'Unlabeled':
                feed_dict[input_placeholder] = x[batch_start:batch_end]
            elif input_type == 'Labeled':
                if num_labeled > 0:
                    batch_ids = np.random.choice(
                        num_labeled,
                        size=min(batch_sizes[input_type], num_labeled),
                        replace=False)
                    feed_dict[input_placeholder] = x_labeled[batch_ids]
                    feed_dict[y_true] = y_labeled[batch_ids]
                else:
                    # we have no labeled points, so feed an empty array
                    feed_dict[input_placeholder] = x[0:0]
                    feed_dict[y_true] = np.empty([0] + y_shape)

        # evaluate the batch
        y_pred_batch = np.asarray(K.get_session().run(predict_var,
                                                      feed_dict=feed_dict))
        y_preds.append(y_pred_batch)

    # array-valued batch results are stitched together; zero-dimensional
    # (scalar) results have an empty shape tuple and are summed instead
    if y_preds[0].shape:
        return np.concatenate(y_preds)
    else:
        return np.sum(y_preds)