Example #1
def split_raw_data(images,
                   labels,
                   test_ratio=0,
                   validation_ratio=0,
                   moderation_features=None,
                   augmentation_images=None,
                   augmentation_labels=None,
                   split_labels=True,
                   force_mask_creation=False):
    """Currently depends on test/validation_ratio being 0 when not using test/validation"""
    # serialize labels if they are lists (e.g. for regression)
    if isinstance(labels, list):
        if split_labels:
            labels = [' '.join(map(str, label)) for label in labels]

    has_augmentation = (augmentation_images is not None
                        and augmentation_labels is not None)
    n_aug = len(augmentation_labels) if has_augmentation else 0
    mask = _get_split_mask(test_ratio, validation_ratio, len(labels), n_aug,
                           force_mask_creation)

    if has_augmentation:
        images = images + augmentation_images
        labels = labels + augmentation_labels

    try:
        if test_ratio != 0 and validation_ratio != 0:
            train_images, test_images, val_images = tf.dynamic_partition(
                images, mask, 3)
            train_labels, test_labels, val_labels = tf.dynamic_partition(
                labels, mask, 3)
        elif test_ratio != 0 and validation_ratio == 0:
            train_images, test_images = tf.dynamic_partition(images, mask, 2)
            train_labels, test_labels = tf.dynamic_partition(labels, mask, 2)
            val_images, val_labels = None, None
        elif test_ratio == 0 and validation_ratio != 0:
            train_images, val_images = tf.dynamic_partition(images, mask, 2)
            train_labels, val_labels = tf.dynamic_partition(labels, mask, 2)
            test_images, test_labels = None, None
        else:
            # Training only, but the rest of the code still expects these partitions.
            # dynamic_partition returns a list of length 1 here, so index [0] to get
            # the training set directly.
            train_images = tf.dynamic_partition(images, mask, 1)[0]
            train_labels = tf.dynamic_partition(labels, mask, 1)[0]
            test_images, test_labels = None, None
            val_images, val_labels = None, None
    except ValueError:
        raise ValueError(
            "Images/labels and partition mask have mismatched lengths")

    # Also partition moderation features if present, using the same number of
    # partitions as the mask
    train_mf, test_mf, val_mf = None, None, None
    if moderation_features is not None:
        num_partitions = 1 + int(test_ratio != 0) + int(validation_ratio != 0)
        mf_parts = tf.dynamic_partition(moderation_features, mask, num_partitions)
        train_mf = mf_parts[0]
        test_mf = mf_parts[1] if test_ratio != 0 else None
        val_mf = mf_parts[-1] if validation_ratio != 0 else None

    return train_images, train_labels, train_mf, test_images, test_labels, test_mf, val_images, val_labels, val_mf
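
A minimal sketch (assuming TensorFlow 1.x graph mode and a toy 0/1/2 mask of the kind `_get_split_mask` is expected to return) of how `tf.dynamic_partition` performs the three-way split above:

import tensorflow as tf

# Toy data: 6 samples; mask entry 0 = train, 1 = test, 2 = validation
images = tf.constant([[0.0], [1.0], [2.0], [3.0], [4.0], [5.0]])
mask = tf.constant([0, 1, 2, 0, 1, 0])

train, test, val = tf.dynamic_partition(images, mask, 3)

with tf.Session() as sess:
    print(sess.run([train, test, val]))
    # train -> [[0.], [3.], [5.]], test -> [[1.], [4.]], val -> [[2.]]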
Example #2
def undo_mask(x, mask, pad_val=0.0):
    """Converts the output of boolean_mask to the original input dimensions.

  The boolean_mask is usually used to condense items from multiple batches into
  one large 'batch' for faster processing. This function is used to convert
  back.
  Args:
    x: The input to reshape.
    mask: The mask used in boolean_mask.
    pad_val: value to pad with.

  Returns:
    x reshaped and padded.
  """
    with tf.variable_scope('undo_mask'):
        flat_x = tf.reshape(x, [-1])
        x_shape = tf.shape(x)[1:]
        expanded_mask = tf.tile(
            tf.reshape(
                mask,
                tf.concat([[-1, tf.shape(mask)[1]],
                           tf.ones_like(x_shape)], 0)),
            tf.concat([[1, 1], x_shape], 0))
        flat_mask = tf.reshape(expanded_mask, [-1])
        start_indices = tf.range(tf.shape(flat_mask)[0])
        condition_indices = tf.dynamic_partition(start_indices,
                                                 tf.cast(flat_mask, tf.int32),
                                                 2)
        stitched = tf.dynamic_stitch(condition_indices, [
            tf.ones_like(condition_indices[0], tf.float32) * pad_val,
            tf.reshape(flat_x, [-1])
        ])
        final_shape = tf.shape(mask)
        out_shape = tf.concat([[final_shape[0], final_shape[1]], x_shape], 0)
        return tf.reshape(stitched, out_shape)
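
A rough usage sketch of the round trip `undo_mask` is meant to invert (toy shapes, assuming TensorFlow 1.x; `tf.boolean_mask` packs the valid steps, `undo_mask` scatters them back and pads):

import tensorflow as tf

# x: [batch=2, time=3, feature=1]; mask marks the valid time steps
x = tf.constant([[[1.0], [2.0], [3.0]],
                 [[4.0], [5.0], [6.0]]])
mask = tf.constant([[True, False, True],
                    [False, True, False]])

packed = tf.boolean_mask(x, mask)                # shape [3, 1]: valid steps only
restored = undo_mask(packed, mask, pad_val=0.0)  # shape [2, 3, 1], zeros where masked out

with tf.Session() as sess:
    print(sess.run(restored))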
Example #3
def random_substr(str_tensor, max_words):
    """Select random substring if the input has more than max_words."""
    word_batch_r = tf.strings.split(str_tensor, result_type="RaggedTensor")
    row_splits = word_batch_r.row_splits
    words = word_batch_r.values
    start_idx = row_splits[:-1]
    end_idx = row_splits[1:]
    words_per_example = end_idx - start_idx
    ones = tf.ones_like(end_idx)
    max_val = tf.maximum(ones, words_per_example - max_words)
    max_words_batch = tf.reduce_max(words_per_example)
    rnd = tf.random.uniform(tf.shape(start_idx),
                            minval=0,
                            maxval=max_words_batch,
                            dtype=tf.int64)
    off_start_idx = tf.math.floormod(rnd, max_val)
    new_words_per_example = tf.where(tf.equal(max_val, 1), words_per_example,
                                     ones * max_words)
    new_start_idx = start_idx + off_start_idx
    new_end_idx = new_start_idx + new_words_per_example
    indices = tf.expand_dims(tf.range(tf.size(words), dtype=tf.int64), axis=0)
    within_limit = tf.logical_and(
        tf.greater_equal(indices, tf.expand_dims(new_start_idx, axis=1)),
        tf.less(indices, tf.expand_dims(new_end_idx, axis=1)))
    keep_indices = tf.reduce_any(within_limit, axis=0)
    keep_indices = tf.cast(keep_indices, dtype=tf.int32)
    _, selected_words = tf.dynamic_partition(words, keep_indices, 2)
    row_splits = tf.math.cumsum(new_words_per_example)
    row_splits = tf.concat([[0], row_splits], axis=0)
    new_tensor = tf.RaggedTensor.from_row_splits(values=selected_words,
                                                 row_splits=row_splits)
    return tf.strings.reduce_join(new_tensor, axis=1, separator=" ")
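
A short usage sketch (illustrative strings; assumes a TensorFlow 1.x version whose `tf.strings` ops accept RaggedTensors):

import tensorflow as tf

sentences = tf.constant([
    "the quick brown fox jumps over the lazy dog",
    "short sentence",
])
clipped = random_substr(sentences, max_words=4)

with tf.Session() as sess:
    print(sess.run(clipped))
    # e.g. [b'fox jumps over the', b'short sentence'] -- the offset is random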
Example #4
def _ComputeLoss():
    pair_a = tf.nn.l2_normalize(pregrasp_embedding - postgrasp_embedding, axis=1)
    pair_b = tf.nn.l2_normalize(goal_embedding, axis=1)
    distances = tf.losses.cosine_distance(
        pair_a, pair_b, axis=1, reduction=tf.losses.Reduction.NONE)
    _, mask1_data = tf.dynamic_partition(distances, mask, 2)
    loss = tf.cast(tf.reduce_mean(mask1_data), tf.float32)
    return loss
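
Here `tf.dynamic_partition` with a 0/1 `mask` keeps only the masked distances for the mean; a minimal sketch of that selection on toy values (names below are illustrative, not from the original code):

import tensorflow as tf

distances = tf.constant([0.5, 2.0, 1.0, 4.0])
mask = tf.constant([0, 1, 0, 1])  # 1 = include in the loss

_, kept = tf.dynamic_partition(distances, mask, 2)
loss = tf.reduce_mean(kept)  # same as reduce_mean(boolean_mask(distances, cast(mask, bool)))

with tf.Session() as sess:
    print(sess.run(loss))  # 3.0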
Example #5
    def __init__(self,
                 num_train,
                 lr=None,
                 batch_size=32,
                 num_inputs=1,
                 num_outputs=1,
                 w_threshold=0.3,
                 n_hidden=32,
                 hidden_layers=2,
                 ckpt_file='tmp.ckpt',
                 standardize=True,
                 reg_lambda=None,
                 reg_beta=None,
                 DAG_min=0.5):

        self.w_threshold = w_threshold
        self.DAG_min = DAG_min
        if lr is None:
            self.learning_rate = 0.001
        else:
            self.learning_rate = lr

        if reg_lambda is None:
            self.reg_lambda = 1.
        else:
            self.reg_lambda = reg_lambda

        if reg_beta is None:
            self.reg_beta = 1
        else:
            self.reg_beta = reg_beta

        self.batch_size = batch_size
        self.num_inputs = num_inputs
        self.n_hidden = n_hidden
        self.hidden_layers = hidden_layers
        self.num_outputs = num_outputs
        self.X = tf.placeholder("float", [None, self.num_inputs])
        self.y = tf.placeholder("float", [None, 1])
        self.rho = tf.placeholder("float", [1, 1])
        self.alpha = tf.placeholder("float", [1, 1])
        self.keep_prob = tf.placeholder("float")
        self.Lambda = tf.placeholder("float")
        self.noise = tf.placeholder("float")
        self.is_train = tf.placeholder(tf.bool, name="is_train")

        self.count = 0
        self.max_steps = 200
        self.saves = 50
        self.patience = 30
        self.metric = mean_squared_error

        # Binary (0/1) vector indicating which nodes are trained in this step
        self.sample = tf.placeholder(tf.int32, [self.num_inputs])

        # Store layer weights & biases
        seed = 1
        self.weights = {}
        self.biases = {}

        # Create the input and output weight matrix for each feature
        for i in range(self.num_inputs):
            self.weights['w_h0_' + str(i)] = tf.Variable(
                tf.random_normal([self.num_inputs, self.n_hidden], seed=seed) *
                0.01)
            self.weights['out_' + str(i)] = tf.Variable(
                tf.random_normal([self.n_hidden, self.num_outputs], seed=seed))

        for i in range(self.num_inputs):
            self.biases['b_h0_' + str(i)] = tf.Variable(
                tf.random_normal([self.n_hidden], seed=seed) * 0.01)
            self.biases['out_' + str(i)] = tf.Variable(
                tf.random_normal([self.num_outputs], seed=seed))

        # The second hidden layer is shared across all of the per-feature sub-networks
        self.weights.update({
            'w_h1':
            tf.Variable(tf.random_normal([self.n_hidden, self.n_hidden]))
        })

        self.biases.update(
            {'b_h1': tf.Variable(tf.random_normal([self.n_hidden]))})

        self.hidden_h0 = {}
        self.hidden_h1 = {}
        self.layer_1 = {}
        self.layer_1_dropout = {}
        self.out_layer = {}

        self.Out_0 = []

        # The mask removes feature i from the input of the sub-network that is
        # tasked with reconstructing feature i
        self.mask = {}
        self.activation = tf.nn.relu

        for i in range(self.num_inputs):
            indices = [i] * self.n_hidden
            self.mask[str(i)] = tf.transpose(
                tf.one_hot(indices,
                           depth=self.num_inputs,
                           on_value=0.0,
                           off_value=1.0,
                           axis=-1))

            self.weights['w_h0_' +
                         str(i)] = self.weights['w_h0_' +
                                                str(i)] * self.mask[str(i)]
            self.hidden_h0['nn_' + str(i)] = self.activation(
                tf.add(tf.matmul(self.X, self.weights['w_h0_' + str(i)]),
                       self.biases['b_h0_' + str(i)]))
            self.hidden_h1['nn_' + str(i)] = self.activation(
                tf.add(
                    tf.matmul(self.hidden_h0['nn_' + str(i)],
                              self.weights['w_h1']), self.biases['b_h1']))
            self.out_layer['nn_' + str(i)] = tf.matmul(
                self.hidden_h1['nn_' + str(i)],
                self.weights['out_' + str(i)]) + self.biases['out_' + str(i)]
            self.Out_0.append(self.out_layer['nn_' + str(i)])

        # Concatenate all the constructed features
        self.Out = tf.concat(self.Out_0, axis=1)
        self.optimizer_subset = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate)

        self.supervised_loss = tf.reduce_mean(tf.reduce_sum(
            tf.square(self.out_layer['nn_0'] - self.y), axis=1),
                                              axis=0)
        self.regularization_loss = 0

        self.W_0 = []
        for i in range(self.num_inputs):
            self.W_0.append(
                tf.math.sqrt(
                    tf.reduce_sum(tf.square(self.weights['w_h0_' + str(i)]),
                                  axis=1,
                                  keepdims=True)))

        self.W = tf.concat(self.W_0, axis=1)

        # Truncated power series approximation of the acyclicity constraint
        # h(W) = tr(exp(W ∘ W)) - d
        d = self.num_inputs  # = self.X.shape[1]
        coff = 1.0
        Z = tf.multiply(self.W, self.W)

        dag_l = tf.cast(d, tf.float32)

        Z_in = tf.eye(d)
        for i in range(1, 10):
            Z_in = tf.matmul(Z_in, Z)
            dag_l += 1. / coff * tf.linalg.trace(Z_in)
            coff = coff * (i + 1)

        self.h = dag_l - tf.cast(d, tf.float32)

        # Residuals
        self.R = self.X - self.Out
        # Average reconstruction loss
        self.average_loss = 0.5 / num_train * tf.reduce_sum(tf.square(self.R))

        # Group lasso
        L1_loss = 0.0
        for i in range(self.num_inputs):
            w_1 = tf.slice(self.weights['w_h0_' + str(i)], [0, 0], [i, -1])
            w_2 = tf.slice(self.weights['w_h0_' + str(i)], [i + 1, 0],
                           [-1, -1])
            L1_loss += tf.reduce_sum(tf.norm(w_1, axis=1)) + tf.reduce_sum(
                tf.norm(w_2, axis=1))

        # Split the residual columns into the untrained and trained subsets
        _, subset_R = tf.dynamic_partition(tf.transpose(self.R),
                                           partitions=self.sample,
                                           num_partitions=2)
        subset_R = tf.transpose(subset_R)

        # Combine all the losses
        self.mse_loss_subset = tf.cast(self.num_inputs, tf.float32) / tf.cast(
            tf.reduce_sum(self.sample), tf.float32) * tf.reduce_sum(
                tf.square(subset_R))
        self.regularization_loss_subset = (
            self.mse_loss_subset + self.reg_beta * L1_loss +
            0.5 * self.rho * self.h * self.h + self.alpha * self.h)

        # Add in the supervised loss
        self.regularization_loss_subset += self.Lambda * self.rho * self.supervised_loss
        self.loss_op_dag = self.optimizer_subset.minimize(
            self.regularization_loss_subset)

        self.loss_op_supervised = self.optimizer_subset.minimize(
            self.supervised_loss + self.regularization_loss)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(var_list=tf.global_variables())
        self.tmp = ckpt_file
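
The `self.sample` / `tf.dynamic_partition` trick above selects a subset of residual columns by partitioning the transpose; a minimal sketch on a toy matrix (assuming TensorFlow 1.x, illustrative values):

import tensorflow as tf

R = tf.constant([[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0]])  # [batch, num_inputs]
sample = tf.constant([0, 1, 1])     # 0/1 flag per input column

# Partitioning the transpose splits by column; transposing back restores [batch, k]
_, subset_R = tf.dynamic_partition(tf.transpose(R), partitions=sample, num_partitions=2)
subset_R = tf.transpose(subset_R)

with tf.Session() as sess:
    print(sess.run(subset_R))  # [[2. 3.], [5. 6.]]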
Example #6
def split_raw_data(images,
                   labels,
                   test_ratio=0,
                   validation_ratio=0,
                   moderation_features=None,
                   augmentation_images=None,
                   augmentation_labels=None,
                   split_labels=True):
    """Currently depends on test/validation_ratio being 0 when not using test/validation"""
    # serialize labels if they are lists (e.g. for regression)
    if isinstance(labels, list):
        if split_labels:
            labels = [' '.join(map(str, label)) for label in labels]

    # check if there is a previously saved mask to load from the current directory
    mask = []
    found_prev_mask_file = False
    try:
        with open("mask_ckpt.txt", "r", encoding='utf-8-sig') as prev_mask_file:
            print('{0}: {1}'.format(
                datetime.datetime.now().strftime("%I:%M%p"),
                "Previous mask found. Loading 'mask_ckpt.txt'"))
            for line in prev_mask_file:
                mask.append(int(line.rstrip()))
            found_prev_mask_file = True
    except (IOError, ValueError):
        found_prev_mask_file = False

    if not found_prev_mask_file:  # we build the mask
        print('{0}: {1}'.format(datetime.datetime.now().strftime("%I:%M%p"),
                                'No previous mask found. Building new mask.'))
        total_samples = len(labels)
        mask = [0] * total_samples
        val_mask_num = 1  # becomes 2 if a test split is also being created
        val_start_idx = 0  # the validation block starts after the test block, if any

        if test_ratio != 0:
            # creating a mask [1,1,1,...,0,0,0]
            num_test = int(total_samples * test_ratio)
            mask[:num_test] = [1] * num_test
            val_mask_num = 2
            val_start_idx = num_test

        if validation_ratio != 0:
            # if test_ratio != 0 then val_mask_num = 2, giving a mask like [1,1,...,2,2,...,0,0,...]
            # otherwise there are only train and validation sets, giving a mask like [1,1,...,0,0,...]
            num_val = int(total_samples * validation_ratio)
            mask[val_start_idx:val_start_idx +
                 num_val] = [val_mask_num] * num_val

        # If we're using a training augmentation set, add them to the training portion
        if augmentation_images is not None and augmentation_labels is not None:
            images = images + augmentation_images
            labels = labels + augmentation_labels
            mask = mask + ([0] * len(augmentation_labels))

        # make the split random <-- ESSENTIAL
        random.shuffle(mask)

        # save the mask file in the current directory for future use
        with open('mask_ckpt.txt', 'w+', encoding='utf-8') as prev_mask_file:
            for entry in mask:
                prev_mask_file.write(str(entry) + '\n')

    # create partitions; test/validation are set to None if they're not being used
    if test_ratio != 0 and validation_ratio != 0:
        train_images, test_images, val_images = tf.dynamic_partition(
            images, mask, 3)
        train_labels, test_labels, val_labels = tf.dynamic_partition(
            labels, mask, 3)
    elif test_ratio != 0 and validation_ratio == 0:
        train_images, test_images = tf.dynamic_partition(images, mask, 2)
        train_labels, test_labels = tf.dynamic_partition(labels, mask, 2)
        val_images, val_labels = None, None
    elif test_ratio == 0 and validation_ratio != 0:
        train_images, val_images = tf.dynamic_partition(images, mask, 2)
        train_labels, val_labels = tf.dynamic_partition(labels, mask, 2)
        test_images, test_labels = None, None
    else:
        # Training only; the rest of the dpp code still expects these partitions to exist.
        # dynamic_partition returns a list (of length 1 here), so index [0] to
        # get the training set directly.
        train_images = tf.dynamic_partition(images, mask, 1)[0]
        train_labels = tf.dynamic_partition(labels, mask, 1)[0]
        test_images, test_labels = None, None
        val_images, val_labels = None, None

    # Also partition moderation features if present, using the same number of
    # partitions as the mask
    train_mf, test_mf, val_mf = None, None, None
    if moderation_features is not None:
        num_partitions = 1 + int(test_ratio != 0) + int(validation_ratio != 0)
        mf_parts = tf.dynamic_partition(moderation_features, mask, num_partitions)
        train_mf = mf_parts[0]
        test_mf = mf_parts[1] if test_ratio != 0 else None
        val_mf = mf_parts[-1] if validation_ratio != 0 else None

    return train_images, train_labels, train_mf, test_images, test_labels, test_mf, val_images, val_labels, val_mf
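
A toy walk-through of the mask construction above (illustrative ratios, shown before shuffling):

total_samples = 10
test_ratio, validation_ratio = 0.2, 0.1

mask = [0] * total_samples
num_test = int(total_samples * test_ratio)         # 2
mask[:num_test] = [1] * num_test                   # [1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
num_val = int(total_samples * validation_ratio)    # 1
mask[num_test:num_test + num_val] = [2] * num_val  # [1, 1, 2, 0, 0, 0, 0, 0, 0, 0]
# random.shuffle(mask) then scatters the 1s and 2s before partitioning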
Example #7
centroids = tf.Variable(tf.gather(vector_values, centroid_indices))
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroids = tf.expand_dims(centroids, 1)

vectors_subtraction = tf.subtract(expanded_vectors, expanded_centroids)

euclidean_distances = tf.reduce_sum(tf.square(vectors_subtraction), 2)

assignments = tf.cast(tf.argmin(euclidean_distances, 0), tf.int32)

# Illustration of tf.dynamic_partition semantics:
#   partitions = [0, 0, 1, 1, 0]
#   data = [10, 20, 30, 40, 50]
#   num_partitions = 2
#   outputs[0] = [10, 20, 50]
#   outputs[1] = [30, 40]
partitions = tf.dynamic_partition(vectors, assignments, num_clusters)

update_centroids = tf.assign(
    centroids,
    tf.concat([
        tf.expand_dims(tf.reduce_mean(partition, 0), 0)
        for partition in partitions
    ], 0))

init_op = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init_op)
for step in range(num_steps):
    _, centroid_values, assignment_values = sess.run(
        [update_centroids, centroids, assignments])
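
The centroid update above is just a per-cluster mean of the dynamic partitions; a minimal sketch with the same toy numbers as the comment block (assuming TensorFlow 1.x):

import tensorflow as tf

data = tf.constant([[10.0], [20.0], [30.0], [40.0], [50.0]])
assignments = tf.constant([0, 0, 1, 1, 0])

clusters = tf.dynamic_partition(data, assignments, 2)
means = tf.concat([tf.expand_dims(tf.reduce_mean(c, 0), 0) for c in clusters], 0)

with tf.Session() as sess:
    print(sess.run(means))  # approx. [[26.667], [35.]]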
Example #8
def _ComputeLoss():
    raw_distances = pregrasp_embedding - goal_embedding - postgrasp_embedding
    distances = tf.reduce_sum(raw_distances**2, axis=1)
    _, mask1_data = tf.dynamic_partition(distances, mask, 2)
    loss = tf.cast(tf.reduce_mean(mask1_data), tf.float32)
    return loss
Example #9
def _ComputeLoss():
    distances = tf.norm(tensor, axis=1)
    _, mask1_data = tf.dynamic_partition(distances, mask, 2)
    loss = tf.cast(tf.reduce_mean(mask1_data), tf.float32)
    return loss