def testSplitMatrixUnevenRows(self):
    """Split the 5x7 fixture along dimension 0 into 2 and then 3 uneven pieces.

    Each entry of an expectation table is ``(indices, values, shape)`` for
    the piece at the same position. ``None`` in the ``indices`` slot means
    the indices of that piece are not checked.
    """
    with self.test_session(use_gpu=False):
        halves = tf.sparse_split(0, 2, self._SparseTensor_5x7())
        expected_halves = [
            ([[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4],
              [1, 6], [2, 0], [2, 3], [2, 5]],
             [0, 2, 4, 5, 11, 13, 14, 16, 20, 23, 25],
             [3, 7]),
            ([[0, 0], [0, 2], [0, 3], [0, 5], [1, 1], [1, 4], [1, 6]],
             [30, 32, 33, 35, 41, 44, 46],
             [2, 7]),
        ]
        for pos, (indices, values, shape) in enumerate(expected_halves):
            piece = halves[pos]
            self.assertAllEqual(piece.indices.eval(), indices)
            self.assertAllEqual(piece.values.eval(), values)
            self.assertAllEqual(piece.shape.eval(), shape)
        self.assertAllEqual(len(halves), 2)

        thirds = tf.sparse_split(0, 3, self._SparseTensor_5x7())
        self.assertAllEqual(len(thirds), 3)
        expected_thirds = [
            ([[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4], [1, 6]],
             [0, 2, 4, 5, 11, 13, 14, 16],
             [2, 7]),
            (None,
             [20, 23, 25, 30, 32, 33, 35],
             [2, 7]),
            ([[0, 1], [0, 4], [0, 6]],
             [41, 44, 46],
             [1, 7]),
        ]
        for pos, (indices, values, shape) in enumerate(expected_thirds):
            piece = thirds[pos]
            if indices is not None:
                self.assertAllEqual(piece.indices.eval(), indices)
            self.assertAllEqual(piece.values.eval(), values)
            self.assertAllEqual(piece.shape.eval(), shape)
def __init__(self, input_dim, output_dim, support, support_t, num_support, u_features_nonzero=None,
             v_features_nonzero=None, sparse_inputs=False, dropout=0., act=tf.nn.relu, bias=False,
             share_user_item_weights=False, self_connections=False, **kwargs):
    """Store the layer configuration and split both support matrices.

    Args:
        input_dim: stored as ``self.input_dim``.
        output_dim: stored as ``self.output_dim``.
        support: sparse tensor split into ``num_support`` pieces along axis 1.
        support_t: sparse tensor split the same way as ``support``.
        num_support: number of pieces each support tensor is split into.
        u_features_nonzero: stored unchanged.
        v_features_nonzero: stored unchanged.
        sparse_inputs: stored unchanged.
        dropout: stored unchanged.
        act: activation callable, stored unchanged.
        bias: stored unchanged.
        share_user_item_weights: stored unchanged.
        self_connections: stored unchanged.
        **kwargs: forwarded to the parent constructor.
    """
    self.vars = {}

    # Dimensions and structural options.
    self.input_dim, self.output_dim = input_dim, output_dim
    self.num_support = num_support
    self.bias = bias
    self.share_user_item_weights = share_user_item_weights

    # Input handling and non-linearity.
    self.dropout = dropout
    self.u_features_nonzero = u_features_nonzero
    self.v_features_nonzero = v_features_nonzero
    self.sparse_inputs = sparse_inputs
    self.act = act
    self.self_connections = self_connections

    def _split_columns(sp_tensor):
        # One sparse piece per support, cut along axis 1.
        return tf.sparse_split(sp_input=sp_tensor, num_split=self.num_support, axis=1)

    self.support = _split_columns(support)
    self.support_t = _split_columns(support_t)

    # The parent constructor runs last, after every attribute above is set.
    super(OrdinalMixtureGCN, self).__init__(**kwargs)
def testSplitMatrixUnevenCols(self):
    """Split the 5x7 fixture along axis 1 into 3 and then 4 uneven pieces.

    Each expectation row is ``(indices, values, dense_shape)`` for the
    piece at the same position in the split result.
    """
    def check_pieces(pieces, expectations):
        # Compare every piece against its expected sparse components.
        for pos, (indices, values, dense_shape) in enumerate(expectations):
            piece = pieces[pos]
            self.assertAllEqual(piece.indices.eval(), indices)
            self.assertAllEqual(piece.values.eval(), values)
            self.assertAllEqual(piece.dense_shape.eval(), dense_shape)

    with self.test_session(use_gpu=False):
        thirds = tf.sparse_split(sp_input=self._SparseTensor_5x7(), num_split=3, axis=1)
        self.assertAllEqual(len(thirds), 3)
        check_pieces(thirds, [
            ([[0, 0], [0, 2], [1, 1], [2, 0], [3, 0], [3, 2], [4, 1]],
             [0, 2, 11, 20, 30, 32, 41],
             [5, 3]),
            ([[0, 1], [1, 0], [1, 1], [2, 0], [3, 0], [4, 1]],
             [4, 13, 14, 23, 33, 44],
             [5, 2]),
            ([[0, 0], [1, 1], [2, 0], [3, 0], [4, 1]],
             [5, 16, 25, 35, 46],
             [5, 2]),
        ])

        quarters = tf.sparse_split(sp_input=self._SparseTensor_5x7(), num_split=4, axis=1)
        self.assertAllEqual(len(quarters), 4)
        check_pieces(quarters, [
            ([[0, 0], [1, 1], [2, 0], [3, 0], [4, 1]],
             [0, 11, 20, 30, 41],
             [5, 2]),
            ([[0, 0], [1, 1], [2, 1], [3, 0], [3, 1]],
             [2, 13, 23, 32, 33],
             [5, 2]),
            ([[0, 0], [0, 1], [1, 0], [2, 1], [3, 1], [4, 0]],
             [4, 5, 14, 25, 35, 44],
             [5, 2]),
            ([[1, 0], [4, 0]],
             [16, 46],
             [5, 1]),
        ])
def __init__(self, input_dim, output_dim, A, B, num_AB, G_u_full, G_v_full, num_G, ufn=None, vfn=None,
             dropout=0., act=tf.nn.relu, share_uv=False, bias=False):
    """Initialise the parent layer, then split weights and sparse operands.

    Args:
        input_dim, output_dim, ufn, vfn, dropout, act, share_uv, bias:
            forwarded positionally to the parent constructor.
        A: sparse tensor split into ``num_AB`` pieces along axis 1.
        B: sparse tensor split into ``num_AB`` pieces along axis 1.
        num_AB: number of pieces for ``A`` and ``B``.
        G_u_full: sparse tensor split into ``num_G`` pieces along axis 1.
        G_v_full: sparse tensor split into ``num_G`` pieces along axis 1.
        num_G: number of pieces for ``G_u_full`` and ``G_v_full``.
    """
    super(HomoGCNLayer_v2, self).__init__(input_dim, output_dim, ufn, vfn, dropout, act, share_uv, bias)

    # One weight block per (G, AB) combination, cut along axis 1.
    num_weight_blocks = num_G * num_AB
    self.weight_u = tf.split(self.vars["weight_u"], num_or_size_splits=num_weight_blocks, axis=1)
    self.weight_v = tf.split(self.vars["weight_v"], num_or_size_splits=num_weight_blocks, axis=1)

    def _split_columns(sp_tensor, pieces):
        return tf.sparse_split(sp_input=sp_tensor, num_split=pieces, axis=1)

    self.G_u_full = _split_columns(G_u_full, num_G)
    self.G_v_full = _split_columns(G_v_full, num_G)
    self.A_u = _split_columns(A, num_AB)
    self.B_v = _split_columns(B, num_AB)
def testSplitMatrixUnevenRows(self):
    """Split the 5x7 fixture along axis 0 into 2 and then 3 uneven pieces.

    Each expectation row is ``(indices, values, dense_shape)`` for the
    piece at the same position. ``None`` in the ``indices`` slot means the
    indices of that piece are not checked.
    """
    with self.test_session(use_gpu=False):
        halves = tf.sparse_split(sp_input=self._SparseTensor_5x7(), num_split=2, axis=0)
        expected_halves = (
            ([[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4],
              [1, 6], [2, 0], [2, 3], [2, 5]],
             [0, 2, 4, 5, 11, 13, 14, 16, 20, 23, 25],
             [3, 7]),
            ([[0, 0], [0, 2], [0, 3], [0, 5], [1, 1], [1, 4], [1, 6]],
             [30, 32, 33, 35, 41, 44, 46],
             [2, 7]),
        )
        for pos, (indices, values, dense_shape) in enumerate(expected_halves):
            piece = halves[pos]
            self.assertAllEqual(piece.indices.eval(), indices)
            self.assertAllEqual(piece.values.eval(), values)
            self.assertAllEqual(piece.dense_shape.eval(), dense_shape)
        self.assertAllEqual(len(halves), 2)

        thirds = tf.sparse_split(sp_input=self._SparseTensor_5x7(), num_split=3, axis=0)
        self.assertAllEqual(len(thirds), 3)
        expected_thirds = (
            ([[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4], [1, 6]],
             [0, 2, 4, 5, 11, 13, 14, 16],
             [2, 7]),
            (None,
             [20, 23, 25, 30, 32, 33, 35],
             [2, 7]),
            ([[0, 1], [0, 4], [0, 6]],
             [41, 44, 46],
             [1, 7]),
        )
        for pos, (indices, values, dense_shape) in enumerate(expected_thirds):
            piece = thirds[pos]
            if indices is not None:
                self.assertAllEqual(piece.indices.eval(), indices)
            self.assertAllEqual(piece.values.eval(), values)
            self.assertAllEqual(piece.dense_shape.eval(), dense_shape)
def __init__(self, input_dim, output_dim, support, support_t, num_support, u_features_nonzero=None,
             v_features_nonzero=None, sparse_inputs=False, dropout=0., act=tf.nn.relu,
             share_user_item_weights=True, **kwargs):
    """Create the stacked weights and split weights and supports per support.

    Args:
        input_dim: first dimension passed to the weight initialiser.
        output_dim: second dimension passed to the weight initialiser; must
            be a multiple of ``num_support``.
        support: sparse tensor split into ``num_support`` pieces along axis 1.
        support_t: sparse tensor split the same way as ``support``.
        num_support: number of pieces for weights and supports.
        u_features_nonzero: stored; required when ``sparse_inputs`` is true.
        v_features_nonzero: stored; required when ``sparse_inputs`` is true.
        sparse_inputs: stored unchanged.
        dropout: stored unchanged.
        act: activation callable, stored unchanged.
        share_user_item_weights: when true, ``weights_v`` is the very same
            variable as ``weights_u``.
        **kwargs: forwarded to the parent constructor.
    """
    super(StackGCN, self).__init__(**kwargs)

    assert output_dim % num_support == 0, 'output_dim must be multiple of num_support for stackGC layer'

    with tf.variable_scope(self.name + '_vars'):
        shared_or_user_weights = weight_variable_random_uniform(input_dim, output_dim, name='weights_u')
        self.vars['weights_u'] = shared_or_user_weights
        if share_user_item_weights:
            self.vars['weights_v'] = shared_or_user_weights
        else:
            self.vars['weights_v'] = weight_variable_random_uniform(input_dim, output_dim, name='weights_v')

    # One weight slice per support, cut along the output dimension.
    self.weights_u = tf.split(value=self.vars['weights_u'], num_or_size_splits=num_support, axis=1)
    self.weights_v = tf.split(value=self.vars['weights_v'], num_or_size_splits=num_support, axis=1)

    self.dropout = dropout
    self.sparse_inputs = sparse_inputs
    self.u_features_nonzero = u_features_nonzero
    self.v_features_nonzero = v_features_nonzero
    if sparse_inputs:
        assert u_features_nonzero is not None and v_features_nonzero is not None, \
            'u_features_nonzero and v_features_nonzero can not be None when sparse_inputs is True'

    self.support = tf.sparse_split(sp_input=support, num_split=num_support, axis=1)
    self.support_transpose = tf.sparse_split(sp_input=support_t, num_split=num_support, axis=1)

    self.act = act

    if self.logging:
        self._log_vars()
def __init__(self, input_dim, output_dim, num_users, num_items, support, support_t, num_support,
             u_features_nonzero=None, v_features_nonzero=None, sparse_inputs=False, dropout=0.,
             act=tf.nn.relu, share_user_item_weights=True, **kwargs):
    """Create stacked GCN weights plus per-support attention variables.

    Args:
        input_dim: first dimension passed to the weight initialiser.
        output_dim: second dimension passed to the weight initialiser; must
            be a multiple of ``num_support``.
        num_users: stored, and used as one dimension of the ``u_omega_*``
            variables.
        num_items: stored, and used as the other dimension of the
            ``u_omega_*`` variables.
        support: sparse tensor split into ``num_support`` pieces along axis 1.
        support_t: sparse tensor split the same way as ``support``.
        num_support: number of pieces for weights and supports.
        u_features_nonzero: stored; required when ``sparse_inputs`` is true.
        v_features_nonzero: stored; required when ``sparse_inputs`` is true.
        sparse_inputs: stored unchanged.
        dropout: stored unchanged.
        act: activation callable, stored unchanged.
        share_user_item_weights: when true, ``weights_v`` is the very same
            variable as ``weights_u``.
        **kwargs: forwarded to the parent constructor.
    """
    super(StackGCN, self).__init__(**kwargs)

    assert output_dim % num_support == 0, 'output_dim must be multiple of num_support for stackGC layer'

    with tf.variable_scope(self.name + '_vars'):
        self.vars['weights_u'] = weight_variable_random_uniform(input_dim, output_dim, name='weights_u')
        if not share_user_item_weights:
            self.vars['weights_v'] = weight_variable_random_uniform(input_dim, output_dim, name='weights_v')
        else:
            self.vars['weights_v'] = self.vars['weights_u']

    self.weights_u = tf.split(value=self.vars['weights_u'], axis=1, num_or_size_splits=num_support)
    self.weights_v = tf.split(value=self.vars['weights_v'], axis=1, num_or_size_splits=num_support)

    # TODO: add attention Layer weight
    # NOTE(review): the attention variables are created outside the
    # '<name>_vars' scope here -- confirm this matches the intended naming.
    hidden_size = output_dim // num_support  # width of one support's slice
    attention_size = 64

    # For each support, create (w_omega, u_omega) for the user side and then
    # for the item side. The u_omega shapes are mirrored between the sides.
    omega_shapes = (('u', (num_items, num_users)), ('v', (num_users, num_items)))
    for i in range(num_support):
        for side, (omega_rows, omega_cols) in omega_shapes:
            suffix = side + str(i)
            w_name = 'w_omega_%s' % suffix
            u_name = 'u_omega_%s' % suffix
            self.vars[w_name] = weight_variable_random_uniform(hidden_size, attention_size, name=w_name)
            self.vars[u_name] = weight_variable_random_uniform(omega_rows, omega_cols, name=u_name)

    self.dropout = dropout

    # TODO: add
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.num_users = num_users
    self.num_items = num_items

    self.sparse_inputs = sparse_inputs
    self.u_features_nonzero = u_features_nonzero
    self.v_features_nonzero = v_features_nonzero
    if sparse_inputs:
        assert u_features_nonzero is not None and v_features_nonzero is not None, \
            'u_features_nonzero and v_features_nonzero can not be None when sparse_inputs is True'

    self.support = tf.sparse_split(axis=1, num_split=num_support, sp_input=support)
    self.support_transpose = tf.sparse_split(axis=1, num_split=num_support, sp_input=support_t)

    self.act = act

    if self.logging:
        self._log_vars()
def _call(self, inputs):
    """Run the motif convolution and return one activation per motif.

    For every motif the inputs get their own dropout op, the motif's
    support is split along axis 0 into ``self.motif_positions[m]`` pieces,
    and each piece (reduced over axis 0) is multiplied with the inputs
    projected by that position's weight. The per-position results are
    summed, optionally biased, and passed through ``self.act``.

    Returns:
        A list with ``self.num_motifs`` activations.
    """
    activations = []
    for motif in range(self.num_motifs):
        keep_prob = 1 - self.dropout
        if self.sparse_inputs:
            features = sparse_dropout(inputs, keep_prob, self.num_features_nonzero)
        else:
            features = tf.nn.dropout(inputs, keep_prob)

        num_positions = self.motif_positions[motif]
        position_adjacency = tf.sparse_split(
            sp_input=self.support[motif], num_split=num_positions, axis=0)

        contributions = []
        for position in range(num_positions):
            weight = self.vars['weights_' + str(motif) + '_' + str(position)]
            projected = dot(features, weight, sparse=self.sparse_inputs)
            reduced_adj = tf.sparse_reduce_sum_sparse(position_adjacency[position], axis=0)
            contributions.append(dot(reduced_adj, projected, sparse=True))

        combined = tf.add_n(contributions)
        if self.bias:
            combined = combined + self.vars['bias_' + str(motif)]
        activations.append(self.act(combined))
    return activations
def __put_bboxes_on_image(self, images, boxes, scale):
    """Draw each batch element's boxes onto its image and re-concatenate.

    ``images`` is split into ``self.batch_size`` pieces along axis 0 and the
    sparse ``boxes`` tensor is split the same way. For each element the
    boxes are densified with -1 as filler, entries below 0 are dropped, and
    the rest is regrouped in rows of four, multiplied by ``scale`` and
    clipped by ``self.__clip_bboxes``. The four columns are then turned
    into the ``[y - h/2, x - w/2, y + h/2, x + w/2]`` layout (padded by
    0.001 and clipped to [0, 1]) that is handed to
    ``tf.image.draw_bounding_boxes``. Elements without any box are passed
    through unchanged.
    """
    image_pieces = tf.split(images, self.batch_size, axis=0)
    box_pieces = tf.sparse_split(sp_input=boxes, num_split=self.batch_size, axis=0)

    annotated = []
    for idx in range(self.batch_size):
        dense_boxes = tf.sparse_tensor_to_dense(box_pieces[idx], default_value=-1)
        kept = tf.boolean_mask(dense_boxes, dense_boxes >= 0)

        scaled = tf.reshape(kept, [1, -1, 4]) * scale
        batched_shape = tf.shape(scaled)
        clipped = self.__clip_bboxes(tf.reshape(scaled, [-1, 4]), 1.0, 1.0)

        # Column names follow the original code: x, y, w, h.
        x, y, w, h = tf.split(clipped, 4, axis=1)
        corners = tf.concat(
            [
                y - h / 2.0 - 0.001,
                x - w / 2.0 - 0.001,
                y + h / 2.0 + 0.001,
                x + w / 2.0 + 0.001,
            ],
            axis=1)
        corners = tf.clip_by_value(tf.reshape(corners, batched_shape), 0.0, 1.0)

        drawn = tf.cond(
            tf.size(kept) > 0,
            lambda: tf.image.draw_bounding_boxes(image_pieces[idx], corners),
            lambda: image_pieces[idx])
        annotated.append(drawn)
    return tf.concat(annotated, axis=0)
def testSplitAllColumns(self):
    """Split the 4x6 fixture along dimension 1 into 6 single-column pieces.

    Each expectation row is ``(indices, values, shape)`` for the piece at
    the same position in the split result.
    """
    expected_columns = [
        ([[0, 0], [2, 0], [3, 0]], [0, 20, 30], [4, 1]),
        ([[1, 0]], [11], [4, 1]),
        ([[0, 0], [3, 0]], [2, 32], [4, 1]),
        ([[1, 0], [2, 0], [3, 0]], [13, 23, 33], [4, 1]),
        ([[0, 0], [1, 0]], [4, 14], [4, 1]),
        ([[0, 0], [2, 0], [3, 0]], [5, 25, 35], [4, 1]),
    ]
    with self.test_session(use_gpu=False):
        columns = tf.sparse_split(1, 6, self._SparseTensor_4x6())
        self.assertAllEqual(len(columns), 6)
        for pos, (indices, values, shape) in enumerate(expected_columns):
            column = columns[pos]
            self.assertAllEqual(column.indices.eval(), indices)
            self.assertAllEqual(column.values.eval(), values)
            self.assertAllEqual(column.shape.eval(), shape)
def input_fn(data_dir, subset, num_shards, batch_size, use_distortion_for_training=True):
    """Create input graph for model.

    Args:
      data_dir: Directory where TFRecords representing the dataset are located.
      subset: one of 'train', 'validate' and 'eval'.
      num_shards: num of towers participating in data-parallel training;
        values of 1 or less are treated as 1.
      batch_size: total batch size for training to be divided by the number
        of shards.
      use_distortion_for_training: True to use distortions; only takes effect
        when subset is 'train'.

    Returns:
      A pair (feature_shards, label_shards): the batch of inputs split with
      tf.split and the sparse labels split along axis 0, each into
      num_shards pieces.
    """
    with tf.device('/cpu:0'):
        distort = subset == 'train' and use_distortion_for_training
        batches = shadownet.ShadownetDataSet(data_dir, subset, distort).make_batch(batch_size)
        inputdata, input_labels = batches

        # No GPU available or only 1 GPU: fall back to a single shard.
        shard_count = max(1, num_shards)

        feature_shards = tf.split(inputdata, shard_count)
        label_shards = tf.sparse_split(sp_input=input_labels, num_split=shard_count, axis=0)
        return feature_shards, label_shards
def testSliceConcat(self):
    """Splitting along axis 1 and concatenating back restores the indices.

    Runs once with the SparseTensorValue fixture and once with the
    SparseTensor fixture as input.
    """
    fixtures = (self._SparseTensorValue_3x4x2(), self._SparseTensor_3x4x2())
    for sp_input in fixtures:
        with self.test_session(use_gpu=False):
            pieces = tf.sparse_split(sp_input=sp_input, num_split=2, axis=1)
            rejoined = tf.sparse_concat(1, pieces)
            reference = self._SparseTensor_3x4x2()
            self.assertAllEqual(rejoined.indices.eval(), reference.indices.eval())
def make_parallel(self, fn, num_gpus, **kwargs):
    """Parallelize given model on multiple gpu devices.

    adapted from: https://github.com/vahidk/EffectiveTensorflow#make_parallel

    Args:
        fn: callable invoked once per GPU with that GPU's slice of every
            keyword argument; must return a sequence of outputs.
        num_gpus: number of GPU devices, and number of slices per input.
        **kwargs: inputs for ``fn``. ``num_classes`` and ``is_training`` are
            replicated unchanged; sparse tensors are split along axis 0 with
            ``tf.sparse_split``; everything else is split with ``tf.split``.

    Returns:
        A list with one entry per output position of ``fn``; each entry
        stacks that output from all GPUs along a new leading axis.
    """
    in_splits = {}
    for key, value in kwargs.items():
        if key in ('num_classes', 'is_training'):
            # Configuration values: every tower receives the same object.
            in_splits[key] = [value] * num_gpus
        elif isinstance(value, tf.SparseTensor):
            # isinstance (not an exact type check) so subclasses are also
            # routed to the sparse splitter.
            in_splits[key] = tf.sparse_split(sp_input=value, num_split=num_gpus, axis=0)
        else:
            in_splits[key] = tf.split(value, num_gpus)

    out_split = []
    for i in range(num_gpus):
        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
            # AUTO_REUSE lets every tower share the variables created by
            # the first one.
            with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
                outputs = fn(**{k: v[i] for k, v in in_splits.items()})
                for position, output in enumerate(outputs):
                    if position >= len(out_split):
                        out_split.append([])
                    out_split[position].append(output)

    return [tf.stack(per_gpu, axis=0) for per_gpu in out_split]
def testSliceConcat(self):
    """Splitting along dimension 1 and concatenating back restores the indices."""
    with self.test_session(use_gpu=False):
        pieces = tf.sparse_split(1, 2, self._SparseTensor_3x4x2())
        rejoined = tf.sparse_concat(1, pieces)
        reference = self._SparseTensor_3x4x2()
        self.assertAllEqual(rejoined.indices.eval(), reference.indices.eval())
def testSplitMatrixUnevenCols(self):
    """Split the 5x7 fixture along axis 1 into 3 and then 4 uneven pieces.

    The expectation tables hold one ``(indices, values, dense_shape)``
    triple per piece, in split order.
    """
    expected_by_count = (
        (3, [
            ([[0, 0], [0, 2], [1, 1], [2, 0], [3, 0], [3, 2], [4, 1]],
             [0, 2, 11, 20, 30, 32, 41],
             [5, 3]),
            ([[0, 1], [1, 0], [1, 1], [2, 0], [3, 0], [4, 1]],
             [4, 13, 14, 23, 33, 44],
             [5, 2]),
            ([[0, 0], [1, 1], [2, 0], [3, 0], [4, 1]],
             [5, 16, 25, 35, 46],
             [5, 2]),
        ]),
        (4, [
            ([[0, 0], [1, 1], [2, 0], [3, 0], [4, 1]],
             [0, 11, 20, 30, 41],
             [5, 2]),
            ([[0, 0], [1, 1], [2, 1], [3, 0], [3, 1]],
             [2, 13, 23, 32, 33],
             [5, 2]),
            ([[0, 0], [0, 1], [1, 0], [2, 1], [3, 1], [4, 0]],
             [4, 5, 14, 25, 35, 44],
             [5, 2]),
            ([[1, 0], [4, 0]],
             [16, 46],
             [5, 1]),
        ]),
    )
    with self.test_session(use_gpu=False):
        for count, expectations in expected_by_count:
            pieces = tf.sparse_split(sp_input=self._SparseTensor_5x7(), num_split=count, axis=1)
            self.assertAllEqual(len(pieces), count)
            for pos, (indices, values, dense_shape) in enumerate(expectations):
                piece = pieces[pos]
                self.assertAllEqual(piece.indices.eval(), indices)
                self.assertAllEqual(piece.values.eval(), values)
                self.assertAllEqual(piece.dense_shape.eval(), dense_shape)
def testArgumentErrors(self):
    """Each malformed call must raise ValueError with the matching message.

    Every row is ``(expected message, positional args, keyword args)``.
    """
    bad_calls = (
        ("Keyword arguments are required", (3, 2, 1), {}),
        ("sp_input is required", (), {}),
        ("num_split is required", (), {"sp_input": 1}),
        ("axis is required", (), {"num_split": 2, "sp_input": 1}),
    )
    for message, args, kwargs in bad_calls:
        with self.assertRaisesRegexp(ValueError, message):
            tf.sparse_split(*args, **kwargs)
def testArgumentErrors(self):
    """Check the ValueError message raised for each kind of malformed call."""
    def expect_error(message, *args, **kwargs):
        # The call under test runs inside the assertion context.
        with self.assertRaisesRegexp(ValueError, message):
            tf.sparse_split(*args, **kwargs)

    expect_error('Keyword arguments are required', 3, 2, 1)
    expect_error('sp_input is required')
    expect_error('num_split is required', sp_input=1)
    expect_error('axis is required', num_split=2, sp_input=1)
def testSplitMatrixRows(self):
    """Split the 4x6 fixture along axis 0 into two pieces of two rows each."""
    expected_halves = [
        ([[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4]],
         [0, 2, 4, 5, 11, 13, 14],
         [2, 6]),
        ([[0, 0], [0, 3], [0, 5], [1, 0], [1, 2], [1, 3], [1, 5]],
         [20, 23, 25, 30, 32, 33, 35],
         [2, 6]),
    ]
    with self.test_session(use_gpu=False):
        halves = tf.sparse_split(sp_input=self._SparseTensor_4x6(), num_split=2, axis=0)
        self.assertAllEqual(len(halves), 2)
        for pos, (indices, values, dense_shape) in enumerate(expected_halves):
            half = halves[pos]
            self.assertAllEqual(half.indices.eval(), indices)
            self.assertAllEqual(half.values.eval(), values)
            self.assertAllEqual(half.dense_shape.eval(), dense_shape)
def _split_dictionary(dictionary):
    """Split every tensor of a dictionary into ``num_shards`` shards.

    Sparse tensors are split along axis 0 with ``tf.sparse_split``, all
    other values with ``tf.split``.

    Returns:
        A list of ``num_shards`` dictionaries; the i-th one maps each
        original name to the i-th piece of its tensor.
    """
    shards = [{} for _ in range(num_shards)]
    for name, tensor in six.iteritems(dictionary):
        if isinstance(tensor, tf.SparseTensor):
            pieces = tf.sparse_split(sp_input=tensor, num_split=num_shards, axis=0)
        else:
            pieces = tf.split(tensor, num_shards)
        for shard_index, piece in enumerate(pieces):
            shards[shard_index][name] = piece
    return shards
def testSplitColumns(self):
    """Split the 4x6 fixture along axis 1 into three pieces of two columns."""
    expected_pieces = [
        ([[0, 0], [1, 1], [2, 0], [3, 0]],
         [0, 11, 20, 30],
         [4, 2]),
        ([[0, 0], [1, 1], [2, 1], [3, 0], [3, 1]],
         [2, 13, 23, 32, 33],
         [4, 2]),
        ([[0, 0], [0, 1], [1, 0], [2, 1], [3, 1]],
         [4, 5, 14, 25, 35],
         [4, 2]),
    ]
    with self.test_session(use_gpu=False):
        pieces = tf.sparse_split(sp_input=self._SparseTensor_4x6(), num_split=3, axis=1)
        self.assertAllEqual(len(pieces), 3)
        for pos, (indices, values, dense_shape) in enumerate(expected_pieces):
            piece = pieces[pos]
            self.assertAllEqual(piece.indices.eval(), indices)
            self.assertAllEqual(piece.values.eval(), values)
            self.assertAllEqual(piece.dense_shape.eval(), dense_shape)
def __init__(self, input_dim, output_dim, G_u, G_v, num_G, ufn=None, vfn=None, dropout=0.,
             act=tf.nn.relu, share_uv=False, bias=False):
    """Initialise the parent layer, then split weights and graphs per G.

    Args:
        input_dim, output_dim, ufn, vfn, dropout, act, share_uv, bias:
            forwarded positionally to the parent constructor.
        G_u: sparse tensor split into ``num_G`` pieces along axis 1.
        G_v: sparse tensor split into ``num_G`` pieces along axis 1.
        num_G: number of pieces for both weights and sparse tensors.
    """
    super(HeteroGCNLayer, self).__init__(input_dim, output_dim, ufn, vfn, dropout, act, share_uv, bias)

    # Dense weights: one slice per graph, cut along axis 1.
    self.weight_u = tf.split(self.vars["weight_u"], num_or_size_splits=num_G, axis=1)
    self.weight_v = tf.split(self.vars["weight_v"], num_or_size_splits=num_G, axis=1)

    # Sparse graphs: cut the same way.
    self.G_u = tf.sparse_split(axis=1, num_split=num_G, sp_input=G_u)
    self.G_v = tf.sparse_split(axis=1, num_split=num_G, sp_input=G_v)
def fetch_data():
    """Fetch features, labels and sequence lengths and split them per tower.

    The tower count is fixed at 1, so each returned list holds one element.

    Returns:
        A triple (split_feats, split_labels, split_seq_lens) of lists.
    """
    num_towers = 1
    total_batch = ARGS.batch_size * num_towers
    feats, labels, seq_lens = deepSpeech.inputs(
        eval_data='train',
        data_dir=ARGS.data_dir,
        batch_size=total_batch,
        use_fp16=ARGS.use_fp16,
        shuffle=ARGS.shuffle)

    # Split features, labels and sequence lengths along axis 0, one per tower.
    split_feats = tf.split(feats, num_towers, 0)
    split_labels = tf.sparse_split(sp_input=labels, num_split=num_towers, axis=0)
    split_seq_lens = tf.split(seq_lens, num_towers, 0)
    return split_feats, split_labels, split_seq_lens
def testSplitMatrixRows(self):
    """Split the 4x6 fixture along dimension 0 into two pieces of two rows."""
    def check(piece, indices, values, shape):
        self.assertAllEqual(piece.indices.eval(), indices)
        self.assertAllEqual(piece.values.eval(), values)
        self.assertAllEqual(piece.shape.eval(), shape)

    with self.test_session(use_gpu=False):
        halves = tf.sparse_split(0, 2, self._SparseTensor_4x6())
        self.assertAllEqual(len(halves), 2)
        check(halves[0],
              [[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4]],
              [0, 2, 4, 5, 11, 13, 14],
              [2, 6])
        check(halves[1],
              [[0, 0], [0, 3], [0, 5], [1, 0], [1, 2], [1, 3], [1, 5]],
              [20, 23, 25, 30, 32, 33, 35],
              [2, 6])
def testSplitColumns(self):
    """Split the 4x6 fixture along dimension 1 into three two-column pieces."""
    def check(piece, indices, values, shape):
        self.assertAllEqual(piece.indices.eval(), indices)
        self.assertAllEqual(piece.values.eval(), values)
        self.assertAllEqual(piece.shape.eval(), shape)

    with self.test_session(use_gpu=False):
        pieces = tf.sparse_split(1, 3, self._SparseTensor_4x6())
        self.assertAllEqual(len(pieces), 3)
        check(pieces[0],
              [[0, 0], [1, 1], [2, 0], [3, 0]],
              [0, 11, 20, 30],
              [4, 2])
        check(pieces[1],
              [[0, 0], [1, 1], [2, 1], [3, 0], [3, 1]],
              [2, 13, 23, 32, 33],
              [4, 2])
        check(pieces[2],
              [[0, 0], [0, 1], [1, 0], [2, 1], [3, 1]],
              [4, 5, 14, 25, 35],
              [4, 2])
def testSplitAllRows(self):
    """Split the 4x6 fixture along dimension 0 into four single-row pieces."""
    expected_rows = [
        ([[0, 0], [0, 2], [0, 4], [0, 5]], [0, 2, 4, 5], [1, 6]),
        ([[0, 1], [0, 3], [0, 4]], [11, 13, 14], [1, 6]),
        ([[0, 0], [0, 3], [0, 5]], [20, 23, 25], [1, 6]),
        ([[0, 0], [0, 2], [0, 3], [0, 5]], [30, 32, 33, 35], [1, 6]),
    ]
    with self.test_session(use_gpu=False):
        rows = tf.sparse_split(0, 4, self._SparseTensor_4x6())
        self.assertAllEqual(len(rows), 4)
        for pos, (indices, values, shape) in enumerate(expected_rows):
            row = rows[pos]
            self.assertAllEqual(row.indices.eval(), indices)
            self.assertAllEqual(row.values.eval(), values)
            self.assertAllEqual(row.shape.eval(), shape)
def get_outputs(self, inputs, input_seq_length, classifier):
    '''compute the outputs of the decoder

    Args:
        inputs: The inputs to the network as a
            [batch_size x max_input_length x input_dim] tensor
        input_seq_length: The sequence length of the inputs as a
            [batch_size] vector
        classifier: The classifier object that will be used in decoding

    Returns:
        A list with batch_size elements, each a one-element list holding a
        pair of score and output labels
    '''

    # build the decoding graph in inference mode (no targets)
    logits, logits_seq_length = classifier(
        inputs, input_seq_length,
        targets=None, target_seq_length=None,
        is_training=False)

    # the CTC decoder expects time major logits
    time_major_logits = tf.transpose(logits, [1, 0, 2])

    # greedy CTC decoding; only the first decoded path is used
    decoded, scores = tf.nn.ctc_greedy_decoder(time_major_logits, logits_seq_length)
    best_path = decoded[0]
    per_utt_scores = tf.unstack(tf.reshape(scores, [-1]))

    # split the sparse result into the separate utterances
    per_utt_sparse = tf.sparse_split(
        sp_input=best_path, num_split=self.batch_size, axis=0)
    per_utt_labels = []
    for utt in per_utt_sparse:
        per_utt_labels.append(tf.reshape(tf.sparse_tensor_to_dense(utt), [-1]))

    return [[(per_utt_scores[i], per_utt_labels[i])]
            for i in range(self.batch_size)]
def my_input_fn(data_dir='/data/data/crnn_tfrecords', subset='Train', num_shards=0, batch_size=4,
                use_distortion_for_training=False):
    """Build one batch from the CRNN dataset and return its first shard.

    Args:
        data_dir: directory passed to ``CrnnDataSet``.
        subset: subset name passed to ``CrnnDataSet``.
        num_shards: number of shards to split the batch into; values of 1
            or less are treated as 1.
        batch_size: batch size passed to ``make_batch``.
        use_distortion_for_training: accepted but not used by this function.

    Returns:
        A pair (features, labels) holding the first feature shard and the
        first sparse label shard.
    """
    with tf.device('/cpu:0'):
        input_data, input_labels = CrnnDataSet(data_dir, subset).make_batch(batch_size)

        # No GPU available or only 1 GPU: use a single shard.
        shard_count = max(1, num_shards)

        feature_shards = tf.split(input_data, shard_count)
        label_shards = tf.sparse_split(sp_input=input_labels, num_split=shard_count, axis=0)
        first_features, first_labels = feature_shards[0], label_shards[0]
        return first_features, first_labels
def testSplitAllRows(self):
    """Split the 4x6 fixture along axis 0 into four single-row pieces."""
    def check(piece, indices, values, dense_shape):
        self.assertAllEqual(piece.indices.eval(), indices)
        self.assertAllEqual(piece.values.eval(), values)
        self.assertAllEqual(piece.dense_shape.eval(), dense_shape)

    with self.test_session(use_gpu=False):
        rows = tf.sparse_split(sp_input=self._SparseTensor_4x6(), num_split=4, axis=0)
        self.assertAllEqual(len(rows), 4)
        check(rows[0], [[0, 0], [0, 2], [0, 4], [0, 5]], [0, 2, 4, 5], [1, 6])
        check(rows[1], [[0, 1], [0, 3], [0, 4]], [11, 13, 14], [1, 6])
        check(rows[2], [[0, 0], [0, 3], [0, 5]], [20, 23, 25], [1, 6])
        check(rows[3], [[0, 0], [0, 2], [0, 3], [0, 5]], [30, 32, 33, 35], [1, 6])
def get_recon_loss():
    """Sum the weighted reconstruction loss over all views and row chunks.

    The node list is cut into ``split_num`` chunks, with the first
    ``len(node_list) % split_num`` chunks one row longer. For each view and
    each chunk, the chunk's rows of the private embedding are multiplied
    with the transposed full embedding to form logits, the matching chunk
    of the sparse view matrix is densified as labels, and a weighted
    cross-entropy is taken with ``pos_weight`` set to the ratio of zero
    entries to positive entries in that chunk.
    """
    num_chunks = int(self.config.split_num)
    node_count = len(self.config.node_list)
    base_rows = int(node_count / num_chunks)
    longer_chunks = node_count % num_chunks
    chunk_rows = [
        base_rows + 1 if chunk < longer_chunks else base_rows
        for chunk in range(num_chunks)
    ]

    total = 0
    for view in range(self.views_num):
        embedding_key = "private_Y" + str(view)
        embedding_chunks = tf.split(self.Y[embedding_key], chunk_rows, 0)
        view_chunks = tf.sparse_split(sp_input=self.X["view_%d" % view],
                                      num_split=num_chunks,
                                      axis=0)
        for chunk in range(num_chunks):
            logits = tf.matmul(embedding_chunks[chunk],
                               self.Y[embedding_key],
                               transpose_b=True)
            sparse_chunk = view_chunks[chunk]
            labels = tf.sparse_to_dense(
                sparse_indices=sparse_chunk.indices,
                output_shape=sparse_chunk.dense_shape,
                sparse_values=sparse_chunk.values)
            num_pos = tf.reduce_sum(
                tf.cast(tf.greater(labels, 0.0), tf.float32))
            num_neg = tf.reduce_sum(
                tf.cast(tf.equal(labels, 0.0), tf.float32))
            chunk_loss = tf.reduce_mean(
                tf.nn.weighted_cross_entropy_with_logits(
                    labels=labels, logits=logits, pos_weight=num_neg / num_pos))
            total += self.config.view_weight[view] * chunk_loss
    return total
def _build_pair_attention(self, field_indices, field_values, field_shape, hparams):
    """This function learns the candidate news article's embedding and user embedding.
    User embedding is generated from click history and also depends on the candidate news article via attention mechanism.
    Article embedding is generated via KCNN module.

    The batch is processed one example at a time: every input is split into
    ``hparams.batch_size`` pieces along axis 0, and for each piece the
    candidate article and the clicked articles are embedded with
    ``self._kims_cnn``. The attention variables are created on the first
    iteration and reused afterwards, and are registered in
    ``self.layer_params`` once.

    Args:
        field_indices (obj): sparse tensor indices for constructing user clicked history
        field_values (obj): sparse tensor values for constructing user clicked history
        field_shape (obj): sparse tensor shape for constructing user clicked history
        hparams (obj): global hyper-parameters

    Returns:
        click_field_embed_final_batch: user embedding
        news_field_embed_final_batch: candidate news article embedding
    """
    doc_size = hparams.doc_size
    attention_hidden_sizes = hparams.attention_layer_sizes

    # Word inputs: clicked history (sparse) and candidate article (dense),
    # each split into one piece per batch element.
    candidate_word_batch = self.iterator.candidate_news_index_batch
    click_word_batch = tf.SparseTensor(field_indices, field_values, field_shape)
    click_word_split = tf.sparse_split(axis=0, num_split=hparams.batch_size, sp_input=click_word_batch)
    news_word_split = tf.split(axis=0, num_or_size_splits=hparams.batch_size, value=candidate_word_batch)

    # Entity inputs, split the same way. The clicked-entity sparse tensor
    # shares the indices and shape of the clicked-word tensor.
    candidate_entity_batch = self.iterator.candidate_news_entity_index_batch
    news_entity_split = tf.split(axis=0, num_or_size_splits=hparams.batch_size, value=candidate_entity_batch)
    field_entities = self.iterator.click_news_entity_values
    click_entity_batch = tf.SparseTensor(field_indices, field_entities, field_shape)
    click_entity_split = tf.sparse_split(axis=0, num_split=hparams.batch_size, sp_input=click_entity_batch)

    click_field_embed_final_batch = []
    news_field_embed_final_batch = []

    # Opened only to obtain the scope object; it is re-entered inside the
    # loop below for every call to the CNN.
    with tf.variable_scope("kims_cnn") as kcnn_scope:
        pass

    with tf.variable_scope("attention_net", initializer=self.initializer) as scope:
        for index, news_word in enumerate(news_word_split):
            click_word = click_word_split[index]  # get non-zero val
            click_word = click_word.values
            # One row of doc_size entries per clicked article.
            click_word = tf.reshape(click_word, [-1, doc_size])
            news_entity = news_entity_split[index]
            click_entity = click_entity_split[index]
            click_entity = click_entity.values
            click_entity = tf.reshape(click_entity, [-1, doc_size])

            # use kims cnn to get conv embedding
            with tf.variable_scope(
                    kcnn_scope, initializer=self.initializer) as cnn_scope:
                # The first call of the first iteration creates the CNN
                # variables; every later call reuses them.
                if index > 0:
                    cnn_scope.reuse_variables()
                news_field_embed = self._kims_cnn(news_word, news_entity, hparams)
                cnn_scope.reuse_variables()
                click_field_embed = self._kims_cnn(click_word, click_entity, hparams)

            # Hard-coded switch: the averaging branch is never taken.
            avg_strategy = False
            if avg_strategy:
                click_field_embed_final = tf.reduce_mean(click_field_embed, axis=0, keepdims=True)
            else:
                # Repeat the candidate embedding to the shape of the click
                # embeddings by adding it to zeros of that shape.
                news_field_embed_repeat = tf.add(
                    tf.zeros_like(click_field_embed), news_field_embed)
                attention_x = tf.concat(
                    axis=1,
                    values=[click_field_embed, news_field_embed_repeat])
                attention_w = tf.get_variable(
                    name="attention_hidden_w",
                    shape=[
                        self.num_filters_total * 2, attention_hidden_sizes
                    ],
                    dtype=tf.float32,
                )
                attention_b = tf.get_variable(
                    name="attention_hidden_b",
                    shape=[attention_hidden_sizes],
                    dtype=tf.float32,
                )
                curr_attention_layer = tf.nn.xw_plus_b(
                    attention_x, attention_w, attention_b)
                activation = hparams.attention_activation
                curr_attention_layer = self._active_layer(
                    logit=curr_attention_layer, activation=activation)
                attention_output_w = tf.get_variable(
                    name="attention_output_w",
                    shape=[attention_hidden_sizes, 1],
                    dtype=tf.float32,
                )
                attention_output_b = tf.get_variable(
                    name="attention_output_b", shape=[1], dtype=tf.float32)
                attention_weight = tf.nn.sigmoid(
                    tf.nn.xw_plus_b(curr_attention_layer, attention_output_w,
                                    attention_output_b))
                # normalization to the weight sum equal to 1
                weight_sum = tf.reduce_sum(attention_weight)
                norm_attention_weight = tf.div(attention_weight, weight_sum)
                # Attention-weighted sum over the clicked articles.
                click_field_embed_final = tf.reduce_sum(
                    tf.multiply(click_field_embed, norm_attention_weight),
                    axis=0,
                    keepdims=True,
                )
                # Register each attention variable only once, even though
                # this branch runs for every batch element.
                if attention_w not in self.layer_params:
                    self.layer_params.append(attention_w)
                if attention_b not in self.layer_params:
                    self.layer_params.append(attention_b)
                if attention_output_w not in self.layer_params:
                    self.layer_params.append(attention_output_w)
                if attention_output_b not in self.layer_params:
                    self.layer_params.append(attention_output_b)
            news_field_embed_final_batch.append(news_field_embed)
            click_field_embed_final_batch.append(click_field_embed_final)
            # From the second iteration on, tf.get_variable must fetch the
            # existing attention variables instead of creating new ones.
            scope.reuse_variables()

    # Stack the per-example results back into batch tensors along axis 0.
    click_field_embed_final_batch = tf.concat(
        click_field_embed_final_batch, axis=0)
    news_field_embed_final_batch = tf.concat(news_field_embed_final_batch,
                                             axis=0)

    return click_field_embed_final_batch, news_field_embed_final_batch
def __init__(self, input_dim, output_dim, support, support_t, num_support, u_features_nonzero=None,
             v_features_nonzero=None, sparse_inputs=False, dropout=0., act=tf.nn.relu, bias=False,
             share_user_item_weights=False, self_connections=False, **kwargs):
    """Create the layer's weight variables and split the support matrices.

    Args:
        input_dim: First dimension passed to ``weight_variable_random_uniform``.
        output_dim: Second dimension passed to ``weight_variable_random_uniform``;
            also the length of the optional bias vectors.
        support: SparseTensor that is split along axis 1 into ``num_support``
            SparseTensors.
        support_t: SparseTensor split the same way as ``support``
            (presumably the transposed support -- confirm with the caller).
        num_support: Number of splits and number of weight matrices per side.
        u_features_nonzero: Stored on the instance; required when
            ``sparse_inputs`` is True.
        v_features_nonzero: Stored on the instance; required when
            ``sparse_inputs`` is True.
        sparse_inputs: Stored on the instance; enables the non-None check above.
        dropout: Stored on the instance.
        act: Activation callable, stored on the instance.
        bias: When True, constant-initialised bias variables are created.
        share_user_item_weights: When True, the ``v`` weights (and bias) are
            the same tensors as the ``u`` ones.
        self_connections: When True, the last support split and the last
            weight matrix are set aside as self-connection terms.
        **kwargs: Forwarded to the parent constructor.
    """
    # The parent constructor is expected to provide self.name, self.vars and
    # self.logging, all of which are used below.
    super(OrdinalMixtureGCN, self).__init__(**kwargs)

    with tf.variable_scope(self.name + '_vars'):
        self.vars['weights_u'] = tf.stack(
            [weight_variable_random_uniform(input_dim, output_dim, name='weights_u_%d' % i)
             for i in range(num_support)],
            axis=0)

        if bias:
            self.vars['bias_u'] = bias_variable_const([output_dim], 0.01, name="bias_u")

        if not share_user_item_weights:
            self.vars['weights_v'] = tf.stack(
                [weight_variable_random_uniform(input_dim, output_dim, name='weights_v_%d' % i)
                 for i in range(num_support)],
                axis=0)

            if bias:
                self.vars['bias_v'] = bias_variable_const([output_dim], 0.01, name="bias_v")

        else:
            self.vars['weights_v'] = self.vars['weights_u']
            if bias:
                self.vars['bias_v'] = self.vars['bias_u']

    self.weights_u = self.vars['weights_u']
    self.weights_v = self.vars['weights_v']

    self.dropout = dropout

    self.sparse_inputs = sparse_inputs
    self.u_features_nonzero = u_features_nonzero
    self.v_features_nonzero = v_features_nonzero
    if sparse_inputs:
        assert u_features_nonzero is not None and v_features_nonzero is not None, \
            'u_features_nonzero and v_features_nonzero can not be None when sparse_inputs is True'

    self.self_connections = self_connections

    self.bias = bias
    support = tf.sparse_split(axis=1, num_split=num_support, sp_input=support)
    support_t = tf.sparse_split(axis=1, num_split=num_support, sp_input=support_t)

    if self_connections:
        self.support = support[:-1]
        self.support_transpose = support_t[:-1]
        self.u_self_connections = support[-1]
        self.v_self_connections = support_t[-1]
        # Read the self-connection weights BEFORE truncating. Taking [-1]
        # after the [:-1] slice would return the weight of the last regular
        # support instead of the dedicated self-connection weight.
        self.weights_u_self_conn = self.weights_u[-1]
        self.weights_v_self_conn = self.weights_v[-1]
        self.weights_u = self.weights_u[:-1]
        self.weights_v = self.weights_v[:-1]
    else:
        self.support = support
        self.support_transpose = support_t
        self.u_self_connections = None
        self.v_self_connections = None
        self.weights_u_self_conn = None
        self.weights_v_self_conn = None

    self.support_nnz = []
    self.support_transpose_nnz = []
    for sub_support in self.support:
        # Number of stored values in this split. The same count is recorded
        # for the transposed list, as in the original code.
        nnz = tf.reduce_sum(tf.shape(sub_support.values))
        self.support_nnz.append(nnz)
        self.support_transpose_nnz.append(nnz)

    self.act = act

    if self.logging:
        self._log_vars()
# Keep the second of the two columns of slice2_dim1_indices and flatten it to
# 1-D so it can be used as the retain mask for tf.sparse_retain.
slice2_dim1_indices = tf.reshape(
    tf.split(slice2_dim1_indices, [1, 1], axis=1)[1], [-1])
slice2_sparse_words = tf.sparse_retain(sparse_words, slice2_dim1_indices)

# concat SparseTensor: append the entries of both slices, reuse the dense
# shape of the first slice, then restore canonical index ordering.
concat_sparse_words = tf.SparseTensor(
    indices=tf.concat(
        axis=0,
        values=[slice1_sparse_words.indices, slice2_sparse_words.indices]),
    values=tf.concat(
        axis=0,
        values=[slice1_sparse_words.values, slice2_sparse_words.values]),
    dense_shape=slice1_sparse_words.dense_shape)
concat_sparse_words = tf.sparse_reorder(concat_sparse_words)

# join SparseTensor to 1-D String dense Tensor
# NOTE: num_split is hard-coded to 3. An earlier variant used
# sparse_words.get_shape()[0] here instead.
join_words_list = []
slice_words_list = tf.sparse_split(sp_input=sparse_words, num_split=3, axis=0)
for slice_words in slice_words_list:
    # Join the values of one split into a single space-separated string.
    join_words = tf.reduce_join(slice_words.values,
                                reduction_indices=0,
                                separator=" ")
    join_words_list.append(join_words)
join_str = tf.stack(join_words_list)

init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(sess.run(join_str))
def __init__(self, sess, dict_size, output_file):
    """Build the two-layer query/document matching graph.

    The graph maps a sparse query batch and a sparse batch of
    ``cfg.negative_size`` documents per query through two shared dense layers
    with ReLU, computes the scaled cosine similarity between each query and
    each of its documents, and trains with the negative log softmax
    probability of the document at index 0.

    Args:
        sess: Session object, stored on the instance.
        dict_size: Size of the last (feature) dimension of both sparse inputs.
        output_file: Stored on the instance; not used while building the graph.

    NOTE(review): the source of this block had lost its indentation; every
    scope below is assumed to sit inside the '/gpu:0' device block -- confirm.
    """
    self.sess = sess
    self.dict_size = dict_size
    self.output_file = output_file
    self.query_in_shape = np.array([cfg.batch_size, dict_size], np.int64)
    self.doc_in_shape = np.array(
        [cfg.batch_size, cfg.negative_size, dict_size], np.int64)

    with tf.device('/gpu:0'):
        with tf.name_scope('input'):
            # Sparse query features, shape [None, dict_size].
            self.query_batch = tf.sparse_placeholder(
                tf.float32, shape=[None, self.dict_size], name='QueryBatch')
            print("query_batch shape is %s" % self.query_batch.get_shape())
            # Sparse document features, shape
            # [None, cfg.negative_size, dict_size].
            self.doc_batch = tf.sparse_placeholder(
                tf.float32,
                shape=[None, cfg.negative_size, self.dict_size],
                name='DocBatch')
            print("doc_batch shape is %s" % self.doc_batch.get_shape())

        with tf.name_scope('L1'):
            l1_par_range = np.sqrt(6.0 / (self.dict_size + cfg.l1_norm))
            weight1 = tf.Variable(
                tf.random_uniform([self.dict_size, cfg.l1_norm],
                                  -l1_par_range, l1_par_range))
            bias1 = tf.Variable(
                tf.random_uniform([cfg.l1_norm], -l1_par_range, l1_par_range))
            self.variable_summaries(weight1, 'L1_weights')
            self.variable_summaries(bias1, 'L1_biases')

            query_l1 = tf.sparse_tensor_dense_matmul(self.query_batch,
                                                     weight1) + bias1

            # One sparse [batch, 1, dict_size] slice per document position.
            doc_batches = tf.sparse_split(sp_input=self.doc_batch,
                                          num_split=cfg.negative_size,
                                          axis=1)
            doc_l1_batch = []
            for doc in doc_batches:
                doc_2d = tf.sparse_reshape(
                    doc, shape=[cfg.batch_size, self.dict_size])
                doc_l1_batch.append(
                    tf.sparse_tensor_dense_matmul(doc_2d, weight1) + bias1)
            # Stack along axis 1 so the result is [batch, negative, dim].
            # Stacking along axis 0 and then reshaping (as done previously)
            # does not transpose and mixes documents across samples.
            doc_l1 = tf.reshape(
                tf.stack(doc_l1_batch, axis=1),
                shape=[cfg.batch_size, cfg.negative_size, -1])
            print("doc_l1 shape is %s" % doc_l1.get_shape())

            query_l1_out = tf.nn.relu(query_l1)
            print("query_l1_out shape is %s" % query_l1_out.get_shape())
            doc_l1_out = tf.nn.relu(doc_l1)
            print("doc_l1_out shape is %s" % doc_l1_out.get_shape())

        with tf.name_scope('L2'):
            l2_par_range = np.sqrt(6.0 / (cfg.l1_norm + cfg.l2_norm))
            weight2 = tf.Variable(
                tf.random_uniform([cfg.l1_norm, cfg.l2_norm],
                                  -l2_par_range, l2_par_range))
            bias2 = tf.Variable(
                tf.random_uniform([cfg.l2_norm], -l2_par_range, l2_par_range))
            self.variable_summaries(weight2, 'L2_weights')
            self.variable_summaries(bias2, 'L2_biases')

            query_l2 = tf.matmul(query_l1_out, weight2) + bias2
            print("query_l2 shape is %s" % query_l2.get_shape())

            doc_batches = tf.split(value=doc_l1_out,
                                   num_or_size_splits=cfg.negative_size,
                                   axis=1)
            doc_l2_batch = []
            for doc in doc_batches:
                # Squeeze only the split axis so a batch of size 1 keeps
                # its batch dimension.
                doc_l2_batch.append(
                    tf.matmul(tf.squeeze(doc, axis=1), weight2) + bias2)
            # Same ordering fix as in L1: stack on axis 1.
            doc_l2 = tf.reshape(
                tf.stack(doc_l2_batch, axis=1),
                shape=[cfg.batch_size, cfg.negative_size, -1])
            print("doc_l2 shape is %s" % doc_l2.get_shape()[2])

            query_y = tf.nn.relu(query_l2)
            print("query_y shape is %s" % query_y.get_shape())
            doc_y = tf.nn.relu(doc_l2)
            print("doc_y shape is %s" % doc_y.get_shape())

        with tf.name_scope('Cosine_Similarity'):
            # Repeat each query vector once per document so it lines up with
            # the flattened [batch, negative * dim] document tensor.
            query_y_tile = tf.tile(query_y, [1, cfg.negative_size])
            print("query_y_tile shape is %s" % query_y_tile.get_shape())
            doc_y_concat = tf.reshape(doc_y, shape=[cfg.batch_size, -1])
            print("doc_y_concat shape is %s" % doc_y_concat.get_shape())

            query_norm = tf.tile(
                tf.sqrt(tf.reduce_sum(tf.square(query_y), 1, True)),
                [1, cfg.negative_size])
            print("query_norm shape is %s" % query_norm.get_shape())
            # Drop only the reduced axis; the result is [batch, negative].
            doc_norm = tf.squeeze(
                tf.sqrt(tf.reduce_sum(tf.square(doc_y), 2, True)), axis=2)
            print("doc_norm shape is %s" % doc_norm.get_shape())

            elementwise = tf.multiply(query_y_tile, doc_y_concat)
            print("tf.multiply(query_y_tile, doc_y_concat) shape is %s" %
                  elementwise.get_shape())
            # Per-(query, document) dot product.
            prod = tf.reduce_sum(
                tf.reshape(elementwise,
                           shape=[cfg.batch_size, cfg.negative_size, -1]),
                2)
            print("prod shape is %s" % prod.get_shape())
            norm_prod = tf.multiply(query_norm, doc_norm)
            print("norm_prod shape is %s" % norm_prod.get_shape())

            cos_sim_raw = tf.truediv(prod, norm_prod)
            print("cos_sim_raw shape is %s" % cos_sim_raw.get_shape())
            # 20 is the smoothing factor gamma applied before the softmax.
            cos_sim = tf.transpose(
                tf.reshape(tf.transpose(cos_sim_raw),
                           [cfg.negative_size, cfg.batch_size])) * 20
            print("cos_sim shape is %s" % cos_sim.get_shape())

        with tf.name_scope('Loss'):
            # Train Loss
            self.prob = tf.nn.softmax((cos_sim))
            print("prob shape is %s" % self.prob.get_shape())
            # Column 0 is the document whose probability is maximised.
            hit_prob = tf.slice(self.prob, [0, 0], [-1, 1])
            print("hit_prob shape is %s" % hit_prob.get_shape())
            self.loss = -tf.reduce_sum(tf.log(hit_prob)) / cfg.batch_size
            tf.summary.scalar('loss', self.loss)

        with tf.name_scope('Training'):
            # Optimizer
            self.train_step = tf.train.GradientDescentOptimizer(
                cfg.learning_rate).minimize(self.loss)
            self.model = tf.train.Saver()

        with tf.name_scope('Accuracy'):
            # A sample counts as correct when document 0 has the top score.
            correct_prediction = tf.equal(tf.argmax(self.prob, 1), 0)
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))

        with tf.name_scope('Test'):
            self.average_accuracy = tf.placeholder(tf.float32)
            self.accuracy_summary = tf.summary.scalar('accuracy',
                                                      self.average_accuracy)

        with tf.name_scope('Train'):
            self.average_loss = tf.placeholder(tf.float32)
            self.loss_summary = tf.summary.scalar('average_loss',
                                                  self.average_loss)
#define dropout keep_prob = tf.placeholder(tf.float32) # Here we use sparse_placeholder that will generate a # SparseTensor required by ctc_loss op. targets = tf.sparse_placeholder(tf.int32) # 1d array of size [batch_size] seq_len = tf.placeholder(tf.int32, [None]) istrain = tf.placeholder(tf.bool, [None]) # Split the batch of images and labels for towers. #x_splits = tf.split(inputs, FLAGS.num_gpus, 0) #y_splits = tf.split(targets, FLAGS.num_gpus, 0) x_splits = tf.split(inputs, 4, 0) y_splits = tf.sparse_split(sp_input=targets, num_split=4, axis=0) seq_len_split = tf.split(seq_len, 4, 0) #seq_len = tf.split(targets, FLAGS.num_gpus, 0) #????????????????????? with tf.variable_scope("model"): optimizer = tf.train.MomentumOptimizer(initial_learning_rate, 0.9) #optimizer = tf.train.AdamOptimizer() with tf.variable_scope("tower"): tower_grads = [] tower_costs = [] tower_lers = [] for i in xrange(4): with tf.device('/gpu:%d' % i): # Force all Variables to reside on the CPU.
def export():
    """Build the string-in/string-out NER inference graph and export it.

    The graph splits each input sentence on spaces, keeps the first
    ``FLAGS.num_steps`` words for the model, labels every word beyond that
    with 'O', joins the predicted labels into a string and writes a
    SavedModel under ``<FLAGS.export_path>/<global_step>``.

    Returns early (after printing a message) when no checkpoint is found in
    ``FLAGS.checkpoint_path``.
    """
    checkpoint_path = FLAGS.checkpoint_path
    export_path = FLAGS.export_path
    vocab_path = FLAGS.vocab_path

    num_steps = FLAGS.num_steps
    vocab_size = FLAGS.vocab_size
    embedding_size = FLAGS.embedding_size
    hidden_size = FLAGS.hidden_size
    keep_prob = FLAGS.keep_prob
    num_layers = FLAGS.num_layers
    num_classes = FLAGS.num_classes
    prop_limit = FLAGS.prop_limit

    # split 1-D String dense Tensor to words SparseTensor
    sentences = tf.placeholder(dtype=tf.string,
                               shape=[None],
                               name='input_sentences')
    sparse_words = tf.string_split(sentences, delimiter=' ')

    # slice SparseTensor: the comparison is broadcast over both index columns;
    # only the second column (word position) is kept as the retain mask.
    valid_indices = tf.less(sparse_words.indices,
                            tf.constant([num_steps], dtype=tf.int64))
    valid_indices = tf.reshape(
        tf.split(valid_indices, [1, 1], axis=1)[1], [-1])
    valid_sparse_words = tf.sparse_retain(sparse_words, valid_indices)

    excess_indices = tf.greater_equal(sparse_words.indices,
                                      tf.constant([num_steps], dtype=tf.int64))
    excess_indices = tf.reshape(
        tf.split(excess_indices, [1, 1], axis=1)[1], [-1])
    excess_sparse_words = tf.sparse_retain(sparse_words, excess_indices)

    # sparse to dense, padding short sentences with '_PAD'
    words = tf.sparse_to_dense(
        sparse_indices=valid_sparse_words.indices,
        output_shape=[valid_sparse_words.dense_shape[0], num_steps],
        sparse_values=valid_sparse_words.values,
        default_value='_PAD')

    # dict words to token ids
    words_table = lookup.index_table_from_file(
        os.path.join(vocab_path, 'words_vocab.txt'), default_value=3)
    words_ids = words_table.lookup(words)

    # blstm model predict
    with tf.variable_scope('model', reuse=None):
        logits, _ = ner_model.inference(words_ids,
                                        valid_sparse_words.dense_shape[0],
                                        num_steps,
                                        vocab_size,
                                        embedding_size,
                                        hidden_size,
                                        keep_prob,
                                        num_layers,
                                        num_classes,
                                        is_training=False)
    props = tf.nn.softmax(logits)
    max_prop_values, max_prop_indices = tf.nn.top_k(props, k=1)
    predict_scores = tf.reshape(max_prop_values, shape=[-1, num_steps])
    predict_labels_ids = tf.reshape(max_prop_indices, shape=[-1, num_steps])
    predict_labels_ids = tf.to_int64(predict_labels_ids)

    # replace untrusted prop that less than prop_limit with label id 4
    trusted_prop_flag = tf.greater_equal(
        predict_scores, tf.constant(prop_limit, dtype=tf.float32))
    replace_prop_labels_ids = tf.to_int64(
        tf.fill(tf.shape(predict_labels_ids), 4))
    predict_labels_ids = tf.where(trusted_prop_flag, predict_labels_ids,
                                  replace_prop_labels_ids)

    # dict token ids to labels
    # NOTE(review): the fallback label here is lower-case 'o' while excess
    # words below are filled with upper-case 'O' -- confirm which is intended.
    labels_table = lookup.index_to_string_table_from_file(
        os.path.join(vocab_path, 'labels_vocab.txt'), default_value='o')
    predict_labels = labels_table.lookup(predict_labels_ids)

    # extract real blstm predict label in dense and save to sparse
    valid_sparse_predict_labels = tf.SparseTensor(
        indices=valid_sparse_words.indices,
        values=tf.gather_nd(predict_labels, valid_sparse_words.indices),
        dense_shape=valid_sparse_words.dense_shape)

    # create excess label SparseTensor with 'O'
    excess_sparse_predict_labels = tf.SparseTensor(
        indices=excess_sparse_words.indices,
        values=tf.fill(tf.shape(excess_sparse_words.values), 'O'),
        dense_shape=excess_sparse_words.dense_shape)

    # concat SparseTensor
    sparse_predict_labels = tf.SparseTensor(
        indices=tf.concat(axis=0,
                          values=[
                              valid_sparse_predict_labels.indices,
                              excess_sparse_predict_labels.indices
                          ]),
        values=tf.concat(axis=0,
                         values=[
                             valid_sparse_predict_labels.values,
                             excess_sparse_predict_labels.values
                         ]),
        dense_shape=excess_sparse_predict_labels.dense_shape)
    sparse_predict_labels = tf.sparse_reorder(sparse_predict_labels)

    # join SparseTensor to 1-D String dense Tensor
    # remain issue, num_split should equal the real size, but here limit to 1
    join_labels_list = []
    slice_labels_list = tf.sparse_split(sp_input=sparse_predict_labels,
                                        num_split=1,
                                        axis=0)
    for slice_labels in slice_labels_list:
        join_labels = tf.reduce_join(slice_labels.values,
                                     reduction_indices=0,
                                     separator=' ')
        join_labels_list.append(join_labels)
    format_predict_labels = tf.stack(join_labels_list)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(checkpoint_path)
        if ckpt and ckpt.model_checkpoint_path:
            print('read model from {}'.format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
            # The step number is the suffix after the last '-' in the path.
            global_step = int(ckpt.model_checkpoint_path.split('-')[-1])
        else:
            print('No checkpoint file found at %s' % FLAGS.checkpoint_path)
            return

        # Export inference model.
        output_path = os.path.join(export_path, str(global_step))
        # Function-form print with one formatted string gives the same
        # output on Python 2 and Python 3.
        print('Exporting trained model to %s' % output_path)
        builder = tf.saved_model.builder.SavedModelBuilder(output_path)

        # Build the signature_def_map.
        predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
            sentences)
        predict_output_tensor_info = tf.saved_model.utils.build_tensor_info(
            format_predict_labels)
        prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs={
                'input_sentences': predict_inputs_tensor_info,
            },
            outputs={'classes': predict_output_tensor_info},
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

        # The lookup tables must be initialised when the model is loaded.
        legacy_init_op = tf.group(tf.tables_initializer(),
                                  name='legacy_init_op')
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={'predict_ner': prediction_signature},
            legacy_init_op=legacy_init_op)

        builder.save()
        print('Successfully exported model to %s' % export_path)
def train(self, data_loader):
    """Build the multi-tower training graph and run the training loop.

    The input batch is split into ``cfg.NUM_GPUS`` shards, one model tower is
    built per entry of ``cfg.HOSTS``, the per-tower gradients are handed to
    ``self.solver`` and the resulting train op is run until the input queue
    is exhausted, the loss becomes NaN, or ``cfg.SOLVER.MAX_ITERS`` is reached.

    Args:
        data_loader: Object providing ``read_with_bucket_queue``, which
            returns ``(images, labels, widths)``; ``labels`` is passed to
            ``tf.sparse_split`` and so must be a SparseTensor.

    NOTE(review): the source of this block had lost its indentation; the
    nesting of the device/scope blocks and of the summary write inside the
    display branch is reconstructed -- confirm against the original file.
    """
    with tf.device('/cpu:0'):
        input_images, input_labels, input_widths = data_loader.read_with_bucket_queue(
            batch_size=cfg.TRAIN.BATCH_SIZE,
            num_threads=cfg.TRAIN.THREADS,
            num_epochs=cfg.TRAIN.EPOCH,
            shuffle=cfg.TRAIN.USE_SHUFFLE)

        # One shard per GPU. With NUM_GPUS == 1 each call returns a
        # one-element list, so no single-GPU special case is needed.
        images_sp = tf.split(input_images, cfg.NUM_GPUS)
        labels_sp = tf.sparse_split(sp_input=input_labels,
                                    num_split=cfg.NUM_GPUS,
                                    axis=0)
        widths_sp = tf.split(input_widths, cfg.NUM_GPUS)

    tower_grads = []
    tower_distance = []
    for i, host in enumerate(cfg.HOSTS):
        # Variables are created by the first tower and reused by the rest.
        reuse = i > 0
        with tf.device('/gpu:%d' % host):
            with tf.name_scope('model_%d' % host) as scope:
                with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
                    logits = get_models(cfg.MODEL.BACKBONE)(
                        cfg.MODEL.NUM_CLASSES).build(images_sp[i], True)
                    seqlen = tf.cast(tf.floor_div(widths_sp[i], 2),
                                     tf.int32,
                                     name='sequence_length')

                    model_loss, total_loss = self.tower_loss(
                        logits, labels_sp[i], seqlen)
                    distance, _ = self.metrics(logits, labels_sp[i], seqlen)

                # Summaries are only attached to the first tower.
                if not reuse and cfg.ENABLE_TENSOR_BOARD:
                    tf.summary.image(name='InputImages', tensor=images_sp[i])
                    tf.summary.scalar(name='ModelLoss', tensor=model_loss)
                    tf.summary.scalar(name='TotalLoss', tensor=total_loss)
                    tf.summary.scalar(name='Distance', tensor=distance)

                # Overwritten on every iteration, so the solver ends up with
                # the update ops collected under the last tower's name scope.
                self.solver.bn_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))

                grads = self.solver.optimizer.compute_gradients(total_loss)
                tower_grads.append(grads)
                tower_distance.append(distance)

    self.solver.apply_gradients(tower_grads)
    if cfg.SOLVER.USE_MOVING_AVERAGE_DECAY:
        self.solver.apply_moving_average()

    train_op = self.solver.get_train_op()
    distance_mean = tf.reduce_mean(tower_distance)

    if cfg.ENABLE_TENSOR_BOARD:
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(self.summary_dir,
                                               tf.get_default_graph())

    saver = tf.train.Saver(tf.global_variables())
    gpu_options = tf.GPUOptions(allow_growth=True)
    session_config = tf.ConfigProto(gpu_options=gpu_options,
                                    allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        if cfg.RESTORE:
            logger.info('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(self.output_dir)
            logger.debug(ckpt)
            saver.restore(sess, ckpt)
        elif cfg.TRAIN.WEIGHTS:
            # Load the pre-trained weights
            self.init_weights_fn(cfg.TRAIN.WEIGHTS)(sess)

        start = time.time()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            while not coord.should_stop():
                # model_loss / total_loss refer to the last tower built above.
                _, step, lr, ml, tl, dt = sess.run(
                    [train_op, self.solver.global_step,
                     self.solver.learning_rate, model_loss, total_loss,
                     distance_mean])

                if np.isnan(tl):
                    logger.error('Loss diverged, stop training')
                    break

                if step % cfg.SOLVER.DISPLAY == 0:
                    # The timer is reset every DISPLAY steps, so averages
                    # must use that interval (previously hard-coded to 10).
                    elapsed = time.time() - start
                    avg_time_per_step = elapsed / cfg.SOLVER.DISPLAY
                    avg_examples_per_second = (
                        cfg.SOLVER.DISPLAY * cfg.TRAIN.BATCH_SIZE *
                        cfg.NUM_GPUS) / elapsed
                    start = time.time()

                    tb = PrettyTable(
                        ['Step', 'LR', 'ModelLoss', 'TotalLoss', 'sec/step',
                         'exp/sec', 'Distance'])
                    tb.add_row(
                        ['{}/{}'.format(step, cfg.SOLVER.MAX_ITERS),
                         '{:.3f}'.format(lr),
                         '{:.3f}'.format(ml),
                         '{:.3f}'.format(tl),
                         '{:.3f}'.format(avg_time_per_step),
                         '{:.3f}'.format(avg_examples_per_second),
                         '{:.3f}'.format(dt)])
                    print(tb)

                    if cfg.ENABLE_TENSOR_BOARD:
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str,
                                                   global_step=step)

                if step % cfg.SOLVER.SNAPSHOT_ITERS == 0:
                    saver.save(sess,
                               os.path.join(self.output_dir, 'model.ckpt'),
                               global_step=self.solver.global_step)

                if step >= cfg.SOLVER.MAX_ITERS:
                    break
        except tf.errors.OutOfRangeError:
            # Raised by the input queue once all epochs have been consumed.
            logger.info('Epochs Complete!')
        finally:
            coord.request_stop()
            coord.join(threads)
            if cfg.ENABLE_TENSOR_BOARD:
                # Flush pending events and release the event file.
                summary_writer.close()