def test_update_by_optimizer_bpv2(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')
  sparse_params = de.get_variable('kr193', dim=2, initializer=0.0, bp_v2=True)
  sparse_params.upsert(
      constant_op.constant([1, 2, 3], dtype=dtypes.int64),
      constant_op.constant([[2.4, 3.1], [5.1, -0.7], [-15.2, 3.9]],
                           dtype=dtypes.float32))
  shadow = de.shadow_ops.ShadowVariable(sparse_params)
  dense_params = variables.Variable([[2.4, 3.1], [5.1, -0.7], [-15.2, 3.9]],
                                    dtype=dtypes.float32)

  sparse_optimizer = tf.keras.optimizers.Adam(1E-4)
  sparse_optimizer = de.DynamicEmbeddingOptimizer(sparse_optimizer)
  dense_optimizer = tf.keras.optimizers.Adam(1E-4)
  dense_optimizer = de.DynamicEmbeddingOptimizer(dense_optimizer)
  rtol = 2e-4
  atol = 2e-6

  def sparse_loss():
    ids = constant_op.constant([1, 3], dtype=dtypes.int64)
    emb = de.shadow_ops.embedding_lookup(shadow, ids)
    return math_ops.reduce_mean(emb)

  def dense_loss():
    ids = constant_op.constant([0, 2], dtype=dtypes.int64)
    emb = array_ops.gather(dense_params, ids)
    return math_ops.reduce_mean(emb)

  for i in range(10):
    sparse_optimizer.minimize(sparse_loss, var_list=[shadow])
    dense_optimizer.minimize(dense_loss, var_list=[dense_params])

  ids = constant_op.constant([1, 2, 3], dtype=dtypes.int64)
  values = sparse_params.lookup(ids)
  self.assertAllClose(values, dense_params, rtol, atol)

  sparse_slot_params = sparse_params.get_slot_variables(sparse_optimizer)
  dense_slot_params = [
      dense_optimizer.get_slot(dense_params, name)
      for name in dense_optimizer.get_slot_names()
  ]
  for i in range(len(sparse_slot_params)):
    sparse_values = sparse_slot_params[i].lookup(ids)
    dense_values = dense_slot_params[i]
    self.assertAllClose(sparse_values, dense_values)
def test_backward(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')

  @tf.function
  def slot_map_fn(x):
    return tf.math.floormod(x, 2)

  init = tf.keras.initializers.RandomNormal(seed=0)
  model = get_sequential_model(de.keras.layers.FieldWiseEmbedding,
                               4,
                               2,
                               slot_map_fn,
                               bp_v2=True,
                               initializer=init,
                               name='oe423')
  optmz = tf.keras.optimizers.Adam(learning_rate=1E-4, amsgrad=True)
  optmz = de.DynamicEmbeddingOptimizer(optmz)
  emb_layer = model.layers[0]
  model.compile(optimizer=optmz, loss='binary_crossentropy')
  start = 0
  batch_size = 10
  for i in range(1, 10):
    x = math_ops.range(start, start + batch_size * i, dtype=dtypes.int64)
    x = tf.reshape(x, (batch_size, -1))
    start += batch_size * i
    y = tf.zeros((batch_size, 1), dtype=dtypes.float32)
    model.fit(x, y, verbose=0)
  self.assertAllEqual(emb_layer.params.size(), start)
def test_training_with_restrict_policy(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    var, shadow_var = _get_sparse_variable(
        'pf988', dim=2, restrict_policy=de.TimestampRestrictPolicy)
    optimizer = optimizer_v2.adam.Adam(1E-4)
    optimizer = de.DynamicEmbeddingOptimizer(optimizer)

    @def_function.function
    def compute_fn(var, ids):
      emb = de.shadow_ops.embedding_lookup(var, ids)
      return math_ops.reduce_mean(emb)

    start = 0
    size = 0
    for i in range(10):
      ids = math_ops.range(start, i + 1, dtype=dtypes.int64)
      start = math_ops.reduce_max(ids) + 1
      size += array_ops.size(ids)
      optimizer.minimize(lambda: compute_fn(shadow_var, ids), [shadow_var])
    self.assertAllEqual(var.size(), size)
    self.assertAllEqual(var.restrict_policy.status.size(), size)
def commonly_apply_restriction_verify(self, optimizer):
  first_inputs = np.array(range(6), dtype=np.int64)
  second_inputs = np.array(range(4, 9), dtype=np.int64)
  overdue_features = np.array(range(4), dtype=np.int64)
  updated_features = np.array(range(4, 9), dtype=np.int64)
  all_input_features = np.array(range(9), dtype=np.int64)
  embedding_dim = 2
  oversize_trigger = 100
  optimizer = de.DynamicEmbeddingOptimizer(optimizer)

  with session.Session(config=default_config) as sess:
    ids = array_ops.placeholder(dtypes.int64)
    var = de.get_variable('sp_var',
                          key_dtype=ids.dtype,
                          value_dtype=dtypes.float32,
                          initializer=-0.1,
                          dim=embedding_dim,
                          restrict_policy=de.TimestampRestrictPolicy)
    embed_w, trainable = de.embedding_lookup(var,
                                             ids,
                                             return_trainable=True,
                                             name='ut8900')
    loss = _simple_loss(embed_w)
    train_op = optimizer.minimize(loss, var_list=[trainable])
    slot_params = [
        optimizer.get_slot(trainable, name).params
        for name in optimizer.get_slot_names()
    ]
    all_vars = [var] + slot_params + [var.restrict_policy.status]

    sess.run(variables.global_variables_initializer())
    sess.run([train_op], feed_dict={ids: first_inputs})
    time.sleep(1)
    sess.run([train_op], feed_dict={ids: second_inputs})
    for v in all_vars:
      self.assertAllEqual(sess.run(v.size()), 9)

    keys, tstp = sess.run(var.restrict_policy.status.export())
    kvs = sorted(dict(zip(keys, tstp)).items())
    tstp = np.array([x[1] for x in kvs])
    for x in tstp[overdue_features]:
      for y in tstp[updated_features]:
        self.assertLess(x, y)

    sess.run(
        var.restrict_policy.apply_restriction(len(updated_features),
                                              trigger=oversize_trigger))
    for v in all_vars:
      self.assertAllEqual(sess.run(v.size()), len(all_input_features))

    sess.run(
        var.restrict_policy.apply_restriction(len(updated_features),
                                              trigger=len(updated_features)))
    for v in all_vars:
      self.assertAllEqual(sess.run(v.size()), len(updated_features))

    keys, _ = sess.run(var.export())
    keys_sorted = np.sort(keys)
    self.assertAllEqual(keys_sorted, updated_features)
def test_training_with_distributed_strategy(self):
  # TODO(Lifann) The servers stay alive and make other test cases fail,
  # so this case is kept only for demonstration.
  self.skipTest('Only for demonstration.')
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')

  cluster_spec = tf.train.ClusterSpec({
      'ps': ['localhost:2220', 'localhost:2221'],
      'worker': ['localhost:2222', 'localhost:2223']
  })
  ps_list, worker_list = _create_ps_and_worker_servers(cluster_spec)
  resolver = tf.distribute.cluster_resolver.SimpleClusterResolver(cluster_spec)
  strategy = tf.distribute.experimental.ParameterServerStrategy(resolver)
  coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator(
      strategy)

  with strategy.scope() as scope:
    var = de.get_variable('pf988',
                          dim=2,
                          initializer=0.1,
                          devices=['/job:ps/task:0', '/job:ps/task:1'])
    shadow_var = de.shadow_ops.ShadowVariable(var,
                                              name='pf988-shadow',
                                              distribute_strategy=strategy)
    optimizer = optimizer_v2.adam.Adam(1E-4)
    optimizer = de.DynamicEmbeddingOptimizer(optimizer)

  def dist_dataset_fn():
    dataset_values = np.arange(0, 10, dtype=np.int64)
    fn = lambda x: tf.data.Dataset.from_tensor_slices(dataset_values).batch(
        4).repeat(None)
    return strategy.distribute_datasets_from_function(fn)

  dataset = coordinator.create_per_worker_dataset(dist_dataset_fn)

  @tf.function
  def step_fn(iterator):

    def replica_fn(ids):

      def loss_fn(ids):
        batch_size = tf.shape(ids)[0]
        emb = de.shadow_ops.embedding_lookup(shadow_var, ids)
        loss = tf.reduce_mean(emb)
        return loss

      optimizer.minimize(lambda: loss_fn(ids), [shadow_var])

    return strategy.run(replica_fn, args=(next(iterator),))

  iterator = iter(dataset)
  for i in range(5):
    coordinator.schedule(step_fn, args=(iterator,))
  coordinator.join()
  self.assertAllEqual(var.size(), 10)
def train(num_steps):
  """Do training and produce the model."""

  # Create a model.
  model = video_game_model.VideoGameDnn(batch_size=FLAGS.batch_size,
                                        embedding_size=FLAGS.embedding_size)
  optimizer = tf.keras.optimizers.Adam(1E-3, clipnorm=None)
  optimizer = de.DynamicEmbeddingOptimizer(optimizer)
  auc = tf.keras.metrics.AUC(num_thresholds=1000)
  accuracy = tf.keras.metrics.BinaryAccuracy(dtype=tf.float32)
  model.compile(optimizer=optimizer,
                loss='binary_crossentropy',
                metrics=[accuracy, auc])

  # Get the data iterator.
  iterator = feature.initialize_dataset(batch_size=FLAGS.batch_size,
                                        split='train',
                                        shuffle_size=FLAGS.shuffle_size,
                                        skips=0,
                                        balanced=True)

  # Run training.
  try:
    for step in range(num_steps):
      features, labels = feature.input_fn(iterator)
      if step % 10 == 0:
        verbose = 1
      else:
        verbose = 0
      model.fit(features, labels, steps_per_epoch=1, epochs=1, verbose=verbose)
      if verbose > 0:
        print('step: {}, size of sparse domain: {}'.format(
            step, model.embedding_store.size()))
      # Shrink the embedding table to 80% of `max_size` whenever it grows
      # beyond `max_size`.
      model.embedding_store.restrict(int(FLAGS.max_size * 0.8),
                                     trigger=FLAGS.max_size)
  except tf.errors.OutOfRangeError:
    print('Ran out of training data.')

  # Save the model for inference.
  options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
  if FLAGS.save_format == 'tf':
    model.save(FLAGS.export_dir, options=options)
  elif FLAGS.save_format == 'keras':
    tf.keras.models.save_model(model, FLAGS.export_dir, options=options)
  else:
    raise NotImplementedError(
        'Unsupported save format: {}'.format(FLAGS.save_format))
def common_minimize_trainable(self, base_opt, test_opt, name):
  tf.config.set_soft_device_placement(True)
  hvd.init()
  base_opt = de.DynamicEmbeddingOptimizer(base_opt, synchronous=True)
  for dtype, run_step, dim in itertools.product([dtypes.float32], [1], [10]):
    x = tf.random.uniform(shape=[32, dim])
    y = tf.zeros([32, 1])
    global_step = training_util.create_global_step()

    base_weight = tf.compat.v1.get_variable(name="base_weights",
                                            initializer=tf.ones([10, 1]))
    base_logits = tf.nn.relu(math_ops.matmul(x, base_weight))
    base_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=base_logits)
    base_opt_op = base_opt.minimize(base_loss,
                                    global_step,
                                    var_list=[base_weight])

    test_weight = tf.compat.v1.get_variable(name="test_weights",
                                            initializer=tf.ones([10, 1]))
    test_logits = tf.nn.relu(math_ops.matmul(x, test_weight))
    test_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=test_logits)
    grads_and_vars = test_opt.compute_gradients(test_loss,
                                                var_list=[test_weight])
    var_list = []
    aggregated_grad = []
    for grad, var in grads_and_vars:
      var_list.append(var)
      aggregated_grad.append(hvd.allreduce(grad, op=hvd.Sum))
    aggregated_grads_and_vars = zip(aggregated_grad, var_list)
    test_opt_op = test_opt.apply_gradients(aggregated_grads_and_vars,
                                           global_step)

    with monitored_session.MonitoredTrainingSession(
        is_chief=True, config=default_config) as sess:
      for _ in range(run_step):
        sess.run(base_opt_op)
        sess.run(test_opt_op)
      self.assertAllCloseAccordingToType(
          sess.run(base_weight),
          sess.run(test_weight),
          msg="Cond:{},{},{}".format(dtype, run_step, dim),
      )
def test_inference_numberic_correctness(self):
  train_pred = None
  infer_pred = None
  dim = 8
  initializer = init_ops.random_normal_initializer(0.0, 0.001)
  raw_init_vals = np.random.rand(100, dim)

  for fn in [de.enable_train_mode, de.enable_inference_mode]:
    with ops.Graph().as_default():
      fn()

      init_ids = constant_op.constant(list(range(100)), dtype=dtypes.int64)
      init_vals = constant_op.constant(raw_init_vals, dtype=dtypes.float32)
      with variable_scope.variable_scope("modelmode",
                                         reuse=variable_scope.AUTO_REUSE):
        embeddings = de.get_variable('ModelModeTest-numberic',
                                     key_dtype=dtypes.int64,
                                     value_dtype=dtypes.float32,
                                     devices=_get_devices() * 2,
                                     initializer=initializer,
                                     dim=dim)
        w = variables.Variable(1.0, name="w")
        _ = training_util.create_global_step()
      init_op = embeddings.upsert(init_ids, init_vals)

      ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
      test_var, trainable = de.embedding_lookup([embeddings],
                                                ids,
                                                return_trainable=True)
      pred = math_ops.add(test_var, 1) * w
      loss = pred * pred
      opt = de.DynamicEmbeddingOptimizer(adagrad.AdagradOptimizer(0.1))
      opt.minimize(loss)

      with monitored_session.MonitoredTrainingSession(
          is_chief=True, config=default_config) as sess:
        if de.get_model_mode() == de.ModelMode.TRAIN:
          sess.run(init_op)
          train_pred = sess.run(pred)
        elif de.get_model_mode() == de.ModelMode.INFERENCE:
          sess.run(init_op)
          infer_pred = sess.run(pred)
    de.enable_train_mode()
    ops.reset_default_graph()

  self.assertAllEqual(train_pred, infer_pred)
def __init__(self, batch_size=1, embedding_size=1):
  super(VideoGameDnn, self).__init__()
  self.batch_size = batch_size
  self.embedding_size = embedding_size

  # Create embedding variable by `tfra.dynamic_embedding` API.
  self.embedding_store = de.get_variable(
      'video_feature_embedding',
      key_dtype=tf.int64,
      value_dtype=tf.float32,
      dim=embedding_size,
      devices=['/CPU:0'],
      initializer=tf.keras.initializers.RandomNormal(-0.1, 0.1),
      trainable=True,
      restrict_policy=de.TimestampRestrictPolicy)

  # Create dense layers.
  self.dnn0 = tf.keras.layers.Dense(
      64,
      activation='relu',
      use_bias=True,
      bias_initializer='glorot_uniform',
      kernel_regularizer=tf.keras.regularizers.L1(0.01),
      bias_regularizer=tf.keras.regularizers.L1(0.02),
  )
  self.dnn1 = tf.keras.layers.Dense(
      16,
      activation='relu',
      use_bias=True,
      bias_initializer='glorot_uniform',
      kernel_regularizer=tf.keras.regularizers.L1(0.01),
      bias_regularizer=tf.keras.regularizers.L1(0.02),
  )
  self.dnn2 = tf.keras.layers.Dense(1, use_bias=False)
  self.embedding_trainables = []

  # Create optimizer.
  self.optmz = de.DynamicEmbeddingOptimizer(tf.keras.optimizers.Adam(0.01))

  # Metric observer.
  self._auc = tf.metrics.AUC()
def train():
  dataset = get_dataset(batch_size=32)
  model = DualChannelsDeepModel(FLAGS.embedding_size, FLAGS.embedding_size,
                                tf.keras.initializers.RandomNormal(0.0, 0.5))
  optimizer = tf.keras.optimizers.Adam(1E-3)
  optimizer = de.DynamicEmbeddingOptimizer(optimizer)

  auc = tf.keras.metrics.AUC(num_thresholds=1000)
  model.compile(optimizer=optimizer,
                loss=tf.keras.losses.MeanSquaredError(),
                metrics=[auc])

  if os.path.exists(FLAGS.model_dir):
    model.load_weights(FLAGS.model_dir)

  model.fit(dataset,
            epochs=FLAGS.epochs,
            steps_per_epoch=FLAGS.steps_per_epoch)

  save_options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
  model.save(FLAGS.model_dir, options=save_options)
def test_update_with_optimizer_v2(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')

  for bp_v2 in [False, True]:
    var, shadow_var = _get_sparse_variable('bh890-bpv2-%s' % bp_v2,
                                           dim=2,
                                           bp_v2=bp_v2)
    optimizer = optimizer_v2.adagrad.Adagrad(1E-3)
    optimizer = de.DynamicEmbeddingOptimizer(optimizer)
    initialized = False

    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config):
      ids = []
      for i in range(10):
        ids.append(i)
        tf_ids = ops.convert_to_tensor(ids, dtype=dtypes.int64)

        def _loss_fn(shadow_var, ids):
          emb = de.shadow_ops.embedding_lookup(
              shadow_var, ops.convert_to_tensor(ids, dtype=dtypes.int64))
          loss = math_ops.reduce_mean(emb, axis=0)
          loss = array_ops.reshape(loss, (-1, 2))
          loss = math_ops.matmul(loss, array_ops.transpose(loss))
          return loss

        train_op = optimizer.minimize(lambda: _loss_fn(shadow_var, ids),
                                      [shadow_var])
        if not initialized:
          self.evaluate(variables.global_variables_initializer())
          initialized = True
        self.evaluate(train_op)

      keys, values = _sort_keys_and_values(*self.evaluate(var.export()))
      result_keys, result_values = _sort_keys_and_values(*self.evaluate(
          [shadow_var.ids, shadow_var.read_value(False)]))
      self.assertAllEqual(keys, result_keys)
      self.assertAllEqual(values, result_values)
def test_backward(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')

  init = tf.keras.initializers.RandomNormal(seed=0)
  model = get_sequential_model(de.keras.layers.BasicEmbedding,
                               4,
                               initializer=init,
                               bp_v2=False,
                               name='go582')
  optmz = adam.AdamOptimizer(1E-4)
  optmz = de.DynamicEmbeddingOptimizer(optmz)
  emb_layer = model.layers[0]
  model.compile(optimizer=optmz, loss='binary_crossentropy')
  start = 0
  batch_size = 10
  for i in range(1, 10):
    x = math_ops.range(start, start + batch_size * i, dtype=dtypes.int64)
    x = tf.reshape(x, (batch_size, -1))
    start += batch_size * i
    y = tf.zeros((batch_size, 1), dtype=dtypes.float32)
    model.fit(x, y, verbose=0)
  self.assertAllEqual(emb_layer.params.size(), start)
def test_sequential_model_save_and_load_weights(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')

  @tf.function
  def slot_map_fn(x):
    return tf.math.floormod(x, 2)

  init = tf.keras.initializers.RandomNormal(seed=0)
  model = get_sequential_model(de.keras.layers.FieldWiseEmbedding,
                               4,
                               2,
                               slot_map_fn,
                               bp_v2=False,
                               initializer=init,
                               name='pc053')
  optmz = tf.keras.optimizers.Adam(learning_rate=1E-2, amsgrad=True)
  optmz = de.DynamicEmbeddingOptimizer(optmz)
  emb_layer = model.layers[0]
  model.compile(optimizer=optmz, loss='binary_crossentropy')

  start = 0
  batch_size = 10
  for i in range(1, 10):
    x = math_ops.range(start, start + batch_size * i, dtype=dtypes.int64)
    x = tf.reshape(x, (batch_size, -1))
    start += batch_size * i
    y = tf.zeros((batch_size, 1), dtype=dtypes.float32)
    model.fit(x, y, verbose=0)

  ids = tf.range(0, 10, dtype=tf.int64)
  ids = tf.reshape(ids, (1, -1))
  expected = model(ids)

  save_dir = tempfile.mkdtemp(prefix='/tmp/')
  options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
  model.save(save_dir, signatures=None, options=options)

  copied_init = tf.keras.initializers.RandomNormal(seed=0)
  new_model = get_sequential_model(de.keras.layers.FieldWiseEmbedding,
                                   4,
                                   2,
                                   slot_map_fn,
                                   bp_v2=False,
                                   initializer=copied_init,
                                   name='pc053')
  new_emb_layer = new_model.layers[0]
  new_model.load_weights(save_dir)
  evaluate = new_model(ids)
  self.assertAllEqual(evaluate, expected)

  keys, values = emb_layer.params.export()
  seq = tf.argsort(keys)
  keys = tf.sort(keys)
  values = tf.gather(values, seq)

  new_keys, new_values = new_emb_layer.params.export()
  seq = tf.argsort(new_keys)
  new_keys = tf.sort(new_keys)
  new_values = tf.gather(new_values, seq)

  self.assertAllEqual(keys, new_keys)
  self.assertAllEqual(values, new_values)
def model_fn(features, labels, mode, params):
  print("features %s labels %s" % (features, labels))
  value_fea = features['value_fea']
  id_fea = features['id_fea']
  id_fea_len = id_fea.shape[1]
  batch_size = params["batch_size"]
  is_training = True if mode == tf.estimator.ModeKeys.TRAIN else False

  devices = None
  if is_training:
    devices = [
        "/job:ps/replica:0/task:{}/CPU:0".format(i)
        for i in range(params['ps_num'])
    ]
    initializer = tf.keras.initializers.RandomNormal(0.0, 0.1)
  else:
    devices = [
        "/job:localhost/replica:0/task:{}/CPU:0".format(0)
        for i in range(params['ps_num'])
    ]
    initializer = tf.keras.initializers.Zeros()

  if mode == tf.estimator.ModeKeys.PREDICT:
    tfra.dynamic_embedding.enable_inference_mode()

  dynamic_embeddings = tfra.dynamic_embedding.get_variable(
      name="dynamic_embeddings",
      dim=embedding_dim,
      devices=devices,
      initializer=initializer,
      trainable=is_training,
      init_size=8192)

  id_fea_shape = id_fea.shape
  id_fea = tf.reshape(id_fea, [-1])
  id_fea_val, id_fea_idx = tf.unique(id_fea)
  raw_embs = tfra.dynamic_embedding.embedding_lookup(
      params=dynamic_embeddings, ids=id_fea_val, name="embs")
  embs = tf.gather(raw_embs, id_fea_idx)
  embs = tf.reshape(embs, [-1, id_fea_len * embedding_dim])

  inputs = tf.concat([value_fea, embs], axis=-1)
  inputs = tf.compat.v1.layers.batch_normalization(inputs,
                                                   training=is_training)
  print("inputs shape %s" % inputs)

  # multi-layer mlp
  out, inners = mlp.multilayer_perception(inputs, [1024, 512, 256, 64, 1])
  logits = tf.reshape(out, [-1])
  tf.compat.v1.summary.histogram("logits", logits)

  if mode == tf.estimator.ModeKeys.TRAIN:
    global_step = tf.compat.v1.train.get_global_step()
    loss = tf.math.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels['label'],
                                                logits=logits))
    opt = de.DynamicEmbeddingOptimizer(
        tf.compat.v1.train.AdamOptimizer(beta1=0.9, beta2=0.999))
    tf.compat.v1.summary.scalar("loss", loss)
    tf.compat.v1.summary.scalar("global_step", global_step)
  elif mode == tf.estimator.ModeKeys.EVAL:
    loss = tf.math.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels['label'],
                                                logits=logits))
    tf.compat.v1.summary.scalar("loss", loss)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    pass

  if mode == tf.estimator.ModeKeys.TRAIN:
    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    train_op = opt.minimize(loss, global_step=global_step)
    train_op = tf.group([train_op, update_ops])
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
  elif mode == tf.estimator.ModeKeys.EVAL:
    return tf.estimator.EstimatorSpec(mode, loss=loss)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    outputs = {"outputs": tf.estimator.export.PredictOutput(outputs=logits)}
    predictions = {
        "logid": features["logid"],
        "logits": logits,
    }
    for k, v in features.items():
      predictions.update({k: v})
    return tf.estimator.EstimatorSpec(mode,
                                      predictions=predictions,
                                      export_outputs=outputs)
def common_minimize_trainable(self, base_opt, test_opt, name):
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)
  id = 0
  for (num_shards, k_dtype, d_dtype, initial_mode, dim,
       run_step) in itertools.product([3], [dtypes.int64], [dtypes.float32],
                                      ["constant"], [1, 10], [10]):
    with ops.Graph().as_default():
      id += 1
      raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
      raw_init_vals = [
          [x] * dim
          for x in [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
      ]
      raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
      sp_ids = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [2, 1]],
          values=raw_ids,
          dense_shape=[3, 2],
      )
      x = constant_op.constant([[_x * dim] for _x in [[0.4], [0.5], [0.6]]],
                               dtype=d_dtype)
      x = array_ops.reshape(x, shape=(3 * dim, 1))

      # base var prepare
      base_var = variables.Variable(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          dtype=d_dtype,
          shape=[len(raw_init_ids), dim],
      )

      # test var prepare
      embeddings = de.get_variable(
          "t1030-" + name + str(id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )
      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      init_op = embeddings.upsert(init_ids, init_vals)

      # base branch
      base_embedding = embedding_ops.embedding_lookup_sparse(base_var,
                                                             sp_ids,
                                                             None,
                                                             combiner="sum")
      base_embedding = array_ops.reshape(base_embedding, shape=[1, 3 * dim])
      pred0 = math_ops.matmul(base_embedding, x)
      loss0 = pred0 * pred0
      base_opt_op = base_opt.minimize(loss0, var_list=[base_var])

      # test branch
      test_var, trainable = de.embedding_lookup_sparse(
          embeddings,
          sp_ids,
          sp_weights=None,
          combiner="sum",
          return_trainable=True,
      )
      pred1 = math_ops.matmul(
          array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
      loss1 = pred1 * pred1
      gstep = training_util.create_global_step()
      test_opt_op = test_opt.minimize(loss1,
                                      var_list=[trainable],
                                      global_step=gstep)
      table_var = array_ops.reshape(embeddings.lookup(init_ids),
                                    shape=[10, dim])

      with monitored_session.MonitoredTrainingSession(
          is_chief=True, config=default_config) as sess:
        sess.run(init_op)
        self.assertAllCloseAccordingToType(
            np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
            sess.run(base_var),
        )

        # run base
        for _ in range(run_step):
          sess.run(base_opt_op)
          sess.run(test_opt_op)

        # Validate global_step
        self.assertEqual(run_step, sess.run(gstep))

        # Validate updated params
        self.assertAllCloseAccordingToType(
            sess.run(base_var),
            sess.run(table_var),
            msg="Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                             dim, run_step),
        )
        self.device_check(embeddings)
def common_minimize_trainable(self, base_opt, test_opt, name):
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)
  id = 0
  for (num_shards, k_dtype, d_dtype, initial_mode, dim,
       run_step) in itertools.product([1, 2], [dtypes.int64],
                                      [dtypes.float32], ["constant"],
                                      [1, 10], [10]):
    id += 1
    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config) as sess:
      # common define
      raw_init_ids = [0, 1]
      raw_init_vals = np.random.rand(2, dim)
      raw_ids = [0]
      x = constant_op.constant(np.random.rand(dim, len(raw_ids)),
                               dtype=d_dtype)

      # base graph
      base_var = resource_variable_ops.ResourceVariable(raw_init_vals,
                                                        dtype=d_dtype)
      ids = constant_op.constant(raw_ids, dtype=k_dtype)
      pred0 = math_ops.matmul(embedding_ops.embedding_lookup([base_var], ids),
                              x)
      loss0 = pred0 * pred0
      base_opt_op = base_opt.minimize(loss0)

      # test graph
      embeddings = de.get_variable(
          "t2020-" + name + str(id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )
      self.device_check(embeddings)

      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      init_op = embeddings.upsert(init_ids, init_vals)
      self.evaluate(init_op)

      test_var, trainable = de.embedding_lookup([embeddings],
                                                ids,
                                                return_trainable=True)
      pred1 = math_ops.matmul(test_var, x)
      loss1 = pred1 * pred1
      test_opt_op = test_opt.minimize(loss1, var_list=[trainable])

      self.evaluate(variables.global_variables_initializer())
      for _ in range(run_step):
        sess.run(base_opt_op)

      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType(raw_init_vals[raw_ids],
                                         self.evaluate(test_var))

      # Run `run_step` step of sgd
      for _ in range(run_step):
        sess.run(test_opt_op)

      table_var = embeddings.lookup(ids)
      # Validate updated params
      self.assertAllCloseAccordingToType(
          self.evaluate(base_var)[raw_ids],
          self.evaluate(table_var),
          msg="Cond:{},{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                              initial_mode, dim, run_step),
      )
def test_traing_save_restore(self):
  opt = de.DynamicEmbeddingOptimizer(adam.AdamOptimizer(0.3))
  id = 0
  if test_util.is_gpu_available():
    dim_list = [1, 2, 4, 8, 10, 16, 32, 64, 100, 256, 500]
  else:
    dim_list = [10]
  for key_dtype, value_dtype, dim, step in itertools.product([dtypes.int64],
                                                             [dtypes.float32],
                                                             dim_list, [10]):
    id += 1
    save_dir = os.path.join(self.get_temp_dir(), "save_restore")
    save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash")

    ids = script_ops.py_func(_create_dynamic_shape_tensor(),
                             inp=[],
                             Tout=key_dtype,
                             stateful=True)
    params = de.get_variable(
        name="params-test-0915-" + str(id),
        key_dtype=key_dtype,
        value_dtype=value_dtype,
        initializer=init_ops.random_normal_initializer(0.0, 0.01),
        dim=dim,
    )
    _, var0 = de.embedding_lookup(params, ids, return_trainable=True)

    def loss():
      return var0 * var0

    params_keys, params_vals = params.export()
    mini = opt.minimize(loss, var_list=[var0])
    opt_slots = [opt.get_slot(var0, _s) for _s in opt.get_slot_names()]
    _saver = saver.Saver([params] + [_s.params for _s in opt_slots])

    with self.session(config=default_config,
                      use_gpu=test_util.is_gpu_available()) as sess:
      self.evaluate(variables.global_variables_initializer())
      for _i in range(step):
        self.evaluate([mini])
      size_before_saved = self.evaluate(params.size())
      np_params_keys_before_saved = self.evaluate(params_keys)
      np_params_vals_before_saved = self.evaluate(params_vals)
      opt_slots_kv_pairs = [_s.params.export() for _s in opt_slots]
      np_slots_kv_pairs_before_saved = [
          self.evaluate(_kv) for _kv in opt_slots_kv_pairs
      ]
      _saver.save(sess, save_path)

    with self.session(config=default_config,
                      use_gpu=test_util.is_gpu_available()) as sess:
      self.evaluate(variables.global_variables_initializer())
      self.assertAllEqual(0, self.evaluate(params.size()))

      _saver.restore(sess, save_path)
      params_keys_restored, params_vals_restored = params.export()
      size_after_restored = self.evaluate(params.size())
      np_params_keys_after_restored = self.evaluate(params_keys_restored)
      np_params_vals_after_restored = self.evaluate(params_vals_restored)

      opt_slots_kv_pairs_restored = [_s.params.export() for _s in opt_slots]
      np_slots_kv_pairs_after_restored = [
          self.evaluate(_kv) for _kv in opt_slots_kv_pairs_restored
      ]
      self.assertAllEqual(size_before_saved, size_after_restored)
      self.assertAllEqual(
          np.sort(np_params_keys_before_saved),
          np.sort(np_params_keys_after_restored),
      )
      self.assertAllEqual(
          np.sort(np_params_vals_before_saved, axis=0),
          np.sort(np_params_vals_after_restored, axis=0),
      )
      for pairs_before, pairs_after in zip(np_slots_kv_pairs_before_saved,
                                           np_slots_kv_pairs_after_restored):
        self.assertAllEqual(
            np.sort(pairs_before[0], axis=0),
            np.sort(pairs_after[0], axis=0),
        )
        self.assertAllEqual(
            np.sort(pairs_before[1], axis=0),
            np.sort(pairs_after[1], axis=0),
        )
      if test_util.is_gpu_available():
        self.assertTrue("GPU" in params.tables[0].resource_handle.device)
def model_fn(features, labels, mode, params):
  #logging.info('mode: %s, labels: %s, params: %s, features: %s', mode, labels, params, features)
  if params["args"].get("addon_embedding"):
    import tensorflow_recommenders_addons as tfra
    import tensorflow_recommenders_addons.dynamic_embedding as dynamic_embedding
  else:
    import tensorflow.dynamic_embedding as dynamic_embedding
  features.update(labels)

  logging.info("------ build hyper parameters -------")
  embedding_size = params["parameters"]["embedding_size"]
  learning_rate = params["parameters"]["learning_rate"]
  use_bn = params["parameters"]["use_bn"]
  feat = params['features']
  sparse_feat_list = list(set(feat["sparse"]) -
                          set(SPARSE_MASK)) if 'sparse' in feat else []
  sparse_seq_feat_list = list(
      set(feat["sparse_seq"]) -
      set(SPARSE_SEQ_MASK)) if 'sparse_seq' in feat else []
  sparse_seq_feat_list = []
  #dense_feat_list = list(set(feat["dense"]) - set(DENSE_MASK)) if 'dense' in feat else []
  # Neither hashtable v1 nor v2 image can disable bn and mask dense_feat at
  # the same time.
  dense_feat_list = []
  dense_seq_feat_list = list(
      set(feat["dense_seq"]) -
      set(DENSE_SEQ_MASK)) if 'dense_seq' in feat else []
  sparse_feat_num = len(sparse_feat_list)
  sparse_seq_num = len(sparse_seq_feat_list)
  dense_feat_num = len(dense_feat_list)
  dense_seq_feat_num = len(dense_seq_feat_list)
  all_features = (sparse_feat_list + sparse_seq_feat_list + dense_feat_list +
                  dense_seq_feat_list)
  batch_size = tf.shape(features[goods_id_feat])[0]
  logging.info("------ show batch_size: {} -------".format(batch_size))

  level_0_feats = list(set(params.get('level_0_feat_list')) & set(all_features))
  logging.info('level_0_feats: {}'.format(level_0_feats))
  new_features = dict()
  if params["args"].get("level_flag") and params["args"].get(
      "job_type") == "export":
    for feature_name in features:
      if feature_name in level_0_feats:
        new_features[feature_name] = tf.reshape(
            tf.tile(tf.reshape(features[feature_name], [1, -1]),
                    [batch_size, 1]), [batch_size, -1])
      else:
        new_features[feature_name] = features[feature_name]
    features = new_features

  l2_reg = params["parameters"]["l2_reg"]
  is_training = True if mode == tf.estimator.ModeKeys.TRAIN else False
  has_label = True if 'is_imp' in features else False
  logging.info("is_training: {}, has_label: {}, features: {}".format(
      is_training, has_label, features))

  logging.info("------ build embedding -------")
  # def partition_fn(keys, shard_num=params["parameters"]["ps_nums"]):
  #   return tf.cast(keys % shard_num, dtype=tf.int32)
  if is_training:
    devices_info = [
        "/job:ps/replica:0/task:{}/CPU:0".format(i)
        for i in range(params["parameters"]["ps_num"])
    ]
    initializer = tf.compat.v1.truncated_normal_initializer(0.0, 1e-2)
  else:
    devices_info = [
        "/job:localhost/replica:0/task:{}/CPU:0".format(0)
        for i in range(params["parameters"]["ps_num"])
    ]
    initializer = tf.compat.v1.zeros_initializer()
  logging.info(
      "------ dynamic_embedding devices_info is {} -------".format(
          devices_info))

  if mode == tf.estimator.ModeKeys.PREDICT:
    dynamic_embedding.enable_inference_mode()

  deep_dynamic_variables = dynamic_embedding.get_variable(
      name="deep_dynamic_embeddings",
      devices=devices_info,
      initializer=initializer,
      # partitioner=partition_fn,
      dim=embedding_size,
      trainable=is_training,
      #init_size=INIT_SIZE
  )

  sparse_feat = None
  sparse_unique_ids = None
  if sparse_feat_num > 0:
    logging.info("------ build sparse feature -------")
    id_list = sorted(sparse_feat_list)
    ft_sparse_idx = tf.concat(
        [tf.reshape(features[str(i)], [-1, 1]) for i in id_list], axis=1)
    sparse_unique_ids, sparse_unique_idx = tf.unique(
        tf.reshape(ft_sparse_idx, [-1]))
    sparse_weights = dynamic_embedding.embedding_lookup(
        params=deep_dynamic_variables,
        ids=sparse_unique_ids,
        name="deep_sparse_weights")
    if params["args"].get("zero_padding"):
      sparse_weights = tf.reshape(sparse_weights, [-1, embedding_size])
      sparse_weights = tf.where(
          tf.not_equal(
              tf.expand_dims(sparse_unique_ids, axis=1),
              tf.zeros_like(tf.expand_dims(sparse_unique_ids, axis=1))),
          sparse_weights, tf.zeros_like(sparse_weights))
    sparse_weights = tf.gather(sparse_weights, sparse_unique_idx)
    sparse_feat = tf.reshape(
        sparse_weights, shape=[batch_size, sparse_feat_num * embedding_size])

  sparse_seq_feat = None
  sparse_seq_unique_ids = None
  if sparse_seq_num > 0:
    logging.info("---- build sparse seq feature ---")
    if params["args"].get("merge_sparse_seq"):
      # [B, s1], [B, s2], ... [B, sn]
      sparse_seq_name_list = sorted(sparse_seq_feat_list)
      # [B, [s1, s2, ...sn]] => [B, per_seq_len*seq_num]
      ft_sparse_seq_ids = tf.concat([
          tf.reshape(features[str(i)], [batch_size, -1])
          for i in sparse_seq_name_list
      ],
                                    axis=1)
      # [u], [B*per_seq_len*seq_num]
      sparse_seq_unique_ids, sparse_seq_unique_idx = tf.unique(
          tf.reshape(ft_sparse_seq_ids, [-1]))
      # [u, e]
      sparse_seq_weights = dynamic_embedding.embedding_lookup(
          params=deep_dynamic_variables,
          ids=sparse_seq_unique_ids,
          name="deep_sparse_seq_weights")
      # [u, e]
      deep_embed_seq = tf.where(
          tf.not_equal(
              tf.expand_dims(sparse_seq_unique_ids, axis=1),
              tf.zeros_like(tf.expand_dims(sparse_seq_unique_ids, axis=1))),
          sparse_seq_weights, tf.zeros_like(sparse_seq_weights))
      # [B*per_seq_len*seq_num, e] => [B, seq_num, per_seq_len, e]
      deep_embedding_seq = tf.reshape(
          tf.gather(deep_embed_seq, sparse_seq_unique_idx),
          shape=[batch_size, sparse_seq_num, -1, embedding_size])
      if params["parameters"]["combiner"] == "sum":
        tmp_feat = tf.reduce_sum(deep_embedding_seq, axis=2)
      else:
        tmp_feat = tf.reduce_mean(deep_embedding_seq, axis=2)
      # [B, seq_num*e]
      sparse_seq_feat = tf.reshape(
          tmp_feat, [batch_size, sparse_seq_num * embedding_size])
    else:
      sparse_seq_feats = []
      sparse_ids = []
      for sparse_seq_name in sparse_seq_feat_list:
        sp_ids = features[sparse_seq_name]
        if params["args"].get("zero_padding2"):
          sparse_seq_unique_ids, sparse_seq_unique_idx, _ = tf.unique_with_counts(
              tf.reshape(sp_ids, [-1]))
          deep_sparse_seq_weights = tf.reshape(
              dynamic_embedding.embedding_lookup(
                  params=deep_dynamic_variables,
                  ids=sparse_seq_unique_ids,
                  name="deep_sparse_weights_{}".format(sparse_seq_name)),
              [-1, embedding_size])
          deep_embed_seq = tf.where(
              tf.not_equal(
                  tf.expand_dims(sparse_seq_unique_ids, axis=1),
                  tf.zeros_like(tf.expand_dims(sparse_seq_unique_ids,
                                               axis=1))),
              deep_sparse_seq_weights,
              tf.zeros_like(deep_sparse_seq_weights))
          deep_embedding_seq = tf.reshape(
              tf.gather(deep_embed_seq, sparse_seq_unique_idx),
              shape=[batch_size, -1, embedding_size])
          if params["parameters"]["combiner"] == "sum":
            tmp_feat = tf.reduce_sum(deep_embedding_seq, axis=1)
          else:
            tmp_feat = tf.reduce_mean(deep_embedding_seq, axis=1)
          sparse_ids.append(sparse_seq_unique_ids)
          sparse_seq_feats.append(
              tf.reshape(tmp_feat, [batch_size, embedding_size]))
        else:
          tmp_feat = dynamic_embedding.safe_embedding_lookup_sparse(
              embedding_weights=deep_dynamic_variables,
              sparse_ids=sp_ids,
              combiner=params["parameters"]["combiner"],
              name="safe_embedding_lookup_sparse")
          temp_uni_id, _, _ = tf.unique_with_counts(
              tf.reshape(sp_ids.values, [-1]))
          sparse_ids.append(temp_uni_id)
          sparse_seq_feats.append(
              tf.reshape(tmp_feat, [batch_size, embedding_size]))
      sparse_seq_feat = tf.concat(sparse_seq_feats, axis=1)
      sparse_seq_unique_ids, _ = tf.unique(tf.concat(sparse_ids, axis=0))

  dense_feat = None
  if dense_feat_num > 0:
    logging.info("------ build dense feature -------")
    den_id_list = sorted(dense_feat_list)
    dense_feat_base = tf.concat(
        [tf.reshape(features[str(i)], [-1, 1]) for i in den_id_list], axis=1)
    #deep_dense_w1 = tf.compat.v1.get_variable(
    #    'deep_dense_w1',
    #    tf.TensorShape([dense_feat_num]),
    #    initializer=tf.compat.v1.truncated_normal_initializer(
    #        2.0 / math.sqrt(dense_feat_num)),
    #    dtype=tf.float32)
    #deep_dense_w2 = tf.compat.v1.get_variable(
    #    'deep_dense_w2',
    #    tf.TensorShape([dense_feat_num]),
    #    initializer=tf.compat.v1.truncated_normal_initializer(
    #        2.0 / math.sqrt(dense_feat_num)),
    #    dtype=tf.float32)
    #w1 = tf.tile(tf.expand_dims(deep_dense_w1, axis=0),
    #             [tf.shape(dense_feat_base)[0], 1])
    #dense_input_1 = tf.multiply(dense_feat_base, w1)
    #dense_feat = dense_input_1
    dense_feat = dense_feat_base

  dense_seq_feat = None
  if dense_seq_feat_num > 0:
    logging.info("------ build dense seq feature -------")
    den_seq_id_list = sorted(dense_seq_feat_list)
    dense_seq_feat = tf.concat([
        tf.reshape(features[str(i[0])], [-1, i[1]]) for i in den_seq_id_list
    ],
                               axis=1)

  logging.info("------ join all feature -------")
  fc_inputs = tf.concat([
      x for x in [sparse_feat, sparse_seq_feat, dense_feat, dense_seq_feat]
      if x is not None
  ],
                        axis=1)
  logging.info("---- tracy debug input is ----")
  logging.info(sparse_feat)
  logging.info(sparse_seq_feat)
  logging.info(dense_feat)
  logging.info(dense_seq_feat)
  logging.info(fc_inputs)

  logging.info("------ join fc -------")
  for idx, units in enumerate(params["parameters"]["hidden_units"]):
    fc_inputs = fully_connected_with_bn_ahead(inputs=fc_inputs,
                                              num_outputs=units,
                                              l2_reg=l2_reg,
                                              scope="out_mlp_{}".format(idx),
                                              activation_fn=tf.nn.relu,
                                              train_phase=is_training,
                                              use_bn=use_bn)
  y_deep_ctr = fully_connected_with_bn_ahead(inputs=fc_inputs,
                                             num_outputs=1,
                                             activation_fn=tf.identity,
                                             l2_reg=l2_reg,
                                             scope="ctr_mlp",
                                             train_phase=is_training,
                                             use_bn=use_bn)

  logging.info("------ build ctr out -------")
  sample_rate = params["args"]["sample_rate"]
  logit = tf.reshape(y_deep_ctr, shape=[-1], name="logit")
  sample_logit = get_sample_logits(logit, sample_rate)
  pred_ctr = tf.nn.sigmoid(logit, name="pred_ctr")
  sample_pred_ctr = tf.nn.sigmoid(sample_logit, name="sample_pred_ctr")

  logging.info("------ build predictions -------")
  preds = {
      'p_ctr': tf.reshape(pred_ctr, shape=[-1, 1]),
  }
  logging.info("---- deep_dynamic_variables.size ----")
  logging.info(deep_dynamic_variables.size())
  size = tf.identity(deep_dynamic_variables.size(), name="size")

  label_col = "is_clk"
  if params["args"].get("set_train_labels"):
    label_col = params["args"]["set_train_labels"]["1"]
  logging.info(
      "------ build labels, label_col: {} -------".format(label_col))
  if has_label:
    labels_ctr = tf.reshape(features["is_clk"], shape=[-1], name="labels_ctr")

  if mode == tf.estimator.ModeKeys.PREDICT:
    logging.info("---- build tf-serving predict ----")
    pred_cvr = tf.fill(tf.shape(pred_ctr), 1.0)
    preds.update({
        'labels_cart': tf.reshape(pred_cvr, shape=[-1, 1]),
        'p_car': tf.reshape(features["dense_1608"], shape=[-1, 1]),
        'labels_cvr': tf.reshape(pred_cvr, shape=[-1, 1]),
        'p_cvr': tf.reshape(pred_cvr, shape=[-1, 1]),
    })
    if 'logid' in features:
      preds.update({'logid': tf.reshape(features["logid"], shape=[-1, 1])})
    if has_label:
      logging.info("------ build offline label -------")
      preds["labels_ctr"] = tf.reshape(labels_ctr, shape=[-1, 1])
    export_outputs = {
        "predict_export_outputs":
            tf.estimator.export.PredictOutput(outputs=preds)
    }
    return tf.estimator.EstimatorSpec(mode,
                                      predictions=preds,
                                      export_outputs=export_outputs)

  logging.info("----all vars:-----" + str(tf.compat.v1.global_variables()))
  for var in tf.compat.v1.trainable_variables():
    logging.info("----trainable------" + str(var))

  logging.info("------ build metric -------")
  #loss = tf.reduce_mean(
  #    tf.compat.v1.losses.log_loss(labels=labels_ctr,
  #                                 predictions=sample_pred_ctr),
  #    name="loss")
  loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
      labels=labels_ctr, logits=sample_logit),
                        name="loss")
  ctr_auc = tf.compat.v1.metrics.auc(labels=labels_ctr,
                                     predictions=sample_pred_ctr,
                                     name="ctr_auc")
  label_ctr_avg = tf.reduce_mean(labels_ctr, name="label_ctr_avg")
  real_pred_ctr_avg = tf.reduce_mean(pred_ctr, name="real_pred_ctr_avg")
  sample_pred_ctr_avg = tf.reduce_mean(sample_pred_ctr, name="pred_ctr_avg")
  sample_pred_bias_avg = tf.add(sample_pred_ctr_avg,
                                tf.negative(label_ctr_avg),
                                name="pred_bias_avg")
  tf.compat.v1.summary.histogram('labels_ctr', labels_ctr)
  tf.compat.v1.summary.histogram('pred_ctr', sample_pred_ctr)
  tf.compat.v1.summary.histogram('real_pred_ctr', pred_ctr)
  tf.compat.v1.summary.scalar('label_ctr_avg', label_ctr_avg)
  tf.compat.v1.summary.scalar('pred_ctr_avg', sample_pred_ctr_avg)
  tf.compat.v1.summary.scalar('real_pred_ctr_avg', real_pred_ctr_avg)
  tf.compat.v1.summary.scalar('pred_bias_avg', sample_pred_bias_avg)
  tf.compat.v1.summary.scalar('loss', loss)
  tf.compat.v1.summary.scalar('ctr_auc', ctr_auc[1])

  logging.info("------ compute l2 reg -------")
  if params["parameters"]["use_l2"]:
    all_unique_ids, _ = tf.unique(
        tf.concat([
            x for x in [sparse_unique_ids, sparse_seq_unique_ids]
            if x is not None
        ],
                  axis=0))
    all_unique_ids_w = dynamic_embedding.embedding_lookup(
        deep_dynamic_variables,
        all_unique_ids,
        name="unique_ids_weights",
        return_trainable=False)
    embed_loss = l2_reg * tf.nn.l2_loss(
        tf.reshape(all_unique_ids_w,
                   shape=[-1, embedding_size])) + tf.reduce_sum(
                       tf.compat.v1.get_collection(
                           tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES))
    tf.compat.v1.summary.scalar('embed_loss', embed_loss)
    loss = loss + embed_loss
  loss = tf.identity(loss, name="total_loss")
  tf.compat.v1.summary.scalar('total_loss', loss)

  if mode == tf.estimator.ModeKeys.EVAL:
    logging.info("------ EVAL -------")
    eval_metric_ops = {
        "ctr_auc_eval": ctr_auc,
    }
    if has_label:
      logging.info("------ build offline label -------")
      preds["labels_ctr"] = tf.reshape(labels_ctr, shape=[-1, 1])
    export_outputs = {
        "predict_export_outputs":
            tf.estimator.export.PredictOutput(outputs=preds)
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops,
                                      export_outputs=export_outputs)

  logging.info("---- Learning rate ----")
  lr = get_learning_rate(params["parameters"]["learning_rate"],
                         params["parameters"]["use_decay"])
  if mode == tf.estimator.ModeKeys.TRAIN:
    global_step = tf.compat.v1.train.get_global_step()
    logging.info("------ TRAIN -------")
    optimizer_type = params["parameters"].get('optimizer', 'Adam')
    if optimizer_type == 'Sgd':
      optimizer = tf.compat.v1.train.GradientDescentOptimizer(
          learning_rate=lr)
    elif optimizer_type == 'Adagrad':
      optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=lr)
    elif optimizer_type == 'Rmsprop':
      optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=lr)
    elif optimizer_type == 'Ftrl':
      optimizer = tf.compat.v1.train.FtrlOptimizer(learning_rate=lr)
    elif optimizer_type == 'Momentum':
      optimizer = tf.compat.v1.train.MomentumOptimizer(learning_rate=lr,
                                                       momentum=0.9)
    else:
      optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=lr,
                                                   beta1=0.9,
                                                   beta2=0.999,
                                                   epsilon=1e-8)
    if params["args"].get("addon_embedding"):
      optimizer = dynamic_embedding.DynamicEmbeddingOptimizer(optimizer)
    train_op = optimizer.minimize(loss, global_step=global_step)
    # fix tf2 batch_normalization bug
    update_ops = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    logging.info('train ops: {}, update ops: {}'.format(
        str(train_op), str(update_ops)))
    train_op = tf.group([train_op, update_ops])
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=preds,
                                      loss=loss,
                                      train_op=train_op)
def model_fn(features, labels, mode, params):
  print("features %s" % features)
  value_fea = features['value_fea']
  id_fea = features['id_fea']
  id_fea_len = id_fea.shape[1]
  is_training = True if mode == tf.estimator.ModeKeys.TRAIN else False

  if is_training:
    initializer = tf.keras.initializers.RandomNormal(0.0, 0.1)
  else:
    initializer = tf.keras.initializers.Zeros()

  if mode == tf.estimator.ModeKeys.PREDICT:
    tfra.dynamic_embedding.enable_inference_mode()

  dynamic_embeddings = tfra.dynamic_embedding.get_variable(
      name="dynamic_embeddings",
      dim=embedding_dim,
      initializer=initializer,
      trainable=is_training)

  id_fea_shape = id_fea.shape
  id_fea = tf.reshape(id_fea, [-1])
  id_fea_val, id_fea_idx = tf.unique(id_fea)
  raw_embs = tfra.dynamic_embedding.embedding_lookup(
      params=dynamic_embeddings, ids=id_fea_val, name="embs")
  embs = tf.gather(raw_embs, id_fea_idx)
  embs = tf.reshape(embs, [-1, id_fea_len * embedding_dim])

  inputs = tf.concat([value_fea, embs], axis=-1)
  #inputs = value_fea

  # three layer mlp
  out, weight_and_bias = mlp.multilayer_perception(inputs, [256, 64, 1])
  logits = tf.reshape(out, [-1])

  if mode == tf.estimator.ModeKeys.TRAIN:
    global_step = tf.compat.v1.train.get_global_step()
    loss = tf.math.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))
    opt = de.DynamicEmbeddingOptimizer(tf.compat.v1.train.AdamOptimizer())
  elif mode == tf.estimator.ModeKeys.EVAL:
    loss = tf.math.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))
  elif mode == tf.estimator.ModeKeys.PREDICT:
    pass

  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = opt.minimize(loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
  elif mode == tf.estimator.ModeKeys.EVAL:
    return tf.estimator.EstimatorSpec(mode, loss=loss)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    outputs = {"outputs": tf.estimator.export.PredictOutput(outputs=logits)}
    predictions = {
        "logits": logits,
        "embs": embs,
        "raw_embs": tf.reshape(tf.slice(raw_embs, [0, 0], [100, 32]), [5, -1])
        #"weight": tf.reshape(weight_and_bias[0]["weight_0"].read_value(), [5, -1])
    }
    for k, v in features.items():
      predictions.update({k: v})
    return tf.estimator.EstimatorSpec(mode,
                                      predictions=predictions,
                                      export_outputs=outputs)
def common_minimize_trainable_v2(self, base_opt, test_opt, name):
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)
  id = 0
  for (num_shards, k_dtype, d_dtype, initial_mode, dim,
       run_step) in itertools.product([1, 2], [dtypes.int64],
                                      [dtypes.float32], ["constant"],
                                      [1, 10], [10]):
    id += 1
    # common define
    raw_init_ids = [0, 1]
    raw_init_vals = np.random.rand(2, dim)
    raw_ids = [0]

    # base graph
    def base_fn():
      embeddings = resource_variable_ops.ResourceVariable(raw_init_vals,
                                                          dtype=d_dtype)

      def loss_fn(emb):
        ids = constant_op.constant(raw_ids, dtype=k_dtype)
        pred = embedding_ops.embedding_lookup([emb], ids)
        return pred * pred

      base_opt_op = base_opt.minimize(lambda: loss_fn(embeddings),
                                      [embeddings])
      self.evaluate(variables.global_variables_initializer())
      for _ in range(run_step):
        self.evaluate(base_opt_op)
      return embeddings

    base_opt_val = self.evaluate(base_fn())

    # test graph
    def test_fn():
      embeddings = de.get_variable(
          "t2020-v2-" + name + str(id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )
      self.device_check(embeddings)
      trainables = []
      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      self.evaluate(embeddings.upsert(init_ids, init_vals))

      def var_fn():
        return trainables

      def loss_fn(x, trainables):
        ids = constant_op.constant(raw_ids, dtype=k_dtype)
        pred, trainable = de.embedding_lookup([x], ids, return_trainable=True)
        trainables.clear()
        trainables.append(trainable)
        return pred * pred

      test_opt_op = test_opt.minimize(lambda: loss_fn(embeddings, trainables),
                                      var_fn)
      self.evaluate(variables.global_variables_initializer())
      for _ in range(run_step):
        self.evaluate(test_opt_op)
      return embeddings.lookup(init_ids)

    with ops.device(_get_devices()[0]):
      test_opt_val = self.evaluate(test_fn())

    self.assertAllCloseAccordingToType(
        base_opt_val,
        test_opt_val,
        msg="Cond:{},{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                            initial_mode, dim, run_step),
    )
def commonly_apply_restriction_verify_v2(self, optimizer):
  if not context.executing_eagerly():
    self.skipTest('Skip graph mode test.')
  optimizer = de.DynamicEmbeddingOptimizer(optimizer)

  first_inputs = np.array(range(6), dtype=np.int64)
  second_inputs = np.array(range(3, 9), dtype=np.int64)
  overdue_features = np.array([0, 1, 2, 6, 7, 8], dtype=np.int64)
  updated_features = np.array(range(3, 6), dtype=np.int64)
  all_inputs = np.array(range(9), dtype=np.int64)
  oversize_trigger = 100
  trainables = []

  with self.session(config=default_config):
    var = de.get_variable('sp_var',
                          key_dtype=dtypes.int64,
                          value_dtype=dtypes.float32,
                          initializer=-0.1,
                          dim=2,
                          restrict_policy=de.FrequencyRestrictPolicy)

    def loss_fn(var, features, trainables):
      embed_w, trainable = de.embedding_lookup(var,
                                               features,
                                               return_trainable=True,
                                               name='vt2231')
      trainables.clear()
      trainables.append(trainable)
      return _simple_loss(embed_w)

    def var_fn():
      return trainables

    optimizer.minimize(lambda: loss_fn(var, first_inputs, trainables), var_fn)
    self.assertAllEqual(var.size(), len(first_inputs))
    slot_params = [
        optimizer.get_slot(trainables[0], name).params
        for name in optimizer.get_slot_names()
    ]
    all_vars = [var] + slot_params + [var.restrict_policy.status]

    optimizer.minimize(lambda: loss_fn(var, second_inputs, trainables),
                       var_fn)
    for v in all_vars:
      keys, _ = v.export()
      self.assertAllEqual(sorted(keys), all_inputs)

    keys, freq = var.restrict_policy.status.export()
    kvs = sorted(dict(zip(keys.numpy(), freq.numpy())).items())
    freq = np.array([x[1] for x in kvs])
    for x in freq[overdue_features]:
      for y in freq[updated_features]:
        self.assertLess(x, y)

    var.restrict_policy.apply_restriction(len(updated_features),
                                          trigger=oversize_trigger)
    for v in all_vars:
      self.assertAllEqual(v.size(), len(all_inputs))

    var.restrict_policy.apply_restriction(len(updated_features),
                                          trigger=len(updated_features))
    for v in all_vars:
      keys, _ = v.export()
      self.assertAllEqual(sorted(keys), updated_features)
def common_minimize_trainable(self, base_opt, test_opt, name):
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)
  id = 0
  config = config_pb2.ConfigProto(
      allow_soft_placement=True,
      gpu_options=config_pb2.GPUOptions(allow_growth=True),
  )
  for (num_shards, k_dtype, d_dtype, initial_mode, dim,
       run_step) in itertools.product([1, 2], [dtypes.int64],
                                      [dtypes.float32], ["constant"],
                                      [1, 10], [10]):
    with self.session(config=config, use_gpu=test_util.is_gpu_available()):
      id += 1
      raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
      raw_init_vals = [
          [x] * dim
          for x in [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
      ]
      raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
      sp_ids = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [2, 1]],
          values=raw_ids,
          dense_shape=[3, 2],
      )
      x = constant_op.constant([[_x * dim] for _x in [[0.4], [0.5], [0.6]]],
                               dtype=d_dtype)
      x = array_ops.reshape(x, shape=(3 * dim, 1))

      # base var prepare
      base_var = variables.Variable(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          dtype=d_dtype,
          shape=[len(raw_init_ids), dim],
      )
      base_embedding = embedding_ops.safe_embedding_lookup_sparse(
          base_var, sp_ids, None, combiner="sum")
      base_embedding = array_ops.reshape(base_embedding, shape=[1, 3 * dim])
      pred0 = math_ops.matmul(base_embedding, x)
      loss0 = pred0 * pred0
      base_opt_op = base_opt.minimize(loss0, var_list=[base_var])

      # test var prepare
      embeddings = de.get_variable(
          "s6030-" + name + str(id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )
      self.device_check(embeddings)
      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      init_op = embeddings.upsert(init_ids, init_vals)
      self.evaluate(init_op)

      # test branch
      test_var, trainable = de.safe_embedding_lookup_sparse(
          embeddings,
          sp_ids,
          sparse_weights=None,
          combiner="sum",
          return_trainable=True,
      )
      pred1 = math_ops.matmul(
          array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
      loss1 = pred1 * pred1
      test_opt_op = test_opt.minimize(loss1, var_list=[trainable])

      self.evaluate(variables.global_variables_initializer())
      self.assertAllCloseAccordingToType(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          self.evaluate(base_var),
      )

      # run base
      for _ in range(run_step):
        self.evaluate(base_opt_op)

      # Run `run_step` step of sgd
      for _ in range(run_step):
        self.evaluate(test_opt_op)

      table_var = array_ops.reshape(embeddings.lookup(init_ids),
                                    shape=[10, dim])
      # Validate updated params
      self.assertAllCloseAccordingToType(
          self.evaluate(base_var),
          self.evaluate(table_var),
          msg="Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype, dim,
                                           run_step),
      )
def common_minimize_trainable_v2(self, base_opt, test_opt, name):
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)
  id = 0
  for (num_shards, k_dtype, d_dtype, initial_mode, dim,
       run_step) in itertools.product([1, 2], [dtypes.int64],
                                      [dtypes.float32], ["constant"],
                                      [1, 10], [10]):
    id += 1
    raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    raw_init_vals = [
        [x] * dim
        for x in [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
    ]
    raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
    sp_ids = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0], [2, 1]],
        values=raw_ids,
        dense_shape=[3, 2],
    )
    x = constant_op.constant([[_x * dim] for _x in [[0.4], [0.5], [0.6]]],
                             dtype=d_dtype)
    x = array_ops.reshape(x, shape=(dim, -1))

    # base graph
    def base_fn():
      embeddings = variables.Variable(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          dtype=d_dtype,
          shape=[len(raw_init_ids), dim],
      )

      def loss_fn(emb):
        embedding = embedding_ops.safe_embedding_lookup_sparse(
            emb, sp_ids, None, combiner="sum")
        pred0 = math_ops.matmul(embedding, x)
        return pred0 * pred0

      base_opt_op = base_opt.minimize(lambda: loss_fn(embeddings),
                                      [embeddings])
      self.evaluate(variables.global_variables_initializer())
      for _ in range(run_step):
        self.evaluate(base_opt_op)
      return embeddings

    base_opt_val = self.evaluate(base_fn())

    # test graph
    def test_fn():
      embeddings = de.get_variable(
          "s6030-v2-" + name + str(id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )
      self.device_check(embeddings)
      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      self.evaluate(embeddings.upsert(init_ids, init_vals))
      trainables = []

      def var_fn():
        return trainables

      def loss_fn(emb, trainables):
        test_var, trainable = de.safe_embedding_lookup_sparse(
            emb,
            sp_ids,
            sparse_weights=None,
            combiner="sum",
            return_trainable=True,
        )
        pred = math_ops.matmul(test_var, x)
        trainables.clear()
        trainables.append(trainable)
        return pred * pred

      test_opt_op = test_opt.minimize(lambda: loss_fn(embeddings, trainables),
                                      var_fn)
      self.evaluate(variables.global_variables_initializer())
      for _ in range(run_step):
        self.evaluate(test_opt_op)
      return embeddings.lookup(init_ids)

    test_opt_val = test_fn()
    self.assertAllCloseAccordingToType(
        base_opt_val,
        test_opt_val,
        msg="Cond:{},{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                            initial_mode, dim, run_step),
    )
def common_minimize_trainable(self, base_opt, test_opt, name):
  from tensorflow.python.framework.errors_impl import NotFoundError

  # TODO(rhdong): Recover the testing, if the horovod import error is fixed
  # on macOS + TF2.7+.
  try:
    import horovod.tensorflow as hvd
  except NotFoundError:
    self.skipTest(
        "Skip the test for horovod import error with Tensorflow-2.7.0 on "
        "MacOS-12.")

  tf.config.set_soft_device_placement(True)
  hvd.init()
  base_opt = de.DynamicEmbeddingOptimizer(base_opt, synchronous=True)
  for dtype, run_step, dim in itertools.product([dtypes.float32], [1], [10]):
    x = tf.random.uniform(shape=[32, dim])
    y = tf.zeros([32, 1])
    global_step = training_util.create_global_step()

    base_weight = tf.compat.v1.get_variable(name="base_weights",
                                            initializer=tf.ones([10, 1]))
    base_logits = tf.nn.relu(math_ops.matmul(x, base_weight))
    base_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=base_logits)
    base_opt_op = base_opt.minimize(base_loss,
                                    global_step,
                                    var_list=[base_weight])

    test_weight = tf.compat.v1.get_variable(name="test_weights",
                                            initializer=tf.ones([10, 1]))
    test_logits = tf.nn.relu(math_ops.matmul(x, test_weight))
    test_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=test_logits)
    grads_and_vars = test_opt.compute_gradients(test_loss,
                                                var_list=[test_weight])
    var_list = []
    aggregated_grad = []
    for grad, var in grads_and_vars:
      var_list.append(var)
      aggregated_grad.append(hvd.allreduce(grad, op=hvd.Sum))
    aggregated_grads_and_vars = zip(aggregated_grad, var_list)
    test_opt_op = test_opt.apply_gradients(aggregated_grads_and_vars,
                                           global_step)

    with monitored_session.MonitoredTrainingSession(
        is_chief=True, config=default_config) as sess:
      for _ in range(run_step):
        sess.run(base_opt_op)
        sess.run(test_opt_op)
      self.assertAllCloseAccordingToType(
          sess.run(base_weight),
          sess.run(test_weight),
          msg="Cond:{},{},{}".format(dtype, run_step, dim),
      )
def test_model_save_and_load(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')

  @tf.function
  def slot_map_fn(x):
    return tf.math.floormod(x, 2)

  init = tf.keras.initializers.RandomNormal(seed=0)

  class MyModel(tf.keras.Model):

    def __init__(self):
      super(MyModel, self).__init__()
      self.l0 = tf.keras.layers.InputLayer(input_shape=(None,),
                                           dtype=tf.int64)
      self.l1 = de.keras.layers.FieldWiseEmbedding(4,
                                                   2,
                                                   slot_map_fn,
                                                   bp_v2=False,
                                                   initializer=init,
                                                   name='sl337')
      self.l2 = tf.keras.layers.Flatten()
      self.l3 = tf.keras.layers.Dense(32, 'relu')
      self.l4 = tf.keras.layers.Dense(1, 'sigmoid')

    def call(self, x):
      return self.l4(self.l3(self.l2(self.l1(self.l0(x)))))

  model = MyModel()
  optmz = tf.keras.optimizers.Adam(1E-3)
  optmz = de.DynamicEmbeddingOptimizer(optmz)
  model.compile(optimizer=optmz, loss='binary_crossentropy')

  start = 0
  batch_size = 10
  for i in range(1, 10):
    x = math_ops.range(start, start + batch_size * i, dtype=dtypes.int64)
    x = tf.reshape(x, (batch_size, -1))
    start += batch_size * i
    y = tf.zeros((batch_size, 1), dtype=dtypes.float32)
    model.fit(x, y, verbose=0)

  ids = tf.range(0, 10, dtype=tf.int64)
  ids = tf.reshape(ids, (2, -1))
  expected = model(ids)

  save_dir = tempfile.mkdtemp(prefix='/tmp/')
  options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])

  @tf.function(input_signature=[tf.TensorSpec((None, None), dtype=tf.int64)])
  def foo(x):
    return model(x)

  model.save(save_dir, signatures=foo, options=options)
  new_model = tf.saved_model.load(save_dir)
  sig = new_model.signatures['serving_default']
  evaluated = sig(ids)['output_0']
  self.assertAllClose(expected, evaluated, 1E-7, 1E-7)