# Imports assumed from the Graphcore port of TensorFlow 1.x; the exact
# module paths (in particular the ipu optimizer modules) can differ
# between SDK releases.
from tensorflow.python import ipu
from tensorflow.python.estimator import model_fn as model_fn_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.layers import layers
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops.losses import losses
from tensorflow.python.training import gradient_descent
from tensorflow.python.ipu.optimizers import cross_replica_optimizer
from tensorflow.python.ipu.optimizers import sharded_optimizer

# The functions below refer to gradient_descent under both names.
gd = gradient_descent


def my_model(x, y):
  x = layers.Conv2D(8, 3, padding='same', name="conv1", use_bias=False)(x)
  x = layers.Conv2D(8, 3, padding='same', name="conv2", use_bias=False)(x)
  x = layers.Conv2D(8, 3, padding='same', name="conv3", use_bias=False)(x)
  # Global spatial max pooling leaves one logit per channel.
  x = math_ops.reduce_max(x, axis=[1, 2])
  cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
      logits=x, labels=array_ops.stop_gradient(y))
  loss = math_ops.reduce_mean(cross_entropy)
  optim = sharded_optimizer.ShardedOptimizer(
      gd.GradientDescentOptimizer(0.01))
  train = optim.minimize(cross_entropy)
  return [loss, train]
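# A hedged driver sketch, not part of the original tests: one way my_model
# could be compiled and run on an IPU. ipu_compiler.compile and the session
# plumbing are standard TF1/Graphcore calls; the placeholder shapes are
# borrowed from the test further below, and the IPU system is assumed to be
# configured already (see the configuration sketch later in this file).
import numpy as np

from tensorflow.python.client import session as session_lib
from tensorflow.python.ipu import ipu_compiler
from tensorflow.python.ops import variables


def run_my_model_once():
  x = array_ops.placeholder(dtypes.float32, shape=[1, 32, 32, 4])
  y = array_ops.placeholder(dtypes.float32, shape=[1, 8])

  with ipu.scopes.ipu_scope("/device:IPU:0"):
    out = ipu_compiler.compile(my_model, inputs=[x, y])

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    # out is [loss, train]; running it performs one descent step.
    result = sess.run(out, feed_dict={
        x: np.zeros([1, 32, 32, 4], dtype=np.float32),
        y: np.zeros([1, 8], dtype=np.float32)})
    return result[0]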
# Nested inside a test method in the original; `self` is the enclosing
# TestCase instance.
def my_model_fn(features, labels, mode):
  self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)
  with variable_scope.variable_scope("vs", use_resource=True):
    predictions = layers.Dense(units=1)(features)
    loss = losses.mean_squared_error(labels=labels, predictions=predictions)
    sharded_optimizer_obj = sharded_optimizer.ShardedOptimizer(
        gradient_descent.GradientDescentOptimizer(0.1))
    train_op = sharded_optimizer_obj.minimize(loss)
  return model_fn_lib.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
def testMarkOpsWithAutoshardingContext(self):
  with ipu.scopes.ipu_scope("/device:IPU:0"):
    with ipu.autoshard.ipu_autoshard():
      x = array_ops.placeholder(dtypes.float32, shape=[1, 32, 32, 4])
      y = array_ops.placeholder(dtypes.float32, shape=[1, 8])

      inp = x

      with ops.name_scope('gradients'):
        x = layers.Conv2D(8, 3, padding='same', name="conv1",
                          use_bias=False)(x)
        x = layers.Conv2D(8, 3, padding='same', name="conv2",
                          use_bias=False)(x)
        x = layers.Conv2D(8, 3, padding='same', name="conv3",
                          use_bias=False)(x)
        x = math_ops.reduce_max(x, axis=[1, 2])
        cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
            logits=x, labels=array_ops.stop_gradient(y))
        loss = math_ops.reduce_mean(cross_entropy)
        optim = sharded_optimizer.ShardedOptimizer(
            gd.GradientDescentOptimizer(0.01))
        optim.minimize(loss)

      ipu.autoshard.automatic_sharding(2, inp, loss)

  # Ops marked by the autoshard pass are recorded in this (private)
  # graph collection.
  to_autoshard = ops.get_default_graph().get_collection(
      ipu.sharding._IPU_AUTOSHARD)

  fwd_ops = []
  bwd_ops = []

  all_ops = ops.get_default_graph().get_operations()
  for o in all_ops:
    if o in to_autoshard:
      fwd_ops.append(o)
    else:
      bwd_ops.append(o)

  self.assertTrue(len(fwd_ops) > 10)
  self.assertTrue(len(bwd_ops) > 10)
  self.assertEqual(len([o for o in fwd_ops if o.type == 'Conv2D']), 3)
def my_model(loss, x, y):
  with ipu.scopes.ipu_scope("/device:IPU:0"):
    inp = x

    # IPU-optimised LSTM backed by the Popnn library.
    lstm_cell = ipu.rnn_ops.PopnnLSTM(256, dtype=dtypes.float32)
    x, _ = lstm_cell(x, training=True)

    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
        logits=x, labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(cross_entropy)
    optim = sharded_optimizer.ShardedOptimizer(
        gd.GradientDescentOptimizer(0.01))
    train = optim.minimize(cross_entropy)
    ipu.autoshard.automatic_sharding(2, inp, loss)
    return [loss, train]
def my_graph(inp, lab):
  with ops.device("/device:IPU:0"):
    # Pin each stage of the model to a shard explicitly.
    with ipu.scopes.ipu_shard(0):
      x = layers.Conv2D(8, 3, padding='same', name="convA")(inp)

    with ipu.scopes.ipu_shard(1):
      x = layers.Conv2D(8, 1, padding='same', name="convB")(x)
      x = math_ops.reduce_mean(x, axis=[1, 2])

      loss = nn.softmax_cross_entropy_with_logits_v2(
          logits=x, labels=array_ops.stop_gradient(lab))
      loss = math_ops.reduce_mean(loss)

    opt = sharded_optimizer.ShardedOptimizer(
        gradient_descent.GradientDescentOptimizer(0.000001))
    train = opt.minimize(loss)

  return [loss, train]
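# A hedged configuration sketch, not part of the original tests: the manual
# ipu_shard(0)/ipu_shard(1) placement in my_graph needs a device with at
# least two IPUs. These calls follow the TF1-era ipu.utils API, which is an
# assumption and may be named differently in other SDK releases.
cfg = ipu.utils.create_ipu_config()
cfg = ipu.utils.auto_select_ipus(cfg, 2)  # acquire two IPUs, one per shard
ipu.utils.configure_ipu_system(cfg)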
def my_model(lr, loss, x, y):
  with ipu.scopes.ipu_scope("/device:IPU:0"):
    inp = x

    x = layers.Conv2D(8, 3, padding='same', name="conv1", use_bias=False)(x)
    x = math_ops.reduce_max(x, axis=[1, 2])

    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
        logits=x, labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(cross_entropy)
    optim = sharded_optimizer.ShardedOptimizer(
        gd.GradientDescentOptimizer(lr))
    train = optim.minimize(cross_entropy)
    ipu.autoshard.automatic_sharding(2, inp, loss)
    return [lr, loss, train]
# Nested inside a test method in the original; `self` is the enclosing
# TestCase instance.
def my_model_fn(features, labels, mode):
  self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)
  with variable_scope.variable_scope("vs", use_resource=True):
    with ipu.scopes.ipu_shard(0):
      out_0 = layers.Dense(units=1)(features)
    with ipu.scopes.ipu_shard(1):
      predictions = layers.Dense(units=1)(out_0)
    loss = losses.mean_squared_error(labels=labels, predictions=predictions)
    optimizer = gradient_descent.GradientDescentOptimizer(0.1)
    sharded_optimizer_obj = sharded_optimizer.ShardedOptimizer(optimizer)
    # Wrapping the sharded optimizer so gradients are also aggregated
    # across replicas.
    cross_replica_optimizer_obj = (
        cross_replica_optimizer.CrossReplicaOptimizer(sharded_optimizer_obj))
    train_op = cross_replica_optimizer_obj.minimize(loss)
  return model_fn_lib.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
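# A hedged usage sketch, not part of the original tests: driving the sharded,
# cross-replica model_fn through IPUEstimator. The ipu_run_config and
# ipu_estimator module paths and constructor arguments follow the Graphcore
# TF1 API as far as I know them and should be treated as assumptions;
# my_input_fn is a hypothetical helper, and the self.assertEqual line in
# my_model_fn above would be dropped outside its TestCase.
import numpy as np

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.ipu import ipu_estimator
from tensorflow.python.ipu import ipu_run_config


def my_input_fn():
  features = np.ones([16, 4], dtype=np.float32)
  labels = np.ones([16, 1], dtype=np.float32)
  dataset = dataset_ops.Dataset.from_tensor_slices((features, labels))
  # The IPU needs static shapes, hence drop_remainder.
  return dataset.batch(4, drop_remainder=True).repeat()


# Two shards per replica (one per ipu_shard stage) and two replicas for
# CrossReplicaOptimizer, i.e. four IPUs in total.
config = ipu_run_config.RunConfig(
    ipu_run_config=ipu_run_config.IPURunConfig(num_shards=2, num_replicas=2))
estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn, config=config)
estimator.train(input_fn=my_input_fn, steps=4)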