Example No. 1
  def testAccumulatorMultipleAccumulators(self):
    with self.cached_session() as sess:
      q_f32_0 = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))
      q_f32_1 = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))
      q_f16_0 = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float16, name="Q", shape=tensor_shape.TensorShape([2, 2]))
      q_f16_1 = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float16, name="Q", shape=tensor_shape.TensorShape([2, 2]))

      accums = [q_f16_0, q_f16_1, q_f32_0, q_f32_1]

      elems = [[[1, 0], [0, 0]], [[0, 1], [0, 0]],
               [[0, 0], [1, 0]], [[0, 0], [0, 1]]]

      expected_tensors = []

      for i in range(len(accums)):
        tensor_to_add = np.array(elems[i]).astype(
            accums[i].dtype.as_numpy_dtype)
        expected_tensor = _indexedslice(tensor_to_add)
        expected_tensors.append(expected_tensor)
        st = _indexedslice(tensor_to_add)
        accums[i].apply_indexed_slices_grad(st).run()

      for i in range(len(accums)):
        result = sess.run(accums[i].take_indexed_slices_grad(1))
        self._assertEqual_indexedslices(expected_tensors[i], result)
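Note: the tests in these examples call module-level helpers that are not shown in the snippets, such as _indexedslice and _assertEqual_indexedslices. The sketch below is one plausible shape for them, assuming _indexedslice turns a dense numpy array into an IndexedSlices over its non-zero rows and _assertEqual_indexedslices compares two such values field by field; the actual definitions in the original test file may differ.

import numpy as np
from tensorflow.python.framework import indexed_slices

def _indexedslice(x, noshape=False):
  # Keep only the rows of the dense array that contain a non-zero entry.
  x = np.array(x)
  dense_shape = None if noshape else x.shape
  indices = np.where((x.reshape(x.shape[0], -1) != 0).any(axis=1))[0]
  return indexed_slices.IndexedSlices(
      indices=indices, values=x[indices], dense_shape=dense_shape)

# Assumed to live on the test class (hence the self argument).
def _assertEqual_indexedslices(self, expected, result):
  self.assertAllEqual(expected.indices, result.indices)
  self.assertAllClose(expected.values, result.values)
  if expected.dense_shape is not None and result.dense_shape is not None:
    self.assertAllEqual(expected.dense_shape, result.dense_shape)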
Example No. 2
    def testReturnShape(self):
        with self.cached_session() as sess:
            q = data_flow_ops.SparseConditionalAccumulator(dtypes_lib.float32,
                                                           name="Q",
                                                           shape=[2, None])

            q.apply_grad(grad_indices=[0],
                         grad_values=np.array([[[[1, 2], [3, 4]],
                                                [[5, 6], [7, 8]]]
                                               ]).astype(np.float32)).run()

            val = self.evaluate(q.take_indexed_slices_grad(1))
            self.assertAllEqual(val.dense_shape, [2, 2, 2, 2])

            q = data_flow_ops.SparseConditionalAccumulator(dtypes_lib.float32,
                                                           name="Q",
                                                           shape=[None, 2])

            q.apply_grad(grad_indices=[0],
                         grad_values=np.array([[[[1, 2, 3], [4, 5, 6]],
                                                [[7, 8, 9], [10, 11, 12]]]
                                               ]).astype(np.float32)).run()

            val = self.evaluate(q.take_indexed_slices_grad(1))
            self.assertAllEqual(val.dense_shape, [-1, 2, 2, 3])
Example No. 3
    def testEmptyShapeApply(self):
        with self.cached_session():
            q = data_flow_ops.SparseConditionalAccumulator(
                dtypes_lib.float32,
                name="Q",
                shape=tensor_shape.TensorShape([]))

            with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                         "Input indices should be vector"):
                q.apply_grad(grad_indices=0, grad_values=[1.0],
                             grad_shape=[]).run()

            with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                         "Input indices should be vector"):
                q.apply_grad(grad_indices=0, grad_values=[1.0]).run()

            with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                         "Values cannot be 0-dimensional."):
                q.apply_grad(grad_indices=[0], grad_values=1.0,
                             grad_shape=[]).run()

            with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                         "Values cannot be 0-dimensional."):
                q.apply_grad(grad_indices=[0], grad_values=1.0).run()

            # The right way to apply a scalar
            q.apply_grad(grad_indices=[0], grad_values=[1.0],
                         grad_shape=[]).run()
            q.apply_grad(grad_indices=[0], grad_values=[1.0]).run()
Example No. 4
  def testAccumulatorApplyAndBlockingTake(self):
    # We need each thread to keep its own device stack or the device scopes
    # won't be properly nested.
    ops.get_default_graph().switch_to_thread_local()
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))

      elems = [10.0, 20.0, 30.0]
      elems_ave = sum(elems) / len(elems)
      accum_ops = []
      for x in elems:
        x = _indexedslice(np.array([[0, x], [0, 0]]).astype(np.float32))
        accum_ops.append(q.apply_indexed_slices_grad(x, local_step=0))
      takeg_t = q.take_indexed_slices_grad(3)

      results = []

      def apply_indexed_slices_grad():
        for accum_op in accum_ops:
          self.evaluate(accum_op)

      def take_grad():
        results.append(self.evaluate(takeg_t))

      accum_thread = self.checkedThread(target=apply_indexed_slices_grad)
      takeg_thread = self.checkedThread(target=take_grad)
      accum_thread.start()
      takeg_thread.start()
      accum_thread.join()
      takeg_thread.join()

      self._assertEqual_nparray([[0, elems_ave], [0, 0]], results[0], sess)
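The _assertEqual_nparray helper used above is likewise not shown. Presumably it densifies the IndexedSlices value returned by take_indexed_slices_grad and compares it against an expected dense array; a minimal sketch under that assumption (the sess argument is kept only to match the call sites):

import numpy as np

def _assertEqual_nparray(self, expected, result, sess):
  # Scatter the returned rows back into a dense array before comparing.
  expected = np.array(expected, dtype=np.float32)
  dense = np.zeros_like(expected)
  for idx, row in zip(result.indices, result.values):
    dense[int(idx)] = row
  self.assertAllClose(expected, dense)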
Example No. 5
 def testAccumulatorTakeGradInvalidReductionType(self):
     with self.assertRaises(ValueError):
         data_flow_ops.SparseConditionalAccumulator(
             dtypes_lib.float32,
             name="Q",
             shape=(),
             reduction_type="Invalid")
Example No. 6
  def testAccumulatorApplyAndBlockingTake(self):
    with self.test_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))

      elems = [10.0, 20.0, 30.0]
      elems_ave = sum(elems) / len(elems)
      accum_ops = []
      for x in elems:
        x = _indexedslice(np.array([[0, x], [0, 0]]).astype(np.float32))
        accum_ops.append(q.apply_indexed_slices_grad(x, local_step=0))
      takeg_t = q.take_indexed_slices_grad(3)

      results = []

      def apply_indexed_slices_grad():
        for accum_op in accum_ops:
          sess.run(accum_op)

      def take_grad():
        results.append(sess.run(takeg_t))

      accum_thread = self.checkedThread(target=apply_indexed_slices_grad)
      takeg_thread = self.checkedThread(target=take_grad)
      accum_thread.start()
      takeg_thread.start()
      accum_thread.join()
      takeg_thread.join()

      self._assertEqual_nparray([[0, elems_ave], [0, 0]], results[0], sess)
Example No. 7
  def testParallelApplyGradSum(self):
    with self.test_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32,
          name="Q",
          shape=tensor_shape.TensorShape([2, 2]),
          reduction_type="SUM")
      elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
      accum_ops = []
      for x in elems:
        x = _indexedslice(np.array([[x, 0], [0, x]]).astype(np.float32))
        accum_ops.append(q.apply_indexed_slices_grad(x, local_step=0))
      takeg_t = q.take_indexed_slices_grad(1)

      def apply_indexed_slices_grad(accum_op):
        sess.run(accum_op)

      threads = [
          self.checkedThread(target=apply_indexed_slices_grad, args=(o,))
          for o in accum_ops
      ]

      for thread in threads:
        thread.start()
      for thread in threads:
        thread.join()

      val = sess.run(takeg_t)

      expected_val = 550.0
      self._assertEqual_nparray(
          np.array([[expected_val, 0], [0, expected_val]]).astype(np.float32),
          val, sess)
Example No. 8
  def testApplyGradtInt32IndicesAndShape(self):
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
      accum_op = q.apply_grad(
          grad_indices=constant_op.constant(
              [0, 2], dtype=dtypes_lib.int32),
          grad_values=constant_op.constant(
              [[0, 0, 1], [3, 0, 4]], dtype=dtypes_lib.float32),
          grad_shape=constant_op.constant(
              [3, 3], dtype=dtypes_lib.int32))
      accum_op.run()
      accum_op = q.apply_indexed_slices_grad(
          indexed_slices.IndexedSlices(
              indices=constant_op.constant(
                  [0, 2], dtype=dtypes_lib.int32),
              values=constant_op.constant(
                  [[0, 0, 1], [3, 0, 4]], dtype=dtypes_lib.float32),
              dense_shape=constant_op.constant(
                  [3, 3], dtype=dtypes_lib.int32)))
      accum_op.run()
      self.assertEqual(q.num_accumulated().eval(), 2)

      val = self.evaluate(q.take_indexed_slices_grad(1))
      self.assertAllEqual(val.indices, [0, 2])
      self.assertAllEqual(val.values, [[0, 0, 1], [3, 0, 4]])
      self.assertAllEqual(val.dense_shape, [3, 3])
Example No. 9
  def testParallelApplyGradMean(self):
    # We need each thread to keep its own device stack or the device scopes
    # won't be properly nested.
    ops.get_default_graph().switch_to_thread_local()
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2]))
      elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
      accum_ops = []
      for x in elems:
        x = _indexedslice(np.array([[x, 0], [0, x]]).astype(np.float32))
        accum_ops.append(q.apply_indexed_slices_grad(x, local_step=0))
      takeg_t = q.take_indexed_slices_grad(1)

      def apply_indexed_slices_grad(accum_op):
        self.evaluate(accum_op)

      threads = [
          self.checkedThread(
              target=apply_indexed_slices_grad, args=(o,)) for o in accum_ops
      ]

      for thread in threads:
        thread.start()
      for thread in threads:
        thread.join()

      val = self.evaluate(takeg_t)

      expected_val = sum(elems) / len(elems)
      self._assertEqual_nparray(
          np.array([[expected_val, 0], [0, expected_val]]).astype(np.float32),
          val, sess)
Example No. 10
 def testAccumulatorSetGlobalStep(self):
     with self.cached_session():
         q = data_flow_ops.SparseConditionalAccumulator(
             dtypes_lib.float32,
             name="Q",
             shape=tensor_shape.TensorShape([1]))
         set_global_step_op = q.set_global_step(1)
         set_global_step_op.run()
Example No. 11
  def testZeroDimensionValues(self):
    with self.cached_session():
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))

      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
                                  "Values cannot be 0-dimensional."):
        q.apply_grad(
            grad_indices=[0], grad_values=np.array(1).astype(np.float32)).run()
Example No. 12
 def testAccumulatorApplyGradFloat32(self):
   with self.cached_session():
     q = data_flow_ops.SparseConditionalAccumulator(
         dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
     accum_op = q.apply_indexed_slices_grad(
         indexed_slices.IndexedSlices(
             indices=[0, 2],
             values=np.array([[0, 0, 1], [3, 0, 4]]).astype(np.float32)))
     accum_op.run()
     self.assertEqual(q.num_accumulated().eval(), 1)
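Outside the test harness, the same apply/take cycle can be reproduced with the public tf.compat.v1 API. The snippet below is a self-contained sketch; the compat.v1 symbols used here are my assumed public equivalents of the internal modules imported by the tests above.

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

accum = tf.SparseConditionalAccumulator(
    tf.float32, name="Q", shape=tf.TensorShape([3, 3]))

grad = tf.IndexedSlices(
    indices=[0, 2],
    values=np.array([[0, 0, 1], [3, 0, 4]], dtype=np.float32))

apply_op = accum.apply_indexed_slices_grad(grad)
take_op = accum.take_indexed_slices_grad(1)  # blocks until one grad is in

with tf.Session() as sess:
  sess.run(apply_op)
  result = sess.run(take_op)
  print(result.indices)  # [0 2]
  print(result.values)   # the applied rows, averaged over a single gradient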
Example No. 13
  def testWrongNonEmptyInputValues(self):
    with self.cached_session():
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))

      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
                                  " non-empty input values, got "):
        q.apply_grad(
            grad_indices=[0, 1],
            grad_values=np.array([[0, 1, 1]]).astype(np.float32)).run()
Example No. 14
  def testNonVectorIndices(self):
    with self.cached_session():
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))

      with self.assertRaisesRegex(
          errors_impl.InvalidArgumentError,
          "Input indices should be vector but received shape:"):
        q.apply_grad(
            grad_indices=[[0, 1], [1, 0]],
            grad_values=np.array([1, 2]).astype(np.float32)).run()
Example No. 15
 def testConstructor(self):
     with ops.Graph().as_default():
         q = data_flow_ops.SparseConditionalAccumulator(dtypes_lib.float32,
                                                        name="Q")
     self.assertTrue(isinstance(q.accumulator_ref, ops.Tensor))
     self.assertProtoEquals(
         """
   name:'Q' op:'SparseConditionalAccumulator'
   attr { key: 'dtype' value { type: DT_FLOAT } }
   attr { key: 'shape' value { shape { unknown_rank: true} } }
   attr { key: 'container' value { s: '' } }
   attr { key: 'shared_name' value { s: '' } }
   """, q.accumulator_ref.op.node_def)
Example No. 16
 def _get_accum_apply_and_agg_grad(var_op, grad, indices, dense_shape):
     if indices is None:
         tensor = variable_utils.get_read_var_tensor(var_op)
         grad_accum = data_flow_ops.ConditionalAccumulator(
             grad.dtype,
             shape=tensor.get_shape(),
             shared_name=var_op.name + "/grad_accum")
         # Get a copy of consumers list before creating accum_apply_op
         grad_consumers = list(grad.consumers())
         accum_apply_op = grad_accum.apply_grad(grad,
                                                local_step=MAX_INT64,
                                                name=grad.op.name +
                                                '_accum_apply_grad')
         agg_grad = grad_accum.take_grad(num_accum_required,
                                         name=var_op.name +
                                         '_take_grad')
         update_consumers(grad_consumers, grad, agg_grad)
         update_control_consumers(get_control_consumers(grad.op),
                                  grad.op, agg_grad.op)
     else:
         grad_indexed_slices = ops.IndexedSlices(
             values=grad, indices=indices, dense_shape=dense_shape)
         grad_accum = data_flow_ops.SparseConditionalAccumulator(
             grad.dtype,
             shape=grad.shape,
             shared_name=var_op.name + "/grad_accum")
         # Get a copy of consumers list before creating accum_apply_op
         indices_consumers = list(indices.consumers())
         grad_consumers = list(grad.consumers())
         accum_apply_op = grad_accum.apply_indexed_slices_grad(
             grad_indexed_slices,
             local_step=MAX_INT64,
             name=grad.op.name + '_accum_apply_grad')
         agg_grad = grad_accum.take_indexed_slices_grad(
             num_accum_required, name=var_op.name + '_take_grad')
         agg_indices = agg_grad.indices
         if indices.dtype != agg_grad.indices.dtype:
             agg_indices = math_ops.cast(agg_grad.indices,
                                         indices.dtype)
         agg_grad = ops.IndexedSlices(values=agg_grad.values,
                                      indices=agg_indices,
                                      dense_shape=agg_grad.dense_shape)
         assert isinstance(agg_grad, ops.IndexedSlices)
         update_consumers(indices_consumers, indices, agg_grad.indices)
         update_consumers(grad_consumers, grad, agg_grad.values)
         update_control_consumers(get_control_consumers(indices.op),
                                  indices.op, agg_grad.indices.op)
         update_control_consumers(get_control_consumers(grad.op),
                                  grad.op, agg_grad.values.op)
     return accum_apply_op, agg_grad
Example No. 17
  def testDynamicWrongNonEmptyInputValues(self):
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))

      x_indices = array_ops.placeholder(dtypes_lib.int64)
      x_values = array_ops.placeholder(dtypes_lib.float32)

      accum_op = q.apply_grad(grad_indices=x_indices, grad_values=x_values)

      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
                                  " non-empty input values, got "):
        sess.run(accum_op,
                 feed_dict={
                     x_indices: [0, 1],
                     x_values: np.array([[0, 1, 1]]).astype(np.float32)
                 })
Example No. 18
    def testParallelTakeGrad(self):
        # We need each thread to keep its own device stack or the device scopes
        # won't be properly nested.
        ops.get_default_graph().switch_to_thread_local()
        with self.cached_session() as sess:
            q = data_flow_ops.SparseConditionalAccumulator(
                dtypes_lib.float32,
                name="Q",
                shape=tensor_shape.TensorShape([2, 2]))
            elems = [e + 1 for e in range(10)]
            accum_ops = []
            for e in elems:
                v = _indexedslice(
                    np.array([[0, 0], [e, 0]]).astype(np.float32))
                accum_ops.append(
                    q.apply_indexed_slices_grad(v, local_step=e - 1))
            takeg_t = q.take_indexed_slices_grad(1)

            results = []

            def apply_indexed_slices_grad():
                for accum_op in accum_ops:
                    time.sleep(1.0)
                    sess.run(accum_op)

            apply_indexed_slices_grad_thread = self.checkedThread(
                target=apply_indexed_slices_grad)

            def take_grad():
                t = sess.run(takeg_t)
                results.append(t)

            threads = [self.checkedThread(target=take_grad) for _ in range(10)]

            for thread in threads:
                thread.start()
            apply_indexed_slices_grad_thread.start()

            for thread in threads:
                thread.join()
            apply_indexed_slices_grad_thread.join()

            for i in range(len(accum_ops)):
                self._assertEqual_nparray(np.array([[0, 0], [elems[i], 0]]),
                                          results[i], sess)
Example No. 19
  def testDynamicNonVectorIndices(self):
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))

      x_indices = array_ops.placeholder(dtypes_lib.int64)
      x_values = array_ops.placeholder(dtypes_lib.float32)

      accum_op = q.apply_grad(grad_indices=x_indices, grad_values=x_values)

      with self.assertRaisesRegex(
          errors_impl.InvalidArgumentError,
          "Input indices should be vector but received shape:"):
        sess.run(accum_op,
                 feed_dict={
                     x_indices: [[0, 1], [1, 0]],
                     x_values: np.array([1, 2]).astype(np.float32)
                 })
Example No. 20
    def testAccumulatorCancel(self):
        with self.cached_session() as sess:
            q = data_flow_ops.SparseConditionalAccumulator(
                dtypes_lib.float32,
                name="Q",
                shape=tensor_shape.TensorShape([1, 2, 3]))
            takeg_t = q.take_indexed_slices_grad(1)

            takeg_thread = self.checkedThread(self._blocking_takeg,
                                              args=(sess, takeg_t))

            takeg_thread.start()

            time.sleep(1.0)

            sess.close()  # Will cancel blocked operation

            takeg_thread.join()
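The cancellation tests pass a self._blocking_takeg helper that is not included in these snippets. Presumably it runs the blocking take op and expects the closed session to abort it; a sketch under that assumption (the exact error message matched here is a guess):

def _blocking_takeg(self, sess, takeg_op):
  # No gradient has been applied, so the take op blocks; closing the session
  # from the main thread should abort it instead of hanging forever.
  with self.assertRaisesOpError("was cancelled"):
    sess.run(takeg_op)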
Example No. 21
    def _aggregate_sparse_gradients(self, var_op, reduce_to_device,
                                    indexed_slices_grads, values_op_name):
        with ops.device(reduce_to_device):
            grad_accum_op_name = ops.prepend_name_scope(
                values_op_name, u"%sAccum" % AUTODIST_PREFIX)
            grad_accum = data_flow_ops.SparseConditionalAccumulator(
                dtype=indexed_slices_grads[0].values.dtype,
                shape=var_op.outputs[0].shape,
                shared_name=grad_accum_op_name,
                name=grad_accum_op_name)
            accum_apply_ops = [
                grad_accum.apply_indexed_slices_grad(
                    indexed_slices_grads[i],
                    MAX_INT64,
                    name=ops.prepend_name_scope(
                        values_op_name, u"%s-Accum-Apply" % replica_prefix(i)))
                for i in range(self.num_replicas)
            ]
            take_grad_op_name = ops.prepend_name_scope(
                values_op_name, u"%sTake-Grad" % AUTODIST_PREFIX)
            with ops.control_dependencies(accum_apply_ops):
                take_grad = grad_accum.take_indexed_slices_grad(
                    self.num_replicas, name=take_grad_op_name)

            new_indices = take_grad.indices
            new_values = take_grad.values
            new_dense_shape = take_grad.dense_shape
            if indexed_slices_grads[0].indices.dtype != new_indices.dtype:
                new_indices = math_ops.cast(
                    new_indices,
                    indexed_slices_grads[0].indices.dtype,
                    name=ops.prepend_name_scope(
                        values_op_name,
                        u"%sTake-Grad-Cast-Indices" % AUTODIST_PREFIX))
            if (indexed_slices_grads[0].dense_shape.dtype !=
                    new_dense_shape.dtype):
                new_dense_shape = math_ops.cast(
                    new_dense_shape,
                    indexed_slices_grads[0].dense_shape.dtype,
                    name=ops.prepend_name_scope(
                        values_op_name,
                        u"%sTake-Grad-Cast-Shape" % AUTODIST_PREFIX))
        return ops.IndexedSlices(new_values, new_indices, new_dense_shape)
Example No. 22
    def testAccumulatorRepeatedTakeGrad(self):
        with self.cached_session() as sess:
            q = data_flow_ops.SparseConditionalAccumulator(dtypes_lib.float32,
                                                           name="Q",
                                                           shape=())

            grad_indexed_slices = ops.IndexedSlices(
                indices=[0, 1],
                values=np.array([[1, 0], [0, 2]]).astype(np.float32))
            accum_op = q.apply_indexed_slices_grad(grad_indexed_slices,
                                                   local_step=0)
            accum_op.run()
            accum_op = q.apply_grad(
                [0, 2],
                np.array([[0, 1], [3, 0]]).astype(np.float32),
                [3, 2],
                local_step=0)
            accum_op.run()

            takeg_t = q.take_indexed_slices_grad(1)
            val = self.evaluate(takeg_t)
            self.assertAllEqual(val.indices, [0, 1, 2])
            self.assertAllEqual(val.values, [[0.5, 0.5], [0, 2], [3, 0]])
            self.assertAllEqual(val.dense_shape, [-1, 2])

            grad_indexed_slices = ops.IndexedSlices(
                indices=[0, 1],
                values=np.array([[10, 0], [0, 20]]).astype(np.float32))
            accum_op = q.apply_indexed_slices_grad(grad_indexed_slices,
                                                   local_step=1)
            accum_op.run()
            accum_op = q.apply_grad(
                [0, 2],
                np.array([[0, 10], [30, 0]]).astype(np.float32),
                [3, 2],
                local_step=1)
            accum_op.run()

            takeg_t = q.take_indexed_slices_grad(1)
            val = self.evaluate(takeg_t)
            self.assertAllEqual(val.indices, [0, 1, 2])
            self.assertAllEqual(val.values, [[5, 5], [0, 20], [30, 0]])
            self.assertAllEqual(val.dense_shape, [-1, 2])
Example No. 23
  def testAccumulatorTakeGradSum(self):
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=(), reduction_type="SUM")

      grad_indexed_slices = indexed_slices.IndexedSlices(
          indices=[0, 1], values=np.array([[1, 0], [0, 2]]).astype(np.float32))
      accum_op = q.apply_indexed_slices_grad(grad_indexed_slices)
      accum_op.run()
      accum_op = q.apply_grad([0, 2],
                              np.array([[0, 1], [3, 0]]).astype(np.float32),
                              [3, 2])
      accum_op.run()

      takeg_t = q.take_indexed_slices_grad(1)
      val = self.evaluate(takeg_t)
      self.assertAllEqual([0, 1, 2], val.indices)
      self.assertAllEqual([[1, 1], [0, 2], [3, 0]], val.values)
      self.assertAllEqual([-1, 2], val.dense_shape)
Example No. 24
 def testConstructorWithShape(self):
     with ops.Graph().as_default():
         q = data_flow_ops.SparseConditionalAccumulator(
             dtypes_lib.float32,
             name="Q",
             shape=tensor_shape.TensorShape([1, 5, 2, 8]))
     self.assertTrue(isinstance(q.accumulator_ref, ops.Tensor))
     self.assertProtoEquals(
         """
   name:'Q' op:'SparseConditionalAccumulator'
   attr { key: 'dtype' value { type: DT_FLOAT } }
   attr { key: 'shape' value { shape { dim {size: 1 }
                                       dim {size: 5 }
                                       dim {size: 2 }
                                       dim {size: 8 }
   } } }
   attr { key: 'container' value { s: '' } }
   attr { key: 'shared_name' value { s: '' } }
   """, q.accumulator_ref.op.node_def)
Example No. 25
  def testAccumulatorCancel(self):
    # We need each thread to keep its own device stack or the device scopes
    # won't be properly nested.
    ops.get_default_graph().switch_to_thread_local()
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32,
          name="Q",
          shape=tensor_shape.TensorShape([1, 2, 3]))
      takeg_t = q.take_indexed_slices_grad(1)

      takeg_thread = self.checkedThread(
          self._blocking_takeg, args=(sess, takeg_t))

      takeg_thread.start()

      time.sleep(1.0)

      sess.close()  # Will cancel blocked operation

      takeg_thread.join()
Example No. 26
  def testDtypes(self):
    with self.cached_session() as sess:
      dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64]

      for i in range(len(dtypes)):
        dtype = dtypes[i]
        q = data_flow_ops.SparseConditionalAccumulator(
            dtype, shape=tensor_shape.TensorShape([3, 3, 3]))

        elems = np.arange(2)
        sum_elems = np.zeros([3, 3, 3]).astype(dtype.as_numpy_dtype)
        for e in elems:
          mat_to_add = np.zeros([3, 3, 3]).astype(dtype.as_numpy_dtype)
          mat_to_add[i, i, i] = e + 1
          sum_elems += mat_to_add
          t = _indexedslice(mat_to_add)
          q.apply_indexed_slices_grad(t).run()

        result = self.evaluate(q.take_indexed_slices_grad(1))

        self._assertEqual_nparray(sum_elems / len(elems), result, sess)
Example No. 27
    def _apply_model_average(self, lvars_and_gvars, name=None):
        """Apply local weights to global variables.

    This contains most of the synchronization implementation.

    Args:
      lvars_and_gvars: List of (local_vars, global_vars) pairs.
      name: Optional name for the returned operation. Defaults to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

    Raises:
      ValueError: If lvars_and_gvars is empty.
    """
        if not lvars_and_gvars:
            raise ValueError("Must supply at least one variable")

        train_ops = []
        aggregated_lvars = []

        model_reassign_ops = []

        global_vars = [g for v, g in lvars_and_gvars if v is not None]

        # local_anchor op will be placed on this worker task by default.
        local_anchor = control_flow_ops.no_op()
        # Colocating the local_step variable prevents it from being placed
        # on the PS.
        with ops.colocate_with(local_anchor):
            self._local_step = variables.Variable(
                initial_value=0,
                trainable=False,
                collections=[ops.GraphKeys.LOCAL_VARIABLES],
                dtype=self._global_step.dtype.base_dtype,
                name="%s_local_step" % self._name)

        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   self._global_step)
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.global_variables())

        with ops.name_scope(None, self._name):
            for lvar, gvar in lvars_and_gvars:
                lvar = ops.convert_to_tensor(lvar)
                with ops.device(gvar.device):
                    # Dense variables.
                    if lvar is None:
                        aggregated_lvars.append(None)  # pass-through.
                        continue
                    elif isinstance(lvar, ops.Tensor):
                        lvar_accum = data_flow_ops.ConditionalAccumulator(
                            lvar.dtype,
                            shape=gvar.get_shape(),
                            shared_name=gvar.name + "/lvar_accum")
                        train_ops.append(
                            lvar_accum.apply_grad(lvar,
                                                  local_step=self._local_step))
                        aggregated_lvars.append(
                            lvar_accum.take_grad(self._replicas_to_aggregate))
                    else:
                        if not isinstance(lvar, ops.IndexedSlices):
                            raise ValueError("Unknown model variable type!")
                        lvar_accum = data_flow_ops.SparseConditionalAccumulator(
                            lvar.dtype,
                            shape=(),
                            shared_name=gvar.name + "/model_variable_accum")
                        train_ops.append(
                            lvar_accum.apply_indexed_slices_grad(
                                lvar, local_step=self._local_step))
                        aggregated_lvars.append(
                            lvar_accum.take_indexed_slices_grad(
                                self._replicas_to_aggregate))

                    self._accumulator_list.append((lvar_accum, gvar.device))

            # sync_op will be assigned to the same device as the global step.
            with ops.device(self._global_step.device), ops.name_scope(""):
                for avg_var, gvar in zip(aggregated_lvars, global_vars):
                    model_reassign_ops.append(state_ops.assign(gvar, avg_var))
                model_reassign_ops.append(
                    state_ops.assign_add(self._global_step, 1))
                update_op = control_flow_ops.group(*(model_reassign_ops))

            # Create token queue.
            with ops.device(self._global_step.device), ops.name_scope(""):
                sync_token_queue = (data_flow_ops.FIFOQueue(
                    -1,
                    self._global_step.dtype.base_dtype,
                    shapes=(),
                    name="sync_token_q",
                    shared_name="sync_token_q"))
                self._sync_token_queue = sync_token_queue

                # dummy_queue is passed to the queue runner. Don't use the real
                # queues, because the queue runner does not automatically reopen
                # queues on PS devices once it has closed them.
                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    name="dummy_queue",
                    shared_name="dummy_queue"))

            with ops.device(self._global_step.device), ops.name_scope(""):
                # Replicas have to wait until they can get a token from the token queue.
                with ops.control_dependencies(train_ops):
                    token = sync_token_queue.dequeue()
                train_op = state_ops.assign(self._local_step, token)

                with ops.control_dependencies([update_op]):
                    # sync_op needs to insert tokens into the token queue at the
                    # end of the step so the replicas can fetch them to start
                    # the next step.
                    tokens = array_ops.fill([self._tokens_per_step],
                                            self._global_step)
                    sync_op = sync_token_queue.enqueue_many((tokens, ))

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])
            for accum, dev in self._accumulator_list:
                with ops.device(dev):
                    chief_init_ops.append(
                        accum.set_global_step(self._global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._average_applied = True
            return train_op
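The docstring above describes the token hand-off that gates each step: replicas block on the token queue until the chief, having applied the aggregated update, enqueues one token per replica. The stripped-down sketch below shows just that pattern with the public tf.compat.v1 API; the queue capacity, token count, and variable names are illustrative rather than taken from the code above.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

global_step = tf.Variable(0, dtype=tf.int64, trainable=False,
                          name="global_step")
local_step = tf.Variable(0, dtype=tf.int64, trainable=False,
                         name="local_step")
sync_token_q = tf.FIFOQueue(-1, tf.int64, shapes=(),
                            shared_name="sync_token_q")

# Chief: after the aggregated update, bump the global step and publish one
# token per replica so they can begin the next step.
with tf.control_dependencies([tf.assign_add(global_step, 1)]):
  sync_op = sync_token_q.enqueue_many((tf.fill([2], global_step),))

# Replica: block until a token arrives, then adopt it as the local step.
train_op = tf.assign(local_step, sync_token_q.dequeue())

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(sync_op)          # chief publishes tokens for two replicas
  print(sess.run(train_op))  # each replica unblocks with the new step value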
Example No. 28
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation. Defaults to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

    Raises:
      ValueError: If grads_and_vars is empty.
      ValueError: If global_step is not provided, since staleness cannot be
        checked without it.
    """
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []
        var_list = []

        # local_anchor op will be placed on this worker task by default.
        local_anchor = control_flow_ops.no_op()
        # Colocating the local_step variable prevents it from being placed
        # on the PS.
        distribution_strategy = (
            distribution_strategy_context.get_distribution_strategy())
        with distribution_strategy.colocate_vars_with(local_anchor):
            self._local_step = variable_scope.variable(
                initial_value=0,
                trainable=False,
                collections=[ops.GraphKeys.LOCAL_VARIABLES],
                dtype=global_step.dtype.base_dtype,
                name="sync_rep_local_step")

        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   global_step)
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.global_variables())

        with ops.name_scope(None, self._name):
            for grad, var in grads_and_vars:
                var_list.append(var)
                with ops.device(var.device):
                    # Dense gradients.
                    if grad is None:
                        aggregated_grad.append(None)  # pass-through.
                        continue
                    elif isinstance(grad, ops.Tensor):
                        grad_accum = data_flow_ops.ConditionalAccumulator(
                            grad.dtype,
                            shape=var.get_shape(),
                            shared_name=var.name + "/grad_accum")
                        train_ops.append(
                            grad_accum.apply_grad(grad,
                                                  local_step=self._local_step))
                        aggregated_grad.append(
                            grad_accum.take_grad(self._replicas_to_aggregate))
                    else:
                        if not isinstance(grad, ops.IndexedSlices):
                            raise ValueError("Unknown grad type!")
                        grad_accum = data_flow_ops.SparseConditionalAccumulator(
                            grad.dtype,
                            shape=(),
                            shared_name=var.name + "/grad_accum")
                        train_ops.append(
                            grad_accum.apply_indexed_slices_grad(
                                grad, local_step=self._local_step))
                        aggregated_grad.append(
                            grad_accum.take_indexed_slices_grad(
                                self._replicas_to_aggregate))

                    self._accumulator_list.append((grad_accum, var.device))

            aggregated_grads_and_vars = zip(aggregated_grad, var_list)

            # sync_op will be assigned to the same device as the global step.
            with ops.device(global_step.device), ops.name_scope(""):
                update_op = self._opt.apply_gradients(
                    aggregated_grads_and_vars, global_step)

            # Create token queue.
            with ops.device(global_step.device), ops.name_scope(""):
                sync_token_queue = (data_flow_ops.FIFOQueue(
                    -1,
                    global_step.dtype.base_dtype,
                    shapes=(),
                    name="sync_token_q",
                    shared_name="sync_token_q"))
                self._sync_token_queue = sync_token_queue

                # dummy_queue is passed to the queue runner. Don't use the real
                # queues, because the queue runner does not automatically reopen
                # queues on PS devices once it has closed them.
                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    name="dummy_queue",
                    shared_name="dummy_queue"))

            with ops.device(global_step.device), ops.name_scope(""):
                # Replicas have to wait until they can get a token from the token queue.
                with ops.control_dependencies(train_ops):
                    token = sync_token_queue.dequeue()
                train_op = state_ops.assign(self._local_step, token)

                with ops.control_dependencies([update_op]):
                    # sync_op needs to insert tokens into the token queue at the
                    # end of the step so the replicas can fetch them to start
                    # the next step.
                    tokens = array_ops.fill([self._tokens_per_step],
                                            global_step)
                    sync_op = sync_token_queue.enqueue_many((tokens, ))

                if self._variable_averages is not None:
                    with ops.control_dependencies([sync_op]), ops.name_scope(""):
                        sync_op = self._variable_averages.apply(
                            self._variables_to_average)

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])
            for accum, dev in self._accumulator_list:
                with ops.device(dev):
                    chief_init_ops.append(
                        accum.set_global_step(global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._gradients_applied = True
            return train_op
Example No. 29
 def testAccumulatorSizeEmpty(self):
   with self.cached_session():
     q = data_flow_ops.SparseConditionalAccumulator(
         dtypes_lib.float32, name="Q")
     self.assertEqual(q.num_accumulated().eval(), 0)
Example No. 30
 def testConstructorWithInvalidArg(self):
   with ops.Graph().as_default():
     with self.assertRaises(ValueError):
       data_flow_ops.SparseConditionalAccumulator(
           dtypes_lib.float32, name="Q", reduction_type="Invalid")