def test_exact_posterior_recovery_no_transition_noise(self):
        """Compares the model's end state with a closed-form expectation.

        The stub model is run over the whole dataset with its true parameters
        fed in. The test then expects a near-zero posterior covariance, a
        posterior mean equal to the prior mean multiplied by the transition
        matrix raised to the size of the times array's second axis, and end
        times matching `math_utils.batch_end_time` of the input times.
        """
        times_key = feature_keys.TrainEvalFeatures.TIMES
        with self.test_session() as session:
            stub_model, data, true_params = self._get_single_model()
            reader = input_pipeline.NumpyReader(data)
            input_fn = input_pipeline.WholeDatasetInputFn(reader)
            features, _ = input_fn()

            # Replicate the model's start state across the batch dimension of
            # the times feature before computing the loss.
            start_state = stub_model.get_start_state()
            batch_size = array_ops.shape(features[times_key])[0]
            replicated_state = math_utils.replicate_state(
                start_state=start_state, batch_size=batch_size)
            model_outputs = stub_model.get_batch_loss(
                features=features, mode=None, state=replicated_state)

            variables.global_variables_initializer().run()
            coord = coordinator_lib.Coordinator()
            queue_runner_impl.start_queue_runners(session, coord=coord)
            # Feed the true model parameters so that this test doesn't depend
            # on the generated parameters being close to the variable
            # initializations (an alternative would be training steps to fit
            # the noise values, which would be slow).
            posterior_mean, posterior_var, posterior_times = session.run(
                model_outputs.end_state, feed_dict=true_params)
            coord.request_stop()
            coord.join()

            self.assertAllClose(
                numpy.zeros([1, 4, 4]), posterior_var, atol=1e-2)

            num_steps = data[times_key].shape[1]
            transition_power = numpy.linalg.matrix_power(
                stub_model.transition, num_steps)
            expected_mean = numpy.dot(
                transition_power, true_params[stub_model.prior_state_mean])
            self.assertAllClose(expected_mean, posterior_mean[0], rtol=1e-1)

            expected_times = math_utils.batch_end_time(
                features[times_key]).eval()
            self.assertAllClose(expected_times, posterior_times)
Exemplo n.º 2
0
  def testDistributedFunctionPendingNodesServerReplaced(self):
    """Calls a two-device function while the server def is repeatedly swapped.

    One thread invokes `worker_fn` `num_calls` times; a second thread
    alternates `context.update_server_def` between two server defs. Every call
    is expected to return a 2x2 matrix of twos.
    """
    with ops.device(self.device_local):
      ones = array_ops.ones([2, 2])

    @def_function.function
    def worker_fn(i):
      with ops.device(self.device_t1):
        squared = math_ops.matmul(i, i)
      with ops.device(self.device_t2):
        shifted = squared + i
      return shifted - i

    # Obtain the concrete function for `ones` before any thread is started.
    worker_fn.get_concrete_function(ones)

    num_calls = 10
    self._coord = coordinator.Coordinator()

    def thread_fn(device, results):
      with self._coord.stop_on_exception():
        for call_index in range(num_calls):
          with ops.device(device):
            value = worker_fn(ones)
          results[call_index] = value.numpy()

    def update_server_def_fn():
      with self._coord.stop_on_exception():
        for call_index in range(num_calls):
          # Even iterations use the three-server def, odd ones the two-server
          # def.
          if call_index % 2 == 0:
            next_def = self.server_def_s1_s2_s3
          else:
            next_def = self.server_def_s1_s2
          context.update_server_def(server_def=next_def)

    results = [None] * num_calls
    caller = threading.Thread(
        target=thread_fn, args=(self.device_t1, results))
    updater = threading.Thread(target=update_server_def_fn)
    threads = [caller, updater]
    for thread in threads:
      thread.start()
    self._coord.join(threads)
    for result in results:
      np.testing.assert_array_equal([[2, 2], [2, 2]], result)
Exemplo n.º 3
0
  def testMultiThreadPendingNodesLockFree(self):
    """Updates the cluster while other threads launch remote function calls.

    Two threads each call `worker_fn` `num_calls` times on different devices
    while a third thread calls `context.update_server_def` 30 times. Every
    function call is expected to return a 2x2 matrix of twos.
    """
    with ops.device(self.device_t1):
      ones = array_ops.ones([2, 2])

    num_calls = 10
    self._coord = coordinator.Coordinator()

    @def_function.function
    def worker_fn(i):
      return math_ops.matmul(i, i)

    # Forces function tracing and registration
    worker_fn.get_concrete_function(ones)

    def thread_fn(device, results):
      for call_index in range(num_calls):
        with self._coord.stop_on_exception(), ops.device(device):
          results[call_index] = worker_fn(ones).numpy()

    def update_server_def_fn():
      num_updates = 30
      for _ in range(num_updates):
        with self._coord.stop_on_exception():
          context.update_server_def(self.server_def_s1_s2)

    t1_results = [None] * num_calls
    t2_results = [None] * num_calls
    threads = [
        threading.Thread(target=thread_fn, args=(self.device_t1, t1_results)),
        threading.Thread(target=thread_fn, args=(self.device_t2, t2_results)),
        threading.Thread(target=update_server_def_fn),
    ]
    for thread in threads:
      thread.start()
    self._coord.join(threads)

    expected = [[2, 2], [2, 2]]
    for result in t1_results + t2_results:
      np.testing.assert_array_equal(expected, result)
Exemplo n.º 4
0
def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                              cluster_spec, session_config, rpc_layer):
    """Runs a standalone client for between-graph replication.

    Starts one thread per chief/worker task listed in `cluster_spec`, plus one
    evaluator thread when the spec has an evaluator job. Every thread runs
    `_run_single_worker`; this function blocks until the shared coordinator
    has joined all of them.

    Args:
      worker_fn: forwarded to `_run_single_worker` for chief/worker tasks.
      strategy: forwarded to `_run_single_worker` for chief/worker tasks.
      eval_fn: forwarded to `_run_single_worker` for the evaluator task.
      eval_strategy: forwarded to `_run_single_worker` for the evaluator task.
      cluster_spec: object exposing `jobs` and `as_dict()`.
      session_config: forwarded to every `_run_single_worker` call.
      rpc_layer: forwarded to every `_run_single_worker` call.

    Returns:
      None.
    """
    coord = coordinator.Coordinator()

    eval_thread = None
    has_evaluator = _TaskType.EVALUATOR in cluster_spec.jobs
    if has_evaluator:
        eval_args = (eval_fn, eval_strategy, cluster_spec,
                     _TaskType.EVALUATOR, 0, session_config)
        eval_kwargs = {"rpc_layer": rpc_layer, "coord": coord}
        eval_thread = threading.Thread(
            target=_run_single_worker, args=eval_args, kwargs=eval_kwargs)
        eval_thread.start()

    threads = []
    worker_barrier = _Barrier(_get_num_workers(cluster_spec))
    for task_type in (_TaskType.CHIEF, _TaskType.WORKER):
        num_tasks = len(cluster_spec.as_dict().get(task_type, []))
        for task_id in range(num_tasks):
            worker_args = (worker_fn, strategy, cluster_spec, task_type,
                           task_id, session_config)
            worker_kwargs = {
                "rpc_layer": rpc_layer,
                "worker_barrier": worker_barrier,
                "coord": coord,
            }
            worker_thread = threading.Thread(
                target=_run_single_worker,
                args=worker_args,
                kwargs=worker_kwargs)
            worker_thread.start()
            threads.append(worker_thread)

    threads_to_join = list(threads)
    if eval_thread is not None:
        # TODO(yuefengz): is it necessary to join eval thread?
        threads_to_join.append(eval_thread)
    coord.join(threads_to_join)

    # TODO(yuefengz): we probably want to return results from all workers?
    return None
 def testPandasFeedingMultiThread(self):
   """Dequeues shuffled DataFrame rows fed by 8 threads and checks each row.

   Returns without checking anything when pandas is unavailable.
   """
   if not HAS_PANDAS:
     return
   with ops.Graph().as_default():
     first_column = np.arange(128, 256)
     second_column = 2 * first_column
     frame = pd.DataFrame(
         {"a": first_column, "b": second_column}, index=np.arange(128))
     queue = ff._enqueue_data(
         frame, capacity=128, num_threads=8, shuffle=True)
     batch_size = 5
     dequeue_op = queue.dequeue_many(batch_size)
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for _ in range(100):
         dequeued = sess.run(dequeue_op)
         # Element 0 holds the row indices; column values follow in the
         # frame's column order.
         row_indices = dequeued[0]
         expected_rows = frame.iloc[row_indices]
         for position, col in enumerate(frame.columns, start=1):
           np.testing.assert_array_equal(expected_rows[col].values,
                                         dequeued[position])
       coord.request_stop()
       coord.join(threads)
Exemplo n.º 6
0
 def testNoShuffle(self):
     """Reads 50 unshuffled single-row batches and expects both to count from 0."""
     id_source = rs.ReaderSource(
         reader_cls=io_ops.IdentityReader,
         work_units=self.work_units,
         batch_size=1,
         shuffle=False,
         num_threads=1)
     index_column, value_column = id_source()
     index_tensor = index_column.build()
     value_tensor = value_column.build()
     # Both built tensors are expected to have static shape [1].
     for built in (index_tensor, value_tensor):
         self.assertEqual([1], built.get_shape().as_list())
     with self.test_session() as sess:
         variables.global_variables_initializer().run()
         coord = coordinator.Coordinator()
         threads = queue_runner_impl.start_queue_runners(
             sess=sess, coord=coord)
         for expected in range(50):
             index, value = sess.run([index_tensor, value_tensor])
             self.assertEqual(expected, int(index[0]))
             self.assertEqual(expected, int(value[0]))
         coord.request_stop()
         coord.join(threads)
  def testNotifyBeforeWait(self):
    """Waits on a closure queue whose only closure was already marked finished."""
    closure_queue = coordinator_lib._CoordinatedClosureQueue()

    def func():
      logging.info('func running')

    coord = coordinator.Coordinator(clean_stop_exception_types=[])

    def process_queue():
      # Take the closure off the queue and mark it finished without running it.
      with coord.stop_on_exception():
        closure_queue.get()
        closure_queue.mark_finished()

    closure = coordinator_lib.Closure(func, closure_queue._cancellation_mgr)
    closure_queue.put(closure)
    consumer = threading.Thread(target=process_queue)
    consumer.start()
    coord.join([consumer])

    # This test asserts that waiting at the time the function has been
    # processed doesn't time out.
    closure_queue.wait()
Exemplo n.º 8
0
    def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self):
        """Expects a short final batch when batch size does not divide the data.

        Five rows with batch size 2 and one epoch should come out as batches
        of 2, 2 and 1 rows, after which running the input raises
        OutOfRangeError.
        """
        batch_size = 2
        x = {'a': np.arange(5) * 1.0, 'b': np.arange(32, 37)}
        y = np.arange(-32, -27)
        # (expected 'a', expected 'b', expected target) per batch, in order.
        expected_batches = (
            ([0, 1], [32, 33], [-32, -31]),
            ([2, 3], [34, 35], [-30, -29]),
            ([4], [36], [-28]),
        )

        with self.test_session() as session:
            input_fn = numpy_io.numpy_input_fn(
                x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
            features, target = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(
                session, coord=coord)

            for want_a, want_b, want_target in expected_batches:
                got_features, got_target = session.run([features, target])
                self.assertAllEqual(got_features['a'], want_a)
                self.assertAllEqual(got_features['b'], want_b)
                self.assertAllEqual(got_target, want_target)

            with self.assertRaises(errors.OutOfRangeError):
                session.run([features, target])

            coord.request_stop()
            coord.join(threads)
Exemplo n.º 9
0
    def testMultiThreadedEstimateDataDistribution(self):
        """Fetches the data-distribution estimate from 25 enqueue threads."""
        num_classes = 10
        queue_capacity = 25
        num_enqueue_threads = 25
        num_fetches = 25

        # Set up graph.
        random_seed.set_random_seed(1234)
        # NOTE(review): if the uniform draw can reach 0.95 or more, rounding
        # yields `num_classes` itself -- confirm that label is acceptable.
        uniform_draw = random_ops.random_uniform([1])
        label = math_ops.cast(
            math_ops.round(uniform_draw * num_classes), dtypes_lib.int32)

        prob_estimate = sampling_ops._estimate_data_distribution(  # pylint: disable=protected-access
            label, num_classes)
        # Check that prob_estimate is well-behaved in a multithreaded context.
        _, _, [prob_estimate] = sampling_ops._verify_input(  # pylint: disable=protected-access
            [], label, [prob_estimate])

        # Use queues to run multiple threads over the graph, each of which
        # fetches `prob_estimate`.
        queue = data_flow_ops.FIFOQueue(
            capacity=queue_capacity,
            dtypes=[prob_estimate.dtype],
            shapes=[prob_estimate.get_shape()])
        enqueue_op = queue.enqueue([prob_estimate])
        runner = queue_runner_impl.QueueRunner(
            queue, [enqueue_op] * num_enqueue_threads)
        queue_runner_impl.add_queue_runner(runner)
        out_tensor = queue.dequeue()

        # Run the multi-threaded session.
        with self.cached_session() as sess:
            # Need to initialize variables that keep running total of classes
            # seen.
            variables.global_variables_initializer().run()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(coord=coord)

            for _ in range(num_fetches):
                sess.run([out_tensor])

            coord.request_stop()
            coord.join(threads)
  def test_keyed_read_text_lines(self):
    """Reads a three-line text file one line per batch and checks the keys.

    Each key is expected to be the file name followed by ":<line number>".
    After the third line, running `inputs` should raise OutOfRangeError.
    """
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("ABC\nDEF\nGHK\n")

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"
    # (key suffix, line content) for each expected batch, in file order.
    expected_records = ((b":1", b"ABC"), (b":2", b"DEF"), (b":3", b"GHK"))

    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
      keys, inputs = graph_io.read_keyed_batch_examples(
          filename,
          batch_size,
          reader=io_ops.TextLineReader,
          randomize_input=False,
          num_epochs=1,
          queue_capacity=queue_capacity,
          name=name)
      for tensor in (keys, inputs):
        self.assertAllEqual((None,), tensor.get_shape().as_list())
      session.run(variables.local_variables_initializer())

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      for key_suffix, line in expected_records:
        fetched = session.run([keys, inputs])
        expected_key = filename.encode("utf-8") + key_suffix
        self.assertAllEqual(fetched, [[expected_key], [line]])
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)
Exemplo n.º 11
0
  def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(self):
    """Expects batches of 2, 2 and 1 rows from five rows with batch size 2.

    Returns without checking anything when pandas is unavailable.
    """
    if not HAS_PANDAS:
      return
    # (expected 'a', expected 'b', expected target) per batch, in order.
    expected_batches = (
        ([0, 1], [32, 33], [-32, -31]),
        ([2, 3], [34, 35], [-30, -29]),
        ([4], [36], [-28]),
    )
    with self.cached_session() as session:
      index = np.arange(100, 105)
      x = pd.DataFrame(
          {'a': np.arange(5), 'b': np.arange(32, 37)}, index=index)
      y = pd.Series(np.arange(-32, -27), index=index)

      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      results = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      for want_a, want_b, want_target in expected_batches:
        features, target = session.run(results)
        self.assertAllEqual(features['a'], want_a)
        self.assertAllEqual(features['b'], want_b)
        self.assertAllEqual(target, want_target)

      with self.assertRaises(errors.OutOfRangeError):
        session.run(results)

      coord.request_stop()
      coord.join(threads)
Exemplo n.º 12
0
 def test_chained_exact_posterior_recovery_no_transition_noise(self):
     """Checks the end state after chaining state across fixed-size windows.

     The data is fed in windows of `chunk_size` through a
     `ChainingStateManager`. After evaluating the loss once per window, the
     end state fetched with the true parameters is expected to have near-zero
     covariance, a mean equal to the prior mean multiplied by the transition
     matrix raised to the size of the times array's second axis, and times
     equal to the last column of the input times.
     """
     times_key = feature_keys.TrainEvalFeatures.TIMES
     with self.test_session() as session:
         stub_model, data, true_params = self._get_single_model()
         chunk_size = 10
         reader = input_pipeline.NumpyReader(data)
         input_fn = test_utils.AllWindowInputFn(
             reader, window_size=chunk_size)
         features, _ = input_fn()
         state_manager = state_management.ChainingStateManager(
             state_saving_interval=1)
         state_manager.initialize_graph(stub_model)
         model_outputs = state_manager.define_loss(
             model=stub_model,
             features=features,
             mode=estimator_lib.ModeKeys.TRAIN)
         variables.global_variables_initializer().run()
         coord = coordinator_lib.Coordinator()
         queue_runner_impl.start_queue_runners(session, coord=coord)

         num_windows = data[times_key].shape[1] // chunk_size
         for _ in range(num_windows):
             model_outputs.loss.eval()
         posterior_mean, posterior_var, posterior_times = session.run(
             model_outputs.end_state, feed_dict=true_params)
         coord.request_stop()
         coord.join()

         self.assertAllClose(
             numpy.zeros([1, 4, 4]), posterior_var, atol=1e-2)

         transition_power = numpy.linalg.matrix_power(
             stub_model.transition, data[times_key].shape[1])
         expected_mean = numpy.dot(
             transition_power, true_params[stub_model.prior_state_mean])
         self.assertAllClose(expected_mean, posterior_mean[0], rtol=1e-1)

         last_times = data[times_key][:, -1]
         self.assertAllClose(last_times, posterior_times)
Exemplo n.º 13
0
    def testGeneratorInputFnLabelDict(self):
        """Expects the target to be a dict when `target_key` is a list of keys."""

        def generator():
            for index in range(2):
                scaled = np.ones(1) * index
                yield {
                    'a': scaled,
                    'b': scaled + 32,
                    'label': scaled - 32,
                    'label2': scaled - 64,
                }

        def as_column(values):
            # Expected batches are column vectors, one row per generated item.
            return np.asarray(values).reshape(-1, 1)

        with self.test_session() as session:
            input_fn = generator_io.generator_input_fn(
                generator,
                target_key=['label', 'label2'],
                batch_size=2,
                shuffle=False,
                num_epochs=1)
            features, target = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(
                session, coord=coord)

            got_features, got_target = session.run([features, target])
            self.assertAllEqual(got_features['a'], as_column([0, 1]))
            self.assertAllEqual(got_features['b'], as_column([32, 33]))
            self.assertAllEqual(got_target['label'], as_column([-32, -31]))
            self.assertAllEqual(got_target['label2'], as_column([-64, -63]))

            session.run([features])
            with self.assertRaises(errors.OutOfRangeError):
                session.run([features, target])

            coord.request_stop()
            coord.join(threads)
        def _test(self):
            """Checks the column transform `name` against `op` applied directly.

            Builds a frame from a NumPy array (a shuffled half-True/half-False
            array when `np_dtype` is bool, otherwise `arange` of that dtype),
            adds an "actual" column by calling the transform named `name` on
            the "value" column, and compares it with `op` applied to the built
            "value" tensor.

            The session is always closed and the queue-runner threads are
            always stopped and joined, even when running the graph raises.
            """
            if np_dtype == bool:
                half = int(NUMPY_ARRAY_SIZE / 2)
                arr = np.array([True] * half + [False] * half)
                np.random.shuffle(arr)
            else:
                arr = np.arange(NUMPY_ARRAY_SIZE, dtype=np_dtype)
            frame = df.TensorFlowDataFrame.from_numpy(
                arr, batch_size=NUMPY_ARRAY_SIZE, shuffle=False)
            self.assertTrue(hasattr(frame["value"], name))
            frame["actual"] = getattr(frame["value"], name)()
            frame_built = frame.build()
            expected_tensor = op(frame_built["value"])
            actual_tensor = frame_built["actual"]

            session = session_lib.Session()
            try:
                coord = coordinator.Coordinator()
                threads = queue_runner_impl.start_queue_runners(
                    sess=session, coord=coord)
                try:
                    actual, expected = session.run(
                        [actual_tensor, expected_tensor])
                finally:
                    # Stop the queue runners whether or not the run succeeded.
                    coord.request_stop()
                    coord.join(threads)
            finally:
                # The session was never closed before; release it explicitly.
                session.close()
            np.testing.assert_almost_equal(expected, actual)
 def _test_missing_values(self, cut_start, cut_end, offset):
   """Returns the chained loss on 100-step test data built with the given cut.

   Args:
     cut_start: forwarded to `_make_test_data`.
     cut_end: forwarded to `_make_test_data`.
     offset: forwarded to `_make_test_data`.

   Returns:
     The loss value from the evaluation that follows ten earlier loss
     evaluations.
   """
   stub_model = StubTimeSeriesModel()
   data = self._make_test_data(
       length=100, cut_start=cut_start, cut_end=cut_end, offset=offset)
   reader = input_pipeline.NumpyReader(data)
   input_fn = test_utils.AllWindowInputFn(reader, window_size=10)
   chainer = state_management.ChainingStateManager(state_saving_interval=1)
   features, _ = input_fn()
   stub_model.initialize_graph()
   chainer.initialize_graph(model=stub_model)
   model_outputs = chainer.define_loss(
       model=stub_model,
       features=features,
       mode=estimator_lib.ModeKeys.TRAIN)
   with self.test_session() as session:
     variables.global_variables_initializer().run()
     coord = coordinator_lib.Coordinator()
     queue_runner_impl.start_queue_runners(session, coord=coord)
     # Ten loss evaluations whose values are discarded, then the one returned.
     num_discarded_evals = 10
     for _ in range(num_discarded_evals):
       model_outputs.loss.eval()
     returned_loss = model_outputs.loss.eval()
     coord.request_stop()
     coord.join()
     return returned_loss
Exemplo n.º 16
0
  def testGeneratorSingleInputFn(self):
    """Expects only features to be returned when `target_key` is None."""

    def generator():
      for index in range(2):
        yield {'a': np.ones(1) * index}

    with self.test_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator,
          target_key=None,
          batch_size=2,
          shuffle=False,
          num_epochs=1)
      features = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      fetched = session.run([features])
      expected_a = np.asarray([0, 1]).reshape(-1, 1)
      self.assertAllEqual(fetched[0]['a'], expected_a)

      session.run([features])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features])

      coord.request_stop()
      coord.join(threads)
Exemplo n.º 17
0
  def testGeneratorInputFnWithDifferentDimensionsOfFeatures(self):
    """Checks the first batch when features and label have different shapes."""

    def generator():
      for index in range(100):
        yield {
            'a': np.ones((10, 10)) * index,
            'b': np.ones((5, 5)) * index + 32,
            'label': np.ones((3, 3)) * index - 32,
        }

    def first_two_items(side):
      # A zeros matrix followed by a ones matrix, stacked along a new leading
      # axis: the values generated for index 0 and index 1 before any offset.
      stacked = np.vstack((np.zeros((side, side)), np.ones((side, side))))
      return stacked.reshape(2, side, side)

    with self.test_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator,
          target_key="label",
          batch_size=2,
          shuffle=False,
          num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      got_features, got_target = session.run([features, target])
      self.assertAllEqual(got_features['a'], first_two_items(10))
      self.assertAllEqual(got_features['b'], first_two_items(5) + 32)
      self.assertAllEqual(got_target, first_two_items(3) - 32)

      coord.request_stop()
      coord.join(threads)
Exemplo n.º 18
0
    def testNumpyInputFnWithZeroEpochs(self):
        """Expects OutOfRangeError on the first fetch when `num_epochs` is 0."""
        x = {'a': np.arange(4) * 1.0, 'b': np.arange(32, 36)}
        y = np.arange(-32, -28)

        with self.test_session() as session:
            input_fn = numpy_io.numpy_input_fn(
                x, y, batch_size=2, shuffle=False, num_epochs=0)
            features, target = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(
                session, coord=coord)

            fetches = [features, target]
            with self.assertRaises(errors.OutOfRangeError):
                session.run(fetches)

            coord.request_stop()
            coord.join(threads)
Exemplo n.º 19
0
 def testRespectCoordShouldStop(self):
   """Expects a queue runner to do no work when a stop was already requested."""
   with self.cached_session() as sess:
     # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
     initial_value = constant_op.constant(0, dtype=dtypes.int64)
     var = variables.VariableV1(initial_value)
     count_up_to = var.count_up_to(3)
     queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
     variables.global_variables_initializer().run()
     qr = queue_runner_impl.QueueRunner(queue, [count_up_to])
     # Ask the coordinator to stop.  The queue runner should
     # finish immediately.
     coord = coordinator.Coordinator()
     coord.request_stop()
     threads = qr.create_threads(sess, coord)
     thread_names = sorted([thread.name for thread in threads])
     expected_names = [
         "QueueRunnerThread-fifo_queue-CountUpTo:0",
         "QueueRunnerThread-fifo_queue-close_on_stop",
     ]
     self.assertEqual(thread_names, expected_names)
     for thread in threads:
       thread.start()
     coord.join()
     self.assertEqual(0, len(qr.exceptions_raised))
     # The variable should be 0.
     self.assertEqual(0, var.eval())
    def _test_pass_to_next(self, read_offset, step, correct_offset):
        """Returns the loss on a second slice after evaluating a first slice.

        The data has length `100 + read_offset`. The first slice drops the
        last `read_offset` entries along the first axis of every array; the
        second slice drops the first `read_offset` entries. Both losses are
        defined through the same `ChainingStateManager`.

        Args:
          read_offset: number of entries trimmed from each slice.
          step: forwarded to `_make_test_data`.
          correct_offset: forwarded to `StubTimeSeriesModel`.

        Returns:
          The evaluated loss for the second slice.
        """
        stub_model = StubTimeSeriesModel(correct_offset=correct_offset)
        data = self._make_test_data(
            length=100 + read_offset,
            cut_start=None,
            cut_end=None,
            offset=100.,
            step=step)
        leading_slice = {
            key: value[:-read_offset] for key, value in data.items()}
        init_input_fn = input_pipeline.WholeDatasetInputFn(
            input_pipeline.NumpyReader(leading_slice))
        trailing_slice = {
            key: value[read_offset:] for key, value in data.items()}
        result_input_fn = input_pipeline.WholeDatasetInputFn(
            input_pipeline.NumpyReader(trailing_slice))

        chainer = state_management.ChainingStateManager(
            state_saving_interval=1)
        stub_model.initialize_graph()
        chainer.initialize_graph(model=stub_model)
        train_mode = estimator_lib.ModeKeys.TRAIN
        init_features = init_input_fn()[0]
        init_model_outputs = chainer.define_loss(
            model=stub_model, features=init_features, mode=train_mode)
        result_features = result_input_fn()[0]
        result_model_outputs = chainer.define_loss(
            model=stub_model, features=result_features, mode=train_mode)
        with self.cached_session() as session:
            variables.global_variables_initializer().run()
            coord = coordinator_lib.Coordinator()
            queue_runner_impl.start_queue_runners(session, coord=coord)
            # Evaluate the first slice's loss before the second slice's.
            init_model_outputs.loss.eval()
            returned_loss = result_model_outputs.loss.eval()
            coord.request_stop()
            coord.join()
            return returned_loss
Exemplo n.º 21
0
 def _random_window_input_fn_test_template(self,
                                           time_series_reader,
                                           window_size,
                                           batch_size,
                                           num_features,
                                           discard_out_of_order=False):
     """Fetches one batch from a RandomWindowInputFn and validates it.

     Checks the shapes of times and values, that times within each window
     increase by exactly 1, that times have dtype int64, and that each value
     feature equals `times * 2 + feature_number`.

     Args:
       time_series_reader: reader passed to `RandomWindowInputFn`.
       window_size: expected size of the second axis of each feature.
       batch_size: expected size of the first axis of each feature.
       num_features: expected size of the last axis of the values.
       discard_out_of_order: accepted but not forwarded to the input function
         in this method. NOTE(review): confirm whether it should be.

     Returns:
       The fetched features.
     """
     input_fn = input_pipeline.RandomWindowInputFn(
         time_series_reader=time_series_reader,
         window_size=window_size,
         batch_size=batch_size)
     result, _ = input_fn()
     init_op = variables.local_variables_initializer()
     with self.cached_session() as session:
         coord = coordinator_lib.Coordinator()
         queue_runner_impl.start_queue_runners(session, coord=coord)
         session.run(init_op)
         features = session.run(result)
         coord.request_stop()
         coord.join()

     times = features[TrainEvalFeatures.TIMES]
     self.assertAllEqual([batch_size, window_size], times.shape)
     for window_position in range(window_size - 1):
         next_position = window_position + 1
         for batch_position in range(batch_size):
             # Checks that all times are contiguous
             self.assertEqual(
                 times[batch_position, next_position],
                 times[batch_position, window_position] + 1)

     values = features[TrainEvalFeatures.VALUES]
     self.assertAllEqual(
         [batch_size, window_size, num_features], values.shape)
     self.assertEqual("int64", features[TrainEvalFeatures.TIMES].dtype)
     for feature_number in range(num_features):
         expected_values = (
             features[TrainEvalFeatures.TIMES] * 2. + feature_number)
         self.assertAllEqual(
             expected_values,
             features[TrainEvalFeatures.VALUES][:, :, feature_number])
     return features
Exemplo n.º 22
0
    def setUp(self, num_workers, num_ps):
        """Starts a multi-process cluster and connects a strategy to it.

        Args:
          num_workers: number of workers passed to the cluster factory.
          num_ps: number of parameter servers passed to the cluster factory.
        """
        super(BaseFaultToleranceTest, self).setUp()

        cluster = multi_worker_test_base.create_multi_process_cluster(
            num_workers=num_workers, num_ps=num_ps, rpc_layer="grpc")
        self._cluster = cluster
        cluster_def = cluster.cluster_resolver.cluster_spec().as_dict()
        self._cluster_def = cluster_def
        # Add a "chief" entry on a freshly picked local port.
        chief_address = (
            "localhost:%d" % multi_worker_test_base.pick_unused_port())
        cluster_def["chief"] = [chief_address]
        cluster_spec = server_lib.ClusterSpec(self._cluster_def)
        cluster_resolver = SimpleClusterResolver(
            cluster_spec, rpc_layer="grpc")

        # The strategy's constructor would connect to the cluster.
        self.strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
            cluster_resolver)
        self.cluster_coord = cluster_coordinator.ClusterCoordinator(
            self.strategy)

        self.thread_coord = thread_coordinator.Coordinator(
            clean_stop_exception_types=[])
        self.num_workers = num_workers
        self.num_ps = num_ps
Exemplo n.º 23
0
  def testConditionallyEnqueueAndBatch(self):
    """Expects a conditional batch to hold only inputs that were kept.

    The input tensor is 1.0 or 2.0 depending on a random draw and is kept
    only when it equals 2.0, so the batch should consist solely of 2.0.
    """
    random_seed.set_random_seed(1234)
    uniform_draw = random_ops.random_uniform([])
    tensor = control_flow_ops.cond(
        math_ops.greater(.5, uniform_draw),
        lambda: constant_op.constant(1.0),
        lambda: constant_op.constant(2.0))
    keep_input = math_ops.equal(tensor, 2.0)
    batch_size = 4

    # Set up the test graph.
    [batch] = sampling_ops._conditional_batch(  # pylint: disable=protected-access
        [tensor], keep_input, batch_size)

    # Check conditional operation.
    with self.test_session():
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(coord=coord)

      batch_np = batch.eval()

      coord.request_stop()
      coord.join(threads)

    # Check that all elements in batch come from tensors with acceptance prob
    # 1, so that none come from acceptance prob 0.
    expected_batch = [2.0] * batch_size
    self.assertListEqual(list(batch_np), expected_batch)
Exemplo n.º 24
0
    def setUp(self):
        """Resets the graph and builds the feeds, input queue and coordinator.

        Creates three placeholders (scalar int32, unknown-length int64 vector,
        length-3 string vector), a sparse tensor, and a padding FIFO queue
        whose enqueue op takes the placeholders and whose dequeue outputs are
        stored on the test instance.
        """
        ops.reset_default_graph()

        self.scalar_int_feed = array_ops.placeholder(dtypes_lib.int32, ())
        self.unk_int64_feed = array_ops.placeholder(
            dtypes_lib.int64, (None, ))
        self.vec3_str_feed = array_ops.placeholder(dtypes_lib.string, (3, ))
        self.sparse_c = sparse_tensor.SparseTensor(
            indices=[[0]], values=[1.0], dense_shape=[1])

        self._coord = coordinator.Coordinator()
        # Make capacity very large so we can feed all the inputs in the
        # main thread without blocking
        queue_dtypes = [
            dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.string]
        queue_shapes = [(), (None, ), (3, )]
        input_queue = data_flow_ops.PaddingFIFOQueue(
            5000, dtypes=queue_dtypes, shapes=queue_shapes)

        feeds = (
            self.scalar_int_feed, self.unk_int64_feed, self.vec3_str_feed)
        self._input_enqueue_op = input_queue.enqueue(feeds)
        dequeued = input_queue.dequeue()
        self.scalar_int, self.unk_int64, self.vec3_str = dequeued
        self._close_op = input_queue.close()

        # Nothing is running yet: no threads and no session.
        self._threads = None
        self._sess = None
Exemplo n.º 25
0
  def testNumpyInputFnWithVeryLargeBatchSizeAndMultipleEpochs(self):
    """Expects both epochs in one batch when batch size exceeds the data.

    Two rows repeated for two epochs with batch size 128 should arrive as a
    single four-row batch, after which the input raises OutOfRangeError.
    """
    x = {'a': np.arange(2) * 1.0, 'b': np.arange(32, 34)}
    y = np.arange(-32, -30)

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=128, shuffle=False, num_epochs=2)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      got_features, got_target = session.run([features, target])
      self.assertAllEqual(got_features['a'], [0, 1, 0, 1])
      self.assertAllEqual(got_features['b'], [32, 33, 32, 33])
      self.assertAllEqual(got_target, [-32, -31, -32, -31])

      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Exemplo n.º 26
0
    def testRejectionBatchingBehavior(self):
        """Runs `stratified_sample` once on batched, randomly labelled input.

        Every example in an input batch carries the same label, 1 or 3,
        chosen at random per batch. The test only checks that one output
        batch can be produced; it makes no assertion on the values.
        """
        out_batch_size = 20
        in_batch_size = 11

        def uniform_labels(label):
            # Branch builder for `cond`: an int32 vector filled with `label`.
            return lambda: array_ops.ones(
                [in_batch_size], dtype=dtypes.int32) * label

        values_in = [array_ops.zeros([in_batch_size, 2, 3, 4])]
        coin_flip = math_ops.greater(.5, random_ops.random_uniform([]))
        labels_in = control_flow_ops.cond(coin_flip,
                                          uniform_labels(1),
                                          uniform_labels(3))
        target_probs = np.array([0, .2, 0, .8, 0])
        data_batch, labels = sampling_ops.stratified_sample(
            values_in,
            labels_in,
            target_probs,
            out_batch_size,
            init_probs=[0, .3, 0, .7, 0],
            enqueue_many=True)

        with self.cached_session() as sess:
            coord = coordinator.Coordinator()
            runner_threads = queue_runner_impl.start_queue_runners(
                coord=coord)

            sess.run([data_batch, labels])

            coord.request_stop()
            coord.join(runner_threads)
Exemplo n.º 27
0
    def testNormalBehavior(self):
        """Checks `rejection_sample` only emits tensors accepted with prob 1.

        The input is randomly 1.0 or 2.0 and the acceptance probability is
        the value minus one, so every sampled element must equal 2.0.
        """
        coin_flip = math_ops.greater(.5, random_ops.random_uniform([]))
        candidates = [
            control_flow_ops.cond(coin_flip,
                                  lambda: constant_op.constant(1.0),
                                  lambda: constant_op.constant(2.0))
        ]

        def accept_prob_fn(tensors):
            # 1.0 -> probability 0 (always rejected); 2.0 -> probability 1.
            return tensors[0] - 1.0

        batch_size = 10
        expected_batch = [2.0] * batch_size

        # Set up graph.
        sample = sampling_ops.rejection_sample(candidates, accept_prob_fn,
                                               batch_size)

        with self.cached_session() as sess:
            coord = coordinator.Coordinator()
            runner_threads = queue_runner_impl.start_queue_runners(
                coord=coord)

            for _ in range(5):
                first_output = sess.run(sample)[0]
                self.assertListEqual(expected_batch, list(first_output))

            coord.request_stop()
            coord.join(runner_threads)
Exemplo n.º 28
0
    def __init__(self,
                 graph=None,
                 ready_op=USE_DEFAULT,
                 is_chief=True,
                 init_op=USE_DEFAULT,
                 init_feed_dict=None,
                 local_init_op=USE_DEFAULT,
                 logdir=None,
                 summary_op=USE_DEFAULT,
                 saver=USE_DEFAULT,
                 global_step=USE_DEFAULT,
                 save_summaries_secs=120,
                 save_model_secs=600,
                 recovery_wait_secs=30,
                 stop_grace_secs=120,
                 checkpoint_basename="model.ckpt",
                 session_manager=None,
                 summary_writer=USE_DEFAULT,
                 init_fn=None):
        """Create a `Supervisor`.

        The constructor resolves default ops inside `graph`, records the
        checkpoint/summary settings (chief supervisors only), sets up the
        session manager, verifies the setup and finally finalizes the graph.

        Args:
          graph: A `Graph`.  The graph the model will use; the default
            `Graph` is used when this is `None`.  The supervisor may add
            operations to the graph before creating a session, but the
            caller should not modify the graph after passing it in.
          ready_op: `Operation` used to check whether the model is
            initialized.  It is run by supervisors in
            `prepare_or_wait_for_session()`, and the model is considered
            ready if the operation succeeds.  Defaults to the operation
            returned from `tf.assert_variables_initialized()`.  If `None`,
            the model is not checked for readiness.
          is_chief: If True, create a chief supervisor in charge of
            initializing and restoring the model.  If False, create a
            supervisor that relies on a chief supervisor for inits and
            restore.
          init_op: `Operation`.  Used by chief supervisors to initialize the
            model when it can not be recovered.  Defaults to an `Operation`
            that initializes all variables.  If `None`, no initialization is
            done automatically unless you pass a value for `init_fn`.
          init_feed_dict: A dictionary that maps `Tensor` objects to feed
            values.  Used when `init_op` is evaluated.
          local_init_op: `Operation`.  Used by all supervisors to run
            initializations that should run for every new supervisor
            instance.  By default these are table initializers and
            initializers for local variables.  If `None`, no further
            per-supervisor-instance initialization is done automatically.
          logdir: A string.  Optional path to a directory where the model is
            checkpointed and events for the visualizer are logged.  Used by
            chief supervisors.  The directory will be created if it does not
            exist.
          summary_op: An `Operation` that returns a Summary for the event
            logs.  Used by chief supervisors if a `logdir` was specified.
            Defaults to the operation returned from `merge_all_summaries()`.
            If `None`, summaries are not computed automatically.
          saver: A Saver object.  Used by chief supervisors if a `logdir`
            was specified.  Defaults to the saver returned by `Saver()`.  If
            `None`, the model is not saved automatically.
          global_step: An integer Tensor of size 1 that counts steps.  Its
            value is used in summaries and checkpoint filenames.  Defaults
            to the op named 'global_step' in the graph if it exists, is of
            rank 1, size 1, and of type tf.int32 or tf.int64.  If `None`,
            the global step is not recorded in summaries and checkpoint
            files.  Used by chief supervisors if a `logdir` was specified.
          save_summaries_secs: Number of seconds between the computation of
            summaries for the event log.  Defaults to 120 seconds.  Pass 0
            to disable summaries.
          save_model_secs: Number of seconds between the creation of model
            checkpoints.  Defaults to 600 seconds.  Pass 0 to disable
            checkpoints.
          recovery_wait_secs: Number of seconds between checks that the
            model is ready.  Used by supervisors when waiting for a chief
            supervisor to initialize or restore the model.  Defaults to 30
            seconds.
          stop_grace_secs: Grace period, in seconds, given to running
            threads to stop when `stop()` is called.  Defaults to 120
            seconds.
          checkpoint_basename: The basename for checkpoint saving.
          session_manager: `SessionManager`, which manages Session creation
            and recovery.  If it is `None`, a default `SessionManager` will
            be created with the set of arguments passed in, for backwards
            compatibility.
          summary_writer: `SummaryWriter` to use or `USE_DEFAULT`.  Can be
            `None` to indicate that no summaries should be written.
          init_fn: Optional callable used to initialize the model.  Called
            after the optional `init_op` is called.  The callable must
            accept one argument, the session being initialized.
        """
        # Resolve the graph first: every default op below is looked up or
        # created inside it.
        target_graph = ops.get_default_graph() if graph is None else graph
        with target_graph.as_default():
            self._init_ready_op(ready_op=ready_op)
            self._init_init_op(init_op=init_op, init_feed_dict=init_feed_dict)
            self._init_local_init_op(local_init_op=local_init_op)
            self._init_saver(saver=saver)
            self._init_summary_op(summary_op=summary_op)
            self._init_global_step(global_step=global_step)

        self._graph = target_graph
        self._is_chief = is_chief
        self._coord = coordinator.Coordinator()
        self._started_threads = []
        self._recovery_wait_secs = recovery_wait_secs
        self._stop_grace_secs = stop_grace_secs
        self._init_fn = init_fn

        # Only chief supervisors write checkpoints and events, so everything
        # tied to those activities stays None on non-chief supervisors.
        self._logdir = logdir if is_chief else None
        self._save_summaries_secs = save_summaries_secs if is_chief else None
        self._save_model_secs = save_model_secs if is_chief else None
        self._save_path = None
        self._summary_writer = None

        if is_chief:
            if logdir:
                self._save_path = os.path.join(logdir, checkpoint_basename)
            if summary_writer is not Supervisor.USE_DEFAULT:
                # An explicit writer (possibly None) always wins.
                self._summary_writer = summary_writer
            elif logdir:
                # Default writer: only possible when there is a logdir.
                self._summary_writer = summary_io.SummaryWriter(logdir)

        self._init_session_manager(session_manager=session_manager)
        self._verify_setup()
        # The graph is not allowed to change anymore.
        target_graph.finalize()
Exemplo n.º 29
0
 def _input_statistics_test_template(self,
                                     stat_object,
                                     num_features,
                                     dtype,
                                     give_full_data,
                                     warmup_iterations=0,
                                     rtol=1e-6,
                                     data_length=500,
                                     chunk_size=4):
     """Checks the statistics `stat_object` computes on a synthetic series.

     The series has `data_length` steps with times `2 * t - 3` and values
     `t + f` for feature index `f`.  It is read in random windows of
     `chunk_size` steps, and the resulting statistics are compared against
     values computed directly with numpy.

     Args:
       stat_object: Object under test; must provide `initialize_graph` and,
         when `give_full_data` is set, `set_data`.
       num_features: Number of features in the synthetic series.
       dtype: Dtype of the values; its `as_numpy_dtype` is used to build
         the data.
       give_full_data: If true, pass the whole `(times, values)` pair to
         `stat_object.set_data` before building the graph.
       warmup_iterations: Number of times the observation count is
         evaluated before any assertion is made.
       rtol: Relative tolerance used for every comparison.
       data_length: Number of time steps in the synthetic series.
       chunk_size: Window size handed to the input function.
     """
     test_graph = ops.Graph()
     with test_graph.as_default():
         np_dtype = dtype.as_numpy_dtype
         # values[0, t, f] == t + f; the leading axis has size 1.
         step_column = numpy.arange(data_length, dtype=np_dtype)[..., None]
         feature_row = numpy.arange(num_features, dtype=np_dtype)[None, ...]
         values = (step_column + feature_row)[None]
         times = 2 * (numpy.arange(data_length)[None]) - 3
         if give_full_data:
             stat_object.set_data((times, values))

         features = {
             TrainEvalFeatures.TIMES: times,
             TrainEvalFeatures.VALUES: values
         }
         reader = input_pipeline.NumpyReader(features)
         input_fn = input_pipeline.RandomWindowInputFn(
             batch_size=16,
             window_size=chunk_size,
             time_series_reader=reader)
         statistics = stat_object.initialize_graph(features=input_fn()[0])

         # Reference values, computed with numpy only.
         window_steps = numpy.arange(chunk_size)
         expected_start_mean = (range(num_features) +
                                numpy.mean(window_steps)[None])
         expected_start_variance = numpy.tile(
             numpy.var(window_steps)[None], [num_features])
         expected_overall_mean = numpy.mean(values[0], axis=0)
         expected_overall_variance = numpy.var(values[0], axis=0)

         with self.session(graph=test_graph) as session:
             variables.global_variables_initializer().run()
             coord = coordinator_lib.Coordinator()
             queue_runner_impl.start_queue_runners(session, coord=coord)

             for _ in range(warmup_iterations):
                 # A control dependency should ensure that, for queue-based
                 # statistics, a use of any statistic is preceded by an
                 # update of all adaptive statistics.
                 statistics.total_observation_count.eval()

             # (expected value, tensor) pairs, evaluated strictly in this
             # order.
             checks = (
                 (expected_start_mean,
                  statistics.series_start_moments.mean),
                 (expected_start_variance,
                  statistics.series_start_moments.variance),
                 (expected_overall_mean,
                  statistics.overall_feature_moments.mean),
                 (expected_overall_variance,
                  statistics.overall_feature_moments.variance),
                 (-3, statistics.start_time),
                 (data_length, statistics.total_observation_count),
             )
             for expected, tensor in checks:
                 self.assertAllClose(expected, tensor.eval(), rtol=rtol)

             coord.request_stop()
             coord.join()
Exemplo n.º 30
0
def _call_for_each_replica(distribution, fn, args, kwargs):
    """Run `fn` in separate threads, once per replica/worker device.

    One `_MirroredReplicaThread` is created per entry of
    `distribution.worker_devices`.  The calling thread then drives the
    replica threads in lock-step: it releases a thread via `should_run`,
    waits for it to signal `has_paused`, and, whenever the threads paused
    inside `get_replica_context().merge_call()`, runs the merge function
    itself and hands each thread its slice of the result.

    Args:
      distribution: the DistributionStrategy object.
      fn: function to run (will be run once per device, each in its own
        thread).
      args: positional arguments for `fn`.
      kwargs: keyword arguments for `fn`.

    Returns:
      Merged return value of `fn` across all replicas.  The loop also has
      early `return None` exits for when the coordinator reports a stop;
      the `finally` clause (including `coord.join`) still runs on those
      paths.

    Raises:
      RuntimeError: If fn() calls get_replica_context().merge_call() a
        different number of times from the available devices.
    """
    # TODO(josh11b): Add this option once we add synchronization to variable
    # creation. Until then, this is pretty unsafe to use.
    run_concurrently = False
    if not context.executing_eagerly():
        # Needed for per-thread device, etc. contexts in graph mode.
        ops.get_default_graph().switch_to_thread_local()

    # `_RequestedStop` is registered as a "clean stop" exception type.
    # NOTE(review): presumably this keeps a requested stop from being
    # re-raised as an error by the coordinator -- confirm against
    # `Coordinator`.
    coord = coordinator.Coordinator(
        clean_stop_exception_types=(_RequestedStop, ))

    # A single store is passed to every replica's variable creator, together
    # with that replica's index.
    shared_variable_store = {}

    # TODO(isaprykin): Create these threads once instead of during every run()
    # call.
    threads = []
    for index, d in enumerate(distribution.worker_devices):
        variable_creator_fn = shared_variable_creator.make_fn(
            shared_variable_store, index)
        # Each thread receives only the per-device slice of `args`/`kwargs`.
        t = MirroredStrategy._MirroredReplicaThread(  # pylint: disable=protected-access
            distribution, coord, d, variable_creator_fn, fn,
            *values.select_device(d, args), **values.select_device(d, kwargs))
        threads.append(t)

    for t in threads:
        t.start()

    # When `fn` starts `should_run` event is set on _MirroredReplicaThread
    # (`MRT`) threads. The execution waits until
    # `MRT.has_paused` is set, which indicates that either `fn` is
    # complete or a `get_replica_context().merge_call()` is called.  If `fn` is
    # complete, then `MRT.done` is set to True.  Otherwise, arguments
    # of `get_replica_context().merge_call` from all paused threads are grouped
    # and the `merge_fn` is performed.  Results of the
    # `get_replica_context().merge_call` are then set to `MRT.merge_result`.
    # Each such `get_replica_context().merge_call` call returns the
    # `MRT.merge_result` for that thread when `MRT.should_run` event
    # is reset again. Execution of `fn` resumes.

    try:
        with coord.stop_on_exception():
            all_done = False
            while not all_done and not coord.should_stop():
                # One `done` flag per thread for this round.
                done = []
                if run_concurrently:
                    # Release every thread first, then wait for each one.
                    for t in threads:
                        t.should_run.set()
                    for t in threads:
                        t.has_paused.wait()
                        t.has_paused.clear()
                        if coord.should_stop():
                            return None
                        done.append(t.done)
                else:
                    # Release one thread at a time and wait for it to pause
                    # before releasing the next.
                    for t in threads:
                        t.should_run.set()
                        t.has_paused.wait()
                        t.has_paused.clear()
                        if coord.should_stop():
                            return None
                        done.append(t.done)
                if coord.should_stop():
                    return None
                all_done = all(done)
                if not all_done:
                    # Some threads finished while others are paused in a
                    # merge_call: the replicas disagree on the number of
                    # merge calls.
                    if any(done):
                        raise RuntimeError(
                            "Some replicas made a different number of "
                            "replica_context().merge_call() calls.")
                    # get_replica_context().merge_call() case
                    merge_args = values.regroup(
                        {t.device: t.merge_args
                         for t in threads})
                    merge_kwargs = values.regroup(
                        {t.device: t.merge_kwargs
                         for t in threads})
                    # We capture the name_scope of the MRT when we call merge_fn
                    # to ensure that if we have opened a name scope in the MRT,
                    # it will be respected when executing the merge function. We only
                    # capture the name_scope from the first MRT and assume it is
                    # the same for all other MRTs.
                    mtt_captured_name_scope = threads[0].captured_name_scope
                    with ops.name_scope(mtt_captured_name_scope):
                        # The merge function is likewise taken from the first
                        # thread only and runs on the calling thread.
                        merge_result = threads[0].merge_fn(
                            distribution, *merge_args, **merge_kwargs)
                    # Hand each thread its per-device part of the merge result.
                    for t in threads:
                        t.merge_result = values.select_device(
                            t.device, merge_result)
    finally:
        # Release every thread once more before joining.
        # NOTE(review): presumably this lets threads still waiting on
        # `should_run` wake up and exit -- confirm against
        # `_MirroredReplicaThread`.
        for t in threads:
            t.should_run.set()
        coord.join(threads)

    # Combine the per-device results of `fn` into a single value.
    return values.regroup({t.device: t.main_result for t in threads})