def testScan_MultiOutputMismatchedInitializer(self):
   elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
   initializer = np.array(1.0)
   # Mismatch: the fn emits a two-element tuple but the initializer is a single scalar
   with self.assertRaisesRegexp(
       ValueError, "two structures don't have the same nested structure"):
     functional_ops.scan(lambda a, x: (a, -a), elems, initializer)
  def testScan_Simple(self):
    with self.test_session():
      elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
      v = constant_op.constant(2.0, name="v")

      r = functional_ops.scan(lambda a, x: math_ops.mul(a, x), elems)
      self.assertAllEqual([1., 2., 6., 24., 120., 720.], r.eval())

      r = functional_ops.scan(
          lambda a, x: math_ops.mul(a, x), elems, initializer=v)
      self.assertAllEqual([2., 4., 12., 48., 240., 1440.], r.eval())
  def testScan_Simple(self):
    elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
    v = constant_op.constant(2.0, name="v")

    # pylint: disable=unnecessary-lambda
    r = functional_ops.scan(lambda a, x: math_ops.multiply(a, x), elems)
    self.assertAllEqual([1., 2., 6., 24., 120., 720.], self.evaluate(r))

    r = functional_ops.scan(
        lambda a, x: math_ops.multiply(a, x), elems, initializer=v)
    self.assertAllEqual([2., 4., 12., 48., 240., 1440.], self.evaluate(r))
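For reference, the two scans above compute running products over `elems`; a minimal NumPy equivalent (same inputs, no TensorFlow) is:

import numpy as np

elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
print(np.cumprod(elems))        # [  1.   2.   6.  24. 120. 720.]
print(2.0 * np.cumprod(elems))  # [   2.    4.   12.   48.  240. 1440.]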
Example #6
    def testScan_Simple(self):
        with self.test_session():
            elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                                         name="data")
            v = constant_op.constant(2.0, name="v")

            r = functional_ops.scan(lambda a, x: math_ops.mul(a, x), elems)
            self.assertAllEqual([1., 2., 6., 24., 120., 720.], r.eval())

            r = functional_ops.scan(lambda a, x: math_ops.mul(a, x),
                                    elems,
                                    initializer=v)
            self.assertAllEqual([2., 4., 12., 48., 240., 1440.], r.eval())
  def testScan_Reverse(self):
    with self.test_session():
      elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
      v = constant_op.constant(2.0, name="v")

      # pylint: disable=unnecessary-lambda
      r = functional_ops.scan(lambda a, x: math_ops.multiply(a, x), elems,
                              reverse=True)
      self.assertAllEqual([720., 720., 360., 120., 30., 6.], self.evaluate(r))
      r = functional_ops.scan(
          lambda a, x: math_ops.multiply(a, x), elems, initializer=v,
          reverse=True)
      self.assertAllEqual([1440., 1440., 720., 240., 60., 12.],
                          self.evaluate(r))
  def testScanVaryingShape(self):
    with self.cached_session() as sess:
      x = array_ops.placeholder(dtype=dtypes.float32, shape=[None, 2])
      x_t = array_ops.transpose(x)
      # scan over dimension 0 (with shape None)
      result = functional_ops.scan(lambda a, x: a + x, x)
      # scanned over transposed dimension 0 (with shape 2)
      result_t = functional_ops.scan(lambda a, x: a + x, x_t, infer_shape=False)
      # ensure gradients can be calculated
      result_grad = gradients_impl.gradients(result, [x])[0]
      result_t_grad = gradients_impl.gradients(result_t, [x_t])[0]

      # smoke test to ensure they all evaluate
      sess.run([result, result_t, result_grad, result_t_grad],
               feed_dict={x: [[1.0, 2.0]]})
Example #10
 def hiddens(self, input_idxes):
     "Expects input_idxes to be input_idxes of size TIMESTEPS * BATCH_SIZE"
     # embed input encoded sentences
     embedded_timesteps = self.embedding(input_idxes)
     batch_size = tf.shape(input_idxes)[1]
     initial_state = self.rnn_cell.zero_state(batch_size)
     return  functional_ops.scan(self.step_fun, embedded_timesteps, initializer=initial_state)
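The `step_fun` and `rnn_cell` used above are not shown; a hypothetical sketch of the step function, assuming `rnn_cell` follows the usual TensorFlow RNNCell calling convention `cell(inputs, state) -> (output, new_state)`, is:

 def step_fun(self, state, embedded_timestep):
     # One cell update per scanned timestep; scan threads the state through time.
     _, new_state = self.rnn_cell(embedded_timestep, state)
     return new_state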
Example #11
def power_sums_tensor(array_size, power_matrix, multiplier):
  r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1).

  Args:
    array_size: The number of non-trivial sums to pre-compute.
    power_matrix: The "A" matrix above.
    multiplier: The "B" matrix above
  Returns:
    A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
      S[0] is the zero matrix
      S[1] is B
      S[2] is A B A^T + B
      ...and so on
  """
  array_size = math_ops.cast(array_size, dtypes.int32)
  power_matrix = ops.convert_to_tensor(power_matrix)
  identity_like_power_matrix = linalg_ops.eye(
      array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype)
  identity_like_power_matrix.set_shape(
      ops.convert_to_tensor(power_matrix).get_shape())
  transition_powers = functional_ops.scan(
      lambda previous_power, _: math_ops.matmul(previous_power, power_matrix),
      math_ops.range(array_size - 1),
      initializer=identity_like_power_matrix)
  summed = math_ops.cumsum(
      array_ops.concat([
          array_ops.expand_dims(multiplier, 0), math_ops.matmul(
              batch_times_matrix(transition_powers, multiplier),
              transition_powers,
              adjoint_b=True)
      ], 0))
  return array_ops.concat(
      [array_ops.expand_dims(array_ops.zeros_like(multiplier), 0), summed], 0)
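A minimal usage sketch with made-up 2x2 matrices (the `Session` call assumes the TF 1.x graph-mode API used throughout these examples):

import numpy as np
import tensorflow as tf

A = np.array([[0.9, 0.1], [0.0, 0.8]], dtype=np.float32)  # the "A" power_matrix
B = np.eye(2, dtype=np.float32)                           # the "B" multiplier
sums = power_sums_tensor(array_size=3, power_matrix=A, multiplier=B)
with tf.Session() as sess:
  s = sess.run(sums)
# s[0] is the zero matrix, s[1] == B, s[2] == A B A^T + B, and so on.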
Example #12
def ctc_label_dense_to_sparse(labels, label_lengths):
    label_shape = tf.shape(labels)
    num_batches_tns = tf.pack([label_shape[0]])
    max_num_labels_tns = tf.pack([label_shape[1]])

    def range_less_than(_, current_input):
        return tf.range(label_shape[1]) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    dense_mask = functional_ops.scan(range_less_than,
                                     label_lengths,
                                     initializer=init,
                                     parallel_iterations=1)
    # dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(
        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(
        tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                   tf.reverse(label_shape, [True])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(
        tf.reshape(tf.concat(0, [batch_ind, label_ind]), [2, -1]))
    vals_sparse = tf.gather_nd(labels, indices)
    return tf.SparseTensor(tf.to_int64(indices), vals_sparse,
                           tf.to_int64(label_shape))
 def testScan_MultiInputSingleOutput(self):
   elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
   initializer = np.array(1.0)
   # Multiply a * 1 each time
   r = functional_ops.scan(lambda a, x: a * (x[0] + x[1]),
                           (elems + 1, -elems), initializer)
   self.assertAllEqual([1.0, 1.0, 1.0, 1.0, 1.0, 1.0], self.evaluate(r))
Example #14
 def testScan_MultiInputSameTypeOutput(self):
   elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
   r = functional_ops.scan(lambda a, x: (a[0] + x[0], a[1] + x[1]),
                           (elems, -elems))
   r_value = self.evaluate(r)
   self.assertAllEqual(np.cumsum(elems), r_value[0])
   self.assertAllEqual(np.cumsum(-elems), r_value[1])
Example #16
def power_sums_tensor(array_size, power_matrix, multiplier):
    r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1).

  Args:
    array_size: The number of non-trivial sums to pre-compute.
    power_matrix: The "A" matrix above.
    multiplier: The "B" matrix above
  Returns:
    A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
      S[0] is the zero matrix
      S[1] is B
      S[2] is A B A^T + B
      ...and so on
  """
    array_size = math_ops.cast(array_size, dtypes.int32)
    power_matrix = ops.convert_to_tensor(power_matrix)
    identity_like_power_matrix = linalg_ops.eye(
        array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype)
    identity_like_power_matrix.set_shape(
        ops.convert_to_tensor(power_matrix).get_shape())
    transition_powers = functional_ops.scan(
        lambda previous_power, _: math_ops.matmul(previous_power, power_matrix
                                                  ),
        math_ops.range(array_size - 1),
        initializer=identity_like_power_matrix)
    summed = math_ops.cumsum(
        array_ops.concat([
            array_ops.expand_dims(multiplier, 0),
            math_ops.matmul(batch_times_matrix(transition_powers, multiplier),
                            transition_powers,
                            adjoint_b=True)
        ], 0))
    return array_ops.concat(
        [array_ops.expand_dims(array_ops.zeros_like(multiplier), 0), summed],
        0)
Example #17
 def testScanEmptyTensor(self):
     with self.test_session():
         x = functional_ops.scan(lambda x, _: x,
                                 math_ops.range(0),
                                 initializer=array_ops.ones([2, 4]))
         self.assertAllEqual([0, 2, 4], x.get_shape())
         self.assertAllEqual(x.get_shape(), x.eval().shape)
    def ctc_label_dense_to_sparse(self,labels, label_lengths):
        """Converts CTC labels from dense to sparse.

        # Arguments
            labels: dense CTC labels.
            label_lengths: length of the labels.

        # Returns
            A sparse tensor representation of the labels.
        """
        label_shape = tf.shape(labels)
        num_batches_tns = tf.stack([label_shape[0]])
        max_num_labels_tns = tf.stack([label_shape[1]])

        def range_less_than(_, current_input):
            return tf.expand_dims(tf.range(label_shape[1]), 0) < tf.fill(
                max_num_labels_tns, current_input)

        init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
        dense_mask = functional_ops.scan(range_less_than, label_lengths,
                                         initializer=init, parallel_iterations=1)
        dense_mask = dense_mask[:, 0, :]

        label_array = tf.reshape(tf.tile(tf.range(label_shape[1]), num_batches_tns),
                                 label_shape)
        label_ind = tf.boolean_mask(label_array, dense_mask)

        batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(label_shape[0]),
                                                      max_num_labels_tns), self.reverse(label_shape, 0)))
        batch_ind = tf.boolean_mask(batch_array, dense_mask)
        indices = tf.transpose(tf.reshape(self.concatenate([batch_ind, label_ind], axis=0), [2, -1]))

        vals_sparse = tf.gather_nd(labels, indices)

        return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
Example #20
File: odes.py  Project: imdone/tensorflow
  def integrate(self, evol_func, y0, time_grid):
    time_delta_grid = time_grid[1:] - time_grid[:-1]

    scan_func = self._make_scan_func(evol_func)

    y_grid = functional_ops.scan(scan_func, (time_grid[:-1], time_delta_grid),
                                 y0)
    return array_ops.concat([[y0], y_grid], axis=0)
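The `_make_scan_func` factory is not shown here; a minimal sketch of what such a factory could look like for a forward-Euler step (an assumption for illustration, not the library's actual integrator) is:

def _make_euler_scan_func(evol_func):
  # evol_func(y, t) is assumed to return dy/dt at state y and time t.
  def scan_func(y, t_and_dt):
    t, dt = t_and_dt
    return y + dt * evol_func(y, t)
  return scan_func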
Example #22
  def testScan_SingleInputMultiOutput(self):
    elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    initializer = (np.array(1.0), np.array(-1.0))
    r = functional_ops.scan(lambda a, x: (a[0] * x, -a[1] * x), elems,
                            initializer)
    r_value = self.evaluate(r)

    self.assertAllEqual([1.0, 2.0, 6.0, 24.0, 120.0, 720.0], r_value[0])
    self.assertAllEqual([1.0, -2.0, 6.0, -24.0, 120.0, -720.0], r_value[1])
Example #23
  def testScanUnknownShape(self):
    x = array_ops.placeholder(dtypes.float32)
    initializer = array_ops.placeholder(dtypes.float32)

    def fn(_, current_input):
      return current_input

    y = functional_ops.scan(fn, x, initializer=initializer)
    self.assertIs(None, y.get_shape().dims)
Example #24
    def _compute_predictions(self, init = None):
        """ Compute vanilla-RNN states and predictions. """

        with tf.variable_scope('states'):
            with tf.variable_scope("HMM"):
                with tf.variable_scope("transition"):
                    skip_prob = tf.get_variable("skip", shape=[1], initializer=tf.constant_initializer(1e-1))
                    #skip_prob = tf.Variable( np.array(1e-1, dtype=np.float32), name="skip") # .astype(np.float32)
                    self.W_trans = (1-skip_prob) * get_transition_matrix().astype(np.float32)  + skip_prob* np.eye(self.hidden_layer_size).astype(np.float32)
                    #self.W_trans = tf.Variable( transition_with_skips,
                    #                       name='W_trans', trainable=True)
                    print("W_trans", self.W_trans.get_shape())

                with tf.variable_scope("emission"):
                    "W_emit: [self.input_size, self.hidden_layer_size]"
                    if self.emission_init is None:
                        self.W_emit = tf.get_variable("W_emit", shape = [self.hidden_layer_size, self.input_size],
                                                  initializer = tf.random_normal_initializer(0.0, 1e-6))
                    else:
                        if not (self.emission_init.shape == (self.hidden_layer_size, self.input_size)):
                            print("self.emission_init.shape", self.emission_init.shape)
                            print("(self.hidden_layer_size, self.input_size)", (self.hidden_layer_size, self.input_size))
                            raise ValueError("wrong dimensions of  `self.emission_init`")
                        self.W_emit = tf.Variable(self.emission_init.astype(np.float32), name = "W_emit", trainable = False)
                    self.W_emit_summary = tf.image_summary("W_emit", tf.reshape(self.W_emit, [1,self.hidden_layer_size, self.input_size,1]))
                    "idea: impose kernel similarity:  maximize(W K W)"
                    "[ self.hidden_layer_size, self.nt_in_pore ]"

                    emission_in_pore_space = tf.matmul( self.map_hex_to_pore, self.W_emit)
                    self.emission_similarity = tf.reduce_sum( tf.diag_part( tf.matmul( tf.transpose(emission_in_pore_space),(emission_in_pore_space)) ),
                            name="emission_w_similarity")
            if init is None:
                initial_state = tf.ones([self.hidden_layer_size],
                                     name='initial_state')
                initial_state = initial_state/ self.hidden_layer_size
            else:
                initial_state = init
            #states = self._rnn_step_fw(initial_state[:,0], self.inputs[0,:])
            states = functional_ops.scan(self._rnn_step_fw, tf.identity(self.inputs),
                                         initializer=initial_state, name='states')

            states_fw_summary = tf.histogram_summary("states_fw", states)
            #states = states_fw
            #print("states:", states.get_shape())

        with tf.variable_scope('predictions'):
            # set some explicit initializer, orthogonal initialization
            "for now, keep identity mapping from hidden states to labels"
            "assume probability interpretation of values: should sum to one"
            W_pred = tf.Variable(np.eye(self.target_size, dtype = np.float32), name="W_pred", trainable=False)
            predictions = tf.matmul(states, W_pred, name='predictions')
            #predictions = states
            predictions_summary = tf.histogram_summary("predictions", predictions)
            #predictions = tf.nn.softmax(tf.matmul(states, W_pred), name='predictions'))
            # do predictions sum to one?

        return states, predictions
  def testScanShape(self):
    x = constant_op.constant([[1, 2, 3], [4, 5, 6]])

    def fn(_, current_input):
      return current_input

    initializer = constant_op.constant([0, 0, 0])
    y = functional_ops.scan(fn, x, initializer=initializer)
    self.assertAllEqual(y.get_shape(), self.evaluate(y).shape)
Example #28
    def testScan_Reverse(self):
        with self.test_session():
            elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                                         name="data")
            v = constant_op.constant(2.0, name="v")

            # pylint: disable=unnecessary-lambda
            r = functional_ops.scan(lambda a, x: math_ops.multiply(a, x),
                                    elems,
                                    reverse=True)
            self.assertAllEqual([720., 720., 360., 120., 30., 6.],
                                self.evaluate(r))
            r = functional_ops.scan(lambda a, x: math_ops.multiply(a, x),
                                    elems,
                                    initializer=v,
                                    reverse=True)
            self.assertAllEqual([1440., 1440., 720., 240., 60., 12.],
                                self.evaluate(r))
  def testScan_Grad(self):
    with self.test_session():
      elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
      v = constant_op.constant(2.0, name="v")

      r = functional_ops.scan(
          lambda a, x: math_ops.mul(a, x), elems, initializer=v)
      r = gradients_impl.gradients(r, v)[0]
      self.assertAllEqual(873.0, r.eval())
Example #31
    def test_jacobian_scan_shape(self):
        # Shape x: [3, 4]
        x = random_ops.random_uniform([3, 4])
        elems = random_ops.random_uniform([6])
        # Shape y: [6, 3, 4]
        y = functional_ops.scan(lambda a, e: a + e, elems, initializer=x)
        jacobian = gradients.jacobian(y, x)

        expected_shape = [6, 3, 4, 3, 4]
        self.assertAllEqual(expected_shape, jacobian.shape.as_list())
  def testScan_Control(self):
    with self.cached_session() as sess:
      s = array_ops.placeholder(dtypes.float32, shape=[None])
      b = array_ops.placeholder(dtypes.bool)

      with ops.control_dependencies([b]):
        c = functional_ops.scan(lambda a, x: x * a, s)
      self.assertAllClose(
          np.array([1.0, 3.0, 9.0]), sess.run(c, {s: [1, 3, 3],
                                                  b: True}))
Example #34
  def test_jacobian_scan_shape(self):
    # Shape x: [3, 4]
    x = random_ops.random_uniform([3, 4])
    elems = random_ops.random_uniform([6])
    # Shape y: [6, 3, 4]
    y = functional_ops.scan(lambda a, e: a + e, elems, initializer=x)
    jacobian = gradients.jacobian(y, x)

    expected_shape = [6, 3, 4, 3, 4]
    self.assertAllEqual(expected_shape, jacobian.shape.as_list())
Example #35
  def testScan_Grad(self):
    with self.cached_session():
      elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
      v = constant_op.constant(2.0, name="v")

      # pylint: disable=unnecessary-lambda
      r = functional_ops.scan(
          lambda a, x: math_ops.multiply(a, x), elems, initializer=v)
      # pylint: enable=unnecessary-lambda
      r = gradients_impl.gradients(r, v)[0]
      self.assertAllEqual(873.0, self.evaluate(r))
Example #36
  def testScan_Scoped(self):
    with self.cached_session() as sess:
      with variable_scope.variable_scope("root") as varscope:
        elems = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")

        r = functional_ops.scan(simple_scoped_fn, elems)
        # Check that we have the one variable we asked for here.
        self.assertEqual(len(variables.trainable_variables()), 1)
        self.assertEqual(variables.trainable_variables()[0].name,
                         "root/body/two:0")
        sess.run([variables.global_variables_initializer()])
        results = np.array([1, 6, 18, 44, 98, 208])
        self.assertAllEqual(results, self.evaluate(r))

        # Now let's reuse our single variable.
        varscope.reuse_variables()
        r = functional_ops.scan(simple_scoped_fn, elems, initializer=2)
        self.assertEqual(len(variables.trainable_variables()), 1)
        results = np.array([6, 16, 38, 84, 178, 368])
        self.assertAllEqual(results, self.evaluate(r))
Example #37
    def testScan_Grad(self):
        with self.test_session():
            elems = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                                         name="data")
            v = constant_op.constant(2.0, name="v")

            r = functional_ops.scan(lambda a, x: math_ops.mul(a, x),
                                    elems,
                                    initializer=v)
            r = gradients_impl.gradients(r, v)[0]
            self.assertAllEqual(873.0, r.eval())
Example #38
 def testScanGradientWithPartStopGradient(self):
   a = variables.Variable(0.0, name="a")
   b = variables.Variable(0.0, name="b")
   elems = array_ops.zeros(5)
   l0, l1 = functional_ops.scan(
       lambda elem_, input_: (a, b), elems, initializer=(0., 0.))
   loss = l0 + array_ops.stop_gradient(l1)
   grad = gradients_impl.gradients(ys=[loss], xs=[a, b])
   with self.test_session(use_gpu=True) as sess:
     variables.global_variables_initializer().run()
     sess.run(grad)
 def testScanGradientWithPartStopGradient(self):
   a = variables.Variable(0.0, name="a")
   b = variables.Variable(0.0, name="b")
   elems = array_ops.zeros(5)
   l0, l1 = functional_ops.scan(
       lambda elem_, input_: (a, b), elems, initializer=(0., 0.))
   loss = l0 + array_ops.stop_gradient(l1)
   grad = gradients_impl.gradients(ys=[loss], xs=[a, b])
   with self.test_session(use_gpu=True) as sess:
     self.evaluate(variables.global_variables_initializer())
     self.evaluate(grad)
def monotonic_attention(p_choose_i, previous_attention, mode):
    """Computes the monotonic attention distribution from per-step selection
    probabilities `p_choose_i` and the previous timestep's attention; `mode`
    is one of "recursive", "parallel", or "hard".
    """
    # Force things to be tensors
    p_choose_i = ops.convert_to_tensor(p_choose_i, name="p_choose_i")
    previous_attention = ops.convert_to_tensor(previous_attention,
                                               name="previous_attention")
    if mode == "recursive":
        # Use .shape[0] when it's not None, or fall back on symbolic shape
        batch_size = tensor_shape.dimension_value(
            p_choose_i.shape[0]) or array_ops.shape(p_choose_i)[0]
        # Compute [1, 1 - p_choose_i[0], 1 - p_choose_i[1], ..., 1 - p_choose_i[-2]]
        shifted_1mp_choose_i = array_ops.concat(
            [array_ops.ones((batch_size, 1)), 1 - p_choose_i[:, :-1]], 1)
        # Compute attention distribution recursively as
        # q[i] = (1 - p_choose_i[i - 1])*q[i - 1] + previous_attention[i]
        # attention[i] = p_choose_i[i]*q[i]
        attention = p_choose_i * array_ops.transpose(
            functional_ops.scan(
                # Need to use reshape to remind TF of the shape between loop iterations
                lambda x, yz: array_ops.reshape(yz[0] * x + yz[1],
                                                (batch_size, )),
                # Loop variables yz[0] and yz[1]
                [
                    array_ops.transpose(shifted_1mp_choose_i),
                    array_ops.transpose(previous_attention)
                ],
                # Initial value of x is just zeros
                array_ops.zeros((batch_size, ))))
    elif mode == "parallel":
        # safe_cumprod computes cumprod in logspace with numeric checks
        cumprod_1mp_choose_i = safe_cumprod(1 - p_choose_i,
                                            axis=1,
                                            exclusive=True)
        # Compute recurrence relation solution
        attention = p_choose_i * cumprod_1mp_choose_i * math_ops.cumsum(
            previous_attention /
            # Clip cumprod_1mp to avoid divide-by-zero
            clip_ops.clip_by_value(cumprod_1mp_choose_i, 1e-10, 1.),
            axis=1)
    elif mode == "hard":
        # Remove any probabilities before the index chosen last time step
        p_choose_i *= math_ops.cumsum(previous_attention, axis=1)
        # Now, use exclusive cumprod to remove probabilities after the first
        # chosen index, like so:
        # p_choose_i = [0, 0, 0, 1, 1, 0, 1, 1]
        # cumprod(1 - p_choose_i, exclusive=True) = [1, 1, 1, 1, 0, 0, 0, 0]
        # Product of above: [0, 0, 0, 1, 0, 0, 0, 0]
        attention = p_choose_i * math_ops.cumprod(
            1 - p_choose_i, axis=1, exclusive=True)
    else:
        raise ValueError("mode must be 'recursive', 'parallel', or 'hard'.")
    return attention
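A minimal usage sketch with made-up values (batch of 2, memory length 4; assumes the helpers this function relies on, such as `safe_cumprod`, are importable from the same module):

import tensorflow as tf

p_choose_i = tf.constant([[0.1, 0.2, 0.3, 0.4],
                          [0.5, 0.5, 0.5, 0.5]])
previous_attention = tf.constant([[1.0, 0.0, 0.0, 0.0],
                                  [0.0, 1.0, 0.0, 0.0]])
# Returns a [batch, memory_length] attention distribution for this step.
attention = monotonic_attention(p_choose_i, previous_attention, mode="parallel")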
    def loop_body(idx_step, y):
      x = array_ops.zeros([10, 20, 30], dtype=dtypes.float32)
      x = functional_ops.scan(
          math_ops.add,
          x,
          initializer=array_ops.zeros([20, 30], dtype=dtypes.float32),
          back_prop=False,
          parallel_iterations=1)

      with ops.device('/cpu:0'):
        y = array_ops.identity(x)

        return idx_step + 1, y
Example #44
        def loop_body(idx_step, y):
            x = array_ops.zeros([10, 20, 30], dtype=dtypes.float32)
            x = functional_ops.scan(math_ops.add,
                                    x,
                                    initializer=array_ops.zeros(
                                        [20, 30], dtype=dtypes.float32),
                                    back_prop=False,
                                    parallel_iterations=1)

            with ops.device('/cpu:0'):
                y = array_ops.identity(x)

                return idx_step + 1, y
Example #45
 def forward(self, x0, ts):
     Nt = x0.shape[0]
     Xs = np.zeros(Nt, dtype=np.object)
     for i in range(Nt):
         time_grid = ops.convert_to_tensor(ts[i],
                                           preferred_dtype=float_type,
                                           name='t')
         y0 = ops.convert_to_tensor(x0[i, :].reshape((1, -1)), name='y0')
         time_delta_grid = time_grid[1:] - time_grid[:-1]
         scan_func = self._make_scan_func(self.model.f)
         y_grid = functional_ops.scan(scan_func,
                                      (time_grid[:-1], time_delta_grid), y0)
         y_s = array_ops.concat([[y0], y_grid], axis=0)
         Xs[i] = tf.reshape(tf.squeeze(y_s), [len(ts[i]), self.model.D])
     return Xs
Example #46
    def _compute_predictions(self):
        """ Compute vanilla-RNN states and predictions. """

        with tf.variable_scope('states'):
            initial_state = tf.zeros([self.hidden_layer_size],
                                     name='initial_state')
            states = functional_ops.scan(self._rnn_step, self.inputs,
                                         initializer=initial_state, name='states')

        with tf.variable_scope('predictions'):
            W_pred = tf.get_variable(
                'W_pred', shape=[self.hidden_layer_size, self.target_size])
            b_pred = tf.get_variable('b_pred', shape=[self.target_size],
                                     initializer=tf.constant_initializer(0.0))
            predictions = tf.add(tf.matmul(states, W_pred), b_pred, name='predictions')
            
        return states, predictions
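`self._rnn_step` is not shown; a hypothetical single-step vanilla-RNN update matching the shapes above (rank-1 state of size `hidden_layer_size`, one input row per scanned timestep; `self.input_size` is assumed) could look like:

    def _rnn_step(self, h_prev, x_t):
        # h_prev: [hidden_layer_size], x_t: [input_size]
        W_x = tf.get_variable('W_x', shape=[self.input_size, self.hidden_layer_size])
        W_h = tf.get_variable('W_h', shape=[self.hidden_layer_size, self.hidden_layer_size])
        b = tf.get_variable('b', shape=[self.hidden_layer_size],
                            initializer=tf.constant_initializer(0.0))
        h = tf.tanh(tf.matmul(x_t[None, :], W_x) + tf.matmul(h_prev[None, :], W_h) + b)
        return h[0]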
Example #47
 def forward(self, x0, ts, Nw=1):
     Xs = np.zeros(len(ts), dtype=np.object)
     for i in range(len(ts)):
         t = np.linspace(0, np.max(ts[i]), (len(ts[i]) - 1) * self.s + 1)
         t = np.unique(np.sort(np.hstack((t, ts[i]))))
         idx = np.where(np.isin(t, ts[i]))[0]
         t = np.reshape(t, [-1, 1])
         time_grid = ops.convert_to_tensor(t,
                                           preferred_dtype=float_type,
                                           name='t')
         time_delta_grid = time_grid[1:] - time_grid[:-1]
         y0 = np.repeat(x0[i, :].reshape((1, -1)), Nw, axis=0)
         y0 = ops.convert_to_tensor(y0, name='y0')
         scan_func = self._make_scan_func(self.model.f, self.model.diffus.g)
         y_grid = functional_ops.scan(scan_func,
                                      (time_grid[:-1], time_delta_grid), y0)
         ys = array_ops.concat([[y0], y_grid], axis=0)
         Xs[i] = tf.transpose(tf.gather(ys, idx, axis=0), [1, 0, 2])
     return Xs
Example #48
    def forward(self, y0, save_intermediate=False):
        time_grid = ops.convert_to_tensor(self.ts,
                                          preferred_dtype=float_type,
                                          name='t')
        y0 = ops.convert_to_tensor(y0, name='y0')
        time_delta_grid = time_grid[1:] - time_grid[:-1]
        time_grid = time_grid[1:]
        time_combined = tf.concat(
            [time_grid[:, None], time_delta_grid[:, None]], axis=1)
        scan_func = self._make_scan_func(self.f)

        if save_intermediate:
            y_grid = functional_ops.scan(scan_func, time_combined, y0)
            y_s = array_ops.concat([[y0], y_grid], axis=0)
            y_t = y_s[-1, :, :, :]
            return y_t, y_s
        else:
            y_t = functional_ops.foldl(scan_func, time_combined, y0)
            return y_t, None
Example #49
def my_ctc_label_dense_to_sparse(labels, label_lengths):
    """Converts CTC labels from dense to sparse.
  Arguments:
      labels: dense CTC labels.
      label_lengths: length of the labels.
  Returns:
      A sparse tensor representation of the labels.
  """
    label_shape = array_ops.shape(labels)
    num_batches_tns = array_ops.stack([label_shape[0]])
    max_num_labels_tns = array_ops.stack([label_shape[1]])

    def range_less_than(_, current_input):
        return array_ops.expand_dims(math_ops.range(label_shape[1]),
                                     0) < array_ops.fill(
                                         max_num_labels_tns, current_input)

    init = math_ops.cast(array_ops.fill([1, label_shape[1]], 0),
                         dtypes_module.bool)
    dense_mask = functional_ops.scan(range_less_than,
                                     label_lengths,
                                     initializer=init,
                                     parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]

    label_array = array_ops.reshape(
        array_ops.tile(math_ops.range(0, label_shape[1]), num_batches_tns),
        label_shape)
    label_ind = array_ops.boolean_mask(label_array, dense_mask)

    batch_array = array_ops.transpose(
        array_ops.reshape(
            array_ops.tile(math_ops.range(0, label_shape[0]),
                           max_num_labels_tns), reverse(label_shape, 0)))
    batch_ind = array_ops.boolean_mask(batch_array, dense_mask)
    indices = array_ops.transpose(
        array_ops.reshape(concatenate([batch_ind, label_ind], axis=0),
                          [2, -1]))

    vals_sparse = array_ops.gather_nd(labels, indices)

    return sparse_tensor.SparseTensor(math_ops.to_int64(indices), vals_sparse,
                                      math_ops.to_int64(label_shape))
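A minimal usage sketch with made-up labels (two sequences of lengths 3 and 2, padded into a dense [2, 4] matrix):

import tensorflow as tf

labels = tf.constant([[1, 2, 3, 0],
                      [4, 5, 0, 0]], dtype=tf.int32)
label_lengths = tf.constant([3, 2], dtype=tf.int32)
sparse_labels = my_ctc_label_dense_to_sparse(labels, label_lengths)
# sparse_labels.indices -> [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]]
# sparse_labels.values  -> [1, 2, 3, 4, 5]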
  def testScanFoldl_Nested(self):
    elems = constant_op.constant([1.0, 2.0, 3.0, 4.0], name="data")
    inner_elems = constant_op.constant([0.5, 0.5], name="data")

    def r_inner(a, x):
      return functional_ops.foldl(
          lambda b, y: b * y * x, inner_elems, initializer=a)

    r = functional_ops.scan(r_inner, elems)

    # t == 0 (returns 1)
    # t == 1, a == 1, x == 2 (returns 1)
    #   t_0 == 0, b == a == 1, y == 0.5, returns b * y * x = 1
    #   t_1 == 1, b == 1,      y == 0.5, returns b * y * x = 1
    # t == 2, a == 1, x == 3 (returns 1.5*1.5 == 2.25)
    #   t_0 == 0, b == a == 1, y == 0.5, returns b * y * x = 1.5
    #   t_1 == 1, b == 1.5,    y == 0.5, returns b * y * x = 1.5*1.5
    # t == 3, a == 2.25, x == 4 (returns 9)
    #   t_0 == 0, b == a == 2.25, y == 0.5, returns b * y * x = 4.5
    #   t_1 == 1, b == 4.5,       y == 0.5, returns b * y * x = 9
    self.assertAllClose([1., 1., 2.25, 9.], self.evaluate(r))
Example #52
  def ctc_label_dense_to_sparse( self, labels, label_lengths ):
    """Mike Henry's implementation, with some minor modifications."""
    with self.G.as_default():
      label_shape = tf.shape( labels )
      num_batches_tns = tf.pack( [label_shape[0]] )
      max_num_labels_tns = tf.pack( [label_shape[1]] )

      def range_less_than(previous_state, current_input):
        return tf.expand_dims( tf.range( label_shape[1] ), 0 ) < current_input

      init = tf.cast( tf.fill( max_num_labels_tns, 0 ), tf.bool )
      dense_mask = functional_ops.scan(range_less_than, label_lengths , initializer=init, parallel_iterations=1)
      dense_mask = dense_mask[ :, 0, : ]

      label_array = tf.reshape( tf.tile( tf.range( 0, label_shape[1] ), num_batches_tns ), label_shape )
      label_ind = tf.boolean_mask( label_array, dense_mask )

      batch_array = tf.transpose( tf.reshape( tf.tile( tf.range( 0,  label_shape[0] ), max_num_labels_tns ), tf.reverse( label_shape,[True]) ) )
      batch_ind = tf.boolean_mask( batch_array, dense_mask )

      indices = tf.transpose( tf.reshape( tf.concat( 0, [batch_ind, label_ind] ), [2,-1] ) )
      vals_sparse = tf.gather_nd( labels, indices )
      return tf.SparseTensor( tf.to_int64(indices), vals_sparse, tf.to_int64( label_shape ) )
Example #53
 def f(y):
   # pylint: disable=unnecessary-lambda
   return functional_ops.scan(
       lambda a, x: math_ops.multiply(a, x), y, initializer=v)
Example #54
def ais_chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
              target_log_prob_fn, proposal_log_prob_fn, event_dims=(),
              name=None):
  """Runs annealed importance sampling (AIS) to estimate normalizing constants.

  This routine uses Hamiltonian Monte Carlo to sample from a series of
  distributions that slowly interpolates between an initial "proposal"
  distribution

  `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)`

  and the target distribution

  `exp(target_log_prob_fn(x) - target_log_normalizer)`,

  accumulating importance weights along the way. The product of these
  importance weights gives an unbiased estimate of the ratio of the
  normalizing constants of the initial distribution and the target
  distribution:

  E[exp(w)] = exp(target_log_normalizer - proposal_log_normalizer).

  Args:
    n_iterations: Integer number of Markov chain updates to run. More
      iterations means more expense, but smoother annealing between q
      and p, which in turn means exponentially lower variance for the
      normalizing constant estimator.
    step_size: Scalar step size or array of step sizes for the
      leapfrog integrator. Broadcasts to the shape of
      `initial_x`. Larger step sizes lead to faster progress, but
      too-large step sizes make rejection exponentially more likely.
      When possible, it's often helpful to match per-variable step
      sizes to the standard deviations of the target distribution in
      each variable.
    n_leapfrog_steps: Integer number of steps to run the leapfrog
      integrator for. Total progress per HMC step is roughly
      proportional to step_size * n_leapfrog_steps.
    initial_x: Tensor of initial state(s) of the Markov chain(s). Must
      be a sample from q, or results will be incorrect.
    target_log_prob_fn: Python callable which takes an argument like `initial_x`
      and returns its (possibly unnormalized) log-density under the target
      distribution.
    proposal_log_prob_fn: Python callable that returns the log density of the
      initial distribution.
    event_dims: List of dimensions that should not be treated as
      independent. This allows for multiple chains to be run independently
      in parallel. Default is (), i.e., all dimensions are independent.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    ais_weights: Tensor with the estimated weight(s). Has shape matching
      `target_log_prob_fn(initial_x)`.
    chain_states: Tensor with the state(s) of the Markov chain(s) at the final
      iteration. Has shape matching `initial_x`.
    acceptance_probs: Tensor with the acceptance probabilities for the final
      iteration. Has shape matching `target_log_prob_fn(initial_x)`.

  #### Examples:

  ```python
  # Estimating the normalizing constant of a log-gamma distribution:
  def proposal_log_prob(x):
    # Standard normal log-probability. This is properly normalized.
    return tf.reduce_sum(-0.5 * tf.square(x) - 0.5 * np.log(2 * np.pi), 1)
  def target_log_prob(x):
    # Unnormalized log-gamma(2, 3) distribution.
    # True normalizer is (lgamma(2) - 2 * log(3)) * x.shape[1]
    return tf.reduce_sum(2. * x - 3. * tf.exp(x), 1)
  # Run 100 AIS chains in parallel
  initial_x = tf.random_normal([100, 20])
  w, _, _ = hmc.ais_chain(1000, 0.2, 2, initial_x, target_log_prob,
                          proposal_log_prob, event_dims=[1])
  log_normalizer_estimate = tf.reduce_logsumexp(w) - np.log(100)
  ```

  ```python
  # Estimating the marginal likelihood of a Bayesian regression model:
  base_measure = -0.5 * np.log(2 * np.pi)
  def proposal_log_prob(x):
    # Standard normal log-probability. This is properly normalized.
    return tf.reduce_sum(-0.5 * tf.square(x) + base_measure, 1)
  def regression_log_joint(beta, x, y):
    # This function returns a vector whose ith element is log p(beta[i], y | x).
    # Each row of beta corresponds to the state of an independent Markov chain.
    log_prior = tf.reduce_sum(-0.5 * tf.square(beta) + base_measure, 1)
    means = tf.matmul(beta, x, transpose_b=True)
    log_likelihood = tf.reduce_sum(-0.5 * tf.square(y - means) +
                                   base_measure, 1)
    return log_prior + log_likelihood
  def log_joint_partial(beta):
    return regression_log_joint(beta, x, y)
  # Run 100 AIS chains in parallel
  initial_beta = tf.random_normal([100, x.shape[1]])
  w, beta_samples, _ = hmc.ais_chain(1000, 0.1, 2, initial_beta,
                                     log_joint_partial, proposal_log_prob,
                                     event_dims=[1])
  log_normalizer_estimate = tf.reduce_logsumexp(w) - np.log(100)
  ```
  """
  with ops.name_scope(name, 'hmc_ais_chain',
                      [n_iterations, step_size, n_leapfrog_steps, initial_x]):
    non_event_shape = array_ops.shape(target_log_prob_fn(initial_x))

    beta_series = math_ops.linspace(0., 1., n_iterations+1)[1:]
    def _body(a, beta):  # pylint: disable=missing-docstring
      def log_prob_beta(x):
        return ((1 - beta) * proposal_log_prob_fn(x) +
                beta * target_log_prob_fn(x))
      last_x = a[0]
      w = a[2]
      w += (1. / n_iterations) * (target_log_prob_fn(last_x) -
                                  proposal_log_prob_fn(last_x))
      # TODO(b/66917083): There's an opportunity for gradient reuse here.
      updated_x, acceptance_probs, _, _ = kernel(step_size, n_leapfrog_steps,
                                                 last_x, log_prob_beta,
                                                 event_dims)
      return updated_x, acceptance_probs, w

    x, acceptance_probs, w = functional_ops.scan(
        _body, beta_series, (initial_x, array_ops.zeros(non_event_shape),
                             array_ops.zeros(non_event_shape)))
  return w[-1], x[-1], acceptance_probs[-1]
Example #55
def sample_chain(
    num_results,
    target_log_prob_fn,
    current_state,
    step_size,
    num_leapfrog_steps,
    num_burnin_steps=0,
    num_steps_between_results=0,
    seed=None,
    current_target_log_prob=None,
    current_grads_target_log_prob=None,
    name=None):
  """Runs multiple iterations of one or more Hamiltonian Monte Carlo chains.

  Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) algorithm
  that takes a series of gradient-informed steps to produce a Metropolis
  proposal. This function samples from an HMC Markov chain at `current_state`
  and whose stationary distribution has log-unnormalized-density
  `target_log_prob_fn()`.

  This function samples from multiple chains in parallel. It assumes that the
  leftmost dimensions of (each) `current_state` (part) index an independent
  chain.  The function `target_log_prob_fn()` sums log-probabilities across
  event dimensions (i.e., current state (part) rightmost dimensions). Each
  element of the output of `target_log_prob_fn()` represents the (possibly
  unnormalized) log-probability of the joint distribution over (all) the current
  state (parts).

  The `current_state` can be represented as a single `Tensor` or a `list` of
  `Tensors` which collectively represent the current state. When specifying a
  `list`, one must also specify a list of `step_size`s.

  Note: `target_log_prob_fn` is called exactly twice.

  Since HMC states are correlated, it is sometimes desirable to produce
  additional intermediate states, and then discard them, ending up with a set of
  states with decreased autocorrelation.  See [1].  Such "thinning" is made
  possible by setting `num_steps_between_results > 0`.  The chain then takes
  `num_steps_between_results` extra steps between the steps that make it into
  the results.  The extra steps are never materialized (in calls to `sess.run`),
  and thus do not increase memory requirements.

  [1]: "Statistically efficient thinning of a Markov chain sampler."
       Art B. Owen. April 2017.
       http://statweb.stanford.edu/~owen/reports/bestthinning.pdf

  #### Examples:

  ##### Sample from a diagonal-variance Gaussian.

  ```python
  tfd = tf.contrib.distributions

  def make_likelihood(true_variances):
    return tfd.MultivariateNormalDiag(
        scale_diag=tf.sqrt(true_variances))

  dims = 10
  dtype = np.float32
  true_variances = tf.linspace(dtype(1), dtype(3), dims)
  likelihood = make_likelihood(true_variances)

  states, kernel_results = hmc.sample_chain(
      num_results=1000,
      target_log_prob_fn=likelihood.log_prob,
      current_state=tf.zeros(dims),
      step_size=0.5,
      num_leapfrog_steps=2,
      num_burnin_steps=500)

  # Compute sample stats.
  sample_mean = tf.reduce_mean(states, axis=0)
  sample_var = tf.reduce_mean(
      tf.squared_difference(states, sample_mean),
      axis=0)
  ```

  ##### Sampling from factor-analysis posteriors with known factors.

  I.e.,

  ```none
  for i=1..n:
    w[i] ~ Normal(0, eye(d))            # prior
    x[i] ~ Normal(loc=matmul(w[i], F))  # likelihood
  ```

  where `F` denotes factors.

  ```python
  tfd = tf.contrib.distributions

  def make_prior(dims, dtype):
    return tfd.MultivariateNormalDiag(
        loc=tf.zeros(dims, dtype))

  def make_likelihood(weights, factors):
    return tfd.MultivariateNormalDiag(
        loc=tf.tensordot(weights, factors, axes=[[0], [-1]]))

  # Setup data.
  num_weights = 10
  num_factors = 4
  num_chains = 100
  dtype = np.float32

  prior = make_prior(num_weights, dtype)
  weights = prior.sample(num_chains)
  factors = np.random.randn(num_factors, num_weights).astype(dtype)
  x = make_likelihood(weights, factors).sample(num_chains)

  def target_log_prob(w):
    # Target joint is: `f(w) = p(w, x | factors)`.
    return prior.log_prob(w) + make_likelihood(w, factors).log_prob(x)

  # Get `num_results` samples from `num_chains` independent chains.
  chains_states, kernels_results = hmc.sample_chain(
      num_results=1000,
      target_log_prob_fn=target_log_prob,
      current_state=tf.zeros([num_chains, dims], dtype),
      step_size=0.1,
      num_leapfrog_steps=2,
      num_burnin_steps=500)

  # Compute sample stats.
  sample_mean = tf.reduce_mean(chains_states, axis=[0, 1])
  sample_var = tf.reduce_mean(
      tf.squared_difference(chains_states, sample_mean),
      axis=[0, 1])
  ```

  Args:
    num_results: Integer number of Markov chain draws.
    target_log_prob_fn: Python callable which takes an argument like
      `current_state` (or `*current_state` if it's a list) and returns its
      (possibly unnormalized) log-density under the target distribution.
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    step_size: `Tensor` or Python `list` of `Tensor`s representing the step size
      for the leapfrog integrator. Must broadcast with the shape of
      `current_state`. Larger step sizes lead to faster progress, but too-large
      step sizes make rejection exponentially more likely. When possible, it's
      often helpful to match per-variable step sizes to the standard deviations
      of the target distribution in each variable.
    num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
      for. Total progress per HMC step is roughly proportional to `step_size *
      num_leapfrog_steps`.
    num_burnin_steps: Integer number of chain steps to take before starting to
      collect results.
      Default value: 0 (i.e., no burn-in).
    num_steps_between_results: Integer number of chain steps between collecting
      a result. Only one out of every `num_steps_between_results + 1` steps is
      included in the returned results.  The number of returned chain states is
      still equal to `num_results`.  Default value: 0 (i.e., no thinning).
    seed: Python integer to seed the random number generator.
    current_target_log_prob: (Optional) `Tensor` representing the value of
      `target_log_prob_fn` at the `current_state`. The only reason to specify
      this argument is to reduce TF graph size.
      Default value: `None` (i.e., compute as needed).
    current_grads_target_log_prob: (Optional) Python list of `Tensor`s
      representing gradient of `target_log_prob` at the `current_state` and wrt
      the `current_state`. Must have same shape as `current_state`. The only
      reason to specify this argument is to reduce TF graph size.
      Default value: `None` (i.e., compute as needed).
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., "hmc_sample_chain").

  Returns:
    accepted_states: Tensor or Python list of `Tensor`s representing the
      state(s) of the Markov chain(s) at each result step. Has same shape as
      input `current_state` but with a prepended `num_results`-size dimension.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain.
  """
  with ops.name_scope(
      name, "hmc_sample_chain",
      [num_results, current_state, step_size, num_leapfrog_steps,
       num_burnin_steps, num_steps_between_results, seed,
       current_target_log_prob, current_grads_target_log_prob]):
    with ops.name_scope("initialize"):
      [
          current_state,
          step_size,
          current_target_log_prob,
          current_grads_target_log_prob,
      ] = _prepare_args(
          target_log_prob_fn,
          current_state,
          step_size,
          current_target_log_prob,
          current_grads_target_log_prob)
      num_results = ops.convert_to_tensor(
          num_results,
          dtype=dtypes.int32,
          name="num_results")
      num_leapfrog_steps = ops.convert_to_tensor(
          num_leapfrog_steps,
          dtype=dtypes.int32,
          name="num_leapfrog_steps")
      num_burnin_steps = ops.convert_to_tensor(
          num_burnin_steps,
          dtype=dtypes.int32,
          name="num_burnin_steps")
      num_steps_between_results = ops.convert_to_tensor(
          num_steps_between_results,
          dtype=dtypes.int32,
          name="num_steps_between_results")

    def _run_chain(num_steps, current_state, kernel_results):
      """Runs the chain(s) for `num_steps`."""
      def _loop_body(iter_, current_state, kernel_results):
        return [iter_ + 1] + list(kernel(
            target_log_prob_fn,
            current_state,
            step_size,
            num_leapfrog_steps,
            seed,
            kernel_results.current_target_log_prob,
            kernel_results.current_grads_target_log_prob))
      while_loop_kwargs = dict(
          cond=lambda iter_, *args: iter_ < num_steps,
          body=_loop_body,
          loop_vars=[
              np.int32(0),
              current_state,
              kernel_results,
          ],
      )
      if seed is not None:
        while_loop_kwargs["parallel_iterations"] = 1
      return control_flow_ops.while_loop(
          **while_loop_kwargs)[1:]  # Lop-off "iter_".

    def _scan_body(args_list, iter_):
      """Closure which implements `tf.scan` body."""
      current_state, kernel_results = args_list
      return _run_chain(
          1 + array_ops.where(math_ops.equal(iter_, 0),
                              num_burnin_steps,
                              num_steps_between_results),
          current_state,
          kernel_results)

    scan_kwargs = dict(
        fn=_scan_body,
        elems=math_ops.range(num_results),  # iter_: used to choose burnin.
        initializer=[
            current_state,
            _make_dummy_kernel_results(
                current_state,
                current_target_log_prob,
                current_grads_target_log_prob),
        ])
    if seed is not None:
      scan_kwargs["parallel_iterations"] = 1
    return functional_ops.scan(**scan_kwargs)
 def testScanEmptyTensor(self):
   with self.cached_session():
     x = functional_ops.scan(
         lambda x, _: x, math_ops.range(0), initializer=array_ops.ones([2, 4]))
     self.assertAllEqual([0, 2, 4], x.get_shape())
     self.assertAllEqual(x.get_shape(), self.evaluate(x).shape)
Example #57
def chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
          target_log_prob_fn, event_dims=(), name=None):
  """Runs multiple iterations of one or more Hamiltonian Monte Carlo chains.

  Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC)
  algorithm that takes a series of gradient-informed steps to produce
  a Metropolis proposal. This function samples from an HMC Markov
  chain whose initial state is `initial_x` and whose stationary
  distribution has log-density `target_log_prob_fn()`.

  This function can update multiple chains in parallel. It assumes
  that all dimensions of `initial_x` not specified in `event_dims` are
  independent, and should therefore be updated independently. The
  output of `target_log_prob_fn()` should sum log-probabilities across
  all event dimensions. Slices along dimensions not in `event_dims`
  may have different target distributions; this is up to
  `target_log_prob_fn()`.

  This function basically just wraps `hmc.kernel()` in a tf.scan() loop.

  Args:
    n_iterations: Integer number of Markov chain updates to run.
    step_size: Scalar step size or array of step sizes for the
      leapfrog integrator. Broadcasts to the shape of
      `initial_x`. Larger step sizes lead to faster progress, but
      too-large step sizes make rejection exponentially more likely.
      When possible, it's often helpful to match per-variable step
      sizes to the standard deviations of the target distribution in
      each variable.
    n_leapfrog_steps: Integer number of steps to run the leapfrog
      integrator for. Total progress per HMC step is roughly
      proportional to step_size * n_leapfrog_steps.
    initial_x: Tensor of initial state(s) of the Markov chain(s).
    target_log_prob_fn: Python callable which takes an argument like `initial_x`
      and returns its (possibly unnormalized) log-density under the target
      distribution.
    event_dims: List of dimensions that should not be treated as
      independent. This allows for multiple chains to be run independently
      in parallel. Default is (), i.e., all dimensions are independent.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    chain_states: Tensor with the state of the Markov chain at each iteration.
      Has shape `[n_iterations, initial_x.shape[0], ..., initial_x.shape[-1]]`.
    acceptance_probs: Tensor with the acceptance probabilities for each
      iteration. Has shape matching `target_log_prob_fn(initial_x)`.

  #### Examples:

  ```python
  # Sampling from a standard normal (note `log_joint()` is unnormalized):
  def log_joint(x):
    return tf.reduce_sum(-0.5 * tf.square(x))
  chain, acceptance_probs = hmc.chain(1000, 0.5, 2, tf.zeros(10), log_joint,
                                      event_dims=[0])
  # Discard first half of chain as warmup/burn-in
  warmed_up = chain[500:]
  mean_est = tf.reduce_mean(warmed_up, 0)
  var_est = tf.reduce_mean(tf.square(warmed_up), 0) - tf.square(mean_est)
  ```

  ```python
  # Sampling from a diagonal-variance Gaussian:
  variances = tf.linspace(1., 3., 10)
  def log_joint(x):
    return tf.reduce_sum(-0.5 / variances * tf.square(x))
  chain, acceptance_probs = hmc.chain(1000, 0.5, 2, tf.zeros(10), log_joint,
                                      event_dims=[0])
  # Discard first half of chain as warmup/burn-in
  warmed_up = chain[500:]
  mean_est = tf.reduce_mean(warmed_up, 0)
  var_est = tf.reduce_mean(tf.square(warmed_up), 0) - tf.square(mean_est)
  ```

  ```python
  # Sampling from factor-analysis posteriors with known factors W:
  # mu[i, j] ~ Normal(0, 1)
  # x[i] ~ Normal(matmul(mu[i], W), I)
  def log_joint(mu, x, W):
    prior = -0.5 * tf.reduce_sum(tf.square(mu), 1)
    x_mean = tf.matmul(mu, W)
    likelihood = -0.5 * tf.reduce_sum(tf.square(x - x_mean), 1)
    return prior + likelihood
  chain, acceptance_probs = hmc.chain(1000, 0.1, 2,
                                      tf.zeros([x.shape[0], W.shape[0]]),
                                      lambda mu: log_joint(mu, x, W),
                                      event_dims=[1])
  # Discard first half of chain as warmup/burn-in
  warmed_up = chain[500:]
  mean_est = tf.reduce_mean(warmed_up, 0)
  var_est = tf.reduce_mean(tf.square(warmed_up), 0) - tf.square(mean_est)
  ```

  ```python
  # Sampling from the posterior of a Bayesian regression model:

  # Run 100 chains in parallel, each with a different initialization.
  initial_beta = tf.random_normal([100, x.shape[1]])
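  # `x` and `log_joint_partial` are assumed to be defined elsewhere;
  # `log_joint_partial` is the model's log-joint with the observed data bound in.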
  chain, acceptance_probs = hmc.chain(1000, 0.1, 10, initial_beta,
                                      log_joint_partial, event_dims=[1])
  # Discard first halves of chains as warmup/burn-in
  warmed_up = chain[500:]
  # Averaging across samples within a chain and across chains
  mean_est = tf.reduce_mean(warmed_up, [0, 1])
  var_est = tf.reduce_mean(tf.square(warmed_up), [0, 1]) - tf.square(mean_est)
  ```
  """
  with ops.name_scope(name, 'hmc_chain', [n_iterations, step_size,
                                          n_leapfrog_steps, initial_x]):
    initial_x = ops.convert_to_tensor(initial_x, name='initial_x')
    non_event_shape = array_ops.shape(target_log_prob_fn(initial_x))

    def body(a, _):
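      # `a` is the loop-carried state from the previous iteration:
      # (chain_state, acceptance_probs, log_prob, grad_log_prob).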
      updated_x, acceptance_probs, log_prob, grad = kernel(
          step_size, n_leapfrog_steps, a[0], target_log_prob_fn, event_dims,
          a[2], a[3])
      return updated_x, acceptance_probs, log_prob, grad

    potential_and_grad = _make_potential_and_grad(target_log_prob_fn)
    potential, grad = potential_and_grad(initial_x)
    return functional_ops.scan(body, array_ops.zeros(n_iterations),
                               (initial_x, array_ops.zeros(non_event_shape),
                                -potential, -grad))[:2]
Example #58
def monotonic_attention(p_choose_i, previous_attention, mode):
  """Compute monotonic attention distribution from choosing probabilities.

  Monotonic attention implies that the input sequence is processed in an
  explicitly left-to-right manner when generating the output sequence.  In
  addition, once an input sequence element is attended to at a given output
  timestep, elements occurring before it cannot be attended to at subsequent
  output timesteps.  This function generates attention distributions according
  to these assumptions.  For more information, see ``Online and Linear-Time
  Attention by Enforcing Monotonic Alignments''.

  Args:
    p_choose_i: Probability of choosing input sequence/memory element i.  Should
      be of shape (batch_size, input_sequence_length), and should all be in the
      range [0, 1].
    previous_attention: The attention distribution from the previous output
      timestep.  Should be of shape (batch_size, input_sequence_length).  For
      the first output timestep, previous_attention[n] should be [1, 0, 0, ...,
      0] for all n in [0, ... batch_size - 1].
    mode: How to compute the attention distribution.  Must be one of
      'recursive', 'parallel', or 'hard'.
        * 'recursive' uses tf.scan to recursively compute the distribution.
          This is slowest but is exact, general, and does not suffer from
          numerical instabilities.
        * 'parallel' uses parallelized cumulative-sum and cumulative-product
          operations to compute a closed-form solution to the recurrence
          relation defining the attention distribution.  This makes it more
          efficient than 'recursive', but it requires numerical checks which
          make the distribution non-exact.  This can be a problem in particular
          when input_sequence_length is long and/or p_choose_i has entries very
          close to 0 or 1.
        * 'hard' requires that the probabilities in p_choose_i are all either 0
          or 1, and subsequently uses a more efficient and exact solution.

  Returns:
    A tensor of shape (batch_size, input_sequence_length) representing the
    attention distributions for each sequence in the batch.

  Raises:
    ValueError: mode is not one of 'recursive', 'parallel', 'hard'.
  """
  # Force things to be tensors
  p_choose_i = ops.convert_to_tensor(p_choose_i, name="p_choose_i")
  previous_attention = ops.convert_to_tensor(
      previous_attention, name="previous_attention")
  if mode == "recursive":
    # Use .shape[0].value when it's not None, or fall back on symbolic shape
    batch_size = p_choose_i.shape[0].value or array_ops.shape(p_choose_i)[0]
    # Compute [1, 1 - p_choose_i[0], 1 - p_choose_i[1], ..., 1 - p_choose_i[-2]]
    shifted_1mp_choose_i = array_ops.concat(
        [array_ops.ones((batch_size, 1)), 1 - p_choose_i[:, :-1]], 1)
    # Compute attention distribution recursively as
    # q[i] = (1 - p_choose_i[i])*q[i - 1] + previous_attention[i]
    # attention[i] = p_choose_i[i]*q[i]
    attention = p_choose_i*array_ops.transpose(functional_ops.scan(
        # Need to use reshape to remind TF of the shape between loop iterations
        lambda x, yz: array_ops.reshape(yz[0]*x + yz[1], (batch_size,)),
        # Loop variables yz[0] and yz[1]
        [array_ops.transpose(shifted_1mp_choose_i),
         array_ops.transpose(previous_attention)],
        # Initial value of x is just zeros
        array_ops.zeros((batch_size,))))
  elif mode == "parallel":
    # safe_cumprod computes cumprod in logspace with numeric checks
    cumprod_1mp_choose_i = safe_cumprod(1 - p_choose_i, axis=1, exclusive=True)
    # Compute recurrence relation solution
    attention = p_choose_i*cumprod_1mp_choose_i*math_ops.cumsum(
        previous_attention /
        # Clip cumprod_1mp to avoid divide-by-zero
        clip_ops.clip_by_value(cumprod_1mp_choose_i, 1e-10, 1.), axis=1)
  elif mode == "hard":
    # Remove any probabilities before the index chosen last time step
    p_choose_i *= math_ops.cumsum(previous_attention, axis=1)
    # Now, use exclusive cumprod to remove probabilities after the first
    # chosen index, like so:
    # p_choose_i = [0, 0, 0, 1, 1, 0, 1, 1]
    # cumprod(1 - p_choose_i, exclusive=True) = [1, 1, 1, 1, 0, 0, 0, 0]
    # Product of above: [0, 0, 0, 1, 0, 0, 0, 0]
    attention = p_choose_i*math_ops.cumprod(
        1 - p_choose_i, axis=1, exclusive=True)
  else:
    raise ValueError("mode must be 'recursive', 'parallel', or 'hard'.")
  return attention
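Below is a minimal usage sketch, not part of the original source: it assumes
`monotonic_attention` above is in scope and a TF1-style graph/session; the
shapes and variable names are illustrative only.

```python
import numpy as np
import tensorflow as tf

batch_size, seq_len = 2, 5
# Sigmoid-like choosing probabilities, all in [0, 1].
p_choose_i = tf.constant(np.full((batch_size, seq_len), 0.5, dtype=np.float32))
# For the first output timestep, previous_attention is one-hot at index 0.
previous_attention = tf.one_hot(
    tf.zeros([batch_size], dtype=tf.int32), seq_len, dtype=tf.float32)

attention = monotonic_attention(p_choose_i, previous_attention, mode="recursive")
with tf.Session() as sess:
  # One attention distribution over input positions per sequence in the batch.
  print(sess.run(attention))
```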
Example #59
  def scan():
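    # `elems` is assumed to be captured from the enclosing scope (the tensor of
    # values being accumulated); its definition is not shown in this fragment.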
    return functional_ops.scan(
        lambda a, x: a + x, elems, parallel_iterations=1)