def testFoldr_Simple(self):
    """Check tf.foldr on a small integer vector, with and without a seed.

    The folded function is ``(a + x) * 2``.  Expected values are 450 when no
    initializer is given and 1282 when the fold is seeded with 10.
    """
    def add_then_double(acc, item):
        # Same computation as the fold function under test: (acc + item) * 2.
        return tf.mul(tf.add(acc, item), 2)

    with self.test_session():
        data = tf.constant([1, 2, 3, 4, 5, 6], name="data")

        # No initializer supplied.
        unseeded = tf.foldr(add_then_double, data)
        self.assertAllEqual(450, unseeded.eval())

        # Explicit initializer of 10.
        seeded = tf.foldr(add_then_double, data, initializer=10)
        self.assertAllEqual(1282, seeded.eval())
def testFoldr_Simple(self):
    """Run the ``(a + x) * 2`` right fold twice and compare with known totals.

    The first pass uses no initializer (expects 450); the second passes
    ``initializer=10`` (expects 1282).
    """
    with self.test_session():
        values = tf.constant([1, 2, 3, 4, 5, 6], name="data")

        # (expected result, extra keyword arguments for tf.foldr), in the
        # order the two folds must be built and evaluated.
        cases = (
            (450, {}),
            (1282, {"initializer": 10}),
        )
        for expected, fold_kwargs in cases:
            folded = tf.foldr(
                lambda a, x: tf.mul(tf.add(a, x), 2), values, **fold_kwargs)
            self.assertAllEqual(expected, folded.eval())
def testFoldr_Scoped(self):
    """Check that tf.foldr inside a variable scope creates and reuses one variable.

    `simple_scoped_fn` is folded twice under the "root" scope.  After the
    first fold exactly one trainable variable, named "root/body/two:0", must
    exist; after enabling reuse the second fold must not add another one.
    """
    with self.test_session() as session, tf.variable_scope("root") as scope:
        data = tf.constant([1, 2, 3, 4, 5, 6], name="data")

        first = tf.foldr(simple_scoped_fn, data)
        # Exactly one trainable variable should exist, under the expected name.
        self.assertEqual(len(tf.trainable_variables()), 1)
        self.assertEqual(tf.trainable_variables()[0].name, "root/body/two:0")
        session.run([tf.initialize_all_variables()])
        self.assertAllEqual(450, first.eval())

        # Second fold: switch the scope to reuse mode so the same variable is
        # picked up instead of a new one being created.
        scope.reuse_variables()
        second = tf.foldr(simple_scoped_fn, data, initializer=10)
        self.assertEqual(len(tf.trainable_variables()), 1)
        self.assertAllEqual(1282, second.eval())
def tf_matmul_left(dUs: tf.Tensor):
    """Reduce a stack of matrices to a single product with ``tf.foldr``.

    The fold starts from the last matrix in the stack and, for every earlier
    matrix, computes ``tf.matmul(accumulated, matrix)``.

    Parameters:
        dUs: tf.Tensor
            Stack of N matrices, documented by the original author as having
            shape (N, n, m).

    Returns:
        The tensor produced by folding ``tf.matmul`` over the first axis.
    """
    def _multiply(accumulated, matrix):
        return tf.matmul(accumulated, matrix)

    return tf.foldr(_multiply, dUs)
def testFold_Grad(self):
    """Check gradients of a product fold with respect to its initializer.

    Both tf.foldl and tf.foldr multiply the elements 1..6 into the initial
    value ``v``; the test expects d(result)/dv to be 720 in either direction.
    """
    with self.test_session():
        factors = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
        seed = tf.constant(2.0, name="v")

        # Left fold first, then right fold, matching the original build order.
        for fold in (tf.foldl, tf.foldr):
            product = fold(lambda a, x: tf.mul(a, x), factors, initializer=seed)
            grad = tf.gradients(product, seed)[0]
            self.assertAllEqual(720.0, grad.eval())
def __init__(self, phase, visualize, output_dir, batch_size, initial_learning_rate,
             steps_per_checkpoint, model_dir, target_embedding_size, attn_num_hidden,
             attn_num_layers, clip_gradients, max_gradient_norm, session, load_model,
             gpu_id, use_gru, use_distance=True, max_image_width=160,
             max_image_height=60, max_prediction_length=8, channels=1, reg_val=0):
    """Build the recognition graph and restore or initialise its variables.

    Args:
        phase: 'train' builds the training ops; 'test' additionally forces
            ``batch_size`` to 1. Any value other than 'train' is forward-only.
        visualize, output_dir, steps_per_checkpoint, channels: stored on the
            instance and logged; not otherwise used here.
        batch_size: stored on the instance (overridden to 1 for 'test').
        initial_learning_rate: learning rate passed to the Adadelta optimizer.
        model_dir: checkpoint directory; created when it does not exist.
        target_embedding_size, attn_num_hidden, attn_num_layers, use_gru:
            forwarded to ``Seq2SeqModel``.
        clip_gradients: when truthy, gradients are clipped by global norm.
        max_gradient_norm: clipping threshold used with ``clip_gradients``.
        session: TensorFlow session used to restore or initialise variables.
        load_model: when truthy and a checkpoint exists, restore from it.
        gpu_id: non-negative selects '/gpu:<id>'; otherwise '/cpu:0'.
        use_distance: stored on the instance.
        max_image_width, max_image_height, max_prediction_length: determine
            the encoder and decoder sizes of the single bucket.
        reg_val: weight applied to the summed regularization losses when > 0.
    """
    self.use_distance = use_distance

    # We need resized width, not the actual width
    max_resized_width = 1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT

    self.max_original_width = max_image_width
    self.max_width = int(math.ceil(max_resized_width))
    self.encoder_size = int(math.ceil(1. * self.max_width / 4))
    self.decoder_size = max_prediction_length + 2
    self.buckets = [(self.encoder_size, self.decoder_size)]

    if gpu_id >= 0:
        device_id = '/gpu:' + str(gpu_id)
    else:
        device_id = '/cpu:0'
    self.device_id = device_id

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    if phase == 'test':
        batch_size = 1

    logging.info('phase: %s', phase)
    logging.info('model_dir: %s', model_dir)
    logging.info('load_model: %s', load_model)
    logging.info('output_dir: %s', output_dir)
    logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
    logging.info('batch_size: %d', batch_size)
    logging.info('learning_rate: %f', initial_learning_rate)
    logging.info('reg_val: %d', reg_val)
    logging.info('max_gradient_norm: %f', max_gradient_norm)
    logging.info('clip_gradients: %s', clip_gradients)
    logging.info('max_image_width %f', max_image_width)
    logging.info('max_prediction_length %f', max_prediction_length)
    logging.info('channels: %d', channels)
    logging.info('target_embedding_size: %f', target_embedding_size)
    logging.info('attn_num_hidden: %d', attn_num_hidden)
    logging.info('attn_num_layers: %d', attn_num_layers)
    logging.info('visualize: %s', visualize)

    if use_gru:
        logging.info('using GRU in the decoder.')

    self.reg_val = reg_val
    self.sess = session
    self.steps_per_checkpoint = steps_per_checkpoint
    self.model_dir = model_dir
    self.output_dir = output_dir
    self.batch_size = batch_size
    self.global_step = tf.Variable(0, trainable=False)
    self.phase = phase
    self.visualize = visualize
    self.learning_rate = initial_learning_rate
    self.clip_gradients = clip_gradients
    self.channels = channels

    # Everything that is not training runs the graph forward only.
    self.forward_only = phase != 'train'

    with tf.device(device_id):

        self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
        self.height_float = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.float32)

        self.img_pl = tf.placeholder(tf.string, name='input_image_as_bytes')
        # A rank-0 placeholder value (single image) is promoted to a batch of one.
        self.img_data = tf.cond(tf.less(tf.rank(self.img_pl), 1),
                                lambda: tf.expand_dims(self.img_pl, 0),
                                lambda: self.img_pl)
        self.img_data = tf.map_fn(self._prepare_image, self.img_data, dtype=tf.float32)
        num_images = tf.shape(self.img_data)[0]

        # TODO: create a mask depending on the image/batch size
        self.encoder_masks = []
        for _ in xrange(self.encoder_size + 1):
            self.encoder_masks.append(tf.tile([[1.]], [num_images, 1]))

        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(self.decoder_size + 1):
            self.decoder_inputs.append(tf.tile([1], [num_images]))
            # Only the extra, final step gets a zero weight.
            if i < self.decoder_size:
                self.target_weights.append(tf.tile([1.], [num_images]))
            else:
                self.target_weights.append(tf.tile([0.], [num_images]))

        cnn_model = CNN(self.img_data, not self.forward_only)
        self.conv_output = cnn_model.tf_output()
        self.perm_conv_output = tf.transpose(self.conv_output, perm=[1, 0, 2])
        self.attention_decoder_model = Seq2SeqModel(
            encoder_masks=self.encoder_masks,
            encoder_inputs_tensor=self.perm_conv_output,
            decoder_inputs=self.decoder_inputs,
            target_weights=self.target_weights,
            target_vocab_size=len(DataGen.CHARMAP),
            buckets=self.buckets,
            target_embedding_size=target_embedding_size,
            attn_num_layers=attn_num_layers,
            attn_num_hidden=attn_num_hidden,
            forward_only=self.forward_only,
            use_gru=use_gru)

        # Lookup table from class id to character, filled from DataGen.CHARMAP.
        table = tf.contrib.lookup.MutableHashTable(
            key_dtype=tf.int64,
            value_dtype=tf.string,
            default_value="",
            checkpoint=True,
        )

        insert = table.insert(
            tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
            tf.constant(DataGen.CHARMAP),
        )

        # The ops below are built under a control dependency on the insert.
        with tf.control_dependencies([insert]):
            num_feed = []
            prb_feed = []

            for line in xrange(len(self.attention_decoder_model.output)):
                guess = tf.argmax(
                    self.attention_decoder_model.output[line], axis=1)
                proba = tf.reduce_max(tf.nn.softmax(
                    self.attention_decoder_model.output[line]), axis=1)
                num_feed.append(guess)
                prb_feed.append(proba)

            # Join the predictions into a single output string.
            # The right fold resets the accumulated suffix to '' whenever it
            # meets EOS, so only characters before the first EOS survive.
            trans_output = tf.transpose(num_feed)
            trans_output = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.cond(
                        tf.equal(x, DataGen.EOS_ID),
                        lambda: '',
                        lambda: table.lookup(x) + a  # pylint: disable=undefined-variable
                    ),
                    m,
                    initializer=''),
                trans_output,
                dtype=tf.string)

            # Calculate the total probability of the output string.
            trans_outprb = tf.transpose(prb_feed)
            trans_outprb = tf.gather(trans_outprb, tf.range(tf.size(trans_output)))
            trans_outprb = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.multiply(tf.cast(x, tf.float32), a),
                    m,
                    initializer=tf.cast(1, tf.float32)),
                trans_outprb,
                dtype=tf.float32)

            # A batch of one is returned unwrapped.
            self.prediction = tf.cond(
                tf.equal(tf.shape(trans_output)[0], 1),
                lambda: trans_output[0],
                lambda: trans_output,
            )
            self.probability = tf.cond(
                tf.equal(tf.shape(trans_outprb)[0], 1),
                lambda: trans_outprb[0],
                lambda: trans_outprb,
            )

            self.prediction = tf.identity(self.prediction, name='prediction')
            self.probability = tf.identity(self.probability, name='probability')

        if not self.forward_only:  # train
            self.updates = []
            self.summaries_by_bucket = []

            params = tf.trainable_variables()
            opt = tf.train.AdadeltaOptimizer(
                learning_rate=initial_learning_rate)
            loss_op = self.attention_decoder_model.loss

            if self.reg_val > 0:
                reg_losses = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                logging.info('Adding %s regularization losses', len(reg_losses))
                logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                loss_op = self.reg_val * tf.reduce_sum(
                    reg_losses) + loss_op

            gradients, params = list(
                zip(*opt.compute_gradients(loss_op, params)))
            if self.clip_gradients:
                gradients, _ = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)

            # Summaries for loss, variables, gradients, gradient norms and total gradient norm.
            summaries = [
                tf.summary.scalar("loss", loss_op),
                tf.summary.scalar("total_gradient_norm",
                                  tf.global_norm(gradients))
            ]
            all_summaries = tf.summary.merge(summaries)
            self.summaries_by_bucket.append(all_summaries)

            # update op - apply gradients
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.updates.append(
                    opt.apply_gradients(list(zip(gradients, params)),
                                        global_step=self.global_step))

    # tf.global_variables() / tf.global_variables_initializer() are the
    # non-deprecated names for all_variables() / initialize_all_variables().
    self.saver_all = tf.train.Saver(tf.global_variables())
    self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and load_model:
        # pylint: disable=no-member
        logging.info("Reading model parameters from %s", ckpt.model_checkpoint_path)
        self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        logging.info("Created model with fresh parameters.")
        self.sess.run(tf.global_variables_initializer())
def _aocr_model_fn(features, labels, mode, params=None, config=None):
    """Build the attention-OCR graph in the shape of an Estimator model function.

    Args:
        features: input tensor handed to ``CNN``.
        labels: unused in this function.
        mode: a ``tf.estimator.ModeKeys`` value; only ``TRAIN`` builds the
            optimizer and update ops.
        params: hyper-parameter mapping. Keys read here: 'max_image_width',
            'max_image_height', 'max_prediction_length', 'batch_size',
            'target_embedding_size', 'attn_num_layers', 'attn_num_hidden',
            'use_gru', 'char_map', 'eos_id', and, when training,
            'initial_learning_rate', 'reg_val' and 'max_gradient_norm'.
        config: indexed with 'height' for the resized image height.
            NOTE(review): this requires a mapping-like object; confirm what
            the caller actually passes.

    Returns:
        None.
        NOTE(review): nothing is returned yet, so the prediction, probability
        and update ops built here are unreachable by a caller. A
        tf.estimator model_fn is normally expected to return an
        EstimatorSpec; confirm the intended outputs before wiring this up.
    """
    forward_only = (mode != tf.estimator.ModeKeys.TRAIN)
    global_step = tf.train.get_or_create_global_step()

    max_resized_width = 1. * params['max_image_width'] / params[
        'max_image_height'] * config['height']
    max_width = int(math.ceil(max_resized_width))
    encoder_size = int(math.ceil(1. * max_width / 4))
    decoder_size = params['max_prediction_length'] + 2
    buckets = [(encoder_size, decoder_size)]

    cnn_model = CNN(features, not forward_only)
    conv_output = cnn_model.tf_output()
    perm_conv_output = tf.transpose(conv_output, perm=[1, 0, 2])

    # Use the encoder size computed above so the masks always agree with the
    # bucket definition (previously read from params['encoder_size']).
    encoder_masks = []
    for _ in xrange(encoder_size + 1):
        encoder_masks.append(tf.tile([[1.]], [params['batch_size'], 1]))

    decoder_inputs = []
    target_weights = []
    for i in xrange(decoder_size + 1):
        decoder_inputs.append(tf.tile([0], [params['batch_size']]))
        # Only the extra, final step gets a zero weight.
        if i < decoder_size:
            target_weights.append(tf.tile([1.], [params['batch_size']]))
        else:
            target_weights.append(tf.tile([0.], [params['batch_size']]))

    attention_decoder_model = Seq2SeqModel(
        encoder_masks=encoder_masks,
        encoder_inputs_tensor=perm_conv_output,
        decoder_inputs=decoder_inputs,
        target_weights=target_weights,
        target_vocab_size=len(DataGen.CHARMAP),
        buckets=buckets,
        target_embedding_size=params['target_embedding_size'],
        attn_num_layers=params['attn_num_layers'],
        attn_num_hidden=params['attn_num_hidden'],
        forward_only=forward_only,
        use_gru=params['use_gru'])

    # Lookup table from class id to character, filled from params['char_map'].
    table = tf.contrib.lookup.MutableHashTable(
        key_dtype=tf.int64,
        value_dtype=tf.string,
        default_value="",
        checkpoint=True,
    )

    insert = table.insert(
        tf.constant(list(range(len(params['char_map']))), dtype=tf.int64),
        tf.constant(params['char_map']),
    )

    with tf.control_dependencies([insert]):
        num_feed = []
        prb_feed = []

        for line in xrange(len(attention_decoder_model.output)):
            guess = tf.argmax(attention_decoder_model.output[line], axis=1)
            proba = tf.reduce_max(tf.nn.softmax(
                attention_decoder_model.output[line]), axis=1)
            num_feed.append(guess)
            prb_feed.append(proba)

        # Join the predictions into a single output string.
        # The right fold resets the accumulated suffix to '' whenever it
        # meets EOS, so only characters before the first EOS survive.
        trans_output = tf.transpose(num_feed)
        trans_output = tf.map_fn(
            lambda m: tf.foldr(
                lambda a, x: tf.cond(
                    tf.equal(x, params['eos_id']),
                    lambda: '',
                    lambda: table.lookup(x) + a  # pylint: disable=undefined-variable
                ),
                m,
                initializer=''),
            trans_output,
            dtype=tf.string)

        # Calculate the total probability of the output string.
        trans_outprb = tf.transpose(prb_feed)
        trans_outprb = tf.gather(trans_outprb, tf.range(tf.size(trans_output)))
        trans_outprb = tf.map_fn(
            lambda m: tf.foldr(
                lambda a, x: tf.multiply(tf.cast(x, tf.float64), a),
                m,
                initializer=tf.cast(1, tf.float64)),
            trans_outprb,
            dtype=tf.float64)

        # A batch of one is returned unwrapped.
        prediction = tf.cond(
            tf.equal(tf.shape(trans_output)[0], 1),
            lambda: trans_output[0],
            lambda: trans_output,
        )
        probability = tf.cond(
            tf.equal(tf.shape(trans_outprb)[0], 1),
            lambda: trans_outprb[0],
            lambda: trans_outprb,
        )

        prediction = tf.identity(prediction, name='prediction')
        probability = tf.identity(probability, name='probability')

    # Training ops are only needed when the graph is NOT forward-only
    # (the condition used to be inverted).
    if not forward_only:  # train
        updates = []
        summaries_by_bucket = []

        # Kept under its own name: rebinding `params` here would hide the
        # hyper-parameter mapping that is still indexed below.
        train_vars = tf.trainable_variables()
        opt = tf.train.AdadeltaOptimizer(
            learning_rate=params['initial_learning_rate'])
        loss_op = attention_decoder_model.loss

        if params['reg_val'] > 0:
            reg_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            logging.info('Adding %s regularization losses', len(reg_losses))
            logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
            loss_op = params['reg_val'] * tf.reduce_sum(
                reg_losses) + loss_op

        gradients, train_vars = list(
            zip(*opt.compute_gradients(loss_op, train_vars)))
        if params['max_gradient_norm'] is not None:
            gradients, _ = tf.clip_by_global_norm(
                gradients, params['max_gradient_norm'])

        # Summaries for loss, variables, gradients, gradient norms and total gradient norm.
        summaries = [
            tf.summary.scalar("loss", loss_op),
            tf.summary.scalar("total_gradient_norm",
                              tf.global_norm(gradients))
        ]
        all_summaries = tf.summary.merge(summaries)
        summaries_by_bucket.append(all_summaries)

        # update op - apply gradients
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            updates.append(
                opt.apply_gradients(list(zip(gradients, train_vars)),
                                    global_step=global_step))
def discounted_return(rewards, discounts, final_value=None, time_major=True,
                      provide_all_returns=True):
    """Computes discounted return.

    ```
    Q_t = sum_{t'=t}^T gamma^(t'-t) * r_{t'} + gamma^(T-t+1)*final_value.
    ```

    For details, see
    "Reinforcement Learning: An Introduction" Second Edition
    by Richard S. Sutton and Andrew G. Barto

    Abbreviations used below:
      (B) batch size, i.e. the number of trajectories
      (T) number of steps per trajectory

    Args:
      rewards: Tensor with shape [T, B] (or [T]) holding the rewards.
      discounts: Tensor with shape [T, B] (or [T]) holding the discounts.
      final_value: Optional tensor with shape [B] (or [1]) with the value
        estimate at t=T, used to bootstrap the return. Defaults to zeros
        shaped like the last reward step.
      time_major: Whether the inputs are time major. When False the inputs
        have shape [B, T] and are transposed internally.
      provide_all_returns: If True, return the discounted return for every
        time step; if False, return only the single complete return.

    Returns:
      If provide_all_returns is True: a tensor with shape [T, B] (or [T]),
      or [B, T] when time_major is False.
      If provide_all_returns is False: a tensor with shape [B] (or []).
      Gradients are stopped on the result in both cases.
    """
    if not time_major:
        with tf.name_scope("to_time_major_tensors"):
            discounts = tf.transpose(discounts)
            rewards = tf.transpose(rewards)

    if final_value is None:
        final_value = tf.zeros_like(rewards[-1])

    def _step_back(return_so_far, reward_and_discount):
        # One backward step: discount what has been accumulated from later
        # time steps, then add this step's reward.
        step_reward, step_discount = reward_and_discount
        return return_so_far * step_discount + step_reward

    # Arguments shared by the scan (all returns) and fold (final return only).
    shared_args = dict(fn=_step_back,
                       elems=(rewards, discounts),
                       initializer=final_value,
                       back_prop=False)

    if not provide_all_returns:
        return tf.stop_gradient(tf.foldr(**shared_args))

    all_returns = tf.scan(reverse=True, **shared_args)
    if not time_major:
        with tf.name_scope("to_batch_major_tensors"):
            all_returns = tf.transpose(all_returns)
    return tf.stop_gradient(all_returns)
def build(self, input_shape):
    """Build the quantized CDF table and scale indexes, and record debug tensors.

    The method precomputes a quantized CDF per entry of ``self.scale_table``,
    stores it (and the per-entry CDF lengths) as non-trainable weights, and,
    unless ``self._indexes`` already exists, builds the op that maps every
    element of ``self.scale`` to an index into the scale table. The
    intermediate tensors are collected in ``self.dbg_build``.

    Arguments:
      input_shape: Shape of the input tensor; must be compatible with
        ``self.input_spec.shape``.
    """
    input_shape = tf.TensorShape(input_shape)
    input_shape.assert_is_compatible_with(self.input_spec.shape)

    scale_table = tf.constant(self.scale_table, dtype=self.dtype)

    # Lower bound scales. We need to do this here, and not in __init__, because
    # the dtype may not yet be known there.
    if self.scale_bound is None:
        self._scale = math_ops.lower_bound(self._scale, scale_table[0])
    elif self.scale_bound > 0:
        self._scale = math_ops.lower_bound(self._scale, self.scale_bound)

    multiplier = -self._standardized_quantile(self.tail_mass / 2)
    pmf_center = np.ceil(np.array(self.scale_table) * multiplier).astype(int)
    pmf_length = 2 * pmf_center + 1
    max_length = np.max(pmf_length)

    # This assumes that the standardized cumulative has the property
    # 1 - c(x) = c(-x), which means we can compute differences equivalently in
    # the left or right tail of the cumulative. The point is to only compute
    # differences in the left tail. This increases numerical stability: c(x) is
    # 1 for large x, 0 for small x. Subtracting two numbers close to 0 can be
    # done with much higher precision than subtracting two numbers close to 1.
    samples = abs(np.arange(max_length, dtype=int) - pmf_center[:, None])
    samples = tf.constant(samples, dtype=self.dtype)
    samples_scale = tf.expand_dims(scale_table, 1)
    upper = self._standardized_cumulative((.5 - samples) / samples_scale)
    lower = self._standardized_cumulative((-.5 - samples) / samples_scale)
    pmf = upper - lower

    # Compute out-of-range (tail) masses.
    tail_mass = 2 * lower[:, :1]

    def cdf_initializer(shape, dtype=None, partition_info=None):
        del partition_info  # unused
        assert tuple(shape) == (len(pmf_length), max_length + 2)
        assert dtype == tf.int32
        return self._pmf_to_cdf(pmf, tail_mass,
                                tf.constant(pmf_length, dtype=tf.int32),
                                max_length)

    quantized_cdf = self.add_weight("quantized_cdf",
                                    shape=(len(pmf_length), max_length + 2),
                                    initializer=cdf_initializer,
                                    dtype=tf.int32,
                                    trainable=False)
    cdf_length = self.add_weight(
        "cdf_length",
        shape=(len(pmf_length), ),
        initializer=tf.initializers.constant(pmf_length + 2),
        dtype=tf.int32,
        trainable=False)
    # Works around a weird TF issue with reading variables inside a loop.
    self._quantized_cdf = tf.identity(quantized_cdf)
    self._cdf_length = tf.identity(cdf_length)

    # Debug entries that only exist when the indexes are computed here. They
    # stay empty when `_indexes` was supplied from outside; referencing
    # `fill` / `initializer` unconditionally used to raise NameError then.
    index_debug = {}

    # Now, if they haven't been overridden, compute the indexes into the table
    # for each of the passed-in scales.
    if not hasattr(self, "_indexes"):
        # Prevent tensors from bouncing back and forth between host and GPU.
        with tf.device("/cpu:0"):
            fill = tf.constant(len(self.scale_table) - 1, dtype=tf.int32)
            initializer = tf.fill(tf.shape(self.scale), fill)

            def loop_body(indexes, scale):
                # Decrement the index once for every table entry that is at
                # least as large as the element's scale.
                return indexes - tf.cast(self.scale <= scale, tf.int32)

            self._indexes = tf.foldr(loop_body,
                                     scale_table[:-1],
                                     initializer=initializer,
                                     back_prop=False,
                                     name="compute_indexes")
        index_debug = {'fill': fill, 'initializer': initializer}

    self._offset = tf.constant(-pmf_center, dtype=tf.int32)

    # tfc.SymmetricConditional.build(self, input_shape)
    # NOTE(review): starting the MRO lookup after tfc.SymmetricConditional
    # skips that class's own build(); presumably intentional because this
    # method replaces it - confirm.
    super(tfc.SymmetricConditional, self).build(input_shape)

    dbg_build = {
        'scale_table': scale_table,
        '_scale': self._scale,
        '_quantized_cdf': self._quantized_cdf,
        '_cdf_length': cdf_length,
        '_offset': self._offset,
    }
    dbg_build.update(index_debug)
    dbg_build['_indexes'] = self._indexes
    self.dbg_build = add_prefix_to_dict('build', dbg_build)
# Left fold with an addition step, first inline and then with every keyword
# argument of tf.foldl spelled out.
total = tf.foldl(lambda a, x: a + x, elems)

fn = lambda a, x: a + x
total = tf.foldl(fn, elems, initializer=None, parallel_iterations=10,
                 back_prop=True, swap_memory=False, name=None)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(total.eval())
# with fn = a + x the fold runs left to right: ((((e0 + e1) + e2) + e3) + ...)
# '''

# almost same function as tf.foldr
elems = [1, 2, 3, 4, 5, 6]
total = tf.foldr(lambda a, x: a + x, elems)
print(total.eval())

# tf.scan keeps every intermediate accumulator instead of only the last one.
elems = [1, 2, 3, 4, 5, 6]
total = tf.scan(lambda a, x: a + x, elems)
print(total.eval())
# running totals: [1, 1 + 2, 1 + 2 + 3, ...]; when an initializer is given
# it is included in every entry.

fn = lambda a, x: a * x
total = tf.scan(fn, elems, initializer=None, parallel_iterations=10,
                back_prop=True, swap_memory=False, name=None)
def build(self, input_shape):
    """Builds the entropy model.

    The quantized CDF table is derived from the scale table while the graph
    is being constructed. Afterwards the graph that turns the scale tensor
    into indexes into that table is created (unless indexes were already
    provided), together with the per-scale PMF offsets.

    Arguments:
      input_shape: Shape of the input tensor.

    Raises:
      ValueError: If `input_shape` doesn't specify number of input dimensions.
    """
    input_shape = tf.TensorShape(input_shape)
    input_shape.assert_is_compatible_with(self.input_spec.shape)

    scale_table = tf.constant(self.scale_table, dtype=self.dtype)

    # The scales are lower-bounded here rather than in __init__ because the
    # dtype may not be known yet at construction time. With no explicit
    # bound the smallest table entry is used; a non-positive bound disables
    # the clamp.
    if self.scale_bound is None or self.scale_bound > 0:
        bound = scale_table[0] if self.scale_bound is None else self.scale_bound
        self._scale = math_ops.lower_bound(self._scale, bound)

    quantile_factor = -self._standardized_quantile(self.tail_mass / 2)
    half_widths = np.ceil(
        np.array(self.scale_table) * quantile_factor).astype(int)
    support_sizes = 2 * half_widths + 1
    widest = np.max(support_sizes)
    table_shape = (len(support_sizes), widest + 2)

    # Relies on the standardized cumulative satisfying 1 - c(x) = c(-x), so
    # differences can be taken in either tail. Only the left tail is used:
    # c(x) is close to 0 there, and subtracting two numbers near 0 is far
    # more precise than subtracting two numbers near 1.
    offsets = np.abs(
        np.arange(widest, dtype=int) - half_widths[:, np.newaxis])
    offsets = tf.constant(offsets, dtype=self.dtype)
    column_scales = tf.expand_dims(scale_table, 1)
    cdf_above = self._standardized_cumulative((.5 - offsets) / column_scales)
    cdf_below = self._standardized_cumulative((-.5 - offsets) / column_scales)
    pmf = cdf_above - cdf_below

    # Mass that falls outside the tabulated range (both tails).
    out_of_range_mass = 2 * cdf_below[:, :1]

    def cdf_initializer(shape, dtype=None, partition_info=None):
        del partition_info  # unused
        assert tuple(shape) == table_shape
        assert dtype == tf.int32
        lengths = tf.constant(support_sizes, dtype=tf.int32)
        return self._pmf_to_cdf(pmf, out_of_range_mass, lengths, widest)

    quantized_cdf = self.add_variable(
        "quantized_cdf",
        shape=table_shape,
        initializer=cdf_initializer,
        dtype=tf.int32,
        trainable=False)
    cdf_length = self.add_variable(
        "cdf_length",
        shape=(len(support_sizes),),
        initializer=tf.initializers.constant(support_sizes + 2),
        dtype=tf.int32,
        trainable=False)
    # Works around a weird TF issue with reading variables inside a loop.
    self._quantized_cdf = tf.identity(quantized_cdf)
    self._cdf_length = tf.identity(cdf_length)

    # Unless the indexes were overridden, derive them from the scales.
    if not hasattr(self, "_indexes"):
        # Prevent tensors from bouncing back and forth between host and GPU.
        with tf.device("/cpu:0"):
            last_index = tf.constant(
                len(self.scale_table) - 1, dtype=tf.int32)
            start_indexes = tf.fill(tf.shape(self.scale), last_index)

            def step_down(indexes, table_scale):
                # Decrement once for every table entry that is at least as
                # large as the element's scale.
                return indexes - tf.cast(self.scale <= table_scale, tf.int32)

            self._indexes = tf.foldr(
                step_down, scale_table[:-1],
                initializer=start_indexes, back_prop=False,
                name="compute_indexes")

    self._offset = tf.constant(-half_widths, dtype=tf.int32)

    super(SymmetricConditional, self).build(input_shape)
def discounted_return(rewards, discounts, final_value=None, time_major=True,
                      provide_all_returns=True):
    """Computes discounted return.

    ```
    Q_n = sum_{n'=n}^N gamma^(n'-n) * r_{n'} + gamma^(N-n+1)*final_value.
    ```

    For details, see
    "Reinforcement Learning: An Introduction" Second Edition
    by Richard S. Sutton and Andrew G. Barto

    Abbreviations used below:
      `B`: batch size, i.e. the number of trajectories.
      `T`: number of steps per trajectory. This is equal to `N - n` in the
        equation above.

    **Note** To replicate the calculation `Q_n` exactly, use
    `discounts = gamma * tf.ones_like(rewards)` and
    `provide_all_returns=False`.

    Args:
      rewards: Tensor with shape `[T, B]` (or `[T]`) holding the rewards.
      discounts: Tensor with shape `[T, B]` (or `[T]`) holding the discounts.
      final_value: (Optional.) Tensor with shape `[B]` (or `[1]`) with the
        value estimate at `T`, used to bootstrap the return. Default: an
        all-zeros tensor shaped like the last reward step.
      time_major: Whether the inputs are time major. When False the inputs
        have shape `[B, T]` and are transposed internally.
      provide_all_returns: If True, return the discounted return for every
        time step; if False, return only the single complete return.

    Returns:
      If `provide_all_returns`: a tensor with shape `[T, B]` (or `[T]`), or
      `[B, T]` when `not time_major`.
      If `not provide_all_returns`: a tensor with shape `[B]` (or []).
      Gradients are stopped on the result in both cases.
    """
    if not time_major:
        with tf.name_scope("to_time_major_tensors"):
            discounts = tf.transpose(discounts)
            rewards = tf.transpose(rewards)

    if final_value is None:
        final_value = tf.zeros_like(rewards[-1])

    def _accumulate(future_return, reward_discount_pair):
        # One backward step: discount what has been accumulated from later
        # time steps, then add this step's reward.
        current_reward, current_discount = reward_discount_pair
        return future_return * current_discount + current_reward

    sequence = (rewards, discounts)

    if not provide_all_returns:
        # Only the return at the first time step is needed.
        total = tf.foldr(
            fn=_accumulate,
            elems=sequence,
            initializer=final_value,
            back_prop=False)
        return tf.stop_gradient(total)

    per_step = tf.scan(
        fn=_accumulate,
        elems=sequence,
        reverse=True,
        initializer=final_value)
    per_step = tf.nest.map_structure(tf.stop_gradient, per_step)

    if not time_major:
        with tf.name_scope("to_batch_major_tensors"):
            per_step = tf.transpose(per_step)

    return tf.stop_gradient(per_step)
import tensorflow as tf
import sys
from tensorflow.python.client import timeline

riskfree = 0.02
volatility = 0.30


def horner(coeff, x):
    """Evaluate ``c1*x + c2*x**2 + ... + cn*x**n`` with Horner's scheme.

    ``tf.foldr`` calls its function as ``fn(accumulator, element)`` and walks
    the coefficients from the last to the first, so each step must compute
    ``element + x * accumulator``. The previous lambda had the two roles
    swapped, which does not evaluate the polynomial.

    Args:
        coeff: coefficients ``[c1, ..., cn]``, lowest power first.
        x: point at which the polynomial is evaluated.

    Returns:
        ``x * (c1 + x * (c2 + ... + x * cn))``.
    """
    return x * tf.foldr(lambda acc, c: c + x * acc, coeff)


rsqrt2pi = 0.39894228040143267793994605993438
coeff = [0.31938153, -0.356563782, 1.781477937, -1.821255978, 1.330274429]


def cnd1(d):
    """Return the polynomial tail term ``pdf(d) * poly(1 / (1 + 0.2316419*|d|))``."""
    return rsqrt2pi * tf.exp(-0.5 * d * d) * horner(coeff, 1.0 / (1.0 + 0.2316419 * abs(d)))


def cnd(d):
    """Return ``cnd1(d)``.

    NOTE(review): the sign-dependent branch kept below is disabled, so the
    same expression is returned for every ``d``; the commented-out code
    suggests ``1.0 - cnd1(d)`` was intended for ``d > 0`` - confirm before
    relying on the prices.
    """
    return cnd1(d)
    # c = tf.Variable(cnd1(d),validate_shape=False)
    # return tf.cond(tf.reshape(d,[]) > 0, lambda: 1.0 - c, lambda: c)


def blackscholes(option):
    """Price a European call and put with the Black-Scholes formula.

    Args:
        option: a ``(price, strike, years)`` triple. It is unpacked inside
            the body because tuple parameters in the signature are a
            Python 2-only syntax; callers still pass one 3-tuple.

    Returns:
        ``(V_call, V_put)``.
    """
    price, strike, years = option
    r = tf.constant(riskfree)
    v = tf.constant(volatility)
    v_sqrtT = (v * tf.sqrt(years))
    d1 = (tf.log(price / strike) + (r + 0.5 * v * v) * years) / v_sqrtT
    d2 = (d1 - v_sqrtT)
    cndD1 = cnd(d1)
    cndD2 = cnd(d2)
    x_expRT = (strike * tf.exp(-r * years))
    V_call = price * cndD1 - x_expRT * cndD2
    V_put = x_expRT * (1.0 - cndD2) - price * (1.0 - cndD1)
    return (V_call, V_put)
def __init__(self, phase, visualize, data_path, output_dir, batch_size,
             initial_learning_rate, num_epoch, steps_per_checkpoint,
             target_vocab_size, model_dir, target_embedding_size, attn_num_hidden,
             attn_num_layers, clip_gradients, max_gradient_norm, session,
             load_model, gpu_id, use_gru, use_distance=True, max_image_width=160,
             max_image_height=60, max_prediction_length=8, reg_val=0):
    """Load the data generator, build the recognition graph, restore or init variables.

    Args:
        phase: 'train' or 'test'. 'test' forces ``batch_size`` to 1 and a
            single data epoch; any other value fails the assertion below.
        visualize, output_dir, steps_per_checkpoint: stored on the instance
            and logged; not otherwise used here.
        data_path: passed to ``DataGen``.
        batch_size: stored on the instance (overridden to 1 unless training).
        initial_learning_rate: learning rate passed to the Adadelta optimizer.
        num_epoch: number of epochs for the training data generator.
        target_vocab_size, target_embedding_size, attn_num_hidden,
            attn_num_layers, use_gru: forwarded to ``Seq2SeqModel``.
        model_dir: checkpoint directory; created when it does not exist.
        clip_gradients: when truthy, gradients are clipped by global norm.
        max_gradient_norm: clipping threshold used with ``clip_gradients``.
        session: TensorFlow session used to restore or initialise variables.
        load_model: when truthy and a checkpoint exists, restore from it.
        gpu_id: non-negative selects '/gpu:<id>'; otherwise '/cpu:0'.
        use_distance: stored on the instance.
        max_image_width, max_image_height, max_prediction_length: determine
            the encoder and decoder sizes of the single bucket.
        reg_val: weight applied to the summed regularization losses when > 0.
    """
    self.use_distance = use_distance

    # We need resized width, not the actual width
    self.max_original_width = max_image_width
    self.max_width = int(math.ceil(1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT))

    self.encoder_size = int(math.ceil(1. * self.max_width / 4))
    self.decoder_size = max_prediction_length + 2
    self.buckets = [(self.encoder_size, self.decoder_size)]

    # A negative gpu_id falls back to the CPU instead of producing an
    # invalid '/gpu:-1' device string.
    gpu_device_id = '/gpu:' + str(gpu_id) if gpu_id >= 0 else '/cpu:0'
    self.gpu_device_id = gpu_device_id
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    logging.info('loading data')

    # load data
    if phase == 'train':
        self.s_gen = DataGen(data_path, self.buckets, epochs=num_epoch,
                             max_width=self.max_original_width)
    else:
        batch_size = 1
        self.s_gen = DataGen(data_path, self.buckets, epochs=1,
                             max_width=self.max_original_width)

    # Arguments are passed lazily so formatting only happens when the
    # record is actually emitted.
    logging.info('phase: %s', phase)
    logging.info('model_dir: %s', model_dir)
    logging.info('load_model: %s', load_model)
    logging.info('output_dir: %s', output_dir)
    logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
    logging.info('batch_size: %d', batch_size)
    logging.info('num_epoch: %d', num_epoch)
    # %f, not %d: the learning rate is fractional and %d would truncate it.
    logging.info('learning_rate: %f', initial_learning_rate)
    logging.info('reg_val: %d', reg_val)
    logging.info('max_gradient_norm: %f', max_gradient_norm)
    logging.info('clip_gradients: %s', clip_gradients)
    logging.info('max_image_width %f', max_image_width)
    logging.info('max_prediction_length %f', max_prediction_length)
    logging.info('target_vocab_size: %d', target_vocab_size)
    logging.info('target_embedding_size: %f', target_embedding_size)
    logging.info('attn_num_hidden: %d', attn_num_hidden)
    logging.info('attn_num_layers: %d', attn_num_layers)
    logging.info('visualize: %s', visualize)

    if use_gru:
        logging.info('using GRU in the decoder.')

    self.reg_val = reg_val
    self.sess = session
    self.steps_per_checkpoint = steps_per_checkpoint
    self.model_dir = model_dir
    self.output_dir = output_dir
    self.batch_size = batch_size
    self.num_epoch = num_epoch
    self.global_step = tf.Variable(0, trainable=False)
    self.phase = phase
    self.visualize = visualize
    self.learning_rate = initial_learning_rate
    self.clip_gradients = clip_gradients

    if phase == 'train':
        self.forward_only = False
    elif phase == 'test':
        self.forward_only = True
    else:
        assert False, phase

    with tf.device(gpu_device_id):

        self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
        self.height_float = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.float64)

        self.img_pl = tf.placeholder(tf.string, name='input_image_as_bytes')
        # A rank-0 placeholder value (single image) is promoted to a batch of one.
        self.img_data = tf.cond(
            tf.less(tf.rank(self.img_pl), 1),
            lambda: tf.expand_dims(self.img_pl, 0),
            lambda: self.img_pl
        )
        self.img_data = tf.map_fn(self._prepare_image, self.img_data, dtype=tf.float32)
        num_images = tf.shape(self.img_data)[0]

        # TODO: create a mask depending on the image/batch size
        self.encoder_masks = []
        for _ in xrange(self.encoder_size + 1):
            self.encoder_masks.append(
                tf.tile([[1.]], [num_images, 1])
            )

        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(self.decoder_size + 1):
            self.decoder_inputs.append(
                tf.tile([0], [num_images])
            )
            # Only the extra, final step gets a zero weight.
            if i < self.decoder_size:
                self.target_weights.append(tf.tile([1.], [num_images]))
            else:
                self.target_weights.append(tf.tile([0.], [num_images]))

        # TODO: not 2, 2 is static (???)
        self.zero_paddings = tf.zeros([num_images, 2, 512], dtype=np.float32)

        # NOTE(review): the second argument is always True here, even when
        # phase == 'test'; if it is a training-mode flag it presumably should
        # be `not self.forward_only` - confirm against CNN.
        cnn_model = CNN(self.img_data, True)
        self.conv_output = cnn_model.tf_output()
        self.concat_conv_output = tf.concat(axis=1, values=[self.conv_output, self.zero_paddings])

        self.perm_conv_output = tf.transpose(self.concat_conv_output, perm=[1, 0, 2])
        self.attention_decoder_model = Seq2SeqModel(
            encoder_masks=self.encoder_masks,
            encoder_inputs_tensor=self.perm_conv_output,
            decoder_inputs=self.decoder_inputs,
            target_weights=self.target_weights,
            target_vocab_size=target_vocab_size,
            buckets=self.buckets,
            target_embedding_size=target_embedding_size,
            attn_num_layers=attn_num_layers,
            attn_num_hidden=attn_num_hidden,
            forward_only=self.forward_only,
            use_gru=use_gru)

        # Lookup table from class id to character, filled from DataGen.CHARMAP.
        table = tf.contrib.lookup.MutableHashTable(
            key_dtype=tf.int64,
            value_dtype=tf.string,
            default_value="",
            checkpoint=True,
        )

        # list(...) so tf.constant receives a concrete sequence on Python 3,
        # where range() is lazy.
        insert = table.insert(
            tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
            tf.constant(DataGen.CHARMAP),
        )

        with tf.control_dependencies([insert]):
            num_feed = []

            for step in xrange(len(self.attention_decoder_model.output)):
                guess = tf.argmax(self.attention_decoder_model.output[step], axis=1)
                num_feed.append(guess)

            # The right fold resets the accumulated suffix to '' whenever it
            # meets EOS, so only characters before the first EOS survive.
            trans_output = tf.transpose(num_feed)
            trans_output = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.cond(
                        tf.equal(x, DataGen.EOS_ID),
                        lambda: '',
                        lambda: table.lookup(x) + a
                    ),
                    m,
                    initializer=''
                ),
                trans_output,
                dtype=tf.string
            )

            # A batch of one is returned unwrapped.
            self.prediction = tf.cond(
                tf.equal(tf.shape(trans_output)[0], 1),
                lambda: trans_output[0],
                lambda: trans_output
            )

        if not self.forward_only:  # train
            self.updates = []
            self.summaries_by_bucket = []

            params = tf.trainable_variables()
            opt = tf.train.AdadeltaOptimizer(learning_rate=initial_learning_rate)

            if self.reg_val > 0:
                reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                logging.info('Adding %s regularization losses', len(reg_losses))
                logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                loss_op = self.reg_val * tf.reduce_sum(reg_losses) + self.attention_decoder_model.loss
            else:
                loss_op = self.attention_decoder_model.loss

            gradients, params = zip(*opt.compute_gradients(loss_op, params))
            if self.clip_gradients:
                gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)

            # Add summaries for loss, variables, gradients, gradient norms and total gradient norm.
            summaries = []
            summaries.append(tf.summary.scalar("loss", loss_op))
            summaries.append(tf.summary.scalar("total_gradient_norm", tf.global_norm(gradients)))
            all_summaries = tf.summary.merge(summaries)
            self.summaries_by_bucket.append(all_summaries)

            # update op - apply gradients
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.updates.append(opt.apply_gradients(list(zip(gradients, params)),
                                                        global_step=self.global_step))

    # tf.global_variables() / tf.global_variables_initializer() are the
    # non-deprecated names for all_variables() / initialize_all_variables().
    self.saver_all = tf.train.Saver(tf.global_variables())
    self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and load_model:
        logging.info("Reading model parameters from %s", ckpt.model_checkpoint_path)
        self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        logging.info("Created model with fresh parameters.")
        self.sess.run(tf.global_variables_initializer())