def normalized_locations_to_indices(offsets, height, width):
    """Map normalized locations in [-1, 1] to integer pixel indices.

    Args:
      offsets: Tensor of size [batch, 2] with normalized (i.e., range -1, 1)
        locations. Column 0 is scaled by `height`, column 1 by `width`.
      height: (Integer) Image height.
      width: (Integer) Image width.

    Returns:
      indices_height: int32 tensor of height indices, clipped to
        [0, height - 1].
      indices_width: int32 tensor of width indices, clipped to
        [0, width - 1].
    """
    normalized = tf.cast(offsets, dtype=tf.float32)

    def _to_index(coords, size):
        # Affine map from [-1, 1] onto [0, size - 1], then snap to the
        # nearest integer and clip so the index is always valid.
        scaled = (size - 1.) * (coords + 1.) / 2.
        rounded = tf.cast(tf.round(scaled), tf.int32)
        return tf.clip_by_value(rounded, 0, size - 1)

    indices_height = _to_index(normalized[:, 0], height)
    indices_width = _to_index(normalized[:, 1], width)
    return indices_height, indices_width
def nearest_sampler(img, x, y):
    """
    Nearest-neighbour lookup of image pixels.

    Input
    -----
    - img: batch of images in (B, H, W, C) layout.
    - x, y: sampling coordinates. They are used as pixel coordinates as
      given; no rescaling from a normalized range is applied here.

    Returns
    -------
    - out: whatever `get_pixel_value` returns for the rounded and clipped
      coordinates.
    """
    img_shape = tf.shape(img)
    row_limit = tf.cast(img_shape[1] - 1, 'int32')
    col_limit = tf.cast(img_shape[2] - 1, 'int32')
    lower = tf.zeros([], dtype='int32')

    # Round each coordinate to the nearest integer pixel position.
    col_idx = tf.cast(tf.round(tf.cast(x, 'float32')), 'int32')
    row_idx = tf.cast(tf.round(tf.cast(y, 'float32')), 'int32')

    # Clip to [0, W-1] / [0, H-1] so the lookup never leaves the image.
    col_idx = tf.clip_by_value(col_idx, lower, col_limit)
    row_idx = tf.clip_by_value(row_idx, lower, row_limit)

    return get_pixel_value(img, col_idx, row_idx)
def resample_voxels(v, xs, ys, zs, method="trilinear"):
    """Sample the voxel grid `v` at the coordinates (xs, ys, zs).

    Args:
        v: voxel grid, passed through unchanged to `get_voxel_values`.
        xs, ys, zs: float tensors of sampling coordinates in voxel units.
        method: "trilinear" for trilinear interpolation between the eight
            surrounding voxels, or "nearest" for a lookup at the rounded
            coordinates.

    Returns:
        The sampled values as produced by `get_voxel_values` (weighted sum
        of eight lookups for "trilinear", one lookup for "nearest").

    Raises:
        NameError: if `method` is neither "trilinear" nor "nearest".
    """
    if method == "trilinear":
        # Coordinates are clamped to [0, 64], the range the original code
        # used for its corner indices.
        upper = 64.0
        cx = tf.clip_by_value(xs, 0, 64)
        cy = tf.clip_by_value(ys, 0, 64)
        cz = tf.clip_by_value(zs, 0, 64)

        x0, y0, z0 = tf.floor(cx), tf.floor(cy), tf.floor(cz)
        # The upper corner is floor + 1 rather than ceil(): with ceil() the
        # two corners coincide at integer coordinates and every weight
        # becomes zero. Clamping is harmless because the upper corner then
        # carries zero weight.
        x1 = tf.minimum(x0 + 1.0, upper)
        y1 = tf.minimum(y0 + 1.0, upper)
        z1 = tf.minimum(z0 + 1.0, upper)

        # Fractional position inside the cell, taken from the *clipped*
        # coordinates so weights and indices always agree.
        fx, fy, fz = cx - x0, cy - y0, cz - z0
        gx, gy, gz = 1.0 - fx, 1.0 - fy, 1.0 - fz

        final_value = (
            fx * fy * fz * get_voxel_values(v, x1, y1, z1) +
            fx * fy * gz * get_voxel_values(v, x1, y1, z0) +
            fx * gy * fz * get_voxel_values(v, x1, y0, z1) +
            fx * gy * gz * get_voxel_values(v, x1, y0, z0) +
            gx * fy * fz * get_voxel_values(v, x0, y1, z1) +
            gx * fy * gz * get_voxel_values(v, x0, y1, z0) +
            gx * gy * fz * get_voxel_values(v, x0, y0, z1) +
            gx * gy * gz * get_voxel_values(v, x0, y0, z0)
        )
        return final_value
    elif method == "nearest":
        r_xs = tf.round(xs)
        r_ys = tf.round(ys)
        r_zs = tf.round(zs)
        return get_voxel_values(v, r_xs, r_ys, r_zs)
    else:
        # NameError is kept for backward compatibility with existing callers.
        raise NameError(method)
def _resize_small(data):
    """Area-resize data["image"] so its shorter side equals `smaller_size`.

    `smaller_size` is a free variable resolved from the surrounding scope.
    The aspect ratio is kept (up to rounding of the target height/width).

    Args:
        data: dict with an "image" entry; the first two dimensions of the
            image are read as height and width.

    Returns:
        The same dict with "image" replaced by the resized image.
    """
    image = data["image"]
    shape = tf.shape(image)
    height, width = shape[0], shape[1]

    shorter_side = tf.cast(tf.minimum(height, width), tf.float32)
    scale = tf.cast(smaller_size, tf.float32) / shorter_side

    target_h = tf.cast(tf.round(tf.cast(height, tf.float32) * scale), tf.int32)
    target_w = tf.cast(tf.round(tf.cast(width, tf.float32) * scale), tf.int32)

    # resize_area works on batches: add a leading batch axis, then drop it.
    resized = tf.image.resize_area(image[None], [target_h, target_w])
    data["image"] = resized[0]
    return data
def glimpseSensor(self, img, normLoc):
    """Extract `self.depth` square crops of doubling size around a location.

    For each image in the batch, crops of side 2 * minRadius * 2**i
    (i = 0 .. depth - 1) centred on the location are taken from a
    zero-padded copy of the image and bilinearly resized to
    (sensorBandwidth x sensorBandwidth).

    Args:
        img: batch of images, reshaped here to
            (batch_size, img_W, img_H, channels).
        normLoc: [batch, 2] locations; the code maps them from [-1, 1] to
            pixel units with (v + 1) / 2 * size.

    Returns:
        Tensor of shape (batch_size, depth, sensorBandwidth,
        sensorBandwidth). It is also appended to `self.glimpse_images`.
    """
    # Map the normalized location to pixel units.
    center_x = tf.round(((normLoc[:, 0] + 1) / 2.0) * self.img_W)
    center_y = tf.round(((normLoc[:, 1] + 1) / 2.0) * self.img_H)
    centers = tf.cast(tf.stack([center_x, center_y], axis=1), tf.int32)

    batch = tf.reshape(
        img, (self.batch_size, self.img_W, self.img_H, self.channels))

    # Padding geometry is identical for every image in the batch.
    largest_radius = self.minRadius * (2**(self.depth - 1))
    margin = 2 * largest_radius
    padded_dim0 = largest_radius * 4 + self.img_W
    padded_dim1 = largest_radius * 4 + self.img_H
    out_size = (self.sensorBandwidth, self.sensorBandwidth)

    per_image = []
    for k in range(self.batch_size):
        # Zero-pad so that even the largest crop stays inside the tensor.
        padded = tf.image.pad_to_bounding_box(
            batch[k, :, :, :], margin, margin, padded_dim0, padded_dim1)

        scales = []
        for level in range(self.depth):
            radius = int(self.minRadius * (2**level))
            side = 2 * radius
            crop_size = tf.tile(tf.constant(side, shape=[1]), [2])
            top_left = margin + centers[k, :] - radius

            # Dropping the channel axis only works for one-channel images.
            plane = tf.reshape(
                padded,
                (padded.get_shape()[0].value, padded.get_shape()[1].value))

            # Crop (side x side), then resize to the sensor bandwidth.
            patch = tf.slice(plane, top_left, crop_size)
            patch = tf.image.resize_bilinear(
                tf.reshape(patch, (1, side, side, 1)), out_size)
            scales.append(tf.reshape(patch, out_size))

        per_image.append(tf.stack(scales))

    glimpses = tf.stack(per_image)
    self.glimpse_images.append(glimpses)
    return glimpses
def model_fn(features, labels, mode, params,):
    """Estimator model function for a small binary classifier.

    The network is Dense(5) -> ReLU -> Dense(1); the single output is a
    logit trained with summed sigmoid cross-entropy.

    Args:
        features: input tensor fed to the model.
        labels: binary targets with the same shape as the logits.
        mode: a `tf.estimator.ModeKeys` value. Only EVAL and TRAIN return
            a spec.
        params: dict of hyper-parameters; `params['lr']` is the learning
            rate used in TRAIN mode.

    Returns:
        A `tf.estimator.EstimatorSpec` for EVAL or TRAIN mode.
    """
    model = Sequential()
    model.add(Dense(5))
    model.add(Activation("relu"))
    model.add(Dense(1))

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    logits = model(features, training=is_training)
    loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))

    if mode == tf.estimator.ModeKeys.EVAL:
        # Logits are unbounded scores, so rounding them directly does not
        # give 0/1 classes. Threshold the sigmoid probability at 0.5.
        predictions = tf.round(tf.nn.sigmoid(logits))
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(labels=labels,
                                            predictions=predictions),
        }
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(params['lr'])
        if NUM_REPLICAS > 1:
            # Wrap the optimizer for cross-replica training.
            optimizer = ipu.cross_replica_optimizer.CrossReplicaOptimizer(
                optimizer)
        train_op = optimizer.minimize(loss=loss)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)
def build_model(num_features):
    """Build a logistic-regression graph for binary classification.

    Args:
        num_features: number of columns of the feature matrix.

    Returns:
        Tuple (train_op, accuracy, X, Y, Y_hat):
        the gradient-descent minimize op, the mean accuracy tensor, the
        feature placeholder [None, num_features], the label placeholder
        [None, 1] and the sigmoid predictions.
    """
    # Binary classification needs a single output column.
    num_outputs = 1

    inputs = tf.placeholder(tf.float32, [None, num_features])
    targets = tf.placeholder(tf.float32, [None, num_outputs])

    # Trainable parameters, both initialised to zero.
    weights = tf.Variable(tf.zeros([num_features, num_outputs]))
    bias = tf.Variable(0.0)

    logits = tf.matmul(inputs, weights) + bias
    probabilities = tf.nn.sigmoid(logits)

    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=targets, logits=logits)
    cost = tf.reduce_mean(cross_entropy)

    train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)

    # A prediction is the probability rounded to 0 or 1; accuracy is the
    # fraction of predictions equal to the label.
    hits = tf.equal(tf.round(probabilities), targets)
    accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    return train_op, accuracy, inputs, targets, probabilities
def quantize_image(image):
    """Scale `image` by 255, round, and saturate-cast to uint8.

    Taken from Balle's implementation.
    """
    scaled = tf.round(image * 255)
    return tf.saturate_cast(scaled, tf.uint8)
def quantize_image(image):
    """Change the range of pixel values to [0, 255] and cast to uint8.

    The input array is first reshaped to (image.shape[1], image.shape[2], 3),
    i.e. the leading axis is dropped, then converted to a tensor, scaled by
    255, rounded and saturate-cast to uint8.
    """
    target_shape = (image.shape[1], image.shape[2], 3)
    pixels = tf.convert_to_tensor(np.reshape(image, target_shape))
    return tf.saturate_cast(tf.round(pixels * 255), tf.uint8)
def img2mpi(self, img, depth, planedepths):
    """Compute ground truth MPI of visible content using depth map.

    Every pixel is assigned to one plane: its inverse depth is mapped
    linearly onto plane indices between 1 / planedepths[0] (index 0) and
    1 / planedepths[-1] (index num_depths - 1) and rounded. The colour is
    replicated on all planes; alpha is 1 on the assigned plane and 0
    elsewhere.

    Args:
        img: image batch indexed as [batch, height, width, channels].
        depth: depth map indexed as [batch, height, width].
        planedepths: plane depths; `planedepths.shape[0]` gives the count.

    Returns:
        Tensor with colour and alpha concatenated on axis 4.
    """
    img_shape = tf.shape(img)
    rows, cols = img_shape[1], img_shape[2]
    num_depths = planedepths.shape[0]

    # Linear interpolation in inverse depth between first and last plane.
    inv_first = 1.0 / planedepths[0]
    inv_last = 1.0 / planedepths[-1]
    plane_idx = (tf.to_float(num_depths) - 1) * (
        (1.0 / depth) - inv_first) / (inv_last - inv_first)
    plane_idx = tf.round(plane_idx)

    plane_idx_tiled = tf.to_int32(
        tf.tile(plane_idx[:, :, :, tf.newaxis], [1, 1, 1, num_depths]))

    # Only the plane-index grid of the meshgrid is needed.
    plane_grid = tf.meshgrid(tf.range(rows),
                             tf.range(cols),
                             tf.range(num_depths),
                             indexing='ij')[2]

    mpi_colors = tf.to_float(
        tf.tile(img[:, :, :, tf.newaxis, :], [1, 1, 1, num_depths, 1]))

    on_plane = tf.equal(plane_idx_tiled, plane_grid)
    mpi_alphas = tf.to_float(
        tf.where(on_plane, tf.ones_like(plane_idx_tiled),
                 tf.zeros_like(plane_idx_tiled)))

    return tf.concat([mpi_colors, mpi_alphas[Ellipsis, tf.newaxis]], axis=4)
def infer(self, features, *args, **kwargs):
    """Produce predictions from the model by running it.

    If `features` has no "targets", a zero tensor is inserted: shaped like
    "infer_targets" when present, otherwise like "inputs" with dimension 1
    replaced by `hparams.video_num_target_frames`.

    Args:
        features: dict of input tensors; may be extended with "targets".
        *args, **kwargs: accepted and ignored.

    Returns:
        The model output dict with "targets" decoded to integers (argmax
        for per-pixel softmax, otherwise squeeze + round), "target_reward"
        argmax-ed when reward prediction is enabled, and "outputs" and
        "scores" both set to the decoded targets.

    Raises:
        ValueError: if none of "targets", "infer_targets" or "inputs" is
            present in `features`.
    """
    del args, kwargs

    if "targets" not in features:
        if "infer_targets" in features:
            placeholder_shape = common_layers.shape_list(
                features["infer_targets"])
        elif "inputs" in features:
            placeholder_shape = common_layers.shape_list(features["inputs"])
            placeholder_shape[1] = self.hparams.video_num_target_frames
        else:
            raise ValueError("no inputs are given.")
        features["targets"] = tf.zeros(placeholder_shape, dtype=tf.float32)

    output, _ = self(features)  # pylint: disable=not-callable
    if not isinstance(output, dict):
        output = {"targets": output}

    frames = output["targets"]
    if self.is_per_pixel_softmax:
        # Flatten to [N, classes], take the arg-max class, restore shape.
        frames_shape = common_layers.shape_list(frames)
        flat = tf.reshape(frames, [-1, frames_shape[-1]])
        frames = tf.reshape(tf.argmax(flat, axis=-1), frames_shape[:-1])
    else:
        frames = tf.to_int64(tf.round(tf.squeeze(frames, axis=-1)))
    output["targets"] = frames

    if self.hparams.reward_prediction:
        output["target_reward"] = tf.argmax(output["target_reward"], axis=-1)

    # only required for decoding.
    output["outputs"] = output["targets"]
    output["scores"] = output["targets"]
    return output
def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
    """Helper to create an initialized Variable with weight decay.

    With `use_xavier` the variable is created through `_variable_on_cpu`
    with a xavier initializer. Otherwise it is sampled from a truncated
    normal with stddev sqrt(2 / shape[-1]), rounded to three decimals and
    wrapped in a Variable named 'weights'; in that branch the `name` and
    `stddev` arguments are not used.

    Args:
      name: name of the variable (xavier branch only)
      shape: list of ints
      stddev: standard deviation of a truncated Gaussian (currently unused)
      wd: add L2Loss weight decay multiplied by this float. If None, weight
          decay is not added for this Variable.
      use_xavier: bool, whether to use xavier initializer

    Returns:
      Variable Tensor
    """
    if use_xavier:
        var = _variable_on_cpu(name, shape,
                               tf.contrib.layers.xavier_initializer())
    else:
        with tf.device('/cpu:0'):
            sample = tf.truncated_normal(shape,
                                         stddev=np.sqrt(2 / shape[-1]))
            # Keep three decimal places: scale up, round, scale back down.
            scaled = sample * tf.constant(1000, dtype=tf.float32)
            quantized = tf.round(scaled) / tf.constant(1000,
                                                       dtype=tf.float32)
            var = tf.Variable(quantized, name='weights')

    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var
def _get_discriminator_output(self, inputs, discriminator, labels):
    """Discriminator binary classifier.

    A dense layer followed by a one-unit dense layer gives one logit per
    position. Losses are sigmoid cross-entropy against `labels`, weighted
    by `inputs.input_mask`.

    Returns:
        A `DiscOutput` namedtuple with fields loss (mask-normalised mean
        over all positions), per_example_loss, probs, preds (1 where the
        logit is positive, else 0) and labels.
    """
    with tf.variable_scope("discriminator_predictions"):
        config = self._bert_config
        hidden = tf.layers.dense(
            discriminator.get_sequence_output(),
            units=config.hidden_size,
            activation=modeling.get_activation(config.hidden_act),
            kernel_initializer=modeling.create_initializer(
                config.initializer_range))
        logits = tf.squeeze(tf.layers.dense(hidden, units=1), -1)

        mask = tf.cast(inputs.input_mask, tf.float32)
        targets = tf.cast(labels, tf.float32)
        token_losses = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=targets) * mask

        # The 1e-6 term guards against division by zero for empty masks.
        per_example_loss = (tf.reduce_sum(token_losses, axis=-1) /
                            (1e-6 + tf.reduce_sum(mask, axis=-1)))
        loss = tf.reduce_sum(token_losses) / (1e-6 + tf.reduce_sum(mask))

        probs = tf.nn.sigmoid(logits)
        # sign(logit) in {-1, 0, 1} -> {0, 0.5, 1}, then rounded.
        preds = tf.cast(tf.round((tf.sign(logits) + 1) / 2), tf.int32)

        field_names = ["loss", "per_example_loss", "probs", "preds", "labels"]
        DiscOutput = collections.namedtuple("DiscOutput", field_names)
        return DiscOutput(loss, per_example_loss, probs, preds, labels)
def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): if out_mul != 1.0: expr = [x * out_mul for x in expr] if out_add != 0.0: expr = [x + out_add for x in expr] if out_shrink > 1: ksize = [1, 1, out_shrink, out_shrink] expr = [ tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr ] if out_dtype is not None: if tf.as_dtype(out_dtype).is_integer: expr = [tf.round(x) for x in expr] expr = [tf.saturate_cast(x, out_dtype) for x in expr] return expr
def build_graph(parameters):
    """Build the round op testing graph.

    Args:
        parameters: dict providing "input_dtype" and "input_shape" for the
            placeholder.

    Returns:
        ([input placeholder], [tf.round output]).
    """
    placeholder = tf.compat.v1.placeholder(
        name="input1",
        dtype=parameters["input_dtype"],
        shape=parameters["input_shape"])
    rounded = tf.round(placeholder)
    return [placeholder], [rounded]
def rescale(boxes, keypoints, old_shape, new_shape):
    """Rescale boxes and keypoints from an old image size to a new one.

    Keypoint coordinates are multiplied by new/old size, rounded and
    clipped to the new image. Boxes are multiplied by
    [new_height, new_width, new_height, new_width] — presumably they arrive
    in normalized coordinates; confirm with the caller.

    Arguments:
        boxes: a float tensor with shape [num_persons, 4].
        keypoints: an int tensor with shape [num_persons, 17, 3].
        old_shape, new_shape: int tensors with shape [3].
    Returns:
        a float tensor with shape [num_persons, 4].
        an int tensor with shape [num_persons, 17, 3].
    """
    # The last keypoint channel is passed through unchanged.
    coords, last_channel = tf.split(keypoints, [2, 1], axis=2)
    coords = tf.to_float(coords)

    old_size = tf.to_float(old_shape)
    new_size = tf.to_float(new_shape)
    old_h, old_w = old_size[0], old_size[1]
    new_h, new_w = new_size[0], new_size[1]

    coords = coords * tf.stack([new_h / old_h, new_w / old_w])

    box_scale = tf.concat(2 * [tf.stack([new_h, new_w])], axis=0)
    boxes = boxes * box_scale

    max_row = tf.to_int32(new_h) - 1
    max_col = tf.to_int32(new_w) - 1
    coords = tf.to_int32(tf.round(coords))
    rows, cols = tf.split(coords, 2, axis=2)
    rows = tf.clip_by_value(rows, 0, max_row)
    cols = tf.clip_by_value(cols, 0, max_col)

    keypoints = tf.concat([rows, cols, last_channel], axis=2)
    return boxes, keypoints
def blueRatioHistogram(img):
    """Compute the blue-ratio map of an image, normalised to uint8.

    Per pixel: ratio = (100 * b / (r + g + 1)) * (255 / (r + g + b + 1)).
    The map is divided by its maximum, scaled by 255, rounded and cast to
    uint8. If the whole map is zero the result is all zeros instead of the
    NaNs a 0 / 0 division would produce.

    Args:
        img: H x W x 3 array. Channel 0 is read as blue, 1 as green and
            2 as red (presumably OpenCV's BGR order — confirm with caller).

    Returns:
        The evaluated uint8 map, as returned by `Session.run`.
    """
    # NOTE(review): every call adds ops to the default graph and opens a
    # new Session; consider building the graph once if this is called often.
    red = tf.to_float(tf.convert_to_tensor(img[:, :, 2]))
    green = tf.to_float(tf.convert_to_tensor(img[:, :, 1]))
    blue = tf.to_float(tf.convert_to_tensor(img[:, :, 0]))

    one = tf.constant([[1.]])
    red_green = tf.add(red, green)

    # factor1 = (100 * b) / (r + g + 1)
    factor1 = tf.div(tf.multiply(blue, 100.), tf.add(red_green, one))
    # factor2 = 255 / (r + g + b + 1)
    factor2 = tf.div(tf.multiply(one, 255.),
                     tf.add(tf.add(red_green, blue), one))
    ratio = tf.multiply(factor1, factor2)

    # Normalise by the image-wide maximum and scale to [0, 255]. A zero
    # maximum is replaced by one so the division stays finite.
    peak = tf.reduce_max(ratio, [0, 1])
    safe_peak = tf.where(tf.greater(peak, 0.), peak, tf.ones_like(peak))
    brh = tf.multiply(tf.div(ratio, safe_peak), 255.)
    brh = tf.cast(tf.round(brh), tf.uint8)

    with tf.Session() as sess:
        brh = sess.run(brh)
    return brh
def create_model(self, mask_l, mask_r, kine_l, kine_r, is_training):
    """Build the left-robot mask model, its loss and its F1 metric.

    Sets `self.mask` (target mask clipped to [0, 1] and rounded),
    `self.proj_logits_l`, `self.proj_sigm_l`, `self.pred_mask` (rounded
    sigmoid output), `self.cost_l` and `self.f1`.

    Args:
        mask_l: target mask for the left robot.
        mask_r: not used by this method.
        kine_l: input passed to `create_robot_new` for the left robot.
        kine_r: not used by this method.
        is_training: forwarded to `create_robot_new`.
    """
    clipped_target = tf.clip_by_value(mask_l, 0., 1.)
    self.mask = tf.round(clipped_target)

    with tf.variable_scope("robot_l"):
        self.proj_logits_l = create_robot_new(kine_l, is_training, "Left")

    self.proj_sigm_l = tf.nn.sigmoid(self.proj_logits_l)
    hard_prediction = tf.round(self.proj_sigm_l)
    self.pred_mask = tf.round(tf.clip_by_value(hard_prediction, 0., 1.))

    with tf.name_scope("cost"):
        # The loss compares the soft output with the raw target mask.
        self.cost_l = self._get_dice_coef_loss(self.proj_sigm_l, mask_l)

    with tf.name_scope("accuracy"):
        self.f1 = self._get_f1_accuracy(self.pred_mask, self.mask)
def my_round(x):
    """Round `x` and return it together with a pass-through gradient.

    The (value, grad_fn) return shape matches what `tf.custom_gradient`
    expects; presumably this function is decorated with it where it is
    defined — confirm.
    """
    rounded = tf.round(x)

    def grad(dy):
        # Treat rounding as the identity in the backward pass.
        return 1 * dy

    return rounded, grad
def round_grad(x):
    """Round `x` to int32 and return it with a pass-through gradient.

    The (value, grad_fn) return shape matches what `tf.custom_gradient`
    expects; presumably this function is decorated with it where it is
    defined — confirm.
    """
    rounded = tf.round(x)
    idx = tf.cast(rounded, tf.int32)

    def grad(dy):
        """Let gradients pass through unchanged."""
        return dy

    return idx, grad
def iqst(x, n):
    """Integer quantization with straight-through estimator.

    `x` is mapped from [-1, 1] to [0, 1] (clipped with a 1e-7 margin),
    scaled to n - 1 steps and rounded.

    Args:
        x: tensor to quantize.
        n: number of quantization levels.

    Returns:
        (codes, quantized): the rounded level values, and the quantized
        value mapped back to [-1, 1] whose gradient is that of `x`.
    """
    eps = 1e-7
    steps = float(n - 1)

    unit = tf.clip_by_value((x + 1) / 2.0, -eps, 1 + eps)
    codes = tf.round(steps * unit)
    dequantized = 2 * (codes / steps) - 1

    # Forward pass yields `dequantized`; backward pass sees the identity.
    straight_through = x + tf.stop_gradient(dequantized - x)
    return codes, straight_through
def build(self):
    """Build the graph: embedding, stacked LSTM, dense logit, cost, train op.

    Nothing is returned. The tensors and ops are created with explicit
    names ('tf_x', 'tf_y', 'tf_keepprob', 'probabilities', 'labels',
    'cost', 'train_op', ...). `self.initial_state` and `self.final_state`
    are stored on the instance.
    """
    # Define the placeholders.
    inputs = tf.placeholder(tf.int32,
                            shape=(self.batch_size, self.seq_len),
                            name='tf_x')
    targets = tf.placeholder(tf.float32,
                             shape=(self.batch_size),
                             name='tf_y')
    keep_prob = tf.placeholder(tf.float32, name='tf_keepprob')

    # Create the embedding layer.
    embedding = tf.Variable(tf.random_uniform(
        (self.n_words, self.embed_size), minval=-1, maxval=1),
                            name='embedding')
    embedded = tf.nn.embedding_lookup(embedding, inputs, name='embeded_x')

    # Define the LSTM cells (with output dropout) and stack them.
    layers = []
    for _ in range(self.num_layers):
        layers.append(
            tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.BasicLSTMCell(self.lstm_size),
                output_keep_prob=keep_prob))
    stacked = tf.nn.rnn_cell.MultiRNNCell(layers)

    # Define the initial state.
    self.initial_state = stacked.zero_state(self.batch_size, tf.float32)
    print(' <<initial state >> ', self.initial_state)

    lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
        stacked, embedded, initial_state=self.initial_state)
    # Note: lstm_outputs has shape [batch_size, max_time, cells.output_size].
    print('\n <<lstm_output >>', lstm_outputs)
    print('\n <<final state >>', self.final_state)

    # Apply a fully connected layer to the last RNN output.
    last_step = lstm_outputs[:, -1]
    logits = tf.layers.dense(inputs=last_step,
                             units=1,
                             activation=None,
                             name='logits')
    logits = tf.squeeze(logits, name='logits_squeezed')
    print('\n << logits >>', logits)

    probabilities = tf.nn.sigmoid(logits, name='probabilities')
    predictions = {
        'probabilities': probabilities,
        'labels': tf.cast(tf.round(probabilities), tf.int32, name='labels')
    }
    print('\n << predictions >>', predictions)

    # Define the cost function.
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=targets, logits=logits),
                          name='cost')

    # Define the optimizer.
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    train_op = optimizer.minimize(cost, name='train_op')
def single_obj_scoremap(scoremap):
    """Keep only the foreground region connected to the strongest response.

    The softmax foreground score (max over channels 1..C-1) is rounded to
    a mask. Starting from a single 1 at the location returned by
    `find_max_location`, the map is repeatedly dilated and multiplied with
    that mask so it can only grow inside the detected foreground.

    Args:
        scoremap: 4D segmentation scores, B x H x W x C, with a statically
            known shape.

    Returns:
        Tensor of shape [B, H, W, 1] with the grown object map.
    """
    with tf.variable_scope('single_obj_scoremap'):
        filter_size = 21
        s = scoremap.get_shape().as_list()
        assert len(s) == 4, "Scoremap must be 4D."
        batch_size, height, width = s[0], s[1], s[2]

        # Softmax normalizes across the last dimension.
        probabilities = tf.nn.softmax(scoremap)
        foreground = tf.reduce_max(probabilities[:, :, :, 1:], 3)  # B, H, W
        foreground_mask = tf.round(foreground)  # B, H, W

        # The maximum of the foreground score seeds the growing.
        peak_locations = find_max_location(foreground)

        dilation_kernel = tf.ones(
            (filter_size, filter_size, 1)) / float(filter_size * filter_size)
        # Enough passes for the map to spread over the whole image.
        num_passes = max(height, width) // (filter_size // 2)

        grown_maps = []
        for sample in range(batch_size):
            # Initial map: a single one at the maximum location.
            seed = tf.reshape(peak_locations[sample, :], [1, 2])
            region = tf.sparse_to_dense(seed, [height, width], 1.0)

            # Grow by dilation, restricted by a pixelwise product with
            # the foreground mask.
            for _ in range(num_passes):
                region = tf.reshape(region, [1, height, width, 1])
                dilated = tf.nn.dilation2d(region, dilation_kernel,
                                           [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')
                dilated = tf.reshape(dilated, [height, width])
                region = tf.round(
                    tf.multiply(foreground_mask[sample, :, :], dilated))

            grown_maps.append(tf.reshape(region, [height, width, 1]))

        return tf.stack(grown_maps)
def write_png(filename, image):
    """Create the graph op that writes `image` to a PNG file.

    The leading axis of `image` is squeezed away. Floating-point images
    are rounded, and anything that is not already uint8 is saturate-cast
    to uint8 before encoding.

    Returns:
        The `tf.io.write_file` op.
    """
    pixels = tf.squeeze(image, 0)
    if pixels.dtype.is_floating:
        pixels = tf.round(pixels)
    if pixels.dtype != tf.uint8:
        pixels = tf.saturate_cast(pixels, tf.uint8)
    encoded = tf.image.encode_png(pixels)
    return tf.io.write_file(filename, encoded)
def _resize_small_pp(data):
    """Area-resize data["image"] so its shorter side equals `smaller_size`.

    Handles a single image (HWC) or a batch (BHWC), chosen by static rank.
    Any other rank leaves the image untouched. `smaller_size` is a free
    variable resolved from the surrounding scope.

    Returns:
        The same dict with "image" replaced by the resized image.
    """
    image = data["image"]

    # Height and width are the third- and second-to-last axes for both
    # the HWC and the BHWC layout.
    dyn_shape = tf.shape(image)
    height, width = dyn_shape[-3], dyn_shape[-2]

    # Figure out the target height/width.
    scale = tf.to_float(smaller_size) / tf.to_float(tf.minimum(height, width))
    target_h = tf.to_int32(tf.round(tf.to_float(height) * scale))
    target_w = tf.to_int32(tf.round(tf.to_float(width) * scale))
    target = [target_h, target_w]

    # NOTE: use align_corners=False for AREA resize, but True for Bilinear.
    # See also https://github.com/tensorflow/tensorflow/issues/6720
    static_rank = len(image.get_shape().as_list())
    if static_rank == 3:
        # Single image: add a batch axis for the resize, then drop it.
        data["image"] = tf.image.resize_area(image[None], target)[0]
    elif static_rank == 4:
        data["image"] = tf.image.resize_area(image, target)
    return data
def load_test_model_graph(checkpoint_dir): ''' model used in test mode. (entropy_bootleneck(training=False) ''' # inputs x = tf.placeholder(tf.float32, [1, None, None, 3]) orig_x = tf.placeholder(tf.float32, [1, None, None, 3]) # Instantiate model. analysis_transform = AnalysisTransform(192) synthesis_transform = SynthesisTransform(192) hyper_analysis_transform = HyperAnalysisTransform(192) hyper_synthesis_transform = HyperSynthesisTransform(192) entropy_bottleneck = tfc.EntropyBottleneck() # Transform and compress the image. y = analysis_transform(x) y_shape = tf.shape(y) z = hyper_analysis_transform(abs(y)) z_hat, z_likelihoods = entropy_bottleneck(z, training=False) sigma = hyper_synthesis_transform(z_hat) sigma = sigma[:, :y_shape[1], :y_shape[2], :] scale_table = np.exp( np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table) side_string = entropy_bottleneck.compress(z) string = conditional_bottleneck.compress(y) # Transform the quantized image back (if requested). y_hat, y_likelihoods = conditional_bottleneck(y, training=False) x_hat = synthesis_transform(y_hat) # eval bpp num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32) eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum( tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) # reconstruction metric # Bring both images back to 0..255 range. orig_x_255 = orig_x * 255 x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.round(x_hat * 255) mse = tf.reduce_mean(tf.squared_difference(orig_x_255, x_hat)) psnr = tf.squeeze(tf.image.psnr(x_hat, orig_x_255, 255)) msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, orig_x_255, 255)) # session sess = tf.Session() # load graph latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir) tf.train.Saver().restore(sess, save_path=latest) return sess, x, orig_x, [ string, side_string ], eval_bpp, x_hat, mse, psnr, msssim, num_pixels, y, z
def build(self):
    """Build the graph: embedding, stacked LSTM, dense logit, cost, train op.

    Nothing is returned. The tensors and ops are created with explicit
    names ('tf_x', 'tf_y', 'tf_keepprob', 'probabilities', 'labels',
    'cost', 'train_op', ...). `self.initial_state` and `self.final_state`
    are stored on the instance.
    """
    ## Placeholders for token ids, targets and dropout keep probability
    token_ids = tf.placeholder(tf.int32,
                               shape=(self.batch_size, self.seq_len),
                               name='tf_x')
    labels = tf.placeholder(tf.float32,
                            shape=(self.batch_size),
                            name='tf_y')
    keep_prob = tf.placeholder(tf.float32, name='tf_keepprob')

    ## Embedding layer, initialised uniformly in [-1, 1)
    initial_embedding = tf.random_uniform((self.n_words, self.embed_size),
                                          minval=-1,
                                          maxval=1)
    embedding = tf.Variable(initial_embedding, name='embedding')
    embedded = tf.nn.embedding_lookup(embedding, token_ids, name='embeded_x')

    ## One dropout-wrapped LSTM cell per layer, stacked together
    def _make_cell():
        lstm = tf.nn.rnn_cell.BasicLSTMCell(self.lstm_size)
        return tf.nn.rnn_cell.DropoutWrapper(lstm,
                                             output_keep_prob=keep_prob)

    stacked = tf.nn.rnn_cell.MultiRNNCell(
        [_make_cell() for _ in range(self.num_layers)])

    ## Initial state
    self.initial_state = stacked.zero_state(self.batch_size, tf.float32)
    print(' << initial state >> ', self.initial_state)

    lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
        stacked, embedded, initial_state=self.initial_state)
    ## Note: lstm_outputs shape:
    ##   [batch_size, max_time, cells.output_size]
    print('\n << lstm_output >> ', lstm_outputs)
    print('\n << final state >> ', self.final_state)

    ## Fully connected layer on top of the last RNN output
    logits = tf.layers.dense(inputs=lstm_outputs[:, -1],
                             units=1,
                             activation=None,
                             name='logits')
    logits = tf.squeeze(logits, name='logits_squeezed')
    print('\n << logits >> ', logits)

    y_proba = tf.nn.sigmoid(logits, name='probabilities')
    hard_labels = tf.cast(tf.round(y_proba), tf.int32, name='labels')
    predictions = {'probabilities': y_proba, 'labels': hard_labels}
    print('\n << predictions >> ', predictions)

    ## Cost function
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                            logits=logits)
    cost = tf.reduce_mean(cross_entropy, name='cost')

    ## Optimizer
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    train_op = optimizer.minimize(cost, name='train_op')
def __call__(self, img):
    """Resize `img` by a random factor drawn from 1 ± max_stretch.

    A single scalar factor is sampled, so the first two dimensions are
    scaled by the same amount. `self.interpolation` selects the resize
    method and must be 'bilinear' or 'bicubic'.

    Returns:
        The resized image.
    """
    stretch = tf.random_uniform([], -self.max_stretch, self.max_stretch)
    scale = 1.0 + stretch

    spatial_size = tf.to_float(tf.shape(img)[:2])
    target_size = tf.to_int32(tf.round(spatial_size * scale))

    methods = {
        'bilinear': tf.image.ResizeMethod.BILINEAR,
        'bicubic': tf.image.ResizeMethod.BICUBIC
    }
    method = methods[self.interpolation]
    return tf.image.resize_images(img, target_size, method=method)
def add_evaluation_step(result_tensor, ground_truth_tensor):
    """Add the accuracy op and its scalar summary to the graph.

    A prediction counts as correct where the rounded `result_tensor`
    equals `ground_truth_tensor` element-wise; the accuracy is the mean
    over all elements.

    Returns:
        The accuracy tensor, also logged as the 'accuracy' summary.
    """
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            rounded = tf.round(result_tensor)
            correct_prediction = tf.equal(rounded, ground_truth_tensor)
        with tf.name_scope('accuracy'):
            as_float = tf.cast(correct_prediction, tf.float32)
            evaluation_step = tf.reduce_mean(as_float)
    tf.summary.scalar('accuracy', evaluation_step)
    return evaluation_step
def _get_infer_maximum_iterations(self, hparams, source_sequence_length):
    """Maximum decoding steps at inference time.

    Uses `hparams.tgt_max_len_infer` when it is set (and logs it);
    otherwise twice the longest source sequence length, as an int32
    tensor.
    """
    if hparams.tgt_max_len_infer:
        maximum_iterations = hparams.tgt_max_len_infer
        utils.print_out(" decoding maximum_iterations %d" % maximum_iterations)
        return maximum_iterations

    decoding_length_factor = 2.0
    longest_source = tf.to_float(tf.reduce_max(source_sequence_length))
    return tf.to_int32(tf.round(longest_source * decoding_length_factor))