def testVonMisesSampleMoments(self):
    locs_v = np.array([-2., -1., 0.3, 2.3])
    concentrations_v = np.array([0.1, 1.0, 2.0, 10.0])
    von_mises = tfd.VonMises(
        self.make_tensor(locs_v), self.make_tensor(concentrations_v))

    n = 10000
    samples = von_mises.sample(n, seed=12345)

    expected_mean = von_mises.mean()
    actual_mean = tf.atan2(
        tf.reduce_mean(tf.sin(samples), 0),
        tf.reduce_mean(tf.cos(samples), 0))

    expected_variance = von_mises.variance()
    standardized_samples = samples - tf.expand_dims(von_mises.mean(), 0)
    actual_variance = 1. - tf.reduce_mean(tf.cos(standardized_samples), axis=0)

    [expected_mean_val, expected_variance_val, actual_mean_val,
     actual_variance_val] = self.evaluate(
         [expected_mean, expected_variance, actual_mean, actual_variance])

    self.assertAllClose(expected_mean_val, actual_mean_val, rtol=0.1)
    self.assertAllClose(expected_variance_val, actual_variance_val, rtol=0.1)
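# Illustrative NumPy sketch (not part of the test suite above) of the
# circular moment estimators the test checks: the sample circular mean is
# the atan2 of the averaged sin/cos components, and the circular variance
# is 1 - E[cos(x - mean)]; np.random.vonmises stands in for the TF sampler.
import numpy as np

vm_samples = np.random.vonmises(mu=0.3, kappa=2.0, size=10000)
circ_mean = np.arctan2(np.mean(np.sin(vm_samples)),
                       np.mean(np.cos(vm_samples)))
circ_var = 1. - np.mean(np.cos(vm_samples - circ_mean))
print(circ_mean, circ_var)  # mean close to 0.3, variance within [0, 1]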
    np.log(200.0) + np.zeros(nl), np.log(150.0), np.log(300.0), dtype=T)
log_prior += log_jac

# Frequency spacing
log_dnu_param, log_dnu, log_jac, log_dnu_range = get_bounded_variable(
    "log_dnu", np.log(17.0), np.log(15.0), np.log(30.0), dtype=T)
log_prior += log_jac

# The phase for each mode
cp, sp = np.random.randn(2, nmodes)
phi_x, phi_y, cosphi, sinphi, log_jac = get_unit_vector("phi", cp, sp, dtype=T)
log_prior += log_jac
phi = tf.atan2(sinphi, cosphi)

# The parameters of the envelope
log_amp_param, log_amp, log_jac, log_amp_range = get_bounded_variable(
    "log_amp", np.log(np.random.uniform(0.015, 0.02, nl)),
    np.log(0.01), np.log(0.03), dtype=T)
log_prior += log_jac

log_width = tf.Variable(np.log(25.0), dtype=T, name="log_width")
curve = tf.Variable(0.001, dtype=T, name="curve")

# Initialize
session.run(tf.global_variables_initializer())
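# Illustrative NumPy sketch (an assumption about what get_unit_vector does
# conceptually, not its actual implementation): an unconstrained 2-vector
# (cp, sp) is normalized onto the unit circle, and atan2 of its components
# recovers the phase angle used above.
import numpy as np

cp, sp = np.random.randn(2)
norm = np.hypot(cp, sp)
cosphi, sinphi = cp / norm, sp / norm
phi = np.arctan2(sinphi, cosphi)
print(-np.pi <= phi <= np.pi)  # True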
            cc.ops.reduceIndex(
                pred_angle,
                tf.expand_dims(tf.expand_dims(nla[i], axis=-1), axis=-1)))
        # with tf.device('/cpu:0'):
        #     softmax_linear = cc.layers.monitorGrads(softmax_linear)
        #     pred_angle = cc.layers.monitorGrads(pred_angle)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=softmax_linear, labels=nla[i],
            name='cross_entropy_per_example_' + str(i))
        # nan2 = nan[i] - numpy.pi
        angleDif = nan[i] - pred_angle
        # angleDif2 = nan2 - pred_angle
        normAngleDif = tf.abs(tf.atan2(tf.sin(angleDif), tf.cos(angleDif)))
        # normAngleDif2 = tf.abs(tf.atan2(tf.sin(angleDif2), tf.cos(angleDif2)))
        # normAngleDif = tf.reduce_min(tf.stack([normAngleDif, normAngleDif2], axis=0), axis=0)
        meanAnDif = tf.reduce_mean(normAngleDif, name="angle_loss_" + str(i))
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy_' + str(i))
        tf.add_to_collection('x_entropies', cross_entropy_mean)
        tf.add_to_collection('angle_losses', meanAnDif)
        # softmax_linear, prob, weightDecayFactor = inference(next_example, batch_size, "train", first=True, resuse_batch_norm=False)
        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=softmax_linear, labels=next_label, name='cross_entropy_per_example')
        # cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        # tf.add_to_collection('losses', cross_entropy_mean)
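# Illustrative sketch (separate from the graph above): atan2(sin(d), cos(d))
# wraps an angular difference d into [-pi, pi], so normAngleDif measures the
# smallest rotation between prediction and label regardless of winding.
# NumPy equivalent:
import numpy as np

d = 5.5  # a difference outside [-pi, pi]
wrapped = np.arctan2(np.sin(d), np.cos(d))
print(np.isclose(wrapped, d - 2 * np.pi))  # True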
def box_8c_to_box_3d(box_8c):
    """Computes the 3D bounding box parameters from 8 corner positions.

    To go back from the 8-corner representation to box_3d, we need to
    reverse the transformation done in 'box_3d_to_box_8c'. The first thing
    we need is the orientation, which is estimated by calculating the
    midpoints of P1 -> P2 and P3 -> P4. Connecting these midpoints yields a
    vector which gives us the direction of the corners. Note, however, that
    the y-axis faces downwards, hence we negate this orientation.

    Next we calculate the centroids by taking the average of the four
    corners for the x and z axes. We then translate the centroids back to
    the origin and multiply by the rotation matrix; however, we are now
    rotating in the opposite direction, so the angle signs are reversed.
    After rotation we can translate the corners back, but there is one
    additional step before translation. Since we plan to regress corners,
    the corners are expected to be skewed, i.e. to form non-rectangular
    shapes. Hence we attempt to align the corners (by min/maxing the
    corners and aligning them by the min and max values for each corner).
    After this step we can translate back and calculate the length, width
    and height.

    Args:
        box_8c: An ndarray or a tensor of shape (N x 3 x 8) representing
            the box corners.

    Returns:
        box_3d: An ndarray or a tensor of shape (N x 7) representing the
            boxes in the format [x, y, z, l, w, h, ry].
    """
    format_checker.check_box_8c_format(box_8c)

    #######################
    # calculate orientation
    #######################
    x_corners = box_8c[:, 0]
    y_corners = box_8c[:, 1]
    z_corners = box_8c[:, 2]

    x12_midpoint = (x_corners[:, 0] + x_corners[:, 1]) / 2
    z12_midpoint = (z_corners[:, 0] + z_corners[:, 1]) / 2

    x34_midpoint = (x_corners[:, 2] + x_corners[:, 3]) / 2
    z34_midpoint = (z_corners[:, 2] + z_corners[:, 3]) / 2

    # We use the midpoints to get a vector to figure out the orientation
    delta_x = x12_midpoint - x34_midpoint
    delta_z = z12_midpoint - z34_midpoint
    # Negate the orientation since y is downwards
    rys = -tf.atan2(delta_z, delta_x)

    # Calculate the centroid by averaging four corners
    center_x = tf.reduce_mean(x_corners[:, 0:4], axis=1)
    center_z = tf.reduce_mean(z_corners[:, 0:4], axis=1)

    # Translate the centroid to the origin before rotation
    translated_x = box_8c[:, 0] - tf.reshape(center_x, (-1, 1))
    translated_z = box_8c[:, 2] - tf.reshape(center_z, (-1, 1))

    # The sign of the angle needs to be flipped because we want to rotate
    # back, i.e. reverse the rotation op we did during box_3d -> box_8c
    ry_sin = tf.sin(-rys)
    ry_cos = tf.cos(-rys)

    zeros = tf.zeros_like(rys, dtype=tf.float32)
    ones = tf.ones_like(rys, dtype=tf.float32)

    rotation_mats = tf.stack([
        tf.stack([ry_cos, zeros, ry_sin], axis=1),
        tf.stack([zeros, ones, zeros], axis=1),
        tf.stack([-ry_sin, zeros, ry_cos], axis=1)
    ], axis=2)

    corners = tf.stack([translated_x, y_corners, translated_z], axis=1)

    # Rotate the corners
    corners_3d = tf.matmul(rotation_mats, corners,
                           transpose_a=True, transpose_b=False)

    # Align the corners in case they are skewed
    aligned_corners = align_boxes_8c(corners_3d)

    # Translate the corners back
    aligned_corners_x = aligned_corners[:, 0] + tf.reshape(center_x, (-1, 1))
    aligned_corners_z = aligned_corners[:, 2] + tf.reshape(center_z, (-1, 1))

    new_x_corners = aligned_corners_x
    new_y_corners = aligned_corners[:, 1]
    new_z_corners = aligned_corners_z

    x_b_right = new_x_corners[:, 1]
    x_b_left = new_x_corners[:, 2]

    z_b_left = new_z_corners[:, 2]
    z_t_left = new_z_corners[:, 3]

    corner_y1 = new_y_corners[:, 0]
    corner_y5 = new_y_corners[:, 4]

    length = x_b_right - x_b_left
    width = z_t_left - z_b_left
    height = corner_y1 - corner_y5

    # Re-calculate the centroid
    center_x = tf.reduce_mean(new_x_corners[:, 0:4], axis=1)
    center_z = tf.reduce_mean(new_z_corners[:, 0:4], axis=1)
    center_y = corner_y1

    box_3d = tf.stack(
        [center_x, center_y, center_z, length, width, height, rys], axis=1)

    return box_3d
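# Illustrative NumPy sketch (not from the library) of the orientation
# recovery described in the docstring above. The corner ordering here is an
# assumption chosen so that P1 -> P2 and P3 -> P4 span the box length; with
# the y-axis facing downwards, the negated atan2 of the midpoint vector
# recovers the yaw.
import numpy as np

ry = 0.4                        # ground-truth yaw
l, w = 4.0, 2.0                 # box length and width
base = np.array([[l / 2, l / 2, -l / 2, -l / 2],   # x of P1..P4
                 [w / 2, -w / 2, -w / 2, w / 2]])  # z of P1..P4
c, s = np.cos(-ry), np.sin(-ry)  # rotate by -ry to mimic a y-down frame
x = c * base[0] - s * base[1]
z = s * base[0] + c * base[1]

delta_x = (x[0] + x[1]) / 2 - (x[2] + x[3]) / 2
delta_z = (z[0] + z[1]) / 2 - (z[2] + z[3]) / 2
print(np.isclose(-np.arctan2(delta_z, delta_x), ry))  # True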
# gradStrength = tf.sqrt(tpGrad[0]**2 + tpGrad[1]**2)
# pooledGrads, indexes = tf.nn.max_pool_with_argmax(gradStrength, [1,3,3,1], [1,1,1,1], padding='SAME')
# intIndeces = tf.cast(indexes, tf.int32)
# pooledTpGrads = [cc.ops.poolByIndex(tpGrad[0], intIndeces), cc.ops.poolByIndex(tpGrad[1], intIndeces)]
# avgrads = tf.concat(pooledTpGrads, axis=-1)
avgrads = tf.concat(tpGrad, axis=-1)
avgrads = tf.layers.average_pooling2d(avgrads * 9, [5, 5], [1, 1],
                                      padding='SAME')
# grads = tf.concat(grad, axis=-1)
gradsRot = cc.transformations.rotateVectorField(avgrads, angle,
                                                irelevantAxisFirst=True)
splitGradsRot = tf.split(gradsRot, 2, -1)
gradStrength = tf.sqrt(splitGradsRot[0]**2 + splitGradsRot[1]**2)
noSmall = [tf.where(tf.abs(gradStrength) < 10e-2,
                    tf.zeros_like(splitGradsRot[0]), splitGradsRot[0]),
           tf.where(tf.abs(gradStrength) < 10e-2,
                    tf.zeros_like(splitGradsRot[1]), splitGradsRot[1])]
gamiesai = tf.concat(noSmall, axis=-1)
# gradsRot = tf.contrib.image.rotate(grads, angle, interpolation="BILINEAR")
ang = tf.atan2(noSmall[1], noSmall[0])
reMapped = tf.where(ang < -numpy.pi, ang + 2 * numpy.pi, ang)
reMapped = tf.where(reMapped >= numpy.pi, reMapped - 2 * numpy.pi, reMapped)
quantized = tf.round(reMapped / (numpy.pi / 16))

rotInpt = tf.contrib.image.rotate(inpt, angle, interpolation="BILINEAR")
# avRotInpt = tf.layers.average_pooling2d(rotInpt, [5,5], [1,1], padding='SAME')
avRotInpt = rotInpt
rotGrad = tf.image.image_gradients(avRotInpt)
rottpGrad = []
rottpGrad.append(tf.nn.conv2d(rotGrad[0], wx, [1, 1, 1, 1], "SAME") / 2)
rottpGrad.append(tf.nn.conv2d(rotGrad[1], wy, [1, 1, 1, 1], "SAME") / 2)
# rotgradStrength = tf.sqrt(rottpGrad[0]**2 + rottpGrad[1]**2)
# pooledRotGrads, rIndexes = tf.nn.max_pool_with_argmax(rotgradStrength, [1,3,3,1], [1,1,1,1], padding='SAME')
# intRIndeces = tf.cast(rIndexes, tf.int32)
# pooledRotTpGrads = [cc.ops.poolByIndex(rottpGrad[0], intRIndeces), cc.ops.poolByIndex(rottpGrad[1], intRIndeces)]
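# Standalone NumPy sketch (illustrative, not part of the pipeline above) of
# the wrap-and-quantize step: angles are remapped into [-pi, pi) and then
# rounded to bins of width pi/16, giving integer bin indices in [-16, 16].
import numpy as np

ang = np.array([-3.5, -0.2, 3.3])
remapped = np.where(ang < -np.pi, ang + 2 * np.pi, ang)
remapped = np.where(remapped >= np.pi, remapped - 2 * np.pi, remapped)
print(np.round(remapped / (np.pi / 16)))  # [ 14.  -1. -15.]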
def __init__(self, import_dir=None, nDim_high=None, nDim_low=None, layers=[],
             activations="tanh", encoder_activations=None,
             decoder_activations=None, last_layer="linear", norm="None",
             uniform_bias=False, uniform_weights=False, factor_bias=0.0,
             factor_weights=1.0, mode_bias='FAN_AVG', mode_weights='FAN_AVG',
             optimizer="Adam", learning_rate=.001, momentum=.1,
             objective="L2", load=False, number=None, dtype="float",
             tensorboard=True):
    """
    Args:
        import_dir: string, path to the directory which contains the saved
            model; all other arguments will be ignored except number and
            tensorboard
        nDim_high: number of input dimensions of the data
        nDim_low: desired number of dimensions of the lowest-dimensional
            intermediate representation
        layers: list of int. Sizes of the input, all hidden and the
            encoding layer. Specify this or nDim_high/nDim_low, not both.
        activations: activation function to be used: relu, sigmoid, tanh,
            softsign, elu, selu, softmax, swish
        encoder_activations: activation functions in the encoding layers,
            see activations
        decoder_activations: activation functions in the decoding layers,
            see activations
        last_layer: activation function for the last layer, if given. The
            last layer has the same size as the output and performs one
            last transformation. Pass None if you do not wish to have such
            a last layer.
        norm: additional normalization; "None" or "Layer"
        uniform_bias, uniform_weights, factor_bias, factor_weights,
        mode_bias, mode_weights: weight/bias initialization parameters, see
            tf.contrib.layers.variance_scaling_initializer (the default is
            Xavier initialization; set factor to 1 and mode to FAN_IN for
            self-normalizing initialization)
        optimizer: GradientDescent, Adam, Adagrad, Adadelta, Momentum
        learning_rate: the learning rate
        momentum: momentum parameter for the Momentum optimizer
        objective: the objective to be minimized; L2: quadratic distance
            between input and output, CrossEntropy
        load: whether to load this model or create a new one (the path is
            standardized)
        number: which number to give the net; a new number is assigned
            if None
        dtype: data type to be used in tensorflow, "float" or "double"
        tensorboard: whether to use tensorboard
    """
    if import_dir is not None:
        self.sess = tf.Session()
        self.sess.as_default()
        tf.saved_model.loader.load(self.sess, ["main"], import_dir)
        self.nLayer = self.sess.run("nLayer:0")
        self.layers = self.sess.run("layers:0")
        self.init = tf.global_variables_initializer()
    else:
        if dtype == "float":
            dtype = tf.float32
        else:
            dtype = tf.float64
        if not os.path.isdir("out"):
            os.makedirs("out")
        if not os.path.isdir("out/vid"):
            os.makedirs("out/vid")
        if tensorboard and not os.path.isdir("tmp"):
            os.makedirs("tmp")

        # self saves: layer, number, optimizer, ...?
        self.tensorboard = tensorboard

        # layer
        layers = [nDim_high] + list(layers)
        self.nLayer = len(layers) - 1
        self.layers = layers
        put_last_layer = last_layer is not None
        if encoder_activations is not None and len(encoder_activations) != self.nLayer:
            raise ValueError(
                "Given length of encoder activations is invalid: {actlength} "
                "for size of {size}".format(
                    actlength=len(encoder_activations), size=self.nLayer))
        if decoder_activations is not None and len(decoder_activations) != self.nLayer:
            raise ValueError(
                "Given length of decoder activations is invalid: {actlength} "
                "for size of {size}".format(
                    actlength=len(decoder_activations), size=self.nLayer))
        tf.constant(put_last_layer, name="put_last_layer")
        if not put_last_layer:
            last_layer = "None"
        tf.constant(last_layer, name="last_layer")
        tf.constant(self.nLayer, name="nLayer")
        # Use this one to define nLayer when loading a tensorflow model:
        # self.nLayer = sess.run("nLayer:0")
        tf.constant(self.layers, name="layers")
        # Use this one to define layers when loading a tensorflow model:
        # self.layers = sess.run("layers:0")
        tf.constant(activations, name="activations")
        tf.constant(norm, name="norm")
        self.flag_name = ("cAE" + "_%d" * len(layers)) % tuple(layers)
        self.save_path = "out/" + self.flag_name + ".ckpt"
        print("Encoder network layers (including in- and output): ", end="")
        print(layers + [1])

        # neural net: input
        x = tf.placeholder(dtype, shape=[None, nDim_high], name="x")
        # this is an "encoding" custom input node
        y = tf.placeholder(dtype, shape=[None, nDim_low], name="y")
        freq = tf.constant([[1, 1]], name='freq', dtype=dtype)
        phase = tf.get_variable('trainable/phase',
                                initializer=[np.pi / 2, 0], trainable=False)
        if nDim_low == 1:
            pass
        elif nDim_low == 2:
            capR = tf.get_variable('trainable/capR', dtype=tf.float32,
                                   initializer=[1., 1.], trainable=True)
        else:
            raise ValueError("Currently only nDim_low = 1 or 2 allowed!")

        initializer_bias = lambda: tf.contrib.layers.variance_scaling_initializer(
            factor=factor_bias, mode=mode_bias, uniform=uniform_bias)
        initializer_weights = lambda: tf.contrib.layers.variance_scaling_initializer(
            factor=factor_weights, mode=mode_weights, uniform=uniform_weights)

        # encoder:
        intermediate_layers = layers[1:]
        weightNames = ["encoder_weights_d%d" % d for d in range(self.nLayer)]
        biasNames = ["encoder_bias_d%d" % d for d in range(self.nLayer)]
        activation_fn = ([_get_activation(activations)
                          for _ in range(self.nLayer)]
                         if encoder_activations is None else
                         [_get_activation(encoder_activations[d])
                          for d in range(self.nLayer)])
        aname = ["encoder_act_d%d" % (d) for d in range(self.nLayer)]
        layer_norm_name = ["encoder_layer_norm_d%d" % d
                           for d in range(self.nLayer)]
        enc = self._fully_connected_stack(
            x, weightNames, biasNames, intermediate_layers, activation_fn,
            initializer_bias(), initializer_weights(), aname, norm=norm,
            layer_norm_name=layer_norm_name, reuse=False, transpose=False)

        # tanh transformations
        wx = tf.get_variable('trainable/encoder/wx',
                             shape=[layers[-1], nDim_low + 1],
                             initializer=initializer_weights())
        bx = tf.get_variable('trainable/encoder/bx', shape=[nDim_low + 1],
                             initializer=initializer_bias())
        xxx = tf.add(tf.matmul(enc, wx), bx, name="xxx")
        if nDim_low == 1:
            phi = tf.reshape(tf.atan2(xxx[:, 1], xxx[:, 0]), [-1, 1],
                             name='phi')
        if nDim_low == 2:
            # theta = tf.asin(tf.nn.sigmoid(xxx[:,2]))
            theta = xxx[:, 2]
            cosTheta = tf.cos(theta)
            sinTheta = tf.sin(theta)
            phi1 = tf.reshape(tf.atan2(xxx[:, 1] / (1 + cosTheta),
                                       xxx[:, 0] / (1 + cosTheta)), [-1, 1])
            phi = tf.concat([tf.reshape(theta, [-1, 1]), phi1], axis=1,
                            name='phi')

        # decoder:
        # circularity
        if nDim_low == 1:
            circ = tf.sin(tf.matmul(phi, freq) + phase, name="circ")
            circ_custom = tf.sin(tf.matmul(y, freq) + phase,
                                 name="circ_custom")
        if nDim_low == 2:
            xback_yback = (capR + tf.reshape(tf.tile(cosTheta, [2]), [-1, 2])) \
                * tf.sin(tf.matmul(phi1, freq) + phase)
            circ = tf.transpose(tf.stack(
                [xback_yback[:, 0], xback_yback[:, 1], sinTheta],
                name="circ"))
            # custom
            sinTheta_custom = tf.sin(y[:, 1])
            cosTheta_custom = tf.cos(y[:, 1])
            xback_yback_custom = (capR + tf.reshape(
                tf.tile(cosTheta_custom, [2]), [-1, 2])) \
                * tf.sin(tf.matmul(tf.reshape(y[:, 0], [-1, 1]), freq) + phase)
            circ_custom = tf.transpose(tf.stack(
                [xback_yback_custom[:, 0], xback_yback_custom[:, 1],
                 sinTheta_custom], name="circ_custom"))

        # rest
        intermediate_layers = list(layers[self.nLayer - 1::-1])
        wName = "decoder"
        weightNames = ["%s_weights_d%d" % (wName, d)
                       for d in range(self.nLayer - 1 + put_last_layer, -1, -1)]
        biasNames = ["decoder_bias_d%d" % d
                     for d in range(self.nLayer - 1 + put_last_layer, -1, -1)]
        activation_fn = ([_get_activation(activations)
                          for d in range(self.nLayer - 1, -1, -1)]
                         if decoder_activations is None else
                         [_get_activation(decoder_activations[d])
                          for d in range(self.nLayer)])
        aname = ["decoder_act_d%d" % (d)
                 for d in range(self.nLayer - 1 + put_last_layer, -1, -1)]
        layer_norm_name = ["decoder_layer_norm_d%d" % d
                           for d in range(self.nLayer - 1 + put_last_layer, -1, -1)]
        scope_name = "trainable/%s" % wName
        if put_last_layer:
            intermediate_layers.append(layers[0])
            activation_fn.append(_get_activation(last_layer))
        dec = self._fully_connected_stack(
            circ, weightNames, biasNames, intermediate_layers, activation_fn,
            initializer_bias(), initializer_weights(), aname, norm=norm,
            layer_norm_name=layer_norm_name, reuse=False, transpose=False,
            scope_name=scope_name)

        # Now build one additional FULL path for the case where the user
        # wants the decoding of their own latent-space points
        aname = ["decoder_act_custom_d%d" % (d)
                 for d in range(self.nLayer - 1 + put_last_layer, -1, -1)]
        decoder_custom = self._fully_connected_stack(
            circ_custom, weightNames, biasNames, intermediate_layers,
            activation_fn, None, None, aname, norm=norm,
            layer_norm_name=layer_norm_name, reuse=True, transpose=False,
            scope_name=scope_name)

        # Quadratic loss, start learning from p=0
        l2_loss = tf.reduce_mean(tf.square(x - dec), name="l2_loss")
        # Reconstruction error
        l2_loss_single = tf.reduce_mean(tf.square(x - dec), axis=1,
                                        name="l2_loss_single")
        l2_loss_single_feature = tf.square(x - dec,
                                           name="l2_loss_single_feature")

        # the optimizers, one for the full net and one for the encoder
        optimizerOp = _get_optimizer(optimizer, learning_rate, momentum)
        optimizerEnc = _get_optimizer(optimizer, name=optimizer + "_encoder")
        # the train steps, one for each optimizer
        train = optimizerOp.minimize(
            l2_loss,
            var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES),
            name="train")
        train_enc = optimizerEnc.minimize(
            l2_loss,
            var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope="trainable/encoder"),
            name="train_encoder")

        # Session and initialization
        self.sess = tf.Session()
        self.sess.as_default()
        self.init = tf.global_variables_initializer()

        # Tensorboard
        if tensorboard:
            # merged_p = []
            tf.summary.scalar("l2_loss", l2_loss)
            # Variable summaries
            # tf.summary.histogram("w_encoder_%d" % p, self.variables[2*p, 0])
            # tf.summary.histogram("b_encoder_%d" % p, self.variables[2*p, 1])
            # tf.summary.histogram("w_decoder_%d" % p, self.variables[2*p+1, 0])
            # tf.summary.histogram("b_decoder_%d" % p, self.variables[2*p+1, 1])
            # Activation summaries
            # Encoder
            if nDim_low == 1:
                tf.summary.histogram("a_encoder", phi)
            # Decoder
            # tf.summary.scalar("cross_entropy_%d" % p, self.cross_entropy[p])
            # self.merged_p.append(tf.summary.merge(["l2_loss_%d" % p]))
            pass
            merged = tf.summary.merge_all()

        if number is None and load:
            self.number = self._get_number(last=False)
        elif number is None:
            self.number = self._get_number()

        twriter_path = os.path.join(
            os.getcwd(), "tmp",
            "summary_" + self.flag_name + ("_%d" % self.number))
        self.test_writer = tf.summary.FileWriter(twriter_path,
                                                 self.sess.graph)
        print("To use tensorboard, type:")
        print("tensorboard --logdir='" + twriter_path + "'")

        # Save and Load
        self.saver = tf.train.Saver()
        if load:
            self.saver.restore(self.sess, self.save_path)
        else:
            # Initialize all variables
            self.sess.run(self.init)
def phi(self):
    """Azimuth."""
    return tf.atan2(tf_non_zero(self.py(), self.epsilon), self.px())
def extract_descriptor(self, image_shape):
    """
    Main function of this class, which extracts the descriptors from a
    batch of images.

    Args:
        image_shape : Tuple(int, int). A tuple (height, width) indicating
            the size of the input images.

    Returns:
        images : the input placeholder to feed with the batch of images.
        descs : 4D array of floats. (The explanation below is modified
            from scikit-image.) Grid of DAISY descriptors for the given
            images as an array of dimensionality (N, P, Q, R) where
            ``N = len(images)``
            ``P = ceil((height - radius*2) / step)``
            ``Q = ceil((width - radius*2) / step)``
            ``R = (rings * histograms + 1) * orientations``
    """
    images = tf.placeholder(
        tf.float32, shape=[None, 1, image_shape[0], image_shape[1]])
    images /= 255.0

    dx = images[:, :, :, 1:] - images[:, :, :, :-1]
    dx = tf.pad(dx, tf.constant([[0, 0], [0, 0], [0, 0], [0, 1]]))
    dy = images[:, :, 1:, :] - images[:, :, :-1, :]
    dy = tf.pad(dy, tf.constant([[0, 0], [0, 0], [0, 1], [0, 0]]))

    # Compute gradient orientation and magnitude and their contribution
    # to the histograms.
    grad_mag = tf.sqrt(dx ** 2 + dy ** 2)
    grad_ori = tf.atan2(dy, dx)
    hist = tf.exp(self.orientation_kappa * tf.cos(
        grad_ori - self.orientation_angles))
    hist *= grad_mag

    # Smooth orientation histograms for the center and all rings.
    hist_smooth = self._compute_ring_histograms(hist)

    # Assemble descriptor grid.
    theta = np.array([2 * np.pi * j / self.histograms
                      for j in range(self.histograms)])
    desc_dims = (self.rings * self.histograms + 1) * self.orientations
    desc_shape = (images.shape[2] - 2 * self.radius,
                  images.shape[3] - 2 * self.radius)
    idx = self.orientations
    cos_theta = tf.cos(theta)
    sin_theta = tf.sin(theta)
    descs = [hist_smooth[
        :, 0, :, self.radius:-self.radius, self.radius:-self.radius]]
    for i in range(self.rings):
        for j in range(self.histograms):
            y_min = self.radius + tf.cast(tf.round(
                self.ring_radii[i] * sin_theta[j]), tf.int32)
            y_max = desc_shape[0] + y_min
            x_min = self.radius + tf.cast(tf.round(
                self.ring_radii[i] * cos_theta[j]), tf.int32)
            x_max = desc_shape[1] + x_min
            descs.append(hist_smooth[:, i + 1, :, y_min:y_max, x_min:x_max])
            idx += self.orientations
    descs = tf.concat(descs, axis=1)
    descs = descs[:, :, ::self.step, ::self.step]
    descs = tf.transpose(descs, [0, 2, 3, 1])

    # Normalize descriptors.
    if self.normalization != 'off':
        descs += 1e-10
        if self.normalization == 'l1':
            descs /= tf.reduce_sum(descs, axis=3, keepdims=True)
        elif self.normalization == 'l2':
            descs /= tf.sqrt(tf.reduce_sum(
                tf.pow(descs, 2), axis=3, keepdims=True))
        elif self.normalization == 'daisy':
            # Tensors do not support in-place slice assignment, so
            # normalize each orientation block and re-concatenate.
            blocks = []
            for i in range(0, desc_dims, self.orientations):
                block = descs[:, :, :, i:i + self.orientations]
                norms = tf.sqrt(tf.reduce_sum(
                    tf.pow(block, 2), axis=3, keepdims=True))
                blocks.append(block / norms)
            descs = tf.concat(blocks, axis=3)

    return images, descs
from dnnlib import tflib
from training import dataset

tflib.init_tf()
minibatch_size = 1
training_set = dataset.TFRecordDataset(
    '../datasets/car_labels_v7_oversample_filter')
interpolation_prob = 0.2
rotation_offset = 108

labels_original = training_set.get_random_labels_tf(minibatch_size)

# Mirror some labels to balance the rotations
random_vector = tf.random_uniform([minibatch_size]) < 0.5
rotation_cos = tf.expand_dims(labels_original[:, rotation_offset], axis=-1)
rotation_sin = tf.expand_dims(labels_original[:, rotation_offset + 1], axis=-1)
angle = tf.atan2(rotation_sin, rotation_cos)
new_rotation_cos = tf.cos(angle)
new_rotation_sin = tf.sin(angle) * -1
mirrored_labels = tf.concat([
    labels_original[:, :rotation_offset],
    new_rotation_cos,
    new_rotation_sin,
    labels_original[:, rotation_offset + 2:]
], axis=1)
labels = tf.where(random_vector, labels_original, mirrored_labels)

# Remove half of front left and front right to balance the rotation label
zero_rotation = tf.expand_dims(tf.zeros([minibatch_size]), axis=-1)
removed_labels = tf.concat([
    labels[:, :rotation_offset],
    zero_rotation,
def rayTraceSinglePass(
    rays,
    boundarySegments,
    boundaryArcs,
    targetSegments,
    targetArcs,
    materials,
    epsilion=1e-6,
):
    if boundarySegments is not None:
        b_usingSegments = True
    else:
        b_usingSegments = False

    if boundaryArcs is not None:
        b_usingArcs = True
    else:
        b_usingArcs = False

    with tf.name_scope("rayTraceSingle") as scope:
        # rayRange is just a list of all ray indexes, useful for constructing
        # index tensors to be used with gather
        with tf.name_scope("rayRange") as scope:
            rayRange = tf.range(
                tf.shape(rays, out_type=tf.int64)[0],
                dtype=tf.int64,
                name="rayRange",
            )

        # join boundaries and targets, for the purposes of finding the
        # closest intersection
        with tf.name_scope("segmentTargetJoining") as scope:
            if b_usingSegments:
                opticalSegmentCount = tf.cast(
                    tf.shape(boundarySegments)[0], dtype=tf.int64
                )
            else:
                opticalSegmentCount = 0

            if targetSegments is not None:
                targetSegments = tf.pad(targetSegments, [[0, 0], [0, 2]])
                if b_usingSegments:
                    boundarySegments = tf.concat(
                        (boundarySegments, targetSegments),
                        0,
                        name="joinedBoundarySegments",
                    )
                elif targetSegments.shape[0] != 0:
                    boundarySegments = targetSegments
                    b_usingSegments = True

        with tf.name_scope("arcTargetJoining") as scope:
            if b_usingArcs:
                opticalArcCount = tf.cast(tf.shape(boundaryArcs)[0],
                                          dtype=tf.int64)
            else:
                opticalArcCount = 0

            if targetArcs is not None:
                targetArcs = tf.pad(targetArcs, [[0, 0], [0, 2]])
                if b_usingArcs:
                    boundaryArcs = tf.concat(
                        (boundaryArcs, targetArcs), 0,
                        name="joinedBoundaryArcs"
                    )
                elif targetArcs.shape[0] != 0:
                    boundaryArcs = targetArcs
                    b_usingArcs = True

        # slice the input rays into sections
        with tf.name_scope("inputRaySlicing") as scope:
            xstart = rays[:, 0]
            ystart = rays[:, 1]
            xend = rays[:, 2]
            yend = rays[:, 3]

        # intersect rays and boundary segments
        if b_usingSegments:
            with tf.name_scope("ray-SegmentIntersection") as scope:
                with tf.name_scope("variableMeshing") as scope:
                    xa1, xb1 = tf.meshgrid(xstart, boundarySegments[:, 0])
                    ya1, yb1 = tf.meshgrid(ystart, boundarySegments[:, 1])
                    xa2, xb2 = tf.meshgrid(xend, boundarySegments[:, 2])
                    ya2, yb2 = tf.meshgrid(yend, boundarySegments[:, 3])
                    xa = xa2 - xa1
                    ya = ya2 - ya1
                    xb = xb2 - xb1
                    yb = yb2 - yb1

                # v is the parameter of the intersection for B (bounds), and
                # u is for A (rays). inf values signify that this pair of
                # lines is parallel
                with tf.name_scope("raw_v_parameter") as scope:
                    denominator = xa * yb - ya * xb
                    validSegmentIntersection = tf.greater_equal(
                        tf.abs(denominator), epsilion
                    )
                    safe_value = tf.ones_like(denominator)
                    safe_denominator = tf.where(
                        validSegmentIntersection, denominator, safe_value
                    )
                    segmentV = tf.where(
                        validSegmentIntersection,
                        (ya * (xb1 - xa1) - xa * (yb1 - ya1)) / safe_denominator,
                        safe_value,
                    )
                with tf.name_scope("raw_u_parameter") as scope:
                    segmentU = tf.where(
                        validSegmentIntersection,
                        (xb * (ya1 - yb1) - yb * (xa1 - xb1)) / safe_denominator,
                        safe_value,
                    )

                # Since B encodes line segments, not infinite lines, purge
                # all occurrences in v which are <=0 or >=1, since these
                # imply rays that did not actually strike the segment, only
                # intersected with its infinite continuation. And since A
                # encodes semi-infinite rays, purge all occurrences in u
                # which are < epsilion, since these are intersections that
                # occur before the ray source.
                # We need to compare to epsilion to take account of rays
                # that are starting on a boundary.
                with tf.name_scope("selectClosestValidIntersection") as scope:
                    validSegmentIntersection = tf.logical_and(
                        validSegmentIntersection,
                        tf.greater_equal(segmentV, -epsilion),
                    )
                    validSegmentIntersection = tf.logical_and(
                        validSegmentIntersection,
                        tf.less_equal(segmentV, 1.0 + epsilion),
                    )
                    validSegmentIntersection = tf.logical_and(
                        validSegmentIntersection,
                        tf.greater_equal(segmentU, epsilion),
                    )

                # true where a ray intersection was actually found (needed
                # since raySegmentIndices = 0 both if the ray intersects
                # with boundary 0 and if there was no intersection)
                with tf.name_scope("raySegmentMask") as scope:
                    raySegmentMask = tf.reduce_any(validSegmentIntersection,
                                                   axis=0)

                # match segmentU to each ray
                with tf.name_scope("segmentU") as scope:
                    # raySegmentIndices tells us which ray intersects with
                    # which boundary.
                    # raySegmentIndices[n]=m => ray n intersects boundary
                    # segment m
                    inf = 2 * tf.reduce_max(segmentU) * safe_value
                    segmentU = tf.where(validSegmentIntersection, segmentU, inf)
                    raySegmentIndices = tf.argmin(
                        segmentU, axis=0, name="raySegmentIndices"
                    )

                    # intersectIndicesSquare is a set of indices that can be
                    # used with gather_nd to select positions out of the
                    # grid tensors
                    intersectIndicesSquare = tf.transpose(
                        tf.stack([raySegmentIndices, rayRange])
                    )

                    # the u parameter for ray intersections, after filtering
                    # and processing
                    segmentU = tf.gather_nd(
                        segmentU, intersectIndicesSquare, name="segmentU"
                    )

                    # package and pair the boundary segments with the rays
                    # that intersect with them
                    boundarySegments = tf.gather(
                        boundarySegments, raySegmentIndices,
                        name="boundarySegments"
                    )

        # intersect rays and boundary arcs
        if b_usingArcs:
            with tf.name_scope("ray-ArcIntersection") as scope:
                with tf.name_scope("inputMeshgrids") as scope:
                    x1, xc = tf.meshgrid(xstart, boundaryArcs[:, 0])
                    y1, yc = tf.meshgrid(ystart, boundaryArcs[:, 1])
                    x2, thetaStart = tf.meshgrid(xend, boundaryArcs[:, 2])
                    y2, thetaEnd = tf.meshgrid(yend, boundaryArcs[:, 3])
                    y2, r = tf.meshgrid(tf.reshape(yend, [-1]),
                                        boundaryArcs[:, 4])
                    # the reshape in the above line shouldn't be necessary,
                    # but I was getting some really weird bugs that went
                    # away whenever I tried to read the damn tensor, and
                    # this fixes it for some reason.

                # a, b, c here are parameters to a quadratic equation for u,
                # so we have some special cases to deal with:
                # a = 0 => ray of length zero. This should never happen,
                #     but if it does, it should invalidate the intersections
                # rad < 0 => ray does not intersect circle
                # ?????
                # c = 0 => ray starts on circle => u = 0, -b/c
                # c = 0 => ray ends on circle??? My mind has changed on this
                with tf.name_scope("coordinateAdjusting") as scope:
                    xr = (x1 - xc) / r
                    yr = (y1 - yc) / r
                    xd = (x2 - x1) / r
                    yd = (y2 - y1) / r

                with tf.name_scope("quadraticEquationParts") as scope:
                    with tf.name_scope("a") as scope:
                        a = xd * xd + yd * yd
                    with tf.name_scope("b") as scope:
                        b = 2.0 * xr * xd + 2.0 * yr * yd
                    with tf.name_scope("c") as scope:
                        c = xr * xr + yr * yr - 1.0
                    with tf.name_scope("rad") as scope:
                        rad = b * b - 4.0 * a * c
                safe_value = tf.ones_like(a, name="safe_value")

                with tf.name_scope("raw_u_parameter") as scope:
                    # u will be the parameter of the intersections along
                    # the ray

                    # rad < 0 special case
                    with tf.name_scope("specialCase_complex") as scope:
                        radLess = tf.less(rad, 0)
                        uminus_valid = uplus_valid = tf.logical_not(radLess)
                        safe_rad = tf.where(radLess, safe_value, rad)
                        uminus = tf.where(radLess, safe_value,
                                          (-b - tf.sqrt(safe_rad)))
                        uplus = tf.where(radLess, safe_value,
                                         (-b + tf.sqrt(safe_rad)))

                    # a = 0 special case
                    with tf.name_scope("specialCase_azero") as scope:
                        azero = tf.less(tf.abs(a), epsilion)
                        safe_a = tf.where(azero, safe_value, 2 * a)
                        uminus_valid = tf.logical_and(
                            uminus_valid, tf.logical_not(azero)
                        )
                        uminus = tf.where(azero, safe_value, uminus / safe_a)
                        uplus_valid = tf.logical_and(uplus_valid,
                                                     tf.logical_not(azero))
                        uplus = tf.where(azero, safe_value, uplus / safe_a)

                    """
                    czero = tf.less(tf.abs(c), epsilion)
                    safe_c = tf.where(czero, safe_value, c)
                    uplus_valid = tf.logical_and(uplus_valid, tf.logical_not(czero))
                    b_over_c = tf.where(czero, safe_value, b/safe_c)
                    uplus = tf.where(azero, -b_over_c, uplus/safe_a)
                    #uplus = tf.where(azero, -b/c, uplus/safe_a)"""

                    # cut out all of the rays that have a u < epsilion
                    # parameter, since we only want reactions ahead of
                    # the ray
                    with tf.name_scope("cullNegativeU") as scope:
                        uminus_valid = tf.logical_and(
                            uminus_valid, tf.greater_equal(uminus, epsilion)
                        )
                        uplus_valid = tf.logical_and(
                            uplus_valid, tf.greater_equal(uplus, epsilion)
                        )

                with tf.name_scope("raw_v_parameter") as scope:
                    # determine the x,y coordinate of the intersections
                    with tf.name_scope("xminus") as scope:
                        xminus = x1 + (x2 - x1) * uminus
                    with tf.name_scope("xplus") as scope:
                        xplus = x1 + (x2 - x1) * uplus
                    with tf.name_scope("yminus") as scope:
                        yminus = y1 + (y2 - y1) * uminus
                    with tf.name_scope("yplus") as scope:
                        yplus = y1 + (y2 - y1) * uplus

                    # determine the angle along the arc (arc's parameter)
                    # where the intersection occurs
                    """ these atan2 calls seem to be f*****g up the
                    gradient. So I have to do something convoluted."""
                    """
                    finiteUMinus = tf.debugging.is_finite(uminus)
                    finiteUPlus = tf.debugging.is_finite(uplus)

                    def safe_atan2(y, x, safe_mask):
                        with tf.name_scope("safe_atan") as scope:
                            safe_x = tf.where(safe_mask, x, tf.ones_like(x))
                            safe_y = tf.where(safe_mask, y, tf.ones_like(y))
                            return tf.where(safe_mask, tf.atan2(safe_y, safe_x), tf.zeros_like(safe_x))"""

                    vminus = tf.atan2(yminus - yc, xminus - xc)
                    # vminus = safe_atan2(yminus-yc, xminus-xc, finiteUMinus)
                    vminus = tf.floormod(vminus, 2 * PI)
                    vplus = tf.atan2(yplus - yc, xplus - xc)
                    # vplus = safe_atan2(yplus-yc, xplus-xc, finiteUPlus)
                    vplus = tf.floormod(vplus, 2 * PI)

                # Cut out all cases where v does not fall within the angular
                # extent of the arc
                with tf.name_scope("selectValid_v") as scope:
                    # my angle-in-interval algorithm fails when the interval
                    # is full (0 -> 2*PI), so make the following adjustment
                    # to thetaStart
                    thetaStart = thetaStart + epsilion
                    vminus_valid = tf.less_equal(
                        tf.floormod(vminus - thetaStart, 2 * PI),
                        tf.floormod(thetaEnd - thetaStart, 2 * PI),
                    )
                    uminus_valid = tf.logical_and(vminus_valid, uminus_valid)
                    vplus_valid = tf.less_equal(
                        tf.floormod(vplus - thetaStart, 2 * PI),
                        tf.floormod(thetaEnd - thetaStart, 2 * PI),
                    )
                    uplus_valid = tf.logical_and(vplus_valid, uplus_valid)

                # now we can finally select between the plus and minus cases
                # arcU = tf.where(tf.less(uminus, uplus), uminus, uplus, name="arcU")
                # arcV = tf.where(tf.less(uminus, uplus), vminus, vplus, name="arcV")
                with tf.name_scope("choosePlusOrMinus") as scope:
                    # We have been keeping track of valid and invalid
                    # intersections in the u+/-_valid tensors. But now we
                    # need to compare the values in the u+/- tensors and
                    # prepare for the argmin call that finds only the
                    # closest intersections. To do this we now need to fill
                    # the invalid values in each tensor with some value that
                    # is larger than any valid value. Unfortunately we
                    # cannot use np.inf because that seems to mess with the
                    # gradient calculator.
                    inf = (
                        2
                        * safe_value
                        * tf.reduce_max([tf.reduce_max(uminus),
                                         tf.reduce_max(uplus)])
                    )
                    uminus = tf.where(uminus_valid, uminus, inf)
                    uplus = tf.where(uplus_valid, uplus, inf)

                    choose_uminus = tf.less(uminus, uplus)
                    uminus_valid = tf.logical_and(uminus_valid, choose_uminus)
                    uplus_valid = tf.logical_and(
                        uplus_valid, tf.logical_not(choose_uminus)
                    )

                    # rayArcMask will tell us which rays have found at least
                    # one valid arc intersection
                    rayArcMask = tf.logical_or(uminus_valid, uplus_valid)
                    rayArcMask = tf.reduce_any(rayArcMask, axis=0)

                    arcU = tf.where(choose_uminus, uminus, uplus)
                    arcV = tf.where(choose_uminus, vminus, vplus)

                """
                # true where a ray intersection was actually found
                with tf.name_scope("rayArcMask") as scope:
                    rayArcMask = tf.is_finite(arcU)
                    rayArcMask = tf.reduce_any(rayArcMask, axis=0)"""

                # match arcU to each ray
                with tf.name_scope("arcU_and_arcV") as scope:
                    # rayArcIndices tells us which ray intersects with which
                    # boundary.
                    # rayArcIndices[n]=m => ray n intersects boundary
                    # segment m
                    rayArcIndices = tf.argmin(arcU, axis=0,
                                              name="rayArcIndices")

                    # intersectIndicesSquare is a set of indices that can be
                    # used with gather_nd to select positions out of the
                    # grid tensors
                    intersectIndicesSquare = tf.transpose(
                        tf.stack([rayArcIndices, rayRange])
                    )

                    # the u parameter for ray intersections, after filtering
                    # and processing
                    arcU = tf.gather_nd(arcU, intersectIndicesSquare,
                                        name="arcU")
                    arcV = tf.gather_nd(arcV, intersectIndicesSquare,
                                        name="arcV")

                    # package and pair the boundary arcs with the rays that
                    # intersect with them
                    boundaryArcs = tf.gather(
                        boundaryArcs, rayArcIndices, name="boundaryArcs"
                    )

        # determine which rays are dead
        with tf.name_scope("deadRays") as scope:
            if b_usingSegments and b_usingArcs:
                deadRays = tf.boolean_mask(
                    rays,
                    tf.logical_not(tf.logical_or(rayArcMask, raySegmentMask)),
                    name="deadRays",
                )
            else:
                if b_usingSegments:
                    deadRays = tf.boolean_mask(
                        rays, tf.logical_not(raySegmentMask), name="deadRays"
                    )
                elif b_usingArcs:
                    deadRays = tf.boolean_mask(
                        rays, tf.logical_not(rayArcMask), name="deadRays"
                    )
                else:
                    raise RuntimeError(
                        "rayTraceSinglePass: no boundaries provided for raytracing"
                    )

        # select between segment and arc intersections
        with tf.name_scope("arc_segment_selection") as scope:
            if b_usingSegments and b_usingArcs:
                chooseSegment = tf.logical_and(
                    tf.less(segmentU, arcU), raySegmentMask,
                    name="chooseSegment"
                )
                chooseSegment = tf.logical_or(
                    chooseSegment,
                    tf.logical_and(raySegmentMask, tf.logical_not(rayArcMask)),
                )
                chooseArc = tf.logical_and(
                    tf.logical_not(chooseSegment), rayArcMask, name="chooseArc"
                )
                chooseArc = tf.logical_or(
                    chooseArc,
                    tf.logical_and(rayArcMask, tf.logical_not(raySegmentMask)),
                )
            else:
                if b_usingSegments:
                    chooseSegment = raySegmentMask
                if b_usingArcs:
                    chooseArc = rayArcMask

        # project ALL rays into the boundaries. Rays that do not intersect
        # with any boundaries will also be projected to zero length, but
        # these will be filtered off later
        with tf.name_scope("rayProjection") as scope:
            if b_usingSegments:
                with tf.name_scope("segments") as scope:
                    xstart = rays[:, 0]
                    ystart = rays[:, 1]
                    xend = rays[:, 2]
                    yend = rays[:, 3]
                    xend = xstart + (xend - xstart) * segmentU
                    yend = ystart + (yend - ystart) * segmentU
                    reactedRays_Segment = tf.stack(
                        [xstart, ystart, xend, yend, rays[:, 4], rays[:, 5]],
                        axis=1,
                    )
            if b_usingArcs:
                with tf.name_scope("arcs") as scope:
                    xstart = rays[:, 0]
                    ystart = rays[:, 1]
                    xend = rays[:, 2]
                    yend = rays[:, 3]
                    xend = xstart + (xend - xstart) * arcU
                    yend = ystart + (yend - ystart) * arcU
                    reactedRays_Arc = tf.stack(
                        [xstart, ystart, xend, yend, rays[:, 4], rays[:, 5]],
                        axis=1,
                    )

        # determine which rays are finished
        with tf.name_scope("finishedRays") as scope:
            finishedRays = tf.zeros([0, 6], dtype=tf.float64)
            if b_usingSegments:
                finishedSegmentMask = tf.greater_equal(
                    raySegmentIndices, opticalSegmentCount,
                    name="finishedSegmentMask"
                )
                fsMask = tf.logical_and(finishedSegmentMask, chooseSegment)
                finishedRays_Segment = tf.boolean_mask(reactedRays_Segment,
                                                       fsMask)
                finishedRays = tf.cond(
                    tf.reduce_any(fsMask),
                    lambda: tf.concat([finishedRays, finishedRays_Segment],
                                      axis=0),
                    lambda: finishedRays,
                )
            if b_usingArcs:
                finishedArcMask = tf.greater_equal(
                    rayArcIndices, opticalArcCount, name="finishedArcMask"
                )
                faMask = tf.logical_and(finishedArcMask, chooseArc)
                finishedRays_Arc = tf.boolean_mask(reactedRays_Arc, faMask)
                finishedRays = tf.cond(
                    tf.reduce_any(faMask),
                    lambda: tf.concat([finishedRays, finishedRays_Arc],
                                      axis=0),
                    lambda: finishedRays,
                )

        # conjugate to finished rays
        with tf.name_scope("reactedRays") as scope:
            reactedRays = tf.zeros([0, 6], dtype=tf.float64)
            if b_usingSegments:
                chooseSegment = tf.logical_and(
                    tf.logical_not(finishedSegmentMask), chooseSegment
                )
                reactedRays_Segment = tf.boolean_mask(
                    reactedRays_Segment, chooseSegment,
                    name="reactedRays_Segment"
                )
                boundarySegments = tf.boolean_mask(
                    boundarySegments, chooseSegment, name="boundarySegments"
                )
                reactedRays = tf.cond(
                    tf.reduce_any(chooseSegment),
                    lambda: tf.concat([reactedRays, reactedRays_Segment],
                                      axis=0),
                    lambda: reactedRays,
                )
            if b_usingArcs:
                chooseArc = tf.logical_and(tf.logical_not(finishedArcMask),
                                           chooseArc)
                reactedRays_Arc = tf.boolean_mask(
                    reactedRays_Arc, chooseArc, name="reactedRays_Arc"
                )
                arcV = tf.boolean_mask(arcV, chooseArc, name="arcV")
                boundaryArcs = tf.boolean_mask(
                    boundaryArcs, chooseArc, name="boundaryArcs"
                )
                reactedRays = tf.cond(
                    tf.reduce_any(chooseArc),
                    lambda: tf.concat([reactedRays, reactedRays_Arc], axis=0),
                    lambda: reactedRays,
                )

        # calculate the norm of the surface
        with tf.name_scope("norm") as scope:
            norm = tf.zeros([0], dtype=tf.float64)
            if b_usingSegments:
                normSegment = (
                    tf.atan2(
                        boundarySegments[:, 3] - boundarySegments[:, 1],
                        boundarySegments[:, 2] - boundarySegments[:, 0],
                        name="normSegment",
                    )
                    + PI / 2
                )
                norm = tf.cond(
                    tf.reduce_any(chooseSegment),
                    lambda: tf.concat([norm, normSegment], axis=0),
                    lambda: norm,
                )
            if b_usingArcs:
                normArc = tf.where(
                    tf.less(boundaryArcs[:, 4], 0), arcV + PI, arcV,
                    name="normArc"
                )
                normArc = tf.floormod(normArc, 2 * PI)
                norm = tf.cond(
                    tf.reduce_any(chooseArc),
                    lambda: tf.concat([norm, normArc], axis=0),
                    lambda: norm,
                )

        with tf.name_scope("refractiveIndex") as scope:
            # calculate the refractive index for every material and ray
            # wavelength
            wavelengths = reactedRays[:, 4]
            nstack = tf.stack(
                [each(wavelengths) for each in materials], axis=1,
                name="nstack"
            )
            rayRange = tf.range(
                tf.shape(reactedRays)[0], dtype=tf.int32, name="rayRange"
            )

            # select just the correct entry for n_in and n_out
            if b_usingSegments and b_usingArcs:
                n_in_indices = tf.concat(
                    [boundarySegments[:, 4], boundaryArcs[:, 5]],
                    axis=0,
                    name="n_in_indices",
                )
            else:
                if b_usingSegments:
                    n_in_indices = boundarySegments[:, 4]
                if b_usingArcs:
                    n_in_indices = boundaryArcs[:, 5]
            n_in_indices = tf.cast(n_in_indices, tf.int32)
            n_in_indices = tf.transpose(tf.stack([rayRange, n_in_indices]))
            n_in = tf.gather_nd(nstack, n_in_indices, name="n_in")

            if b_usingSegments and b_usingArcs:
                n_out_indices = tf.concat(
                    [boundarySegments[:, 5], boundaryArcs[:, 6]],
                    axis=0,
                    name="n_out_indices",
                )
            else:
                if b_usingSegments:
                    n_out_indices = boundarySegments[:, 5]
                if b_usingArcs:
                    n_out_indices = boundaryArcs[:, 6]
            n_out_indices = tf.cast(n_out_indices, tf.int32)
            n_out_indices = tf.transpose(tf.stack([rayRange, n_out_indices]))
            n_out = tf.gather_nd(nstack, n_out_indices, name="n_out")

        activeRays = react(reactedRays, norm, n_in, n_out)

    return reactedRays, activeRays, finishedRays, deadRays
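# Illustrative NumPy sketch of the angle-in-arc test used in selectValid_v
# above: v lies on the arc from theta_start to theta_end (going
# counterclockwise) iff
# (v - theta_start) mod 2*pi <= (theta_end - theta_start) mod 2*pi.
# (The code above also nudges thetaStart by epsilion so a full 0 -> 2*PI
# arc does not degenerate.)
import numpy as np

def on_arc(v, theta_start, theta_end):
    two_pi = 2 * np.pi
    return (np.mod(v - theta_start, two_pi)
            <= np.mod(theta_end - theta_start, two_pi))

print(on_arc(0.1, -0.5, 0.5))    # True: 0.1 is inside the arc
print(on_arc(np.pi, -0.5, 0.5))  # False: pi is outside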
def phi(self, **opts):
    """Azimuth."""
    return tf.atan2(tf_non_zero(self.py(**opts), self.epsilon),
                    self.px(**opts))
def model(self, features: Dict[str, tf.Tensor], labels: Dict[str, tf.Tensor],
          mode: str) -> tf.Tensor:
    """
    Define your model metrics and architecture; the logic is dependent on
    the mode.

    :param features: A dictionary of potential inputs for your model
    :param labels: Input label set
    :param mode: Current training mode (train, test, predict)
    :return: An estimator spec used by the higher level API
    """
    # set flag if the model is currently training
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # initialise model architecture
    seg_output, pos_output, cos_output, sin_output, width_output = \
        self._create_model(features['input'], is_training)

    segmentation_classes = tf.argmax(input=seg_output, axis=3,
                                     output_type=tf.int32)

    # TODO: update model predictions
    predictions = {
        'segmentation': tf.expand_dims(segmentation_classes, -1),
        'segmentation_probabilities': tf.nn.softmax(seg_output),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        # TODO: update output during serving
        export_outputs = {
            'segmentation': tf.estimator.export.ClassificationOutput(
                scores=predictions['segmentation_probabilities'],
                classes=tf.cast(predictions['segmentation'], tf.string))
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                          export_outputs=export_outputs)

    # calculate loss
    # specify some class weightings
    segmentation_class_weights = tf.constant([1, 5, 1, 5])
    # specify the weights for each sample in the batch (without having to
    # compute the onehot label matrix)
    segmentation_weights = tf.gather(segmentation_class_weights,
                                     labels['seg'])
    seg_loss = tf.losses.sparse_softmax_cross_entropy(
        labels=labels['seg'], logits=seg_output,
        weights=segmentation_weights)
    # seg_loss = tf.losses.sparse_softmax_cross_entropy(labels=labels['seg'], logits=seg_output)

    # tf.print(labels['grasps'], output_stream=sys.stdout)
    # print(labels['grasps'])
    # grasp_loss_mask = labels['quality']
    grasp_loss_mask = tf.to_float(
        tf.greater(labels['quality'], tf.zeros_like(labels['quality'])))
    # quality_loss = tf.losses.mean_squared_error(labels=labels['quality'], predictions=pos_output)
    quality_loss = tf.losses.sigmoid_cross_entropy(labels['quality'],
                                                   logits=pos_output)
    sin_loss = tf.losses.mean_squared_error(labels=labels['angle_sin'],
                                            predictions=sin_output,
                                            weights=grasp_loss_mask)
    cos_loss = tf.losses.mean_squared_error(labels=labels['angle_cos'],
                                            predictions=cos_output,
                                            weights=grasp_loss_mask)
    width_loss = tf.losses.mean_squared_error(
        labels=(labels['gripper_width'] / 150.0),
        predictions=width_output,
        weights=grasp_loss_mask)

    angle = 0.5 * tf.atan2(sin_output, cos_output)
    loss = seg_loss + quality_loss + sin_loss + cos_loss + width_loss

    # TODO: update summaries for tensorboard
    segmentations_class_colors = tf.convert_to_tensor(
        [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255]], dtype=tf.uint8)
    segmentation_image = tf.gather(segmentations_class_colors,
                                   segmentation_classes)
    tf.summary.scalar('seg_loss', seg_loss)
    tf.summary.scalar('quality_loss', quality_loss)
    tf.summary.scalar('angle_sin_loss', sin_loss)
    tf.summary.scalar('angle_cos_loss', cos_loss)
    tf.summary.scalar('width_loss', width_loss)
    tf.summary.scalar('loss', loss)

    gaussian_blur_kernel = gaussian_kernel(5, 0.0, 1.0)
    gaussian_blur_kernel = gaussian_blur_kernel[:, :, tf.newaxis, tf.newaxis]
    quality = tf.sigmoid(pos_output)
    quality = tf.nn.conv2d(quality, gaussian_blur_kernel, [1, 1, 1, 1],
                           'SAME')

    images = {
        'input': tf.summary.image('input', features['input']),
        'segmentation': tf.summary.image('segmentation', segmentation_image),
        'quality': tf.summary.image('quality', quality),
        'angle_sin': tf.summary.image('angle_sin', sin_output),
        'angle_cos': tf.summary.image('angle_cos', cos_output),
        'width': tf.summary.image('width', width_output),
        'angle': tf.summary.image('angle', angle)
    }
    # tf.summary.image('segmentation', tf.image.hsv_to_rgb(predictions['segmentation'] / 4.0))
    # tf.summary.image('segmentation', tf.cast(predictions['segmentation'], tf.float32))

    if mode == tf.estimator.ModeKeys.EVAL:
        # TODO: update evaluation metrics
        # output eval images
        # eval_summary_hook = tf.train.SummarySaverHook(summary_op=images, save_secs=120)
        summaries_dict = {
            'val_mean_iou': tf.metrics.mean_iou(
                labels['seg'], predictions=predictions['segmentation'],
                num_classes=4)
        }
        # summaries_dict.update(images)
        # summaries_dict.update(get_estimator_eval_metric_ops)
        b = tf.shape(quality)[0]
        detection_grasps = self._create_detection_head(quality, angle,
                                                       width_output)
        detection_evaluator = OAHODetectionEvaluator()
        detection_visualizer = OAHODetectionVisualizer()
        groundtruth_grasps = tf.reshape(
            tf.sparse_tensor_to_dense(labels['grasps'], -1), (b, -1, 4))
        groundtruth_segmentation_image = tf.gather(
            segmentations_class_colors, tf.squeeze(labels['seg'], -1))
        summaries_dict.update(
            detection_evaluator.get_estimator_eval_metric_ops({
                'image_id': labels['id'],
                'groundtruth_grasps': groundtruth_grasps,
                'detection_grasps': detection_grasps
            }))
        normalized_depth = tf.image.convert_image_dtype(
            features['input'] / tf.reduce_max(features['input'],
                                              axis=[1, 2], keepdims=True),
            dtype=tf.uint8)
        summaries_dict.update(
            detection_visualizer.get_estimator_eval_metric_ops({
                'image_id': labels['id'],
                'depth': normalized_depth,
                'groundtruth_grasps': groundtruth_grasps,
                'detection_grasps': detection_grasps,
                'groundtruth_segmentation': groundtruth_segmentation_image,
                'detection_segmentation': segmentation_image
            }))
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=summaries_dict
            # , evaluation_hooks=[eval_summary_hook]
        )

    # assert we only reach this point during training
    assert mode == tf.estimator.ModeKeys.TRAIN

    # create learning rate variable for hyper-parameter tuning
    lr = tf.Variable(initial_value=self.config['learning_rate'],
                     name='learning-rate')

    # TODO: update optimiser
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.minimize(
        loss,
        global_step=tf.train.get_global_step(),
        colocate_gradients_with_ops=True,
    )

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
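# Side note (an assumption suggested by the 0.5 factor above, not confirmed
# by the source): the network regresses sin(2*theta) and cos(2*theta), so
# grasp angles that differ by pi map to the same targets, which suits a
# symmetric parallel-jaw gripper; 0.5 * atan2 then recovers an angle in
# (-pi/2, pi/2]. NumPy check:
import numpy as np

theta = 1.9  # equivalent to theta - pi for a symmetric gripper
recovered = 0.5 * np.arctan2(np.sin(2 * theta), np.cos(2 * theta))
print(np.isclose(recovered, theta - np.pi))  # True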
rotatedWeightsL = []
for angle in angles:
    rotatedWeightsL.append(
        cc.transformations.rotateVectorField(flatWeights, angle))
rotatedWeights = tf.stack(rotatedWeightsL, axis=0)

thetas = []
with tf.device('/gpu:0'):
    weightShape = rotatedWeights.get_shape()
    for angle in range(0, num_bins, num_bins // num_angles):
        weightSet = tf.gather(rotatedWeights, angle + offset)
        conv2 = cc.layers.conv_2tf(inpt, weightSet, c_i, c_o, 1, 1, "SAME")
        conv0 = tf.nn.conv2d(inpt, weightSet, [1, 1, 1, 1], "SAME")
        angleMask = tf.logical_and(tf.abs(conv0) < 1e-2,
                                   tf.abs(conv2) < 1e-2)
        conv0 = tf.where(angleMask, tf.zeros_like(conv0), conv0)
        conv2 = tf.where(angleMask, tf.ones_like(conv2), conv2)
        thetas.append(-tf.atan2(conv2, conv0))
    angles2 = tf.concat(angles, axis=0)
    thetas = tf.stack(thetas, axis=-1)
    winner = tf.argmin(tf.abs(thetas), axis=-1, output_type=tf.int32)
    thetas2 = cc.ops.reduceIndex(thetas, winner)
    thetas2, convMask = cc.ops.offsetCorrect(thetas2,
                                             [numpy.pi / num_angles])
    # myErrorGPU = tf.reduce_sum(tf.pow(groundTruth - convByIndexCudaRes, 1))
    # tfErrorGPU = tf.reduce_sum(tf.pow(groundTruth - flatConvCudaRes, 1))
    quantized = (tf.cast(tf.round(thetas2 * num_bins / (2 * numpy.pi)),
                         tf.int32)
                 + tf.cast(winner * (num_bins // num_angles), tf.int32)
                 + offset)
    convByIndexCudaRes = cc.ops.convByIndex(
        inpt, rotatedWeights, quantized, convMask, thetas2,
        [1, 1, 1, 1], "SAME")
    flatConvCudaResL = []
    for rotation in range(num_bins + 1):
        rMask = tf.cast(tf.equal(quantized, rotation), tf.float32)
        flatConvCudaResL.append(
            tf.nn.conv2d(inpt, rotatedWeightsL[rotation],
                         [1, 1, 1, 1], "SAME") * rMask)
    flatConvCudaRes = (tf.reduce_sum(tf.stack(flatConvCudaResL, axis=0),
                                     axis=0)
                       * tf.cast(convMask, tf.float32))
    myErrorGPU = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reduce_sum(convByIndexCudaRes, axis=[0, 1, 2]), labels=lab)