def _build_functions(self, data):
    """Build the audio-visual encoder/decoder training graph.

    Wires a feed-switchable input pipeline, the image and acoustic
    encoders, the classifier, the cross-entropy loss, the accuracy op,
    and a single Adam train op, then returns the dataset's initializable
    iterator so the caller can (re)initialize the input pipeline.

    Args:
        data: data-loader object exposing a `tf.data` dataset as
            `data.data` (its output types/shapes are read from it).

    Returns:
        The initializable iterator created from `data.data`.
    """
    # String-handle placeholder lets the caller switch datasets at run time.
    self.handle = tf.placeholder(tf.string, shape=())
    iterator = tf.data.Iterator.from_string_handle(self.handle,
                                                   data.data.output_types,
                                                   data.data.output_shapes)
    iterat = data.data.make_initializable_iterator()
    next_batch = iterator.get_next()
    # give directly batch tensor depending on the network reshape
    self.acoustic, self.mfcc, self.video, self.labels = self._retrieve_batch(
        next_batch)
    # Disabled per-sample min/max normalization of the MFCC features:
    # self.mfcc = self.mfcc - tf.reduce_min(self.mfcc, axis=[1], keep_dims=True)
    # self.mfcc = self.mfcc/tf.reduce_max(self.mfcc, axis=[1], keep_dims=True)
    # Broadcast each 12-dim MFCC vector over a 36x48 spatial grid so it
    # matches the acoustic-image resolution (per-frame 36x48x12 maps).
    mfccmap = tf.reshape(self.mfcc, (-1, 12, 1, 12))
    mfccmap = tf.tile(mfccmap, (1, 1, 36 * 48, 1))
    mfccmap = tf.reshape(mfccmap, (-1, 36, 48, 12))
    # The image encoder's output conditions the acoustic encoder.
    self.model_encoder_images._build_model(self.video)
    self.model_encoder_acoustic._build_model(
        mfccmap, self.model_encoder_images.output)
    # Regroup the encoder output into 12-frame clips for classification.
    output = tf.reshape(self.model_encoder_acoustic.output,
                        shape=[-1, 12, 36, 48, 12])
    self.model._build_model(output)
    # Temporal pooling: average the per-frame logits over the 12 frames.
    expanded_shape = [-1, 12, self.num_classes]
    self.logits = tf.reduce_mean(tf.reshape(self.model.output,
                                            shape=expanded_shape),
                                 axis=1)
    self.cross_loss = tf.losses.softmax_cross_entropy(
        onehot_labels=self.labels, logits=self.logits, scope='cross_loss')
    # Total loss also collects any regularization losses registered so far.
    self.loss = tf.losses.get_total_loss()
    # Define accuracy
    self.accuracy = buildAccuracy(self.logits, self.labels)
    self.global_step = tf.train.create_global_step()
    # Only the classifier's variables are trained here; the encoders stay
    # frozen.
    var_list = slim.get_variables(self.model.scope + '/')
    self.optimizer2 = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate)
    # NOTE(review): batch-norm UPDATE_OPS are NOT attached to the train op
    # (the control-dependency block below is commented out) — confirm this
    # is intentional.
    # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # with tf.control_dependencies(update_ops):
    with tf.device('/gpu:0'):
        # Explicit compute/apply split (instead of optimizer.minimize)
        # keeps the gradients available for inspection.
        grads_and_vars = self.optimizer2.compute_gradients(
            self.loss, var_list)
        # Ask the optimizer to apply the gradients.
        self.train_op_0 = self.optimizer2.apply_gradients(
            grads_and_vars, global_step=self.global_step)
    # Initialize model saver (keep only the 5 most recent checkpoints).
    self.saver = tf.train.Saver(max_to_keep=5)
    return iterat
def _build_functions(self, data):
    """Build the single-model classification training graph.

    Creates a feed-switchable input pipeline, builds `self.model` on the
    retrieved batch, defines the logits (with optional temporal pooling
    depending on FLAGS.model), the loss, the accuracy, the global step and
    an Adam train op, and returns the dataset's initializable iterator.

    Args:
        data: data-loader object exposing a `tf.data` dataset as
            `data.data` (its output types/shapes are read from it).

    Returns:
        The initializable iterator created from `data.data`.
    """
    # String-handle placeholder lets the caller switch datasets at run time.
    self.handle = tf.placeholder(tf.string, shape=())
    iterator = tf.data.Iterator.from_string_handle(self.handle,
                                                   data.data.output_types,
                                                   data.data.output_shapes)
    iterat = data.data.make_initializable_iterator()
    next_batch = iterator.get_next()
    # give directly batch tensor depending on the network reshape
    in_data, self.labels = self._retrieve_batch(next_batch)
    self.model._build_model(in_data)
    if FLAGS.model == 'ResNet18_v1' and self.temporal_pooling:
        # Temporal pooling gives one prediction per clip of nr_frames
        # frames; without it there is one prediction per frame.
        expanded_shape = [-1, self.nr_frames, self.num_classes]
        self.logits = tf.reduce_mean(tf.reshape(self.model.output,
                                                shape=expanded_shape),
                                     axis=1)
    elif FLAGS.model == 'DualCamHybridNet' and self.temporal_pooling:
        # Same pooling, but the clip length comes from the sample length
        # in seconds times the frame rate.
        expanded_shape = [
            -1, FLAGS.sample_length * _FRAMES_PER_SECOND, self.num_classes
        ]
        self.logits = tf.reduce_mean(tf.reshape(self.model.output,
                                                shape=expanded_shape),
                                     axis=1)
    else:
        self.logits = self.model.output
    # Define loss
    self.cross_loss = tf.losses.softmax_cross_entropy(
        onehot_labels=self.labels, logits=self.logits, scope='cross_loss')
    # Total loss also collects any regularization losses registered so far.
    self.loss = tf.losses.get_total_loss()
    # Define accuracy
    self.accuracy = buildAccuracy(self.logits, self.labels)
    # Initialize counters and stats
    self.global_step = tf.train.create_global_step()
    # Define optimizer
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate)
    # Run batch-norm (UPDATE_OPS) updates together with each train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_step = self.optimizer.minimize(
            loss=self.loss,
            global_step=self.global_step,
            var_list=self.model.train_vars)
    # Initialize model saver (max_to_keep=None keeps every checkpoint).
    self.saver = tf.train.Saver(max_to_keep=None)
    return iterat
def _build_functions(self, data):
    """Build the teacher/student knowledge-distillation training graph.

    The student (`self.model_2`, fed with audio data) is trained against
    both the ground-truth labels and the softened clip-level predictions
    of the teacher (`self.model_transfer`, fed with acoustic data); the
    two losses are mixed by `self.alpha`. Returns the dataset's
    initializable iterator.

    Args:
        data: data-loader object exposing a `tf.data` dataset as
            `data.data` (its output types/shapes are read from it).

    Returns:
        The initializable iterator created from `data.data`.
    """
    # String-handle placeholder lets the caller switch datasets at run time.
    self.handle = tf.placeholder(tf.string, shape=())
    # Epoch placeholder; fed by the training loop elsewhere.
    self.epoch = tf.placeholder(tf.int32, shape=())
    iterator = tf.data.Iterator.from_string_handle(self.handle,
                                                   data.data.output_types,
                                                   data.data.output_shapes)
    iterat = data.data.make_initializable_iterator()
    next_batch = iterator.get_next()
    # give directly batch tensor depending on the network reshape;
    # `scenario` is retrieved but unused in this function.
    audio_data, acoustic_data, labels, scenario = self._retrieve_batch(
        next_batch)
    self.labels = labels
    # Build student and teacher on the batch tensors.
    with tf.device('/gpu:0'):
        self.model_2._build_model(audio_data)
        self.model_transfer._build_model(acoustic_data)
    logits_2_reshape = self.model_2.output
    # Distillation temperature; 1 leaves the teacher's logits unscaled.
    temperature_value = 1
    logits_transfer = self.model_transfer.output
    logits_transfer = tf.nn.softmax(logits_transfer / temperature_value)
    # Average the teacher's per-frame probabilities over each clip
    # (sample_length seconds x frames-per-second frames).
    expanded_shape = [
        -1, FLAGS.sample_length * _FRAMES_PER_SECOND, self.num_classes
    ]
    transferweighted = tf.reduce_mean(tf.reshape(logits_transfer,
                                                 shape=expanded_shape),
                                      axis=1)
    # Hard-label loss weighted by (1 - alpha)...
    self.cross_loss = tf.losses.softmax_cross_entropy(
        onehot_labels=self.labels,
        logits=logits_2_reshape,
        weights=1.0 - self.alpha,
        scope='cross_loss')
    # ...and distillation loss against the teacher, weighted by alpha.
    self.dist_loss = tf.losses.softmax_cross_entropy(
        onehot_labels=transferweighted,
        logits=logits_2_reshape,
        weights=self.alpha,
        scope='dist_loss')
    # Total loss also collects any regularization losses registered so far.
    self.loss = tf.losses.get_total_loss()
    # Define accuracy (student predictions vs. hard labels).
    self.accuracy = buildAccuracy(logits_2_reshape, self.labels)
    # Initialize counters and stats
    self.global_step = tf.train.create_global_step()
    # Define optimizer
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Only the student's variables are updated; the teacher stays frozen.
    self.train_vars = self.model_2.train_vars
    with tf.control_dependencies(update_ops):
        with tf.device('/gpu:0'):
            # Explicit compute/apply split keeps gradients inspectable.
            grads_and_vars = self.optimizer.compute_gradients(
                self.loss, self.model_2.train_vars)
            # Ask the optimizer to apply the gradients.
            self.train_op_1 = self.optimizer.apply_gradients(
                grads_and_vars, global_step=self.global_step)
    # Initialize model saver (max_to_keep=None keeps every checkpoint).
    self.saver = tf.train.Saver(max_to_keep=None)
    return iterat
def plotdecodeimages():
    """Evaluate DualCamNet accuracy on real vs. UNet-reconstructed acoustic images.

    Builds the video encoder + UNet reconstruction graph and two
    DualCamHybridModel classifiers (one on the ground-truth acoustic
    images, one on the reconstructed — or, if FLAGS.mfccmap != 0, the
    tiled-MFCC — maps), restores all weights from checkpoints, runs one
    pass over the test split accumulating both accuracies, prints them and
    writes them to a text file next to the relevant checkpoint.

    Relies on module-level FLAGS for: init_checkpoint, ac_checkpoint,
    train_file, batch_size, datatype, num_skip_conn, ae, mfccmap.
    """
    # Derive short run names from the checkpoint file names
    # (expects a "<prefix>_<name>.ckpt..." naming scheme).
    s = FLAGS.init_checkpoint.split('/')[-1]
    name = (s.split('_')[1]).split('.ckpt')[0]
    s = FLAGS.ac_checkpoint.split('/')[-1]
    nameac = (s.split('_')[1]).split('.ckpt')[0]
    random_pick = False
    build_spectrogram = True
    normalize = False
    # Create data loaders according to the received program arguments
    print('{} - Creating data loaders'.format(datetime.now()))
    # Modalities 0, 1 and 2 are all loaded (acoustic, MFCC, video —
    # inferred from the next_batch indexing below; confirm with the loader).
    modalities = []
    modalities.append(0)
    modalities.append(1)
    modalities.append(2)
    with tf.device('/cpu:0'):
        if FLAGS.datatype == 'old':
            num_classes = 14
            train_data = ActionsDataLoader(FLAGS.train_file, 'testing',
                                           batch_size=FLAGS.batch_size,
                                           num_epochs=1,
                                           sample_length=1,
                                           datakind=FLAGS.datatype,
                                           buffer_size=10,
                                           shuffle=False,
                                           embedding=0,
                                           normalize=normalize,
                                           build_spectrogram=build_spectrogram,
                                           correspondence=0,
                                           random_pick=random_pick,
                                           modalities=modalities,
                                           nr_frames=12)
        elif FLAGS.datatype == 'outdoor':
            num_classes = 10
            train_data = SoundDataLoader(FLAGS.train_file, 'testing',
                                         batch_size=FLAGS.batch_size,
                                         num_epochs=1,
                                         sample_length=1,
                                         datakind=FLAGS.datatype,
                                         buffer_size=10,
                                         shuffle=False,
                                         embedding=0,
                                         normalize=normalize,
                                         build_spectrogram=build_spectrogram,
                                         correspondence=0,
                                         random_pick=random_pick,
                                         modalities=modalities,
                                         nr_frames=12)
    # Two identical classifiers: one for ground-truth acoustic images, one
    # for the reconstructed/tiled maps.
    modelacustic = DualCamHybridModel(input_shape=[36, 48, 12],
                                      num_classes=num_classes,
                                      embedding=0)
    modelnegative = DualCamHybridModel(input_shape=[36, 48, 12],
                                      num_classes=num_classes,
                                      embedding=0)
    data_size = train_data.num_samples
    # Build model
    print('{} - Building model'.format(datetime.now()))
    print(data_size)
    with tf.device('/gpu:0'):
        model_video = ResNet50Model(input_shape=[224, 298, 3],
                                    num_classes=None)
        # UNet variant is selected by the number of skip connections.
        if FLAGS.num_skip_conn == 2:
            model = UNetAcResNet50_2skips(input_shape=[36, 48, 12],
                                          embedding=FLAGS.ae)
        elif FLAGS.num_skip_conn == 1:
            model = UNetAcResNet50(input_shape=[36, 48, 12],
                                   embedding=FLAGS.ae)
        elif FLAGS.num_skip_conn == 0:
            model = UNetAcResNet50_0skips(input_shape=[36, 48, 12],
                                          embedding=FLAGS.ae)
    # Feed-switchable input pipeline over the test split.
    handle = tf.placeholder(tf.string, shape=())
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_data.data.output_types, train_data.data.output_shapes)
    train_iterat = train_data.data.make_initializable_iterator()
    next_batch = iterator.get_next()
    # next_batch layout: [0]=acoustic images, [1]=MFCC, [2]=video frames,
    # [3]=one-hot labels (inferred from the reshapes — TODO confirm with
    # the data loader).
    mfcc = tf.reshape(next_batch[1], shape=[-1, 12])
    images = tf.reshape(next_batch[2], shape=[-1, 224, 298, 3])
    acoustic = tf.reshape(next_batch[0], shape=[-1, 12, 36, 48, 12])
    # Disabled per-sample min/max normalization of the MFCC features:
    # mfcc = mfcc - tf.reduce_min(mfcc, axis=[1], keep_dims=True)
    # mfcc = mfcc / tf.reduce_max(mfcc, axis=[1], keep_dims=True)
    # Broadcast each 12-dim MFCC vector over a 36x48 spatial grid so it
    # matches the acoustic-image resolution.
    mfccmap = tf.reshape(mfcc, (-1, 12, 1, 12))
    mfccmap = tf.tile(mfccmap, (1, 1, 36 * 48, 1))
    mfccmap = tf.reshape(mfccmap, (-1, 36, 48, 12))
    model_video._build_model(images)
    model._build_model(mfccmap, model_video.output)
    modelacustic._build_model(acoustic)
    labels = tf.reshape(next_batch[3], shape=[-1, num_classes])
    # Classify either the UNet reconstruction or the raw tiled-MFCC maps.
    if FLAGS.mfccmap == 0:
        output = tf.reshape(model.output, shape=[-1, 12, 36, 48, 12])
    else:
        output = tf.reshape(mfccmap, shape=[-1, 12, 36, 48, 12])
    modelnegative._build_model(output)
    # Temporal pooling: average per-frame logits over the 12 frames.
    expanded_shape = [-1, 12, num_classes]
    logitsacoustic = tf.reduce_mean(tf.reshape(modelacustic.output,
                                               shape=expanded_shape),
                                    axis=1)
    logistnegative = tf.reduce_mean(tf.reshape(modelnegative.output,
                                               shape=expanded_shape),
                                    axis=1)
    accuracyacoustic = buildAccuracy(logitsacoustic, labels)
    accuracynegative = buildAccuracy(logistnegative, labels)
    # Running totals: sample count and sample-weighted accuracy sums.
    total_size = 0
    batch_count = 0
    num = 0
    accuracyac = 0
    accuracyfalse = 0
    print('{} - Starting'.format(datetime.now()))
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=tf.GPUOptions(
                                              allow_growth=True))) as session:
        train_handle = session.run(train_iterat.string_handle())
        # Initialize or restore the student (video + UNet) weights.
        # NOTE(review): FLAGS.init_checkpoint is dereferenced at the top of
        # this function, so the None branch below looks unreachable —
        # confirm whether it is dead code.
        if FLAGS.init_checkpoint is None:
            print('{} - Initializing student model'.format(datetime.now()))
            model.init_model(session, FLAGS.init_checkpoint)
            print('{} - Done'.format(datetime.now()))
        else:
            print('{} - Restoring student model'.format(datetime.now()))
            var_list1 = slim.get_variables(model_video.scope + '/')
            var_list2 = slim.get_variables(model.scope + '/')
            var_list = var_list2 + var_list1
            saver = tf.train.Saver(var_list=var_list)
            saver.restore(session, FLAGS.init_checkpoint)
            print('{} - Done'.format(datetime.now()))
        # Both classifiers are restored from the same DualCamNet checkpoint.
        var_list = slim.get_variables(modelacustic.scope + '/')
        saver = tf.train.Saver(var_list=var_list)
        saver.restore(session, FLAGS.ac_checkpoint)
        var_list = slim.get_variables(modelnegative.scope + '/')
        saver = tf.train.Saver(var_list=var_list)
        saver.restore(session, FLAGS.ac_checkpoint)
        session.run(train_iterat.initializer)
        # One pass over the test split; stops on OutOfRangeError.
        while True:
            try:
                acc, accrec, labelsvalue = session.run(
                    [accuracyacoustic, accuracynegative, labels],
                    feed_dict={
                        handle: train_handle,
                        model.network['keep_prob']: 1.0,
                        model.network['is_training']: 0,
                        model_video.network['keep_prob']: 1.0,
                        model_video.network['is_training']: 0,
                        modelnegative.network['keep_prob']: 1,
                        modelnegative.network['is_training']: 0,
                        modelacustic.network['keep_prob']: 1,
                        modelacustic.network['is_training']: 0
                    })
                # Weight each batch's accuracy by its actual size (the
                # last batch may be smaller).
                total_size += labelsvalue.shape[0]
                accuracyac += acc * labelsvalue.shape[0]
                accuracyfalse += accrec * labelsvalue.shape[0]
                print(total_size)
            except tf.errors.OutOfRangeError:
                break
            batch_count += 1
        print('{} - Completed, got {} samples'.format(datetime.now(),
                                                      total_size))
        # NOTE(review): divides by zero if the dataset yielded no samples.
        acctot = accuracyac / total_size
        accrectot = accuracyfalse / total_size
        print('acc rec {} acc ac {}'.format(accrectot, acctot))
        # Write results next to the UNet checkpoint (reconstruction run) or
        # next to the DualCamNet checkpoint (tiled-MFCC baseline run).
        if FLAGS.mfccmap == 0:
            with open(
                    '{}'.format(
                        str.join('/', FLAGS.init_checkpoint.split('/')[:-1]))
                    + "/test_unet{}_dualcamnet{}.txt".format(name, nameac),
                    "w") as outfile:
                outfile.write('acc rec {} acc ac {}'.format(accrectot,
                                                            acctot))
        else:
            with open(
                    '{}'.format(
                        str.join('/', FLAGS.ac_checkpoint.split('/')[:-1]))
                    + "/test_map_dualcamnet{}.txt".format(nameac),
                    "w") as outfile:
                outfile.write('acc rec {} acc ac {}'.format(accrectot,
                                                            acctot))