def __init__(self, feature_type: FeatureType): if not (FeatureType.MFCC == feature_type): raise Exception("Only supports {}".format(FeatureType.MFCC.name)) self.data_pkl = get_dataset("savee_sr_22k_2sec_4-classes.pkl") rl_data, pre_train_data = randomize_split(self.data_pkl, split_ratio=0.7) self.data = rl_data self.pre_train_data = pre_train_data
def __init__(self, feature_type: FeatureType) -> None: self.data_pkl = get_dataset( "signal-no-silent-4-class-dataset-2sec_sr_22k.pkl") for d in self.data_pkl: single_file = {} feature = librosa.feature.mfcc(d['x'], sr=SR, n_mfcc=NUM_MFCC) single_file[feature_type.name] = feature single_file['y_emo'] = d['emo'] single_file['y_gen'] = d['gen'] self.data.append(single_file) rl_data, pre_train_data = randomize_split(self.data, split_ratio=0.7) self.data = rl_data self.pre_train_data = pre_train_data
def __init__(self, sr: int = 44) -> None: self.data_pkl = get_dataset('improv-4_Class-sr_' + str(sr) + 'k_2sec.pkl') for d in self.data_pkl: self.data.append({ FeatureType.MFCC.name: d['x'], 'y_emo': d['emo'], 'y_gen': d['gen'], 'path': d['path'] }) rl_data, pre_train_data = randomize_split(self.data, split_ratio=0.7) self.data = rl_data self.pre_train_data = pre_train_data
def main(args): network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) np.random.seed(seed=args.seed) random.seed(args.seed) train_set = framework.get_dataset(args.data_dir) nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = framework.get_image_paths_and_labels( train_set) assert len(image_list) > 0, 'The dataset should not be empty' # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many( args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder], name='enqueue_op') nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) if args.random_rotate: image = tf.py_func(framework.random_rotate_image, [image], tf.uint8) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad( image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels.append([images, label]) image_batch, label_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.reshape(image_batch, [args.batch_size, 100, 100, 3]) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Total number of classes: %d' % nrof_classes) print('Total number of examples: %d' % len(image_list)) print('Building training graph') prelogits = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay, batch_size=args.batch_size) logits = slim.fully_connected( prelogits, len(train_set), activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) print(logits.name) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Add center loss if args.center_loss_factor > 0.0: prelogits_center_loss, _ = framework.center_loss( prelogits, label_batch, args.center_loss_alfa, nrof_classes) tf.add_to_collection( tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op, redun = framework.trainspd(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=config) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver.restore(sess, pretrained_model) # Training and validation loop print('Running training') epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, total_loss, train_op, redun, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) sess.close() return model_dir
def main(args): with tf.Graph().as_default(): with tf.Session() as sess: np.random.seed(seed=args.seed) if args.use_split_dataset: dataset_tmp = framework.get_dataset(args.data_dir) train_set, test_set = split_dataset(dataset_tmp, args.min_nrof_images_per_class, args.nrof_train_images_per_class) if (args.mode=='TRAIN'): dataset = train_set elif (args.mode=='CLASSIFY'): dataset = test_set else: dataset = framework.get_dataset(args.data_dir) # Check that there are at least one training image per class for cls in dataset: assert(len(cls.image_paths)>0, 'There must be at least one image for each class in the dataset') paths, labels = framework.get_image_paths_and_labels(dataset) print('Number of classes: %d' % len(dataset)) print('Number of images: %d' % len(paths)) # Load the model print('Loading feature extraction model') framework.load_model(args.model) # Get input and output tensors images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") embeddings = tf.nn.l2_normalize(embeddings,1) embedding_size = embeddings.get_shape()[1] phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] # Run forward pass to calculate embeddings print('Calculating features for images') nrof_images = len(paths) nrof_batches_per_epoch = int(math.ceil(1.0*nrof_images / args.batch_size)) emb_array = np.zeros((nrof_images , embedding_size)) for i in range(nrof_batches_per_epoch): start_index = i*args.batch_size end_index = min((i+1)*args.batch_size, nrof_images) paths_batch = paths[start_index:end_index] if (end_index-start_index)!=args.batch_size: paths_batch.extend(paths[0:(args.batch_size-(end_index-start_index))]) #print(paths_batch) images = framework.load_data(paths_batch, False, False, args.image_size) feed_dict = { images_placeholder:images, phase_train_placeholder:False } arr = sess.run(embeddings, feed_dict=feed_dict) if (end_index-start_index)!=args.batch_size: arr=arr[0:end_index-start_index] emb_array[start_index:end_index,:] = arr[0:(end_index - start_index)] classifier_filename_exp = os.path.expanduser(args.classifier_filename) if (args.mode=='TRAIN'): # Train classifier print('Training classifier') model = SVC(kernel='linear', probability=True) model.fit(emb_array, labels) # Create a list of class names class_names = [ cls.name.replace('_', ' ') for cls in dataset] # Saving classifier model with open(classifier_filename_exp, 'wb') as outfile: pickle.dump((model, class_names), outfile) print('Saved classifier model to file "%s"' % classifier_filename_exp) elif (args.mode=='CLASSIFY'): # Classify images print('Testing classifier') with open(classifier_filename_exp, 'rb') as infile: (model, class_names) = pickle.load(infile) print('Loaded classifier model from file "%s"' % classifier_filename_exp) predictions = model.predict_proba(emb_array) best_class_indices = np.argmax(predictions, axis=1) best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices] arr=[] for i in range(len(best_class_indices)): arr.append((paths[i], predictions[i],best_class_indices[i],labels[i])) pickle.dump(arr, open('classification_data.p','w')) accuracy = 100*np.mean(np.equal(best_class_indices, labels)) print('Accuracy: %.3f' % accuracy) # normalized accuracy (on face detection by MTCNN) # 436 is total images in sfew validation set, 412 is faces that were detected by sfew norm_acc = ((accuracy/100.*float(nrof_images))/436.)*100.+(1.0/7.0)*(436.-float(nrof_images))/436.0*100 print(nrof_images) print('Total Accuracy accounting for face detection failure: %.3f%%' % (norm_acc))