Пример #1
0
    def __init__(self, feature_type: FeatureType):
        if not (FeatureType.MFCC == feature_type):
            raise Exception("Only supports {}".format(FeatureType.MFCC.name))

        self.data_pkl = get_dataset("savee_sr_22k_2sec_4-classes.pkl")

        rl_data, pre_train_data = randomize_split(self.data_pkl, split_ratio=0.7)

        self.data = rl_data
        self.pre_train_data = pre_train_data
Пример #2
0
    def __init__(self, feature_type: FeatureType) -> None:
        self.data_pkl = get_dataset(
            "signal-no-silent-4-class-dataset-2sec_sr_22k.pkl")
        for d in self.data_pkl:
            single_file = {}
            feature = librosa.feature.mfcc(d['x'], sr=SR, n_mfcc=NUM_MFCC)
            single_file[feature_type.name] = feature
            single_file['y_emo'] = d['emo']
            single_file['y_gen'] = d['gen']
            self.data.append(single_file)

        rl_data, pre_train_data = randomize_split(self.data, split_ratio=0.7)

        self.data = rl_data
        self.pre_train_data = pre_train_data
Пример #3
0
    def __init__(self, sr: int = 44) -> None:
        self.data_pkl = get_dataset('improv-4_Class-sr_' + str(sr) +
                                    'k_2sec.pkl')

        for d in self.data_pkl:
            self.data.append({
                FeatureType.MFCC.name: d['x'],
                'y_emo': d['emo'],
                'y_gen': d['gen'],
                'path': d['path']
            })

        rl_data, pre_train_data = randomize_split(self.data, split_ratio=0.7)

        self.data = rl_data
        self.pre_train_data = pre_train_data
Пример #4
0
def main(args):
    network = importlib.import_module(args.model_def)
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = framework.get_dataset(args.data_dir)
    nrof_classes = len(train_set)
    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)

    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = framework.get_image_paths_and_labels(
            train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')

        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 1),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1, ), (1, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                if args.random_rotate:
                    image = tf.py_func(framework.random_rotate_image, [image],
                                       tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

#pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.reshape(image_batch, [args.batch_size, 100, 100, 3])
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')
        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))
        print('Building training graph')
        prelogits = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay,
            batch_size=args.batch_size)
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits',
            reuse=False)
        print(logits.name)
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = framework.center_loss(
                prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES,
                prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op, redun = framework.trainspd(total_loss, global_step,
                                             args.optimizer, learning_rate,
                                             args.moving_average_decay,
                                             tf.global_variables(),
                                             args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=config)
        #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

# Training and validation loop
            print('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list,
                      index_dequeue_op, enqueue_op, image_paths_placeholder,
                      labels_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, redun, summary_op,
                      summary_writer, regularization_losses,
                      args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

    sess.close()
    return model_dir
Пример #5
0
def main(args):
  
    with tf.Graph().as_default():
      
        with tf.Session() as sess:
            
            np.random.seed(seed=args.seed)
            
            if args.use_split_dataset:
                dataset_tmp = framework.get_dataset(args.data_dir)
                train_set, test_set = split_dataset(dataset_tmp, args.min_nrof_images_per_class, args.nrof_train_images_per_class)
                if (args.mode=='TRAIN'):
                    dataset = train_set
                elif (args.mode=='CLASSIFY'):
                    dataset = test_set
            else:
                dataset = framework.get_dataset(args.data_dir)

            # Check that there are at least one training image per class
            for cls in dataset:
                assert(len(cls.image_paths)>0, 'There must be at least one image for each class in the dataset')            

                 
            paths, labels = framework.get_image_paths_and_labels(dataset)
            
            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(paths))
            
            # Load the model
            print('Loading feature extraction model')
            framework.load_model(args.model)
            
            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
 	    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 
            embeddings = tf.nn.l2_normalize(embeddings,1)
	    embedding_size = embeddings.get_shape()[1]
	    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            
            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(math.ceil(1.0*nrof_images / args.batch_size))
            emb_array = np.zeros((nrof_images , embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i*args.batch_size
                end_index = min((i+1)*args.batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                if (end_index-start_index)!=args.batch_size:
                    paths_batch.extend(paths[0:(args.batch_size-(end_index-start_index))])
                    #print(paths_batch)
                images = framework.load_data(paths_batch, False, False, args.image_size)
                feed_dict = { images_placeholder:images, phase_train_placeholder:False }
                arr = sess.run(embeddings, feed_dict=feed_dict)
		if (end_index-start_index)!=args.batch_size:
		    arr=arr[0:end_index-start_index]
                emb_array[start_index:end_index,:] = arr[0:(end_index - start_index)]            
            classifier_filename_exp = os.path.expanduser(args.classifier_filename)
            if (args.mode=='TRAIN'):
                # Train classifier
                print('Training classifier')
                model = SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)
            
                # Create a list of class names
                class_names = [ cls.name.replace('_', ' ') for cls in dataset]

                # Saving classifier model
                with open(classifier_filename_exp, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile)
                print('Saved classifier model to file "%s"' % classifier_filename_exp)                
            elif (args.mode=='CLASSIFY'):
                # Classify images
                print('Testing classifier')
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)
                print('Loaded classifier model from file "%s"' % classifier_filename_exp)
                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                arr=[]
                for i in range(len(best_class_indices)):
		    arr.append((paths[i], predictions[i],best_class_indices[i],labels[i]))
                pickle.dump(arr, open('classification_data.p','w'))
                accuracy = 100*np.mean(np.equal(best_class_indices, labels))
                print('Accuracy: %.3f' % accuracy)
                # normalized accuracy (on face detection by MTCNN)
                # 436 is total images in sfew validation set, 412 is faces that were detected by sfew
                norm_acc = ((accuracy/100.*float(nrof_images))/436.)*100.+(1.0/7.0)*(436.-float(nrof_images))/436.0*100
                print(nrof_images)
                print('Total Accuracy accounting for face detection failure: %.3f%%' % (norm_acc))