Example #1
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          load_to_memory=False,
          batch_size=32,
          nb_epoch=100):

    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    if load_to_memory:
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
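A minimal invocation sketch for this variant. The values are illustrative assumptions, not fixed by the function above: 'features'/'images' as data types, a sequence length of 40, and 'lstm'/'lrcn' as model names.

train('features', 40, 'lstm', load_to_memory=False, batch_size=32, nb_epoch=100)
train('images', 40, 'lrcn', image_shape=(80, 80, 3))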
Example #2
def evaluate_on_validation(arousal_model_path,
                           valence_model_path,
                           output_file,
                           istrain=True):
    arousal_model = load_custom_model(arousal_model_path)
    valence_model = load_custom_model(valence_model_path)
    model = 'trimodal_model'
    dataset = DataSet(
        istrain=istrain,
        model=model,
    )
    x_valid, y_valid, valid_name_list = dataset.get_all_sequences_in_memory(
        'Validation')

    arousal_pred = arousal_model.predict(x_valid)
    arousal_pred = np.squeeze(arousal_pred)
    valence_pred = valence_model.predict(x_valid)
    valence_pred = np.squeeze(valence_pred)

    print_out_csv(arousal_pred, valence_pred, valid_name_list,
                  '../omg_ValidationVideos.csv', output_file)

    cmd = 'python ../calculateEvaluationCCC.py ../omg_ValidationVideos_pred.csv ../new_omg_ValidationVideos.csv'
    process = subprocess.Popen(cmd.split(),
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)
    process.communicate()
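If the evaluation script's output is needed programmatically rather than just echoed to the console, subprocess.run can capture it; a minimal sketch under the same assumed script paths:

import subprocess

cmd = ['python', '../calculateEvaluationCCC.py',
       '../omg_ValidationVideos_pred.csv', '../new_omg_ValidationVideos.csv']
result = subprocess.run(cmd, stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)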
Example #3
def main():
    seq_length = 20
    class_limit = None
    image_shape = (80, 80, 3)
    data = DataSet(seq_length=seq_length,
                   class_limit=class_limit,
                   image_shape=image_shape)
    batch_size = 20
    concat = False

    data_type = 'images'
    #data_type = 'features'

    X_test, y_test = data.get_all_sequences_in_memory('val', data_type)
    y_test1 = np.argmax(y_test, axis=1)

    md = load_model('./data/checkpoints/weights.hdf5')

    optimizer = Adam(lr=1e-6)  # aggressively small learning rate
    crits = ['accuracy']
    md.compile(loss='categorical_crossentropy',
               optimizer=optimizer,
               metrics=crits)
    score = md.predict(X_test, batch_size=batch_size)

    y_pred = np.argmax(score, axis=1)

    # confusion matrix
    #a = confusion_matrix(y_test1,y_pred)
    #b = a/a.sum(axis = 1, keepdims=True)

    loss, acc = md.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    print("loss:", loss, "acc:", acc)

    class_names = np.array(
        ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'])

    np.set_printoptions(precision=2)

    # Plot non-normalized confusion matrix
    plot_confusion_matrix(y_test1,
                          y_pred,
                          classes=class_names,
                          title='Confusion matrix, without normalization')

    # Plot normalized confusion matrix
    plot_confusion_matrix(y_test1,
                          y_pred,
                          classes=class_names,
                          normalize=True,
                          title='Normalized confusion matrix')

    plt.show()
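The commented-out confusion-matrix lines above can be restored as follows; a sketch that belongs inside main() after y_pred is computed, assuming scikit-learn is available (np is already imported there):

from sklearn.metrics import confusion_matrix

a = confusion_matrix(y_test1, y_pred)     # raw counts
b = a / a.sum(axis=1, keepdims=True)      # row-normalized: each row sums to 1
print(np.round(b, 2))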
Example #4
def get_data(data_type):
    cf = get_config()
    seq_length = cf.getint('sequence', 'seq_length')
    data = DataSet(seq_length=seq_length, class_limit=None)
    # Note: the first argument here is the split name ('train' or 'test'),
    # despite the variable being called data_type.
    X_train, y_train = data.get_all_sequences_in_memory(data_type,
                                                        data_type='features')
    # Flatten feature vectors
    X_train = X_train.reshape(X_train.shape[0], -1)

    # Convert one-hot labels to class indices.
    y_train = np.argmax(y_train, axis=1)
    n_samples, n_features = X_train.shape
    return X_train, y_train, n_samples, n_features
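Because get_data returns flattened per-sequence feature vectors, a classical baseline is a natural consumer; a minimal sketch assuming scikit-learn is installed and that 'train' and 'test' are valid split names for this DataSet:

from sklearn.svm import LinearSVC

X_train, y_train, n_samples, n_features = get_data('train')
X_test, y_test, _, _ = get_data('test')
clf = LinearSVC()  # simple linear baseline on the flattened features
clf.fit(X_train, y_train)
print('test accuracy:', clf.score(X_test, y_test))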
Example #5
def evaluate_on_test(arousal_model_path, valence_model_path, output_file, istrain=False, model='quadmodal_1_model'):
    arousal_model = load_custom_model(arousal_model_path)
    valence_model = load_custom_model(valence_model_path)
    dataset = DataSet(
        istrain = istrain,
        model = model
        )
    
    #load test data
    x_test, name_list = dataset.get_all_sequences_in_memory('Test')

    arousal_pred = arousal_model.predict(x_test)
    arousal_pred = np.squeeze(arousal_pred)
    valence_pred = valence_model.predict(x_test)
    valence_pred = np.squeeze(valence_pred)
    
    print_out_csv(arousal_pred, valence_pred, name_list, '../omg_TestVideos.csv', output_file)
Example #6
def train(istrain=True,
          model='visual_model',
          saved_model_path=None,
          task='arousal',
          batch_size=2,
          nb_epoch=200,
          learning_r=1e-3):
    """
    train the model
    :param model: 'visual_model','audio_model','word_model','trimodal_model'
    :param saved_model_path: saved_model path
    :param task: 'aoursal','valence','emotion'
    :param batch_size: 2
    :param nb_epoch:2100
    :return:s
    """
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(time.time()))
    # Helper: Save the model.
    if not os.path.exists(os.path.join('checkpoints', model)):
        os.makedirs(os.path.join('checkpoints', model))
    checkpointer = ModelCheckpoint(
        #filepath = os.path.join('checkpoints', model, task+'-'+ str(timestamp)+'-'+'best.hdf5' ),
        filepath=os.path.join('checkpoints', model,
                              task + '-' + str(timestamp) + '-' + 'best.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=20)

    # Helper: Save results.

    csv_logger = CSVLogger(os.path.join('logs', model, task + '-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    # seq_length for the sentence
    seq_length = 20
    dataset = DataSet(istrain=istrain,
                      model=model,
                      task=task,
                      seq_length=seq_length)

    # Get the model.
    rm = ResearchModels(istrain=istrain,
                        model=model,
                        seq_length=seq_length,
                        saved_model_path=saved_model_path,
                        task_type=task,
                        saved_audio_model=None,
                        saved_visual_model=None,
                        saved_word_model=None,
                        learning_r=learning_r)
    # Get training and validation data.
    x_train, y_train, train_name_list = dataset.get_all_sequences_in_memory(
        'Train')
    x_valid, y_valid, valid_name_list = dataset.get_all_sequences_in_memory(
        'Validation')

    # Fit!
    # Use standard fit.
    rm.model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        validation_data=(x_valid, y_valid),
        verbose=1,
        callbacks=[tb, early_stopper, csv_logger, checkpointer],
        #callbacks=[tb, lrate, csv_logger,  checkpointer],
        epochs=nb_epoch)

    # find the current best model and get its prediction on validation set
    model_weights_path = os.path.join(
        'checkpoints', model, task + '-' + str(timestamp) + '-' + 'best.hdf5')

    best_model = load_custom_model(model_weights_path)

    y_valid_pred = best_model.predict(x_valid)
    y_valid_pred = np.squeeze(y_valid_pred)

    y_train_pred = best_model.predict(x_train)
    y_train_pred = np.squeeze(y_train_pred)

    #calculate the ccc and mse
    if task in ['arousal', 'valence']:
        print("The CCC in validation set is {}".format(
            ccc(y_valid, y_valid_pred)[0]))
        print("The mse in validation set is {}".format(
            mse(y_valid, y_valid_pred)))

        print("The CCC in train set is {}".format(
            ccc(y_train, y_train_pred)[0]))
        print("The mse in train set is {}".format(mse(y_train, y_train_pred)))
    elif task == "emotion":
        print("F1 score in validation set is {}".format(
            f1(y_valid, y_valid_pred)))
    # display the prediction and true label
    log_path = os.path.join('logs', model, task + '-' + \
        str(timestamp) + '.log')

    display_true_vs_pred([y_valid, y_train], [y_valid_pred, y_train_pred],
                         log_path, task, model)
Example #7
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          config=None):

    if config is not None:
        load_to_memory = config.videoLoadToMemory
        batch_size = config.videoBatchSize
        nb_epoch = config.videoEpochs
        repo_dir = config.repoDir
        feature_file_path = config.featureFileName
        work_dir = config.workDir
        lr = config.videoLearningRate
        decay = config.videoDecay
        classlist = config.classes
    else:
        load_to_memory = False
        batch_size = 32
        nb_epoch = 100
        repo_dir = ''
        feature_file_path = 'data/data_file.csv'
        work_dir = 'data'
        lr = 1e-5
        decay = 1e-6
        classlist = []

    # Helper: Save the model.
    checkpointpath = os.path.join(work_dir, 'checkpoints')
    if not os.path.exists(checkpointpath):
        print("Creating checkpoint folder [%s]", checkpointpath)
        os.makedirs(checkpointpath)
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(work_dir, 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    logpath = os.path.join(work_dir, 'logs')
    if not os.path.exists(logpath):
        print("Creating log folder [%s]", logpath)
        os.makedirs(logpath)
    tb = TensorBoard(log_dir=os.path.join(work_dir, 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join(logpath, model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       repo_dir=repo_dir,
                       feature_file_path=feature_file_path,
                       work_dir=work_dir,
                       classlist=classlist)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape,
                       repo_dir=repo_dir,
                       feature_file_path=feature_file_path,
                       work_dir=work_dir,
                       classlist=classlist)
    # Check whether the data is sufficient.
    if not data.check_data(batch_size):
        print("Insufficient data")
        sys.exit(0)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model, lr,
                        decay)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
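This variant reads every hyperparameter from a config object; a hypothetical stand-in exposing just the attributes accessed above (values mirror the hard-coded defaults of the else branch), with an assumed data type, sequence length, and model name:

class VideoTrainConfig:
    videoLoadToMemory = False
    videoBatchSize = 32
    videoEpochs = 100
    repoDir = ''
    featureFileName = 'data/data_file.csv'
    workDir = 'data'
    videoLearningRate = 1e-5
    videoDecay = 1e-6
    classes = []

train('features', 40, 'lstm', config=VideoTrainConfig())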
Example #8
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          load_to_memory=False,
          batch_size=32,
          nb_epoch=100):
    #Save the Model
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'savedmodels', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    #TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    #Stop after 5 epochs when there is no progress in Learning
    early_stopper = EarlyStopping(patience=5)

    #Save Results in csv format
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    #Process the Data
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    #Get Steps per epoch
    #Guess how much of data.data is Train data by multiplying with 0.7
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        #Get Data
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        #Get Generators
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    #Get Model
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    #Fit by using Standard fit
    if load_to_memory:
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
Example #9
def train(data_type, seq_length, model, learning_rate, learning_decay,
          saved_model=None, class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    print('training_num is ', training_num)  # training_num is a module-level global
    if model == 'lstm_regression':
        regression = 1
        sequence_len = 20
        monitor_par = 'val_loss'
    else:
        regression = 0
        sequence_len = seq_length
        monitor_par = 'val_acc'

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(main_folder, 'checkpoints',model+'2', model + '-{epoch:03d}.hdf5'),
        #filepath=os.path.join(main_folder, 'checkpoints',model, model + '-' + data_type + \
            #'.{epoch:03d}-{val_loss:.3f}.hdf5'),
        monitor=monitor_par,
        verbose=1,
        save_best_only=True)

    # # Helper: TensorBoard
    # tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # # Helper: Stop when we stop learning.
    # early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    #timestamp = time.time()
    csv_logger = CSVLogger(os.path.join(main_folder, 'logs',
                                        model + '2-training-log.csv'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    #steps_per_epoch = (len(data.data) * 0.7) // batch_size
    steps_per_epoch = training_num // batch_size
    print('steps per epoch: %d' % steps_per_epoch)

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
       
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type, regression)
        val_generator = data.frame_generator(batch_size, 'test', data_type, regression)

    # Get the model.
    
    rm = ResearchModels(len(data.classes), model, sequence_len, learning_rate,
                        learning_decay, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        hist = rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[csv_logger],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        
        hist = rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch, # in each epoch all the training data are evaluated
            epochs=nb_epoch,
            verbose=1,
            callbacks=[csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)  # if the GPU is idling while waiting for batches, try increasing the number of workers
    return hist
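Since this variant returns the Keras History object, the loss curves can be inspected after training; a minimal sketch, assuming matplotlib is available and the module-level main_folder and training_num globals are set (the argument values are illustrative):

import matplotlib.pyplot as plt

hist = train('features', 40, 'lstm', learning_rate=1e-4, learning_decay=1e-6)
plt.plot(hist.history['loss'], label='train loss')
plt.plot(hist.history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.legend()
plt.show()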
Example #10
def train(data_type, seq_length, model, saved_model=None,
          class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.best2.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Balance the class weights!
    # Note: this assumes load_to_memory=True, since y is only defined on that path.
    print("setting class weights:")
    flashing = 0
    not_flashing = 0
    unknown = 0
    for label in y:
        if label[0]:
            flashing += 1
        elif label[1]:
            not_flashing += 1
        else:
            unknown += 1
    raw = [flashing, not_flashing, unknown]
    dist = [sum(raw) / float(i) for i in raw]
    # Keras class_weight expects 0-based class indices.
    class_weights = {0: dist[0], 1: dist[1], 2: dist[2]}
    print(class_weights)

    # Use custom metrics because acc is garbage
    print("setting metrics!")
    metrics = Metrics()

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            class_weight=class_weights,
            verbose=1,
            callbacks=[tb, metrics],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
Example #11
def train(data_type, seq_length, model, saved_model_extractnet=None,
          saved_model_lstm=None, concat=False, class_limit=None,
          image_shape=None, load_to_memory=False):
    # Set variables.
    nb_epoch = 1000
    batch_size = 32

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '16-40-conv-lstm-mixed-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='./data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('./data/logs/' + model + '-' + 'training-' + \
        str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(batch_size, 'train', data_type)
        print(X.shape)
        X_test, y_test = data.get_all_sequences_in_memory(batch_size, 'test',
                                                          data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type, concat)
        val_generator = data.test_frame(batch_size, 'test', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model_extractnet=saved_model_extractnet,
                         saved_model_lstm=saved_model_lstm)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X, y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            shuffle=False,
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            validation_data=val_generator,
            validation_steps=20)
Example #12
hidden_size = 50
use_dropout = True
model = Sequential()
vocabulary = 6
data_type = 'features'
seq_length = 60
class_limit = 6
image_shape = None
data = DataSet(seq_length=seq_length,
               class_limit=class_limit,
               image_shape=image_shape)
# generator = data.frame_generator(batch_size, 'train', data_type)
# # for f in generator:
# #     print(f)
# val_generator = data.frame_generator(batch_size, 'test', data_type)
X, y = data.get_all_sequences_in_memory('train', data_type)
print(X.shape)
print(y.shape)
X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
print(X_test.shape)
print(y_test.shape)
model.add(LSTM(hidden_size, input_shape=(60, 18), return_sequences=True))
model.add(LSTM(hidden_size, return_sequences=True))
#model.add(LSTM(hidden_size, return_sequences=True))
if use_dropout:
    model.add(Dropout(0.5))
if use_dropout:
    model.add(Dropout(0.5))

#model.add(Dense(4))
#model.add(TimeDistributed(Dense(vocabulary)))
Example #13
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          load_to_memory=False,
          batch_size=32,
          nb_epoch=100,
          dens_kernel_reg_l1=0.1,
          dens_kernel_reg_l2=0.1,
          dens_activity_reg_l1=0.1,
          dens_activity_reg_l2=0.1,
          conv3d_w_reg_l1=0.1,
          conv3d_w_reg_l2=0.1,
          conv3d_b_reg_l1=0.1,
          conv3d_b_reg_l2=0.1,
          conv3d_activity_reg_l1=0.1,
          conv3d_activity_reg_l2=0.1):
    # str of time
    current_datetime = datetime.datetime.now()
    str_datetime = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")

    # Helper: Save the model.
    checkpoint_path = os.path.join('data', 'checkpoints',
                                   EXP_NAME + '-' + model + '-' + data_type + \
                                   '.{epoch:03d}-{val_loss:.3f}' + '-' + str_datetime + '.hdf5')
    checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                                   verbose=1,
                                   save_best_only=True)

    # Helper: Schedule learning rate decay
    def step_decay(epoch):
        initial_lr = INIT_LEARNING_RATE
        lr_drop_ratio = LR_DROP_RATIO
        epochs_drop = EPOCHS_DROP
        lr = initial_lr * math.pow(lr_drop_ratio,
                                   math.floor((1 + epoch) / epochs_drop))
        print(lr)
        return lr

    learning_rate = LearningRateScheduler(step_decay)

    # Helper: TensorBoard
    # tb = TensorBoard(log_dir=os.path.join('data', 'logs', EXP_NAME + str_datetime))
    tb = LRTensorBoard(log_dir=os.path.join('data', 'logs', EXP_NAME +
                                            str_datetime))

    # Helper: Save results.
    log_path = os.path.join(
        'data', 'logs', EXP_NAME + '-' + 'training-' + str_datetime + '.log')
    csv_logger = CSVLogger(log_path)

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=PATIENTS)

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes),
                        model,
                        seq_length,
                        saved_model,
                        dens_kernel_reg_l1=dens_kernel_reg_l1,
                        dens_kernel_reg_l2=dens_kernel_reg_l2,
                        dens_activity_reg_l1=dens_activity_reg_l1,
                        dens_activity_reg_l2=dens_activity_reg_l2,
                        conv3d_w_reg_l1=conv3d_w_reg_l1,
                        conv3d_w_reg_l2=conv3d_w_reg_l2,
                        conv3d_b_reg_l1=conv3d_b_reg_l1,
                        conv3d_b_reg_l2=conv3d_b_reg_l2,
                        conv3d_activity_reg_l1=conv3d_activity_reg_l1,
                        conv3d_activity_reg_l2=conv3d_activity_reg_l2)

    # Get the optimizer:
    if OPTIMIZER == 'SGD':
        optimizer = keras.optimizers.SGD(lr=INIT_LEARNING_RATE,
                                         momentum=MOMENTUM,
                                         nesterov=False)
    elif OPTIMIZER == 'RMSProp':
        optimizer = keras.optimizers.RMSprop(lr=INIT_LEARNING_RATE,
                                             epsilon=None)
    elif OPTIMIZER == 'Adam':
        optimizer = keras.optimizers.Adam(lr=INIT_LEARNING_RATE,
                                          beta_1=0.9,
                                          beta_2=0.999,
                                          epsilon=None,
                                          amsgrad=False)
    else:
        raise ValueError('Unknown OPTIMIZER: %s' % OPTIMIZER)

    rm.model.compile(loss=LOSS_FUNCTION,
                     optimizer=optimizer,
                     metrics=['accuracy'])

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger, learning_rate],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(generator=generator,
                               steps_per_epoch=steps_per_epoch,
                               epochs=nb_epoch,
                               verbose=1,
                               callbacks=[
                                   tb, early_stopper, csv_logger, checkpointer,
                                   learning_rate
                               ],
                               validation_data=val_generator,
                               validation_steps=40,
                               workers=4)
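The step_decay schedule above scales the learning rate by LR_DROP_RATIO every EPOCHS_DROP epochs. A standalone sketch of the same staircase rule with hypothetical constants, since INIT_LEARNING_RATE, LR_DROP_RATIO, and EPOCHS_DROP are defined elsewhere in the source:

import math

def staircase_lr(epoch, initial_lr=1e-3, drop_ratio=0.5, epochs_drop=10):
    # lr = initial_lr * drop_ratio ** floor((1 + epoch) / epochs_drop)
    return initial_lr * math.pow(drop_ratio, math.floor((1 + epoch) / epochs_drop))

print([staircase_lr(e) for e in (0, 9, 19, 29)])
# -> [0.001, 0.0005, 0.00025, 0.000125]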
Example #14
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          concat=False,
          class_limit=None,
          image_shape=None,
          load_to_memory=False):
    # Set variables.
    nb_epoch = 1000
    # 32 for LSTM; 8 for CRNN; 5 for CNN-3d
    if model == 'conv_3d' or model == 'c3d':
        batch_size = 5
    elif model == 'crnn' or model == 'lrcn':
        batch_size = 8
    else:
        batch_size = 32

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath='/hdd/hpc/Projects/Weather/121201_Vi/data/checkpoints/' + model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='/hdd/hpc/Projects/Weather/121201_Vi/data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('/hdd/hpc/Projects/Weather/121201_Vi/data/logs/' + model + '-' + 'training-' + \
        str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    #steps_per_epoch = (len(data.data) * 0.7) // batch_size
    train, test, validation = data.split_train_test()
    steps_per_epoch = len(train) // batch_size

    if load_to_memory:
        # Get data.
        # X, y = data.get_all_sequences_in_memory(batch_size, 'train', data_type, concat)
        # X_val, y_val = data.get_all_sequences_in_memory(batch_size, 'validation', data_type, concat)
        # X_test, y_test = data.get_all_sequences_in_memory(batch_size, 'test', data_type, concat)
        X, y = data.get_all_sequences_in_memory('train', data_type, concat)
        X_val, y_val = data.get_all_sequences_in_memory(
            'validation', data_type, concat)
        X_test, y_test = data.get_all_sequences_in_memory(
            'test', data_type, concat)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type,
                                         concat)
        val_generator = data.frame_generator(batch_size, 'validation',
                                             data_type, concat)
        test_generator = data.frame_generator(batch_size, 'test', data_type,
                                              concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            # callbacks=[checkpointer, tb, csv_logger], #early_stopper, csv_logger],
            epochs=nb_epoch)

        # evaluate model
        results = rm.model.evaluate(X_test,
                                    y_test,
                                    batch_size=batch_size,
                                    verbose=1)
        print()
        print('Evaluation results on test data are:')
        print(results)
        print(rm.model.metrics_names)

    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            # callbacks=[checkpointer, tb, csv_logger], #early_stopper, csv_logger],
            validation_data=val_generator,
            validation_steps=10)

        # Evaluate!
        results = rm.model.evaluate_generator(generator=test_generator,
                                              steps=len(test) // batch_size)
        print()
        print('Evaluation results on test data are:')
        print(results)
        print(rm.model.metrics_names)
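The evaluation blocks above print the results list and rm.model.metrics_names separately; a small hypothetical helper pairs them for readability:

def report_metrics(metrics_names, results):
    # Pair each Keras metric name with its evaluated value.
    for name, value in zip(metrics_names, results):
        print('%s: %.4f' % (name, value))

# e.g. report_metrics(rm.model.metrics_names, results) at the end of train()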
Example #15
def train(seq_length,
          feature_length,
          class_limit=None,
          batch_size=32,
          nb_epoch=100):

    # Get the data.
    data = DataSet(seq_length=seq_length, class_limit=class_limit)

    # Load the training data (the forty 2048-dim features extracted from each video).
    # x_train is the array of features (seq_length per sample); y_train is the
    # one-hot encoded class array (likewise below).
    x_train, y_train = data.get_all_sequences_in_memory('train')
    x_test, y_test = data.get_all_sequences_in_memory('test')

    # Randomly shuffle the samples.
    x_train, y_train = shuffle(x_train, y_train)
    x_test, y_test = shuffle(x_test, y_test)

    # Determine the number of classes to train on.
    if class_limit is None:
        class_num = 101
    else:
        class_num = class_limit

    # Set the model hyperparameters.
    learning_rate_base = 0.0001
    learning_rate_decay = 0.9
    step_per_epoch = math.ceil(len(x_train) / batch_size)
    regularizer_rate = 0.2

    with tf.name_scope("input"):
        # Define the inputs.
        input_x = tf.placeholder(tf.float32,
                                 [None, seq_length, feature_length],
                                 name="input-x")
        input_y = tf.placeholder(tf.float32, [None, class_num], name="input-y")

    # Set up the L2 regularizer.
    regularizer = tf.contrib.layers.l2_regularizer(regularizer_rate)

    # Forward pass of the LSTM model for training.
    logits, inference_y = lstm_inference.inference(input_x, class_num,
                                                   regularizer)

    with tf.name_scope("loss"):
        # Use the mean cross-entropy as the error.
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=input_y))
        tf.summary.scalar("cross_entropy", cross_entropy)
        # Compute the total loss (cross-entropy plus regularization).
        loss = cross_entropy + tf.add_n(tf.get_collection("losses"))

    # Initialize the global step counter.
    global_step = tf.Variable(0, trainable=False)

    with tf.name_scope("train"):
        # Exponentially decayed learning rate (updated once per full pass over the training set).
        learning_rate = tf.train.exponential_decay(learning_rate_base,
                                                   global_step, step_per_epoch,
                                                   learning_rate_decay)
        tf.summary.scalar("learning-rate", learning_rate)

        # Define the optimizer.
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

    # Forward pass of the LSTM for evaluation.
    _, output_y = lstm_inference.inference(input_x,
                                           class_num,
                                           None,
                                           train=False,
                                           reuse=True)

    with tf.name_scope("accuracy"):
        # Accuracy during training.
        correct_prediction_train = tf.equal(tf.argmax(inference_y, 1),
                                            tf.argmax(input_y, 1))
        acc_train = tf.reduce_mean(
            tf.cast(correct_prediction_train, tf.float32))
        tf.summary.scalar("acc_train", acc_train)
        # Accuracy at test time.
        correct_prediction = tf.equal(tf.argmax(output_y, 1),
                                      tf.argmax(input_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Saver for the model variables.
    saver = tf.train.Saver(max_to_keep=1)

    # Merge all summary ops.
    merged = tf.summary.merge_all()

    # Launch a session and train.
    with tf.Session() as sess:
        # Create the summary writer and log the current TensorFlow graph.
        summary_writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)

        # Initialize all variables.
        tf.global_variables_initializer().run()

        # Best test accuracy so far, used to check for improvement.
        last_test_acc = 0.0

        # Counter of consecutive epochs without accuracy improvement.
        acc_not_improved = 0

        # Maximum number of epochs without improvement before stopping.
        max_acc_not_improved = 30

        test_acc_list = []

        for i in range(nb_epoch):
            print("epoch %d:" % (i + 1))
            for j in range(step_per_epoch):
                start = j * batch_size
                end = min(start + batch_size, len(x_train))
                # One optimization step.
                _, loss_value, step, summary = sess.run(
                    [train_step, cross_entropy, global_step, merged],
                    feed_dict={
                        input_x: x_train[start:end],
                        input_y: y_train[start:end]
                    })
                # Write all summaries to file for TensorBoard.
                summary_writer.add_summary(summary, step)
                # Print progress.
                print(
                    "\tAfter %d training step(s), loss on training batch is %g"
                    % (step, loss_value))

            train_acc = sess.run(accuracy,
                                 feed_dict={
                                     input_x: x_train,
                                     input_y: y_train
                                 })

            # Accuracy on the test set.
            test_acc = sess.run(accuracy,
                                feed_dict={
                                    input_x: x_test,
                                    input_y: y_test
                                })
            test_acc_list.append(test_acc)
            print(
                "After %d epoch(es), accuracy on train is %g, accuracy on test is %g"
                % ((i + 1), train_acc, test_acc))

            # Save the model if accuracy improved.
            if test_acc > last_test_acc:
                last_test_acc = test_acc
                print("accuracy improved, saved model")
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=i)
                acc_not_improved = 0
            else:
                acc_not_improved += 1
                print("accuracy not improved")

            # Stop training once the early-stopping condition is met.
            if acc_not_improved >= max_acc_not_improved:
                break
        # Plot the test-accuracy curve (the x axis advances 30 training steps per epoch).
        x = [30 * (i + 1) for i in range(len(test_acc_list))]
        plt.plot(x,
                 test_acc_list,
                 linewidth=3,
                 color='black',
                 marker='o',
                 markerfacecolor='white',
                 markersize=3,
                 label='test accuracy')
        plt.xlabel('train_step')
        plt.ylabel('test_accuracy')
        plt.legend()
        plt.show()
        print("\n\nEnd of training")
Example #16
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          concat=False,
          class_limit=None,
          image_shape=None,
          load_to_memory=False):
    # Set variables.
    nb_epoch = 1000
    batch_size = 16

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type + \
                 '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)

    incepcheck = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type + \
                 '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True,
        save_weights_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='./data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('./data/logs/' + model + '-' + 'training-' + \
                           str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(batch_size, 'train', data_type,
                                                concat)
        X_test, y_test = data.get_all_sequences_in_memory(
            batch_size, 'test', data_type, concat)

    elif model == 'div_crnn':
        generator = data.frame_generator2(batch_size, 'train', data_type,
                                          concat)
        val_generator = data.frame_generator2(batch_size, 'test', data_type,
                                              concat)

    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type,
                                         concat)
        val_generator = data.frame_generator(batch_size, 'test', data_type,
                                             concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # model_json_str = rm.model.to_json()
    # open('/home/takubuntu/PycharmProjects/DL/Wake_detect/IR_classification/data/checkpoints/json_model.json','w').write(model_json_str)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[checkpointer, tb, csv_logger],
                     epochs=nb_epoch)
    # elif model == 'inception*':
    #     rm.model.fit_generator(
    #         generator=generator,
    #         steps_per_epoch=steps_per_epoch,
    #         epochs=nb_epoch,
    #         verbose=1,
    #         callbacks=[incepcheck, tb, csv_logger],
    #         validation_data=val_generator,
    #         validation_steps=10)
    else:
        # Use fit generator.
        rm.model.fit_generator(generator=generator,
                               steps_per_epoch=steps_per_epoch,
                               epochs=nb_epoch,
                               verbose=1,
                               callbacks=[checkpointer, tb, csv_logger],
                               validation_data=val_generator,
                               validation_steps=10)
Example #17
use_dropout = True
vocabulary = 6
data_type = 'features'
seq_length = 60
class_limit = 6
image_shape = None
data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )
# generator = data.frame_generator(batch_size, 'train', data_type)
# # for f in generator:
# #     print(f)
# val_generator = data.frame_generator(batch_size, 'test', data_type)
X_tr, y_tr = data.get_all_sequences_in_memory('train', data_type)
X_train = X_tr.reshape(780, 1080)  # 780 sequences, each flattened to 60 steps * 18 features
y_train = np.zeros(780)
j = 0
for i in y_tr:
    y_train[j] = np.argmax(i)
    j += 1
#print(X_train.shape)
#print(y_train.shape)
X_te, y_te = data.get_all_sequences_in_memory('test', data_type)
X_test = X_te.reshape(192, 1080)
y_test = np.zeros(192)
j = 0
for i in y_te:
    y_test[j] = np.argmax(i)
    j += 1
Example #18
def train(data_type, seq_length, model, saved_model=None,
          class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
Example #19
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          concat=False,
          class_limit=None,
          image_shape=None,
          load_to_memory=False):
    # Set variables.
    nb_epoch = 1000
    batch_size = 8
    seq_length = 125  # note: this overrides the seq_length argument

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.getcwd()+'\\data\\checkpoints\\' + model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=2,
        save_best_only=True)
    lrScheduler = ReduceLROnPlateau(monitor='val_loss',
                                    factor=0.5,
                                    patience=1,
                                    cooldown=1,
                                    verbose=2)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.getcwd() + '\\data\\logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.getcwd()+'\\data\\logs\\' + model + '-' + 'training-' + \
        str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch. This variant uses the full length of data.data
    # rather than the usual 0.7 train-fraction guess.
    steps_per_epoch = len(data.data)

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(batch_size, 'train', data_type,
                                                concat)
        X_test, y_test = data.get_all_sequences_in_memory(
            batch_size, 'test', data_type, concat)


##        pathy = os.getcwd()+'/y.npy'
##        numpy.save(pathy,y)
##        pathyt = os.getcwd()+'/y_test.npy'
##        numpy.save(pathyt,y_test)

    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type,
                                         concat)
        val_generator = data.frame_generator(batch_size, 'test', data_type,
                                             concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
    print("research model")
    print(rm.model.summary())
    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     shuffle=True,
                     verbose=2,
                     callbacks=[
                         checkpointer, tb, early_stopper, csv_logger,
                         lrScheduler
                     ],
                     epochs=nb_epoch)
        print("from load to memory")
    else:
        # Use fit generator.
        rm.model.fit_generator(generator=generator,
                               steps_per_epoch=steps_per_epoch,
                               epochs=nb_epoch,
                               verbose=2,
                               shuffle=True,
                               callbacks=[
                                   checkpointer, tb, early_stopper, csv_logger,
                                   lrScheduler
                               ],
                               validation_data=val_generator,
                               validation_steps=10)
        print("from generator")
Example #20
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          concat=False,
          class_limit=None,
          image_shape=None,
          load_to_memory=False):

    #Video-Classification-CNN-and-LSTM--master
    #callbacks = [ EarlyStopping(monitor='val_loss', patience=10, verbose=0),
    #ModelCheckpoint('video_1_LSTM_1_1024.h5', monitor='val_loss', save_best_only=True, verbose=0) ]
    # nb_epoch = 500

    # Set variables.
    nb_epoch = 1000
    batch_size = 32

    checkpointer = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type +  \
            '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)

    # filepath='./data/checkpoints/try.hdf5',

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='./data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=200)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('./data/logs/' + model + '-' + 'training-' + \
        str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(batch_size, 'train', data_type,
                                                concat)
        X_test, y_test = data.get_all_sequences_in_memory(
            batch_size, 'test', data_type, concat)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type,
                                         concat)
        val_generator = data.frame_generator(batch_size, 'test', data_type,
                                             concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    #model.fit(train_data,train_labels,validation_data=(validation_data,validation_labels),
    #batch_size=batch_size,nb_epoch=nb_epoch,callbacks=callbacks,shuffle=True,verbose=1)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[checkpointer, tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            validation_data=val_generator,
            validation_steps=10)
Example #21
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          concat=False,
          class_limit=None,
          image_shape=None,
          load_to_memory=True):
    # Set variables.
    nb_epoch = 1000000
    batch_size = 32

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='./data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(
        patience=100000)  # number of epochs with no improvement before stopping

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('./data/logs/' + model + '-' + 'training-' + \
        str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.

    steps_per_epoch = (len(data.data) * 0.7) // batch_size
    print("Iterations per epoach", steps_per_epoch)

    if load_to_memory:
        # Get data.

        X, y = data.get_all_sequences_in_memory('train', data_type, concat)
        X_test, y_test = data.get_all_sequences_in_memory(
            'test', data_type, concat)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type,
                                         concat)
        val_generator = data.frame_generator(batch_size, 'test', data_type,
                                             concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length,
                        saved_model)  # builds the requested architecture
    print(rm)
    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
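
# A minimal usage sketch for the train() above; the argument values are
# illustrative assumptions, not taken from the original source:
#
#     train('features', seq_length=40, model='lstm',
#           concat=False, load_to_memory=True)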
Example #22
0
def train(istrain=True, model_type='quadmodal_1', saved_model_path=None, task='emotion',
         batch_size=2, nb_epoch=200, learning_r=1e-3, show_plots=True, is_fusion=False,
         fusion_type=None, pretrained=False):
    """
    train the model
    :param model: 'visual_model','audio_model','word_model','trimodal_model','quadmodal_X_model'
    :param saved_model_path: saved_model path
    :param task: 'aoursal','valence','emotion'
    :param batch_size: 2
    :param nb_epoch:2100
    :return:s
    """
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(time.time()))
    # Helper: Save the model.
    model_name = model_type
    model_name = model_name.replace(':','-')
    model_name = model_name.replace('[','')
    model_name = model_name.replace(']','')
    if ',' in model_name:
        model_name = model_name.replace(',','__')
        max_len = 200
        if len(model_name) >= max_len:
            model_name = model_name[:max_len]
        model_name = 'fusion_' + fusion_type + '__' + model_name
    if not os.path.exists(os.path.join('checkpoints', model_name)):
        os.makedirs(os.path.join('checkpoints', model_name))
    checkpointer = ModelCheckpoint(
        monitor='val_acc',
        #filepath = os.path.join('checkpoints', model, task+'-'+ str(timestamp)+'-'+'best.hdf5' ),
        filepath = os.path.join('checkpoints', model_name, task + '-{val_acc:.3f}-{acc:.3f}.hdf5' ),
        verbose=1,
        save_best_only=True)
    checkpointer_acc = ModelCheckpoint(
        monitor='acc',
        #filepath = os.path.join('checkpoints', model, task+'-'+ str(timestamp)+'-'+'best.hdf5' ),
        filepath = os.path.join('checkpoints', model_name, task + '-{val_acc:.3f}-{acc:.3f}.hdf5' ),
        verbose=1,
        save_best_only=True)
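    # Note: both checkpointers above write to the same filepath pattern, so a
    # file saved for best 'acc' can overwrite one saved for best 'val_acc'
    # whenever the formatted metric values coincide.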
    
    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('logs', model_name))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=1000)
    
    # Helper: Save results.
    
    csv_logger = CSVLogger(os.path.join('logs', model_name , task +'-'+ \
        str(timestamp) + '.log'))

    # Get the data and process it.
    # seq_length for the sentence
    seq_length = 20
    dataset = DataSet(
        istrain=istrain,
        model=model_type,
        task=task,
        seq_length=seq_length,
        model_name=model_name,
        is_fusion=is_fusion
    )

    # Get the model.
    model = None
    if pretrained:
        model_weights_path = get_best_model(model_name)
        if model_weights_path:
            print('USING MODEL', model_weights_path)
            model = load_model(model_weights_path)
        # model_file = os.path.join('models',model_name + '.hdf5')
        # if os.path.exists(model_file):
        #     model = load_model(model_file)
        # else:
        #     print('No trained model found')
    if model is None:
        rm = ResearchModels(
            istrain=istrain,
            model=model_type,
            seq_length=seq_length,
            saved_path=saved_model_path,
            task_type=task,
            learning_r=learning_r,
            model_name=model_name,
            is_fusion=is_fusion,
            fusion_type=fusion_type
        )
        model = rm.model
    # Get training and validation data.
    x_train, y_train, train_name_list = dataset.get_all_sequences_in_memory('Train')
    x_valid, y_valid, valid_name_list = dataset.get_all_sequences_in_memory('Validation')
    x_test, y_test, test_name_list = dataset.get_all_sequences_in_memory('Test')
    if task == 'emotion':
        y_train = to_categorical(y_train)
        y_valid = to_categorical(y_valid)
        y_test = to_categorical(y_test)

    # Fit!
    # Use standard fit
    print('Size', len(x_train), len(y_train), len(x_valid), len(y_valid), len(x_test), len(y_test))
    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        validation_data=(x_valid,y_valid),
        verbose=1,
        callbacks=[tb, csv_logger,  checkpointer, checkpointer_acc],
        #callbacks=[tb, early_stopper, csv_logger,  checkpointer],
        #callbacks=[tb, lrate, csv_logger,  checkpointer],
        epochs=nb_epoch)
    
    # find the current best model and get its prediction on validation set
    model_weights_path = get_best_model(model_name)
    #model_weights_path = os.path.join('checkpoints', model_name, task + '-' + str(nb_epoch) + '-' + str(timestamp) + '-' + 'best.hdf5' )
    print('model_weights_path', model_weights_path)

    if model_weights_path:
        best_model = load_custom_model(model_weights_path)
    else:
        best_model = model
    

    y_valid_pred = best_model.predict(x_valid)
    y_valid_pred = np.squeeze(y_valid_pred)
    
    y_train_pred = best_model.predict(x_train)
    y_train_pred = np.squeeze(y_train_pred)

    y_test_pred = best_model.predict(x_test)
    y_test_pred = np.squeeze(y_test_pred)

    # Calculate F1 scores and accuracies.

    if not os.path.exists('results'):
        os.mkdir('results')
    filename = os.path.join('results', model_name+'__'+str(nb_epoch)+'_'+task+'.txt')
    f1_score = f1(y_valid, y_valid_pred)
    f1_score_test = f1(y_test, y_test_pred)
    acc_val = model.evaluate(x_valid, y_valid, verbose=1)[1]
    acc_train = model.evaluate(x_train, y_train, verbose=1)[1]
    acc_test = model.evaluate(x_test, y_test, verbose=1)[1]
    print("F1 score in validation set is {}".format(f1_score))
    print("F1 score in test set is {}".format(f1_score_test))
    print("Val acc is {}".format(acc_val))
    print("Train acc is {}".format(acc_train))
    print("Test acc is {}".format(acc_test))
    plot_acc(history, model_name, timestamp, show_plots, nb_epoch)
    with open(filename, 'w') as f:
        f.write(str([acc_val, acc_train, acc_test, f1_score, f1_score_test]))
    # display the prediction and true label
    log_path = os.path.join('logs', model_name , task +'-'+ \
        str(timestamp) + '.log')
    
    display_true_vs_pred([y_valid, y_train, y_test],
                         [y_valid_pred, y_train_pred, y_test_pred],
                         log_path, task, model_name,
                         [acc_val, acc_train, acc_test],
                         show_plots, timestamp, nb_epoch)
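
# A minimal usage sketch for the train() above; the argument values are
# illustrative assumptions, not taken from the original source:
#
#     train(istrain=True, model_type='trimodal_model', task='emotion',
#           batch_size=2, nb_epoch=200, learning_r=1e-3)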
Example #23
0
IMGWIDTH = 224
sequences_dir = os.path.join('data', 'sequences')
if not os.path.exists(sequences_dir):
    os.mkdir(sequences_dir)

extract_features(10, 2, (224,224,3))
data = DataSet(
    seq_length=20,
    class_limit=2,
    image_shape=(IMGWIDTH, IMGWIDTH, 3)
)
data_type = 'features'
# train, test = data.split_train_test()
# print(test)
x, y = data.get_all_sequences_in_memory('test', data_type)

weights_file = 'cnn_lstm_VGGFace.h5'
the_model = ResearchModels(2, 'lstm', 20, features_length=2622)
the_model.model.load_weights(weights_file)
# results = the_model.model.evaluate(x_test_imgs,y_test, batch_size=32)
# print('test loss: {} \n test acc: {}'.format(results[0],results[1]))

# test_gen = data.frame_generator(32, 'test')
# for _ in range(5):
#     results = the_model.model.evaluate_generator(test_gen, steps=30)
#     print('test loss: {} \t test acc: {}'.format(results[0], results[1]))

# print(len(x))
preds = the_model.model.predict(x)
# print(len(preds))
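
# A minimal sketch of turning preds into class labels and a quick accuracy
# figure, assuming y is one-hot encoded as in the other examples here:
#
#     import numpy as np
#     pred_labels = np.argmax(preds, axis=1)
#     true_labels = np.argmax(y, axis=1)
#     print('test acc: {:.3f}'.format(np.mean(pred_labels == true_labels)))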
Example #24
0
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          load_to_memory=False,
          batch_size=32,
          nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
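
# A sketch of how data_type pairs with the model choice in this style of
# code; the pairings are assumptions based on the surrounding examples:
#
#     train('features', 40, 'lstm', load_to_memory=False)    # sequence models
#     train('images', 40, 'lrcn', image_shape=(80, 80, 3))   # end-to-end CNN+RNN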
Example #25
0
# print(y_val.shape)
# df = pd.read_csv("new_data/data_file.csv", header = None)

# test_no = df[df.iloc[:,0] == 'testing'].shape[0]
# train_no = df[df.iloc[:,0] == 'training'].shape[0]

data = DataSet(
    seq_length=seq,
    class_limit=2,
    image_shape=(320, 240, 3),
    #         initial=initial
)

X, y, paths_train = data.get_all_sequences_in_memory('training', frame, seq,
                                                     initial)
print("X.shape", X.shape)
print("y.shape", y.shape)

X_val, y_val, paths_val = data.get_all_sequences_in_memory(
    'testing', test_frame, seq, test_initial)
print("X_val.shape", X_val.shape)
print("Y_val.shape", y_val.shape)

# X_test, y_test,paths_test = data.get_all_sequences_in_memory('validation',test_frame, seq, test_initial)
# print("X_test.shape" ,X_test.shape)
# print("y_test.shape" ,y_test.shape)

print(data.get_classes())

features = 512
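
# A minimal sketch of the model this fragment appears to be building toward,
# assuming Keras, with feature sequences of shape (seq, features); the layer
# sizes are illustrative assumptions:
#
#     from keras.models import Sequential
#     from keras.layers import LSTM, Dense
#
#     model = Sequential([
#         LSTM(features, input_shape=(seq, features)),
#         Dense(len(data.get_classes()), activation='softmax'),
#     ])
#     model.compile(loss='categorical_crossentropy', optimizer='adam',
#                   metrics=['accuracy'])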