Exemplo n.º 1
0
def train(features_filepath, weights_dir, rf_model, start_fold=1, end_fold=1, timestep=5, progress_percent=0.05, iccv_epic=True, features_size=4096, cores=None):
    """Fit one random forest per fold on windows of concatenated frame features.

    For every fold in ``[start_fold, end_fold]`` the pickled per-user features
    are loaded and each training batch is flattened into one row of
    ``timestep * features_size`` values (the feature vectors of the batch's
    frames laid side by side). The label of the batch's last frame is the
    target. The fitted forest is pickled into ``weights_dir`` as
    ``weights.<rf_model.name>.fold_<fold>.pkl``.

    Args:
        features_filepath: Format string; ``.format(fold)`` is applied with the
            zero-padded two-digit fold id to obtain the pickle to load.
        weights_dir: Directory the fitted forests are written to.
        rf_model: Object providing ``name`` and ``num_estimators``.
        start_fold: First fold (1-based). When falsy, it is resolved through
            ``current_fold(weights_dir, rf_model.name)``.
        end_fold: Last fold, inclusive.
        timestep: Number of frames concatenated into one sample.
        progress_percent: Fraction of the batches between two progress lines;
            a falsy value disables progress output.
        iccv_epic: When true the training split is read from
            ``datasets/ntcir/training_split.txt``; otherwise it is derived
            from the fold index.
        features_size: Length of a single frame's feature vector.
        cores: ``n_jobs`` for the forest; defaults to the machine's CPU count.

    Returns:
        The forest fitted for the last processed fold, or ``None`` when the
        fold range is empty.
    """
    np.random.seed(42)

    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)

    num_frames_per_day = 2880
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    if not start_fold:
        start_fold = current_fold(weights_dir, rf_model.name)

    if not cores:
        cores = multiprocessing.cpu_count()

    # Stays None when range(start_fold, end_fold + 1) is empty, so the final
    # return cannot hit an unbound local.
    random_forest = None

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        # Pickles are binary data: text mode corrupts them on Windows and
        # fails outright on Python 3.
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        with open(features_filepath.format(fold), 'rb') as f:
            user_features = pickle.load(f)

        if iccv_epic:
            train_split = ntcir.read_split('datasets/ntcir/training_split.txt')
        else:
            train_split = ntcir.get_split_fold(sorted_users, int(fold) - 1)

        training_batches = ntcir.get_training_batches(train_split, sequences, timestep=timestep)

        num_features = timestep * features_size
        num_training_batches = len(training_batches)

        # One row per batch; frame j of a batch fills columns
        # [j * features_size, (j + 1) * features_size).
        features = np.zeros((num_training_batches, num_features))
        targets = np.zeros(num_training_batches)

        report_every = 0
        if progress_percent:
            # At least 1, otherwise small datasets trigger a modulo by zero.
            report_every = max(1, int(num_training_batches * progress_percent))
            print("Creating training matrix for fold {}".format(fold))

        for i, batch in enumerate(training_batches):
            day = user_features[batch.user_id][batch.date]
            for j, ind in enumerate(batch.indices):
                start_ind = j * features_size
                end_ind = (j + 1) * features_size
                features[i, start_ind:end_ind] = day.images[ind].features

            # The sample is labelled with the activity of its last frame.
            targets[i] = day.images[batch.indices[-1]].label

            if report_every and (i + 1) % report_every == 0:
                # 100.0 forces float division on Python 2 as well.
                print("Progress %3.2f%% (%d/%d)" % (100.0 * (i + 1) / num_training_batches, i + 1, num_training_batches))

        gc.collect()
        random_forest = RandomForestClassifier(n_estimators=rf_model.num_estimators, n_jobs=cores)
        random_forest.fit(features, targets)

        weights_filepath = os.path.join(weights_dir, "weights." + rf_model.name + ".fold_" + fold + ".pkl")
        # HIGHEST_PROTOCOL is a binary protocol, so the file must be opened 'wb'.
        with open(weights_filepath, 'wb') as f:
            pickle.dump(random_forest, f, pickle.HIGHEST_PROTOCOL)
    return random_forest
def train(features_filepath, weights_dir, sgd_params, base_model, start_fold=None, end_fold=5, timestep=10,
          batch_size=1, iccv_epic=False, epochs=10):
    """Fine-tune ``base_model`` with SGD on every fold and log the history.

    For each fold the pickled features are loaded, training and validation
    batches are built, the model is compiled with categorical cross-entropy
    and trained through ``fit_generator``. A checkpoint is written after every
    epoch, and the loss and epoch logs are written to ``weights_dir`` once the
    fold finishes.

    Args:
        features_filepath: Format string; ``.format(fold)`` is applied with the
            zero-padded two-digit fold id to obtain the pickle to load.
        weights_dir: Directory receiving checkpoints and log files.
        sgd_params: Object providing ``lr``, ``decay``, ``momentum`` and
            ``nesterov`` for the SGD optimizer.
        base_model: Object providing ``name``, ``feature_vector_length`` and
            ``load(feature_vector_length=..., timestep=...)``.
        start_fold: First fold (1-based). When falsy, it is resolved through
            ``current_fold(weights_dir, base_model.name)``.
        end_fold: Last fold, inclusive.
        timestep: Sequence length passed to the batch builders and the model.
        batch_size: Number of batches consumed per generator step.
        iccv_epic: When true the fixed split files under ``datasets/ntcir``
            are used; otherwise the splits are derived from the fold index.
        epochs: Number of training epochs per fold (previously fixed at 10).
    """
    np.random.seed(42)

    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)

    num_frames_per_day = 2880
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    # Short backend tag embedded in the checkpoint file names.
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'

    if not start_fold:
        start_fold = current_fold(weights_dir, base_model.name)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        # Pickles are binary data: text mode corrupts them on Windows and
        # fails outright on Python 3.
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        with open(features_filepath.format(fold), 'rb') as f:
            features = pickle.load(f)

        if iccv_epic:
            train_split = ntcir.read_split('datasets/ntcir/training_split.txt')
            test_split = ntcir.read_split('datasets/ntcir/validation_split.txt')
        else:
            train_split = ntcir.get_split_fold(sorted_users, int(fold) - 1)
            test_split = ntcir.get_split_fold(sorted_users, int(fold) - 1, False)

        training_batches = ntcir.get_training_batches(train_split, sequences, timestep=timestep)
        test_batches = ntcir.get_batches(test_split, sequences, timestep=timestep)

        K.set_learning_phase(1)

        model = base_model.load(feature_vector_length=base_model.feature_vector_length, timestep=timestep)
        sgd = SGD(lr=sgd_params.lr, decay=sgd_params.decay, momentum=sgd_params.momentum, nesterov=sgd_params.nesterov)
        model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

        # Whole generator steps only; a trailing partial step is dropped.
        steps_per_epoch = len(training_batches) // batch_size
        train_generator = generate_batch(features, training_batches, base_model.feature_vector_length, batch_size,
                                         timestep, steps_per_epoch)
        validation_steps = len(test_batches) // batch_size
        validation_generator = generate_batch(features, test_batches, base_model.feature_vector_length, batch_size,
                                              timestep, validation_steps)

        # Checkpoint after every epoch (save_best_only=False); Keras fills in
        # the "{epoch:02d}" placeholder itself.
        base_model_weights = "weights." + base_model.name + ".fold_" + fold + ".epoch_{epoch:02d}." + backend + ".hdf5"
        weights_filepath = os.path.join(weights_dir, base_model_weights)
        checkpoint = ModelCheckpoint(weights_filepath, monitor='val_acc', verbose=1, save_best_only=False)
        history = HistoryLog()

        # fine-tune the model
        model.fit_generator(
            train_generator,
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            callbacks=[checkpoint, history],
            validation_data=validation_generator,
            validation_steps=validation_steps)

        # NOTE(review): ':' in the timestamp is not a valid file-name character
        # on Windows; kept because existing tooling may rely on this format.
        timestamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d_%H:%M:%S')

        loss_filepath = os.path.join(weights_dir,
                                     "{}.fold_{}.loss.{}.log".format(base_model.name, fold, timestamp))
        history.log_training_loss(loss_filepath)

        epoch_filepath = os.path.join(weights_dir,
                                      "{}.fold_{}.epoch.{}.log".format(base_model.name, fold, timestamp))
        history.log_epoch(epoch_filepath)

        # Release the model before the next fold is built.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
Exemplo n.º 3
0
def test(features_filepath,
         results_dir,
         rf_model,
         start_fold=1,
         end_fold=1,
         timestep=5,
         progress_percent=0.05,
         iccv_epic=True,
         features_size=4096):
    """Evaluate the stored random forest of every fold and write the results.

    For each fold the pickled features and the fold's forest are loaded, each
    test batch is flattened into one row of ``timestep * features_size``
    values, and all rows are classified in a single ``predict`` call. One
    ``(image path, label, prediction)`` tuple per batch, taken from the
    batch's last frame, is passed to ``write_results``.

    Args:
        features_filepath: Format string; ``.format(fold)`` is applied with the
            zero-padded two-digit fold id to obtain the pickle to load.
        results_dir: Directory the per-fold CSV file is written to.
        rf_model: Object providing ``name`` and a ``weights`` format string
            that takes the fold id.
        start_fold: First fold (1-based).
        end_fold: Last fold, inclusive.
        timestep: Number of frames concatenated into one sample.
        progress_percent: Currently unused; kept so existing callers that pass
            it keep working.
        iccv_epic: When true the test split is read from
            ``datasets/ntcir/test_split.txt``; otherwise it is derived from
            the fold index.
        features_size: Length of a single frame's feature vector.
    """
    np.random.seed(42)

    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)

    num_frames_per_day = 2880
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    # `backend` was used in the results file name without ever being defined
    # (NameError). Define it the same way the Keras-based test functions do so
    # the file-name pattern stays "<name>.fold_<fold>.<backend>.csv".
    # NOTE(review): assumes K (keras.backend) is imported in this module.
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        # Pickles are binary data: text mode corrupts them on Windows and
        # fails outright on Python 3.
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        with open(features_filepath.format(fold), 'rb') as f:
            user_features = pickle.load(f)

        weights = rf_model.weights.format(fold)
        rf = load_random_forest(weights)

        if iccv_epic:
            test_split = ntcir.read_split('datasets/ntcir/test_split.txt')
        else:
            test_split = ntcir.get_split_fold(sorted_users,
                                              int(fold) - 1, False)
        test_batches = ntcir.get_training_batches(test_split,
                                                  sequences,
                                                  timestep=timestep)

        num_features = timestep * features_size
        num_test_batches = len(test_batches)

        # One row per batch; frame j of a batch fills columns
        # [j * features_size, (j + 1) * features_size).
        features = np.zeros((num_test_batches, num_features))
        img_paths = []
        labels = []
        for i, batch in enumerate(test_batches):
            day = user_features[batch.user_id][batch.date]
            for j, ind in enumerate(batch.indices):
                start_ind = j * features_size
                end_ind = (j + 1) * features_size
                features[i, start_ind:end_ind] = day.images[ind].features

            # Each sample is scored against the label of its last frame.
            last_image = day.images[batch.indices[-1]]
            img_paths.append(last_image.path)
            labels.append(last_image.label)

        # Single vectorized call instead of one predict per sample.
        predictions = rf.predict(features)

        results = list(zip(img_paths, labels, predictions))

        results_fname = "{}.fold_{}.{}.csv".format(rf_model.name, fold,
                                                   backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)
def test(features_filepath,
         results_dir,
         base_model,
         start_fold,
         end_fold,
         timestep,
         iccv_epic=False):
    """Run the best weights of every fold over the test batches, frame by frame.

    For each fold the pickled features are loaded, the model is restored from
    ``base_model.best_weights`` and every test batch is predicted on its own.
    The arg-max class of each of the first ``batch.size`` timesteps is
    collected together with the ground truth and the frame paths, and all
    three lists are passed to ``write_results``.

    Args:
        features_filepath: Format string; ``.format(fold)`` is applied with the
            zero-padded two-digit fold id to obtain the pickle to load.
        results_dir: Directory the per-fold CSV file is written to.
        base_model: Object providing ``name``, ``feature_vector_length``, a
            ``best_weights`` format string and ``load(...)``.
        start_fold: First fold (1-based). When falsy, it is resolved through
            ``current_fold(results_dir, base_model.name + '.fold')``.
        end_fold: Last fold, inclusive.
        timestep: Sequence length passed to the batch builder and the model.
        iccv_epic: When true the test split is read from
            ``datasets/ntcir/test_split.txt``; otherwise it is derived from
            the fold index.
    """
    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)

    num_frames_per_day = 2880
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    # Short backend tag embedded in the results file name.
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'

    if not start_fold:
        start_fold = current_fold(results_dir, base_model.name + '.fold')

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        # Pickles are binary data: text mode corrupts them on Windows and
        # fails outright on Python 3.
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        with open(features_filepath.format(fold), 'rb') as f:
            features = pickle.load(f)

        if iccv_epic:
            test_split = ntcir.read_split('datasets/ntcir/test_split.txt')
        else:
            test_split = ntcir.get_split_fold(sorted_users,
                                              int(fold) - 1, False)
        test_batches = ntcir.get_batches(test_split,
                                         sequences,
                                         timestep=timestep,
                                         include_last=True)

        K.set_learning_phase(False)

        weights = base_model.best_weights.format(fold)
        model = base_model.load(
            feature_vector_length=base_model.feature_vector_length,
            weights=weights,
            timestep=timestep)

        frames = []
        groundtruth = []
        predictions = []
        for batch in test_batches:
            x, y = load_batch(
                features,
                batch,
                feature_vector_length=base_model.feature_vector_length,
                batch_size=1,
                timestep=timestep)

            # reshape(-1) rather than squeeze(): with timestep == 1 squeeze()
            # yields a 0-d array that cannot be sliced.
            # Assumes the output is (1, timestep, classes), as implied by
            # batch_size=1 and the axis=2 arg-max -- TODO confirm.
            scores = model.predict_on_batch(x)
            predicted = np.argmax(scores, axis=2).reshape(-1)[0:batch.size]
            expected = np.argmax(y, axis=2).reshape(-1)[0:batch.size]

            predictions.extend(predicted)
            groundtruth.extend(expected)

            day = features[batch.user_id][batch.date]
            frames.extend(day.images[ind].path for ind in batch.indices)

        results_fname = "{}.fold_{}.{}.csv".format(base_model.name, fold,
                                                   backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(frames, groundtruth, predictions, results_filepath)

        # Release the model before the next fold is built.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
def test(features_filepath,
         results_dir,
         base_model,
         start_fold,
         end_fold,
         timestep,
         iccv_epic=False,
         progress_percent=0.05):
    """Run the best weights of every fold and keep only the last-frame output.

    For each fold the pickled features are loaded, the model is restored from
    ``base_model.best_weights`` and every test batch is predicted on its own.
    Only the arg-max class of the final timestep is kept. It is paired with
    the path and label of the batch's last frame, and the resulting tuples
    are passed to ``write_results``.

    Args:
        features_filepath: Format string; ``.format(fold)`` is applied with the
            zero-padded two-digit fold id to obtain the pickle to load.
        results_dir: Directory the per-fold CSV file is written to.
        base_model: Object providing ``name``, ``feature_vector_length``, a
            ``best_weights`` format string and ``load(...)``.
        start_fold: First fold (1-based). When falsy, it is resolved through
            ``current_fold(results_dir, base_model.name + '.fold')``.
        end_fold: Last fold, inclusive.
        timestep: Sequence length passed to the batch builder and the model.
        iccv_epic: When true the test split is read from
            ``datasets/ntcir/test_split.txt``; otherwise it is derived from
            the fold index.
        progress_percent: Fraction of the batches between two progress lines;
            a falsy value disables progress output.
    """
    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)

    num_frames_per_day = 2880
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    # Short backend tag embedded in the results file name.
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'

    if not start_fold:
        start_fold = current_fold(results_dir, base_model.name + '.fold')

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        # Pickles are binary data: text mode corrupts them on Windows and
        # fails outright on Python 3.
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        with open(features_filepath.format(fold), 'rb') as f:
            features = pickle.load(f)

        if iccv_epic:
            test_split = ntcir.read_split('datasets/ntcir/test_split.txt')
        else:
            test_split = ntcir.get_split_fold(sorted_users,
                                              int(fold) - 1, False)
        test_batches = ntcir.get_training_batches(test_split,
                                                  sequences,
                                                  timestep=timestep)

        K.set_learning_phase(False)

        weights = base_model.best_weights.format(fold)
        model = base_model.load(
            feature_vector_length=base_model.feature_vector_length,
            weights=weights,
            timestep=timestep)

        num_test_batches = len(test_batches)

        report_every = 0
        if progress_percent:
            # At least 1, otherwise small test sets trigger a modulo by zero.
            report_every = max(1, int(num_test_batches * progress_percent))
            print("Testing fold {}".format(fold))

        results = []
        for i, batch in enumerate(test_batches):
            # The target array returned alongside x is not needed here: the
            # label is read from the frame itself.
            x, _ = load_batch(
                features,
                batch,
                feature_vector_length=base_model.feature_vector_length,
                batch_size=1,
                timestep=timestep)

            # reshape(-1) rather than squeeze(): with timestep == 1 squeeze()
            # yields a 0-d array that cannot be indexed.
            # Assumes the output is (1, timestep, classes), as implied by
            # batch_size=1 and the axis=2 arg-max -- TODO confirm.
            scores = model.predict_on_batch(x)
            prediction = np.argmax(scores, axis=2).reshape(-1)[-1]

            last_ind = batch.indices[-1]
            image = features[batch.user_id][batch.date].images[last_ind]

            results.append((image.path, image.label, prediction))
            if report_every and (i + 1) % report_every == 0:
                # 100.0 forces float division on Python 2 as well.
                print("Progress %3.2f%% (%d/%d)" % (
                    100.0 * (i + 1) / num_test_batches, i + 1, num_test_batches))

        results_fname = "{}.many2one.fold_{}.{}.csv".format(
            base_model.name, fold, backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)

        # Release the model before the next fold is built.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()