# Shared imports for the routines below. The identically named train()/test()
# functions come from separate modules of the project; the project-local
# helpers (IO, ntcir, current_fold, generate_batch, load_batch,
# load_random_forest, write_results, HistoryLog) are assumed importable from it.
import gc
import multiprocessing
import os
import pickle
from datetime import datetime
from time import time

import numpy as np
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from sklearn.ensemble import RandomForestClassifier


def train(features_filepath, weights_dir, rf_model, start_fold=1, end_fold=1,
          timestep=5, progress_percent=0.05, iccv_epic=True,
          features_size=4096, cores=None):
    np.random.seed(42)
    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)
    num_frames_per_day = 2880  # one frame every 30 seconds
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    if not start_fold:
        start_fold = current_fold(weights_dir, rf_model.name)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        # Per-fold CNN features, pickled in binary format
        with open(features_filepath.format(fold), 'rb') as f:
            user_features = pickle.load(f)

        if iccv_epic:
            train_split = ntcir.read_split('datasets/ntcir/training_split.txt')
        else:
            train_split = ntcir.get_split_fold(sorted_users, int(fold) - 1)
        training_batches = ntcir.get_training_batches(train_split, sequences,
                                                      timestep=timestep)

        num_features = timestep * features_size
        num_training_batches = len(training_batches)

        # Training matrix: each row concatenates the feature vectors of the
        # `timestep` images in a batch; the target is the label of the last image.
        features = np.zeros((num_training_batches, num_features))
        targets = np.zeros(num_training_batches)

        if progress_percent:
            training_progress_percent = max(1, int(num_training_batches * progress_percent))
            print "Creating training matrix for fold {}".format(fold)

        for i, batch in enumerate(training_batches):
            day = user_features[batch.user_id][batch.date]
            for j, ind in enumerate(batch.indices):
                image = day.images[ind]
                start_ind = j * features_size
                end_ind = (j + 1) * features_size
                features[i, start_ind:end_ind] = image.features

            last_ind = batch.indices[-1]
            targets[i] = day.images[last_ind].label
            if progress_percent and (i + 1) % training_progress_percent == 0:
                print("Progress %3.2f%% (%d/%d)" % (100.0 * (i + 1) / num_training_batches,
                                                    i + 1, num_training_batches))

        gc.collect()
        if not cores:
            cores = multiprocessing.cpu_count()
        random_forest = RandomForestClassifier(n_estimators=rf_model.num_estimators,
                                               n_jobs=cores)
        random_forest.fit(features, targets)

        weights_filepath = os.path.join(
            weights_dir, "weights." + rf_model.name + ".fold_" + fold + ".pkl")
        with open(weights_filepath, 'wb') as f:
            pickle.dump(random_forest, f, pickle.HIGHEST_PROTOCOL)

    # Only the model of the last processed fold is returned.
    return random_forest
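# --- Usage sketch (illustrative; not part of the original source) ---
# Minimal example of invoking the random-forest train() above. RFModel is a
# hypothetical stand-in for the project's model descriptor: train() only reads
# rf_model.name and rf_model.num_estimators. The paths and values are
# placeholders as well.
from collections import namedtuple

RFModel = namedtuple('RFModel', ['name', 'num_estimators'])

if __name__ == '__main__':
    rf_model = RFModel(name='rf_fc7', num_estimators=100)  # hypothetical values
    train('features/features.fold_{}.pkl',  # '{}' is filled with the zero-padded fold id
          'weights/',
          rf_model,
          start_fold=1, end_fold=1, timestep=5, features_size=4096)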
def train(features_filepath, weights_dir, sgd_params, base_model,
          start_fold=None, end_fold=5, timestep=10, batch_size=1,
          iccv_epic=False):
    np.random.seed(42)
    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)
    num_frames_per_day = 2880  # one frame every 30 seconds
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    if not start_fold:
        start_fold = current_fold(weights_dir, base_model.name)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        with open(features_filepath.format(fold), 'rb') as f:
            features = pickle.load(f)

        if iccv_epic:
            train_split = ntcir.read_split('datasets/ntcir/training_split.txt')
            test_split = ntcir.read_split('datasets/ntcir/validation_split.txt')
        else:
            train_split = ntcir.get_split_fold(sorted_users, int(fold) - 1)
            test_split = ntcir.get_split_fold(sorted_users, int(fold) - 1, False)

        training_batches = ntcir.get_training_batches(train_split, sequences,
                                                      timestep=timestep)
        test_batches = ntcir.get_batches(test_split, sequences, timestep=timestep)

        K.set_learning_phase(1)  # training phase
        model = base_model.load(feature_vector_length=base_model.feature_vector_length,
                                timestep=timestep)
        sgd = SGD(lr=sgd_params.lr, decay=sgd_params.decay,
                  momentum=sgd_params.momentum, nesterov=sgd_params.nesterov)
        model.compile(loss='categorical_crossentropy', optimizer=sgd,
                      metrics=['accuracy'])

        steps_per_epoch = int(len(training_batches) / batch_size)
        train_generator = generate_batch(features, training_batches,
                                         base_model.feature_vector_length,
                                         batch_size, timestep, steps_per_epoch)
        validation_steps = int(len(test_batches) / batch_size)
        validation_generator = generate_batch(features, test_batches,
                                              base_model.feature_vector_length,
                                              batch_size, timestep, validation_steps)

        # Checkpoint the weights after every epoch
        base_model_weights = ("weights." + base_model.name + ".fold_" + fold +
                              ".epoch_{epoch:02d}." + backend + ".hdf5")
        weights_filepath = os.path.join(weights_dir, base_model_weights)
        checkpoint = ModelCheckpoint(weights_filepath, monitor='val_acc',
                                     verbose=1, save_best_only=False)
        history = HistoryLog()

        # Fine-tune the model
        model.fit_generator(
            train_generator,
            steps_per_epoch=steps_per_epoch,
            epochs=10,
            callbacks=[checkpoint, history],
            validation_data=validation_generator,
            validation_steps=validation_steps)

        # Dump the per-batch loss and per-epoch logs
        ts = time()
        timestamp = datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H:%M:%S')
        loss_filepath = os.path.join(
            weights_dir, "{}.fold_{}.loss.{}.log".format(base_model.name, fold, timestamp))
        history.log_training_loss(loss_filepath)
        epoch_filepath = os.path.join(
            weights_dir, "{}.fold_{}.epoch.{}.log".format(base_model.name, fold, timestamp))
        history.log_epoch(epoch_filepath)

        # Release the model between folds to avoid leaking GPU memory
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
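# --- Usage sketch (illustrative; not part of the original source) ---
# Example of driving the Keras train() above. SGDParams is a hypothetical
# namedtuple mirroring the attributes the function reads from sgd_params
# (lr, decay, momentum, nesterov); base_model is expected to be one of the
# project's model wrappers exposing .name, .feature_vector_length and .load().
# The import path and values below are placeholders.
from collections import namedtuple

SGDParams = namedtuple('SGDParams', ['lr', 'decay', 'momentum', 'nesterov'])

if __name__ == '__main__':
    sgd_params = SGDParams(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)  # hypothetical values
    from models import lstm as base_model  # placeholder import path
    train('features/features.fold_{}.pkl', 'weights/', sgd_params, base_model,
          start_fold=1, end_fold=5, timestep=10, batch_size=1)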
def test(features_filepath, results_dir, rf_model, start_fold=1, end_fold=1,
         timestep=5, progress_percent=0.05, iccv_epic=True, features_size=4096):
    np.random.seed(42)
    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)
    num_frames_per_day = 2880  # one frame every 30 seconds
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        with open(features_filepath.format(fold), 'rb') as f:
            user_features = pickle.load(f)

        # Restore the per-fold random forest trained above
        weights = rf_model.weights.format(fold)
        rf = load_random_forest(weights)

        if iccv_epic:
            test_split = ntcir.read_split('datasets/ntcir/test_split.txt')
        else:
            test_split = ntcir.get_split_fold(sorted_users, int(fold) - 1, False)
        test_batches = ntcir.get_training_batches(test_split, sequences,
                                                  timestep=timestep)

        num_features = timestep * features_size
        num_test_batches = len(test_batches)

        if progress_percent:
            test_progress_percent = max(1, int(num_test_batches * progress_percent))
            print "Testing fold {}".format(fold)

        # Test matrix mirrors the training matrix: one row of concatenated
        # feature vectors per batch, labelled by the batch's last image.
        features = np.zeros((num_test_batches, num_features))
        img_paths = list()
        labels = list()
        for i, batch in enumerate(test_batches):
            day = user_features[batch.user_id][batch.date]
            for j, ind in enumerate(batch.indices):
                image = day.images[ind]
                start_ind = j * features_size
                end_ind = (j + 1) * features_size
                features[i, start_ind:end_ind] = image.features

            last_ind = batch.indices[-1]
            img_paths.append(day.images[last_ind].path)
            labels.append(day.images[last_ind].label)
            if progress_percent and (i + 1) % test_progress_percent == 0:
                print("Progress %3.2f%% (%d/%d)" % (100.0 * (i + 1) / num_test_batches,
                                                    i + 1, num_test_batches))

        # Predict all batches at once
        predictions = rf.predict(features)
        results = [(img_paths[i], labels[i], predictions[i])
                   for i in range(num_test_batches)]

        results_fname = "{}.fold_{}.csv".format(rf_model.name, fold)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)
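# --- Helper sketch (illustrative; not part of the original source) ---
# test() relies on load_random_forest() to restore the classifier pickled by
# train(). A minimal implementation consistent with that dump (an assumption,
# not necessarily the project's actual helper):
def load_random_forest(weights_filepath):
    # train() dumps the forest with pickle.HIGHEST_PROTOCOL, so the file
    # must be opened in binary mode.
    with open(weights_filepath, 'rb') as f:
        return pickle.load(f)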
def test(features_filepath, results_dir, base_model, start_fold, end_fold,
         timestep, iccv_epic=False):
    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)
    num_frames_per_day = 2880  # one frame every 30 seconds
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    if not start_fold:
        start_fold = current_fold(results_dir, base_model.name + '.fold')

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        with open(features_filepath.format(fold), 'rb') as f:
            features = pickle.load(f)

        if iccv_epic:
            test_split = ntcir.read_split('datasets/ntcir/test_split.txt')
        else:
            test_split = ntcir.get_split_fold(sorted_users, int(fold) - 1, False)
        test_batches = ntcir.get_batches(test_split, sequences, timestep=timestep,
                                         include_last=True)

        K.set_learning_phase(0)  # inference phase
        weights = base_model.best_weights.format(fold)
        model = base_model.load(
            feature_vector_length=base_model.feature_vector_length,
            weights=weights,
            timestep=timestep)

        # Many-to-many evaluation: keep one prediction per frame, trimming the
        # padding of the last (possibly shorter) batch via batch.size.
        frames = list()
        groundtruth = list()
        predictions = list()
        for batch in test_batches:
            x, y = load_batch(
                features, batch,
                feature_vector_length=base_model.feature_vector_length,
                batch_size=1,
                timestep=timestep)

            prediction = model.predict_on_batch(x)
            prediction = np.argmax(prediction, axis=2).squeeze()[0:batch.size]
            predictions.extend(prediction)
            groundtruth.extend(np.argmax(y, axis=2).squeeze()[0:batch.size])
            for ind in batch.indices:
                frames.append(features[batch.user_id][batch.date].images[ind].path)

        results_fname = "{}.fold_{}.{}.csv".format(base_model.name, fold, backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(frames, groundtruth, predictions, results_filepath)

        # Release the model between folds to avoid leaking GPU memory
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
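# --- Helper sketch (illustrative; not part of the original source) ---
# The many-to-many test() above passes write_results() three parallel lists
# (frame path, ground-truth label, predicted label). A minimal CSV writer
# consistent with that call (an assumption; the single-list variant used by
# the other test() functions takes ready-made tuples instead):
import csv

def write_results(frames, groundtruth, predictions, results_filepath):
    with open(results_filepath, 'wb') as f:  # binary mode for the Python 2 csv module
        writer = csv.writer(f)
        for row in zip(frames, groundtruth, predictions):
            writer.writerow(row)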
def test(features_filepath, results_dir, base_model, start_fold, end_fold,
         timestep, iccv_epic=False, progress_percent=0.05):
    users = IO.load_annotations(ntcir.filepaths)
    sorted_users = ntcir.utils.sort(users)
    num_frames_per_day = 2880  # one frame every 30 seconds
    sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    if not start_fold:
        start_fold = current_fold(results_dir, base_model.name + '.fold')

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        with open(features_filepath.format(fold), 'rb') as f:
            features = pickle.load(f)

        if iccv_epic:
            test_split = ntcir.read_split('datasets/ntcir/test_split.txt')
        else:
            test_split = ntcir.get_split_fold(sorted_users, int(fold) - 1, False)
        test_batches = ntcir.get_training_batches(test_split, sequences,
                                                  timestep=timestep)

        K.set_learning_phase(0)  # inference phase
        weights = base_model.best_weights.format(fold)
        model = base_model.load(
            feature_vector_length=base_model.feature_vector_length,
            weights=weights,
            timestep=timestep)

        num_test_batches = len(test_batches)
        if progress_percent:
            test_progress_percent = max(1, int(num_test_batches * progress_percent))
            print "Testing fold {}".format(fold)

        # Many-to-one evaluation: keep only the prediction for the last frame
        # of each batch.
        results = list()
        for i, batch in enumerate(test_batches):
            x, y = load_batch(
                features, batch,
                feature_vector_length=base_model.feature_vector_length,
                batch_size=1,
                timestep=timestep)

            prediction = model.predict_on_batch(x)
            prediction = np.argmax(prediction, axis=2).squeeze()[-1]

            ind = batch.indices[-1]
            image = features[batch.user_id][batch.date].images[ind]
            results.append((image.path, image.label, prediction))
            if progress_percent and (i + 1) % test_progress_percent == 0:
                print("Progress %3.2f%% (%d/%d)" % (
                    100.0 * (i + 1) / num_test_batches, i + 1, num_test_batches))

        results_fname = "{}.many2one.fold_{}.{}.csv".format(
            base_model.name, fold, backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)

        # Release the model between folds to avoid leaking GPU memory
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
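# --- Usage sketch (illustrative; not part of the original source) ---
# Reading back one of the per-fold result CSVs written above and computing
# frame-level accuracy. The (path, label, prediction) column layout follows
# the result tuples built in test(); the helper name is hypothetical.
import csv

def accuracy_from_csv(results_filepath):
    correct = 0
    total = 0
    with open(results_filepath, 'rb') as f:  # binary mode for the Python 2 csv module
        for _, label, prediction in csv.reader(f):
            total += 1
            if label == prediction:
                correct += 1
    return float(correct) / total if total else 0.0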