def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS): print ("Loading data definitions...") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl')) # frames_source = frames_source[:: 2] frames = np.zeros(shape=((len(frames_source),) + IMG_SIZE)) j = 1 for i in range(1, len(frames_source)): filename = "frame_" + str(j) + ".png" im_file = os.path.join(DATA_DIR, filename) try: frame = cv2.imread(im_file, cv2.IMREAD_COLOR) frames[i] = (frame.astype(np.float32) - 127.5) / 127.5 # j = j + 2 j = j + 1 except AttributeError as e: print(im_file) print(e) # Build video progressions videos_list = [] start_frame_index = 0 end_frame_index = VIDEO_LENGTH while (end_frame_index <= len(frames_source)): frame_list = frames_source[start_frame_index:end_frame_index] if (len(set(frame_list)) == 1): videos_list.append((start_frame_index, end_frame_index)) start_frame_index = start_frame_index + 1 end_frame_index = end_frame_index + 1 else: start_frame_index = end_frame_index - 1 end_frame_index = start_frame_index + VIDEO_LENGTH videos_list = np.asarray(videos_list, dtype=np.int32) n_videos = videos_list.shape[0] if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) if CLASSIFIER: # Load labesl into categorical 1-hot vectors actions = ['moving slow', 'slowing down', 'standing', 'speeding up', 'moving fast', 'fake'] print ("Loading annotations...") action_classes = hkl.load(os.path.join(DATA_DIR, 'annotations_train_128.hkl')) action_nums = [] for i in range(len(action_classes)): action_dict = dict(ele.split(':') for ele in action_classes[i].split(', ')[2:]) action_nums.append(actions.index(str(action_dict['Driver']))) # action_nums = action_nums[::2] action_cats = np.asarray(to_categorical(action_nums, len(actions))) # Setup validation val_frames_source = hkl.load(os.path.join(VAL_DATA_DIR, 'sources_val_128.hkl')) # val_frames_source = val_frames_source[:: 2] val_frames = np.zeros(shape=((len(val_frames_source),) + IMG_SIZE)) j = 1 for i in range(1, len(val_frames_source)): filename = "frame_" + str(j) + ".png" im_file = os.path.join(VAL_DATA_DIR, filename) try: val_frame = cv2.imread(im_file, cv2.IMREAD_COLOR) val_frames[i] = (val_frame.astype(np.float32) - 127.5) / 127.5 # j = j + 2 j = j + 1 except AttributeError as e: print(im_file) print(e) val_videos_list = [] start_frame_index = 0 end_frame_index = VIDEO_LENGTH while (end_frame_index <= len(val_frames_source)): val_frame_list = val_frames_source[start_frame_index:end_frame_index] if (len(set(val_frame_list)) == 1): val_videos_list.append((start_frame_index, end_frame_index)) start_frame_index = start_frame_index + VIDEO_LENGTH end_frame_index = end_frame_index + VIDEO_LENGTH else: start_frame_index = end_frame_index - 1 end_frame_index = start_frame_index + VIDEO_LENGTH val_videos_list = np.asarray(val_videos_list, dtype=np.int32) n_val_videos = val_videos_list.shape[0] if CLASSIFIER: # Load val labesl into categorical 1-hot vectors val_action_classes = hkl.load(os.path.join(VAL_DATA_DIR, 'annotations_val_128.hkl')) val_action_nums = [] for i in range(len(val_action_classes)): val_action_dict = dict(ele.split(':') for ele in val_action_classes[i].split(', ')[2:]) val_action_nums.append(actions.index(str(val_action_dict['Driver']))) # val_action_nums = val_action_nums[::2] val_action_cats = to_categorical(val_action_nums, len(actions)) # Build the Spatio-temporal Autoencoder print ("Creating models...") encoder = encoder_model() decoder = decoder_model() intermediate_decoder = 
Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output) mask_gen_1 = Sequential() mask_gen_1.add(encoder) mask_gen_1.add(intermediate_decoder) mask_gen_1.compile(loss='mean_squared_error', optimizer=OPTIM_G) autoencoder = autoencoder_model(encoder, decoder) if CLASSIFIER: # classifier = classifier_model() classifier = conv_classifier_model() action_predictor = action_model(encoder, decoder, classifier) action_predictor.compile(loss=['mse', 'categorical_crossentropy'], loss_weights=LOSS_WEIGHTS, optimizer=OPTIM_G, metrics=['accuracy']) # action_predictor.compile(loss='categorical_crossentropy', # optimizer=OPTIM_G, # metrics=['accuracy']) set_trainability(classifier, True) classifier.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=OPTIM_D) run_utilities(encoder, decoder, autoencoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS) print (action_predictor.summary()) run_utilities(encoder, decoder, autoencoder, "None", ENC_WEIGHTS, DEC_WEIGHTS, "None") autoencoder.compile(loss="mean_squared_error", optimizer=OPTIM_A) NB_ITERATIONS = int(n_videos/BATCH_SIZE) # NB_ITERATIONS = 1 NB_VAL_ITERATIONS = int(n_val_videos/BATCH_SIZE) # Setup TensorBoard Callback TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False) TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS = lrs_callback.LearningRateScheduler(schedule=schedule) LRS.set_model(autoencoder) print ("Beginning Training...") # Begin Training for epoch in range(NB_EPOCHS_AUTOENCODER): print("\n\nEpoch ", epoch) loss = [] val_loss = [] # Set learning rate every epoch LRS.on_epoch_begin(epoch=epoch) lr = K.get_value(autoencoder.optimizer.lr) print ("Learning rate: " + str(lr)) for index in range(NB_ITERATIONS): # Train Autoencoder X, y = load_X_y(videos_list, index, frames, []) X_train = X[:, 0 : 10] y_train = X[:, 10 :] loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS-1) + " " + "loss: " + str(loss[len(loss)-1]) + "\t [" + "{0}>".format("="*(arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = autoencoder.predict(X_train, verbose=0) orig_image, truth_image, pred_image = combine_images(X_train, y_train, predicted_images) pred_image = pred_image * 127.5 + 127.5 orig_image = orig_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 if epoch == 0 : cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_orig.png"), orig_image) cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_image) cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_image) # Run over validation data for index in range(NB_VAL_ITERATIONS): X, y = load_X_y(val_videos_list, index, val_frames, []) X_train = X[:, 0 : 10] y_train = X[:, 10 :] val_loss.append(autoencoder.test_on_batch(X_train, y_train)) arrow = int(index / (NB_VAL_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS-1) + " " + "val_loss: " + str(val_loss[len(val_loss)-1]) + "\t [" + "{0}>".format("="*(arrow))) stdout.flush() # then after each epoch/iteration avg_loss = sum(loss)/len(loss) avg_val_loss = sum(val_loss) / len(val_loss) logs = {'loss': avg_loss, 'val_loss' : avg_val_loss} TC.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 
'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"d_loss\":%f};\n" % (epoch, avg_loss))
        print("\nAvg loss: " + str(avg_loss) + " Avg val loss: " + str(avg_val_loss))

        # Save model weights per epoch to file
        encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True)

        # Save predicted mask per epoch
        predicted_attn = mask_gen_1.predict(X_train, verbose=0)
        a_pred = np.reshape(predicted_attn, newshape=(10, 10, 16, 16, 1))
        np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred)
        # predicted_attn = mask_gen_2.predict(X_train, verbose=0)
        # a_pred = np.reshape(predicted_attn, newshape=(10, 10, 128, 128, 1))
        # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen2_' + str(epoch) + '.npy'), a_pred)

    # Train AAE
    if CLASSIFIER:
        # exp_memory = ExperienceMemory(memory_length=100)
        print("Training Classifier...")

        # Setup fake labels
        y_fake_classes = []
        for k in range(BATCH_SIZE):
            class_nums = (len(actions) - 1) * np.ones(shape=(int(VIDEO_LENGTH / 2), 1), dtype=np.float32)
            y_fake_classes.append(to_categorical(class_nums, len(actions)))
        y_fake_classes = np.asarray(y_fake_classes)

        for epoch in range(NB_EPOCHS_CLASS):
            print("\n\nEpoch ", epoch)
            c_loss = []
            a_loss = []
            val_c_loss = []
            val_a_loss = []

            # # Set learning rate every epoch
            # LRS.on_epoch_begin(epoch=epoch)
            lr = K.get_value(autoencoder.optimizer.lr)
            print("Learning rate: " + str(lr))
            print("a_loss_metrics: " + str(action_predictor.metrics_names))
            print("c_loss_metrics: " + str(classifier.metrics_names))

            for index in range(NB_ITERATIONS):
                # Load a batch of clips and their per-frame action labels
                X, y = load_X_y(videos_list, index, frames, action_cats)
                X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
                y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]
                y_true_classes = y[:, int(VIDEO_LENGTH / 2):]
                print(y_true_classes.shape)
                print(y_true_classes)

                # Train classifier on true future frames and generated (fake) ones
                y_fake_imgs = autoencoder.predict(X_train, verbose=0)
                X = np.concatenate((y_true_imgs, y_fake_imgs), axis=0)
                y = np.concatenate((y_true_classes, y_fake_classes), axis=0)
                for j in range(C_TRAIN_RATIO):
                    c_loss.append(classifier.train_on_batch(X, y))

                # Train action_predictor
                set_trainability(classifier, False)
                for j in range(A_TRAIN_RATIO):
                    a_loss.append(action_predictor.train_on_batch(X_train, [y_true_imgs, y_true_classes]))
                    # a_loss.append(action_predictor.train_on_batch(X_train, y_true_classes))
                set_trainability(classifier, True)

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " +
                             "a_loss: " + str([a_loss[len(a_loss) - 1][j] for j in [0, -1]]) + " " +
                             "c_loss: " + str(c_loss[len(c_loss) - 1]) + " " +
                             "\t [" + "{0}>".format("=" * (arrow)))
                stdout.flush()

                if SAVE_GENERATED_IMAGES:
                    # Save generated images to file
                    # predicted_images = autoencoder.predict(X_train)
                    predicted_images, predicted_classes = action_predictor.predict(X_train, verbose=0)
                    # predicted_classes = action_predictor.predict(X_train, verbose=0)
                    orig_image, truth_image, pred_image = combine_images(X_train, y_true_imgs, predicted_images)
                    pred_image = pred_image * 127.5 + 127.5
                    orig_image = orig_image * 127.5 + 127.5
                    truth_image = truth_image * 127.5 + 127.5
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    if epoch == 0:
                        y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)]
                        # Add labels as text to the image
                        for k in range(BATCH_SIZE):
                            for j in range(int(VIDEO_LENGTH / 2)):
                                class_num_past = np.argmax(y_orig_classes[k, j])
                                class_num_futr =
np.argmax(y_true_classes[k, j]) cv2.putText(orig_image, actions[class_num_past], (2 + j*(128), 120 + k*128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, actions[class_num_futr], (2 + j*(128), 120 + k*128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_orig.png"), orig_image) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_truth.png"), truth_image) # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): class_num = np.argmax(predicted_classes[k, j]) class_num_futr = np.argmax(y_true_classes[k, j]) cv2.putText(pred_image, actions[class_num], (2 + j * (128), 120 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_image, actions[class_num_futr], (2 + j * (128), 105 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_image) # Run over validation data print ('') for index in range(NB_VAL_ITERATIONS): X, y = load_X_y(val_videos_list, index, val_frames, val_action_cats) X_train = X[:, 0: int(VIDEO_LENGTH / 2)] y_classes = y[:, 0: int(VIDEO_LENGTH / 2)] y_imgs = X[:, int(VIDEO_LENGTH / 2):] val_c_loss.append(classifier.test_on_batch(X_train, y_classes)) val_a_loss.append(action_predictor.test_on_batch(X_train, [y_imgs, y_classes])) # val_a_loss.append(action_predictor.test_on_batch(X_train, y_classes)) arrow = int(index / (NB_VAL_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS - 1) + " " + "val_a_loss: " + str([val_a_loss[len(val_a_loss) - 1][j] for j in [0, -1]]) + " " + "val_c_loss: " + str(val_c_loss[len(val_c_loss) - 1]) ) stdout.flush() predicted_attn = mask_gen_1.predict(X_train, verbose=0) a_pred = np.reshape(predicted_attn, newshape=(10, 10, 16, 16, 1)) np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_cla_gen1_' + str(epoch) + '.npy'), a_pred) # then after each epoch/iteration avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0) avg_val_c_loss = np.mean(np.asarray(val_c_loss, dtype=np.float32), axis=0) avg_a_loss = np.mean(np.asarray(a_loss, dtype=np.float32), axis=0) avg_val_a_loss = np.mean(np.asarray(val_a_loss, dtype=np.float32), axis=0) loss_values = np.asarray(avg_c_loss.tolist() + avg_val_c_loss.tolist() \ + avg_a_loss.tolist() + avg_val_a_loss.tolist(), dtype=np.float32) c_loss_keys = ['c_' + metric for metric in classifier.metrics_names] a_loss_keys = ['a_' + metric for metric in action_predictor.metrics_names] val_c_loss_keys = ['c_val_' + metric for metric in classifier.metrics_names] val_a_loss_keys = ['a_val_' + metric for metric in action_predictor.metrics_names] loss_keys = c_loss_keys + val_c_loss_keys + \ a_loss_keys + val_a_loss_keys logs = dict(zip(loss_keys, loss_values)) TC_cla.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses_aae.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs)) print("\nAvg c_loss: " + str(avg_c_loss) + " Avg val_c_loss: " + str(avg_val_c_loss) + "\nAvg a_loss: " + str(avg_a_loss[0]) + " Avg val_a_loss: " + str(avg_val_a_loss[0])) # Save model weights per epoch to file encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_'+str(epoch)+'.h5'), True) decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True) classifier.save_weights(os.path.join(CHECKPOINT_DIR, 
'classifier_cla_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
    TC_cla.on_train_end('_')
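# Note: load_X_y() / load_X_y_RAM() are called above but defined elsewhere in the
# repo. The following is only a minimal sketch of what load_X_y is assumed to do
# for this variant, where `frames` is the pre-normalised in-memory array and
# `videos_list` holds (start_index, end_index) pairs as built above; it also
# assumes BATCH_SIZE is visible as a module-level constant. The real
# implementation may differ.
import numpy as np

def load_X_y(videos_list, index, frames, action_cats):
    """Assemble one batch of clips X and, if labels are given, per-frame one-hot classes y."""
    X = []
    y = []
    for i in range(BATCH_SIZE):
        start, end = videos_list[(index * BATCH_SIZE) + i]
        X.append(frames[start:end])                # shape (VIDEO_LENGTH,) + IMG_SIZE
        if len(action_cats) != 0:
            y.append(action_cats[start:end])       # per-frame one-hot action labels
    return np.asarray(X), np.asarray(y)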
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS): print("Loading data definitions.") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl')) videos_list = get_video_lists(frames_source=frames_source, stride=1) # Load actions from annotations action_labels = hkl.load(os.path.join(DATA_DIR, 'annotations_train_128.hkl')) ped_action_classes, ped_class_count = get_action_classes(action_labels=action_labels) print("Training Stats: " + str(ped_class_count)) if RAM_DECIMATE: frames = load_to_RAM(frames_source=frames_source) if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) # Setup test test_frames_source = hkl.load(os.path.join(TEST_DATA_DIR, 'sources_test_128.hkl')) test_videos_list = get_video_lists(frames_source=test_frames_source, stride=8) # Load test action annotations test_action_labels = hkl.load(os.path.join(TEST_DATA_DIR, 'annotations_test_128.hkl')) test_ped_action_classes, test_ped_class_count = get_action_classes(test_action_labels) print("Test Stats: " + str(test_ped_class_count)) videos_list = subsample_videos(videos_list=videos_list, ped_action_labels=ped_action_classes) # Build the Spatio-temporal Autoencoder print("Creating models.") encoder = encoder_model() decoder = decoder_model() print(encoder.summary()) print(decoder.summary()) # Build attention layer output intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output) mask_gen = Sequential() mask_gen.add(encoder) mask_gen.add(intermediate_decoder) mask_gen.compile(loss='mean_absolute_error', optimizer=OPTIM_A) autoencoder = autoencoder_model(encoder, decoder) autoencoder.compile(loss="mean_absolute_error", optimizer=OPTIM_A) # Build stacked classifier if CLASSIFIER: classifier = pretrained_c3d() classifier.compile(loss="binary_crossentropy", optimizer=OPTIM_C, metrics=['accuracy']) sclassifier = stacked_classifier_model(encoder, decoder, classifier) sclassifier.compile(loss=["mean_absolute_error", "binary_crossentropy"], optimizer=OPTIM_C, loss_weights=LOSS_WEIGHTS, metrics=['accuracy']) print(sclassifier.summary()) if CLASSIFIER: run_utilities(encoder, decoder, autoencoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS) else: run_utilities(encoder, decoder, autoencoder, 'classifier', ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS) n_videos = videos_list.shape[0] n_test_videos = test_videos_list.shape[0] NB_ITERATIONS = int(n_videos / BATCH_SIZE) # NB_ITERATIONS = 1 NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE) # NB_TEST_ITERATIONS = 1 # Setup TensorBoard Callback TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False) TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS_auto = lrs_callback.LearningRateScheduler(schedule=auto_schedule) LRS_auto.set_model(autoencoder) if CLASSIFIER: LRS_clas = lrs_callback.LearningRateScheduler(schedule=clas_schedule) LRS_clas.set_model(sclassifier) print("Beginning Training.") # Begin Training for epoch in range(NB_EPOCHS_AUTOENCODER): print("\n\nEpoch ", epoch) loss = [] test_loss = [] # Set learning rate every epoch LRS_auto.on_epoch_begin(epoch=epoch) lr = K.get_value(autoencoder.optimizer.lr) print("Learning rate: " + str(lr)) for index in range(NB_ITERATIONS): # Train Autoencoder if RAM_DECIMATE: X, y = load_X_y_RAM(videos_list, index, frames, []) else: X, y = load_X_y(videos_list, index, DATA_DIR, []) X_train = X[:, 0: int(VIDEO_LENGTH / 2)] y_train = X[:, 
int(VIDEO_LENGTH / 2):] loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "loss: " + str(loss[len(loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = autoencoder.predict(X_train, verbose=0) orig_image = arrange_images(X_train) truth_image = arrange_images(y_train) pred_image = arrange_images(predicted_images) orig_image = orig_image * 127.5 + 127.5 pred_image = pred_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 if epoch == 0: cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_orig.png"), orig_image) cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_image) cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_image) # Run over validation data print('') for index in range(NB_TEST_ITERATIONS): X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, []) X_train = X[:, 0: int(VIDEO_LENGTH / 2)] y_train = X[:, int(VIDEO_LENGTH / 2):] test_loss.append(autoencoder.test_on_batch(X_train, y_train)) arrow = int(index / (NB_TEST_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + " " + "test_loss: " + str(test_loss[len(test_loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() # then after each epoch/iteration avg_loss = sum(loss) / len(loss) avg_test_loss = sum(test_loss) / len(test_loss) logs = {'loss': avg_loss, 'test_loss': avg_test_loss} TC.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, \"loss\":%f, \"test_loss\":%f};\n" % (epoch, avg_loss, avg_test_loss)) print("\nAvg loss: " + str(avg_loss) + " Avg test loss: " + str(avg_test_loss)) # Save model weights per epoch to file encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True) # Save predicted mask per epoch # predicted_attn = mask_gen.predict(X_train, verbose=0) # a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, 16, 14, 14, 1)) # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred) # Train Classifier if CLASSIFIER: print("Training Classifier...") for epoch in range(NB_EPOCHS_CLASS): print("\n\nEpoch ", epoch) c_loss = [] test_c_loss = [] # # Set learning rate every epoch LRS_clas.on_epoch_begin(epoch=epoch) lr = K.get_value(sclassifier.optimizer.lr) print("Learning rate: " + str(lr)) print("c_loss_metrics: " + str(sclassifier.metrics_names)) for index in range(NB_ITERATIONS): # Train Autoencoder if RAM_DECIMATE: X, y = load_X_y_RAM(videos_list, index, frames, ped_action_classes) else: X, y = load_X_y(videos_list, index, DATA_DIR, ped_action_classes) X_train = X[:, 0: int(VIDEO_LENGTH / 2)] y_true_class = y[:, CLASS_TARGET_INDEX] y_true_imgs = X[:, int(VIDEO_LENGTH / 2):] c_loss.append(sclassifier.train_on_batch(X_train, [y_true_imgs, y_true_class])) arrow = int(index / (NB_ITERATIONS / 30)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "c_loss: " + str([ c_loss[len(c_loss) - 1][j] for j in [0, 1, 2, 3, 4]]) + " " + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images, 
ped_pred_class = sclassifier.predict(X_train, verbose=0) pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = pred_seq * 127.5 + 127.5 truth_image = arrange_images(y_true_imgs) truth_image = truth_image * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)] y_true_classes = y[:, int(VIDEO_LENGTH / 2):] # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): class_num_past = np.argmax(y_orig_classes[k, j]) class_num_futr = np.argmax(y_true_classes[k, j]) class_num_y = np.argmax(ped_pred_class[k]) cv2.putText(pred_seq, simple_ped_set[class_num_past], (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) try: cv2.putText(pred_seq, simple_ped_set[class_num_y], (2 + (j + 16) * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) except IndexError as e: print (class_num_y) print (e) cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_futr], (2 + (j + 16) * (128), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, simple_ped_set[class_num_futr], (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_seq) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_truth.png"), truth_image) # Run over test data print('') for index in range(NB_TEST_ITERATIONS): X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, test_ped_action_classes) X_test = X[:, 0: int(VIDEO_LENGTH / 2)] y_true_class = y[:, CLASS_TARGET_INDEX] y_true_imgs = X[:, int(VIDEO_LENGTH / 2):] test_c_loss.append(sclassifier.test_on_batch(X_test, [y_true_imgs, y_true_class])) arrow = int(index / (NB_TEST_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + " " + "test_c_loss: " + str([test_c_loss[len(test_c_loss) - 1][j] for j in [0, 1, 2, 3, 4]])) stdout.flush() # Save generated images to file test_predicted_images, test_ped_pred_class = sclassifier.predict(X_test, verbose=0) pred_seq = arrange_images(np.concatenate((X_test, test_predicted_images), axis=1)) pred_seq = pred_seq * 127.5 + 127.5 truth_image = arrange_images(y_true_imgs) truth_image = truth_image * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)] y_true_classes = y[:, int(VIDEO_LENGTH / 2):] # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): class_num_past = np.argmax(y_orig_classes[k, j]) class_num_futr = np.argmax(y_true_classes[k, j]) class_num_y = np.argmax(test_ped_pred_class[k]) cv2.putText(pred_seq, simple_ped_set[class_num_past], (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, simple_ped_set[class_num_y], (2 + (j + 16) * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_futr], (2 + (j + 16) * (128), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, simple_ped_set[class_num_futr], (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(index) + "_cla_test_pred.png"), pred_seq) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(index) + "_cla_test_truth.png"), truth_image) # predicted_attn = mask_gen.predict(X_train, verbose=0) # a_pred = np.reshape(predicted_attn, 
newshape=(BATCH_SIZE, 16, 16, 16, 1)) # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_cla_' + str(epoch) + '.npy'), a_pred) # then after each epoch/iteration avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0) avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32), axis=0) loss_values = np.asarray(avg_c_loss.tolist() + avg_test_c_loss.tolist(), dtype=np.float32) c_loss_keys = ['c_' + metric for metric in classifier.metrics_names] test_c_loss_keys = ['c_test_' + metric for metric in classifier.metrics_names] loss_keys = c_loss_keys + test_c_loss_keys logs = dict(zip(loss_keys, loss_values)) TC_cla.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses_cla.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs)) print("\nAvg c_loss: " + str(avg_c_loss) + " Avg test_c_loss: " + str(avg_test_c_loss)) # Save model weights per epoch to file encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True) classifier.save_weights(os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'), True)
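# Note: get_video_lists() (used above with a stride argument) is defined in a
# shared helper module, not in this file. A plausible sketch, assuming it
# generalises the explicit while-loop used in the other variants: slide a
# VIDEO_LENGTH window over frames_source, keep only windows whose frames all
# come from the same source clip, and advance the window by `stride`.
import numpy as np

def get_video_lists(frames_source, stride):
    videos_list = []
    start_frame_index = 0
    end_frame_index = VIDEO_LENGTH
    while end_frame_index <= len(frames_source):
        frame_list = frames_source[start_frame_index:end_frame_index]
        if len(set(frame_list)) == 1:
            # Every frame in the window belongs to the same source video
            videos_list.append((start_frame_index, end_frame_index))
            start_frame_index += stride
            end_frame_index += stride
        else:
            # Window straddles a video boundary: jump past it
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH
    return np.asarray(videos_list, dtype=np.int32)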
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS): print ("Loading data definitions...") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl')) videos_list = get_video_lists(frames_source=frames_source, stride=4) n_videos = videos_list.shape[0] # Setup test test_frames_source = hkl.load(os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl')) test_videos_list = get_video_lists(frames_source=test_frames_source, stride=(int(VIDEO_LENGTH/2))) n_test_videos = test_videos_list.shape[0] if RAM_DECIMATE: frames = load_to_RAM(frames_source=frames_source) if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) # Build the Spatio-temporal Autoencoder print ("Creating models...") encoder = encoder_model() print (encoder.summary()) decoder = decoder_model() autoencoder = autoencoder_model(encoder, decoder) autoencoder.compile(loss="mean_absolute_error", optimizer=OPTIM_A) # Build attention layer output # intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output) # mask_gen_1 = Sequential() # mask_gen_1.add(encoder) # mask_gen_1.add(intermediate_decoder) # mask_gen_1.compile(loss='mean_squared_error', optimizer=OPTIM_A) run_utilities(encoder, decoder, autoencoder, ENC_WEIGHTS, DEC_WEIGHTS) NB_ITERATIONS = int(n_videos/BATCH_SIZE) # NB_ITERATIONS = 5 NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE) # Setup TensorBoard Callback TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS = lrs_callback.LearningRateScheduler(schedule=schedule) LRS.set_model(autoencoder) print ("Beginning Training...") # Begin Training for epoch in range(NB_EPOCHS_AUTOENCODER): print("\n\nEpoch ", epoch) loss = [] test_loss = [] # Set learning rate every epoch LRS.on_epoch_begin(epoch=epoch) lr = K.get_value(autoencoder.optimizer.lr) print ("Learning rate: " + str(lr)) for index in range(NB_ITERATIONS): # Train Autoencoder if RAM_DECIMATE: X = load_X_RAM(videos_list, index, frames) else: X = load_X(videos_list, index, DATA_DIR, IMG_SIZE) X_train = X[:, 0 : int(VIDEO_LENGTH/2)] y_train = X[:, int(VIDEO_LENGTH/2) :] loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS-1) + " " + "loss: " + str(loss[len(loss)-1]) + "\t [" + "{0}>".format("="*(arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = autoencoder.predict(X_train, verbose=0) voila = np.concatenate((X_train, y_train), axis=1) truth_seq = arrange_images(voila) pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) truth_seq = truth_seq * 127.5 + 127.5 pred_seq = pred_seq * 127.5 + 127.5 cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_seq) cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_seq) # Run over test data print ('') for index in range(NB_TEST_ITERATIONS): X = load_X(test_videos_list, index, TEST_DATA_DIR, IMG_SIZE) X_train = X[:, 0: int(VIDEO_LENGTH / 2)] y_train = X[:, int(VIDEO_LENGTH / 2):] test_loss.append(autoencoder.test_on_batch(X_train, y_train)) arrow = int(index / (NB_TEST_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + " " + "test_loss: " + str(test_loss[len(test_loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() # then after each epoch/iteration avg_loss = sum(loss)/len(loss) 
avg_test_loss = sum(test_loss) / len(test_loss) logs = {'loss': avg_loss, 'test_loss': avg_test_loss} TC.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, \"loss\":%f, \"test_loss\":%f};\n" % (epoch, avg_loss, avg_test_loss)) print("\nAvg loss: " + str(avg_loss) + " Avg test loss: " + str(avg_test_loss)) # Save model weights per epoch to file encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True)
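# Note: arrange_images() (used above to build the saved preview images) is not
# defined in this file. A sketch of the assumed behaviour: tile a batch of clips
# into one large image with one row per sample and one column per frame, which
# matches the 128-pixel row offsets used by the cv2.putText calls in the
# classifier variants.
import numpy as np

def arrange_images(video_batch):
    b, t, h, w, c = video_batch.shape
    canvas = np.zeros((b * h, t * w, c), dtype=video_batch.dtype)
    for k in range(b):
        for j in range(t):
            canvas[k * h:(k + 1) * h, j * w:(j + 1) * w] = video_batch[k, j]
    return canvas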
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, DIS_WEIGHTS): print("Loading data definitions...") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl')) # Build video progressions videos_list = [] start_frame_index = 1 end_frame_index = VIDEO_LENGTH + 1 while (end_frame_index <= len(frames_source)): frame_list = frames_source[start_frame_index:end_frame_index] if (len(set(frame_list)) == 1): videos_list.append(range(start_frame_index, end_frame_index)) start_frame_index = start_frame_index + 1 end_frame_index = end_frame_index + 1 else: start_frame_index = end_frame_index - 1 end_frame_index = start_frame_index + VIDEO_LENGTH videos_list = np.asarray(videos_list, dtype=np.int32) n_videos = videos_list.shape[0] if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) # Build the Spatio-temporal Autoencoder print("Creating models...") encoder = encoder_model() decoder = decoder_model() intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output) mask_gen = Sequential() mask_gen.add(encoder) mask_gen.add(intermediate_decoder) mask_gen.compile(loss='mean_squared_error', optimizer=OPTIM_G) autoencoder = autoencoder_model(encoder, decoder) if ADVERSARIAL: discriminator = discriminator_model() aae = aae_model(autoencoder, discriminator) aae.compile(loss='binary_crossentropy', optimizer=OPTIM_G) set_trainability(discriminator, True) discriminator.compile(loss='binary_crossentropy', optimizer=OPTIM_D) run_utilities(encoder, decoder, autoencoder, discriminator, ENC_WEIGHTS, DEC_WEIGHTS, DIS_WEIGHTS) else: run_utilities(encoder, decoder, autoencoder, 'None', ENC_WEIGHTS, DEC_WEIGHTS, 'None') autoencoder.compile(loss=mse_kld_loss, optimizer=OPTIM_A) NB_ITERATIONS = int(n_videos / BATCH_SIZE) # Setup TensorBoard Callback TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS = lrs_callback.LearningRateScheduler(schedule=schedule) LRS.set_model(autoencoder) print("Beginning Training...") # Begin Training for epoch in range(NB_EPOCHS_AUTOENCODER): print("\n\nEpoch ", epoch) loss = [] # Set learning rate every epoch LRS.on_epoch_begin(epoch=epoch) lr = K.get_value(autoencoder.optimizer.lr) print("Learning rate: " + str(lr)) for index in range(NB_ITERATIONS): # Train Autoencoder X = load_X(videos_list, index, DATA_DIR) X_train = X[:, 0:int(VIDEO_LENGTH / 2)] y_train = X[:, int(VIDEO_LENGTH / 2):] loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 40)) stdout.write("\rIteration: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "loss: " + str(loss[len(loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = autoencoder.predict(X_train, verbose=0) orig_image, truth_image, pred_image = combine_images( X_train, y_train, predicted_images) pred_image = pred_image * 127.5 + 127.5 orig_image = orig_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 if epoch == 0: cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_orig.png"), orig_image) cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_image) cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_image) # then after each epoch/iteration avg_loss = sum(loss) / len(loss) logs = {'loss': avg_loss} TC.on_epoch_end(epoch, logs) # Log the losses with 
open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, \"d_loss\":%f};\n" % (epoch, avg_loss)) print("\nAvg loss: " + str(avg_loss)) # Save predicted mask per epoch predicted_attn_1 = mask_gen.predict(X_train, verbose=0) a_pred_1 = np.reshape(predicted_attn_1, newshape=(10, 10, 16, 16, 1)) np.save( os.path.join(TEST_RESULTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred_1) # Save model weights per epoch to file encoder.save_weights( os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights( os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True) # Train AAE if ADVERSARIAL: exp_memory = ExperienceMemory(memory_length=100) for epoch in range(NB_EPOCHS_AAE): print("\n\nEpoch ", epoch) g_loss = [] d_loss = [] # a_loss = [] # # Set learning rate every epoch # LRS.on_epoch_begin(epoch=epoch) lr = K.get_value(autoencoder.optimizer.lr) print("Learning rate: " + str(lr)) for index in range(NB_ITERATIONS): # Train Autoencoder X = load_X(videos_list, index, DATA_DIR) X_train = X[:, 0:int(VIDEO_LENGTH / 2)] y_train = X[:, int(VIDEO_LENGTH / 2):] future_images = autoencoder.predict(X_train, verbose=0) trainable_fakes = exp_memory.get_trainable_fakes( current_gens=future_images, exp_window_size=5) # Train Discriminator on future images (y_train, not X_train) X = np.concatenate((y_train, trainable_fakes)) y = np.concatenate( (np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.int), np.zeros(shape=(BATCH_SIZE, 10, 1), dtype=np.int)), axis=0) d_loss.append(discriminator.train_on_batch(X, y)) # Train AAE set_trainability(discriminator, False) y = np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.int) g_loss.append(aae.train_on_batch(X_train, y)) set_trainability(discriminator, True) # # Train Autoencoder # a_loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 30)) stdout.write("\rIteration: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "g_loss: " + str(g_loss[len(g_loss) - 1]) + " " + "d_loss: " + str(d_loss[len(d_loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = autoencoder.predict(X_train, verbose=0) orig_image, truth_image, pred_image = combine_images( X_train, y_train, predicted_images) pred_image = pred_image * 127.5 + 127.5 orig_image = orig_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 if epoch == 0: cv2.imwrite( os.path.join( GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_aae_orig.png"), orig_image) cv2.imwrite( os.path.join( GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_aae_truth.png"), truth_image) cv2.imwrite( os.path.join( GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_aae_pred.png"), pred_image) predicted_attn_1 = mask_gen.predict(X_train, verbose=0) a_pred_1 = np.reshape(predicted_attn_1, newshape=(10, 10, 16, 16, 1)) np.save( os.path.join( TEST_RESULTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred_1) # then after each epoch/iteration # avg_a_loss = sum(a_loss) / len(a_loss) avg_g_loss = sum(g_loss) / len(g_loss) avg_d_loss = sum(d_loss) / len(d_loss) logs = {'g_loss': avg_g_loss, 'd_loss': avg_d_loss} TC.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses_aae.json'), 'a') as log_file: log_file.write( "{\"epoch\":%d, \"g_loss\":%f, \"d_loss\":%f};\n" % (epoch, avg_g_loss, avg_d_loss)) print("\nAvg g_loss: " + str(avg_g_loss) + " Avg d_loss: " + str(avg_d_loss)) # Save model weights 
per epoch to file encoder.save_weights( os.path.join(CHECKPOINT_DIR, 'encoder_aae_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights( os.path.join(CHECKPOINT_DIR, 'decoder_aae_epoch_' + str(epoch) + '.h5'), True) discriminator.save_weights( os.path.join(CHECKPOINT_DIR, 'discriminator_aae_epoch_' + str(epoch) + '.h5'), True) # End TensorBoard Callback TC.on_train_end('_')
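# Note: ExperienceMemory (instantiated in the adversarial branch above) is
# defined elsewhere. A rough sketch of the assumed idea: a small replay buffer
# of previously generated sequences so the discriminator is also trained on
# older fakes, not only on the current batch. The sampling and mixing details
# below are guesses, not the repo's actual implementation.
import numpy as np

class ExperienceMemory(object):
    def __init__(self, memory_length=100):
        self.memory_length = memory_length
        self.memory = []

    def get_trainable_fakes(self, current_gens, exp_window_size=5):
        # Remember the current generations, dropping the oldest batch if full
        self.memory.append(current_gens)
        if len(self.memory) > self.memory_length:
            self.memory.pop(0)
        # Replace a few samples of the current batch with fakes replayed
        # from earlier iterations, if any are available
        fakes = np.copy(current_gens)
        past = self.memory[:-1]
        if len(past) > 0:
            for i in range(min(exp_window_size, fakes.shape[0])):
                batch = past[np.random.randint(len(past))]
                fakes[i] = batch[np.random.randint(batch.shape[0])]
        return fakes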
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS): print("Loading data definitions.") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl')) videos_list_1 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=0) videos_list_2 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=1) videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0) # Load actions from annotations action_labels = hkl.load( os.path.join(DATA_DIR, 'annotations_train_208.hkl')) ped_action_classes, ped_class_count = get_action_classes( action_labels=action_labels) print("Training Stats: " + str(ped_class_count)) if RAM_DECIMATE: frames = load_to_RAM(frames_source=frames_source) if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) # Setup test test_frames_source = hkl.load( os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl')) test_videos_list = get_video_lists(frames_source=test_frames_source, stride=8, frame_skip=0) # Load test action annotations test_action_labels = hkl.load( os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl')) test_ped_action_classes, test_ped_class_count = get_action_classes( test_action_labels) print("Test Stats: " + str(test_ped_class_count)) # Build the Spatio-temporal Autoencoder print("Creating models.") encoder = encoder_model() decoder = decoder_model() # Build stacked classifier if CLASSIFIER: classifier = ensemble_c3d() # classifier.compile(loss="binary_crossentropy", # optimizer=OPTIM_C, # metrics=['accuracy']) run_utilities(encoder, decoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS) sclassifier = stacked_classifier_model(encoder, decoder, classifier) sclassifier.compile(loss="binary_crossentropy", optimizer=OPTIM_C, metrics=['accuracy']) print(sclassifier.summary()) if not CLASSIFIER: autoencoder = autoencoder_model(encoder, decoder) autoencoder.compile(loss="mean_absolute_error", optimizer=OPTIM_A) run_utilities(encoder, decoder, 'classifier', ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS) n_videos = videos_list.shape[0] n_test_videos = test_videos_list.shape[0] NB_ITERATIONS = int(n_videos / BATCH_SIZE) # NB_ITERATIONS = 1 NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE) # NB_TEST_ITERATIONS = 1 # Setup TensorBoard Callback TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False) TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) if not CLASSIFIER: LRS_auto = lrs_callback.LearningRateScheduler(schedule=auto_schedule) LRS_auto.set_model(autoencoder) LC_auto = coeff_callback.CoeffCallback(schedule=coeff_schedule) if CLASSIFIER: LRS_cla = lrs_callback.LearningRateScheduler(schedule=cla_schedule) LRS_cla.set_model(sclassifier) print("Beginning Training.") # Begin Training for epoch in range(NB_EPOCHS_AUTOENCODER): print("\n\nEpoch ", epoch) loss = [] test_loss = [] # Set learning rate every epoch LRS_auto.on_epoch_begin(epoch=epoch) lr = K.get_value(autoencoder.optimizer.lr) print("Learning rate: " + str(lr)) LC_auto.on_epoch_begin(epoch=epoch) print("Loss Coefficients: " + str(LAMBDA)) for index in range(NB_ITERATIONS): # Train Autoencoder if RAM_DECIMATE: X, y = load_X_y_RAM(videos_list, index, frames, []) else: X, y = load_X_y(videos_list, index, DATA_DIR, []) X_train = np.flip(X[:, 0:int(VIDEO_LENGTH / 2)], axis=1) y_train = X[:, int(VIDEO_LENGTH / 2):] loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 40)) 
stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "loss: " + str(loss[len(loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = autoencoder.predict(X_train, verbose=0) voila = np.concatenate((X_train, y_train), axis=1) truth_seq = arrange_images(voila) pred_seq = arrange_images( np.concatenate((X_train, predicted_images), axis=1)) truth_seq = truth_seq * 127.5 + 127.5 pred_seq = pred_seq * 127.5 + 127.5 if epoch == 0: cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_seq) cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_seq) # Run over test data print('') for index in range(NB_TEST_ITERATIONS): X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, []) X_test = np.flip(X[:, 0:int(VIDEO_LENGTH / 2)], axis=1) y_test = X[:, int(VIDEO_LENGTH / 2):] test_loss.append(autoencoder.test_on_batch(X_test, y_test)) arrow = int(index / (NB_TEST_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + " " + "test_loss: " + str(test_loss[len(test_loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() # then after each epoch/iteration avg_loss = sum(loss) / len(loss) avg_test_loss = sum(test_loss) / len(test_loss) logs = {'loss': avg_loss, 'test_loss': avg_test_loss} TC.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, \"loss\":%f, \"test_loss\":%f};\n" % (epoch, avg_loss, avg_test_loss)) print("\nAvg loss: " + str(avg_loss) + " Avg test loss: " + str(avg_test_loss)) # Save model weights per epoch to file encoder.save_weights( os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights( os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True) # Train Classifier if CLASSIFIER: print("Training Classifier...") for epoch in range(NB_EPOCHS_CLASS): print("\n\nEpoch ", epoch) c_loss = [] test_c_loss = [] # # Set learning rate every epoch LRS_cla.on_epoch_begin(epoch=epoch) lr = K.get_value(sclassifier.optimizer.lr) y_train_pred = [] y_train_true = [] print("Learning rate: " + str(lr)) print("c_loss_metrics: " + str(sclassifier.metrics_names)) for index in range(NB_ITERATIONS): # Train Autoencoder if RAM_DECIMATE: X, y = load_X_y_RAM(videos_list, index, frames, ped_action_classes) else: X, y = load_X_y(videos_list, index, DATA_DIR, ped_action_classes) X_train = np.flip(X[:, 0:int(VIDEO_LENGTH / 2)], axis=1) # X_train = X[:, 0: int(VIDEO_LENGTH / 2)] y_true_class = y[:, CLASS_TARGET_INDEX] y_true_imgs = X[:, int(VIDEO_LENGTH / 2):] c_loss.append(sclassifier.train_on_batch( X_train, y_true_class)) y_train_true.extend(y_true_class) y_train_pred.extend(sclassifier.predict(X_train, verbose=0)) arrow = int(index / (NB_ITERATIONS / 30)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "c_loss: " + str([c_loss[len(c_loss) - 1][j] for j in [0, 1]]) + " " + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file generator = autoencoder_model(encoder, decoder) predicted_images = generator.predict(X_train) ped_pred_class = sclassifier.predict(X_train, verbose=0) pred_seq = arrange_images( np.concatenate((X_train, predicted_images), axis=1)) pred_seq = pred_seq * 127.5 + 127.5 truth_image = arrange_images(y_true_imgs) truth_image = truth_image * 127.5 
+ 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y[:, 0:int(VIDEO_LENGTH / 2)] y_true_classes = y[:, int(VIDEO_LENGTH / 2):] # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): # class_num_past = np.argmax(y_orig_classes[k, j]) # class_num_futr = np.argmax(y_true_classes[k, j]) # class_num_y = np.argmax(ped_pred_class[k]) # label_true = simple_ped_set[class_num_futr] # label_orig = simple_ped_set[class_num_past] # label_pred = simple_ped_set[class_num_y] # # label_true = str(y_orig_classes[k, j]) # label_pred = str([round(float(i), 2) for i in ped_pred_class[k]]) if (y_orig_classes[k, j] > 0.5): label_orig = "crossing" else: label_orig = "not crossing" if (y_true_classes[k][0] > 0.5): label_true = "crossing" else: label_true = "not crossing" if (ped_pred_class[k][0] > 0.5): label_pred = "crossing" else: label_pred = "not crossing" cv2.putText(pred_seq, label_orig, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, label_pred, (2 + (j + 16) * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, 'truth: ' + label_true, (2 + (j + 16) * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, label_true, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_seq) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_truth.png"), truth_image) # Run over test data print('') y_test_pred = [] y_test_true = [] for index in range(NB_TEST_ITERATIONS): X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, test_ped_action_classes) X_test = X[:, 0:int(VIDEO_LENGTH / 2)] y_true_class = y[:, CLASS_TARGET_INDEX] y_true_imgs = X[:, int(VIDEO_LENGTH / 2):] test_c_loss.append( sclassifier.test_on_batch(X_test, y_true_class)) y_test_true.extend(y_true_class) y_test_pred.extend(sclassifier.predict(X_test, verbose=0)) arrow = int(index / (NB_TEST_ITERATIONS / 40)) stdout.write( "\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + " " + "test_c_loss: " + str([test_c_loss[len(test_c_loss) - 1][j] for j in [0, 1]])) stdout.flush() # Save generated images to file generator = autoencoder_model(encoder, decoder) test_predicted_images = generator.predict(X_test) test_ped_pred_class = sclassifier.predict(X_test, verbose=0) orig_image = arrange_images(X_test) truth_image = arrange_images(y_true_imgs) pred_image = arrange_images(test_predicted_images) pred_image = pred_image * 127.5 + 127.5 orig_image = orig_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX if epoch == 0: y_orig_classes = y[:, 0:int(VIDEO_LENGTH / 2)] y_true_classes = y[:, int(VIDEO_LENGTH / 2):] # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): # class_num_past = np.argmax(y_orig_classes[k, j]) # class_num_futr = np.argmax(y_true_classes[k, j]) if (y_orig_classes[k, j] > 0.5): label_orig = "crossing" else: label_orig = "not crossing" if (y_true_classes[k][0] > 0.5): label_true = "crossing" else: label_true = "not crossing" cv2.putText(orig_image, label_orig, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, label_true, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + 
"_cla_test_orig.png"), orig_image) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_test_truth.png"), truth_image) # Add labels as text to the image for k in range(BATCH_SIZE): # class_num_y = np.argmax(test_ped_pred_class[k]) if (test_ped_pred_class[k][0] > 0.5): label_pred = "crossing" else: label_pred = "not crossing" for j in range(int(VIDEO_LENGTH / 2)): cv2.putText(pred_image, label_pred, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_test_pred.png"), pred_image) # then after each epoch/iteration avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0) avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32), axis=0) # Calculate Precision and Recall scores train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics( np.asarray(y_train_true), np.asarray(y_train_pred), avg='micro') test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics( np.asarray(y_test_true), np.asarray(y_test_pred), avg='micro') loss_values = np.asarray( avg_c_loss.tolist() + [train_prec.tolist()] + [train_rec.tolist()] + avg_test_c_loss.tolist() + [test_prec.tolist()] + [test_rec.tolist()], dtype=np.float32) # loss_values = np.asarray(avg_c_loss.tolist() + train_prec.tolist() + # train_rec.tolist() + # avg_test_c_loss.tolist() + test_prec.tolist() + # test_rec.tolist(), dtype=np.float32) precs = ['prec_' + action for action in simple_ped_set] recs = ['rec_' + action for action in simple_ped_set] c_loss_keys = [ 'c_' + metric for metric in sclassifier.metrics_names + precs + recs ] test_c_loss_keys = [ 'c_test_' + metric for metric in sclassifier.metrics_names + precs + recs ] loss_keys = c_loss_keys + test_c_loss_keys logs = dict(zip(loss_keys, loss_values)) TC_cla.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses_cla.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs)) print("\nAvg c_loss: " + str(avg_c_loss) + " Avg test_c_loss: " + str(avg_test_c_loss)) prec, recall, fbeta, support = get_sklearn_metrics( np.asarray(y_train_true), np.asarray(y_train_pred), avg='weighted') print("Train Prec: %.2f, Recall: %.2f, Fbeta: %.2f" % (prec, recall, fbeta)) prec, recall, fbeta, support = get_sklearn_metrics( np.asarray(y_test_true), np.asarray(y_test_pred), avg='weighted') print("Test Prec: %.2f, Recall: %.2f, Fbeta: %.2f" % (prec, recall, fbeta)) # Save model weights per epoch to file encoder.save_weights( os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights( os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True) classifier.save_weights( os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'), True) print( get_classification_report(np.asarray(y_train_true), np.asarray(y_train_pred))) print( get_classification_report(np.asarray(y_test_true), np.asarray(y_test_pred)))
def train(BATCH_SIZE, ENC_WEIGHTS, TEM_WEIGHTS, DEC_WEIGHTS):
    print("Loading data...")
    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl'))

    # Build video progressions
    videos_list = []
    start_frame_index = 1
    end_frame_index = VIDEO_LENGTH + 1
    while (end_frame_index <= len(frames_source)):
        frame_list = frames_source[start_frame_index:end_frame_index]
        if (len(set(frame_list)) == 1):
            videos_list.append(range(start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + 1
            end_frame_index = end_frame_index + 1
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH

    videos_list = np.asarray(videos_list, dtype=np.int32)
    n_videos = videos_list.shape[0]

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Build the Spatio-temporal Autoencoder
    print("Creating models...")
    encoder = encoder_model()
    temporizer = temporal_model()
    decoder = decoder_model()
    # print (encoder.summary())
    # print (temporizer.summary())
    # print (decoder.summary())
    autoencoder = autoencoder_model(encoder, temporizer, decoder)
    run_utilities(encoder, temporizer, decoder, autoencoder, ENC_WEIGHTS, TEM_WEIGHTS, DEC_WEIGHTS)
    autoencoder.compile(loss='mean_squared_error', optimizer=OPTIM)

    NB_ITERATIONS = int(n_videos / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False)
    LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS.set_model(autoencoder)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS):
        print("\n\nEpoch ", epoch)
        loss = []

        # Set learning rate every epoch
        LRS.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X = load_X(videos_list, index, DATA_DIR)
            loss.append(autoencoder.train_on_batch(X, X))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIteration: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " +
                         "loss: " + str(loss[len(loss) - 1]) +
                         "\t [" + "{0}>".format("=" * (arrow)))
            stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                generated_images = autoencoder.predict(X, verbose=0)
                orig_image, image = combine_images(generated_images, X)
                image = image * 127.5 + 127.5
                orig_image = orig_image * 127.5 + 127.5
                if epoch == 0:
                    cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_orig.png"), orig_image)
                    cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + ".png"), image)

        # then after each epoch/iteration
        avg_loss = sum(loss) / len(loss)
        logs = {'loss': avg_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"loss\":%f};\n" % (epoch, avg_loss))
        print("\nAvg loss: " + str(avg_loss))

        # Save model weights per epoch to file
        encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True)
        temporizer.save_weights(os.path.join(CHECKPOINT_DIR, 'temporizer_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
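# Note: the learning-rate `schedule` handed to lrs_callback.LearningRateScheduler
# above is defined alongside the other configuration constants. The function
# below is only a representative sketch assuming a simple step decay; the actual
# initial rate and decay boundaries used in the repo are not shown here.
def schedule(epoch):
    base_lr = 1e-4          # assumed initial learning rate
    if epoch < 10:
        return base_lr
    elif epoch < 20:
        return base_lr / 10.0
    return base_lr / 100.0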
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS): print("Loading data definitions.") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl')) # videos_list_1 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=0) videos_list = get_video_lists(frames_source=frames_source, stride=8, frame_skip=0) # videos_list_2 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=1) # videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0) # Load actions from annotations action_labels = hkl.load(os.path.join(DATA_DIR, 'annotations_train_208.hkl')) ped_action_classes, ped_class_count = get_action_classes(action_labels=action_labels) print("Training Stats: " + str(ped_class_count)) if RAM_DECIMATE: frames = load_to_RAM(frames_source=frames_source) if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) # Setup test val_frames_source = hkl.load(os.path.join(VAL_DATA_DIR, 'sources_val_208.hkl')) val_videos_list = get_video_lists(frames_source=val_frames_source, stride=8, frame_skip=0) # Load test action annotations val_action_labels = hkl.load(os.path.join(VAL_DATA_DIR, 'annotations_val_208.hkl')) val_ped_action_classes, val_ped_class_count = get_action_classes(val_action_labels) print("Val Stats: " + str(val_ped_class_count)) # Build the Spatio-temporal Autoencoder print ("Creating models.") encoder = encoder_model() decoder = decoder_model() # Build stacked classifier classifier = ensemble_c3d() run_utilities(encoder, decoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS) sclassifier = stacked_classifier_model(encoder, decoder, classifier) sclassifier.compile(loss=["binary_crossentropy"], optimizer=OPTIM_C, metrics=['accuracy']) print (sclassifier.summary()) n_videos = videos_list.shape[0] n_val_videos = val_videos_list.shape[0] NB_ITERATIONS = int(n_videos/BATCH_SIZE) # NB_ITERATIONS = 1 NB_VAL_ITERATIONS = int(n_val_videos/BATCH_SIZE) # NB_VAL_ITERATIONS = 1 # Setup TensorBoard Callback TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS_cla = lrs_callback.LearningRateScheduler(schedule=cla_schedule) LRS_cla.set_model(sclassifier) print ("Beginning Training.") # Begin Training # Train Classifier print("Training Classifier...") for epoch in range(1, NB_EPOCHS_CLASS+1): print("\n\nEpoch ", epoch) c_loss = [] val_c_loss = [] # # Set learning rate every epoch LRS_cla.on_epoch_begin(epoch=epoch) lr = K.get_value(sclassifier.optimizer.lr) y_train_pred = [] y_train_true = [] print("Learning rate: " + str(lr)) print("c_loss_metrics: " + str(sclassifier.metrics_names)) for index in range(NB_ITERATIONS): if RAM_DECIMATE: X, y = load_X_y_RAM(videos_list, index, frames, ped_action_classes) else: X, y = load_X_y(videos_list, index, DATA_DIR, ped_action_classes) if REV: X_train = np.flip(X[:, 0: int(VIDEO_LENGTH / 2)], axis=1) else: X_train = X[:, 0: int(VIDEO_LENGTH / 2)] y_true_class = y[:, CLASS_TARGET_INDEX] y_true_imgs = X[:, int(VIDEO_LENGTH / 2):] c_loss.append(sclassifier.train_on_batch(X_train, y_true_class)) y_train_true.extend(y_true_class) y_train_pred.extend(sclassifier.predict(X_train, verbose=0)) arrow = int(index / (NB_ITERATIONS / 30)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "c_loss: " + str([ c_loss[len(c_loss) - 1][j] for j in [0, 1]]) + " " + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file z, res = 
encoder.predict(X_train) predicted_images = decoder.predict([z, res]) ped_pred_class = sclassifier.predict(X_train, verbose=0) pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = pred_seq * 127.5 + 127.5 truth_image = arrange_images(y_true_imgs) truth_image = truth_image * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)] y_true_classes = y[:, int(VIDEO_LENGTH / 2):] # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): if y_orig_classes[k, j] > 0.5: label_orig = "crossing" else: label_orig = "not crossing" if y_true_classes[k][j] > 0.5: label_true = "crossing" else: label_true = "not crossing" if ped_pred_class[k][0] > 0.5: label_pred = "crossing" else: label_pred = "not crossing" cv2.putText(pred_seq, label_orig, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, label_pred, (2 + (j + 16) * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, 'truth: ' + label_true, (2 + (j + 16) * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, label_true, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_seq) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_truth.png"), truth_image) # Run over validation data print('') y_val_pred = [] y_val_true = [] for index in range(NB_VAL_ITERATIONS): X, y = load_X_y(val_videos_list, index, VAL_DATA_DIR, val_ped_action_classes) if REV: X_val = np.flip(X[:, 0: int(VIDEO_LENGTH / 2)], axis=1) else: X_val = X[:, 0: int(VIDEO_LENGTH / 2)] y_true_class = y[:, CLASS_TARGET_INDEX] y_true_imgs = X[:, int(VIDEO_LENGTH / 2):] val_c_loss.append(sclassifier.test_on_batch(X_val, y_true_class)) y_val_true.extend(y_true_class) y_val_pred.extend(sclassifier.predict(X_val, verbose=0)) arrow = int(index / (NB_VAL_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS - 1) + " " + "val_c_loss: " + str([ val_c_loss[len(val_c_loss) - 1][j] for j in [0, 1]])) stdout.flush() # Save generated images to file z, res = encoder.predict(X_val) val_predicted_images = decoder.predict([z, res]) val_ped_pred_class = sclassifier.predict(X_val, verbose=0) orig_image = arrange_images(X_val) truth_image = arrange_images(y_true_imgs) pred_image = arrange_images(val_predicted_images) pred_image = pred_image * 127.5 + 127.5 orig_image = orig_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX if epoch == 0: y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)] y_true_classes = y[:, int(VIDEO_LENGTH / 2):] # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): if (y_orig_classes[k, j] > 0.5): label_orig = "crossing" else: label_orig = "not crossing" if (y_true_classes[k][j] > 0.5): label_true = "crossing" else: label_true = "not crossing" cv2.putText(orig_image, label_orig, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, label_true, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_val_orig.png"), orig_image) cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_val_truth.png"), truth_image) # Add 
labels as text to the image
        for k in range(BATCH_SIZE):
            # class_num_y = np.argmax(val_ped_pred_class[k])
            if (val_ped_pred_class[k][0] > 0.5):
                label_pred = "crossing"
            else:
                label_pred = "not crossing"
            for j in range(int(VIDEO_LENGTH / 2)):
                cv2.putText(pred_image, label_pred, (2 + j * (208), 114 + k * 128),
                            font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_cla_val_pred.png"), pred_image)

        # then after each epoch/iteration
        avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
        avg_val_c_loss = np.mean(np.asarray(val_c_loss, dtype=np.float32), axis=0)

        # Calculate Precision and Recall scores
        train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics(
            np.asarray(y_train_true), np.asarray(y_train_pred), avg='binary', pos_label=1)
        val_prec, val_rec, val_fbeta, val_support = get_sklearn_metrics(
            np.asarray(y_val_true), np.asarray(y_val_pred), avg='binary', pos_label=1)
        print("\nTrain Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
              (train_prec, train_rec, train_fbeta))
        print("Val Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
              (val_prec, val_rec, val_fbeta))

        loss_values = np.asarray(
            avg_c_loss.tolist() + [train_prec.tolist()] + [train_rec.tolist()] +
            avg_val_c_loss.tolist() + [val_prec.tolist()] + [val_rec.tolist()],
            dtype=np.float32)
        # loss_values = np.asarray(avg_c_loss.tolist() + train_prec.tolist() +
        #                          train_rec.tolist() +
        #                          avg_val_c_loss.tolist() + val_prec.tolist() +
        #                          val_rec.tolist(), dtype=np.float32)
        precs = ['prec_' + action for action in simple_ped_set]
        recs = ['rec_' + action for action in simple_ped_set]
        c_loss_keys = ['c_' + metric
                       for metric in sclassifier.metrics_names + precs + recs]
        val_c_loss_keys = ['c_val_' + metric
                           for metric in sclassifier.metrics_names + precs + recs]
        loss_keys = c_loss_keys + val_c_loss_keys
        logs = dict(zip(loss_keys, loss_values))
        TC_cla.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses_cla.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, %s};\n" % (epoch, logs))

        print("\nAvg c_loss: " + str(avg_c_loss) +
              " Avg val_c_loss: " + str(avg_val_c_loss))

        # Save model weights per epoch to file
        if FINETUNE_ENCODER:
            encoder.save_weights(
                os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_' + str(epoch) + '.h5'), True)
        if FINETUNE_DECODER:
            decoder.save_weights(
                os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True)
        classifier.save_weights(
            os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'), True)

        print(get_classification_report(np.asarray(y_train_true), np.asarray(y_train_pred)))
        print(get_classification_report(np.asarray(y_val_true), np.asarray(y_val_pred)))
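# get_sklearn_metrics is called throughout with raw classifier outputs and
# (avg, pos_label) keyword arguments. The actual helper is not shown in this
# listing; below is a minimal sketch under the assumption that it thresholds
# sigmoid outputs (or argmaxes one-hot vectors) before delegating to sklearn.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

def get_sklearn_metrics_sketch(y_true, y_pred, avg='binary', pos_label=1):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_pred.ndim > 1 and y_pred.shape[-1] > 1:
        # one-hot / softmax case: compare class indices
        y_true_lbl = np.argmax(y_true, axis=-1)
        y_pred_lbl = np.argmax(y_pred, axis=-1)
    else:
        # sigmoid case: round probabilities at 0.5
        y_true_lbl = np.round(y_true).astype(int).ravel()
        y_pred_lbl = np.round(y_pred).astype(int).ravel()
    return precision_recall_fscore_support(y_true_lbl, y_pred_lbl,
                                           average=avg, pos_label=pos_label)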
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, GEN_WEIGHTS, DIS_WEIGHTS): print("Loading data definitions...") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl')) # Build video progressions videos_list = [] start_frame_index = 1 end_frame_index = VIDEO_LENGTH + 1 while (end_frame_index <= len(frames_source)): frame_list = frames_source[start_frame_index:end_frame_index] if (len(set(frame_list)) == 1): videos_list.append(range(start_frame_index, end_frame_index)) start_frame_index = start_frame_index + 1 end_frame_index = end_frame_index + 1 else: start_frame_index = end_frame_index - 1 end_frame_index = start_frame_index + VIDEO_LENGTH videos_list = np.asarray(videos_list, dtype=np.int32) n_videos = videos_list.shape[0] # Setup validation val_frames_source = hkl.load( os.path.join(VAL_DATA_DIR, 'sources_val_128.hkl')) val_videos_list = [] start_frame_index = 1 end_frame_index = VIDEO_LENGTH + 1 while (end_frame_index <= len(val_frames_source)): val_frame_list = val_frames_source[start_frame_index:end_frame_index] if (len(set(val_frame_list)) == 1): val_videos_list.append(range(start_frame_index, end_frame_index)) start_frame_index = start_frame_index + VIDEO_LENGTH end_frame_index = end_frame_index + VIDEO_LENGTH else: start_frame_index = end_frame_index - 1 end_frame_index = start_frame_index + VIDEO_LENGTH val_videos_list = np.asarray(val_videos_list, dtype=np.int32) n_val_videos = val_videos_list.shape[0] if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) # Build the Spatio-temporal Autoencoder print("Creating models...") encoder = encoder_model() decoder = decoder_model() autoencoder = autoencoder_model(encoder, decoder) autoencoder.compile(loss="mean_squared_error", optimizer=OPTIM_A) intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[1].output) mask_gen_1 = Sequential() mask_gen_1.add(encoder) mask_gen_1.add(intermediate_decoder) mask_gen_1.compile(loss='mean_squared_error', optimizer=OPTIM_G) if ADVERSARIAL: generator = refiner_g_model() discriminator = refiner_d_model() gan = gan_model(autoencoder, generator, discriminator) generator.compile(loss='binary_crossentropy', optimizer='sgd') gan.compile(loss=['mae', 'binary_crossentropy'], loss_weights=LOSS_WEIGHTS, optimizer=OPTIM_G, metrics=['accuracy']) print('GAN') print(gan.summary()) set_trainability(discriminator, True) discriminator.compile(loss='binary_crossentropy', optimizer=OPTIM_D, metrics=['accuracy']) run_utilities(encoder, decoder, autoencoder, generator, discriminator, gan, ENC_WEIGHTS, DEC_WEIGHTS, GEN_WEIGHTS, DIS_WEIGHTS) else: run_utilities(encoder, decoder, autoencoder, 'None', 'None', 'None', ENC_WEIGHTS, DEC_WEIGHTS, 'None', 'None') NB_ITERATIONS = int(n_videos / BATCH_SIZE) # NB_ITERATIONS = 5 NB_VAL_ITERATIONS = int(n_val_videos / BATCH_SIZE) # for i in range(len(decoder.layers)): # print (decoder.layers[i], str(i)) # # exit(0) # Setup TensorBoard Callback TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False) TC_gan = tb_callback.TensorBoard(log_dir=TF_LOG_GAN_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS = lrs_callback.LearningRateScheduler(schedule=schedule) LRS.set_model(autoencoder) print("Beginning Training...") # Begin Training for epoch in range(NB_EPOCHS_AUTOENCODER): print("\n\nEpoch ", epoch) loss = [] val_loss = [] # Set learning rate every epoch LRS.on_epoch_begin(epoch=epoch) lr = K.get_value(autoencoder.optimizer.lr) 
print("Learning rate: " + str(lr)) for index in range(NB_ITERATIONS): # Train Autoencoder X = load_X(videos_list, index, DATA_DIR, (128, 128, 3)) X_train = X[:, 0:10] y_train = X[:, 10:] loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "loss: " + str(loss[len(loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = autoencoder.predict(X_train, verbose=0) orig_image, truth_image, pred_image = combine_images( X_train, y_train, predicted_images) pred_image = pred_image * 127.5 + 127.5 orig_image = orig_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 if epoch == 0: cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_orig.png"), orig_image) cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_image) cv2.imwrite( os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_image) predicted_attn = mask_gen_1.predict(X_train, verbose=0) a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, VIDEO_LENGTH - 10, 16, 16, 1)) np.save( os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_cla_gen1_' + str(epoch) + '.npy'), a_pred) # Run over validation data for index in range(NB_VAL_ITERATIONS): X = load_X(val_videos_list, index, VAL_DATA_DIR, (128, 128, 3)) X_train = X[:, 0:10] y_train = X[:, 10:] val_loss.append(autoencoder.test_on_batch(X_train, y_train)) arrow = int(index / (NB_VAL_ITERATIONS / 40)) stdout.write("\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS - 1) + " " + "val_loss: " + str(val_loss[len(val_loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() # then after each epoch/iteration avg_loss = sum(loss) / len(loss) avg_val_loss = sum(val_loss) / len(val_loss) logs = {'loss': avg_loss, 'val_loss': avg_val_loss} TC.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, \"loss\":%f};\n" % (epoch, avg_loss)) print("\nAvg loss: " + str(avg_loss) + " Avg val loss: " + str(avg_val_loss)) # Save model weights per epoch to file encoder.save_weights( os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True) decoder.save_weights( os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True) predicted_attn = mask_gen_1.predict(X_train, verbose=0) a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, VIDEO_LENGTH - 10, 16, 16, 1)) np.save( os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_cla_gen1_' + str(epoch) + '.npy'), a_pred) # Train AAE if ADVERSARIAL: print("Training Stage II.") exp_memory = ExperienceMemory(memory_length=100) for epoch in range(NB_EPOCHS_GAN): print("\n\nEpoch ", epoch) g_loss = [] val_g_loss = [] d_loss = [] val_d_loss = [] # a_loss = [] # # Set learning rate every epoch # LRS.on_epoch_begin(epoch=epoch) lr = K.get_value(gan.optimizer.lr) print("GAN learning rate: " + str(lr)) lr = K.get_value(discriminator.optimizer.lr) print("Disc learning rate: " + str(lr)) print("g_loss_metrics: " + str(gan.metrics_names)) print("d_loss_metrics: " + str(discriminator.metrics_names)) for index in range(NB_ITERATIONS): # Train Autoencoder X = load_X(videos_list, index, DATA_DIR, (128, 128, 3)) X_hd = load_X(videos_list, index, HD_DATA_DIR, (256, 256, 3)) X128 = X[:, 0:int(VIDEO_LENGTH / 2)] Y128 = autoencoder.predict(X128, verbose=0) X256_real = 
X_hd[:, int(VIDEO_LENGTH / 2):] X256_fake = generator.predict(Y128, verbose=0) trainable_fakes = exp_memory.get_trainable_fakes( current_gens=X256_fake, exp_window_size=4) # Train Discriminator on future images (y_train, not X_train) X = np.concatenate((X256_real, trainable_fakes)) y = np.concatenate( (np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32), np.zeros(shape=(BATCH_SIZE, 10, 1), dtype=np.float32)), axis=0) d_loss.append(discriminator.train_on_batch(X, y)) # Train AAE set_trainability(discriminator, False) y = np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32) g_loss.append(gan.train_on_batch(X128, [X256_real, y])) set_trainability(discriminator, True) # # Train Autoencoder # a_loss.append(autoencoder.train_on_batch(X_train, y_train)) arrow = int(index / (NB_ITERATIONS / 30)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "g_loss: " + str([g_loss[len(g_loss) - 1][j] for j in [0, -1]]) + " " + "d_loss: " + str(d_loss[len(d_loss) - 1]) + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file predicted_images = generator.predict(Y128, verbose=0) orig_image, truth_image, pred_image = combine_images( Y128, X256_real, predicted_images) pred_image = pred_image * 127.5 + 127.5 orig_image = orig_image * 127.5 + 127.5 truth_image = truth_image * 127.5 + 127.5 if epoch == 0: cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_gan_orig.png"), orig_image) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_gan_truth.png"), truth_image) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_gan_pred.png"), pred_image) # Run over validation data print('') for index in range(NB_VAL_ITERATIONS): X = load_X(val_videos_list, index, VAL_DATA_DIR, (128, 128, 3)) X_hd = load_X(val_videos_list, index, VAL_HD_DATA_DIR, (256, 256, 3)) X128_val = X[:, 0:int(VIDEO_LENGTH / 2)] Y128_val = autoencoder.predict(X128, verbose=0) X256_real_val = X_hd[:, int(VIDEO_LENGTH / 2):] X256_fake_val = generator.predict(Y128_val, verbose=0) X = np.concatenate((X256_real_val, X256_fake_val)) y = np.concatenate( (np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32), np.zeros(shape=(BATCH_SIZE, 10, 1), dtype=np.float32)), axis=0) val_d_loss.append(discriminator.test_on_batch(X, y)) y = np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32) val_g_loss.append( gan.test_on_batch(X128_val, [X256_real_val, y])) arrow = int(index / (NB_VAL_ITERATIONS / 40)) stdout.write( "\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS - 1) + " " + "val_g_loss: " + str([val_g_loss[len(val_g_loss) - 1][j] for j in [0, -1]]) + " " + "val_d_loss: " + str(val_d_loss[len(val_d_loss) - 1])) stdout.flush() # then after each epoch/iteration avg_d_loss = np.mean(np.asarray(d_loss, dtype=np.float32), axis=0) avg_val_d_loss = np.mean(np.asarray(val_d_loss, dtype=np.float32), axis=0) avg_g_loss = np.mean(np.asarray(g_loss, dtype=np.float32), axis=0) avg_val_g_loss = np.mean(np.asarray(val_g_loss, dtype=np.float32), axis=0) loss_values = np.asarray(avg_d_loss.tolist() + avg_val_d_loss.tolist() \ + avg_g_loss.tolist() + avg_val_g_loss.tolist(), dtype=np.float32) d_loss_keys = [ 'd_' + metric for metric in discriminator.metrics_names ] g_loss_keys = ['g_' + metric for metric in gan.metrics_names] val_d_loss_keys = [ 'd_val_' + metric for metric in discriminator.metrics_names ] val_g_loss_keys = [ 'g_val_' + metric for metric in gan.metrics_names ] loss_keys = d_loss_keys + val_d_loss_keys 
+ g_loss_keys + val_g_loss_keys
            logs = dict(zip(loss_keys, loss_values))
            TC_gan.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_gan.json'), 'a') as log_file:
                log_file.write("{\"epoch\":%d, %s};\n" % (epoch, logs))

            print("\nAvg d_loss: " + str(avg_d_loss) +
                  " Avg val_d_loss: " + str(avg_val_d_loss) +
                  "\nAvg g_loss: " + str([avg_g_loss[j] for j in [0, -1]]) +
                  " Avg val_g_loss: " + str([avg_val_g_loss[j] for j in [0, -1]]))

            # Save model weights per epoch to file
            encoder.save_weights(
                os.path.join(CHECKPOINT_DIR, 'encoder_gan_epoch_' + str(epoch) + '.h5'), True)
            decoder.save_weights(
                os.path.join(CHECKPOINT_DIR, 'decoder_gan_epoch_' + str(epoch) + '.h5'), True)
            generator.save_weights(
                os.path.join(CHECKPOINT_DIR, 'generator_gan_epoch_' + str(epoch) + '.h5'), True)
            discriminator.save_weights(
                os.path.join(CHECKPOINT_DIR, 'discriminator_gan_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
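# set_trainability is used in the adversarial stage to freeze the
# discriminator while the combined GAN is updated and to unfreeze it again
# afterwards. The real helper is not shown in this listing; a minimal sketch
# of the usual pattern:
def set_trainability_sketch(model, trainable=False):
    model.trainable = trainable
    for layer in model.layers:
        layer.trainable = trainable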
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS): print("Loading data definitions.") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl')) videos_list_1 = get_video_lists(frames_source=frames_source, stride=1, frame_skip=0) videos_list_2 = get_video_lists(frames_source=frames_source, stride=1, frame_skip=1) # videos_list_3 = get_video_lists(frames_source=frames_source, stride=1, frame_skip=2) videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0) # Load actions from annotations action_labels = hkl.load( os.path.join(DATA_DIR, 'annotations_train_208.hkl')) ped_action_classes, ped_class_count = get_action_classes( action_labels=action_labels) print("Training Stats: " + str(ped_class_count)) classwise_videos_list, count = get_classwise_data(videos_list, ped_action_classes) videos_list = prob_subsample(classwise_videos_list, count) if RAM_DECIMATE: frames = load_to_RAM(frames_source=frames_source) # if SHUFFLE: # # Shuffle images to aid generalization # videos_list = np.random.permutation(videos_list) # Setup test test_frames_source = hkl.load( os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl')) test_videos_list = get_video_lists(frames_source=test_frames_source, stride=1) # Load test action annotations test_action_labels = hkl.load( os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl')) test_ped_action_classes, test_ped_class_count = get_action_classes( test_action_labels) print("Test Stats: " + str(test_ped_class_count)) # Build the Spatio-temporal Autoencoder print("Creating models.") # Build stacked classifier classifier = pretrained_c3d() classifier.compile( loss="categorical_crossentropy", optimizer=OPTIM_C, # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy']) metrics=['accuracy']) run_utilities(classifier, CLA_WEIGHTS) n_videos = videos_list.shape[0] n_test_videos = test_videos_list.shape[0] NB_ITERATIONS = int(n_videos / BATCH_SIZE) # NB_ITERATIONS = 1 NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE) # NB_TEST_ITERATIONS = 1 # Setup TensorBoard Callback TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule) LRS_clas.set_model(classifier) print("Beginning Training.") # Begin Training # Train Classifier if CLASSIFIER: print("Training Classifier...") for epoch in range(NB_EPOCHS_CLASS): print("\n\nEpoch ", epoch) c_loss = [] test_c_loss = [] # # Set learning rate every epoch LRS_clas.on_epoch_begin(epoch=epoch) lr = K.get_value(classifier.optimizer.lr) print("Learning rate: " + str(lr)) print("c_loss_metrics: " + str(classifier.metrics_names)) y_train_pred = [] y_train_true = [] for index in range(NB_ITERATIONS): # Train Autoencoder if RAM_DECIMATE: videos_list = prob_subsample(classwise_videos_list, count) X, y = load_X_y_RAM(videos_list, index, frames, ped_action_classes) else: videos_list = prob_subsample(classwise_videos_list, count) X, y = load_X_y(videos_list, index, DATA_DIR, ped_action_classes) X_train = X y_true_class = y[:, CLASS_TARGET_INDEX] c_loss.append(classifier.train_on_batch(X_train, y_true_class)) y_train_true.extend(y_true_class) y_train_pred.extend(classifier.predict(X_train, verbose=0)) arrow = int(index / (NB_ITERATIONS / 30)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "c_loss: " + str([c_loss[len(c_loss) - 1][j] for j in [0, 1]]) + " " + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to 
file ped_pred_class = classifier.predict(X_train, verbose=0) # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = arrange_images(X_train) pred_seq = pred_seq * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH)): class_num_past = np.argmax(y_orig_classes[k, j]) class_num_y = np.argmax(ped_pred_class[k]) cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_past], (2 + j * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, simple_ped_set[class_num_y], (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_seq) # Run over test data print('') y_test_pred = [] y_test_true = [] for index in range(NB_TEST_ITERATIONS): X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, test_ped_action_classes) X_test = X y_true_class = y[:, CLASS_TARGET_INDEX] test_c_loss.append( classifier.test_on_batch(X_test, y_true_class)) y_test_true.extend(y_true_class) y_test_pred.extend(classifier.predict(X_test, verbose=0)) arrow = int(index / (NB_TEST_ITERATIONS / 40)) stdout.write( "\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + " " + "test_c_loss: " + str([test_c_loss[len(test_c_loss) - 1][j] for j in [0, 1]])) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file test_ped_pred_class = classifier.predict(X_test, verbose=0) # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = arrange_images(X_test) pred_seq = pred_seq * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH)): class_num_past = np.argmax(y_orig_classes[k, j]) class_num_y = np.argmax(test_ped_pred_class[k]) cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_past], (2 + j * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, simple_ped_set[class_num_y], (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_test_pred.png"), pred_seq) # then after each epoch/iteration avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0) avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32), axis=0) train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics( np.asarray(y_train_true), np.asarray(y_train_pred), avg=None) test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics( np.asarray(y_test_true), np.asarray(y_test_pred), avg=None) loss_values = np.asarray(avg_c_loss.tolist() + train_prec.tolist() + train_rec.tolist() + avg_test_c_loss.tolist() + test_prec.tolist() + test_rec.tolist(), dtype=np.float32) precs = ['prec_' + action for action in simple_ped_set] recs = ['rec_' + action for action in simple_ped_set] fbeta = ['fbeta_' + action for action in simple_ped_set] c_loss_keys = [ 'c_' + metric for metric in classifier.metrics_names + precs + recs ] test_c_loss_keys = [ 'c_test_' + metric for metric in classifier.metrics_names + precs + recs ] loss_keys = c_loss_keys + test_c_loss_keys logs = dict(zip(loss_keys, loss_values)) TC_cla.on_epoch_end(epoch, logs) # Log the losses with open(os.path.join(LOG_DIR, 'losses_cla.json'), 'a') as log_file: log_file.write("{\"epoch\":%d, %s;\n" % 
(epoch, logs))

            print("\nAvg c_loss: " + str(avg_c_loss) +
                  " Avg test_c_loss: " + str(avg_test_c_loss))
            print("Training Precision per class:" + str(train_prec))
            print("Test Precision per class:" + str(test_prec))
            print("Training Recall per class:" + str(train_rec))
            print("Test Recall per class:" + str(test_rec))

            prec, recall, fbeta, support = get_sklearn_metrics(
                np.asarray(y_train_true), np.asarray(y_train_pred), avg='weighted')
            print("Train Prec: %.2f, Recall: %.2f, Fbeta: %.2f" % (prec, recall, fbeta))
            prec, recall, fbeta, support = get_sklearn_metrics(
                np.asarray(y_test_true), np.asarray(y_test_pred), avg='weighted')
            print("Test Prec: %.2f, Recall: %.2f, Fbeta: %.2f" % (prec, recall, fbeta))

            # Save model weights per epoch to file
            # encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_' + str(epoch) + '.h5'), True)
            # decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True)
            classifier.save_weights(
                os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'), True)

            # get_confusion_matrix(y_train_true, y_train_pred)
            # get_confusion_matrix(y_test_true, y_test_pred)
            print(get_classification_report(np.asarray(y_train_true), np.asarray(y_train_pred)))
            print(get_classification_report(np.asarray(y_test_true), np.asarray(y_test_pred)))
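# arrange_images is called before every cv2.imwrite above. Judging from the
# cv2.putText offsets (j * 208 horizontally, k * 128 vertically) it tiles a
# batch of clips into one grid image, one row per sample and one column per
# frame. A sketch under that assumption (frame size inferred, not confirmed):
import numpy as np

def arrange_images_sketch(video_batch):
    # video_batch: (batch, time, height, width, channels), values in [-1, 1]
    batch_size, seq_len, h, w, c = video_batch.shape
    grid = np.zeros((batch_size * h, seq_len * w, c), dtype=video_batch.dtype)
    for k in range(batch_size):
        for j in range(seq_len):
            grid[k * h:(k + 1) * h, j * w:(j + 1) * w] = video_batch[k, j]
    return grid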
def test(CLA_WEIGHTS): if not os.path.exists(TEST_RESULTS_DIR + '/pred/'): os.mkdir(TEST_RESULTS_DIR + '/pred/') # Setup test test_frames_source = hkl.load( os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl')) # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=8, frame_skip=0) # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=16, frame_skip=0) test_videos_list = get_video_lists(frames_source=test_frames_source, stride=16, frame_skip=2) # Load test action annotations test_action_labels = hkl.load( os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl')) test_ped_action_classes, test_ped_class_count = get_action_classes( test_action_labels, mode='sigmoid') print("Test Stats: " + str(test_ped_class_count)) # Build the Spatio-temporal Autoencoder print("Creating models.") # Build stacked classifier # classifier = pretrained_c3d() classifier = ensemble_c3d() # classifier = c3d_scratch() classifier.compile( loss="binary_crossentropy", optimizer=OPTIM_C, # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy']) metrics=['acc']) # Build attention layer output intermediate_classifier = Model(inputs=classifier.layers[0].input, outputs=classifier.layers[1].output) mask_gen_1 = Sequential() # mask_gen_1.add(encoder) mask_gen_1.add(intermediate_classifier) mask_gen_1.compile(loss='binary_crossentropy', optimizer=OPTIM_C) run_utilities(classifier, CLA_WEIGHTS) n_test_videos = test_videos_list.shape[0] NB_TEST_ITERATIONS = int(n_test_videos / TEST_BATCH_SIZE) # NB_TEST_ITERATIONS = 5 # Setup TensorBoard Callback TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule) LRS_clas.set_model(classifier) if CLASSIFIER: print("Testing Classifier...") # Run over test data print('') y_test_pred = [] y_test_true = [] test_c_loss = [] for index in range(NB_TEST_ITERATIONS): X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, test_ped_action_classes, batch_size=TEST_BATCH_SIZE) X_test = X y_true_class = y[:, CLASS_TARGET_INDEX] test_c_loss.append(classifier.test_on_batch(X_test, y_true_class)) y_test_true.extend(y_true_class) y_test_pred.extend(classifier.predict(X_test, verbose=0)) arrow = int(index / (NB_TEST_ITERATIONS / 40)) stdout.write( "\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + " " + "test_c_loss: " + str([test_c_loss[len(test_c_loss) - 1][j] for j in [0, 1]])) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file test_ped_pred_class = classifier.predict(X_test, verbose=0) # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = arrange_images(X_test) pred_seq = pred_seq * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y # Add labels as text to the image for k in range(TEST_BATCH_SIZE): for j in range(int(VIDEO_LENGTH)): if (y_orig_classes[k, j] > 0.5): label_true = "crossing" else: label_true = "not crossing" if (test_ped_pred_class[k] > 0.5): label_pred = "crossing" else: label_pred = "not crossing" cv2.putText(pred_seq, 'truth: ' + label_true, (2 + j * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, label_pred, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join(TEST_RESULTS_DIR + '/pred/', str(index) + "_cla_test_pred.png"), pred_seq) # then after each epoch avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32), axis=0) 
test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics(
            np.asarray(y_test_true), np.asarray(y_test_pred), avg='binary', pos_label=1)
        print("\nAvg test_c_loss: " + str(avg_test_c_loss))
        print("Test Prec: %.4f, Recall: %.4f, Fbeta: %.4f" %
              (test_prec, test_rec, test_fbeta))

        print("Classification Report")
        print(get_classification_report(np.asarray(y_test_true), np.asarray(y_test_pred)))

        print("Confusion matrix")
        tn, fp, fn, tp = confusion_matrix(y_test_true, np.round(y_test_pred)).ravel()
        print("TN: %.2f, FP: %.2f, FN: %.2f, TP: %.2f" % (tn, fp, fn, tp))
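# get_video_lists(frames_source, stride, frame_skip) supplies the clip index
# lists consumed by load_X_y. A plausible sketch: slide a VIDEO_LENGTH-frame
# window over the per-frame source identifiers, keep only windows that stay
# within one source video, advance by `stride`, and optionally skip frames.
# The real helper is not shown in this listing; video_length=32 is an assumed
# stand-in for the module's VIDEO_LENGTH constant.
import numpy as np

def get_video_lists_sketch(frames_source, stride, frame_skip=0, video_length=32):
    step = frame_skip + 1
    window = video_length * step
    videos_list = []
    start = 0
    while start + window <= len(frames_source):
        # keep the clip only if every frame comes from the same source video
        if len(set(frames_source[start:start + window])) == 1:
            videos_list.append(list(range(start, start + window, step)))
        start += stride
    return np.asarray(videos_list, dtype=np.int32)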
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS): print("Loading data definitions.") frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl')) videos_list_1 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=0) videos_list_2 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=1) videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0) # Load actions from annotations action_labels = hkl.load( os.path.join(DATA_DIR, 'annotations_train_208.hkl')) ped_action_classes, ped_class_count = get_action_classes( action_labels=action_labels, mode='sigmoid') print("Training Stats: " + str(ped_class_count)) # videos_list = remove_zero_classes(videos_list, ped_action_classes) # classwise_videos_list, count = get_classwise_data(videos_list, ped_action_classes) # videos_list = prob_subsample(classwise_videos_list, count) if RAM_DECIMATE: frames = load_to_RAM(frames_source=frames_source) if SHUFFLE: # Shuffle images to aid generalization videos_list = np.random.permutation(videos_list) # Setup validation val_frames_source = hkl.load( os.path.join(VAL_DATA_DIR, 'sources_val_208.hkl')) val_videos_list = get_video_lists(frames_source=val_frames_source, stride=8, frame_skip=0) # Load val action annotations val_action_labels = hkl.load( os.path.join(VAL_DATA_DIR, 'annotations_val_208.hkl')) val_ped_action_classes, val_ped_class_count = get_action_classes( val_action_labels, mode='sigmoid') # val_videos_list = remove_zero_classes(val_videos_list, val_ped_action_classes) print("Val Stats: " + str(val_ped_class_count)) # Build the Spatio-temporal Autoencoder print("Creating models.") # Build stacked classifier # classifier = pretrained_c3d() classifier = ensemble_c3d() # classifier = c3d_scratch() classifier.compile( loss="binary_crossentropy", optimizer=OPTIM_C, # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy']) metrics=['acc']) # Build attention layer output intermediate_classifier = Model(inputs=classifier.layers[0].input, outputs=classifier.layers[1].output) mask_gen_1 = Sequential() # mask_gen_1.add(encoder) mask_gen_1.add(intermediate_classifier) mask_gen_1.compile(loss='binary_crossentropy', optimizer=OPTIM_C) run_utilities(classifier, CLA_WEIGHTS) n_videos = videos_list.shape[0] n_val_videos = val_videos_list.shape[0] NB_ITERATIONS = int(n_videos / BATCH_SIZE) # NB_ITERATIONS = 5 NB_VAL_ITERATIONS = int(n_val_videos / BATCH_SIZE) # NB_VAL_ITERATIONS = 5 # Setup TensorBoard Callback TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule) LRS_clas.set_model(classifier) print("Beginning Training.") # Begin Training # Train Classifier if CLASSIFIER: print("Training Classifier...") for epoch in range(1, NB_EPOCHS_CLASS + 1): print("\n\nEpoch ", epoch) c_loss = [] val_c_loss = [] # # Set learning rate every epoch LRS_clas.on_epoch_begin(epoch=epoch) lr = K.get_value(classifier.optimizer.lr) print("Learning rate: " + str(lr)) print("c_loss_metrics: " + str(classifier.metrics_names)) y_train_pred = [] y_train_true = [] for index in range(NB_ITERATIONS): # Train Autoencoder if RAM_DECIMATE: # videos_list = prob_subsample(classwise_videos_list, count) X, y = load_X_y_RAM(videos_list, index, frames, ped_action_classes) else: # videos_list = prob_subsample(classwise_videos_list, count) X, y = load_X_y(videos_list, index, DATA_DIR, ped_action_classes) X_train = X y_true_class = y[:, CLASS_TARGET_INDEX] 
c_loss.append(classifier.train_on_batch(X_train, y_true_class)) y_train_true.extend(y_true_class) y_train_pred.extend(classifier.predict(X_train, verbose=0)) arrow = int(index / (NB_ITERATIONS / 30)) stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + " " + "c_loss: " + str([c_loss[len(c_loss) - 1][j] for j in [0, 1]]) + " " + "\t [" + "{0}>".format("=" * (arrow))) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file ped_pred_class = classifier.predict(X_train, verbose=0) # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = arrange_images(X_train) pred_seq = pred_seq * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH)): class_num_past = np.argmax(y_orig_classes[k, j]) class_num_y = np.argmax(ped_pred_class[k]) label_true = str(y_orig_classes[k, j]) label_pred = str( [round(float(i), 2) for i in ped_pred_class[k]]) cv2.putText(pred_seq, 'truth: ' + label_true, (2 + j * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, label_pred, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_seq) slices = mask_gen_1.predict(X_train) slice_images = arrange_images(slices) slice_images = slice_images * 127.5 + 127.5 cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_slice_pred.png"), slice_images) # Run over val data print('') y_val_pred = [] y_val_true = [] for index in range(NB_VAL_ITERATIONS): X, y = load_X_y(val_videos_list, index, VAL_DATA_DIR, val_ped_action_classes) X_val = X y_true_class = y[:, CLASS_TARGET_INDEX] val_c_loss.append(classifier.test_on_batch( X_val, y_true_class)) y_val_true.extend(y_true_class) y_val_pred.extend(classifier.predict(X_val, verbose=0)) arrow = int(index / (NB_VAL_ITERATIONS / 40)) stdout.write( "\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS - 1) + " " + "val_c_loss: " + str([val_c_loss[len(val_c_loss) - 1][j] for j in [0, 1]])) stdout.flush() if SAVE_GENERATED_IMAGES: # Save generated images to file val_ped_pred_class = classifier.predict(X_val, verbose=0) # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = arrange_images(X_val) pred_seq = pred_seq * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y # Add labels as text to the image for k in range(BATCH_SIZE): for j in range(int(VIDEO_LENGTH)): class_num_past = np.argmax(y_orig_classes[k, j]) class_num_y = np.argmax(val_ped_pred_class[k]) label_true = str(y_orig_classes[k, j]) label_pred = str( [round(float(i), 2) for i in ped_pred_class[k]]) cv2.putText(pred_seq, 'truth: ' + label_true, (2 + j * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, label_pred, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join( CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_val_pred.png"), pred_seq) # then after each epoch avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0) avg_val_c_loss = np.mean(np.asarray(val_c_loss, dtype=np.float32), axis=0) train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics( np.asarray(y_train_true), np.asarray(y_train_pred), avg='binary', pos_label=1) val_prec, val_rec, val_fbeta, val_support = get_sklearn_metrics( np.asarray(y_val_true), 
np.asarray(y_val_pred), avg='binary', pos_label=1)

            loss_values = np.asarray(
                avg_c_loss.tolist() + [train_prec.tolist()] + [train_rec.tolist()] +
                avg_val_c_loss.tolist() + [val_prec.tolist()] + [val_rec.tolist()],
                dtype=np.float32)
            precs = ['prec_' + action for action in simple_ped_set]
            recs = ['rec_' + action for action in simple_ped_set]
            fbeta = ['fbeta_' + action for action in simple_ped_set]
            c_loss_keys = ['c_' + metric
                           for metric in classifier.metrics_names + precs + recs]
            val_c_loss_keys = ['c_val_' + metric
                               for metric in classifier.metrics_names + precs + recs]
            loss_keys = c_loss_keys + val_c_loss_keys
            logs = dict(zip(loss_keys, loss_values))
            TC_cla.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_cla.json'), 'a') as log_file:
                log_file.write("{\"epoch\":%d, %s\n" % (epoch, str(logs).strip('{')))

            print("\nAvg c_loss: " + str(avg_c_loss) +
                  " Avg val_c_loss: " + str(avg_val_c_loss))
            print("Train Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (train_prec, train_rec, train_fbeta))
            print("Val Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (val_prec, val_rec, val_fbeta))

            # Save model weights per epoch to file
            classifier.save_weights(
                os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'), True)
            classifier.save(
                os.path.join(CHECKPOINT_DIR, 'full_classifier_cla_epoch_' + str(epoch) + '.h5'))

            print(get_classification_report(np.asarray(y_train_true), np.asarray(y_train_pred)))
            print(get_classification_report(np.asarray(y_val_true), np.asarray(y_val_pred)))
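# load_X_y / load_X_y_RAM fetch one mini-batch: the frames referenced by a
# slice of videos_list, scaled to [-1, 1] (the inverse of the
# "* 127.5 + 127.5" de-normalisation used when images are written out), plus
# the per-frame action labels. The sketch below is an assumption: file naming,
# frame size, default batch size and label layout are inferred from the
# surrounding code, not taken from the real helper.
import os
import cv2
import numpy as np

def load_X_y_sketch(videos_list, index, data_dir, action_classes,
                    batch_size=8, frame_shape=(128, 208, 3)):
    clips = videos_list[index * batch_size:(index + 1) * batch_size]
    video_length = len(clips[0])
    X = np.zeros((len(clips), video_length) + frame_shape, dtype=np.float32)
    y = []
    for k, clip in enumerate(clips):
        labels = []
        for j, frame_id in enumerate(clip):
            frame = cv2.imread(os.path.join(data_dir, 'frame_' + str(frame_id) + '.png'),
                               cv2.IMREAD_COLOR)
            X[k, j] = (frame.astype(np.float32) - 127.5) / 127.5
            labels.append(action_classes[frame_id])
        y.append(labels)
    return X, np.asarray(y, dtype=np.float32)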
def test_mtcp(CLA_WEIGHTS): if not os.path.exists(TEST_RESULTS_DIR + '/pred/'): os.mkdir(TEST_RESULTS_DIR + '/pred/') # Setup test test_frames_source = hkl.load( os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl')) # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=8, frame_skip=0) test_videos_list = get_video_lists(frames_source=test_frames_source, stride=16, frame_skip=0) # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=16, frame_skip=2) # Load test action annotations test_action_labels = hkl.load( os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl')) test_ped_action_classes, test_ped_class_count = get_action_classes( test_action_labels, mode='sigmoid') print("Test Stats: " + str(test_ped_class_count)) # Build the Spatio-temporal Autoencoder print("Creating models.") # Build stacked classifier # classifier = pretrained_c3d() classifier = ensemble_c3d() # classifier = c3d_scratch() classifier.compile( loss="binary_crossentropy", optimizer=OPTIM_C, # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy']) metrics=['acc']) run_utilities(classifier, CLA_WEIGHTS) n_test_videos = test_videos_list.shape[0] NB_TEST_ITERATIONS = int(n_test_videos / TEST_BATCH_SIZE) # NB_TEST_ITERATIONS = 5 # Setup TensorBoard Callback TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False) LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule) LRS_clas.set_model(classifier) if CLASSIFIER: print("Testing Classifier...") # Run over test data print('') # Time to correct prediction tcp_list = [] tcp_true_list = [] tcp_pred_list = [] y_test_pred = [] y_test_true = [] test_c_loss = [] index = 0 tcp = 1 while index < NB_TEST_ITERATIONS: X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, test_ped_action_classes, batch_size=TEST_BATCH_SIZE) y_past_class = y[:, 0] y_end_class = y[:, -1] if y_end_class[0] == y_past_class[0]: index = index + 1 continue else: stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1)) stdout.flush() for fnum in range(int(VIDEO_LENGTH / 2) + 1): X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, test_ped_action_classes, batch_size=TEST_BATCH_SIZE) X_test = X y_true_imgs = X[:, int(VIDEO_LENGTH / 2):] y_true_class = y[:, VIDEO_LENGTH - fnum - 1] if y[:, 0] == y_true_class[0]: break if (fnum + 1 > 16): tcp_pred_list.append(y_pred_class[0]) tcp_true_list.append(y_true_class[0]) break y_pred_class = classifier.predict(X_test, verbose=0) y_test_pred.extend(classifier.predict(X_test, verbose=0)) test_c_loss.append( classifier.test_on_batch(X_test, y_true_class)) y_test_true.extend(y_true_class) test_ped_pred_class = classifier.predict(X_test, verbose=0) # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1)) pred_seq = arrange_images(X_test) pred_seq = pred_seq * 127.5 + 127.5 # Save generated images to file z = encoder.predict(X_test) test_predicted_images = decoder.predict(z) test_ped_pred_class = sclassifier.predict(X_test, verbose=0) pred_seq = arrange_images( np.concatenate((X_test, test_predicted_images), axis=1)) pred_seq = pred_seq * 127.5 + 127.5 truth_image = arrange_images(y_true_imgs) truth_image = truth_image * 127.5 + 127.5 font = cv2.FONT_HERSHEY_SIMPLEX y_orig_classes = y[:, 0:int(VIDEO_LENGTH / 2)] y_true_classes = y[:, int(VIDEO_LENGTH / 2):] # Add labels as text to the image for k in range(TEST_BATCH_SIZE): for j in range(int(VIDEO_LENGTH / 2)): if y_orig_classes[k, j] > 0.5: label_orig = 
"crossing" else: label_orig = "not crossing" if y_true_classes[k][j] > 0.5: label_true = "crossing" else: label_true = "not crossing" if test_ped_pred_class[k][0] > 0.5: label_pred = "crossing" else: label_pred = "not crossing" cv2.putText(pred_seq, label_orig, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, label_pred, (2 + (j + 16) * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(pred_seq, 'truth: ' + label_true, (2 + (j + 16) * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.putText(truth_image, label_true, (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA) cv2.imwrite( os.path.join(TEST_RESULTS_DIR + '/mtcp-pred//', str(index) + "_cla_test_pred.png"), pred_seq) cv2.imwrite( os.path.join(TEST_RESULTS_DIR + '/mtcp-truth/', str(index) + "_cla_test_truth.png"), truth_image) if y_true_class[0] != np.round(y_pred_class[0]): index = index + 1 continue else: tcp_pred_list.append(y_pred_class[0]) tcp_true_list.append(y_true_class[0]) tcp_list.append(fnum + 1) index = index + int(VIDEO_LENGTH / 2) # Break from the for loop break # then after each epoch avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32), axis=0) test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics( np.asarray(y_test_true), np.asarray(y_test_pred), avg='binary', pos_label=1) print("\nAvg test_c_loss: " + str(avg_test_c_loss)) print("Mean time to change prediction: " + str(np.mean(np.asarray(tcp_list)))) print("Standard Deviation " + str(np.std(np.asarray(tcp_list)))) print("Number of correct predictions " + str(len(tcp_list))) print("Test Prec: %.4f, Recall: %.4f, Fbeta: %.4f" % (test_prec, test_rec, test_fbeta)) print("Classification Report") print( get_classification_report(np.asarray(y_test_true), np.asarray(y_test_pred))) print("Confusion matrix") tn, fp, fn, tp = confusion_matrix(y_test_true, np.round(y_test_pred)).ravel() print("TN: %.2f, FP: %.2f, FN: %.2f, TP: %.2f" % (tn, fp, fn, tp)) print("-------------------------------------------") print("Test cases where there is a change in label") test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics( np.asarray(tcp_true_list), np.asarray(tcp_pred_list), avg='binary', pos_label=1) print("Test Prec: %.4f, Recall: %.4f, Fbeta: %.4f" % (test_prec, test_rec, test_fbeta)) test_acc = accuracy_score(tcp_true_list, np.round(tcp_pred_list)) print("Test Accuracy: %.4f" % (test_acc)) avg_prec = average_precision_score(tcp_true_list, tcp_pred_list) print("Average precision: %.4f" % (avg_prec)) precisions, recalls, thresholds = precision_recall_curve( tcp_true_list, tcp_pred_list) print("PR curve precisions: " + str(precisions)) print("PR curve recalls: " + str(recalls)) print("PR curve thresholds: " + str(thresholds)) print("PR curve prec mean: %.4f" % (np.mean(precisions))) print("PR curve prec std: %.4f" % (np.std(precisions))) print("Number of thresholds: %.4f" % (len(thresholds))) print("Classification Report") print( get_classification_report(np.asarray(tcp_true_list), np.asarray(tcp_pred_list))) print("Confusion matrix") tn, fp, fn, tp = confusion_matrix(tcp_true_list, np.round(tcp_pred_list)).ravel() print("TN: %.2f, FP: %.2f, FN: %.2f, TP: %.2f" % (tn, fp, fn, tp))