def test_score_threshold_option(self):
    """Test the score_threshold option."""
    score_threshold = 0.5
    option = AudioClassifierOptions(score_threshold=score_threshold)
    classifier = AudioClassifier(_MODEL_FILE, options=option)
    categories = classifier.classify(self._input_tensor)

    for category in categories:
        score = category.score
        self.assertGreaterEqual(
            score, score_threshold,
            'Classification with score lower than threshold found. {0}'.format(
                category))
def __init__(self, train_mode="late_fusion", video_model_path=None, audio_model_path="", time_step=16, base_path="/user/vlongobardi/AFEW/aligned/", feature_name="emobase2010_100", stride=1): # model_type=0, # self.model_type = model_type self.time_step = time_step self.train_mode = train_mode self.feature_name = feature_name self.classes = classes self.lb = LabelBinarizer() self.lb.fit_transform(np.array(classes)) self.feature_num = get_feature_number(self.feature_name) if video_model_path is not None: if train_mode == "late_fusion": if "a_model5_2" in video_model_path: self.model = a_model5_2(14) if "a_model5_3" in video_model_path: self.model = a_model5_3(14) if "a_model7" in video_model_path: self.model = a_model7(14) if "a_model7_1" in video_model_path: self.model = a_model7_1(14) self.model.load_weights(video_model_path) print("VideoClassifier loaded successfully", video_model_path) else: self.model = SharmaNet((self.time_step, 224, 224, 3), train_mode="early", audio_dim=self.feature_num) # , dim=self.model_type) self.model.load_weights(video_model_path) print("VideoClassifier loaded successfully", video_model_path) else: if train_mode == "late_fusion": self.ac = AudioClassifier(audio_model_path) self.fc = FramesClassifier(time_step=time_step) else: self.stride = stride t_files = glob.glob(base_path + "Train" + "/*/*csv") v_files = glob.glob(base_path + "Val" + "/*/*csv") self.csv_fusion = self.generate_feature(t_files, v_files) self.do_training(t_files, v_files)
def test_default_option(self):
    """Check that the default option works correctly."""
    classifier = AudioClassifier(_MODEL_FILE)
    categories = classifier.classify(self._input_tensor)

    # Check that every ground-truth classification is found.
    for gt_classification in self._ground_truth_classifications:
        is_gt_found = False
        for real_classification in categories:
            is_label_match = real_classification.label == gt_classification.label
            is_score_match = abs(real_classification.score -
                                 gt_classification.score) < _ACCEPTABLE_ERROR_RANGE

            # If a matching classification is found, stop the loop.
            if is_label_match and is_score_match:
                is_gt_found = True
                break

        # If no matching classification is found, fail the test.
        self.assertTrue(is_gt_found, '{0} not found.'.format(gt_classification))
def _create_ground_truth_csv(self, output_file=_GROUND_TRUTH_FILE):
    """A util function to regenerate the ground truth result.

    This function is not used in the test but it exists to make adding more
    audio and ground truth data to the test easier in the future.

    Args:
      output_file: Filename to write the ground truth CSV.
    """
    classifier = AudioClassifier(_MODEL_FILE)
    categories = classifier.classify(self._input_tensor)
    with open(output_file, 'w') as f:
        header = ['label', 'score']
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()

        for category in categories:
            writer.writerow({
                'label': category.label,
                'score': category.score,
            })
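# A minimal, hypothetical harness for the test methods above (an assumption,
# not shown in the original snippet): they call self.assert*, so they
# presumably sit in a unittest.TestCase subclass whose setUp prepares
# self._input_tensor and self._ground_truth_classifications. The class name
# and setUp body below are illustrative placeholders only.
import unittest


class AudioClassifierTest(unittest.TestCase):

    def setUp(self):
        super().setUp()
        # Real setup would decode the test audio into the input tensor and
        # parse _GROUND_TRUTH_FILE (see _create_ground_truth_csv above).
        self._input_tensor = None
        self._ground_truth_classifications = []


if __name__ == '__main__':
    unittest.main()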
class VideoClassifier:
    """Audio-video emotion classifier for AFEW clips.

    Combines a frame (video) model and an audio model, either by late fusion
    of their predicted labels or by early fusion of per-frame features.
    """

    def __init__(self, train_mode="late_fusion", video_model_path=None,
                 audio_model_path="", time_step=16,
                 base_path="/user/vlongobardi/AFEW/aligned/",
                 feature_name="emobase2010_100", stride=1):  # model_type=0,
        # self.model_type = model_type
        self.time_step = time_step
        self.train_mode = train_mode
        self.feature_name = feature_name
        self.classes = classes
        self.lb = LabelBinarizer()
        self.lb.fit_transform(np.array(classes))
        self.feature_num = get_feature_number(self.feature_name)
        if video_model_path is not None:
            if train_mode == "late_fusion":
                # Note: "a_model7" also matches "a_model7_1" paths;
                # the later assignment wins.
                if "a_model5_2" in video_model_path:
                    self.model = a_model5_2(14)
                if "a_model5_3" in video_model_path:
                    self.model = a_model5_3(14)
                if "a_model7" in video_model_path:
                    self.model = a_model7(14)
                if "a_model7_1" in video_model_path:
                    self.model = a_model7_1(14)
                self.model.load_weights(video_model_path)
                print("VideoClassifier loaded successfully", video_model_path)
            else:
                self.model = SharmaNet((self.time_step, 224, 224, 3),
                                       train_mode="early",
                                       audio_dim=self.feature_num)  # , dim=self.model_type)
                self.model.load_weights(video_model_path)
                print("VideoClassifier loaded successfully", video_model_path)
        else:
            if train_mode == "late_fusion":
                self.ac = AudioClassifier(audio_model_path)
                self.fc = FramesClassifier(time_step=time_step)
            else:
                self.stride = stride
            t_files = glob.glob(base_path + "Train" + "/*/*csv")
            v_files = glob.glob(base_path + "Val" + "/*/*csv")
            self.csv_fusion = self.generate_feature(t_files, v_files)
            self.do_training(t_files, v_files)

    def do_training(self, t_files, v_files):
        skips = 0
        iters = 1
        bs = 16
        ep = 75
        opts = ["SGD", "Adam"]
        lrs = [0.01]
        models = []
        if self.train_mode == "late_fusion":
            models = [a_model5_2, a_model5_3, a_model7, a_model7_1]
        elif self.train_mode == "early_fusion":
            models = [SharmaNet]
        models_name = [x.__name__ for x in models]
        for index, model in enumerate(models):
            for opt in opts:
                for lr in lrs:
                    for iteration in range(iters):
                        if skips > 0:
                            skips -= 1
                            continue
                        train_infos = {
                            "iteration": iteration,
                            "model_name": models_name[index],
                            "batch_size": bs,
                            "epoch": ep,
                            "lr": lr,
                            "opt": opt
                        }
                        print(
                            "\n\n##############################################################################\n"
                            "########################## ITERATION " + str(iteration + 1) +
                            " of " + str(iters) + " ##########################\n"
                            "##############################################################################\n"
                            "epochs:", ep, "batch_size:", bs,
                            "\nmodel:", models_name[index], "in", models_name,
                            "\nopt:", opt, "in", opts,
                            "\nlr:", lr, "in", lrs)
                        m = None
                        if self.train_mode == "late_fusion":
                            train_infos["generator1"] = self.late_gen
                            train_infos["generator2"] = self.late_gen
                            m = model(14)
                        elif self.train_mode == "early_fusion":
                            train_infos["generator1"] = self.early_gen_train
                            train_infos["generator2"] = self.early_gen_new_val
                            t_files, v_files = self.csv_fusion["train"], self.csv_fusion["val"]
                            m = model((self.time_step, 224, 224, 3),
                                      train_mode="early",
                                      audio_dim=self.feature_num)  # , dim=self.model_type)
                            # Freeze the first 8 layers (the pretrained backbone).
                            for layer in m.layers[0:8]:
                                layer.trainable = False
                        # self.model =
                        self.train(t_files, v_files, train_infos, m)

    def generate_feature(self, t_files, v_files):
        if self.train_mode == "late_fusion":
            if not exists('lables_late_fusion' + self.feature_name + '.csv'):
                print("\n##### GENERATING CSV FOR LATE FUSION... #####")
                csv_late_fusion = self._generate_data_for_late_fusion(t_files + v_files)
                print("\n##### CSV GENERATED! #####")
            else:
                csv_late_fusion = self.load_late_csv()
            return csv_late_fusion
        elif self.train_mode == "early_fusion":
            if not exists('features_path_early_fusion_train_' + self.feature_name + '.csv'):
                print("\n##### GENERATING CSV FOR EARLY FUSION... #####")
                csv_early_fusion = {
                    "train": self._generate_data_for_early_fusion(t_files, "train"),
                    "val": self._generate_data_for_early_fusion(v_files, "val")
                }
                print("\n##### CSV GENERATED! #####")
            else:
                csv_early_fusion = {}
                for name in ["train", "val"]:
                    csv_early_fusion[name] = self.load_early_csv(name)
            return csv_early_fusion

    def load_late_csv(self):
        csv_late_fusion = {}
        print('Opening csv: lables_late_fusion' + self.feature_name + '.csv')
        with open('lables_late_fusion' + self.feature_name + '.csv', 'r') as f:
            f.readline()  # skip the header
            csv_reader = csv.reader(f)
            for clip_id, ground_truth, frame_label, audio_label in csv_reader:
                csv_late_fusion[clip_id] = [ground_truth, frame_label, audio_label]
        return csv_late_fusion

    def load_early_csv(self, dataset):
        csv_early_fusion = {}
        print("Opening csv: features_path_early_fusion_" + dataset + "_" +
              self.feature_name + '.csv')
        with open('features_path_early_fusion_' + dataset + "_" +
                  self.feature_name + '.csv', 'r') as f:
            f.readline()  # skip the header
            csv_reader = csv.reader(f)
            for clip_id, ground_truth, frame_label, audio_label in csv_reader:
                if clip_id not in csv_early_fusion:
                    csv_early_fusion[clip_id] = []
                csv_early_fusion[clip_id].append(
                    [ground_truth, frame_label, audio_label])
        return csv_early_fusion

    def _generate_data_for_late_fusion(self, total_files):
        my_csv = {}
        total = len(total_files)
        for file in total_files:
            clip_id = file.split(".")[0]
            audio_path = clip_id.replace("AFEW/aligned", self.feature_name)
            print("audio_path", audio_path)
            label_from_audio = self.ac.clip_classification(audio_path)
            ground_truth, label_from_frame = self.fc.predict(file)
            clip_id = basename(clip_id)
            my_csv[clip_id] = [ground_truth, label_from_frame, label_from_audio]
            print(len(my_csv), "/", total)
        with open('lables_late_fusion' + self.feature_name + '.csv', 'w') as f:
            f.write("clip_id, ground_truth, frame_label, audio_label\n")
            for k in my_csv:
                f.write(str(k) + "," + str(my_csv[k][0]) + "," +
                        str(my_csv[k][1]) + "," + str(my_csv[k][2]) + "\n")
        return my_csv

    def _generate_data_for_early_fusion(self, files, name):
        # '/user/vlongobardi/AFEW/aligned/Train/Angry/012738600.csv'
        # '/user/vlongobardi/early_feature/framefeature/Train/Angry/012738600_0.dat'
        # '/user/vlongobardi/early_feature/emobase2010_600/Train/Angry/012738600_0.arff'
        if "full" in self.feature_name:
            frame_to_discard = 0
        else:
            # Half the audio window, expressed in 40 ms video frames.
            window_size = int(self.feature_name.split("_")[1])
            frame_to_discard = ceil(window_size / 2 / 40)
        my_csv = {}
        for file in tqdm(files):
            clip_id_temp = file.split(".")[0]
            base_path = clip_id_temp.replace("AFEW/aligned",
                                             "early_feature/framefeature") + "*"
            frames_features_path = glob.glob(base_path)
            audio_features_path = glob.glob(
                base_path.replace("early_feature/framefeature",
                                  "late_feature/" + self.feature_name))
            frames_features_path.sort(
                key=lambda x: int(x.split("_")[-1].split(".")[0]))
            if "full" not in self.feature_name:
                audio_features_path.sort(
                    key=lambda x: int(x.split("_")[-1].split(".")[0]))
            ground_truth = basename(dirname(clip_id_temp))
            clip_id = basename(clip_id_temp)

            # Discard video frames based on the window size.
            frames_features_path = frames_features_path[frame_to_discard:]
            if len(frames_features_path) < 16:
                continue
                # print("FRAME TOO FEW SAMPLES:", len(frames_features_path), clip_id)
            if len(audio_features_path) < 16 and "full" not in self.feature_name:
                continue
                # print("AUDIO TOO FEW SAMPLES:", len(audio_features_path), clip_id)
            for index, frame in enumerate(frames_features_path):
                if clip_id not in my_csv.keys():
                    my_csv[clip_id] = []
                if "full" not in self.feature_name:
                    my_csv[clip_id].append(
                        [ground_truth, frame, audio_features_path[index]])
                else:
                    my_csv[clip_id].append(
                        [ground_truth, frame, audio_features_path[0]])
        with open('features_path_early_fusion_' + name + "_" +
                  self.feature_name + '.csv', 'w') as f:
            f.write("clip_id, ground_truth, frame_label, audio_label\n")
            for key in my_csv:
                for line in my_csv[key]:
                    f.write(key + "," + line[0] + "," + line[1] + "," +
                            line[2] + "\n")
        return my_csv

    def late_gen(self, list_files, batch_size, mode="Train"):
        c = 0
        if mode == "Train":
            random.shuffle(list_files)
        while True:
            labels = []
            features = np.zeros((batch_size, 2 * len(self.classes))).astype('float')
            for i in range(c, c + batch_size):
                clip_id = basename(list_files[i].split(".")[0])
                ground_truth, label_from_frame, label_from_audio = self.csv_fusion[clip_id]
                # Concatenate one-hot audio and frame predictions into a
                # single 2 * n_classes feature vector.
                features[i - c] = np.append(
                    self.lb.transform(np.array([label_from_audio])),
                    self.lb.transform(np.array([label_from_frame])))
                labels.append(ground_truth)
            c += batch_size
            if c + batch_size > len(list_files):
                c = 0
                if mode == "Train":
                    random.shuffle(list_files)
                if mode == "eval":
                    break
            labels = self.lb.transform(np.array(labels))
            yield features, labels

    def early_gen_train(self, list_files, batch_size):
        c = 0
        clip_ids = list(list_files.keys())
        random.shuffle(clip_ids)
        while True:
            labels = []
            features = [
                np.zeros((batch_size, self.time_step,
                          self.feature_num)).astype('float'),
                np.zeros((batch_size, self.time_step, 224, 224, 3)).astype('float')
            ]
            for i in range(c, c + batch_size):
                clip_id = clip_ids[i]
                video_info = list_files[clip_id]
                ground_truth = video_info[0][0]
                csv_path = '/user/vlongobardi/AFEW/aligned/Train/GroundTruth/ID.csv'
                csv_path = csv_path.replace("GroundTruth", ground_truth).replace("ID", clip_id)
                images = DataGen(csv_path, '', 1, 31, NoAug(), 16, 1, 12, test=True)[0][0][0]
                first_frame_num = int(video_info[0][1].split("_")[-1].split(".")[0])
                # Sample a random window of time_step frames from the clip.
                start = random.randint(0, len(video_info) - self.time_step)
                for index, elem in enumerate(video_info[start:self.time_step + start]):
                    ground_truth, _, audio_path = elem
                    features[0][i - c][index] = np.array(
                        from_arff_to_feture(audio_path)).reshape(self.feature_num, )
                    features[1][i - c][index] = images[first_frame_num + start + index]
                labels.append(ground_truth)
            c += batch_size
            if c + batch_size > len(clip_ids):
                c = 0
                random.shuffle(clip_ids)
            labels = self.lb.transform(np.array(labels)).reshape((batch_size, 7))
            yield features, labels

    def early_gen_new_val(self, list_files, batch_size, mode="val", stride=1):
        """50% stride over all the files."""
        c = 0
        labels = features = []
        clip_ids = list(list_files.keys())
        while True:
            for clip_id in clip_ids:
                video_info = list_files[clip_id]
                ground_truth = video_info[0][0]
                csv_path = '/user/vlongobardi/AFEW/aligned/Val/GroundTruth/ID.csv'
                csv_path = csv_path.replace("GroundTruth", ground_truth).replace("ID", clip_id)
                images = DataGen(csv_path, '', 1, 31, NoAug(), 16, 1, 12, test=True)[0][0][0]
                first_frame_num = int(video_info[0][1].split("_")[-1].split(".")[0])
                for start in range(0, len(video_info) - self.time_step,
                                   self.time_step // stride):
                    if c == 0:
                        labels = []
                        features = [
                            np.zeros((batch_size, self.time_step,
                                      self.feature_num)).astype('float'),
                            np.zeros((batch_size, self.time_step, 224, 224,
                                      3)).astype('float')
                        ]
                    for index, elem in enumerate(video_info[start:self.time_step + start]):
                        audio_path = elem[2]
                        features[0][c][index] = np.array(
                            from_arff_to_feture(audio_path)).reshape(self.feature_num, )
                        features[1][c][index] = images[first_frame_num + start + index]
                    labels.append(ground_truth)
                    c += 1
                    if c == batch_size:
                        c = 0
                        labels = self.lb.transform(np.array(labels)).reshape((batch_size, 7))
                        yield features, labels
            if mode == "eval":
                break

    def early_gen_test_clip(self, list_files, clip_id, stride=1):
        """Stride over a single file, so it must be called once per clip."""
        ground_truth = list_files[0][0]
        csv_path = '/user/vlongobardi/AFEW/aligned/Val/GroundTruth/ID.csv'
        csv_path = csv_path.replace("GroundTruth", ground_truth).replace("ID", clip_id)
        first_frame_num = int(list_files[0][1].split("_")[-1].split(".")[0])
        start = 0
        end = len(list_files) - self.time_step
        while True:
            labels = []
            features = [
                np.zeros((1, self.time_step, self.feature_num)).astype('float'),
                np.zeros((1, self.time_step, 224, 224, 3)).astype('float')
            ]
            images = DataGen(csv_path, '', 1, 31, NoAug(), 16, 1, 12, test=True)[0][0][0]
            for index, elem in enumerate(list_files[start:start + self.time_step]):
                audio_path = elem[2]
                features[0][0][index] = np.array(
                    from_arff_to_feture(audio_path)).reshape(self.feature_num, )
                features[1][0][index] = images[first_frame_num + start + index]
            labels.append(ground_truth)
            start += self.time_step // stride
            if start >= end:
                break
            labels = self.lb.transform(np.array(labels)).reshape((1, 7))
            yield features, labels

    def get_validation_dim(self):
        if self.stride == 2:
            if "full" in self.feature_name:
                return 141
            elif "600" in self.feature_name:
                return 0
            elif "300" in self.feature_name:
                return 114
            elif "100" in self.feature_name:
                return 128
        elif self.stride == 1:
            if "full" in self.feature_name:
                return 76
            elif "600" in self.feature_name:
                return 0
            elif "300" in self.feature_name:
                return 63
            elif "100" in self.feature_name:
                return 69
        elif self.stride == self.time_step:
            return 0

    def train(self, train_files, val_files, train_data, model):
        if train_data["opt"] == "Adam":
            optimizer = Adam(lr=train_data["lr"])
        else:
            optimizer = SGD(lr=train_data["lr"])
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        train_gen = train_data["generator1"](train_files, train_data["batch_size"])
        no_of_training_images = len(train_files)
        if self.train_mode == "early_fusion":
            no_of_val_images = self.get_validation_dim()
            # val_gen = train_data["generator2"](val_files, train_data["batch_size"], stride)
        else:
            no_of_val_images = len(val_files)
            # val_gen = train_data["generator2"](val_files, train_data["batch_size"])

        # stride = 1: no overlapping
        # stride = 2: 50% overlapping
        # stride = time_step: stride of 1
        model_name = "_lr" + str(train_data["lr"]) + "_Opt" + train_data["opt"] + \
                     "_Model" + str(train_data["model_name"]) + \
                     "_Feature" + self.feature_name + "_" + \
                     str(train_data["iteration"]) + "_" + \
                     self.train_mode  # + "_modelType" + str(self.model_type)
        if self.train_mode == "early_fusion":
            model_name += "stride" + str(self.stride)
        model_name += ".h5"

        def custom_scheduler(epoch):
            print(0.1 / 10 ** (floor(epoch / 25) + 1))
            return 0.1 / 10 ** (floor(epoch / 25) + 1)

        class CheckValCMCallback(keras.callbacks.Callback):
            def __init__(self, m, dim, validation_files, epoch):
                super().__init__()
                self.vc = m
                self.dim = dim
                self.val_files = validation_files
                self.epoch = epoch
                self.accs = []

            def on_epoch_end(self, epoch, logs=None):
                if self.vc.train_mode == "early_fusion":
                    csv_fusion = self.vc.load_early_csv("val")
                    # gen = self.vc.early_gen_new_val(csv_fusion, 16, "eval")
                    # predictions = []
                    # ground_truths = []
                    # for x in gen:
                    #     ground_truths.append(self.vc.lb.inverse_transform(x[1])[0])
                    #     pred = self.model.predict(x[0])
                    #     pred = self.vc.lb.inverse_transform(pred)
                    #     predictions.append(pred[0])
                    # self.vc.print_stats(ground_truths, predictions, "Video" + str(epoch))
                    gen = self.vc.early_gen_new_val(csv_fusion, 16, "eval")
                else:
                    gen = self.vc.late_gen(self.val_files, 16, "eval")
                acc = self.model.evaluate_generator(gen, self.dim, workers=0)
                self.accs.append(acc)
                print("Evaluate:", acc)
                if self.epoch == epoch + 1:
                    print("Validation_Accuracy =", self.accs)

        cb = [
            ModelCheckpoint(
                filepath="weights_new_fusion/videoModel__t{accuracy:.4f}_epoch{epoch:02d}" + model_name,
                monitor="val_accuracy",
                save_weights_only=True),
            TensorBoard(log_dir="NewFusionLogs/" + self.train_mode + "/" + self.feature_name,
                        write_graph=True,
                        write_images=True)
        ]
        if self.train_mode == "early_fusion":
            cb += [LearningRateScheduler(custom_scheduler)]
        cb += [CheckValCMCallback(self, no_of_val_images, val_files, train_data["epoch"])]
        history = model.fit_generator(
            train_gen,
            # validation_data=val_gen,
            epochs=train_data["epoch"],
            steps_per_epoch=(no_of_training_images * 2 // train_data["batch_size"]),
            # validation_steps=(no_of_val_images // train_data["batch_size"]),
            workers=0,
            verbose=1,
            callbacks=cb)
        print("\n\nTrain_Accuracy =", history.history['accuracy'])
        # print("\nVal_Accuracy =", history.history['val_accuracy'])
        print("\n\nTrain_Loss =", history.history['loss'])
        # print("\nVal_Loss =", history.history['val_loss'])
        # model_name = "videoModel_" + "_epoch" + str(train_data["epoch"]) + model_name
        # print("\n\nModels saved as:", model_name)
        # print("Train:", history.history['accuracy'][-1], "Val:", history.history['val_accuracy'][-1])
        # model.save("video_models/" + model_name)
        # model.save_weights("video_models_early_weights/" + model_name)
        # return model

    def print_stats(self, ground_truths, predictions, name):
        cm = confusion_matrix(ground_truths, predictions, labels=self.classes)
        print("###" + name + " Results###\n")
        # print_cm(cm, self.classes)
        # print("\n\n")
        print_cm(np.around(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis],
                           decimals=3), self.classes)
        print("\n\n")
        print("Accuracy score: ", accuracy_score(ground_truths, predictions), "\n\n")
        # print("Report")
        # print(classification_report(ground_truths, predictions))
        print("#################################################################end###\n\n\n")

    def print_confusion_matrix(self, stride=1):
        """IMPLEMENT FOR EARLY FUSION MISSING"""
        csv_fusion = {}
        predictions = []
        ground_truths = []
        if self.train_mode == "early_fusion":
            csv_fusion = self.load_early_csv("val")
            print("CSV loaded", len(csv_fusion))
            gen = self.early_gen_new_val(csv_fusion, 1, "eval", stride)
            for x in gen:
                ground_truths.append(self.lb.inverse_transform(x[1])[0])
                pred = self.model.predict(x[0])
                pred = self.lb.inverse_transform(pred)
                predictions.append(pred[0])
                # print("\ngt, pred", self.lb.inverse_transform(x[1]), pred)
            self.print_stats(ground_truths, predictions, "Video")
        else:
            with open('lables_late_fusion' + self.feature_name + '.csv', 'r') as f:
                f.readline()  # skip the header
                csv_reader = csv.reader(f)
                for row in csv_reader:
                    csv_fusion[row[0]] = [row[1], row[2], row[3]]
            a_p = []
            f_p = []
            files = glob.glob("/user/vlongobardi/late_feature/" +
                              self.feature_name + "/*/*csv")
            for file in files:
                clip_id = basename(file).split(".")[0]
                ground_truth, frame_pred, audio_pred = csv_fusion[clip_id]
                sample = np.append(self.lb.transform(np.array([audio_pred])),
                                   self.lb.transform(np.array([frame_pred])))
                pred = self.model.predict(sample.reshape((1, 14)))
                pred = self.lb.inverse_transform(pred)[0]
                predictions.append(pred)
                a_p.append(audio_pred)
                f_p.append(frame_pred)
                ground_truths.append(ground_truth)
            self.print_stats(ground_truths, predictions, "Video")
            self.print_stats(ground_truths, a_p, "Audio")
            self.print_stats(ground_truths, f_p, "Frame")
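# A minimal usage sketch (an assumption, not part of the original module):
# how VideoClassifier might be driven for late-fusion training and for
# evaluating a saved model. The weight file paths below are hypothetical.

if __name__ == "__main__":
    # Train late-fusion models from scratch: the constructor generates (or
    # loads) the fusion CSV and then runs the do_training grid over the
    # configured optimizers and learning rates.
    vc = VideoClassifier(train_mode="late_fusion",
                         audio_model_path="audio_models/audioModel_emobase2010_100.h5",
                         feature_name="emobase2010_100")

    # Alternatively, load a previously trained late-fusion model (the model
    # variant is inferred from the file name) and print its confusion matrix
    # on the validation split.
    vc = VideoClassifier(train_mode="late_fusion",
                         video_model_path="weights_new_fusion/videoModel_a_model7_1.h5",
                         feature_name="emobase2010_100")
    vc.print_confusion_matrix()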