def __init__(self, unique_section_names, target_dir, mode):
    """Build the training dataset for one machine type.

    Stacks feature vectors of every training file over all sections and
    builds a per-vector section label used for conditioning (integer class
    indices, not one-hot).

    Args:
        unique_section_names (numpy.ndarray): section names, e.g. "section_00".
        target_dir (str): base directory of the target machine type.
        mode (bool): True for development mode, False for evaluation mode.
    """
    super().__init__()
    n_files_ea_section = []  # number of files for each section
    n_vectors_ea_file = []  # number of vectors for each file
    data = numpy.empty(
        (0, CONFIG["feature"]["n_frames"] * CONFIG["feature"]["n_mels"]),
        dtype=float,
    )

    for section_name in unique_section_names:
        # get file list for each section
        # all values of y_true are zero in training
        print("target_dir : %s" % (target_dir + "_" + section_name))
        files, _ = util.file_list_generator(
            target_dir=target_dir,
            section_name=section_name,
            dir_name="train",
            mode=mode,
        )
        print("number of files : %s" % (str(len(files))))
        n_files_ea_section.append(len(files))

        # extract features from audio files and
        # concatenate them into Numpy array.
        features, n_features = concat_features(files)

        data = numpy.append(data, features, axis=0)
        n_vectors_ea_file.append(n_features)

    # per-file vector counts, flattened across ALL sections in order
    n_vectors_ea_file = flatten(n_vectors_ea_file)

    # make target labels for conditioning
    # they are not one-hot vector!
    labels = numpy.zeros((data.shape[0]), dtype=int)
    start_index = 0
    # BUGFIX: n_vectors_ea_file spans all sections, so it must be indexed
    # with a global file cursor.  The previous code indexed it with the
    # section-local file_id (which restarts at 0 each section), re-reading
    # the first section's counts for every later section and misaligning
    # labels whenever files have differing numbers of vectors.
    file_cursor = 0
    for section_index in range(unique_section_names.shape[0]):
        for _ in range(n_files_ea_section[section_index]):
            n_vectors = n_vectors_ea_file[file_cursor]
            labels[start_index : start_index + n_vectors] = section_index
            start_index += n_vectors
            file_cursor += 1

    # 1D vector to 2D image (1ch)
    self.data = data.reshape(
        (
            data.shape[0],
            1,  # number of channels
            CONFIG["feature"]["n_frames"],
            CONFIG["feature"]["n_mels"],
        )
    )
    self.labels = labels
def fit_gamma_dist(model, target_dir, mode):
    """
    - Calculate anomaly scores over sections.
    - Fit gamma distribution for anomaly scores.
    - Save the parameters of the distribution.

    Args:
        model: trained model passed through to calc_anomaly_score.
        target_dir (str): base directory of the target machine type.
        mode (bool): True for development mode, False for evaluation mode.
    """
    section_names = util.get_section_names(target_dir, dir_name="train")
    dataset_scores = numpy.array([], dtype=numpy.float64)

    # calculate anomaly scores over sections
    for section_index, section_name in enumerate(section_names):
        section_files, _ = util.file_list_generator(
            target_dir=target_dir,
            section_name=section_name,
            dir_name="train",
            mode=mode,
        )
        # one anomaly score per training file of this section
        # (comprehension replaces the old pre-allocated list filled by index)
        section_scores = numpy.array(
            [
                calc_anomaly_score(
                    model, file_path=file_path, section_index=section_index
                )
                for file_path in section_files
            ]
        )
        dataset_scores = numpy.append(dataset_scores, section_scores)
    # NOTE: the redundant numpy.array(dataset_scores) re-wrap was removed;
    # numpy.append already returns an ndarray.

    # fit gamma distribution for anomaly scores
    gamma_params = list(scipy.stats.gamma.fit(dataset_scores))

    # save the parameters of the distribution
    score_file_path = "{model}/score_distr_{machine_type}.pkl".format(
        model=CONFIG["model_directory"], machine_type=os.path.split(target_dir)[1]
    )
    joblib.dump(gamma_params, score_file_path)
def main(): """ Perform model evaluation. """ # check mode # "development": mode == True # "evaluation": mode == False mode = util.command_line_chk() # constant: True or False if mode is None: sys.exit(-1) # make result directory os.makedirs(CONFIG["result_directory"], exist_ok=True) # load base_directory list dir_list = util.select_dirs(config=CONFIG, mode=mode) # initialize lines in csv for AUC and pAUC csv_lines = [] performance = {"section": None, "all": None} # anomaly scores and decision results score_list = {"anomaly": None, "decision": None} if mode: performance["all"] = [] for idx, target_dir in enumerate(dir_list): print("===============================================") print("[%d/%d] %s" % (idx + 1, len(dir_list), target_dir)) print("================ MODEL LOAD =================") model = load_model(config=CONFIG, machine_type=os.path.split(target_dir)[1]) decision_threshold = calc_decision_threshold(target_dir) if mode: # results for each machine type csv_lines.append([os.path.split(target_dir)[1] ]) # append machine type csv_lines.append([ "section", "domain", "AUC", "pAUC", "precision", "recall", "F1 score" ]) performance["section"] = [] for dir_name in ["source_test", "target_test"]: for section_name in util.get_section_names(target_dir, dir_name=dir_name): # load test file test_files, y_true = util.file_list_generator( target_dir=target_dir, section_name=section_name, dir_name=dir_name, mode=mode, ) print( "============== BEGIN TEST FOR A SECTION %s OF %s ==============" % (section_name, dir_name)) # - perform test for a section # - anomaly scores and decision results are saved in score_list y_pred = test_section(model, test_files, decision_threshold, score_list) # save anomaly scores and decision results save_anomaly_score(score_list, target_dir, section_name, dir_name) if mode: # evaluation_scores (tuple): auc, p_auc, prec, recall, f1_score eval_scores = calc_evaluation_scores( y_true, y_pred, decision_threshold) csv_lines.append([ 
section_name.split("_", 1)[1], dir_name.split("_", 1)[0], *eval_scores, # unpack ]) performance["section"].append(eval_scores) performance["all"].append(eval_scores) print( "============ END OF TEST FOR A SECTION %s OF %s ============\n" % (section_name, dir_name)) if mode: # calculate averages for AUCs and pAUCs csv_lines = calc_performance_section(performance["section"], csv_lines) del model if mode: # calculate averages for AUCs and pAUCs over all sections csv_lines = calc_performance_all(performance["all"], csv_lines) # output results save_result(csv_lines)
def main(): """ Perform model training and validation. """ # check mode # "development": mode == True # "evaluation": mode == False mode = util.command_line_chk() # constant: True or False if mode is None: sys.exit(-1) # make output directory os.makedirs(CONFIG["model_directory"], exist_ok=True) # load base_directory list dir_list = util.select_dirs(config=CONFIG, mode=mode) for idx, target_dir in enumerate(dir_list): print("===============================================") print("[%d/%d] %s" % (idx + 1, len(dir_list), target_dir)) print("\n============== DATASET_GENERATOR ==============") # generate file list under "target_dir" directory. files, _ = util.file_list_generator( target_dir=target_dir, section_name="*", dir_name="train", mode=mode, ) dcase_dataset = DcaseDataset(files) # generate dataset from file list. print("===============================================") print("\n=========== DATALOADER_GENERATOR ==============") data_loader = {"train": None, "val": None} data_loader["train"], data_loader["val"] = get_dataloader( dcase_dataset) print("===============================================") print("\n================ MODEL TRAINING ===============") model = get_model().to(DEVICE) optimizer, _ = get_optimizer(model) # optimizer, scheduler = get_optimizer(model) # optional # display summary of model through torchinfo summary( model, input_size=( CONFIG["training"]["batch_size"], CONFIG["feature"]["n_mels"] * CONFIG["feature"]["n_frames"], ), ) # training loop for epoch in range(1, CONFIG["training"]["epochs"] + 1): print("Epoch {:2d}: ".format(epoch), end="") training( model=model, data_loader=data_loader["train"], optimizer=optimizer, # scheduler=scheduler # optional ) validation(model=model, data_loader=data_loader["val"]) del data_loader # delete the dataset for training. 
# fit gamma distribution for anomaly scores # and save the parameters of the distribution fit_gamma_dist(dcase_dataset, model, target_dir) print("============== SAVE MODEL ==============") save_model( model, model_dir=CONFIG["model_directory"], machine_type=os.path.split(target_dir)[1], ) print("============== END TRAINING ==============")