# Command-line arguments: model location, input folder, output folder.
model_input = sys.argv[1]
input_directory = sys.argv[2]
output_directory = sys.argv[3]

# Find files.
# Require the real '.mat' extension so that names that merely end in the
# letters "mat" are not picked up; hidden files are skipped as well.
input_files = [
    f for f in os.listdir(input_directory)
    if os.path.isfile(os.path.join(input_directory, f))
    and not f.lower().startswith('.')
    and f.lower().endswith('.mat')
]

# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(output_directory, exist_ok=True)

# Load model.
print('Loading 12ECG model...')
model = load_12ECG_model(model_input)

# Iterate over files.
print('Extracting 12ECG features...')
num_files = len(input_files)

for i, f in enumerate(input_files):
    print(' {}/{}...'.format(i+1, num_files))
    tmp_input_file = os.path.join(input_directory, f)
    data, header_data = load_challenge_data(tmp_input_file)
    current_label, current_score, classes = run_12ECG_classifier(data, header_data, model)
    # Save results.
    save_challenge_predictions(output_directory, f, current_score, current_label, classes)

print('Done.')
# Find files.
# Require the real '.mat' extension so that names that merely end in the
# letters "mat" are not picked up; hidden files are skipped as well.
input_files = [
    f for f in os.listdir(input_directory)
    if os.path.isfile(os.path.join(input_directory, f))
    and not f.lower().startswith('.')
    and f.lower().endswith('.mat')
]

# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(output_directory, exist_ok=True)

# The class list is derived once from the input records and shared by
# every classifier call below.
classes = get_classes(input_directory, input_files)

# Load model.
print('Loading 12ECG model...')
model = load_12ECG_model()

# Iterate over files.
print('Extracting 12ECG features...')
num_files = len(input_files)

for i, f in enumerate(input_files):
    print(' {}/{}...'.format(i + 1, num_files))
    tmp_input_file = os.path.join(input_directory, f)
    data, header_data = load_challenge_data(tmp_input_file)
    current_label, current_score = run_12ECG_classifier(
        data, header_data, classes, model)
    # Save results.
    save_challenge_predictions(output_directory, f, current_score,
                               current_label, classes)

print('Done.')
def _save_confusion_heatmap(matrix, classes, path):
    """Render `matrix` as an annotated heatmap labelled by `classes` and save it to `path`."""
    df_cm = pd.DataFrame(matrix, index=list(classes), columns=list(classes))
    plt.figure(figsize=(9, 9))
    svm = sn.heatmap(df_cm, annot=True)
    figure = svm.get_figure()
    # The target folder may not exist yet; savefig does not create it.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    figure.savefig(path, dpi=400)


def confusion_matrixes(input_directory):
    """
    Classify every record in a folder and plot confusion matrices.

    Each `.mat` file in `input_directory` is run through the classifier.
    Row `argmax(real_out)` of two square matrices accumulates the predicted
    labels and the predicted scores respectively. The label matrix is then
    converted to row percentages. Both matrices are saved as heatmaps to
    `./cmatrix/labels/labels.png` and `./cmatrix/scores/scores.png`, and the
    accuracy (argmax of truth vs. argmax of predicted label) is printed.

    Parameters:
    -----------
    input_directory: str,
        folder holding the records to classify

    Returns:
    --------
    None
    """
    # Find files.
    # Require the real '.mat' extension so that names that merely end in the
    # letters "mat" are not picked up; hidden files are skipped as well.
    input_files = [
        f for f in os.listdir(input_directory)
        if os.path.isfile(os.path.join(input_directory, f))
        and not f.lower().startswith('.')
        and f.lower().endswith('.mat')
    ]

    # NOTE(review): `output_directory` is not a parameter of this function;
    # presumably it is a module-level name. Nothing below writes into it.
    # Confirm whether this folder creation is still wanted.
    os.makedirs(output_directory, exist_ok=True)

    classes = np.array(get_classes(input_directory, input_files))
    print(classes)
    # Size the matrices from the class list instead of hard-coding 9.
    n_classes = len(classes)

    # Load model.
    print('Loading 12ECG model...')
    model = load_12ECG_model()

    # Iterate over files.
    print('Extracting 12ECG features...')
    num_files = len(input_files)
    confusion_matrix_labels = np.zeros((n_classes, n_classes))
    confusion_matrix_scores = np.zeros((n_classes, n_classes))
    bad = 0
    good = 0

    for i, f in enumerate(input_files):
        print(' {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        current_label, current_score, real_out = run_12ECG_classifier(
            data, header_data, classes, model)

        true_idx = np.argmax(real_out)
        confusion_matrix_labels[true_idx] += current_label
        confusion_matrix_scores[true_idx] += current_score
        if true_idx == np.argmax(current_label):
            good += 1
        else:
            bad += 1

    # Save results.
    # Convert label counts to row percentages. Rows that never occurred as
    # ground truth sum to zero and are left as zeros instead of becoming NaN.
    for row_idx in range(confusion_matrix_labels.shape[0]):
        row_total = np.sum(confusion_matrix_labels[row_idx])
        if row_total != 0:
            confusion_matrix_labels[row_idx] = (
                confusion_matrix_labels[row_idx] / row_total * 100)

    _save_confusion_heatmap(confusion_matrix_labels, classes,
                            './cmatrix/labels/labels.png')
    _save_confusion_heatmap(confusion_matrix_scores, classes,
                            './cmatrix/scores/scores.png')

    # With no records there is nothing to divide by; report NaN, not a crash.
    total = good + bad
    accuracy = good / total if total else float('nan')
    print("Accuracy: %0.4f" % accuracy)
    print('Done.')
def eval_all(tranches:Optional[str]=None) -> pd.DataFrame:
    """
    Run the loaded models over the evaluation dataset one record at a time,
    print the challenge metrics, and return the per-record results.

    Parameters:
    -----------
    tranches: str, optional,
        tranches for making the evaluation,
        can be one of "AB", "E", "F", or None (None defaults to "ABEF")

    Returns:
    --------
    df_eval_res: DataFrame,
        indexed by record name, with one scalar-prediction column per class
        plus the columns `binary_predictions` and `truth_labels`
    """
    models = load_12ECG_model()
    dr = CR(TrainCfg.db_dir)
    ds_config = deepcopy(TrainCfg)
    if tranches:
        ds_config.tranches_for_training = tranches
    ds = CINC2020(config=ds_config, training=False)

    print("start collecting results...")
    time.sleep(3)

    class_names = ModelCfg.full_classes
    n_classes = len(class_names)
    truth_labels, truth_array = [], []
    binary_predictions, scalar_predictions = [], []
    # ds.records = ds.records[:10]  # for fast debug
    with tqdm(ds.records, total=len(ds.records)) as progress:
        for rec in progress:
            data, header_data = load_challenge_data(dr.get_data_filepath(rec))
            current_label, current_score, _ = run_12ECG_classifier(
                data, header_data, models, verbose=0,
            )
            binary_predictions.append(current_label)
            scalar_predictions.append(current_score)

            # one-hot encode the record's ground-truth labels
            rec_labels = dr.get_labels(rec, fmt='a')
            one_hot = [0] * n_classes
            for c in rec_labels:
                one_hot[class_names.index(c)] = 1
            truth_labels.append(rec_labels)
            truth_array.append(one_hot)

    # gather results into a DataFrame
    print("gathering results into a `DataFrame`...")
    df_eval_res = pd.DataFrame(scalar_predictions)
    df_eval_res.columns = class_names
    df_eval_res['binary_predictions'] = ''
    df_eval_res['truth_labels'] = ''
    class_arr = np.array(class_names)
    for idx in df_eval_res.index:
        positive = np.where(binary_predictions[idx]==1)[0]
        df_eval_res.at[idx, 'binary_predictions'] = class_arr[positive].tolist()
        df_eval_res.at[idx, 'truth_labels'] = truth_labels[idx]
    df_eval_res.index = ds.records

    metrics = evaluate_12ECG_score(
        classes=class_arr.tolist(),
        truth=np.array(truth_array),
        scalar_pred=np.array(scalar_predictions),
        binary_pred=np.array(binary_predictions),
    )
    auroc, auprc, accuracy, f_measure, f_beta_measure, g_beta_measure, challenge_metric = metrics

    msg = f"""
    results on tranches {tranches or 'all'}:
    ------------------------------
    auroc: {auroc}
    auprc: {auprc}
    accuracy: {accuracy}
    f_measure: {f_measure}
    f_beta_measure: {f_beta_measure}
    g_beta_measure: {g_beta_measure}
    challenge_metric: {challenge_metric}
    ----------------------------------------
    """
    print(msg)  # in case no logger

    return df_eval_res
def eval_all_parallel(tranches:Optional[str]=None) -> pd.DataFrame:
    """
    Batched evaluation: run the loaded models over the evaluation dataset in
    batches, combine their scores with the special detectors' conclusions,
    print the challenge metrics, and return the per-record results.

    NOTE: since signal preprocessing in `special_detectors` already uses `multiprocessing`,
    it would raise
    ``AssertionError: daemonic processes are not allowed to have children``

    Parameters:
    -----------
    tranches: str, optional,
        when given, overrides `tranches_for_training` on a copy of `TrainCfg`
        before the dataset is built

    Returns:
    --------
    df_eval_res: DataFrame,
        indexed by record name, with one scalar-prediction column per class
        plus the columns `binary_predictions` and `truth_labels`
    """
    batch_size = 16
    loaded_models = load_12ECG_model()
    dr = CR(TrainCfg.db_dir)
    ds_config = deepcopy(TrainCfg)
    if tranches:
        ds_config.tranches_for_training = tranches
    ds = CINC2020(config=ds_config, training=False)

    data_loader = DataLoader(
        dataset=ds,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=collate_fn,
        drop_last=False,
    )

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # column of the NSR class among the dl classes, looked up by abbreviation
    # or by the code "426783006"; it does not change between batches,
    # so it is resolved once up front
    if "NSR" in ModelCfg.dl_classes:
        dl_nsr_cid = ModelCfg.dl_classes.index("NSR")
    elif "426783006" in ModelCfg.dl_classes:
        dl_nsr_cid = ModelCfg.dl_classes.index("426783006")
    else:
        dl_nsr_cid = None

    truth_array = np.array([]).reshape(0, len(ModelCfg.full_classes))
    binary_predictions = np.array([]).reshape(0, len(ModelCfg.full_classes))
    scalar_predictions = np.array([]).reshape(0, len(ModelCfg.full_classes))

    print("start collecting results...")
    time.sleep(3)

    with tqdm(total=len(ds)) as pbar:
        for signals, labels in data_loader:
            signals = signals.to(device=device, dtype=_DTYPE)
            labels = labels.numpy()
            labels = extend_predictions(
                labels,
                ds.all_classes,
                ModelCfg.full_classes,
            )
            truth_array = np.concatenate((truth_array, labels))

            dl_scores = []
            for subset, model in loaded_models.items():
                model.eval()
                subset_scores, subset_bin = model.inference(signals)
                if subset in ModelCfg.tranche_classes.keys():
                    subset_scores = extend_predictions(
                        subset_scores,
                        ModelCfg.tranche_classes[subset],
                        ModelCfg.dl_classes,
                    )
                # The batch dimension is kept: every record of the batch needs
                # its own row of scores (taking `[0]` kept only the first one).
                # NOTE(review): assumes `model.inference` returns arrays of
                # shape (batch, n_classes) -- TODO confirm
                dl_scores.append(subset_scores)

            # TODO: make a classifier using the scores from the 4 different dl models
            # element-wise maximum over the models
            dl_scores = np.atleast_2d(np.max(np.array(dl_scores), axis=0))
            dl_conclusions = (dl_scores >= ModelCfg.bin_pred_thr).astype(int)

            # treat exceptional cases, record by record
            for row_idx, row_scores in enumerate(dl_scores):
                row_max_prob = row_scores.max()
                if row_max_prob < ModelCfg.bin_pred_nsr_thr and dl_nsr_cid is not None:
                    # no class is confident enough: fall back to NSR
                    dl_conclusions[row_idx, dl_nsr_cid] = 1
                elif dl_conclusions[row_idx].sum() == 0:
                    # nothing passed the threshold: accept the classes within
                    # the tolerance of the best score, if above the NSR threshold
                    look_again = (row_scores + ModelCfg.bin_pred_look_again_tol) >= row_max_prob
                    look_again = look_again & (row_scores >= ModelCfg.bin_pred_nsr_thr)
                    dl_conclusions[row_idx] = look_again.astype(int)

            dl_scores = extend_predictions(
                dl_scores,
                ModelCfg.dl_classes,
                ModelCfg.full_classes,
            )
            dl_conclusions = extend_predictions(
                dl_conclusions,
                ModelCfg.dl_classes,
                ModelCfg.full_classes,
            )

            # one special-detector run per signal of the batch
            with mp.Pool(processes=batch_size) as pool:
                sd_conclusion = pool.starmap(
                    func=_run_special_detector_once,
                    iterable=[(s,) for s in signals.tolist()],
                )
            sd_conclusion = np.array(sd_conclusion)

            # scores: element-wise maximum of the two sources;
            # conclusions: positive only where both sources are non-zero
            step_scores = np.where(dl_scores>=sd_conclusion, dl_scores, sd_conclusion)
            step_conclusions = np.where(
                dl_conclusions*sd_conclusion!=0,
                np.ones_like(dl_conclusions, dtype=int),
                np.zeros_like(dl_conclusions, dtype=int),
            )

            binary_predictions = np.concatenate((binary_predictions, step_conclusions))
            scalar_predictions = np.concatenate((scalar_predictions, step_scores))

            pbar.update(signals.shape[0])

    truth_labels = [dr.get_labels(rec, fmt='a') for rec in ds.records]

    # gather results into a DataFrame
    print("gathering results into a `DataFrame`...")
    df_eval_res = pd.DataFrame(scalar_predictions)
    df_eval_res.columns = ModelCfg.full_classes
    df_eval_res['binary_predictions'] = ''
    df_eval_res['truth_labels'] = ''
    full_classes_arr = np.array(ModelCfg.full_classes)
    for idx in df_eval_res.index:
        df_eval_res.at[idx, 'binary_predictions'] = \
            full_classes_arr[np.where(binary_predictions[idx]==1)[0]].tolist()
        df_eval_res.at[idx, 'truth_labels'] = truth_labels[idx]
    df_eval_res.index = ds.records

    auroc, auprc, accuracy, f_measure, f_beta_measure, g_beta_measure, challenge_metric = \
        evaluate_12ECG_score(
            classes=ModelCfg.full_classes,
            truth=np.array(truth_array),
            scalar_pred=np.array(scalar_predictions),
            binary_pred=np.array(binary_predictions),
        )
    msg = f"""
    results on tranches {tranches or 'all'}:
    ------------------------------
    auroc: {auroc}
    auprc: {auprc}
    accuracy: {accuracy}
    f_measure: {f_measure}
    f_beta_measure: {f_beta_measure}
    g_beta_measure: {g_beta_measure}
    challenge_metric: {challenge_metric}
    ----------------------------------------
    """
    print(msg)  # in case no logger

    return df_eval_res
def __init__(self):
    """Store the fixed array of class names and load the 12ECG model."""
    class_names = (
        "AF",
        "I-AVB",
        "LBBB",
        "Normal",
        "PAC",
        "PVC",
        "RBBB",
        "STD",
        "STE",
    )
    self.classes = np.array(class_names)
    self.model = load_12ECG_model()