def generate_gap(file_name, device, num_patients, task, batch_size, normalize_signals,
                 features_subset, gap_norm_opt):
    """Compute (or load cached) GAP representations for the train/val/test splits."""
    file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'saved_models', file_name)
    file_path = os.path.join(file_dir, f'{file_name}_ugap_test.pkl')

    if not os.path.exists(file_path):
        num_classes = 2 if 'rem' in task.lower() else 5
        feature_opt, signal_len, one_slice, dataset_dir = run_params(
            file_name, features_subset, def_feature_opt='HSIC+Concat', task=task)

        train_loader, val_loader, test_loader = init_datasets(
            num_patients=num_patients, dataset_dir=dataset_dir, task=task, one_slice=True,
            batch_size=batch_size, conv_d=1, oversample=False,
            normalize_signals=normalize_signals, features_subset=features_subset,
            num_ch=2, low_sp=False, modes=['train', 'val', 'test'])

        haifa_model = HSICClassifier(
            num_classes=num_classes, signal_len=signal_len, feature_opt=feature_opt,
            gap_norm_opt=gap_norm_opt, feature_len=train_loader.dataset.feature_len,
            in_channels=2).to(device)
        haifa_model.load_state_dict(
            torch.load(os.path.join(file_dir, f'{file_name}_params.pkl'), map_location='cpu'))

        gap_train, gap_val, gap_test, rep_size = generate_gap_internal(
            train_loader, val_loader, test_loader, haifa_model, file_dir, file_name, device)
    else:
        with open(os.path.join(file_dir, f'{file_name}_ugap_train.pkl'), 'rb') as h:
            gap_train = pkl.load(h)
        with open(os.path.join(file_dir, f'{file_name}_ugap_val.pkl'), 'rb') as h:
            gap_val = pkl.load(h)
        with open(os.path.join(file_dir, f'{file_name}_ugap_test.pkl'), 'rb') as h:
            gap_test = pkl.load(h)
        # All cached representations share one size; read it off the first entry.
        rep_size = next(iter(gap_test.values())).shape[0]

    return gap_train, gap_val, gap_test, rep_size
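def _demo_generate_gap():
    # Illustrative usage sketch (not part of the original file); 'my_model' is a
    # placeholder for a trained model directory under saved_models/, and the
    # argument values are examples only.
    gap_train, gap_val, gap_test, rep_size = generate_gap(
        file_name='my_model', device=get_device(0), num_patients=40, task='all',
        batch_size=32, normalize_signals=True, features_subset=[],
        gap_norm_opt='batch_norm')
    # Each gap_* dict maps a signal identifier to a 1-D vector of length rep_size.
    print(f'GAP representation size: {rep_size}')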
def get_templates(file_name, oversample, template_opt='spindle', cam_target=2, num_patients=40,
                  num_ch=2, before=5., after=5., task='all', try2load=True,
                  gap_norm_opt='batch_norm', cuda_id=0, random=False, normalize_signals=False,
                  noise_inter=0):
    """Extract event-centered CAM templates (spindle/activation/sw/rem/emg) from the test set."""
    device = get_device(cuda_id)

    features_subset = []
    if 'freq' in file_name.lower():
        features_subset += ['frequency']
    if 'num_spindles' in file_name.lower():
        features_subset += ['num_spindles']
    elif 'spindle' in file_name.lower():
        features_subset += ['spindle']

    assert template_opt in ['spindle', 'activation', 'sw', 'rem', 'emg']

    low_sp = 'low' in file_name
    feature_opt, signal_len, one_slice, dataset_dir = run_params(
        file_name, features_subset, def_feature_opt='HSIC+Concat', task=task)
    fs = 80 if 'ds' in file_name.lower() else 125
    file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'saved_models', file_name)
    num_classes = 2 if 'rem' in task.lower() else 5
    print(f'Sample Frequency: {fs}')
    print(f'Num Classes: {num_classes}')

    random_str = 'random_' if random else ''
    save_name = f'{random_str}{template_opt}_template_class{cam_target}_{file_name}_{num_patients}patients'
    file_path = os.path.join('plot_results', save_name)

    if try2load and os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            all_templates = pkl.load(f)
    else:
        filter_stage = cam_target if isinstance(cam_target, int) else None
        test_loader = init_datasets(
            num_patients=num_patients, dataset_dir=dataset_dir, batch_size=1, conv_d=1,
            features_subset=features_subset, one_slice=one_slice, modes=['test'],
            random_flag=False, num_ch=num_ch, low_sp=low_sp, filter_stage=filter_stage,
            task=task, normalize_signals=normalize_signals, oversample=oversample)

        model = HSICClassifier(
            num_classes=num_classes, signal_len=signal_len, feature_opt=feature_opt,
            in_channels=num_ch, feature_len=test_loader.dataset.feature_len,
            gap_norm_opt=gap_norm_opt)
        model.load_state_dict(torch.load(os.path.join(file_dir, f'{file_name}_params.pkl'),
                                         map_location='cpu'))
        model.to(device)
        model.eval()

        all_templates = []
        with torch.no_grad():
            for batch_idx, (signal, label, signal_name, features) in enumerate(tqdm(test_loader)):
                signal, features = signal.to(device), features.to(device)
                _, cam, _ = model(signal, features)

                signal = test_loader.dataset.unnormalize_signal(signal)
                signal = signal.cpu().numpy().reshape(-1, signal.shape[-1])
                cam = cam.squeeze().cpu().numpy()
                if isinstance(cam_target, int):
                    cam = cam[cam_target, :]
                else:
                    cam = cam[label.item(), :]  # CAM row of the ground-truth class
                signal = np.squeeze(signal)

                if template_opt == 'spindle':
                    templates = get_spindle_templates(
                        signal, fs=fs, cam=cam, random=random, sec_before=before,
                        sec_after=after, num_ch=num_ch, noise_inter=noise_inter)
                elif template_opt == 'activation':
                    if num_ch == 2:
                        signal = signal[0, :]  # TODO!!
                    templates = get_activation_templates(cam, signal, sec_before=before,
                                                         sec_after=after, fs=fs)
                elif template_opt == 'sw':
                    templates = get_sw_templates(cam, signal, sec_before=before, sec_after=after,
                                                 fs=fs, num_ch=num_ch, random=random,
                                                 noise_inter=noise_inter)
                elif template_opt == 'rem':
                    eog = test_loader.dataset.get_eog(signal_name)
                    templates = get_rem_templates(cam=cam, signal_eog=eog, sec_before=before,
                                                  sec_after=after, fs_eeg=fs, random=random,
                                                  noise_inter=noise_inter)
                elif template_opt == 'emg':
                    signal_emg = test_loader.dataset.get_emg(signal_name).squeeze()
                    templates = get_emg_onset_templates(cam=cam, signal_emg=signal_emg,
                                                        sec_before=before, sec_after=after,
                                                        fs_eeg=fs, random=random)
                all_templates += templates

        all_templates = np.vstack(all_templates).T

        # num_templates = all_templates.shape[0]
        # Normalize templates
        # for i in range(num_templates):
        #     cam_i = all_templates[:, i]
        #     if ((cam_i - cam_i.mean()) != 0).sum() < 5:
        #         continue
        #     if max(cam_i) != min(cam_i):
        #         cam_i = (cam_i - min(cam_i)) / (max(cam_i) - min(cam_i))
        #         all_templates[:, i] = cam_i

        if random is not None:
            save_name = os.path.join('noise', f'{save_name}_figure_data_noise={noise_inter}')
        with open(os.path.join('plot_results', save_name), 'wb') as f:
            pkl.dump(all_templates, f, protocol=pkl.HIGHEST_PROTOCOL)

    return all_templates
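def _demo_get_templates():
    # Illustrative usage sketch (not part of the original file): extract
    # spindle-centered CAM templates for class index 2 and plot their mean.
    # 'my_model' is a placeholder model name; matplotlib is assumed available.
    import matplotlib.pyplot as plt
    all_templates = get_templates(file_name='my_model', oversample=False,
                                  template_opt='spindle', cam_target=2, try2load=True)
    # Columns are individual templates; average across them.
    plt.plot(all_templates.mean(axis=1))
    plt.title('Mean CAM template around detected spindles')
    plt.show()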
# Setup (excerpt): probe how much feature information remains in the learned
# representation by training an MLP to predict the features from it.
feature_opt = run_params(features_subset)
torch.manual_seed(44)
device = get_device(cuda_id)
file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'saved_models', file_name)

train_loader, val_loader, test_loader = init_datasets(
    task=task, balanced_dataset=balanced_dataset, batch_size=batch_size,
    normalize_signals=True, features_subset=features_subset)

main_model = HSICClassifier(num_classes=2, feature_opt=feature_opt, gap_norm_opt='batch_norm',
                            feature_len=train_loader.dataset.feature_len,
                            in_channels=2).to(device)
main_model.load_state_dict(
    torch.load(os.path.join(file_dir, f'{file_name}_params.pkl'), map_location='cpu'))
main_model.eval()

features_len, _ = feature_names_len_from_subset(features_subset)
# rep_size is the GAP representation size, e.g. as returned by generate_gap() above.
feature_predictor = MLP1Layer(in_size=rep_size, hidden_size=rep_size,
                              out_size=features_len).to(device)
optimizer = optim.Adam(feature_predictor.parameters(), lr=lr, weight_decay=1e-6)
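def _train_feature_probe_epoch(gap_train, train_loader, feature_predictor, optimizer, device):
    # Illustrative sketch (not part of the original file) of one probe-training epoch:
    # regress the hand-crafted features from the frozen GAP representations.
    # Assumes gap_train maps signal names to 1-D vectors (as in generate_gap) and that
    # MLP1Layer returns the prediction tensor directly.
    mse = nn.MSELoss()
    feature_predictor.train()
    for signals, labels, names, features in tqdm(train_loader):
        reps = torch.stack([torch.as_tensor(gap_train[name]) for name in names]).float().to(device)
        optimizer.zero_grad()
        loss = mse(feature_predictor(reps), features.to(device))
        loss.backward()
        optimizer.step()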
def get_global_template(file_name, cam_target=1, label='af', feature_subset='rr',
                        feature_opt='HSIC+Concat', noise_lim_sec=0.):
    """Build beat-aligned CAM and signal templates over the ECG test set."""
    cuda_id = 0
    label_lookup = ['Normal Sinus Rhythm', 'Atrial Fibrillation', 'Other Rhythm', 'Noisy']
    signal_len = 5400

    test_dataset = ECGDataset("test", feature_subset=feature_subset, feature_opt=feature_opt,
                              oversample=label, naf=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

    model = HSICClassifier(in_channels=1, num_classes=3, feature_len=test_dataset.feature_len,
                           feature_opt=feature_opt, gap_norm_opt='batch_norm')
    main_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    model_to_load = f'{file_name}_params.pkl'
    model.load_state_dict(
        torch.load(os.path.join(main_dir, 'saved_models', file_name, model_to_load),
                   map_location='cpu'))
    model.eval()

    rr_locals = pd.read_csv(os.path.join(main_dir, 'data', 'rr_local',
                                         'rr_locals_90_test_filtered7.csv'))
    rr_locals.set_index('Unnamed: 0', inplace=True)

    all_templates_cam = []
    all_templates_sig = []
    total_activation = []
    list_signal_names = []
    before = 0.7
    after = 0.7

    for batch_idx, (data, target, feature, _, signal_name, feature_rep) in enumerate(test_loader):
        logits, cam, _ = model(data, feature_rep)
        cam = np.squeeze(cam.detach().numpy())[cam_target, :]
        # Clamp the CAM from below at its modal value (likely the padded-region floor).
        cam = np.maximum(cam, Counter(cam).most_common(1)[0][0])
        cam = resample(cam, signal_len)
        data = np.squeeze(data.detach().numpy())

        rr_local = rr_locals.loc[signal_name].values.astype(int)
        rr_local = _fix_peaks(rr_local, data)
        rr_local = rr_local[rr_local > 1]
        if (rr_local != 0).any():
            tmpls_cam, rr1 = _get_templates(cam, rr_local, before=before, after=after, fs=90,
                                            noise_lim_sec=noise_lim_sec)
            if tmpls_cam.shape[0]:
                all_templates_cam.append(tmpls_cam)
                list_signal_names.append([f'{signal_name}_{i}' for i in range(tmpls_cam.shape[1])])
            tmpls_sig, rr2 = _get_templates(data, rr_local, before=before, after=after, fs=90,
                                            noise_lim_sec=0)
            if tmpls_sig.shape[0]:
                all_templates_sig.append(tmpls_sig)
        # The downsampling process introduced artifacts, so 0 is not really 0, more like 1e-5.
        non_zero_index = np.where(abs(data) > 1e-4)[0]
        total_activation.append(cam[non_zero_index[0]:non_zero_index[-1]])

    cam_mat = np.hstack(all_templates_cam)
    sig_mat = np.hstack(all_templates_sig)

    # Min-max normalize each CAM template; skip near-constant columns.
    for i in range(cam_mat.shape[1]):
        cam_i = cam_mat[:, i]
        if ((cam_i - cam_i.mean()) != 0).sum() < 5:
            continue
        if max(cam_i) != min(cam_i):
            cam_i = (cam_i - min(cam_i)) / (max(cam_i) - min(cam_i))
            cam_mat[:, i] = cam_i

    # Generate numeric measures: mean activation and its standard error.
    total_mean_activation = np.mean([np.mean(a) for a in total_activation])
    total_activation_std = np.std(np.hstack(total_activation)) / np.sqrt(
        np.size(np.hstack(total_activation)))

    save_data = {'total_mean_activation': total_mean_activation,
                 'total_activation_std': total_activation_std,
                 'cam_mat': cam_mat, 'sig_mat': sig_mat,
                 'before': before, 'after': after,
                 'list_signal_names': list_signal_names}
    with open(os.path.join(main_dir, 'saved_models', file_name,
                           f'figure_data_noise={noise_lim_sec}_pval.pkl'), 'wb') as h:
        pkl.dump(save_data, h, protocol=pkl.HIGHEST_PROTOCOL)

    return cam_mat, sig_mat, before, after
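def _demo_get_global_template():
    # Illustrative usage sketch (not part of the original file): build beat-aligned AF
    # templates and plot the mean CAM next to the mean beat. 'my_model' is a
    # placeholder model name; matplotlib is assumed available.
    import matplotlib.pyplot as plt
    cam_mat, sig_mat, before, after = get_global_template(file_name='my_model',
                                                          cam_target=1, label='af')
    t = np.linspace(-before, after, cam_mat.shape[0])
    plt.plot(t, cam_mat.mean(axis=1), label='mean CAM')
    plt.plot(t, sig_mat.mean(axis=1), label='mean ECG beat')
    plt.xlabel('time around R-peak [s]')
    plt.legend()
    plt.show()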
# Setup (excerpt): training the ECG HSIC classifier.
file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'saved_models', exp_name)
if not os.path.exists(file_dir):
    os.mkdir(file_dir)
device = get_device(cuda_id)

train_loader, val_loader, _ = create_dataloaders(batch_size, feature_subset, feature_opt, naf=True)

model = HSICClassifier(num_classes=2, in_channels=1,
                       feature_len=train_loader.dataset.feature_len,
                       feature_opt=feature_opt, gap_norm_opt='batch_norm').to(device)

optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.95, 0.99), eps=1e-08,
                       weight_decay=0, amsgrad=False)
classification_criterion = nn.CrossEntropyLoss()
# The tail of this call is truncated in the source; external_feature_std is assumed
# here to mirror the parallel sleep-staging call below.
independence_criterion = HSICLoss(feature_opt, lambda_hsic, model.activation_size, device,
                                  decay_factor=0.7,
                                  external_feature_std=train_loader.dataset.med_dist)
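# For reference, a minimal sketch of the biased HSIC estimator (Gretton et al., 2005)
# that independence penalties like HSICLoss are typically built on. This is
# illustrative only and not necessarily the exact implementation used in this repo.
def hsic_biased(x, y, sigma_x=1.0, sigma_y=1.0):
    """x: (n, dx), y: (n, dy) torch tensors; returns a scalar HSIC estimate."""
    n = x.shape[0]
    kx = torch.exp(-torch.cdist(x, x) ** 2 / (2 * sigma_x ** 2))  # Gaussian kernel on x
    ky = torch.exp(-torch.cdist(y, y) ** 2 / (2 * sigma_y ** 2))  # Gaussian kernel on y
    h = torch.eye(n, device=x.device) - torch.ones(n, n, device=x.device) / n  # centering
    # HSIC_b = tr(K H L H) / (n - 1)^2
    return torch.trace(kx @ h @ ky @ h) / (n - 1) ** 2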
# Setup (excerpt): training the sleep-staging HSIC classifier with a warm-started HSIC weight.
feature_opt = run_params(features_subset)
torch.manual_seed(44)
device = get_device(cuda_id)
file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'saved_models', file_name)
if not os.path.exists(file_dir):
    os.mkdir(file_dir)

# Ramp the HSIC weight linearly over `warm_start` epochs, then hold it constant.
lambda_vec = lambda_hsic * np.hstack([np.linspace(0, 1, warm_start), np.ones(100)])
print(f'{file_name} started training')

train_loader, val_loader, test_loader = init_datasets(
    batch_size=batch_size, features_subset=features_subset, task=task,
    balanced_dataset=balanced_dataset, normalize_signals=True)

model = HSICClassifier(num_classes=2, feature_opt=feature_opt,
                       feature_len=train_loader.dataset.feature_len, in_channels=2,
                       gap_norm_opt='None').to(device)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, eta_min=lr / 100,
                                                 T_max=num_epochs)
classification_criterion = nn.CrossEntropyLoss()
independence_criterion = HSICLoss(feature_opt, lambda_hsic, model.activation_size, device,
                                  decay_factor=0.7,
                                  external_feature_std=train_loader.dataset.med_dist)


def train(epoch):
    model.train()
    epoch_loss = 0
    for batch_idx, (signals, labels, _, features) in enumerate(tqdm(train_loader)):
        signals, labels, features = signals.to(device), labels.to(device), features.to(device)
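        # The rest of the loop body is truncated in the source. A plausible continuation,
        # assuming model(...) returns (logits, cam, gap) as elsewhere in the repo, that
        # HSICLoss compares the gap representation to the features, and that lambda_vec
        # is applied externally (the original may instead scale inside HSICLoss):
        optimizer.zero_grad()
        logits, _, gap = model(signals, features)
        classification_loss = classification_criterion(logits, labels)
        hsic_loss = independence_criterion(gap, features)  # assumed call signature
        loss = classification_loss + lambda_vec[min(epoch, len(lambda_vec) - 1)] * hsic_loss
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(train_loader)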