Example #1

import os
import pickle as pkl

import numpy as np
import torch
from tqdm import tqdm

# Assumed repo-local imports (not shown in this excerpt): run_params,
# init_datasets, HSICClassifier, generate_gap_internal, get_device and the
# get_*_templates helpers.

def generate_gap(file_name, device, num_patients, task, batch_size,
                 normalize_signals, features_subset, gap_norm_opt):
    file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'saved_models', file_name)
    file_path = os.path.join(file_dir, f'{file_name}_ugap_test.pkl')
    if not os.path.exists(file_path):
        num_classes = 2 if 'rem' in task.lower() else 5
        feature_opt, signal_len, one_slice, dataset_dir = run_params(
            file_name,
            features_subset,
            def_feature_opt='HSIC+Concat',
            task=task)

        train_loader, val_loader, test_loader = init_datasets(
            num_patients=num_patients,
            dataset_dir=dataset_dir,
            task=task,
            one_slice=True,
            batch_size=batch_size,
            conv_d=1,
            oversample=False,
            normalize_signals=normalize_signals,
            features_subset=features_subset,
            num_ch=2,
            low_sp=False,
            modes=['train', 'val', 'test'])

        haifa_model = HSICClassifier(
            num_classes=num_classes,
            signal_len=signal_len,
            feature_opt=feature_opt,
            gap_norm_opt=gap_norm_opt,
            feature_len=train_loader.dataset.feature_len,
            in_channels=2).to(device)

        haifa_model.load_state_dict(
            torch.load(os.path.join(file_dir, f'{file_name}_params.pkl'),
                       map_location='cpu'))

        gap_train, gap_val, gap_test, rep_size = generate_gap_internal(
            train_loader, val_loader, test_loader, haifa_model, file_dir,
            file_name, device)
    else:
        with open(os.path.join(file_dir, f'{file_name}_ugap_train.pkl'),
                  'rb') as h:
            gap_train = pkl.load(h)

        with open(os.path.join(file_dir, f'{file_name}_ugap_val.pkl'),
                  'rb') as h:
            gap_val = pkl.load(h)

        with open(os.path.join(file_dir, f'{file_name}_ugap_test.pkl'),
                  'rb') as h:
            gap_test = pkl.load(h)

    # rep_size is the length of a single GAP representation vector.
    rep_size = next(iter(gap_test.values())).shape[0]

    return gap_train, gap_val, gap_test, rep_size
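
# A minimal usage sketch for generate_gap (hypothetical: the saved-model
# name and hyperparameters below are illustrative, not taken from the repo):
#
#   gap_train, gap_val, gap_test, rep_size = generate_gap(
#       file_name='1slice_freq', device=get_device(0), num_patients=40,
#       task='all', batch_size=32, normalize_signals=True,
#       features_subset=['frequency'], gap_norm_opt='batch_norm')
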
def get_templates(file_name, oversample, template_opt='spindle', cam_target=2, num_patients=40, num_ch=2, before=5., after=5.,
                  task='all', try2load=True, gap_norm_opt='batch_norm', cuda_id=0, random=False, normalize_signals=False, noise_inter=0):
    device = get_device(cuda_id)
    features_subset = []
    if 'freq' in file_name.lower():
        features_subset += ['frequency']
    if 'num_spindles' in file_name.lower():
        features_subset += ['num_spindles']
    elif 'spindle' in file_name.lower():
        features_subset += ['spindle']
    assert template_opt in ['spindle', 'activation', 'sw', 'rem', 'emg']

    low_sp = 'low' in file_name
    feature_opt, signal_len, one_slice, dataset_dir = run_params(file_name, features_subset, def_feature_opt='HSIC+Concat',
                                                                 task=task)
    fs = 80 if 'ds' in file_name.lower() else 125
    file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'saved_models', file_name)
    num_classes = 2 if 'rem' in task.lower() else 5

    print(f'Sample Frequency: {fs}')
    print(f'Num Classes: {num_classes}')
    random_str = 'random_' if random else ''
    save_name = f'{random_str}{template_opt}_template_class{cam_target}_{file_name}_{num_patients}patients'
    file_path = os.path.join('plot_results', save_name)
    
    if try2load and os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            all_templates = pkl.load(f)
    else:
        filter_stage = cam_target if isinstance(cam_target, int) else None
        test_loader = init_datasets(num_patients=num_patients, dataset_dir=dataset_dir, batch_size=1, conv_d=1,
                                    features_subset=features_subset, one_slice=one_slice, modes=['test'],
                                    random_flag=False, num_ch=num_ch, low_sp=low_sp, filter_stage=filter_stage,
                                    task=task, normalize_signals=normalize_signals, oversample=oversample)
    
        model = HSICClassifier(num_classes=num_classes, signal_len=signal_len, feature_opt=feature_opt,
                               in_channels=num_ch, feature_len=test_loader.dataset.feature_len, gap_norm_opt=gap_norm_opt)
        model.load_state_dict(torch.load(os.path.join(file_dir, f'{file_name}_params.pkl'), map_location='cpu'))
        model.to(device)
        model.eval()
    
        all_templates = []
    
        with torch.no_grad():
            for batch_idx, (signal, label, signal_name, features) in enumerate(tqdm(test_loader)):
                signal, features = signal.to(device), features.to(device)
                _, cam, _ = model(signal, features)

                signal = test_loader.dataset.unnormalize_signal(signal)
                signal = signal.cpu().numpy().reshape(-1, signal.shape[-1])
                cam = np.squeeze(cam).cpu().numpy()
                if isinstance(cam_target, int):
                    cam = cam[cam_target, :]
                else:
                    cam = cam[label, :]
    
                signal = np.squeeze(signal)
                if template_opt == 'spindle':
                    templates = get_spindle_templates(signal, fs=fs, cam=cam, random=random,
                                                      sec_before=before, sec_after=after, num_ch=num_ch, noise_inter=noise_inter)
                elif template_opt == 'activation':
                    if num_ch == 2:
                        signal = signal[0, :]  # TODO: handle both channels; use only the first for now
                    templates = get_activation_templates(cam, signal, sec_before=before, sec_after=after, fs=fs)
                elif template_opt == 'sw':
                    templates = get_sw_templates(cam, signal, sec_before=before, sec_after=after, fs=fs,
                                                 num_ch=num_ch, random=random, noise_inter=noise_inter)
                elif template_opt == 'rem':
                    eog = test_loader.dataset.get_eog(signal_name)
                    templates = get_rem_templates(cam=cam, signal_eog=eog, sec_before=before, sec_after=after,
                                                  fs_eeg=fs, random=random, noise_inter=noise_inter)
                elif template_opt == 'emg':
                    signal_emg = test_loader.dataset.get_emg(signal_name).squeeze()
                    templates = get_emg_onset_templates(cam=cam, signal_emg=signal_emg, sec_before=before,
                                                        sec_after=after, fs_eeg=fs, random=random)

                all_templates += templates
    
        all_templates = np.vstack(all_templates).T

        # num_templates = all_templates.shape[0]
        # Normalize templates
        # for i in range(num_templates):
        #     cam_i = all_templates[:, i]
        #     if ((cam_i - cam_i.mean()) != 0).sum() < 5:
        #         continue
        #     if max(cam_i) != min(cam_i):
        #         cam_i = (cam_i - min(cam_i)) / (max(cam_i) - min(cam_i))
        #         all_templates[:, i] = cam_i

        if random:
            save_name = os.path.join('noise', f'{save_name}_figure_data_noise={noise_inter}')

        with open(os.path.join('plot_results', save_name), 'wb') as f:
            pkl.dump(all_templates, f, protocol=pkl.HIGHEST_PROTOCOL)
    return all_templates
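
# Example call for get_templates (a sketch; the model name and window
# lengths are assumptions):
#
#   spindle_templates = get_templates('1slice_freq', oversample=False,
#                                     template_opt='spindle', cam_target=2,
#                                     before=5., after=5., try2load=True)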
Example #3

import os
import pickle as pkl
from collections import Counter

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.signal import resample  # assumption: resample here is scipy's

# Assumed repo-local imports: run_params, init_datasets, create_dataloaders,
# HSICClassifier, HSICLoss, MLP1Layer, ECGDataset, get_device,
# feature_names_len_from_subset, _get_templates, _fix_peaks.

feature_opt = run_params(features_subset)
torch.manual_seed(44)
device = get_device(cuda_id)
file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        'saved_models', file_name)

train_loader, val_loader, test_loader = init_datasets(
    task=task,
    balanced_dataset=balanced_dataset,
    batch_size=batch_size,
    normalize_signals=True,
    features_subset=features_subset)

main_model = HSICClassifier(num_classes=2,
                            feature_opt=feature_opt,
                            gap_norm_opt='batch_norm',
                            feature_len=train_loader.dataset.feature_len,
                            in_channels=2).to(device)

main_model.load_state_dict(
    torch.load(os.path.join(file_dir, f'{file_name}_params.pkl'),
               map_location='cpu'))
main_model.eval()

features_len, _ = feature_names_len_from_subset(features_subset)
feature_predictor = MLP1Layer(in_size=rep_size,
                              hidden_size=rep_size,
                              out_size=features_len).to(device)
optimizer = optim.Adam(feature_predictor.parameters(),
                       lr=lr,
                       weight_decay=1e-6)
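
# Sketch of the probe-training step this setup feeds into (assumptions: the
# GAP representations come from generate_gap above, batches of (rep, feats)
# are available via a hypothetical probe_loader, and MSE is an illustrative
# loss choice):
#
#   feature_predictor.train()
#   for rep, feats in probe_loader:
#       optimizer.zero_grad()
#       loss = nn.functional.mse_loss(feature_predictor(rep.to(device)),
#                                     feats.to(device))
#       loss.backward()
#       optimizer.step()
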
def get_global_template(file_name,
                        cam_target=1,
                        label='af',
                        feature_subset='rr',
                        feature_opt='HSIC+Concat',
                        noise_lim_sec=0.):
    cuda_id = 0
    label_lookup = [
        'Normal Sinus Rhythm', 'Atrial Fibrillation', 'Other Rhythm', 'Noisy'
    ]
    signal_len = 5400

    test_dataset = ECGDataset("test",
                              feature_subset=feature_subset,
                              feature_opt=feature_opt,
                              oversample=label,
                              naf=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=1,
                                              shuffle=False)

    model = HSICClassifier(in_channels=1,
                           num_classes=3,
                           feature_len=test_dataset.feature_len,
                           feature_opt=feature_opt,
                           gap_norm_opt='batch_norm')

    main_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    model_to_load = f'{file_name}_params.pkl'

    model.load_state_dict(
        torch.load(os.path.join(main_dir, 'saved_models', file_name,
                                model_to_load),
                   map_location='cpu'))
    model.eval()

    rr_locals = pd.read_csv(
        os.path.join(main_dir, 'data', 'rr_local',
                     'rr_locals_90_test_filtered7.csv'))
    rr_locals.set_index('Unnamed: 0', inplace=True)

    all_templates_cam = []
    all_templates_sig = []
    total_activation = []
    list_signal_names = []
    before = 0.7
    after = 0.7

    for batch_idx, (data, target, feature, _, signal_name,
                    feature_rep) in enumerate(test_loader):
        logits, cam, _ = model(data, feature_rep)

        cam = np.squeeze(cam.detach().numpy())[cam_target, :]
        cam = np.maximum(cam, Counter(cam).most_common(1)[0][0])
        cam = resample(cam, signal_len)

        data = np.squeeze(data.detach().numpy())

        rr_local = rr_locals.loc[signal_name].values.astype(int)
        rr_local = _fix_peaks(rr_local, data)
        rr_local = rr_local[rr_local > 1]
        if (rr_local != 0).any():

            tmpls_cam, rr1 = _get_templates(cam,
                                            rr_local,
                                            before=before,
                                            after=after,
                                            fs=90,
                                            noise_lim_sec=noise_lim_sec)
            if tmpls_cam.shape[0]:
                all_templates_cam.append(tmpls_cam)
                list_signal_names.append(
                    [f'{signal_name}_{i}' for i in range(tmpls_cam.shape[1])])

            tmpls_sig, rr2 = _get_templates(data,
                                            rr_local,
                                            before=before,
                                            after=after,
                                            fs=90,
                                            noise_lim_sec=0)
            if tmpls_sig.shape[0]:
                all_templates_sig.append(tmpls_sig)

        # The downsampling process introduces artifacts, so zero samples are
        # not exactly 0 (more like 1e-5); threshold at 1e-4 instead.
        non_zero_index = np.where(abs(data) > 1e-4)[0]
        total_activation.append(cam[non_zero_index[0]:non_zero_index[-1]])

    cam_mat = np.hstack(all_templates_cam)
    sig_mat = np.hstack(all_templates_sig)

    for i in range(cam_mat.shape[1]):
        cam_i = cam_mat[:, i]
        if ((cam_i - cam_i.mean()) != 0).sum() < 5:
            continue
        if max(cam_i) != min(cam_i):
            cam_i = (cam_i - min(cam_i)) / (max(cam_i) - min(cam_i))
            cam_mat[:, i] = cam_i

    # Generate numeric measures; total_activation_std below is the standard
    # error of the mean.
    total_mean_activation = np.mean(
        [np.mean(total_activation[i]) for i in range(len(total_activation))])
    total_activation_std = np.std(np.hstack(total_activation)) / np.sqrt(
        np.size(np.hstack(total_activation)))

    save_data = {
        'total_mean_activation': total_mean_activation,
        'total_activation_std': total_activation_std,
        'cam_mat': cam_mat,
        'sig_mat': sig_mat,
        'before': before,
        'after': after,
        'list_signal_names': list_signal_names
    }

    with open(
            os.path.join(main_dir, 'saved_models', file_name,
                         f'figure_data_noise={noise_lim_sec}_pval.pkl'),
            'wb') as h:
        pkl.dump(save_data, h, protocol=pkl.HIGHEST_PROTOCOL)
    return cam_mat, sig_mat, before, after
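
# Usage sketch (the saved-model name is hypothetical):
#
#   cam_mat, sig_mat, before, after = get_global_template(
#       'ecg_rr_model', cam_target=1, label='af', feature_subset='rr',
#       feature_opt='HSIC+Concat', noise_lim_sec=0.)
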
file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        'saved_models', exp_name)
if not os.path.exists(file_dir):
    os.mkdir(file_dir)

device = get_device(cuda_id)

train_loader, val_loader, _ = create_dataloaders(batch_size,
                                                 feature_subset,
                                                 feature_opt,
                                                 naf=True)

model = HSICClassifier(num_classes=2,
                       in_channels=1,
                       feature_len=train_loader.dataset.feature_len,
                       feature_opt=feature_opt,
                       gap_norm_opt='batch_norm').to(device)

optimizer = optim.Adam(model.parameters(),
                       lr=lr,
                       betas=(0.95, 0.99),
                       eps=1e-08,
                       weight_decay=0,
                       amsgrad=False)
classification_criterion = nn.CrossEntropyLoss()
independence_criterion = HSICLoss(feature_opt,
                                  lambda_hsic,
                                  model.activation_size,
                                  device,
                                  decay_factor=0.7)
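
# Sketch of how these criteria combine in one training step (assumptions:
# the model's (logits, cam, gap) return signature and HSICLoss's call
# signature mirror the other examples on this page):
#
#   optimizer.zero_grad()
#   logits, _, gap = model(signals, feature_rep)
#   loss = classification_criterion(logits, labels) + \
#          independence_criterion(gap, features)
#   loss.backward()
#   optimizer.step()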
Example #6

import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Assumed repo-local imports: run_params, init_datasets, HSICClassifier,
# HSICLoss, get_device.

feature_opt = run_params(features_subset)

torch.manual_seed(44)
device = get_device(cuda_id)
file_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'saved_models', file_name)
if not os.path.exists(file_dir):
    os.mkdir(file_dir)

lambda_vec = lambda_hsic * np.hstack([np.linspace(0, 1, warm_start), np.ones(100)])

print(f'{file_name} started training')
train_loader, val_loader, test_loader = init_datasets(batch_size=batch_size, features_subset=features_subset,
                                                      task=task, balanced_dataset=balanced_dataset,
                                                      normalize_signals=True)
model = HSICClassifier(num_classes=2, feature_opt=feature_opt, feature_len=train_loader.dataset.feature_len,
                       in_channels=2, gap_norm_opt='None').to(device)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, eta_min=lr/100, T_max=num_epochs)
classification_criterion = nn.CrossEntropyLoss()
independence_criterion = HSICLoss(feature_opt, lambda_hsic, model.activation_size, device, decay_factor=0.7,
                                  external_feature_std=train_loader.dataset.med_dist)


def train(epoch):
    model.train()
    epoch_loss = 0

    for batch_idx, (signals, labels, _, features) in enumerate(tqdm(train_loader)):
        signals, labels, features = signals.to(device), labels.to(device), features.to(device)