def check_if_same_subject(file_name1, file_name2):
    """Tell whether two scan file names resolve to the same subject ID.

    Each name is passed through
    ``ClinicalDataReaderSPORE._get_subject_id_from_file_name`` and the two
    resulting IDs are compared with ``==``.

    :param file_name1: first scan file name
    :param file_name2: second scan file name
    :return: result of comparing the two extracted subject IDs
    """
    first_id = ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name1)
    second_id = ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name2)
    return first_id == second_id
示例#2
0
def get_subject_id_list(subject_id_exclude_list):
    """Extract the subject ID of every file name, with and without duplicates.

    :param subject_id_exclude_list: iterable of scan file names (each entry is
        handed to the file-name parser, despite the parameter's name)
    :return: tuple ``(ids, unique(ids))`` where ``ids`` keeps one entry per
        input file name, in input order
    """
    subject_ids = []
    for file_name in subject_id_exclude_list:
        subject_ids.append(
            ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name))

    deduplicated = unique(subject_ids)
    return subject_ids, deduplicated
def get_subject_id_list(subject_id_exclude_list):
    """Return the de-duplicated subject IDs found in a list of file names.

    :param subject_id_exclude_list: iterable of scan file names (each entry is
        handed to the file-name parser, despite the parameter's name)
    :return: whatever ``unique`` returns for the extracted subject IDs
    """
    extracted_ids = []
    for scan_file_name in subject_id_exclude_list:
        subject_id = ClinicalDataReaderSPORE._get_subject_id_from_file_name(
            scan_file_name)
        extracted_ids.append(subject_id)

    return unique(extracted_ids)
示例#4
0
    def generate_effective_data_csv(data_array, label_obj, out_csv):
        """Assemble one row of clinical attributes and PC scores per scan and save it as CSV.

        For each item of ``data_array`` the clinical record is looked up in
        ``label_obj``. When the scan has no record of its own, the record
        suggested by ``check_nearest_record_for_impute`` is used instead; scans
        for which that lookup returns ``None`` are skipped.

        :param data_array: iterable of dict-like items providing 'scan_name'
            and 'low_dim' (the first 20 entries of 'low_dim' are exported)
        :param label_obj: clinical data reader supplying the record lookups
            and the file-name parsing helpers used below
        :param out_csv: path of the CSV file to write
        :return: None
        """
        data_dict = {}
        attribute_list = PlotCorrAnalyzeLDA.attribute_list()
        for data_item in data_array:
            item_dict = {}
            scan_name = data_item['scan_name']

            scan_name_as_record = scan_name
            if not label_obj.check_if_have_record(scan_name):
                logger.info(f'Cannot find record for {scan_name}')
                scan_name_as_record = label_obj.check_nearest_record_for_impute(
                    scan_name)
                if scan_name_as_record is None:
                    # No substitute record either: leave this scan out.
                    continue
                logger.info(f'Using nearest record {scan_name_as_record}')

            for attr in attribute_list:
                item_dict[attr] = label_obj.get_value_field(
                    scan_name_as_record, attr)

            # Short aliases for selected raw attributes.
            item_dict['Cancer'] = item_dict['cancer_bengin']
            item_dict['COPD'] = item_dict['copd']
            item_dict['Packyear'] = item_dict['packyearsreported']
            # Subject ID and scan date always come from the scan's own file
            # name, even when the attributes were taken from a nearby record.
            item_dict['SubjectID'] = label_obj._get_subject_id_from_file_name(
                scan_name)
            item_dict['ScanDate'] = label_obj._get_date_str_from_file_name(
                scan_name)
            if item_dict['Cancer'] == 1:
                # NOTE(review): the helper's name says "date_str" but its
                # result is subtracted from a datetime below, so it presumably
                # returns a datetime-like object -- confirm.
                scan_date_obj = ClinicalDataReaderSPORE._get_date_str_from_file_name(
                    scan_name)
                diag_date_str = str(int(item_dict['diag_date']))
                diag_date_obj = datetime.datetime.strptime(
                    diag_date_str, '%Y%m%d')
                # Was two bare print() calls; keep the trace but route it
                # through the logger so it can be silenced.
                logger.debug(
                    f'Diagnosis date {diag_date_str} parsed as {diag_date_obj}')
                item_dict['Time2Diag'] = diag_date_obj - scan_date_obj

            # BMI = mass(lb)/height(inch)^2 * 703
            # Values outside the ranges below are treated as invalid and
            # yield NaN.
            bmi_val = np.nan
            mass_lb = item_dict['weightpounds']
            height_inch = item_dict['heightinches']
            if (70 < mass_lb < 400) and (40 < height_inch < 90):
                bmi_val = 703 * mass_lb / (height_inch * height_inch)
            item_dict['bmi'] = bmi_val

            # Export the first 20 low-dimensional components as named columns.
            for pc_idx in range(20):
                attr_str = PlotCorrAnalyzeLDA.get_pc_str(pc_idx)
                item_dict[attr_str] = data_item['low_dim'][pc_idx]

            data_dict[scan_name] = item_dict

        df = pd.DataFrame.from_dict(data_dict, orient='index')
        PlotCorrAnalyzeLDA.add_label_incidental_cancer_flag(df)

        logger.info(f'Save to csv {out_csv}')
        df.to_csv(out_csv)
def main():
    """Command-line entry point: combine saved low-dimensional data with clinical labels.

    Loads the object at ``--in-pca-data-bin``, creates a SPORE clinical data
    reader from ``--label-file``, builds the per-scan dictionary with
    ``generate_data_dict`` and stores it at ``--out-data-dict-bin``.
    """
    arg_parser = argparse.ArgumentParser(description='Load a saved pca object')
    for flag in ('--in-pca-data-bin', '--label-file', '--out-data-dict-bin'):
        arg_parser.add_argument(flag, type=str)
    args = arg_parser.parse_args()

    pca_items = load_object(args.in_pca_data_bin)
    clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        args.label_file)
    scan_records = generate_data_dict(pca_items, clinical_reader)

    logger.info(f'Save dict data object to {args.out_data_dict_bin}')
    save_object(scan_records, args.out_data_dict_bin)
示例#6
0
def main():
    """Command-line entry point: export saved low-dimensional data plus labels as CSV.

    Loads the object at ``--in-pca-data-bin``, creates a SPORE clinical data
    reader from ``--label-file`` and passes both, together with
    ``--out-data-csv``, to ``generate_effective_data_csv``.
    """
    arg_parser = argparse.ArgumentParser(description='Load a saved pca object')
    for flag in ('--in-pca-data-bin', '--label-file', '--out-data-csv'):
        arg_parser.add_argument(flag, type=str)
    args = arg_parser.parse_args()

    csv_path = args.out_data_csv
    pca_items = load_object(args.in_pca_data_bin)
    clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        args.label_file)

    generate_effective_data_csv(pca_items, clinical_reader, csv_path)
def generate_data_dict(data_array, label_obj):
    """Build a ``{scan_name: record}`` dictionary of clinical fields and image data.

    A scan without its own clinical record borrows the record returned by
    ``label_obj.check_nearest_record_for_impute``; if that is ``None`` the scan
    is left out of the result.

    :param data_array: iterable of dict-like items with keys 'scan_name' and
        'low_dim'
    :param label_obj: clinical data reader providing the lookups used below
    :return: dict mapping each kept scan name to its record dict
    """
    attr_names = get_attribute_list()
    records = {}

    for entry in data_array:
        scan = entry['scan_name']

        # Decide which clinical record supplies the attribute values.
        if label_obj.check_if_have_record(scan):
            record_key = scan
        else:
            record_key = label_obj.check_nearest_record_for_impute(scan)
            if record_key is None:
                continue

        record = {}
        for name in attr_names:
            record[name] = label_obj.get_value_field(record_key, name)

        # Short aliases for selected raw attributes.
        record['CAC'] = record['Coronary Artery Calcification']
        record['Cancer'] = record['cancer_bengin']
        record['COPD'] = record['copd']
        record['Packyear'] = record['packyearsreported']
        # Identity fields are always parsed from the scan's own file name.
        record['SubjectID'] = label_obj._get_subject_id_from_file_name(scan)
        record['ScanDate'] = label_obj._get_date_str_from_file_name(scan)

        if record['Cancer'] == 1:
            scan_date = ClinicalDataReaderSPORE._get_date_str_from_file_name(scan)
            diag_date = datetime.datetime.strptime(
                str(int(record['diag_date'])), '%Y%m%d')
            gap = diag_date - scan_date
            record['Time2Diag'] = gap
            # Flag scans taken at least 365 days before the diagnosis date.
            at_least_one_year = gap >= datetime.timedelta(days=365)
            if at_least_one_year:
                logger.info(gap)
            record['CancerIncubation'] = int(at_least_one_year)
            record['CancerSubjectFirstScan'] = label_obj.is_first_cancer_scan(scan)

        # BMI = mass(lb)/height(inch)^2 * 703; out-of-range inputs give NaN.
        weight = record['weightpounds']
        height = record['heightinches']
        in_range = (70 < weight < 400) and (40 < height < 90)
        record['bmi'] = 703 * weight / (height * height) if in_range else np.nan

        # Image data
        record['ImageData'] = entry['low_dim']

        records[scan] = record

    return records
def main():
    """Command-line entry point: print the number of distinct subjects in a scan CSV.

    Reads ``--in-csv`` with the 'Scan' column as index, maps every index entry
    to a subject ID and prints how many different IDs were found.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--in-csv', type=str)
    cli_args = arg_parser.parse_args()

    scan_table = pd.read_csv(cli_args.in_csv, index_col='Scan')
    scan_names = list(scan_table.to_dict('index').keys())

    distinct_subjects = set()
    for scan_name in scan_names:
        distinct_subjects.add(
            ClinicalDataReaderSPORE._get_subject_id_from_file_name(scan_name))

    print(f'Number of subjects: {len(distinct_subjects)}')
示例#9
0
def main():
    """Command-line entry point: write 'data_full.csv' and load it back as a plot object.

    The CSV is generated inside ``--out-png-folder`` from the object stored at
    ``--in-pca-data-bin`` and the clinical labels in ``--label-file``; it is
    then opened with ``PlotCorrAnalyzeLDA.create_class_object_w_csv``.
    ``--data-csv`` is accepted but not read here.
    """
    arg_parser = argparse.ArgumentParser(description='Load a saved pca object')
    for flag in ('--in-pca-data-bin', '--out-png-folder', '--label-file'):
        arg_parser.add_argument(flag, type=str)
    arg_parser.add_argument('--data-csv', type=str, default=None)
    args = arg_parser.parse_args()

    full_csv_path = os.path.join(args.out_png_folder, 'data_full.csv')

    pca_items = load_object(args.in_pca_data_bin)
    clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        args.label_file)
    PlotCorrAnalyzeLDA.generate_effective_data_csv(
        pca_items, clinical_reader, full_csv_path)

    # The resulting object is not used further in this function.
    plot_obj = PlotCorrAnalyzeLDA.create_class_object_w_csv(full_csv_path)
def main():
    """Command-line entry point: drop scans of excluded subjects from a file list.

    Reads the full file list (``--file-list-total``) and a list of files whose
    subjects must be excluded (``--subject-id-exclude-file-list``), keeps every
    file whose subject ID is not among the excluded ones and writes the result
    to ``--file-list-out``.
    """
    # The text used to be passed positionally, which argparse interprets as
    # ``prog`` (the program name shown in the usage line), not as the
    # description.
    # NOTE(review): the wording does not describe what this script does;
    # it looks copied from a plotting script -- consider rewording.
    parser = argparse.ArgumentParser(description='Plot box and scatter data.')
    parser.add_argument('--file-list-total', type=str)
    parser.add_argument('--subject-id-exclude-file-list', type=str)
    parser.add_argument('--file-list-out', type=str)
    args = parser.parse_args()

    file_list_total = read_file_contents_list(args.file_list_total)
    subject_id_exclude_file_list = read_file_contents_list(
        args.subject_id_exclude_file_list)

    # Build the lookup set once so each membership test below is O(1).
    subject_id_exclude_set = set(
        get_subject_id_list(subject_id_exclude_file_list))

    file_list_reduced = [
        file_name for file_name in file_list_total
        if ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        not in subject_id_exclude_set
    ]

    save_file_contents_list(args.file_list_out, file_list_reduced)
示例#11
0
def main():
    """Command-line entry point: write the sub-list of files matching a gender label.

    The files listed in ``--total-file-list`` are filtered with
    ``filter_sublist_with_label`` on the 'sex' field using ``--gender-str``,
    and the surviving entries are written to ``--out-file-list-txt``.
    """
    option_help = (
        ('--total-file-list', 'Only to filter out the files in this txt'),
        ('--clinical-label-xlsx', 'Label file for clinical information'),
        ('--gender-str', 'The label for gender type'),
        ('--out-file-list-txt', 'Path to output file list txt file'),
    )
    arg_parser = argparse.ArgumentParser(
        description='Get the file list for a specified gender')
    for flag, help_text in option_help:
        arg_parser.add_argument(flag, type=str, help=help_text)
    args = arg_parser.parse_args()

    reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        args.clinical_label_xlsx)
    candidate_files = read_file_contents_list(args.total_file_list)
    matching_files = reader.filter_sublist_with_label(
        candidate_files, 'sex', args.gender_str)
    write_list_to_file(matching_files, args.out_file_list_txt)
def analysis_correlation(args):
    """Correlate BMI with the per-scan 'mean' column and save a scatter plot.

    Prints the Pearson correlation between BMI and the 'mean' values, fits a
    linear regression and saves a scatter plot with the fitted line to a fixed
    output path.

    :param args: object exposing ``out_csv`` (CSV with 'file_name' and 'mean'
        columns, read here as input) and ``file_list_txt``
    :return: None
    """
    stats_table = pd.read_csv(args.out_csv).set_index('file_name')

    scan_files = read_file_contents_list(args.file_list_txt)
    # NOTE(review): ``in_clinical_csv`` is not defined in this function;
    # presumably a module-level name -- confirm.
    reader = ClinicalDataReaderSPORE.create_spore_data_reader_csv(
        in_clinical_csv)
    bmi_array, valid_file_name_list = reader.get_gt_value_BMI(scan_files)

    # Keep only the rows of scans for which a BMI value was returned.
    mean_values = stats_table.loc[valid_file_name_list]['mean'].to_numpy()

    print(pearsonr(bmi_array, mean_values))

    slope, intercept, _r_value, p_value, _std_err = linregress(
        bmi_array, mean_values)
    fitted_line = intercept + slope * bmi_array

    figure_path = os.path.join('/nfs/masi/xuk9/SPORE/CAC_class/data',
                               'bmi_mean_lung.png')

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.scatter(bmi_array, mean_values, label='Samples')
    fit_label = f'Slope={slope:.3f}, p-value={p_value:.3E}'
    ax.plot(bmi_array, fitted_line, color='r', label=fit_label)
    ax.set_xlabel('BMI ($kg/m^2$)')
    ax.set_ylabel('Averaged intensity (HU) in lung region')
    ax.legend(loc='best')

    plt.savefig(figure_path, bbox_inches='tight', pad_inches=0.1)
    plt.close()
def main():
    """Command-line entry point: draw the COPD, age, pack-year, CAC and BMI plots.

    When ``--data-csv`` is given the plot object is created from that CSV.
    Otherwise it is built from the object at ``--in-data-bin`` and the labels
    in ``--label-file``, and its label table is saved as 'data.csv' in
    ``--out-png-folder``. All figures are written into ``--out-png-folder``.
    """
    arg_parser = argparse.ArgumentParser(description='Load a saved pca object')
    for flag in ('--in-data-bin', '--out-png-folder', '--label-file'):
        arg_parser.add_argument(flag, type=str)
    arg_parser.add_argument('--data-csv', type=str, default=None)
    arg_parser.add_argument('--low-dim-data-flag', type=str, default='low_dim')
    args = arg_parser.parse_args()

    if args.data_csv is None:
        low_dim_items = load_object(args.in_data_bin)
        clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
            args.label_file)
        plot_obj = PlotSpacePCA.create_class_object_w_data(
            low_dim_items, clinical_reader, args.low_dim_data_flag)
        plot_obj.save_label_file(os.path.join(args.out_png_folder, 'data.csv'))
    else:
        plot_obj = PlotSpacePCA.create_class_object_w_csv(args.data_csv)

    # Each plot method ``plot_<name>`` writes to ``<name>.png``.
    for plot_name in ('copd', 'age', 'packyear', 'ca_cal', 'bmi'):
        draw = getattr(plot_obj, f'plot_{plot_name}')
        draw(os.path.join(args.out_png_folder, f'{plot_name}.png'))
示例#14
0
def get_idx_list_array_n_fold_cross_validation(file_name_list, label_list,
                                               num_fold):
    """
    Get the stratified n-fold split at subject level (scans of the same subject always go into one fold).

    Each subject is stratified by the label of its first scan in
    ``file_name_list``. Fold sizes and per-fold label counts are logged.

    :param file_name_list: file name list of scans, with .nii.gz
    :param label_list: per-scan labels, aligned with ``file_name_list``
    :param num_fold: number of folds
    :return: ``(scan_train_idx_list_array, scan_test_idx_list_array)``; each
        holds, for every fold, a list of indices into ``file_name_list``
    """
    subject_id_full = [
        ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        for file_name in file_name_list
    ]
    subject_id_unique = list(set(subject_id_full))

    # Group the scan indices of each subject in a single pass so the lookups
    # below do not rescan the whole scan list per subject.
    scan_idx_by_subject = {}
    for idx, subject_id in enumerate(subject_id_full):
        scan_idx_by_subject.setdefault(subject_id, []).append(idx)

    # A subject takes the label of its first scan.
    subject_label = [
        label_list[scan_idx_by_subject[subject_id][0]]
        for subject_id in subject_id_unique
    ]

    # No shuffling is requested, so ``random_state`` must not be passed:
    # it had no effect here and recent scikit-learn releases reject the
    # combination with a ValueError.
    skf = StratifiedKFold(n_splits=num_fold)

    subject_train_idx_list_array = []
    subject_test_idx_list_array = []
    for train_idx_list, test_idx_list in skf.split(subject_id_unique,
                                                   subject_label):
        subject_train_idx_list_array.append(train_idx_list)
        subject_test_idx_list_array.append(test_idx_list)

    def _scan_idx_of_subjects(subject_idx_list):
        # Expand subject indices into the indices of all of their scans.
        scan_idx_list = []
        for idx_subject in subject_idx_list:
            scan_idx_list += scan_idx_by_subject[subject_id_unique[idx_subject]]
        return scan_idx_list

    scan_train_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_train_idx_list_array
    ]
    scan_test_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_test_idx_list_array
    ]

    # Per-fold label histograms at subject level, for logging only.
    fold_train_subject_label_statics_dict_list = []
    fold_test_subject_label_statics_dict_list = []
    for idx_fold in range(num_fold):
        subject_train_idx_list = subject_train_idx_list_array[idx_fold]
        subject_test_idx_list = subject_test_idx_list_array[idx_fold]
        train_label = np.array(
            [subject_label[idx] for idx in subject_train_idx_list])
        test_label = np.array(
            [subject_label[idx] for idx in subject_test_idx_list])

        train_unique, train_counts = np.unique(train_label, return_counts=True)
        fold_train_subject_label_statics_dict_list.append(
            dict(zip(train_unique, train_counts)))

        test_unique, test_counts = np.unique(test_label, return_counts=True)
        fold_test_subject_label_statics_dict_list.append(
            dict(zip(test_unique, test_counts)))

    logger.info('Sizes of each fold:')
    logger.info(
        f'# Train (subject): {[len(train_subject_list) for train_subject_list in subject_train_idx_list_array]}'
    )
    logger.info(
        f'# Test (subject): {[len(test_subject_list) for test_subject_list in subject_test_idx_list_array]}'
    )
    logger.info(
        f'# Train (scan): {[len(train_list) for train_list in scan_train_idx_list_array]}'
    )
    logger.info(
        f'# Test (scan): {[len(test_list) for test_list in scan_test_idx_list_array]}'
    )
    for idx_fold in range(num_fold):
        logger.info(
            f'# Train label (subject, fold-{idx_fold}): {fold_train_subject_label_statics_dict_list[idx_fold]}'
        )
    for idx_fold in range(num_fold):
        logger.info(
            f'# Test label (subject, fold-{idx_fold}): {fold_test_subject_label_statics_dict_list[idx_fold]}'
        )

    return scan_train_idx_list_array, scan_test_idx_list_array
示例#15
0
def get_idx_list_array_n_fold_cross_validation_bl(file_name_list, label_list,
                                                  num_fold):
    """
    Get the stratified train/validate/test split at subject level (scans of the same subject always go into one fold).

    The subjects are cut into ``num_fold`` stratified folds. For split ``i``
    the folds ``i .. i + num_fold - 3`` (cyclically) form the training set,
    the next fold is the validation set and the one after it the test set.
    Each subject is stratified by the label of its first scan in
    ``file_name_list``.

    :param file_name_list: file name list of scans, with .nii.gz
    :param label_list: per-scan labels, aligned with ``file_name_list``
    :param num_fold: number of folds, at least 3
    :return: ``(scan_train_idx_list_array, scan_validate_idx_list_array,
        scan_test_idx_list_array)``; each holds, for every split, a list of
        indices into ``file_name_list``
    :raises ValueError: if ``num_fold`` is smaller than 3
    """
    if num_fold < 3:
        # At least one fold each is needed for train, validate and test.
        raise ValueError(
            f'num_fold must be at least 3 for a train/validate/test split, got {num_fold}')

    subject_id_full = [
        ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        for file_name in file_name_list
    ]
    subject_id_unique = list(set(subject_id_full))

    # Group the scan indices of each subject in a single pass so the lookups
    # below do not rescan the whole scan list per subject.
    scan_idx_by_subject = {}
    for idx, subject_id in enumerate(subject_id_full):
        scan_idx_by_subject.setdefault(subject_id, []).append(idx)

    # A subject takes the label of its first scan.
    subject_label = [
        label_list[scan_idx_by_subject[subject_id][0]]
        for subject_id in subject_id_unique
    ]

    # No shuffling is requested, so ``random_state`` must not be passed:
    # it had no effect here and recent scikit-learn releases reject the
    # combination with a ValueError.
    skf = StratifiedKFold(n_splits=num_fold)

    # Only the held-out part of each split is kept: these are the disjoint
    # folds that get recombined below.
    subject_fold_idx_list_array = [
        test_idx_list
        for _, test_idx_list in skf.split(subject_id_unique, subject_label)
    ]

    subject_train_idx_list_array = []
    subject_validate_idx_list_array = []
    subject_test_idx_list_array = []
    for idx_fold in range(num_fold):
        # Folds in cyclic order starting at idx_fold: all but the last two
        # train, the second to last validates, the last tests.
        rotated_fold_list = [
            subject_fold_idx_list_array[(idx_fold + offset) % num_fold]
            for offset in range(num_fold)
        ]
        subject_train_idx_list_array.append(
            np.concatenate(rotated_fold_list[:-2]))
        subject_validate_idx_list_array.append(rotated_fold_list[-2])
        subject_test_idx_list_array.append(rotated_fold_list[-1])

    def _scan_idx_of_subjects(subject_idx_list):
        # Expand subject indices into the indices of all of their scans.
        scan_idx_list = []
        for idx_subject in subject_idx_list:
            scan_idx_list += scan_idx_by_subject[subject_id_unique[idx_subject]]
        return scan_idx_list

    scan_train_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_train_idx_list_array
    ]
    scan_validate_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_validate_idx_list_array
    ]
    scan_test_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_test_idx_list_array
    ]

    show_subject_label_fold_statistics(num_fold,
                                       subject_train_idx_list_array,
                                       scan_train_idx_list_array,
                                       subject_label,
                                       set_flag='Train')
    show_subject_label_fold_statistics(num_fold,
                                       subject_validate_idx_list_array,
                                       scan_validate_idx_list_array,
                                       subject_label,
                                       set_flag='Validate')
    show_subject_label_fold_statistics(num_fold,
                                       subject_test_idx_list_array,
                                       scan_test_idx_list_array,
                                       subject_label,
                                       set_flag='Test')

    return scan_train_idx_list_array, scan_validate_idx_list_array, scan_test_idx_list_array
示例#16
0
def get_data_dict(config, file_list_txt):
    """Collect file names, subjects, paths and ground-truth values for a task.

    Only the 'BMI' task is implemented; scans are restricted to those for
    which ``get_gt_value_BMI`` returns a value.

    :param config: mapping with the keys 'task', 'input_img_dir', 'label_csv',
        the flags 'add_jacobian_map', 'add_valid_mask_map',
        'apply_random_valid_mask', 'add_d_index_map', 'add_jac_elem_maps',
        and the matching 'input_*_dir' entry for every enabled flag
    :param file_list_txt: path of the text file listing the scans to use
    :return: dict with 'img_names', 'img_subs', 'img_files', 'gt_val' and,
        depending on the flags, 'jacobian_maps', 'valid_masks',
        'd_index_maps' and 'jac_elem_<0..8>_map'
    :raises ValueError: if ``config['task']`` is not 'BMI'
    """
    task = config['task']
    in_folder = config['input_img_dir']
    label_csv = config['label_csv']

    # Any other task used to fall through with no labels and crash later with
    # an opaque TypeError; fail early with a clear message instead.
    if task != 'BMI':
        raise ValueError(
            f"Unsupported task {task!r}; only 'BMI' is implemented")

    in_folder_obj = DataFolder(in_folder,
                               read_file_contents_list(file_list_txt))
    file_list = in_folder_obj.get_data_file_list()

    clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_csv(
        label_csv)

    label_array, file_list_with_valid_label = clinical_reader.get_gt_value_BMI(
        file_list)

    subject_list = [
        ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        for file_name in file_list_with_valid_label
    ]

    in_folder_obj.set_file_list(file_list_with_valid_label)
    file_path_list = in_folder_obj.get_file_path_list()

    data_dict = {
        'img_names': file_list_with_valid_label,
        'img_subs': subject_list,
        'img_files': file_path_list,
        'gt_val': label_array
    }

    def _path_list_in(folder):
        # Paths of the valid-label files inside another folder.
        return DataFolder(folder,
                          file_list_with_valid_label).get_file_path_list()

    if config['add_jacobian_map']:
        data_dict['jacobian_maps'] = _path_list_in(config['input_jac_dir'])

    # Logical ``or`` rather than bitwise ``|``: these are truthiness flags.
    if config['add_valid_mask_map'] or config['apply_random_valid_mask']:
        data_dict['valid_masks'] = _path_list_in(
            config['input_valid_mask_dir'])

    if config['add_d_index_map']:
        data_dict['d_index_maps'] = _path_list_in(config['input_d_index_dir'])

    if config['add_jac_elem_maps']:
        # The base paths do not depend on the element index: fetch them once.
        jac_elem_base_path_list = list(
            _path_list_in(config['input_jac_elem_dir']))
        # One map per element index 0..8, stored as '<name>_<idx>.nii.gz'.
        for idx_elem in range(9):
            data_dict[f'jac_elem_{idx_elem}_map'] = [
                map_path.replace('.nii.gz', f'_{idx_elem}.nii.gz')
                for map_path in jac_elem_base_path_list
            ]

    return data_dict
示例#17
0
def get_idx_list_array_n_fold_regression_bl(file_name_list, num_fold):
    """
    Get the train/validate/test split at subject level (scans of the same subject always go into one fold).

    The subjects are cut into ``num_fold`` folds with ``KFold``. For split
    ``i`` the folds ``i .. i + num_fold - 3`` (cyclically) form the training
    set, the next fold is the validation set and the one after it the test
    set.

    :param file_name_list: file name list of scans
    :param num_fold: number of folds, at least 3
    :return: ``(scan_train_idx_list_array, scan_validate_idx_list_array,
        scan_test_idx_list_array)``; each holds, for every split, a list of
        indices into ``file_name_list``
    :raises ValueError: if ``num_fold`` is smaller than 3
    """
    if num_fold < 3:
        # At least one fold each is needed for train, validate and test.
        raise ValueError(
            f'num_fold must be at least 3 for a train/validate/test split, got {num_fold}')

    subject_id_full = [
        ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        for file_name in file_name_list
    ]
    subject_id_unique = list(set(subject_id_full))

    # Group the scan indices of each subject in a single pass so the lookups
    # below do not rescan the whole scan list per subject.
    scan_idx_by_subject = {}
    for idx, subject_id in enumerate(subject_id_full):
        scan_idx_by_subject.setdefault(subject_id, []).append(idx)

    # No shuffling is requested, so ``random_state`` must not be passed:
    # it had no effect here and recent scikit-learn releases reject the
    # combination with a ValueError.
    kf = KFold(n_splits=num_fold)

    # Only the held-out part of each split is kept: these are the disjoint
    # folds that get recombined below.
    subject_fold_idx_list_array = [
        test_idx_list for _, test_idx_list in kf.split(subject_id_unique)
    ]

    subject_train_idx_list_array = []
    subject_validate_idx_list_array = []
    subject_test_idx_list_array = []
    for idx_fold in range(num_fold):
        # Folds in cyclic order starting at idx_fold: all but the last two
        # train, the second to last validates, the last tests.
        rotated_fold_list = [
            subject_fold_idx_list_array[(idx_fold + offset) % num_fold]
            for offset in range(num_fold)
        ]
        subject_train_idx_list_array.append(
            np.concatenate(rotated_fold_list[:-2]))
        subject_validate_idx_list_array.append(rotated_fold_list[-2])
        subject_test_idx_list_array.append(rotated_fold_list[-1])

    def _scan_idx_of_subjects(subject_idx_list):
        # Expand subject indices into the indices of all of their scans.
        scan_idx_list = []
        for idx_subject in subject_idx_list:
            scan_idx_list += scan_idx_by_subject[subject_id_unique[idx_subject]]
        return scan_idx_list

    scan_train_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_train_idx_list_array
    ]
    scan_validate_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_validate_idx_list_array
    ]
    scan_test_idx_list_array = [
        _scan_idx_of_subjects(subject_idx_list)
        for subject_idx_list in subject_test_idx_list_array
    ]

    return scan_train_idx_list_array, scan_validate_idx_list_array, scan_test_idx_list_array