Exemplo n.º 1
0
def main():
    """Plot a histogram of one projected component and export it as csv.

    Command line arguments:
        --load-dr-bin: path of the saved object holding ``'file_list'`` and
            ``'projected_matrix'``.
        --component-id: 1-based index of the component (column) to extract.
        --save-bin-png: path of the histogram image to write.
        --save-csv: path of the csv (columns ``Scan`` and ``Value``) to write.
    """
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    parser.add_argument('--load-dr-bin', type=str)
    parser.add_argument('--component-id', type=int)
    parser.add_argument('--save-bin-png', type=str)
    parser.add_argument('--save-csv', type=str)
    args = parser.parse_args()

    # The id is 1-based. Without this check a value of 0 (or below) would
    # silently select a column counted from the end via negative indexing.
    if args.component_id is None or args.component_id < 1:
        parser.error('--component-id is required and must be >= 1')

    dr_data = load_object(args.load_dr_bin)

    file_list = dr_data['file_list']
    projected_data = dr_data['projected_matrix']

    num_component = projected_data.shape[1]
    if args.component_id > num_component:
        parser.error(
            f'--component-id must be <= {num_component}, '
            f'got {args.component_id}')

    component_val_array = projected_data[:, args.component_id - 1]

    component_dict_array = [{
        'Scan': scan_name,
        'Value': component_val_array[file_idx]
    } for file_idx, scan_name in enumerate(file_list)]

    plt.hist(component_val_array)
    plt.savefig(args.save_bin_png)
    # Release the figure once it has been written to disk.
    plt.close()

    df = pd.DataFrame(component_dict_array)
    logger.info(f'Save csv to {args.save_csv}')
    df.to_csv(args.save_csv, index=False)
def main():
    """Replace every scan's feature vector by its complement-space projection.

    Loads a dict keyed by scan name from ``--in-data-dict-bin``, fits
    ``EigenThoraxLinearRegression1D`` on the ``'ImageData'`` vectors against
    the ``'bmi'`` values, overwrites each ``'ImageData'`` entry with the
    matching row returned by ``project_to_complement_space`` and saves the
    dict to ``--out-data-dict-bin``.
    """
    cli = argparse.ArgumentParser(
        description='Eliminate the 1D subspace that correspond to BMI')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--in-feature-dim', type=int, default=20)
    cli.add_argument('--out-data-dict-bin', type=str)
    args = cli.parse_args()

    scan_dict = load_object(args.in_data_dict_bin)

    scan_names = list(scan_dict.keys())
    num_scan = len(scan_names)
    feature_matrix = np.zeros((num_scan, args.in_feature_dim), dtype=float)
    bmi_vector = np.zeros((num_scan, ), dtype=float)

    # One row per scan, in the order of scan_names.
    for row, name in enumerate(scan_names):
        feature_matrix[row, :] = scan_dict[name]['ImageData'][:]
        bmi_vector[row] = scan_dict[name]['bmi']

    regression = EigenThoraxLinearRegression1D(feature_matrix, bmi_vector)
    regression.run_regression()
    complement_matrix = regression.project_to_complement_space()

    # Write the projected rows back using the same scan ordering.
    for row, name in enumerate(scan_names):
        scan_dict[name]['ImageData'] = complement_matrix[row, :]

    save_object(scan_dict, args.out_data_dict_bin)
Exemplo n.º 3
0
def main():
    """Embed the stored low dimensional vectors with tSNE and save them.

    Reads the sample list from ``--low-dim-bin-path``, stacks each sample's
    ``'low_dim'`` vector into a matrix with ``--num-pca-component`` columns,
    runs ``TSNE`` with ``--dim-embedded`` output dimensions, stores each
    embedded row under ``'tsne_data'`` and saves the list to
    ``--save-bin-path``.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--low-dim-bin-path', type=str)
    cli.add_argument('--save-bin-path', type=str)
    cli.add_argument('--num-pca-component', type=int, default=10)
    cli.add_argument('--dim-embedded', type=int, default=2)
    args = cli.parse_args()

    logger.info(f'Load low dim data from {args.low_dim_bin_path}')
    sample_list = load_object(args.low_dim_bin_path)
    num_sample = len(sample_list)

    pc_matrix = np.zeros((num_sample, args.num_pca_component))
    for row in range(num_sample):
        pc_matrix[row, :] = sample_list[row]['low_dim'][:]

    logger.info(f'Num of sample: {pc_matrix.shape[0]}')
    logger.info(f'Num of included PCs: {pc_matrix.shape[1]}')

    logger.info('Start tSNE')
    # An alternative setting tried earlier used learning_rate=10000 instead
    # of n_iter.
    # NOTE(review): newer scikit-learn releases rename ``n_iter`` to
    # ``max_iter`` - confirm against the installed version.
    tsne = TSNE(perplexity=50,
                n_iter=100000,
                n_components=args.dim_embedded)
    embedded = tsne.fit_transform(pc_matrix)
    logger.info('Complete')
    logger.info(f'Output shape: {embedded.shape}')

    # Attach the embedding of each sample next to its other fields.
    for row in range(num_sample):
        sample_list[row]['tsne_data'] = embedded[row, :]

    logger.info(f'Save data to {args.save_bin_path}')
    save_object(sample_list, args.save_bin_path)
Exemplo n.º 4
0
    def load_data(self, in_res_matrix_path, num_res_pc, in_jac_matrix_path,
                  num_jac_pc):
        """Load two projected matrices and join their leading columns.

        The first ``num_res_pc`` columns of the matrix stored at
        ``in_res_matrix_path`` are placed to the left of the first
        ``num_jac_pc`` columns of the matrix stored at ``in_jac_matrix_path``.
        The result is kept in ``self._use_data_matrix``; the file list is
        taken from the first object.
        """
        self._in_res_matrix_obj = load_object(in_res_matrix_path)
        self._num_res_pc = num_res_pc
        self._in_jac_matrix_obj = load_object(in_jac_matrix_path)
        self._num_jac_pc = num_jac_pc

        self._file_list = self._in_res_matrix_obj['file_list']

        total_dim = num_res_pc + num_jac_pc
        res_projected = self._in_res_matrix_obj['projected_matrix']
        self._use_data_matrix = np.zeros((res_projected.shape[0], total_dim))

        # Left part: leading columns of the first matrix.
        self._use_data_matrix[:, :self._num_res_pc] = \
            res_projected[:, :self._num_res_pc]

        # Right part: leading columns of the second matrix.
        jac_projected = self._in_jac_matrix_obj['projected_matrix']
        self._use_data_matrix[:, self._num_res_pc:total_dim] = \
            jac_projected[:, :self._num_jac_pc]
Exemplo n.º 5
0
    def load_data(self, in_data_matrix_bin_path, num_pc):
        """Load a saved data object and keep its first ``num_pc`` columns.

        Stores the loaded object, ``num_pc``, the object's ``'file_list'``
        and the column-truncated ``'projected_matrix'`` on the instance.
        """
        logger.info(f'Load bin data file {in_data_matrix_bin_path}')
        self._data_obj = load_object(in_data_matrix_bin_path)

        self._num_pc = num_pc
        self._file_list = self._data_obj['file_list']

        full_projection = self._data_obj['projected_matrix']
        self._use_data_matrix = full_projection[:, :self._num_pc]
Exemplo n.º 6
0
def main():
    """Run ``get_optimal_AMI_cancer_first_year`` on a saved data dict.

    The dict is read from ``--in-data-dict-bin`` and handed, together with
    ``--n-features``, to ``ClusterAnalysisDimAnalyzer``.
    """
    cli = argparse.ArgumentParser(description='KMean clustering analysis')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--n-features', type=int)
    options = cli.parse_args()

    data_dict = load_object(options.in_data_dict_bin)
    analyzer = ClusterAnalysisDimAnalyzer(data_dict, options.n_features)
    analyzer.get_optimal_AMI_cancer_first_year()
Exemplo n.º 7
0
def main():
    """Run ``RunICA`` on a saved data dict and save the result.

    The dict is read from ``--in-data-dict-bin``; after ``run_ica`` the
    object writes its data dict to ``--out-data-dict-bin``.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--out-data-dict-bin', type=str)
    options = cli.parse_args()

    ica_runner = RunICA(load_object(options.in_data_dict_bin))
    ica_runner.run_ica()
    ica_runner.save_data_dict_bin(options.out_data_dict_bin)
Exemplo n.º 8
0
def main():
    """Run ``FSDimReduction1D`` for the 'Age' field and save the result.

    The data dict is read from ``--in-data-dict-bin`` and passed with
    ``--in-feature-dim`` to ``FSDimReduction1D``; the reduced data is saved
    to ``--out-data-dict-bin``.
    """
    # NOTE(review): the description mentions BMI while the reduction below
    # is run for 'Age' - confirm which one is intended.
    cli = argparse.ArgumentParser(
        description='Eliminate the 1D subspace that correspond to BMI')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--in-feature-dim', type=int, default=20)
    cli.add_argument('--out-data-dict-bin', type=str)
    options = cli.parse_args()

    data_dict = load_object(options.in_data_dict_bin)
    reducer = FSDimReduction1D(data_dict, options.in_feature_dim)
    reducer.run_dim_reduct('Age')
    reducer.save_bin(options.out_data_dict_bin)
def main():
    """Build a data dict from saved low dimensional data and a label file.

    Loads the object at ``--in-pca-data-bin``, creates a SPORE clinical data
    reader from the xlsx file ``--label-file``, combines both with
    ``generate_data_dict`` and saves the result to ``--out-data-dict-bin``.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--in-pca-data-bin', type=str)
    cli.add_argument('--label-file', type=str)
    cli.add_argument('--out-data-dict-bin', type=str)
    options = cli.parse_args()

    low_dim_data = load_object(options.in_pca_data_bin)
    label_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        options.label_file)
    combined_dict = generate_data_dict(low_dim_data, label_reader)

    out_path = options.out_data_dict_bin
    logger.info(f'Save dict data object to {out_path}')
    save_object(combined_dict, out_path)
Exemplo n.º 10
0
def main():
    """Run ``run_meta_data_kmeans`` on a saved data dict.

    The dict is read from ``--in-data-dict-bin`` and wrapped, together with
    ``--n-features``, in ``ClusterAnalysisDimAnalyzer``. ``--out-png-folder``
    is forwarded as the last argument of the analysis call.
    """
    cli = argparse.ArgumentParser(description='KMean clustering analysis')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--n-features', type=int)
    cli.add_argument('--out-png-folder', type=str)
    options = cli.parse_args()

    data_dict = load_object(options.in_data_dict_bin)
    analyzer = ClusterAnalysisDimAnalyzer(data_dict, options.n_features)

    meta_field_list = ['bmi', 'Age', 'Packyear']
    analyzer.run_meta_data_kmeans(meta_field_list, 'CancerSubjectFirstScan',
                                  10, options.out_png_folder)
Exemplo n.º 11
0
def main():
    """Export saved low dimensional data plus labels through a csv helper.

    Loads the object at ``--in-pca-data-bin``, creates a SPORE clinical data
    reader from the xlsx file ``--label-file`` and passes both, with the
    ``--out-data-csv`` path, to ``generate_effective_data_csv``.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--in-pca-data-bin', type=str)
    cli.add_argument('--label-file', type=str)
    cli.add_argument('--out-data-csv', type=str)
    options = cli.parse_args()

    low_dim_data = load_object(options.in_pca_data_bin)
    label_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        options.label_file)
    generate_effective_data_csv(low_dim_data, label_reader,
                                options.out_data_csv)
    def run_dimension_reduction(self, save_bin_path):
        """Project the stored feature matrix with the saved PCA and save it.

        Loads the PCA from ``self._pca_bin_path`` and the feature data from
        ``self._data_bin_path``, transforms the ``'data_matrix'`` entry and
        saves a dict with ``'file_list'`` and ``'projected_matrix'`` to
        ``save_bin_path``.
        """
        pca_wrapper = PCA_NII_3D(None, None, 1)
        pca_wrapper.load_pca(self._pca_bin_path)

        feature_obj = load_object(self._data_bin_path)

        fitted_pca = pca_wrapper._get_pca()
        projected = fitted_pca.transform(feature_obj['data_matrix'])

        save_object(
            {
                'file_list': feature_obj['file_list'],
                'projected_matrix': projected
            },
            save_bin_path)
Exemplo n.º 13
0
def main():
    """Run the kmeans field-list plot for a saved data dict.

    The dict is read from ``--in-data-dict-bin`` and wrapped, together with
    ``--n-features``, in ``ClusterAnalysisDimAnalyzer``. ``--n-cluster`` and
    ``--out-png-folder`` are forwarded to
    ``plot_kmean_n_cluster_field_list_cancer_subject_first_scan``.
    """
    cli = argparse.ArgumentParser(description='KMean clustering analysis')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--n-features', type=int)
    cli.add_argument('--out-png-folder', type=str)
    cli.add_argument('--n-cluster', type=int, default=10)
    options = cli.parse_args()

    data_dict = load_object(options.in_data_dict_bin)
    analyzer = ClusterAnalysisDimAnalyzer(data_dict, options.n_features)

    field_list = [
        'CancerSubjectFirstScan',
        'COPD',
        'Coronary Artery Calcification',
        'Age',
        'Packyear',
        'bmi',
    ]
    analyzer.plot_kmean_n_cluster_field_list_cancer_subject_first_scan(
        field_list, options.n_cluster, options.out_png_folder)
def main():
    """Produce the elbow and silhouette plots for a saved data dict.

    The dict is read from ``--in-data-dict-bin`` and wrapped, together with
    ``--n-features``, in ``ClusterAnalysisDataDict``. The plots are written
    to ``elbow_plot.png`` and ``silhouette_plot.png`` inside
    ``--out-png-folder``.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--n-features', type=int)
    cli.add_argument('--out-png-folder', type=str)
    options = cli.parse_args()

    raw_dict = load_object(options.in_data_dict_bin)
    wrapped_dict = ClusterAnalysisDataDict(raw_dict, options.n_features)
    cluster_search = ClusterAnalysisSearchNumCluster(wrapped_dict)

    png_folder = options.out_png_folder
    elbow_png = os.path.join(png_folder, 'elbow_plot.png')
    silhouette_png = os.path.join(png_folder, 'silhouette_plot.png')
    cluster_search.ElbowSilhouettePlot(elbow_png, silhouette_png)
def main():
    """Summarise subject characteristics for the scans of a feature matrix.

    Uses the module-level settings ``in_feature_matrix_bin``, ``spore_csv``
    and ``ori_spore_excel``. Several attributes are pulled from the original
    label spreadsheet into the reader before the summary is requested.
    """
    # An earlier variant read the file list with
    # read_file_contents_list(female_file_list) instead.
    file_list = load_object(in_feature_matrix_bin)['file_list']
    subject_list = ClinicalDataReaderSPORE.get_subject_list(file_list)

    reader_obj = ClinicalDataReaderSPORE.create_spore_data_reader_csv(spore_csv)
    ori_spore_label_df = pd.read_excel(ori_spore_excel)

    # Column names are passed on exactly as written here.
    attribute_name_list = (
        'copd',
        'Coronary Artery Calcification',
        'race',
        'LungRADS',
        'smokingstatus',
        'packyearsreported',
        'education',
        'cancer_bengin',
    )
    for attribute_name in attribute_name_list:
        reader_obj.get_attributes_from_original_label_file(
            ori_spore_label_df, attribute_name)

    reader_obj.get_summary_characteristics_subject(subject_list)
Exemplo n.º 16
0
def main():
    """Write the full data csv and build a ``PlotCorrAnalyzeLDA`` from it.

    Loads the object at ``--in-pca-data-bin``, creates a SPORE clinical data
    reader from the xlsx file ``--label-file``, writes ``data_full.csv`` into
    ``--out-png-folder`` via ``generate_effective_data_csv`` and then creates
    the plot object from that csv.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--in-pca-data-bin', type=str)
    cli.add_argument('--out-png-folder', type=str)
    cli.add_argument('--label-file', type=str)
    # Accepted on the command line but not read below.
    cli.add_argument('--data-csv', type=str, default=None)
    options = cli.parse_args()

    full_csv_path = os.path.join(options.out_png_folder, 'data_full.csv')

    low_dim_data = load_object(options.in_pca_data_bin)
    label_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
        options.label_file)
    PlotCorrAnalyzeLDA.generate_effective_data_csv(
        low_dim_data, label_reader, full_csv_path)

    # The created object is not used further in this function.
    plot_obj = PlotCorrAnalyzeLDA.create_class_object_w_csv(full_csv_path)
Exemplo n.º 17
0
def main():
    """Draw the tSNE grid plot of the ortho-space correlation analysis.

    The data dict is read from ``--in-data-dict-bin`` and wrapped, together
    with ``--n-features``, in ``ClusterAnalysisDataDict``. The plot call
    receives ``--out-png-folder``.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--in-data-dict-bin', type=str)
    cli.add_argument('--n-features', type=int)
    cli.add_argument('--out-png-folder', type=str)
    options = cli.parse_args()

    raw_dict = load_object(options.in_data_dict_bin)
    wrapped_dict = ClusterAnalysisDataDict(raw_dict, options.n_features)

    # Analyses that were switched off here earlier: CorrelationAnalysis
    # correlation / mutual-info bar plots and 2D dim plots for 'bmi', 'Age'
    # and 'Packyear'; plot_2D_top_dim_ortho, plot_2D_top_dim_lda_ortho and
    # plot_2D_grid_pack_field_list on the ortho-space object.
    ortho_analysis = CorrelationAnalysis2OrthoSpace(wrapped_dict)
    ortho_analysis.plot_2D_grid_pack_field_tsne_list(options.out_png_folder)
Exemplo n.º 18
0
def main():
    """Train and/or validate the cross-validated minibatch linear classifier.

    Relies on module-level settings: ``file_list_txt``, ``in_csv_file``,
    ``in_folder``, ``num_fold``, ``batch_size``, ``proj_folder``,
    ``if_run_training`` and ``if_run_validation``.
    """
    file_list = read_file_contents_list(file_list_txt)

    clinical_data_reader = ClinicalDataReaderSPORE.create_spore_data_reader_csv(
        in_csv_file)
    label_list = clinical_data_reader.get_label_for_obese(file_list)

    # One (scan, label) row per file.
    label_df = pd.DataFrame(list(zip(file_list, label_list)),
                            columns=['scan', 'label'])

    classifier_obj = MinibatchLinearClassifierWithCV.create_classifier_obj(
        in_folder, file_list, num_fold, label_df, batch_size)

    save_bin_path = path.join(proj_folder, 'model.bin')

    if if_run_training:
        classifier_obj.train()
        classifier_obj.validate()
        # Earlier variant: train_first_fold() followed by
        # save_object(classifier_obj, save_bin_path).

    if if_run_validation:
        # NOTE(review): the model is loaded from save_bin_path, but the
        # training branch above does not write that file - confirm it is
        # produced elsewhere.
        classifier_obj = load_object(save_bin_path)
        classifier_obj.valid_first_fold()
        auc_roc_first_fold = classifier_obj.validation_result[0]['roc_auc']
        print(f'auc_roc of fold 0: {auc_roc_first_fold}')
def main():
    """Create the ``PlotSpacePCA`` plots for COPD, age, packyear, CAC and BMI.

    When ``--data-csv`` is given the plot object is built from that csv.
    Otherwise it is built from the object at ``--in-data-bin`` and the xlsx
    ``--label-file`` (using ``--low-dim-data-flag``), and its label file is
    saved as ``data.csv`` in ``--out-png-folder``. All images are written
    into ``--out-png-folder``.
    """
    cli = argparse.ArgumentParser(description='Load a saved pca object')
    cli.add_argument('--in-data-bin', type=str)
    cli.add_argument('--out-png-folder', type=str)
    cli.add_argument('--label-file', type=str)
    cli.add_argument('--data-csv', type=str, default=None)
    cli.add_argument('--low-dim-data-flag', type=str, default='low_dim')
    options = cli.parse_args()

    if options.data_csv is None:
        low_dim_data = load_object(options.in_data_bin)
        label_reader = ClinicalDataReaderSPORE.create_spore_data_reader_xlsx(
            options.label_file)
        plot_obj = PlotSpacePCA.create_class_object_w_data(
            low_dim_data, label_reader, options.low_dim_data_flag)
        plot_obj.save_label_file(
            os.path.join(options.out_png_folder, 'data.csv'))
    else:
        plot_obj = PlotSpacePCA.create_class_object_w_csv(options.data_csv)

    png_folder = options.out_png_folder
    plot_obj.plot_copd(os.path.join(png_folder, 'copd.png'))
    plot_obj.plot_age(os.path.join(png_folder, 'age.png'))
    plot_obj.plot_packyear(os.path.join(png_folder, 'packyear.png'))
    plot_obj.plot_ca_cal(os.path.join(png_folder, 'ca_cal.png'))
    plot_obj.plot_bmi(os.path.join(png_folder, 'bmi.png'))
Exemplo n.º 20
0
def main():
    """Plot elbow and silhouette curves for several saved data dicts.

    Every data dict listed below is loaded from ``--bin-folder`` and gets one
    row of the figure: the left panel shows the elbow curve, the right panel
    the silhouette curve (from the second cluster count onwards). The figure
    is saved as ``optimal_num_cluster.png`` in ``--out-png-folder``.
    """
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    parser.add_argument('--bin-folder', type=str)
    parser.add_argument('--out-png-folder', type=str)
    args = parser.parse_args()

    # (file name in --bin-folder, legend label, number of features).
    # Further data dicts (e.g. the intermediate BMI / Age / Packyear
    # reductions) can be appended here; the grid grows with the list.
    bin_data_spec_list = [
        ('init_data_dict.bin', 'original (#dim=20)', 20),
        ('reduct_packyear_1_data_dict.bin',
         'reduce BMI, Age and Packyear (#dim=15)', 15),
    ]

    n_cluster_range = range(1, 11)

    # Create the whole grid in one call so the number of panels follows the
    # number of data dicts and no unused full-figure Axes is left behind.
    fig, ax_grid = plt.subplots(len(bin_data_spec_list),
                                2,
                                figsize=(20, 14),
                                squeeze=False)

    for ax_row, bin_data_spec in zip(ax_grid, bin_data_spec_list):
        bin_file_name, bin_data_name, bin_data_num_features = bin_data_spec
        elbow_ax, silhouette_ax = ax_row

        bin_data_dict = load_object(
            os.path.join(args.bin_folder, bin_file_name))
        data_dict_obj = ClusterAnalysisDataDict(bin_data_dict,
                                                bin_data_num_features)
        optimal_cluster_num_obj = ClusterAnalysisSearchNumCluster(
            data_dict_obj)

        elbow_list, silhouette_list = \
            optimal_cluster_num_obj.get_elbow_and_silhouette_array()

        elbow_ax.plot(n_cluster_range, elbow_list, label=bin_data_name)
        elbow_ax.set_title('Sum of squared distance to cluster centroids')
        elbow_ax.legend(loc='best')

        # The first entry of the silhouette list is skipped.
        silhouette_ax.plot(n_cluster_range[1:],
                           silhouette_list[1:],
                           label=bin_data_name)
        silhouette_ax.set_title('Silhouette score')
        silhouette_ax.legend(loc='best')

    out_png = os.path.join(args.out_png_folder, 'optimal_num_cluster.png')
    logger.info(f'Save to {out_png}')
    fig.tight_layout()
    fig.savefig(out_png)
    plt.close(fig)
Exemplo n.º 21
0
 def load_data(self):
     """Load the object stored at ``self._data_bin_path`` into ``self._data_obj``."""
     loaded_obj = load_object(self._data_bin_path)
     self._data_obj = loaded_obj
Exemplo n.º 22
0
 def load_pca(self, bin_path):
     """Load a saved PCA object from ``bin_path`` into ``self._pca``.

     The path is printed (flushed immediately) before loading.
     """
     # The f-string placeholder needs no '$' prefix; with it the message
     # showed a literal dollar sign in front of the path.
     print(f'Loading pca from {bin_path}', flush=True)
     self._pca = load_object(bin_path)