import os.path as osp
import os
import argparse

import numpy as np
import yaml
import pandas as pd
import nibabel as nib
from sklearn.metrics import roc_curve

from tools.utils import get_logger, mkdir_p
from tools.clinical import ClinicalDataReaderSPORE
from tools.data_io import ScanWrapper
from tools.plot import plot_training_curve

logger = get_logger('grad_cam_analysis')

# yaml_config_file = 'simg_bmi_regression_0_cam.yaml'
fold_idx = 0


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--yaml-config',
                        type=str,
                        default='simg_bmi_regression_0_cam.yaml')
    args = parser.parse_args()

    SRC_ROOT = os.path.dirname(os.path.realpath(__file__)) + '/..'
    yaml_config = os.path.join(SRC_ROOT, f'src/yaml/{args.yaml_config}')
    logger.info(f'Read yaml file {yaml_config}')
import os

import numpy as np
import torch
from tensorboardX import SummaryWriter

from config import SearchConfig
from tools import utils
from models.search_cnn import SearchCNNController
from architect import Architect
from tools.visualize import plot

config = SearchConfig()

device = torch.device("cuda")

# tensorboard
tb_writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
tb_writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    logger.info("Logger is set - training start")

    torch.cuda.set_device(config.gpus[0])

    # seed setting
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True
import functools
import os
import sys
import time

import torch
from torch.utils.data import DataLoader

# parse_args, set_seed, worker_seed_init_fn, get_logger, build_optimizer,
# build_scheduler, build_training_mode, train_KD, validate_KD and
# compute_flops_and_params are project-local helpers assumed importable
# in the full module.


def main():
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from train_config import config
    log_dir = os.path.join(args.work_dir, 'log')
    checkpoint_dir = os.path.join(args.work_dir, 'checkpoints')
    resume_model = os.path.join(checkpoint_dir, 'latest.pth')

    set_seed(config.seed)

    local_rank = args.local_rank
    # start init process
    if config.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        torch.cuda.set_device(local_rank)

    init_fn = functools.partial(worker_seed_init_fn,
                                num_workers=config.num_workers,
                                local_rank=local_rank,
                                seed=config.seed)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        config.train_dataset, shuffle=True) if config.distributed else None
    train_loader = DataLoader(config.train_dataset,
                              batch_size=config.batch_size,
                              shuffle=(train_sampler is None),
                              pin_memory=True,
                              num_workers=config.num_workers,
                              sampler=train_sampler,
                              worker_init_fn=init_fn)
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        config.val_dataset, shuffle=False) if config.distributed else None
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            sampler=val_sampler)

    if (config.distributed and local_rank == 0) or not config.distributed:
        # automatically create checkpoint folder
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    logger = get_logger('train', log_dir)

    for key, value in config.__dict__.items():
        if not key.startswith('__'):
            if key not in ['model', 'criterion']:
                log_info = f'{key}: {value}'
                logger.info(log_info) if (
                    config.distributed
                    and local_rank == 0) or not config.distributed else None

    gpus_type, gpus_num = torch.cuda.get_device_name(
    ), torch.cuda.device_count()
    log_info = f'gpus_type: {gpus_type}, gpus_num: {gpus_num}'
    logger.info(log_info) if (config.distributed and local_rank
                              == 0) or not config.distributed else None

    model = config.model.cuda()
    criterion = config.criterion
    for name in criterion.keys():
        criterion[name] = criterion[name].cuda()

    # parameters need to be updated by the optimizer;
    # buffers don't need to be updated by the optimizer
    for name, param in model.named_parameters():
        log_info = f'name: {name}, grad: {param.requires_grad}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None
    for name, buffer in model.named_buffers():
        log_info = f'name: {name}, grad: {buffer.requires_grad}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

    optimizer = build_optimizer(config, model)
    scheduler = build_scheduler(config, optimizer)
    model = build_training_mode(config, model, optimizer)

    start_epoch = 1
    best_top1 = 0.0
    # automatically resume model for training if checkpoint model exists
    if os.path.exists(resume_model):
        checkpoint = torch.load(resume_model,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        saved_epoch = checkpoint['epoch']
        start_epoch += saved_epoch
        best_top1, loss, lr = checkpoint['best_top1'], checkpoint[
            'loss'], checkpoint['lr']
        log_info = (
            f'resuming model from {resume_model}. '
            f'resume_epoch: {saved_epoch}, best_top1: {best_top1:.3f}%, '
            f'loss: {loss:.4f}, lr: {lr:.6f}')
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

    # calculate training time
    start_time = time.time()

    for epoch in range(start_epoch, config.epochs + 1):
        torch.cuda.empty_cache()
        train_sampler.set_epoch(epoch) if config.distributed else None
        top1, top5, loss = train_KD(train_loader, model, criterion,
                                    optimizer, scheduler, epoch, logger,
                                    config)
        log_info = f'train: epoch {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

        top1, top5, loss = validate_KD(val_loader, model, criterion)
        log_info = f'eval: epoch: {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

        if (config.distributed
                and local_rank == 0) or not config.distributed:
            # save best top1 model and each epoch checkpoint
            if top1 > best_top1:
                torch.save(model.module.student.state_dict(),
                           os.path.join(checkpoint_dir, 'best_student.pth'))
                best_top1 = top1

            torch.save(
                {
                    'epoch': epoch,
                    'best_top1': best_top1,
                    'loss': loss,
                    'lr': scheduler.get_lr()[0],
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(checkpoint_dir, 'latest.pth'))

    if os.path.exists(os.path.join(checkpoint_dir, 'best_student.pth')):
        os.rename(
            os.path.join(checkpoint_dir, 'best_student.pth'),
            os.path.join(
                checkpoint_dir,
                f'{config.student}-epoch{epoch}-top1{best_top1:.3f}.pth'))

    training_time = (time.time() - start_time) / 3600
    flops, params = compute_flops_and_params(config, model)
    log_info = f'train done. teacher: {config.teacher}, student: {config.student}, total_flops: {flops}, total_params: {params}, training time: {training_time:.3f} hours, best_top1: {best_top1:.3f}%'
    logger.info(log_info) if (config.distributed and local_rank
                              == 0) or not config.distributed else None
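# A minimal, self-contained sketch of the checkpoint save/resume pattern used
# above, with a hypothetical toy model standing in for the distilled student;
# only torch is assumed.
import torch
import torch.nn as nn

toy_model = nn.Linear(4, 2)
toy_optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.1)

# save a checkpoint dict, exactly as the training loop does for latest.pth
torch.save(
    {
        'epoch': 3,
        'model_state_dict': toy_model.state_dict(),
        'optimizer_state_dict': toy_optimizer.state_dict(),
    }, '/tmp/latest.pth')

# resume: load to CPU first, restore states, continue from the next epoch
checkpoint = torch.load('/tmp/latest.pth', map_location=torch.device('cpu'))
toy_model.load_state_dict(checkpoint['model_state_dict'])
toy_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1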
import argparse

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

from tools.pca import PCA_NII_3D
from tools.data_io import ScanWrapper, DataFolder, save_object, load_object
from tools.utils import get_logger

logger = get_logger('PCA low dimension')


def main():
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    parser.add_argument('--load-dr-bin', type=str)
    parser.add_argument('--component-id', type=int)
    parser.add_argument('--save-bin-png', type=str)
    parser.add_argument('--save-csv', type=str)
    args = parser.parse_args()

    dr_data = load_object(args.load_dr_bin)
    file_list = dr_data['file_list']
    projected_data = dr_data['projected_matrix']

    component_val_array = projected_data[:, args.component_id - 1]
    component_dict_array = [{
        'Scan': file_list[file_idx],
        'Value': component_val_array[file_idx]
    } for file_idx in range(len(file_list))]
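# Hedged sketch of how the records above map onto the --save-csv output:
# a toy random matrix stands in for the real PCA projection.
import numpy as np
import pandas as pd

toy_projected = np.random.randn(5, 3)  # 5 scans, 3 components
toy_records = [{
    'Scan': f'scan_{i}.nii.gz',
    'Value': toy_projected[i, 0]
} for i in range(5)]
pd.DataFrame(toy_records).to_csv('/tmp/component_values.csv', index=False)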
import numpy as np

from tools.clinical import ClinicalDataReaderSPORE
from tools.utils import read_file_contents_list, get_logger
from tools.data_io import DataFolder

logger = get_logger('Dataset')


def get_data_dict(config, file_list_txt):
    task = config['task']
    in_folder = config['input_img_dir']
    label_csv = config['label_csv']

    in_folder_obj = DataFolder(in_folder,
                               read_file_contents_list(file_list_txt))
    file_list = in_folder_obj.get_data_file_list()

    clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_csv(
        label_csv)
    label_array = None
    file_list_with_valid_label = None
    if task == 'BMI':
        label_array, file_list_with_valid_label = clinical_reader.get_gt_value_BMI(
            file_list)

    subject_list = [
        ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        for file_name in file_list_with_valid_label
    ]
import argparse

import numpy as np

from tools.data_io import ScanWrapper, DataFolder, save_object, load_object
from tools.utils import get_logger
from tools.feature_select import FSDimReduction1D

logger = get_logger('Dimension Reduction')


def main():
    parser = argparse.ArgumentParser(
        description='Eliminate the 1D subspace that corresponds to BMI')
    parser.add_argument('--in-data-dict-bin', type=str)
    parser.add_argument('--in-feature-dim', type=int, default=20)
    parser.add_argument('--out-data-dict-bin', type=str)
    args = parser.parse_args()

    in_dict_obj = load_object(args.in_data_dict_bin)
    fs_obj = FSDimReduction1D(in_dict_obj, args.in_feature_dim)
    fs_obj.run_dim_reduct('Age')
    fs_obj.save_bin(args.out_data_dict_bin)


if __name__ == '__main__':
    main()
import os
import datetime

import numpy as np
import pandas as pd
import matplotlib
from matplotlib.ticker import MaxNLocator
import matplotlib.gridspec as gridspec
import matplotlib.mlab as mlab
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import metrics
from sklearn.model_selection import KFold

from tools.clinical import ClinicalDataReaderSPORE
from tools.data_io import load_object
from tools.utils import get_logger

logger = get_logger('ClusterDataCSV')


def get_attribute_list():
    return [
        'Age', 'sex', 'race', 'ctscannermake', 'heightinches',
        'weightpounds', 'packyearsreported', 'copd',
        'Coronary Artery Calcification', 'cancer_bengin', 'diag_date'
    ]


def get_pc_str(idx):
    return f'pc{idx}'


def generate_effective_data_csv(data_array, label_obj, out_csv):
import os
import re
import subprocess

import numpy as np
import pandas as pd
from skimage.measure import compare_nrmse
from skimage import metrics

from tools.paral import AbstractParallelRoutine
from tools.data_io import DataFolder, ScanWrapper
from tools.utils import get_logger

logger = get_logger('Loss')


class GetLossBetweenFolder(AbstractParallelRoutine):
    def __init__(self, config, in_folder_1, in_folder_2, file_list_txt):
        super().__init__(config, in_folder_1, file_list_txt)
        self._in_data_folder_2 = DataFolder(in_folder_2, file_list_txt)
        self._nrmse_diff = []

    def get_nrmse(self):
        return self._nrmse_diff

    def print_file_list(self):
        file_list = self._in_data_folder.get_data_file_list()
        for idx in range(len(file_list)):
            print(f'The {idx}th file is {file_list[idx]}')

    def _run_single_scan(self, idx):
        in_file_1_path = self._in_data_folder.get_file_path(idx)
        in_file_2_path = self._in_data_folder_2.get_file_path(idx)
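# Side note, as a self-contained sketch: skimage.measure.compare_nrmse is
# deprecated and removed in recent scikit-image releases;
# skimage.metrics.normalized_root_mse computes the same quantity.
import numpy as np
from skimage import metrics as sk_metrics

toy_a = np.random.rand(8, 8, 8)
toy_b = toy_a + 0.01 * np.random.rand(8, 8, 8)
print(f'NRMSE: {sk_metrics.normalized_root_mse(toy_a, toy_b):.4f}')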
import argparse
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import multivariate_normal
from scipy.spatial.distance import mahalanobis

from tools.data_io import save_object, load_object
from tools.utils import get_logger, read_file_contents_list
from tools.clinical import ClinicalDataReaderSPORE

logger = get_logger('Pairwise distance')


def check_if_same_subject(file_name1, file_name2):
    subject1_id = ClinicalDataReaderSPORE._get_subject_id_from_file_name(
        file_name1)
    subject2_id = ClinicalDataReaderSPORE._get_subject_id_from_file_name(
        file_name2)
    return subject1_id == subject2_id


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--in-csv', type=str)
    args = parser.parse_args()

    df = pd.read_csv(args.in_csv, index_col='Scan')
    data_dict = df.to_dict('index')
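# Self-contained sketch of the mahalanobis import above: note that the third
# argument is the *inverse* covariance matrix, not the covariance itself.
import numpy as np
from scipy.spatial.distance import mahalanobis

toy_data = np.random.randn(100, 3)
inv_cov = np.linalg.inv(np.cov(toy_data, rowvar=False))
dist = mahalanobis(toy_data[0], toy_data[1], inv_cov)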
import os
import time

import numpy as np

from tools.utils import read_file_contents_list, mkdir_p, convert_3d_2_flat, get_logger
from tools.paral import AbstractParallelRoutine
from tools.data_io import DataFolder, ScanWrapper, ScanWrapperWithMask

logger = get_logger('Preprocess')


class PreprocessDownSample(AbstractParallelRoutine):
    def __init__(self, config, in_folder, out_folder, ref_img, order=3):
        super().__init__(config, in_folder)
        self._c3d = config['c3d_exe']
        self._file_list = self._get_file_list(config['data_file_list'])
        self._spacing_config = config['spacing']
        self._in_folder = in_folder
        self._out_folder = out_folder
        mkdir_p(out_folder)
        self._order = order
        self._reg_resample = config['niftyreg_resample']
        self._reg_resample_ref = config['reg_resample_ref_img']

    def _run_chunk(self, chunk_list):
        for id_file in chunk_list:
            self._in_data_folder.print_idx(id_file)
            in_img_name = self._in_data_folder.get_file_name(id_file)
            in_img_path = self._in_data_folder.get_file_path(id_file)
            out_img_path = os.path.join(self._out_folder, in_img_name)
            c3d_cmd_str = self._get_c3d_cmd_str(in_img_path, out_img_path)
            print(c3d_cmd_str, flush=True)
import os

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import matplotlib.gridspec as gridspec
import seaborn as sns
from scipy.stats import pearsonr
from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
from sklearn.manifold import TSNE

from tools.utils import get_logger
from tools.data_io import ClusterAnalysisDataDict
from tools.regression import EigenThoraxLinearRegression1D
from tools.lda import EigenThoraxLDA1D

logger = get_logger('Correlation Analysis')


class CorrelationAnalysis2OrthoSpace:
    def __init__(self, data_dict_obj: ClusterAnalysisDataDict):
        self._data_obj = data_dict_obj

    def plot_2D_grid_pack_field_tsne_list(self, out_png_folder):
        fig, ax = plt.subplots(figsize=(20, 14))
        gs = gridspec.GridSpec(2, 3)
        ax_list = []
        for idx_ax in range(6):
            ax_list.append(plt.subplot(gs[idx_ax]))

        self._plot_2D_tsne_embeding_field_continue(fig, ax_list[0], 'bmi')
        self._plot_2D_tsne_embeding_field_continue(fig, ax_list[1], 'Age')
import os
import pickle

import nibabel as nib
import numpy as np

from tools.utils import read_file_contents_list, convert_flat_2_3d, get_logger, mkdir_p

logger = get_logger('DataFolder')


class DataFolder:
    def __init__(self, in_folder, data_file_list=None):
        self._in_folder = in_folder
        self._file_list = []
        if data_file_list is None:
            self._file_list = self._get_file_list_in_folder(in_folder)
        else:
            # self._file_list = self._get_file_list(data_file_list)
            self._file_list = data_file_list
        self._suffix = '.nii.gz'

    def get_folder(self):
        return self._in_folder

    def if_file_exist(self, idx):
        file_path = self.get_file_path(idx)
        return os.path.exists(file_path)

    def get_file_name(self, idx):
        return self._file_list[idx]

    def get_file_path(self, idx):
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn import metrics

from tools.preprocess import ScanFolderBatchReader
from tools.utils import get_logger
from tools.cross_validation import get_idx_list_array_n_fold_cross_validation

logger = get_logger('Classifier')


def get_validation_statics(label, predicted_prob):
    fpr, tpr, _ = metrics.roc_curve(label, predicted_prob, pos_label=1)
    precision, recall, _ = metrics.precision_recall_curve(label,
                                                          predicted_prob,
                                                          pos_label=1)
    roc_auc = metrics.roc_auc_score(label, predicted_prob)
    prc_auc = metrics.auc(recall, precision)
    summary_item = {
        'fpr': fpr,
        'tpr': tpr,
        'precision': precision,
        'recall': recall,
        'roc_auc': roc_auc,
        'prc_auc': prc_auc,
        'label': label,
        'pred': predicted_prob
    }
    return summary_item
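# Hedged usage sketch of get_validation_statics on synthetic labels and
# scores; the values below are toy data, not real predictions.
import numpy as np

toy_label = np.array([0, 0, 1, 1, 1, 0, 1, 0])
toy_prob = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2, 0.9, 0.3])
summary = get_validation_statics(toy_label, toy_prob)
print(f"ROC-AUC: {summary['roc_auc']:.3f}, PRC-AUC: {summary['prc_auc']:.3f}")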
import argparse

from tools.utils import get_logger
from tools.utils import read_file_contents_list, save_file_contents_list
from tools.clinical import ClinicalDataReaderSPORE

logger = get_logger('Exclude file list')


def main():
    parser = argparse.ArgumentParser(
        'Exclude files that belong to the listed subjects.')
    parser.add_argument('--file-list-total', type=str)
    parser.add_argument('--subject-id-exclude-file-list', type=str)
    parser.add_argument('--file-list-out', type=str)
    args = parser.parse_args()

    file_list_total = read_file_contents_list(args.file_list_total)
    subject_id_exclude_file_list = read_file_contents_list(
        args.subject_id_exclude_file_list)
    subject_id_exclude_list = get_subject_id_list(
        subject_id_exclude_file_list)

    file_list_reduced = [
        file_name for file_name in file_list_total
        if ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        not in subject_id_exclude_list
    ]

    save_file_contents_list(args.file_list_out, file_list_reduced)


def get_subject_id_list(subject_id_exclude_list):
    # assumed continuation: map each listed file name to its subject ID
    return [
        ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        for file_name in subject_id_exclude_list
    ]
import os
from multiprocessing import Pool

import numpy as np
import nibabel as nib

from tools.data_io import DataFolder, ScanWrapper
from tools.paral import AbstractParallelRoutine
from tools.utils import get_logger

logger = get_logger('Average')


class AverageScans:
    def __init__(self,
                 config,
                 in_folder=None,
                 data_file_txt=None,
                 in_data_folder_obj=None):
        self._data_folder = None
        if in_data_folder_obj is None:
            self._data_folder = DataFolder(in_folder, data_file_txt)
        else:
            self._data_folder = in_data_folder_obj
        self._standard_ref = ScanWrapper(self._data_folder.get_first_path())
        self._num_processes = config['num_processes']

    def get_average_image_union(self, save_path):
        im_shape = self._get_std_shape()

        average_union = np.zeros(im_shape)
        average_union.fill(np.nan)
        non_null_mask_count_image = np.zeros(im_shape)
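# Illustration of the union-average idea with plain numpy: voxels missing in
# a scan stay NaN and np.nanmean averages only where data exists; toy 2-D
# arrays stand in for the real 3-D scans.
import numpy as np

toy_a = np.full((4, 4), np.nan)
toy_a[:2, :] = 1.0
toy_b = np.full((4, 4), np.nan)
toy_b[1:, :] = 3.0
union_average = np.nanmean(np.stack([toy_a, toy_b]), axis=0)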
import os
import argparse

import numpy as np
import yaml
import pandas as pd
import nibabel as nib

from tools.utils import get_logger, mkdir_p
from tools.data_io import ScanWrapper, DataFolder
from tools.plot import ClipPlotSeriesWithBack, ClipPlotIntensityDeformationWall
from tools.paral import AbstractParallelRoutine

logger = get_logger('CAM intensity plot')

atlas_intensity_folder = '/nfs/masi/xuk9/SPORE/CAC_class/data/atlas/valid_region/s6.1_int'
atlas_jacobian_folder = '/nfs/masi/xuk9/SPORE/CAC_class/data/atlas/valid_region/s6.2_jac'


class ParaPlotClip(AbstractParallelRoutine):
    def __init__(self, in_int_folder_obj, in_jac_folder_obj,
                 in_att_folder_obj, out_png_folder, num_process):
        super().__init__(in_int_folder_obj, num_process)
        self._in_jac_folder_obj = in_jac_folder_obj
        self._in_att_folder_obj = in_att_folder_obj
import argparse

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import multivariate_normal
from scipy.spatial.distance import mahalanobis

from tools.data_io import save_object, load_object
from tools.utils import get_logger, read_file_contents_list

logger = get_logger('Gaussian fit')


class FitGaussianJoint:
    def __init__(self):
        self._in_res_matrix_obj = None
        self._in_jac_matrix_obj = None
        self._num_res_pc = None
        self._num_jac_pc = None
        self._file_list = None
        self._use_data_matrix = None
        self._gaussian_model = None

    def load_data(self, in_res_matrix_path, num_res_pc, in_jac_matrix_path,
                  num_jac_pc):
        self._in_res_matrix_obj = load_object(in_res_matrix_path)
        self._num_res_pc = num_res_pc
        self._in_jac_matrix_obj = load_object(in_jac_matrix_path)
        self._num_jac_pc = num_jac_pc
        self._file_list = self._in_res_matrix_obj['file_list']
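# Self-contained sketch of the joint-Gaussian fit this class appears to set
# up: estimate mean and covariance, then freeze a multivariate_normal; toy
# data replaces the concatenated PC matrices.
import numpy as np
from scipy.stats import multivariate_normal

toy_matrix = np.random.randn(200, 4)
toy_model = multivariate_normal(mean=np.mean(toy_matrix, axis=0),
                                cov=np.cov(toy_matrix, rowvar=False))
log_density = toy_model.logpdf(toy_matrix[0])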
import argparse

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

from tools.pca import PCA_NII_3D
from tools.utils import get_logger

logger = get_logger('Bin Plot')


def main():
    parser = argparse.ArgumentParser(
        description='Bin plot data using csv file')
    parser.add_argument('--in-csv-list', nargs='+', type=str)
    parser.add_argument('--label-list', nargs='+', type=str)
    parser.add_argument('--color-list', nargs='+', type=str)
    parser.add_argument('--column-flag', type=str)
    parser.add_argument('--x-label', type=str)
    parser.add_argument('--title', type=str)
    parser.add_argument('--out-png', type=str)
    args = parser.parse_args()

    num_data = len(args.in_csv_list)
    in_csv_list = args.in_csv_list
    label_list = args.label_list
    color_list = args.color_list

    data_array_sequence = []
    for idx in range(num_data):
import os
import os.path as osp
import argparse

import numpy as np
import yaml
import pandas as pd
from sklearn.metrics import r2_score

from tools.utils import get_logger, mkdir_p
from tools.plot import plot_prediction_scatter

logger = get_logger('Plot Ground Truth vs. Prediction scatter')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--yaml-config',
                        type=str,
                        default='simg_bmi_regression_18_nfs.yaml')
    # parser.add_argument('--idx-fold', type=int, default=4)
    args = parser.parse_args()

    SRC_ROOT = os.path.dirname(os.path.realpath(__file__)) + '/..'
    yaml_config = os.path.join(SRC_ROOT, f'src/yaml/{args.yaml_config}')
    logger.info(f'Read yaml file {yaml_config}')
    with open(yaml_config, 'r') as f:
        config = yaml.safe_load(f)

    scatter_plot_dir = os.path.join(
        '/nfs/masi/xuk9/SPORE/CAC_class/prediction_plots', args.yaml_config)
    mkdir_p(scatter_plot_dir)
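# Toy sketch of the r2_score import above, on synthetic ground truth vs.
# predicted values.
import numpy as np
from sklearn.metrics import r2_score

toy_gt = np.array([21.5, 24.0, 28.2, 31.0])
toy_pred = np.array([22.0, 23.1, 27.5, 32.2])
print(f'R^2: {r2_score(toy_gt, toy_pred):.3f}')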
import argparse

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

from tools.utils import get_logger

logger = get_logger('Logistic regression, plot')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--in-csv-1', type=str)
    parser.add_argument('--in-csv-2', type=str)
    parser.add_argument('--column-flag', type=str)
    parser.add_argument('--out-png', type=str)
    args = parser.parse_args()

    logger.info('Run logistic regression')

    logger.info(f'Reading {args.in_csv_1}')
    rvs1 = pd.read_csv(args.in_csv_1)[args.column_flag].to_numpy()
    logger.info(f'Data length {len(rvs1)}')

    logger.info(f'Reading {args.in_csv_2}')
    rvs2 = pd.read_csv(args.in_csv_2)[args.column_flag].to_numpy()
    logger.info(f'Data length {len(rvs2)}')

    fig, ax = plt.subplots(figsize=(8, 4))
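# Hedged sketch of a 1-D logistic fit of the kind this script appears to
# run: two synthetic groups get labels 0/1 and an ROC-AUC from the fitted
# probabilities.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

toy_g1 = np.random.normal(0.0, 1.0, 50)
toy_g2 = np.random.normal(1.5, 1.0, 50)
X = np.concatenate([toy_g1, toy_g2]).reshape(-1, 1)
y = np.concatenate([np.zeros(50), np.ones(50)])
clf = LogisticRegression().fit(X, y)
auc = metrics.roc_auc_score(y, clf.predict_proba(X)[:, 1])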
import re
import collections
from datetime import datetime

import numpy as np
import pandas as pd

from tools.utils import get_logger

logger = get_logger('Clinical')


class ClinicalDataReaderSPORE:
    def __init__(self, data_frame):
        self._df = data_frame

    def get_summary_characteristics_subject(self, included_subject_list):
        df_sess_list = self._df.index.to_list()
        df_subject_list = [
            ClinicalDataReaderSPORE._get_subject_id_from_sess_name(sess_name)
            for sess_name in df_sess_list
        ]

        logger.info(
            f'Get the characteristics for included subjects: {len(included_subject_list)}'
        )
        missing_subject = [
            subject_id for subject_id in included_subject_list
            if subject_id not in df_subject_list
        ]
        if len(missing_subject) > 0:
            logger.info(f'Number of missing subject: {len(missing_subject)}')
            logger.info(missing_subject)

        included_subject_list = [
            subject_id for subject_id in included_subject_list
            if subject_id in df_subject_list
        ]
        included_subject_idx_list = [
            df_subject_list.index(subject_id)
            for subject_id in included_subject_list
        ]
        df_included_only = self._df.iloc[included_subject_idx_list, :]
        logger.info(
            f'Number rows of included only data frame: {len(df_included_only.index)}'
        )
# fragment from a larger script; imports (os, shutil, requests, time.sleep,
# atexit) and the pin_uploader / products_data / settings objects come from
# the surrounding module.
for name, price, image_path, printbar_url in products_data:
    try:
        pin_uploader.create_pin(board_id, name, price, image_path,
                                printbar_url)
        print(f'pin created: {name}')
    except requests.exceptions.HTTPError as err:
        logger.exception(err)
        pin_uploader.logout()
        raise
    sleep(settings['pin_creating_period'])
sleep(settings['board_creating_period'])


def remove_imgs_dir():
    shutil.rmtree(SAVED_IMAGES_DIR, ignore_errors=True)


def create_imgs_dir():
    if not os.path.isdir(SAVED_IMAGES_DIR):
        os.mkdir(SAVED_IMAGES_DIR)


if __name__ == "__main__":
    _atexit.register(remove_imgs_dir)
    create_imgs_dir()
    logger = get_logger('file.log', 'p2p')
    try:
        parse_printbar_and_upload_to_pinterest(logger)
    except Exception as err:
        logger.exception(f'unknown exception: {err}')
import numpy as np
import pandas as pd

from tools.clinical import ClinicalDataReaderSPORE
from tools.utils import get_logger

SPORE_label_csv = '/nfs/masi/SPORE/file/clinical/self_creat/Limitedhistory20200420.csv'

logger = get_logger('PLCOm2012')


def PLCOm2012(age, race, education, body_mass_index, copd, phist, fhist,
              smoking_status, smoking_intensity, duration, quit_time):
    # this is for SPORE, please also refer to Norm_cancer_risk
    def get_num(x):
        if x == 'yes' or x == 'current' or x == 1:
            return 1
        else:
            return 0

    def get_race(x):
        d = {
            1.0: 0,
            2.0: 0.3944778,
            2.5: -0.7434744,
            3.0: -0.466585,
            4.0: 0,
            5.0: 1.027152
        }
        return d[x]
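# The PLCO family of models combines weighted covariate terms into a logit
# and maps it to a probability; a minimal sketch of that final step, with
# purely illustrative coefficient values (not the published PLCOm2012 ones):
import math

toy_logit = -4.5 + 0.39 * 1 + 0.05 * (62 - 62)  # intercept + race + age terms
toy_risk = math.exp(toy_logit) / (1 + math.exp(toy_logit))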
import argparse
import os

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib import colors
from mpl_toolkits.axes_grid1 import make_axes_locatable

from tools.pca import PCA_NII_3D
from tools.data_io import DataFolder, ScanWrapper
from tools.utils import get_logger, mkdir_p

logger = get_logger('Plot - PC')


class PlotPCGrid:
    def __init__(self, pc_folder_obj, step_axial, step_sagittal,
                 step_coronal, num_show_pc_each_line):
        self._pc_folder_obj = pc_folder_obj
        self._step_axial = step_axial
        self._step_coronal = step_coronal
        self._step_sagittal = step_sagittal
        self._num_show_pc = num_show_pc_each_line
        self._cm = 'hsv'
        self._num_view = 3
        self._out_dpi = 20
        self._num_clip = 1
        self._sub_title_font_size = 60

    def plot_pc(self, out_png):
        fig = plt.figure(figsize=(self._num_show_pc * 20,
import argparse

import numpy as np
from sklearn.manifold import TSNE

from tools.utils import get_logger
from tools.data_io import load_object, save_object

logger = get_logger('tSNE')


def main():
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    parser.add_argument('--low-dim-bin-path', type=str)
    parser.add_argument('--save-bin-path', type=str)
    parser.add_argument('--num-pca-component', type=int, default=10)
    parser.add_argument('--dim-embedded', type=int, default=2)
    args = parser.parse_args()

    logger.info(f'Load low dim data from {args.low_dim_bin_path}')
    low_dim_array = load_object(args.low_dim_bin_path)

    data_matrix = np.zeros((len(low_dim_array), args.num_pca_component))
    for sample_idx in range(len(low_dim_array)):
        data_matrix[sample_idx, :] = low_dim_array[sample_idx]['low_dim'][:]

    logger.info(f'Num of sample: {data_matrix.shape[0]}')
    logger.info(f'Num of included PCs: {data_matrix.shape[1]}')

    logger.info('Start tSNE')
    # embedded_matrix = TSNE(perplexity=50, learning_rate=10000,
    #                        n_components=args.dim_embedded).fit_transform(data_matrix)
    embedded_matrix = TSNE(perplexity=50,
                           n_iter=100000,
                           n_components=args.dim_embedded).fit_transform(
                               data_matrix)
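# Self-contained sketch of the same sklearn TSNE call on random data; the
# perplexity mirrors the script above, other settings are left at defaults.
import numpy as np
from sklearn.manifold import TSNE

toy_matrix = np.random.randn(200, 10)
toy_embedded = TSNE(n_components=2, perplexity=50).fit_transform(toy_matrix)
print(toy_embedded.shape)  # (200, 2)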
import os
import sys

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# parse_args, set_seed, DetectionCollater, get_logger,
# compute_flops_and_params, validate_detection and BASE_DIR are
# project-local helpers assumed importable in the full module.


def main():
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from test_config import config
    log_dir = os.path.join(args.work_dir, 'log')

    set_seed(config.seed)

    collater = DetectionCollater()
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            collate_fn=collater.next)

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    logger = get_logger('test', log_dir)

    for key, value in config.__dict__.items():
        if not key.startswith('__'):
            if key not in [
                    'model', 'criterion', 'decoder', 'train_dataset',
                    'val_dataset'
            ]:
                log_info = f'{key}: {value}'
                logger.info(log_info)

    gpus_type, gpus_num = torch.cuda.get_device_name(
    ), torch.cuda.device_count()
    log_info = f'gpus_type: {gpus_type}, gpus_num: {gpus_num}'
    logger.info(log_info)

    model = config.model
    decoder = config.decoder

    if config.trained_model_path:
        saved_model = torch.load(os.path.join(BASE_DIR,
                                              config.trained_model_path),
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    flops, params = compute_flops_and_params(config, model)
    log_info = f'model: {config.network}, flops: {flops}, params: {params}'
    logger.info(log_info)

    model = model.cuda()
    decoder = decoder.cuda()
    model = nn.DataParallel(model)

    result_dict = validate_detection(config.val_dataset, val_loader, model,
                                     decoder, config)
    log_info = 'eval_result: '
    if result_dict:
        for key, value in result_dict.items():
            log_info += f'{key}: {value} ,'
    else:
        log_info += ', no target detected in testset images!'
    logger.info(log_info)

    return
import argparse
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

from tools.utils import get_logger, read_file_contents_list, write_list_to_file

logger = get_logger('Generate 5-fold file name list.')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--neg-sample-list', type=str)
    parser.add_argument('--pos-sample-list', type=str)
    parser.add_argument('--out-file-list-folder', type=str)
    parser.add_argument('--n-fold', type=int, default=5)
    args = parser.parse_args()

    n_fold = KFold(n_splits=args.n_fold)

    neg_sample_list = read_file_contents_list(args.neg_sample_list)
    pos_sample_list = read_file_contents_list(args.pos_sample_list)

    n_fold_file_name_list = []
    for neg_train_idx, neg_test_idx in n_fold.split(neg_sample_list):
        neg_train_file_name_list = [
            neg_sample_list[idx_file_name] for idx_file_name in neg_train_idx
        ]
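# Toy sketch of the KFold splitting that drives the lists above: split()
# yields index arrays, and the caller maps them back to file names.
from sklearn.model_selection import KFold

toy_samples = [f'scan_{i}.nii.gz' for i in range(10)]
for toy_train_idx, toy_test_idx in KFold(n_splits=5).split(toy_samples):
    toy_train_files = [toy_samples[i] for i in toy_train_idx]
    toy_test_files = [toy_samples[i] for i in toy_test_idx]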
def multi_train(Net, FLAGS):
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        # global_step = tf.train.global_step()
        train_func, valid_func, [train_steps, valid_steps
                                 ] = multi_process_data_read(FLAGS)

        images = tf.placeholder(dtype=tf.float32,
                                shape=[None] + FLAGS.image_size + [3],
                                name='input')
        labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')

        logits = Net.inference(images)
        total_loss = train_tools.classify_loss(logits, labels)
        train_op = train_tools.train(total_loss, global_step,
                                     FLAGS.batch_size * train_steps, FLAGS)
        accuracy_op = train_tools.classify_accuracy(logits, labels)
        best_model0 = 0

        # sess define, load model, model initial
        sess, summary_op, summary_writer, saver = MH.sess_and_saver_initial(
            FLAGS.output_dir, FLAGS.is_loadmodel)
        logger = get_logger(FLAGS.model_name, FLAGS.output_dir)

        train_queue = Queue(2)
        valid_queue = Queue(2)
        train_process = Process(target=train_func, args=(train_queue, ))
        valid_process = Process(target=valid_func, args=(valid_queue, ))
        try:
            train_process.start()
            valid_process.start()
            print('Begin Training')
            for epoch in range(FLAGS.num_epochs):
                for step in range(101):
                    # t1 = time.clock()
                    batch_x, batch_y = train_queue.get(True)
                    if step and step % FLAGS.log_interval == 0:
                        _, loss_value, acc_value, summary_str, global_step_value = sess.run(
                            [
                                train_op, total_loss, accuracy_op,
                                summary_op, global_step
                            ],
                            feed_dict={
                                images: batch_x,
                                labels: batch_y
                            })
                        logger.info(
                            'epoch:{4} [{0} / {1}] step {2}, loss {3}'.format(
                                step // FLAGS.log_interval,
                                train_steps // FLAGS.log_interval, step,
                                loss_value, epoch))
                        summary_writer.add_summary(
                            summary_str, global_step=global_step_value)
                    else:
                        # print('read data', time.clock()-t1)
                        _ = sess.run([train_op],
                                     feed_dict={
                                         images: batch_x,
                                         labels: batch_y
                                     })
                        # print('>', end='')
                        # print('sess run time', time.clock()-t0)
                        # print('batch time', time.clock()-t1)

                logger.info('Evaluating...')
                accuracy_sum = []
                for step in range(101):
                    batch_x, batch_y = valid_queue.get(True)
                    if step and step % FLAGS.log_interval == 0:
                        loss_value, acc_value, summary_str = sess.run(
                            [total_loss, accuracy_op, summary_op],
                            feed_dict={
                                images: batch_x,
                                labels: batch_y
                            })
                        logger.info('[{0} / {1}] step {2}, loss {3}'.format(
                            step // FLAGS.log_interval,
                            valid_steps // FLAGS.log_interval, step,
                            loss_value))
                        accuracy_sum.append(acc_value)
                        # summary_writer.add_summary(summary_str, step)
                    else:
                        [acc_value] = sess.run([accuracy_op],
                                               feed_dict={
                                                   images: batch_x,
                                                   labels: batch_y
                                               })
                        # print('#',end='')
                        accuracy_sum.append(acc_value)

                validation_acc = np.mean(accuracy_sum)
                logger.info('#######################')
                logger.info('epoch: {0} validation acc{1}'.format(
                    epoch, validation_acc))
                logger.info('#######################')

                summary = tf.Summary()
                summary.ParseFromString(summary_str)
                summary.value.add(tag='val_acc',
                                  simple_value=validation_acc)
                summary_writer.add_summary(summary, epoch)

                if validation_acc > best_model0:
                    best_model0 = validation_acc
                    checkpoint_path = os.path.join(
                        FLAGS.output_dir, FLAGS.model_name + '.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=global_step)

            train_process.join()
            valid_process.terminate()
        except KeyboardInterrupt as e:
            print('KeyboardInter', e)
        finally:
            train_process.terminate()
            valid_process.terminate()
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_mutual_info_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE
from sklearn.manifold import LocallyLinearEmbedding

from tools.clinical import ClinicalDataReaderSPORE
from tools.data_io import load_object
from tools.utils import get_logger

logger = get_logger('KMeans')


class ClusterAnalysisDimAnalyzer:
    def __init__(self, data_dict, n_features):
        self._data_dict = data_dict
        self._n_feature = n_features
        self._kmean_n_cluster_range = range(3, 9)
        self._bar_w = 0.12
        self._n_init_kmeans = 10000
        self._con_factor = 0.6

    def plot_kmean_n_cluster_field_list_cancer_subject_first_scan(
            self, field_list, n_cluster,
import argparse
import os

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

from tools.pca import PCA_NII_3D
from tools.clinical import ClinicalDataReaderSPORE
from tools.data_io import load_object
from tools.utils import get_logger

logger = get_logger('Plot - PC 2 dim')


class PlotSpacePCA:
    def __init__(self, label_df):
        self._label_df = label_df
        self._x_lim = (-2500, 2500)
        self._y_lim = (-5000, 5000)
        self._if_set_lim = False

    def save_label_file(self, out_csv):
        self._label_df.to_csv(out_csv)

    def plot_copd(self, out_png):
        plt.figure(figsize=(16, 10))

        df_yes = self._label_df[self._label_df['copd'] == 'Yes']
        df_no = self._label_df[self._label_df['copd'] == 'No']