Example #1
import os.path as osp
import os
import numpy as np
from tools.utils import get_logger
import argparse
import yaml
import pandas as pd
from tools.clinical import ClinicalDataReaderSPORE
import nibabel as nib
from tools.data_io import ScanWrapper
from sklearn.metrics import roc_curve
from tools.utils import mkdir_p
from tools.plot import plot_training_curve

logger = get_logger('grad_cam_analysis')

# yaml_config_file = 'simg_bmi_regression_0_cam.yaml'
fold_idx = 0


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--yaml-config',
                        type=str,
                        default='simg_bmi_regression_0_cam.yaml')
    args = parser.parse_args()

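    # Resolve the yaml config path relative to the repository root (one level above this script).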
    SRC_ROOT = os.path.dirname(os.path.realpath(__file__)) + '/..'
    yaml_config = os.path.join(SRC_ROOT, f'src/yaml/{args.yaml_config}')
    logger.info(f'Read yaml file {yaml_config}')
Example #2

import os

import numpy as np
import torch
from tensorboardX import SummaryWriter
from config import SearchConfig
from tools import utils
from models.search_cnn import SearchCNNController
from architect import Architect
from tools.visualize import plot

config = SearchConfig()

device = torch.device("cuda")

# tensorboard
tb_writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
tb_writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    logger.info("Logger is set - training start")

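    # Pin this process to the first GPU in config.gpus before any CUDA work.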
    torch.cuda.set_device(config.gpus[0])

    # seed setting
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True
Example #3

import functools
import os
import sys
import time

import torch
from torch.utils.data import DataLoader

# parse_args, set_seed, worker_seed_init_fn, get_logger, build_optimizer,
# build_scheduler, build_training_mode, train_KD, validate_KD, and
# compute_flops_and_params are project-local helpers assumed importable here.


def main():
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from train_config import config
    log_dir = os.path.join(args.work_dir, 'log')
    checkpoint_dir = os.path.join(args.work_dir, 'checkpoints')
    resume_model = os.path.join(checkpoint_dir, 'latest.pth')

    set_seed(config.seed)

    local_rank = args.local_rank
    # start init process
    if config.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        torch.cuda.set_device(local_rank)

    # Only the rank-0 process (or the single process in non-distributed runs) logs and saves.
    is_main_process = (config.distributed and local_rank == 0) or not config.distributed

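    # worker_seed_init_fn presumably derives a per-worker seed from the global seed, rank, and worker id.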
    init_fn = functools.partial(worker_seed_init_fn,
                                num_workers=config.num_workers,
                                local_rank=local_rank,
                                seed=config.seed)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        config.train_dataset, shuffle=True) if config.distributed else None
    train_loader = DataLoader(config.train_dataset,
                              batch_size=config.batch_size,
                              shuffle=(train_sampler is None),
                              pin_memory=True,
                              num_workers=config.num_workers,
                              sampler=train_sampler,
                              worker_init_fn=init_fn)
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        config.val_dataset, shuffle=False) if config.distributed else None
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            sampler=val_sampler)

    if is_main_process:
        # automatically create checkpoint folder
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    logger = get_logger('train', log_dir)

    for key, value in config.__dict__.items():
        if not key.startswith('__'):
            if key not in ['model', 'criterion']:
                log_info = f'{key}: {value}'
                if is_main_process:
                    logger.info(log_info)

    gpus_type, gpus_num = torch.cuda.get_device_name(), torch.cuda.device_count()
    log_info = f'gpus_type: {gpus_type}, gpus_num: {gpus_num}'
    if is_main_process:
        logger.info(log_info)

    model = config.model.cuda()
    criterion = config.criterion

    for name in criterion.keys():
        criterion[name] = criterion[name].cuda()

    # Parameters are updated by the optimizer; buffers are not.
    for name, param in model.named_parameters():
        log_info = f'name: {name}, grad: {param.requires_grad}'
        if is_main_process:
            logger.info(log_info)

    for name, buffer in model.named_buffers():
        log_info = f'name: {name}, grad: {buffer.requires_grad}'
        if is_main_process:
            logger.info(log_info)

    optimizer = build_optimizer(config, model)
    scheduler = build_scheduler(config, optimizer)
    model = build_training_mode(config, model, optimizer)

    start_epoch = 1
    best_top1 = 0.0
    # automatically resume model for training if checkpoint model exist
    if os.path.exists(resume_model):
        checkpoint = torch.load(resume_model, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        saved_epoch = checkpoint['epoch']
        start_epoch += saved_epoch
        best_top1, loss, lr = checkpoint['best_top1'], checkpoint[
            'loss'], checkpoint['lr']

        log_info = f'resuming model from {resume_model}. resume_epoch: {saved_epoch}, best_top1: {best_top1:.3f}%, loss: {loss:.4f}, lr: {lr:.6f}'
        if is_main_process:
            logger.info(log_info)

    # calculate training time
    start_time = time.time()

    for epoch in range(start_epoch, config.epochs + 1):
        torch.cuda.empty_cache()
        if config.distributed:
            train_sampler.set_epoch(epoch)
        top1, top5, loss = train_KD(train_loader, model, criterion, optimizer,
                                    scheduler, epoch, logger, config)
        log_info = f'train: epoch {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
        if is_main_process:
            logger.info(log_info)

        top1, top5, loss = validate_KD(val_loader, model, criterion)
        log_info = f'eval: epoch: {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
        if is_main_process:
            logger.info(log_info)

        if is_main_process:
            # save best top1 model and each epoch checkpoint
            if top1 > best_top1:
                torch.save(model.module.student.state_dict(),
                           os.path.join(checkpoint_dir, 'best_student.pth'))
                best_top1 = top1

            torch.save(
                {
                    'epoch': epoch,
                    'best_top1': best_top1,
                    'loss': loss,
                    'lr': scheduler.get_lr()[0],
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(checkpoint_dir, 'latest.pth'))

            if os.path.exists(os.path.join(checkpoint_dir, 'best_student.pth')):
                os.rename(
                    os.path.join(checkpoint_dir, 'best_student.pth'),
                    os.path.join(
                        checkpoint_dir,
                        f'{config.student}-epoch{epoch}-top1{best_top1:.3f}.pth'
                    ))

    training_time = (time.time() - start_time) / 3600
    flops, params = compute_flops_and_params(config, model)
    log_info = f'train done. teacher: {config.teacher}, student: {config.student}, total_flops: {flops}, total_params: {params}, training time: {training_time:.3f} hours, best_top1: {best_top1:.3f}%'
    if is_main_process:
        logger.info(log_info)
Example #4
import argparse
from tools.pca import PCA_NII_3D
from tools.data_io import ScanWrapper, DataFolder, save_object, load_object
from tools.utils import get_logger
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd

logger = get_logger('PCA low dimension')


def main():
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    parser.add_argument('--load-dr-bin', type=str)
    parser.add_argument('--component-id', type=int)
    parser.add_argument('--save-bin-png', type=str)
    parser.add_argument('--save-csv', type=str)
    args = parser.parse_args()

    dr_data = load_object(args.load_dr_bin)

    file_list = dr_data['file_list']
    projected_data = dr_data['projected_matrix']

    component_val_array = projected_data[:, args.component_id - 1]

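    # Pair each scan file name with its value along the selected principal component.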
    component_dict_array = [{
        'Scan': file_list[file_idx],
        'Value': component_val_array[file_idx]
    } for file_idx in range(len(file_list))]
Example #5
from tools.clinical import ClinicalDataReaderSPORE
from tools.utils import read_file_contents_list
from tools.data_io import DataFolder
from tools.utils import get_logger
import numpy as np

logger = get_logger('Dataset')


def get_data_dict(config, file_list_txt):
    task = config['task']
    in_folder = config['input_img_dir']
    label_csv = config['label_csv']

    in_folder_obj = DataFolder(in_folder,
                               read_file_contents_list(file_list_txt))
    file_list = in_folder_obj.get_data_file_list()

    clinical_reader = ClinicalDataReaderSPORE.create_spore_data_reader_csv(
        label_csv)

    label_array = None
    file_list_with_valid_label = None

    if task == 'BMI':
        label_array, file_list_with_valid_label = clinical_reader.get_gt_value_BMI(
            file_list)

    subject_list = [
        ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        for file_name in file_list_with_valid_label
Example #6
import argparse
from tools.data_io import ScanWrapper, DataFolder, save_object, load_object
from tools.utils import get_logger
from tools.feature_select import FSDimReduction1D
import numpy as np

logger = get_logger('Dimension Reduction')


def main():
    parser = argparse.ArgumentParser(
        description='Eliminate the 1D subspace that corresponds to BMI')
    parser.add_argument('--in-data-dict-bin', type=str)
    parser.add_argument('--in-feature-dim', type=int, default=20)
    parser.add_argument('--out-data-dict-bin', type=str)
    args = parser.parse_args()

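    # Load the cached feature dictionary, remove the 1D subspace for the chosen attribute, and save the result.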
    in_dict_obj = load_object(args.in_data_dict_bin)
    fs_obj = FSDimReduction1D(in_dict_obj, args.in_feature_dim)
    fs_obj.run_dim_reduct('Age')
    fs_obj.save_bin(args.out_data_dict_bin)


if __name__ == '__main__':
    main()
Example #7
import numpy as np
import matplotlib
from matplotlib.ticker import MaxNLocator
from tools.clinical import ClinicalDataReaderSPORE
from tools.data_io import load_object
from tools.utils import get_logger
import pandas as pd
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import metrics
from sklearn.model_selection import KFold
import matplotlib.gridspec as gridspec
import matplotlib.mlab as mlab
import datetime

logger = get_logger('ClusterDataCSV')


def get_attribute_list():
    return [
        'Age', 'sex', 'race', 'ctscannermake', 'heightinches', 'weightpounds',
        'packyearsreported', 'copd', 'Coronary Artery Calcification',
        'cancer_bengin', 'diag_date'
    ]


def get_pc_str(idx):
    return f'pc{idx}'


def generate_effective_data_csv(data_array, label_obj, out_csv):
Example #8
from tools.paral import AbstractParallelRoutine
from tools.data_io import DataFolder, ScanWrapper
import os
from skimage.measure import compare_nrmse  # deprecated; skimage.metrics.normalized_root_mse is the modern equivalent
import numpy as np
import pandas as pd
from skimage import metrics
import subprocess
import re
from tools.utils import get_logger

logger = get_logger('Loss')


class GetLossBetweenFolder(AbstractParallelRoutine):
    def __init__(self, config, in_folder_1, in_folder_2, file_list_txt):
        super().__init__(config, in_folder_1, file_list_txt)
        self._in_data_folder_2 = DataFolder(in_folder_2, file_list_txt)
        self._nrmse_diff = []

    def get_nrmse(self):
        return self._nrmse_diff

    def print_file_list(self):
        file_list = self._in_data_folder.get_data_file_list()
        for idx in range(len(file_list)):
            print(f'The {idx}th file is {file_list[idx]}')

    def _run_single_scan(self, idx):
        in_file_1_path = self._in_data_folder.get_file_path(idx)
        in_file_2_path = self._in_data_folder_2.get_file_path(idx)
Example #9
import argparse
from tools.data_io import save_object, load_object
from tools.utils import get_logger, read_file_contents_list
import numpy as np
from scipy.stats import multivariate_normal
from scipy.spatial.distance import mahalanobis
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from tools.clinical import ClinicalDataReaderSPORE
import math

logger = get_logger('Pairwise distance')


def check_if_same_subject(file_name1, file_name2):
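    # Two scans belong to the same subject when their file names encode the same subject id.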
    subject1_id = ClinicalDataReaderSPORE._get_subject_id_from_file_name(
        file_name1)
    subject2_id = ClinicalDataReaderSPORE._get_subject_id_from_file_name(
        file_name2)

    return subject1_id == subject2_id


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--in-csv', type=str)
    args = parser.parse_args()

    df = pd.read_csv(args.in_csv, index_col='Scan')
    data_dict = df.to_dict('index')
Example #10
import os
from tools.utils import read_file_contents_list, mkdir_p, convert_3d_2_flat, get_logger
from tools.paral import AbstractParallelRoutine
from tools.data_io import DataFolder, ScanWrapper, ScanWrapperWithMask
import numpy as np
import time

logger = get_logger('Preprocess')


class PreprocessDownSample(AbstractParallelRoutine):
    def __init__(self, config, in_folder, out_folder, ref_img, order=3):
        super().__init__(config, in_folder)
        self._c3d = config['c3d_exe']
        self._file_list = self._get_file_list(config['data_file_list'])
        self._spacing_config = config['spacing']
        self._in_folder = in_folder
        self._out_folder = out_folder
        mkdir_p(out_folder)
        self._order = order
        self._reg_resample = config['niftyreg_resample']
        self._reg_resmaple_ref = config['reg_resample_ref_img']

    def _run_chunk(self, chunk_list):
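        # Resample every scan in this chunk with c3d and write the result to the output folder.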
        for id_file in chunk_list:
            self._in_data_folder.print_idx(id_file)
            in_img_name = self._in_data_folder.get_file_name(id_file)
            in_img_path = self._in_data_folder.get_file_path(id_file)
            out_img_path = os.path.join(self._out_folder, in_img_name)
            c3d_cmd_str = self._get_c3d_cmd_str(in_img_path, out_img_path)
            print(c3d_cmd_str, flush=True)
Example #11
from tools.utils import get_logger
import os
import numpy as np
import matplotlib.pyplot as plt
from tools.data_io import ClusterAnalysisDataDict
from scipy.stats import pearsonr
from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
import seaborn as sns
import matplotlib.tri as tri
from tools.regression import EigenThoraxLinearRegression1D
from tools.lda import EigenThoraxLDA1D
import matplotlib.gridspec as gridspec
from sklearn.manifold import TSNE

logger = get_logger('Correlation Analysis')


class CorrelationAnalysis2OrthoSpace:
    def __init__(self, data_dict_obj: ClusterAnalysisDataDict):
        self._data_obj = data_dict_obj

    def plot_2D_grid_pack_field_tsne_list(self, out_png_folder):
        fig, ax = plt.subplots(figsize=(20, 14))
        gs = gridspec.GridSpec(2, 3)

        ax_list = []
        for idx_ax in range(6):
            ax_list.append(plt.subplot(gs[idx_ax]))

        self._plot_2D_tsne_embeding_field_continue(fig, ax_list[0], 'bmi')
        self._plot_2D_tsne_embeding_field_continue(fig, ax_list[1], 'Age')
Example #12
import os
from tools.utils import read_file_contents_list, convert_flat_2_3d, get_logger, mkdir_p
import nibabel as nib
import numpy as np
import pickle

logger = get_logger('DataFolder')


class DataFolder:
    def __init__(self, in_folder, data_file_list=None):
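        # Use the caller-supplied file list when given; otherwise index every file found in the folder.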
        self._in_folder = in_folder
        self._file_list = []
        if data_file_list is None:
            self._file_list = self._get_file_list_in_folder(in_folder)
        else:
            # self._file_list = self._get_file_list(data_file_list)
            self._file_list = data_file_list
        self._suffix = '.nii.gz'

    def get_folder(self):
        return self._in_folder

    def if_file_exist(self, idx):
        file_path = self.get_file_path(idx)
        return os.path.exists(file_path)

    def get_file_name(self, idx):
        return self._file_list[idx]

    def get_file_path(self, idx):
Example #13
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from tools.preprocess import ScanFolderBatchReader
from tools.utils import get_logger
from tools.cross_validation import get_idx_list_array_n_fold_cross_validation
from sklearn import metrics

logger = get_logger('Classifier')


def get_validation_statics(label, predicted_prob):
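    # Collect ROC and precision-recall curves plus their AUCs for one validation run.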
    fpr, tpr, _ = metrics.roc_curve(label, predicted_prob, pos_label=1)
    precision, recall, _ = metrics.precision_recall_curve(label,
                                                          predicted_prob,
                                                          pos_label=1)
    roc_auc = metrics.roc_auc_score(label, predicted_prob)
    prc_auc = metrics.auc(recall, precision)

    summary_item = {
        'fpr': fpr,
        'tpr': tpr,
        'precision': precision,
        'recall': recall,
        'roc_auc': roc_auc,
        'prc_auc': prc_auc,
        'label': label,
        'pred': predicted_prob
    }

    return summary_item


Example #14

import argparse
from tools.utils import get_logger
from tools.utils import read_file_contents_list, save_file_contents_list
from tools.clinical import ClinicalDataReaderSPORE

logger = get_logger('Exclude file list')


def main():
    parser = argparse.ArgumentParser(description='Plot box and scatter data.')
    parser.add_argument('--file-list-total', type=str)
    parser.add_argument('--subject-id-exclude-file-list', type=str)
    parser.add_argument('--file-list-out', type=str)
    args = parser.parse_args()

    file_list_total = read_file_contents_list(args.file_list_total)
    subject_id_exclude_file_list = read_file_contents_list(
        args.subject_id_exclude_file_list)

    subject_id_exclude_list = get_subject_id_list(subject_id_exclude_file_list)

    file_list_reduced = [
        file_name for file_name in file_list_total
        if ClinicalDataReaderSPORE._get_subject_id_from_file_name(file_name)
        not in subject_id_exclude_list
    ]

    save_file_contents_list(args.file_list_out, file_list_reduced)


def get_subject_id_list(subject_id_exclude_list):
Example #15
import numpy as np
import nibabel as nib
from multiprocessing import Pool
from tools.data_io import DataFolder, ScanWrapper
import os
from tools.paral import AbstractParallelRoutine
from tools.utils import get_logger

logger = get_logger('Average')


class AverageScans:
    def __init__(self,
                 config,
                 in_folder=None,
                 data_file_txt=None,
                 in_data_folder_obj=None):
        self._data_folder = None
        if in_data_folder_obj is None:
            self._data_folder = DataFolder(in_folder, data_file_txt)
        else:
            self._data_folder = in_data_folder_obj
        self._standard_ref = ScanWrapper(self._data_folder.get_first_path())
        self._num_processes = config['num_processes']

    def get_average_image_union(self, save_path):
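        # Average voxel-wise over the union of scans, counting non-NaN contributions per voxel.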
        im_shape = self._get_std_shape()

        average_union = np.zeros(im_shape)
        average_union.fill(np.nan)
        non_null_mask_count_image = np.zeros(im_shape)
Example #16
import os
import numpy as np
from tools.utils import get_logger
import argparse
import yaml
import pandas as pd
import nibabel as nib
from tools.data_io import ScanWrapper, DataFolder
from tools.utils import mkdir_p
from tools.plot import ClipPlotSeriesWithBack, ClipPlotIntensityDeformationWall
from tools.paral import AbstractParallelRoutine


logger = get_logger('CAM intensity plot')


atlas_intensity_folder = '/nfs/masi/xuk9/SPORE/CAC_class/data/atlas/valid_region/s6.1_int'
atlas_jacobian_folder = '/nfs/masi/xuk9/SPORE/CAC_class/data/atlas/valid_region/s6.2_jac'


class ParaPlotClip(AbstractParallelRoutine):
    def __init__(
            self,
            in_int_folder_obj,
            in_jac_folder_obj,
            in_att_folder_obj,
            out_png_folder,
            num_process):
        super().__init__(in_int_folder_obj, num_process)
        self._in_jac_folder_obj = in_jac_folder_obj
        self._in_att_folder_obj = in_att_folder_obj
Example #17
import argparse
from tools.data_io import save_object, load_object
from tools.utils import get_logger, read_file_contents_list
import numpy as np
from scipy.stats import multivariate_normal
from scipy.spatial.distance import mahalanobis
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

logger = get_logger('Gaussian fit')


class FitGaussianJoint:
    def __init__(self):
        self._in_res_matrix_obj = None
        self._in_jac_matrix_obj = None
        self._num_res_pc = None
        self._num_jac_pc = None
        self._file_list = None
        self._use_data_matrix = None
        self._gaussian_model = None

    def load_data(self, in_res_matrix_path, num_res_pc, in_jac_matrix_path,
                  num_jac_pc):
        self._in_res_matrix_obj = load_object(in_res_matrix_path)
        self._num_res_pc = num_res_pc
        self._in_jac_matrix_obj = load_object(in_jac_matrix_path)
        self._num_jac_pc = num_jac_pc

        self._file_list = self._in_res_matrix_obj['file_list']
Example #18
import argparse
from tools.pca import PCA_NII_3D
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
from matplotlib.ticker import MaxNLocator
from tools.utils import get_logger
import pandas as pd

logger = get_logger('Bin Plot')


def main():
    parser = argparse.ArgumentParser(
        description='Bin plot data using csv file')
    parser.add_argument('--in-csv-list', nargs='+', type=str)
    parser.add_argument('--label-list', nargs='+', type=str)
    parser.add_argument('--color-list', nargs='+', type=str)
    parser.add_argument('--column-flag', type=str)
    parser.add_argument('--x-label', type=str)
    parser.add_argument('--title', type=str)
    parser.add_argument('--out-png', type=str)
    args = parser.parse_args()

    num_data = len(args.in_csv_list)
    in_csv_list = args.in_csv_list
    label_list = args.label_list
    color_list = args.color_list

    data_array_sequence = []
    for idx in range(num_data):
Example #19

import numpy as np
import argparse
import os.path as osp
import os
from tools.utils import get_logger
import yaml
import pandas as pd
from sklearn.metrics import r2_score
from tools.utils import mkdir_p
from tools.plot import plot_prediction_scatter

logger = get_logger('Plot Ground Truth vs. Prediction scatter')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--yaml-config',
                        type=str,
                        default='simg_bmi_regression_18_nfs.yaml')
    # parser.add_argument('--idx-fold', type=int, default=4)
    args = parser.parse_args()

    SRC_ROOT = os.path.dirname(os.path.realpath(__file__)) + '/..'
    yaml_config = os.path.join(SRC_ROOT, f'src/yaml/{args.yaml_config}')
    logger.info(f'Read yaml file {yaml_config}')
    with open(yaml_config, 'r') as f:
        config = yaml.safe_load(f)

    scatter_plot_dir = os.path.join(
        '/nfs/masi/xuk9/SPORE/CAC_class/prediction_plots', args.yaml_config)
    mkdir_p(scatter_plot_dir)
Example #20
import argparse
from tools.utils import get_logger
import pandas as pd
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn import metrics

logger = get_logger('Logistic regression, plot')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--in-csv-1', type=str)
    parser.add_argument('--in-csv-2', type=str)
    parser.add_argument('--column-flag', type=str)
    parser.add_argument('--out-png', type=str)
    args = parser.parse_args()

    logger.info('Run logistic regression')

    logger.info(f'Reading {args.in_csv_1}')
    rvs1 = pd.read_csv(args.in_csv_1)[args.column_flag].to_numpy()
    logger.info(f'Data length {len(rvs1)}')

    logger.info(f'Reading {args.in_csv_2}')
    rvs2 = pd.read_csv(args.in_csv_2)[args.column_flag].to_numpy()
    logger.info(f'Data length {len(rvs2)}')

    fig, ax = plt.subplots(figsize=(8, 4))
Example #21
import pandas as pd
import re
from datetime import datetime
from tools.utils import get_logger
import numpy as np
import collections

logger = get_logger('Clinical')


class ClinicalDataReaderSPORE:
    def __init__(self, data_frame):
        self._df = data_frame

    def get_summary_characteristics_subject(self, included_subject_list):
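        # Map each session name in the index back to its subject id so subjects can be matched.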
        df_sess_list = self._df.index.to_list()
        df_subject_list = [ClinicalDataReaderSPORE._get_subject_id_from_sess_name(sess_name) for sess_name in
                           df_sess_list]

        logger.info(f'Get the characteristics for included subjects: {len(included_subject_list)}')
        missing_subject = [subject_id for subject_id in included_subject_list if subject_id not in df_subject_list]
        if len(missing_subject) > 0:
            logger.info(f'Number of missing subject: {len(missing_subject)}')
            logger.info(missing_subject)

        included_subject_list = [subject_id for subject_id in included_subject_list if subject_id in df_subject_list]
        included_subject_idx_list = [df_subject_list.index(subject_id) for subject_id in included_subject_list]

        df_included_only = self._df.iloc[included_subject_idx_list, :]
        logger.info(f'Number rows of included only data frame: {len(df_included_only.index)}')
Example #22
        for name, price, image_path, printbar_url in products_data:
            try:
                pin_uploader.create_pin(board_id, name, price, image_path,
                                        printbar_url)
                print(f'pin created: {name}')
            except requests.exceptions.HTTPError as err:
                logger.exception(err)
                pin_uploader.logout()
                raise
            sleep(settings['pin_creating_period'])
        sleep(settings['board_creating_period'])


def remove_imgs_dir():
    shutil.rmtree(SAVED_IMAGES_DIR, ignore_errors=True)


def create_imgs_dir():
    if not os.path.isdir(SAVED_IMAGES_DIR):
        os.mkdir(SAVED_IMAGES_DIR)


if __name__ == "__main__":
    _atexit.register(remove_imgs_dir)
    create_imgs_dir()
    logger = get_logger('file.log', 'p2p')

    try:
        parse_printbar_and_upload_to_pinterest(logger)
    except Exception as err:
        logger.exception(f'unknown exception: {err}')
Example #23
import numpy as np
from tools.clinical import ClinicalDataReaderSPORE
from tools.utils import get_logger
import pandas as pd

SPORE_label_csv = '/nfs/masi/SPORE/file/clinical/self_creat/Limitedhistory20200420.csv'

logger = get_logger('PLCOm2012')


def PLCOm2012(
        age, race, education, body_mass_index, copd, phist, fhist,
        smoking_status, smoking_intensity, duration,
        quit_time):  # for SPORE data; see also Norm_cancer_risk
    def get_num(x):
        if x == 'yes' or x == 'current' or x == 1:
            return 1
        else:
            return 0

    def get_race(x):
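        # PLCOm2012 log-odds coefficients keyed by the SPORE race code.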
        d = {
            1.0: 0,
            2.0: 0.3944778,
            2.5: -0.7434744,
            3.0: -0.466585,
            4.0: 0,
            5.0: 1.027152
        }
        return d[x]
Example #24
import argparse
from tools.pca import PCA_NII_3D
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
from tools.data_io import DataFolder, ScanWrapper
import matplotlib.gridspec as gridspec
import os
from tools.utils import get_logger, mkdir_p
from matplotlib import colors
from mpl_toolkits.axes_grid1 import make_axes_locatable

logger = get_logger('Plot - PC')


class PlotPCGrid:
    def __init__(self, pc_folder_obj, step_axial, step_sagittal, step_coronal,
                 num_show_pc_each_line):
        self._pc_folder_obj = pc_folder_obj
        self._step_axial = step_axial
        self._step_coronal = step_coronal
        self._step_sagittal = step_sagittal
        self._num_show_pc = num_show_pc_each_line
        self._cm = 'hsv'
        self._num_view = 3
        self._out_dpi = 20
        self._num_clip = 1
        self._sub_title_font_size = 60

    def plot_pc(self, out_png):
        fig = plt.figure(figsize=(self._num_show_pc * 20,
Example #25
import argparse
import numpy as np
from tools.utils import get_logger
from sklearn.manifold import TSNE
from tools.data_io import load_object, save_object

logger = get_logger('tSNE')


def main():
    parser = argparse.ArgumentParser(description='Load a saved pca object')
    parser.add_argument('--low-dim-bin-path', type=str)
    parser.add_argument('--save-bin-path', type=str)
    parser.add_argument('--num-pca-component', type=int, default=10)
    parser.add_argument('--dim-embedded', type=int, default=2)
    args = parser.parse_args()

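    # Stack the per-scan low-dimensional PCA projections into an (n_samples, n_components) matrix.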
    logger.info(f'Load low dim data from {args.low_dim_bin_path}')
    low_dim_array = load_object(args.low_dim_bin_path)
    data_matrix = np.zeros((len(low_dim_array), args.num_pca_component))
    for sample_idx in range(len(low_dim_array)):
        data_matrix[sample_idx, :] = low_dim_array[sample_idx]['low_dim'][:]

    logger.info(f'Num of sample: {data_matrix.shape[0]}')
    logger.info(f'Num of included PCs: {data_matrix.shape[1]}')

    logger.info('Start tSNE')
    # embedded_matrix = TSNE(perplexity=50, learning_rate=10000, n_components=args.dim_embedded).fit_transform(data_matrix)
    embedded_matrix = TSNE(perplexity=50, n_iter=100000, n_components=args.dim_embedded).fit_transform(
        data_matrix)
Example #26

import os
import sys

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# parse_args, set_seed, DetectionCollater, get_logger, compute_flops_and_params,
# validate_detection, and BASE_DIR are project-local names assumed importable here.


def main():
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from test_config import config
    log_dir = os.path.join(args.work_dir, 'log')

    set_seed(config.seed)

    collater = DetectionCollater()
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            collate_fn=collater.next)

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    logger = get_logger('test', log_dir)

    for key, value in config.__dict__.items():
        if not key.startswith('__'):
            if key not in [
                    'model', 'criterion', 'decoder', 'train_dataset',
                    'val_dataset'
            ]:
                log_info = f'{key}: {value}'
                logger.info(log_info)

    gpus_type, gpus_num = torch.cuda.get_device_name(), torch.cuda.device_count()
    log_info = f'gpus_type: {gpus_type}, gpus_num: {gpus_num}'
    logger.info(log_info)

    model = config.model
    decoder = config.decoder

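    # Load trained weights on CPU first; the model is moved to GPU afterwards.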
    if config.trained_model_path:
        saved_model = torch.load(os.path.join(BASE_DIR,
                                              config.trained_model_path),
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    flops, params = compute_flops_and_params(config, model)
    log_info = f'model: {config.network}, flops: {flops}, params: {params}'
    logger.info(log_info)

    model = model.cuda()
    decoder = decoder.cuda()
    model = nn.DataParallel(model)

    result_dict = validate_detection(config.val_dataset, val_loader, model,
                                     decoder, config)
    log_info = 'eval_result: '
    if result_dict:
        for key, value in result_dict.items():
            log_info += f'{key}: {value}, '
    else:
        log_info += 'no target detected in testset images!'
    logger.info(log_info)

    return
Example #27
import argparse
from tools.utils import get_logger
import pandas as pd
from tools.utils import read_file_contents_list, write_list_to_file
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import KFold

logger = get_logger('Generate 5-fold file name list.')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--neg-sample-list', type=str)
    parser.add_argument('--pos-sample-list', type=str)
    parser.add_argument('--out-file-list-folder', type=str)
    parser.add_argument('--n-fold', type=int, default=5)
    args = parser.parse_args()

    n_fold = KFold(n_splits=args.n_fold)

    neg_sample_list = read_file_contents_list(args.neg_sample_list)
    pos_sample_list = read_file_contents_list(args.pos_sample_list)

    n_fold_file_name_list = []

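    # KFold yields train/test index arrays per fold; map them back to file names for the negative class.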
    for neg_train_idx, neg_test_idx in n_fold.split(neg_sample_list):
        neg_train_file_name_list = [
            neg_sample_list[idx_file_name] for idx_file_name in neg_train_idx
        ]
Example #28

import os

import numpy as np
import tensorflow as tf  # TF 1.x graph-mode API
from multiprocessing import Process, Queue

# train_tools, MH, and get_logger are project-local helpers assumed importable here.


def multi_train(Net, FLAGS):
    with tf.Graph().as_default():

        global_step = tf.Variable(0, trainable=False)
        # global_step = tf.train.global_step()

        train_func, valid_func, [train_steps,
                                 valid_steps] = multi_process_data_read(FLAGS)

        images = tf.placeholder(dtype=tf.float32,
                                shape=[None] + FLAGS.image_size + [3],
                                name='input')
        labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')

        logits = Net.inference(images)
        total_loss = train_tools.classify_loss(logits, labels)
        train_op = train_tools.train(total_loss, global_step,
                                     FLAGS.batch_size * train_steps, FLAGS)
        accuracy_op = train_tools.classify_accuracy(logits, labels)
        best_model0 = 0

        # sess define, load model, model initial
        sess, summary_op, summary_writer, saver = MH.sess_and_saver_initial(
            FLAGS.output_dir, FLAGS.is_loadmodel)
        logger = get_logger(FLAGS.model_name, FLAGS.output_dir)

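        # Producer processes stream training / validation batches through bounded queues.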
        train_queue = Queue(2)
        valid_queue = Queue(2)
        train_process = Process(target=train_func, args=(train_queue, ))
        valid_process = Process(target=valid_func, args=(valid_queue, ))

        try:
            train_process.start()
            valid_process.start()
            print('Begin Training')
            for epoch in range(FLAGS.num_epochs):
                for step in range(101):

                    # t1 = time.clock()
                    batch_x, batch_y = train_queue.get(True)
                    if step and step % FLAGS.log_interval == 0:
                        _, loss_value, acc_value, summary_str, global_step_value = sess.run(
                            [
                                train_op, total_loss, accuracy_op, summary_op,
                                global_step
                            ],
                            feed_dict={
                                images: batch_x,
                                labels: batch_y
                            })

                        logger.info(
                            'epoch:{4} [{0} / {1}] step {2}, loss {3}'.format(
                                step // FLAGS.log_interval,
                                train_steps // FLAGS.log_interval, step,
                                loss_value, epoch))
                        summary_writer.add_summary(
                            summary_str, global_step=global_step_value)
                    else:

                        # print('read data', time.clock()-t1)
                        _ = sess.run([train_op],
                                     feed_dict={
                                         images: batch_x,
                                         labels: batch_y
                                     })
                        # print('>', end='')
                        # print('sess run time', time.clock()-t0)
                        # print('batch time', time.clock()-t1)
                logger.info('Evaluating...')
                accuracy_sum = []
                for step in range(101):
                    batch_x, batch_y = valid_queue.get(True)
                    if step and step % FLAGS.log_interval == 0:
                        loss_value, acc_value, summary_str = sess.run(
                            [total_loss, accuracy_op, summary_op],
                            feed_dict={
                                images: batch_x,
                                labels: batch_y
                            })

                        logger.info('[{0} / {1}] step {2}, loss {3}'.format(
                            step // FLAGS.log_interval,
                            valid_steps // FLAGS.log_interval, step,
                            loss_value))
                        accuracy_sum.append(acc_value)
                        # summary_writer.add_summary(summary_str, step)
                    else:
                        [acc_value] = sess.run([accuracy_op],
                                               feed_dict={
                                                   images: batch_x,
                                                   labels: batch_y
                                               })
                        # print('#',end='')
                        accuracy_sum.append(acc_value)

                validation_acc = np.mean(accuracy_sum)
                logger.info('#######################')
                logger.info('epoch: {0} validation acc{1}'.format(
                    epoch, validation_acc))
                logger.info('#######################')
                summary = tf.Summary()
                summary.ParseFromString(summary_str)
                summary.value.add(tag='val_acc', simple_value=validation_acc)
                summary_writer.add_summary(summary, epoch)
                if validation_acc > best_model0:
                    best_model0 = validation_acc
                    checkpoint_path = os.path.join(FLAGS.output_dir,
                                                   FLAGS.model_name + '.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)

            train_process.join()
            valid_process.terminate()
        except KeyboardInterrupt as e:
            print('KeyboardInter', e)
        finally:
            train_process.terminate()
            valid_process.terminate()
Example #29
from tools.clinical import ClinicalDataReaderSPORE
from tools.data_io import load_object
from tools.utils import get_logger
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_mutual_info_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE
from sklearn.manifold import LocallyLinearEmbedding


logger = get_logger('KMeans')


class ClusterAnalysisDimAnalyzer:
    def __init__(self, data_dict, n_features):
        self._data_dict = data_dict
        self._n_feature = n_features
        self._kmean_n_cluster_range = range(3, 9)
        self._bar_w = 0.12
        self._n_init_kmeans = 10000
        self._con_factor = 0.6

    def plot_kmean_n_cluster_field_list_cancer_subject_first_scan(
            self,
            field_list,
            n_cluster,
Example #30

import argparse
from tools.pca import PCA_NII_3D
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
from matplotlib.ticker import MaxNLocator
from tools.clinical import ClinicalDataReaderSPORE
from tools.data_io import load_object
from tools.utils import get_logger
import pandas as pd
import os


logger = get_logger('Plot - PC 2 dim')


class PlotSpacePCA:
    def __init__(self, label_df):
        self._label_df = label_df
        self._x_lim = (-2500, 2500)
        self._y_lim = (-5000, 5000)
        self._if_set_lim = False

    def save_label_file(self, out_csv):
        self._label_df.to_csv(out_csv)

    def plot_copd(self, out_png):
        plt.figure(figsize=(16, 10))

        df_yes = self._label_df[self._label_df['copd'] == 'Yes']
        df_no = self._label_df[self._label_df['copd'] == 'No']