Example #1
    def __init__(self) -> None:
        # Load the environment file.
        load_dotenv(verbose=True)

        # Load the needed files.
        self.general = load_yaml("settings.yml")
        self.subreddits = load_yaml("subreddits.yml")

        if self.general is None or self.subreddits is None:
            print("Failed to load the settings or subreddits.")
            exit()
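Every example below calls a small load_yaml helper that is not shown. A minimal sketch using PyYAML, returning None on failure so callers can check for a missing or malformed file as above (the actual helper in each repository may differ):

import yaml

def load_yaml(path):
    # Minimal sketch: parse a YAML file into a dict, or return None when the
    # file is missing or cannot be parsed.
    try:
        with open(path) as file_obj:
            return yaml.safe_load(file_obj)
    except (OSError, yaml.YAMLError):
        return None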
Example #2
def main():
    args = argparser()
    config_path = Path(args.cfg.strip("/"))
    sub_config = load_yaml(config_path)
    print(sub_config)

    sample_sub = pd.read_csv(sub_config['SAMPLE_SUB'])
    n_objects_dict = sample_sub.ImageId.value_counts().to_dict()

    print('start loading mask results....')
    mask_dict = load_mask_dict(sub_config)

    use_contours = sub_config['USECONTOURS']
    min_contour_area = sub_config.get('MIN_CONTOUR_AREA', 0)

    area_threshold = sub_config['AREA_THRESHOLD']
    top_score_threshold = sub_config['TOP_SCORE_THRESHOLD']
    bottom_score_threshold = sub_config['BOTTOM_SCORE_THRESHOLD']
    if sub_config['USELEAK']:
        leak_score_threshold = sub_config['LEAK_SCORE_THRESHOLD']
    else:
        leak_score_threshold = bottom_score_threshold

    rle_dict = build_rle_dict(mask_dict, n_objects_dict, area_threshold,
                              top_score_threshold, bottom_score_threshold,
                              leak_score_threshold, use_contours,
                              min_contour_area)
    sub = buid_submission(rle_dict, sample_sub)
    print((sub.EncodedPixels != -1).sum())
    print(sub.head())

    sub_file = Path(sub_config['SUB_FILE'])
    sub.to_csv(sub_file, index=False)
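The argparser() helper is likewise not shown. A minimal sketch of the parser this example assumes, with a single --cfg option (later examples use different argument names, and some index args like a dict):

import argparse

def argparser():
    # Minimal sketch of the CLI parser this example assumes; the real parser
    # in each repository may define more (or differently named) options.
    parser = argparse.ArgumentParser(description='build a submission from a YAML config')
    parser.add_argument('--cfg', type=str, required=True, help='path to the YAML config file')
    return parser.parse_args()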
Example #3
def ctDNA_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config'])
    for arg, value in args.items():
        config[arg] = value

    helpers.makedirs(config["bam_directory"])

    helpers.makedirs(config["results_dir"])

    inputs = helpers.load_yaml(args['input_yaml'])
    patients = inputs.keys()

    workflow.setobj(obj=mgd.OutputChunks('patient_id', ), value=patients)

    workflow.transform(name='get_input_by_patient',
                       func=helpers.get_input_by_patient,
                       ret=mgd.TempOutputObj('patient_input', 'patient_id'),
                       axes=('patient_id', ),
                       args=(
                           inputs,
                           mgd.InputInstance('patient_id'),
                       ))

    workflow.subworkflow(name='patient_workflow',
                         func=patient_workflow,
                         axes=('patient_id', ),
                         args=(
                             config,
                             mgd.InputInstance('patient_id'),
                             mgd.TempInputObj('patient_input', 'patient_id'),
                             mgd.OutputFile(
                                 os.path.join(config['results_dir'],
                                              '{patient_id}.log'),
                                 'patient_id'),
                         ))

    pyp.run(workflow)
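Example #4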
def main():
    args = argparser()
    config_path = Path(args.cfg.strip("/"))
    experiment_folder = config_path.parents[0]
    inference_config = load_yaml(config_path)
    print(inference_config)
    
    batch_size = inference_config['BATCH_SIZE']
    device = inference_config['DEVICE']
    
    module = importlib.import_module(inference_config['MODEL']['PY'])
    model_class = getattr(module, inference_config['MODEL']['CLASS'])
    model = model_class(**inference_config['MODEL'].get('ARGS', {})).to(device)
    model.eval()

    num_workers = inference_config['NUM_WORKERS']
    transform = albu.load(inference_config['TEST_TRANSFORMS']) 
    dataset_folder = inference_config['DATA_DIRECTORY'] 
    dataset = PneumothoraxDataset(
        data_folder=dataset_folder, mode='test', 
        transform=transform,
    )
    dataloader = DataLoader(
        dataset=dataset, batch_size=batch_size, 
        num_workers=num_workers, shuffle=False
    )

    use_flip = inference_config['FLIP']
    checkpoints_list = build_checkpoints_list(inference_config)
  
    mask_dict = defaultdict(int)
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
        print(checkpoint_path)
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()
        current_mask_dict = inference_model(model, dataloader, device, use_flip)
        for name, mask in current_mask_dict.items():
            mask_dict[name] = (mask_dict[name] * pred_idx + mask) / (pred_idx + 1)

    if 'RESULT_FOLDER' in inference_config:
        result_path = Path(inference_config['RESULT_FOLDER'], inference_config['RESULT'])
    else:
        result_path = Path(experiment_folder, inference_config['RESULT'])

    with open(result_path, 'wb') as handle:
        pickle.dump(mask_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
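The update mask_dict[name] = (mask_dict[name] * pred_idx + mask) / (pred_idx + 1) keeps a running mean over checkpoints, so each checkpoint is weighted equally. A small standalone check of that incremental-average identity:

import numpy as np

# After k predictions the accumulator equals their arithmetic mean, so every
# checkpoint contributes with equal weight.
preds = [np.array([0.2, 0.8]), np.array([0.4, 0.6]), np.array([0.9, 0.1])]
avg = 0
for pred_idx, pred in enumerate(preds):
    avg = (avg * pred_idx + pred) / (pred_idx + 1)
assert np.allclose(avg, np.mean(preds, axis=0))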
Example #5
def prepare_hparams(yaml_file=None, **kwargs):
    """Prepare the trainer hyperparameters and check that all have the correct value.

    Args:
        yaml_file (str): YAML file as configuration.

    Returns:
        obj: Hyperparameter object in TF (tf.contrib.training.HParams).
    """
    if yaml_file is not None:
        config = load_yaml(yaml_file)
        config = flat_config(config)
    else:
        config = {}

    config.update(kwargs)

    return create_hparams(config)
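A hedged usage sketch of prepare_hparams; the file name and the overridden key here are hypothetical:

# Hypothetical usage: YAML defaults plus a keyword override.
hparams = prepare_hparams("config.yaml", learning_rate=0.001)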
Example #6
def main():
    args = argparser()
    config_file = args.config_file
    process_config = load_yaml(config_file)

    print('start loading mask results....')
    inference_pkl_path = process_config['INFERENCE_PKL_FILE']
    with open(inference_pkl_path, 'rb') as handle:
        infer_pkl_dict = pickle.load(handle)

    only_largest = process_config.get('ONLY_LARGEST', False)  # defaults to False: constraint not applied
    min_contour_area = process_config.get('MIN_CONTOUR_AREA', 0)  # defaults to 0: constraint not applied
    opening = process_config.get('OPENING', True)  # defaults to True: apply morphological opening

    binarizer_module = importlib.import_module(
        process_config['MASK_BINARIZER']['PY'])
    binarizer_class = getattr(binarizer_module,
                              process_config['MASK_BINARIZER']['CLASS'])
    if process_config['MASK_BINARIZER'].get('THRESHOLD', False):
        binarizer_threshold = process_config['MASK_BINARIZER']['THRESHOLD']
        # The constructor only needs a placeholder argument here, since we just
        # call the class's apply-transform method later.
        binarizer_class = binarizer_class('Inference Time MaskBinarization')
    else:
        print("Please set the THRESHOLD of MASK_BINARIZER")
        return

    result_dic = Path(process_config['RESULT_PNG_DIC'])
    if os.path.isdir(result_dic):
        shutil.rmtree(result_dic)
    os.makedirs(result_dic, exist_ok=True)
    rle_dict = build_result_png(infer_pkl_dict, result_dic, binarizer_class,
                                binarizer_threshold, only_largest,
                                min_contour_area, opening)

    kaggle_test = process_config.get('KAGGLE_TEST', False)
    if kaggle_test:
        sub = buid_submission(rle_dict)
        sub.to_csv('submit.csv', index=False)
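Example #7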
def main():
    args = argparser()
    config_folder = Path(args.train_cfg.strip("/"))
    # config_folder = Path('experiments/albunet_public/01_train_config_part0.yaml'.strip("/"))
    experiment_folder = config_folder.parents[0]

    train_config = load_yaml(config_folder)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(exist_ok=True, parents=True)

    main_logger = init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            map(str, train_config["DEVICE_LIST"]))

    pipeline_name = train_config['PIPELINE_NAME']

    train_transform = albu.load(train_config['TRAIN_TRANSFORMS'])
    valid_transform = albu.load(train_config['VALID_TRANSFORMS'])

    non_empty_mask_proba = train_config.get('NON_EMPTY_MASK_PROBA', 0)
    use_sampler = train_config['USE_SAMPLER']

    dataset_folder = train_config['DATA_DIRECTORY']
    folds_distr_path = train_config['FOLD']['FILE']

    num_workers = train_config['WORKERS']
    batch_size = train_config['BATCH_SIZE']
    n_folds = train_config['FOLD']['NUMBER']

    usefolds = map(str, train_config['FOLD']['USEFOLDS'])
    # local_metric_fn, global_metric_fn = init_eval_fns(train_config)

    binarizer_module = importlib.import_module(
        train_config['MASK_BINARIZER']['PY'])
    binarizer_class = getattr(binarizer_module,
                              train_config['MASK_BINARIZER']['CLASS'])
    binarizer_fn = binarizer_class(**train_config['MASK_BINARIZER']['ARGS'])

    eval_module = importlib.import_module(
        train_config['EVALUATION_METRIC']['PY'])
    eval_fn = getattr(eval_module, train_config['EVALUATION_METRIC']['CLASS'])
    eval_fn = functools.partial(eval_fn,
                                **train_config['EVALUATION_METRIC']['ARGS'])

    for fold_id in usefolds:
        main_logger.info('Start training of {} fold....'.format(fold_id))

        train_dataset = BodyMorpDataset(data_folder=dataset_folder,
                                        mode='train',
                                        transform=train_transform,
                                        fold_index=fold_id,
                                        folds_distr_path=folds_distr_path)
        train_sampler = PartDataSampler(folds_distr_path, fold_id,
                                        non_empty_mask_proba)
        if use_sampler:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          sampler=train_sampler)
        else:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=True)

        valid_dataset = BodyMorpDataset(
            data_folder=dataset_folder,
            mode='val',
            transform=valid_transform,
            fold_index=str(fold_id),
            folds_distr_path=folds_distr_path,
        )
        valid_dataloader = DataLoader(dataset=valid_dataset,
                                      batch_size=batch_size,
                                      num_workers=num_workers,
                                      shuffle=False)

        train_fold(train_config, experiment_folder, pipeline_name, log_dir,
                   fold_id, train_dataloader, valid_dataloader, binarizer_fn,
                   eval_fn)
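Several of these examples (for instance the MASK_BINARIZER and EVALUATION_METRIC blocks above) instantiate classes named in the config via importlib. A self-contained sketch of that pattern; the config values are placeholders, with collections.Counter chosen only so the snippet runs on its own:

import importlib

# Placeholder config entry mirroring the MASK_BINARIZER / MODEL blocks above.
config = {'PY': 'collections', 'CLASS': 'Counter', 'ARGS': {}}

module = importlib.import_module(config['PY'])   # import the module by its dotted path
cls = getattr(module, config['CLASS'])           # look the class up by name
instance = cls(**config.get('ARGS', {}))         # instantiate it with keyword arguments
print(type(instance))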
Example #8
def main():
    args = argparser()
    config_file = args.config_file
    inference_config = load_yaml(config_file)
    print(inference_config)

    batch_size = inference_config['BATCH_SIZE']
    device = inference_config.get('DEVICE', "cuda")  # DEVICE defaults to "cuda"
    
    if "DEVICE_LIST" in inference_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, inference_config["DEVICE_LIST"]))

    module = importlib.import_module(inference_config['MODEL']['PY'])
    model_class = getattr(module, inference_config['MODEL']['CLASS'])
    model = model_class(**inference_config['MODEL'].get('ARGS', {})).to(device)
    model.eval()

    usefolds = map(str, inference_config['FOLD']['USEFOLDS'])

    num_workers = inference_config['WORKERS']

    image_size = inference_config.get('IMAGE_SIZE', 1024)
    train_transform, valid_transform = generate_transforms(image_size)

    dataset_folder = inference_config['DATA_DIRECTORY']

    dataset = PneumothoraxDataset(
        data_folder=dataset_folder, mode='test', 
        transform=valid_transform,
    )
    dataloader = DataLoader(
        dataset=dataset, batch_size=batch_size, 
        num_workers=num_workers, shuffle=False
    )

    use_flip = inference_config['FLIP']

    submit_best = inference_config['SUBMIT_BEST']

    checkpoints_list = []
    for fold_id in usefolds:
        checkpoints_list.extend(build_checkpoints_list(inference_config, submit_best=submit_best, fold_id=fold_id))

    output_mask_dict = defaultdict(int)
    output_distancemap_dict = defaultdict(int)
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()
        # Run inference with the current checkpoint
        for item in tqdm(dataloader):
            image_ids, images = item
            predicted = inference_image(model, images, device)  # batch-level inference

            masks_predict = predicted["masks_predict"]
            if predicted["distancemap_exist"]:  # a distance map was predicted
                distancemap_predict = predicted["distancemap_predict"]

            if use_flip:  # with flip TTA, overwrite the variables above with the averaged predictions
                predicted_flipped = flipped_inference_image(model, images, device)
                masks_predict = (predicted["masks_predict"] + predicted_flipped["masks_predict"]) / 2
                if predicted_flipped["distancemap_exist"]:  # a distance map was predicted
                    distancemap_predict = (predicted["distancemap_predict"] + predicted_flipped["distancemap_predict"]) / 2

            # Split the batch apart and accumulate per-image results
            for index, (image_single_id, mask_single_predict) in enumerate(zip(image_ids, masks_predict)):
                output_mask_dict[image_single_id] = (output_mask_dict[image_single_id] * pred_idx + mask_single_predict) / (pred_idx + 1)  # running mean over checkpoints
                if predicted["distancemap_exist"]:
                    output_distancemap_dict[image_single_id] = (output_distancemap_dict[image_single_id] * pred_idx + distancemap_predict[index]) / (pred_idx + 1)  # running mean over checkpoints

    print('Number of mask: {}, number of distance map: {}'.format(len(output_mask_dict.keys()),len(output_distancemap_dict.keys())))

    result_dict = {"mask":output_mask_dict,"distancemap":output_distancemap_dict}

    result_path = Path(inference_config['MODEL']['PRETRAINED']['PIPELINE_PATH'], inference_config['RESULT_PKL_FILE'])
    with open(result_path, 'wb') as handle:
        pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
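flipped_inference_image is not shown; in the example above it returns a dict with mask and distance-map entries. A minimal sketch of horizontal-flip test-time augmentation for a model that returns a single mask tensor:

import torch

def flip_tta(model, images):
    # Predict on the batch and on its horizontally flipped copy, flip the
    # second prediction back, and average the two mask maps (NCHW layout).
    with torch.no_grad():
        direct = torch.sigmoid(model(images))
        flipped = torch.sigmoid(model(torch.flip(images, dims=[-1])))
    return (direct + torch.flip(flipped, dims=[-1])) / 2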
Example #9
def main():
    args = argparser()
    config_folder = Path(args.train_cfg.strip("/"))
    experiment_folder = config_folder.parents[0]

    train_config = load_yaml(config_folder)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(exist_ok=True, parents=True)

    main_logger = init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            map(str, train_config["DEVICE_LIST"]))

    pipeline_name = train_config['PIPELINE_NAME']
    dataset_folder = train_config['DATA_DIRECTORY']

    train_transform = albu.load(train_config['TRAIN_TRANSFORMS'])
    valid_transform = albu.load(train_config['VALID_TRANSFORMS'])

    non_empty_mask_proba = train_config.get('NON_EMPTY_MASK_PROBA', 0)
    use_sampler = train_config['USE_SAMPLER']

    dataset_folder = train_config['DATA_DIRECTORY']
    folds_distr_path = train_config['FOLD']['FILE']

    num_workers = train_config['WORKERS']
    batch_size = train_config['BATCH_SIZE']
    n_folds = train_config['FOLD']['NUMBER']

    usefolds = map(str, train_config['FOLD']['USEFOLDS'])
    local_metric_fn, global_metric_fn = init_eval_fns(train_config)

    for fold_id in usefolds:
        main_logger.info('Start training of {} fold....'.format(fold_id))

        train_dataset = PneumothoraxDataset(
            data_folder=dataset_folder,
            mode='train',
            transform=train_transform,
            fold_index=fold_id,
            folds_distr_path=folds_distr_path,
        )
        train_sampler = PneumoSampler(folds_distr_path, fold_id,
                                      non_empty_mask_proba)
        if use_sampler:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          sampler=train_sampler)
        else:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=True)

        valid_dataset = PneumothoraxDataset(
            data_folder=dataset_folder,
            mode='val',
            transform=valid_transform,
            fold_index=str(fold_id),
            folds_distr_path=folds_distr_path,
        )
        valid_dataloader = DataLoader(dataset=valid_dataset,
                                      batch_size=batch_size,
                                      num_workers=num_workers,
                                      shuffle=False)

        train_fold(train_config, experiment_folder, pipeline_name, log_dir,
                   fold_id, train_dataloader, valid_dataloader,
                   local_metric_fn, global_metric_fn)
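PneumoSampler is defined elsewhere in the repository. One common way to oversample images with non-empty masks is torch's WeightedRandomSampler; the sketch below is a hedged stand-in under the assumption that a 0/1 has_mask array is available, not the repository's implementation:

import torch
from torch.utils.data import WeightedRandomSampler

def make_non_empty_sampler(has_mask, non_empty_mask_proba=0.5):
    # Weight samples so that images with a non-empty mask make up roughly
    # `non_empty_mask_proba` of each epoch; has_mask is a 0/1 sequence
    # aligned with the dataset.
    has_mask = torch.as_tensor(has_mask, dtype=torch.float)
    n_pos = has_mask.sum()
    n_neg = len(has_mask) - n_pos
    weights = torch.where(has_mask > 0,
                          non_empty_mask_proba / n_pos,
                          (1 - non_empty_mask_proba) / n_neg)
    return WeightedRandomSampler(weights, num_samples=len(has_mask), replacement=True)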
Example #10
    freeze_model = train_config['MODEL']['FREEZE']

    Learning(optimizer, binarizer_fn, loss_fn, eval_fn, device, n_epochs,
             scheduler, freeze_model, grad_clip, grad_accum, early_stopping,
             validation_frequency, calculation_name, best_checkpoint_folder,
             checkpoints_history_folder, checkpoints_topk,
             fold_logger).run_train(model, train_dataloader, val_dataloader)


if __name__ == '__main__':
    args = argparser()
    config_file = Path(args['train_config'].strip('/'))
    experiment_folder = config_file.parents[0]

    train_config = helpers.load_yaml(config_file)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(parents=True, exist_ok=True)

    main_logger = helpers.init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    helpers.init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
            map(str, train_config['DEVICE_LIST']))

    pipeline_name = train_config['PIPELINE_NAME']
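Helpers such as init_seed are not shown. A common sketch that seeds Python, NumPy, and PyTorch (the repositories' versions may also set cuDNN determinism flags):

import random
import numpy as np
import torch

def init_seed(seed):
    # Seed the usual RNG sources so training runs are repeatable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)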
Example #11
                        mask = cv2.resize(
                            mask,
                            dsize=(1024, 1024), 
                            interpolation=cv2.INTER_LINEAR
                        )
                    #crazy_mask = (mask > 0.75).astype(np.uint8)
                    #if crazy_mask.sum() < 1000:
                    #  mask = np.zeros_like(mask)
                    mask_dict[name] = mask_dict[name] + mask * weight
        return mask_dict

if __name__ == '__main__':
    args = argparser()
    config_path = Path(args['config'].strip("/"))
    experiment_folder = config_path.parents[0]
    sub_config = load_yaml(config_path)

    sample_sub = pd.read_csv(sub_config['SAMPLE_SUB'])
    n_objects_dict = sample_sub.ImageId.value_counts().to_dict()

    # print('start loading mask results....')
    # mask_dict = load_mask_dict(sub_config)
    
    use_contours = sub_config['USECONTOURS']
    min_contour_area = sub_config.get('MIN_CONTOUR_AREA', 0)

    area_threshold = sub_config['AREA_THRESHOLD']
    top_score_threshold = sub_config['TOP_SCORE_THRESHOLD']
    bottom_score_threshold = sub_config['BOTTOM_SCORE_THRESHOLD']
    if sub_config['USELEAK']:
        leak_score_threshold = sub_config['LEAK_SCORE_THRESHOLD']
Example #12
        curr_masks = curr_masks.squeeze(1).cpu().detach().numpy()
        mask = (mask * pred_idx + curr_masks) / (pred_idx + 1)
    # return (mask.squeeze(0) * 255).astype('uint8')

    area_threshold = cfg['AREA_THRESHOLD']
    top_score_threshold = cfg['TOP_SCORE_THRESHOLD']
    bottom_score_threshold = cfg['BOTTOM_SCORE_THRESHOLD']
    if cfg['USELEAK']:
        leak_score_threshold = cfg['LEAK_SCORE_THRESHOLD']
    else:
        leak_score_threshold = bottom_score_threshold

    return apply_thresholds(mask.squeeze(0), 1, area_threshold,
                            top_score_threshold, bottom_score_threshold,
                            leak_score_threshold)


if __name__ == '__main__':
    args = argparser()
    assert Path(args['dcm_path']).is_file() and args['dcm_path'][-3:] == 'dcm', \
        'image path is invalid'

    config_path = Path(args['config'].strip('/'))
    inference_config = load_yaml(config_path)

    mask = predict(args['dcm_path'], inference_config)
    dest_path = args['dcm_path'][:args['dcm_path'].rfind('.')] + '_segmented.png'
    cv2.imwrite(dest_path, mask)
    print(f'Result is stored in {dest_path}')
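apply_thresholds is not shown in these excerpts. One plausible reading of the triplet thresholds used throughout (top, area, bottom, with the leak threshold substituted for the bottom one when USELEAK is set) is: keep a mask only if enough pixels exceed the top score threshold, otherwise predict empty, and binarize survivors at the lower threshold. A hedged reconstruction, not the repositories' function:

import numpy as np

def triplet_threshold_sketch(prob_mask, area_threshold,
                             top_score_threshold, bottom_score_threshold):
    # Hypothetical reconstruction: if too few pixels are confidently positive,
    # predict an empty mask; otherwise binarize at the lower score threshold.
    if (prob_mask > top_score_threshold).sum() < area_threshold:
        return np.zeros_like(prob_mask, dtype=np.uint8)
    return (prob_mask > bottom_score_threshold).astype(np.uint8)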