def __init__(self) -> None:
    # Load the environment file.
    load_dotenv(verbose=True)

    # Load the needed files.
    self.general = load_yaml("settings.yml")
    self.subreddits = load_yaml("subreddits.yml")
    if self.general is None or self.subreddits is None:
        print("Failed to load the settings or subreddits.")
        exit()
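# All of the snippets in this section depend on a load_yaml helper that is not shown
# here. A minimal sketch of what such a helper might look like (an assumption, not any
# repository's actual implementation), returning None on failure as the __init__ above
# expects:
import yaml

def load_yaml(file_path):
    """Read a YAML file and return its contents, or None if it cannot be read or parsed."""
    try:
        with open(file_path) as stream:
            return yaml.safe_load(stream)
    except (OSError, yaml.YAMLError):
        return None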
def main():
    args = argparser()
    config_path = Path(args.cfg.strip("/"))
    sub_config = load_yaml(config_path)
    print(sub_config)

    sample_sub = pd.read_csv(sub_config['SAMPLE_SUB'])
    n_objects_dict = sample_sub.ImageId.value_counts().to_dict()

    print('start loading mask results....')
    mask_dict = load_mask_dict(sub_config)

    use_contours = sub_config['USECONTOURS']
    min_contour_area = sub_config.get('MIN_CONTOUR_AREA', 0)
    area_threshold = sub_config['AREA_THRESHOLD']
    top_score_threshold = sub_config['TOP_SCORE_THRESHOLD']
    bottom_score_threshold = sub_config['BOTTOM_SCORE_THRESHOLD']
    if sub_config['USELEAK']:
        leak_score_threshold = sub_config['LEAK_SCORE_THRESHOLD']
    else:
        leak_score_threshold = bottom_score_threshold

    rle_dict = build_rle_dict(mask_dict, n_objects_dict, area_threshold,
                              top_score_threshold, bottom_score_threshold,
                              leak_score_threshold, use_contours, min_contour_area)
    sub = buid_submission(rle_dict, sample_sub)
    print((sub.EncodedPixels != -1).sum())
    print(sub.head())

    sub_file = Path(sub_config['SUB_FILE'])
    sub.to_csv(sub_file, index=False)
def ctDNA_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config'])
    # Command-line arguments override values loaded from the config YAML.
    for arg, value in args.items():
        config[arg] = value

    helpers.makedirs(config["bam_directory"])
    helpers.makedirs(config["results_dir"])

    inputs = helpers.load_yaml(args['input_yaml'])
    patients = list(inputs.keys())

    workflow.setobj(obj=mgd.OutputChunks('patient_id', ), value=patients)

    workflow.transform(
        name='get_input_by_patient',
        func=helpers.get_input_by_patient,
        ret=mgd.TempOutputObj('patient_input', 'patient_id'),
        axes=('patient_id', ),
        args=(
            inputs,
            mgd.InputInstance('patient_id'),
        ))

    workflow.subworkflow(
        name='patient_workflow',
        func=patient_workflow,
        axes=('patient_id', ),
        args=(
            config,
            mgd.InputInstance('patient_id'),
            mgd.TempInputObj('patient_input', 'patient_id'),
            mgd.OutputFile(
                os.path.join(config['results_dir'], '{patient_id}.log'),
                'patient_id'),
        ))

    pyp.run(workflow)
def main():
    args = argparser()
    config_path = Path(args.cfg.strip("/"))
    experiment_folder = config_path.parents[0]

    inference_config = load_yaml(config_path)
    print(inference_config)

    batch_size = inference_config['BATCH_SIZE']
    device = inference_config['DEVICE']

    module = importlib.import_module(inference_config['MODEL']['PY'])
    model_class = getattr(module, inference_config['MODEL']['CLASS'])
    model = model_class(**inference_config['MODEL'].get('ARGS', {})).to(device)
    model.eval()

    num_workers = inference_config['NUM_WORKERS']
    transform = albu.load(inference_config['TEST_TRANSFORMS'])
    dataset_folder = inference_config['DATA_DIRECTORY']
    dataset = PneumothoraxDataset(
        data_folder=dataset_folder,
        mode='test',
        transform=transform,
    )
    dataloader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False
    )

    use_flip = inference_config['FLIP']
    checkpoints_list = build_checkpoints_list(inference_config)

    mask_dict = defaultdict(int)
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
        print(checkpoint_path)
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()
        current_mask_dict = inference_model(model, dataloader, device, use_flip)
        for name, mask in current_mask_dict.items():
            # Running average of predictions across checkpoints.
            mask_dict[name] = (mask_dict[name] * pred_idx + mask) / (pred_idx + 1)

    if 'RESULT_FOLDER' in inference_config:
        result_path = Path(inference_config['RESULT_FOLDER'], inference_config['RESULT'])
    else:
        result_path = Path(experiment_folder, inference_config['RESULT'])

    with open(result_path, 'wb') as handle:
        pickle.dump(mask_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
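# The MODEL section of the config drives dynamic class loading via
# importlib.import_module and getattr. A self-contained sketch of that pattern, using a
# standard-library module so it runs as-is (the real configs point PY at a model module
# and CLASS at a network class instead):
import importlib

model_config = {'PY': 'collections', 'CLASS': 'OrderedDict', 'ARGS': {}}
module = importlib.import_module(model_config['PY'])
model_class = getattr(module, model_config['CLASS'])
instance = model_class(**model_config.get('ARGS', {}))
print(type(instance))  # <class 'collections.OrderedDict'>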
def prepare_hparams(yaml_file=None, **kwargs):
    """Prepare the trainer hyperparameters and check that all have the correct value.

    Args:
        yaml_file (str): YAML file as configuration.
        **kwargs: Hyperparameters that override the values loaded from the YAML file.

    Returns:
        obj: Hyperparameter object in TF (tf.contrib.training.HParams).
    """
    if yaml_file is not None:
        config = load_yaml(yaml_file)
        config = flat_config(config)
    else:
        config = {}
    config.update(kwargs)
    return create_hparams(config)
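# Hypothetical usage sketch: the path and keyword arguments below are made up, and it
# assumes create_hparams exposes each key as an attribute, as tf.contrib.training.HParams
# does. Because of config.update(kwargs), keyword arguments override YAML values.
hparams = prepare_hparams("config/model.yaml", learning_rate=0.001, batch_size=64)
print(hparams.batch_size)  # 64, even if the YAML file specifies another value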
def main():
    args = argparser()
    config_file = args.config_file
    process_config = load_yaml(config_file)

    print('start loading mask results....')
    inference_pkl_path = process_config['INFERENCE_PKL_FILE']
    with open(inference_pkl_path, 'rb') as handle:
        infer_pkl_dict = pickle.load(handle)

    only_largest = process_config.get('ONLY_LARGEST', False)  # defaults to False: constraint disabled
    min_contour_area = process_config.get('MIN_CONTOUR_AREA', 0)  # defaults to 0: constraint disabled
    opening = process_config.get('OPENING', True)  # defaults to True: morphological opening enabled

    binarizer_module = importlib.import_module(
        process_config['MASK_BINARIZER']['PY'])
    binarizer_class = getattr(binarizer_module,
                              process_config['MASK_BINARIZER']['CLASS'])
    if process_config['MASK_BINARIZER'].get('THRESHOLD', False):
        binarizer_threshold = process_config['MASK_BINARIZER']['THRESHOLD']
        # The constructor only needs a name; apply_transform is called on the instance later.
        binarizer_class = binarizer_class('Inference Time MaskBinarization')
    else:
        raise ValueError("Please set the THRESHOLD of MASK_BINARIZER")

    result_dic = Path(process_config['RESULT_PNG_DIC'])
    if os.path.isdir(result_dic):
        shutil.rmtree(result_dic)
    os.makedirs(result_dic, exist_ok=True)

    rle_dict = build_result_png(infer_pkl_dict, result_dic, binarizer_class,
                                binarizer_threshold, only_largest,
                                min_contour_area, opening)

    kaggle_test = process_config.get('KAGGLE_TEST', False)
    if kaggle_test:
        sub = buid_submission(rle_dict)
        sub.to_csv('submit.csv', index=False)
def main():
    args = argparser()
    config_folder = Path(args.train_cfg.strip("/"))
    # config_folder = Path('experiments/albunet_public/01_train_config_part0.yaml'.strip("/"))
    experiment_folder = config_folder.parents[0]

    train_config = load_yaml(config_folder)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(exist_ok=True, parents=True)

    main_logger = init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            map(str, train_config["DEVICE_LIST"]))

    pipeline_name = train_config['PIPELINE_NAME']

    train_transform = albu.load(train_config['TRAIN_TRANSFORMS'])
    valid_transform = albu.load(train_config['VALID_TRANSFORMS'])

    non_empty_mask_proba = train_config.get('NON_EMPTY_MASK_PROBA', 0)
    use_sampler = train_config['USE_SAMPLER']

    dataset_folder = train_config['DATA_DIRECTORY']
    folds_distr_path = train_config['FOLD']['FILE']

    num_workers = train_config['WORKERS']
    batch_size = train_config['BATCH_SIZE']
    n_folds = train_config['FOLD']['NUMBER']
    usefolds = map(str, train_config['FOLD']['USEFOLDS'])

    # local_metric_fn, global_metric_fn = init_eval_fns(train_config)

    binarizer_module = importlib.import_module(
        train_config['MASK_BINARIZER']['PY'])
    binarizer_class = getattr(binarizer_module,
                              train_config['MASK_BINARIZER']['CLASS'])
    binarizer_fn = binarizer_class(**train_config['MASK_BINARIZER']['ARGS'])

    eval_module = importlib.import_module(
        train_config['EVALUATION_METRIC']['PY'])
    eval_fn = getattr(eval_module, train_config['EVALUATION_METRIC']['CLASS'])
    eval_fn = functools.partial(eval_fn,
                                **train_config['EVALUATION_METRIC']['ARGS'])

    for fold_id in usefolds:
        main_logger.info('Start training of {} fold....'.format(fold_id))

        train_dataset = BodyMorpDataset(
            data_folder=dataset_folder,
            mode='train',
            transform=train_transform,
            fold_index=fold_id,
            folds_distr_path=folds_distr_path)
        train_sampler = PartDataSampler(folds_distr_path, fold_id,
                                        non_empty_mask_proba)
        if use_sampler:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          sampler=train_sampler)
        else:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=True)

        valid_dataset = BodyMorpDataset(
            data_folder=dataset_folder,
            mode='val',
            transform=valid_transform,
            fold_index=str(fold_id),
            folds_distr_path=folds_distr_path,
        )
        valid_dataloader = DataLoader(dataset=valid_dataset,
                                      batch_size=batch_size,
                                      num_workers=num_workers,
                                      shuffle=False)

        train_fold(train_config, experiment_folder, pipeline_name, log_dir,
                   fold_id, train_dataloader, valid_dataloader,
                   binarizer_fn, eval_fn)
def main():
    args = argparser()
    config_file = args.config_file

    inference_config = load_yaml(config_file)
    print(inference_config)

    batch_size = inference_config['BATCH_SIZE']
    device = inference_config.get('DEVICE', "cuda")  # DEVICE defaults to cuda
    if "DEVICE_LIST" in inference_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            map(str, inference_config["DEVICE_LIST"]))

    module = importlib.import_module(inference_config['MODEL']['PY'])
    model_class = getattr(module, inference_config['MODEL']['CLASS'])
    model = model_class(**inference_config['MODEL'].get('ARGS', {})).to(device)
    model.eval()

    usefolds = map(str, inference_config['FOLD']['USEFOLDS'])
    num_workers = inference_config['WORKERS']

    image_size = inference_config.get('IMAGE_SIZE', 1024)
    train_transform, valid_transform = generate_transforms(image_size)

    dataset_folder = inference_config['DATA_DIRECTORY']
    dataset = PneumothoraxDataset(
        data_folder=dataset_folder,
        mode='test',
        transform=valid_transform,
    )
    dataloader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False
    )

    use_flip = inference_config['FLIP']
    submit_best = inference_config['SUBMIT_BEST']

    checkpoints_list = []
    for fold_id in usefolds:
        checkpoints_list.extend(
            build_checkpoints_list(inference_config, submit_best=submit_best,
                                   fold_id=fold_id))

    output_mask_dict = defaultdict(int)
    output_distancemap_dict = defaultdict(int)
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()

        # Model inference.
        for item in tqdm(dataloader):
            image_ids, images = item
            predicted = inference_image(model, images, device)  # batch-level inference
            masks_predict = predicted["masks_predict"]
            if predicted["distancemap_exist"]:  # a distance map is predicted as well
                distancemap_predict = predicted["distancemap_predict"]
            if use_flip:  # flipped inference overwrites the variables above
                predicted_flipped = flipped_inference_image(model, images, device)
                masks_predict = (predicted["masks_predict"]
                                 + predicted_flipped["masks_predict"]) / 2
                if predicted_flipped["distancemap_exist"]:
                    distancemap_predict = (predicted["distancemap_predict"]
                                           + predicted_flipped["distancemap_predict"]) / 2

            # Split the batch back into individual images.
            for index, (image_single_id, mask_single_predict) in enumerate(
                    zip(image_ids, masks_predict)):
                # Running average of predictions across checkpoints.
                output_mask_dict[image_single_id] = (
                    output_mask_dict[image_single_id] * pred_idx
                    + mask_single_predict) / (pred_idx + 1)
                if predicted["distancemap_exist"]:
                    output_distancemap_dict[image_single_id] = (
                        output_distancemap_dict[image_single_id] * pred_idx
                        + distancemap_predict[index]) / (pred_idx + 1)

    print('Number of mask: {}, number of distance map: {}'.format(
        len(output_mask_dict.keys()), len(output_distancemap_dict.keys())))

    result_dict = {"mask": output_mask_dict, "distancemap": output_distancemap_dict}
    result_path = Path(inference_config['MODEL']['PRETRAINED']['PIPELINE_PATH'],
                       inference_config['RESULT_PKL_FILE'])
    with open(result_path, 'wb') as handle:
        pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
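# The inference scripts average checkpoint predictions with the incremental-mean update
# avg_k = (avg_{k-1} * k + x_k) / (k + 1), starting from 0 via defaultdict(int). A small
# sketch verifying that this update matches the plain mean of all predictions:
import numpy as np

preds = [np.array([0.2, 0.8]), np.array([0.4, 0.6]), np.array([0.6, 0.4])]
running = 0
for idx, p in enumerate(preds):
    running = (running * idx + p) / (idx + 1)
print(np.allclose(running, np.mean(preds, axis=0)))  # True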
def main():
    args = argparser()
    config_folder = Path(args.train_cfg.strip("/"))
    experiment_folder = config_folder.parents[0]

    train_config = load_yaml(config_folder)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(exist_ok=True, parents=True)

    main_logger = init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            map(str, train_config["DEVICE_LIST"]))

    pipeline_name = train_config['PIPELINE_NAME']
    dataset_folder = train_config['DATA_DIRECTORY']

    train_transform = albu.load(train_config['TRAIN_TRANSFORMS'])
    valid_transform = albu.load(train_config['VALID_TRANSFORMS'])

    non_empty_mask_proba = train_config.get('NON_EMPTY_MASK_PROBA', 0)
    use_sampler = train_config['USE_SAMPLER']

    folds_distr_path = train_config['FOLD']['FILE']

    num_workers = train_config['WORKERS']
    batch_size = train_config['BATCH_SIZE']
    n_folds = train_config['FOLD']['NUMBER']
    usefolds = map(str, train_config['FOLD']['USEFOLDS'])

    local_metric_fn, global_metric_fn = init_eval_fns(train_config)

    for fold_id in usefolds:
        main_logger.info('Start training of {} fold....'.format(fold_id))

        train_dataset = PneumothoraxDataset(
            data_folder=dataset_folder,
            mode='train',
            transform=train_transform,
            fold_index=fold_id,
            folds_distr_path=folds_distr_path,
        )
        train_sampler = PneumoSampler(folds_distr_path, fold_id,
                                      non_empty_mask_proba)
        if use_sampler:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          sampler=train_sampler)
        else:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=True)

        valid_dataset = PneumothoraxDataset(
            data_folder=dataset_folder,
            mode='val',
            transform=valid_transform,
            fold_index=str(fold_id),
            folds_distr_path=folds_distr_path,
        )
        valid_dataloader = DataLoader(dataset=valid_dataset,
                                      batch_size=batch_size,
                                      num_workers=num_workers,
                                      shuffle=False)

        train_fold(train_config, experiment_folder, pipeline_name, log_dir,
                   fold_id, train_dataloader, valid_dataloader,
                   local_metric_fn, global_metric_fn)
    freeze_model = train_config['MODEL']['FREEZE']

    Learning(optimizer, binarizer_fn, loss_fn, eval_fn, device, n_epochs,
             scheduler, freeze_model, grad_clip, grad_accum, early_stopping,
             validation_frequency, calculation_name, best_checkpoint_folder,
             checkpoints_history_folder, checkpoints_topk,
             fold_logger).run_train(model, train_dataloader, val_dataloader)


if __name__ == '__main__':
    args = argparser()
    config_file = Path(args['train_config'].strip('/'))
    experiment_folder = config_file.parents[0]

    train_config = helpers.load_yaml(config_file)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(parents=True, exist_ok=True)

    main_logger = helpers.init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    helpers.init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
            map(str, train_config['DEVICE_LIST']))

    pipeline_name = train_config['PIPELINE_NAME']
        mask = cv2.resize(
            mask,
            dsize=(1024, 1024),
            interpolation=cv2.INTER_LINEAR
        )
        # crazy_mask = (mask > 0.75).astype(np.uint8)
        # if crazy_mask.sum() < 1000:
        #     mask = np.zeros_like(mask)
        mask_dict[name] = mask_dict[name] + mask * weight
    return mask_dict


if __name__ == '__main__':
    args = argparser()
    config_path = Path(args['config'].strip("/"))
    experiment_folder = config_path.parents[0]
    sub_config = load_yaml(config_path)

    sample_sub = pd.read_csv(sub_config['SAMPLE_SUB'])
    n_objects_dict = sample_sub.ImageId.value_counts().to_dict()

    # print('start loading mask results....')
    # mask_dict = load_mask_dict(sub_config)

    use_contours = sub_config['USECONTOURS']
    min_contour_area = sub_config.get('MIN_CONTOUR_AREA', 0)

    area_threshold = sub_config['AREA_THRESHOLD']
    top_score_threshold = sub_config['TOP_SCORE_THRESHOLD']
    bottom_score_threshold = sub_config['BOTTOM_SCORE_THRESHOLD']
    if sub_config['USELEAK']:
        leak_score_threshold = sub_config['LEAK_SCORE_THRESHOLD']
        curr_masks = curr_masks.squeeze(1).cpu().detach().numpy()
        mask = (mask * pred_idx + curr_masks) / (pred_idx + 1)

    # return (mask.squeeze(0) * 255).astype('uint8')

    area_threshold = cfg['AREA_THRESHOLD']
    top_score_threshold = cfg['TOP_SCORE_THRESHOLD']
    bottom_score_threshold = cfg['BOTTOM_SCORE_THRESHOLD']
    if cfg['USELEAK']:
        leak_score_threshold = cfg['LEAK_SCORE_THRESHOLD']
    else:
        leak_score_threshold = bottom_score_threshold

    return apply_thresholds(mask.squeeze(0), 1, area_threshold,
                            top_score_threshold, bottom_score_threshold,
                            leak_score_threshold)


if __name__ == '__main__':
    args = argparser()
    assert Path(args['dcm_path']).is_file() \
        and args['dcm_path'][-3:] == 'dcm', 'image path is invalid'

    config_path = Path(args['config'].strip('/'))
    inference_config = load_yaml(config_path)

    mask = predict(args['dcm_path'], inference_config)
    dest_path = args['dcm_path'][:args['dcm_path'].rfind('.')] + '_segmented.png'
    cv2.imwrite(dest_path, mask)
    print(f'Result is stored in {dest_path}')