def __prepare_tensor_dataset(self):
    tensor_dataset_path = os.path.join(
        self.metaconf["ws_path"], "tensor_datasets",
        self.dataset_params.tensor_dataset_name)
    # compare configs, if not same, refresh dataset
    current_config_snapshot_exists = H.config_snapshot(
        "dataset_params",
        self.dataset_params.params,
        "src/data/aux/.dataset_config_snapshot.json",
    )
    if not current_config_snapshot_exists:
        H.makedirs(tensor_dataset_path)
        _tqdm_kwargs = {
            "desc": "Preparing TensorDataset",
            "total": len(self.generic_dataset)
        }
        for i, sample in tqdm(enumerate(self.generic_dataset), **_tqdm_kwargs):
            f_folder_path = os.path.join(tensor_dataset_path,
                                         f"{sample['texture']}")
            H.makedirs(f_folder_path)
            f_path = os.path.join(f_folder_path, f"nodule_{i}.pt")
            save_nodules = {
                "nodule": sample["nodule"],
                "texture": sample["texture"]
            }
            torch.save(save_nodules, f_path)
    return tensor_dataset_path
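# --- Hypothetical sketch, not part of the original sources ---
# H.config_snapshot above is a project-local helper; one plausible implementation,
# assuming it returns True when a stored JSON snapshot already matches the given
# params and otherwise (re)writes the snapshot, could look like this. The name,
# signature, and behavior are assumptions made for illustration only.
import json
import os

def config_snapshot(name, params, snapshot_path):
    """Return True if `params` matches the snapshot stored under `name`;
    otherwise save the new snapshot and return False."""
    snapshot = {}
    if os.path.exists(snapshot_path):
        with open(snapshot_path) as f:
            snapshot = json.load(f)
    if snapshot.get(name) == params:
        return True
    snapshot[name] = params
    with open(snapshot_path, "w") as f:
        json.dump(snapshot, f, indent=2)
    return False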
# Per-patient workflow: align each sample's FASTQ pair to an indexed BAM,
# then run the downstream tumour analyses over all sample BAMs.
def patient_workflow(config, patient_id, patient_input, output_file):
    workflow = pypeliner.workflow.Workflow()

    patient_bam_dir = config["bam_directory"] + patient_id
    patient_result_dir = config["results_dir"] + patient_id
    helpers.makedirs(patient_bam_dir)
    helpers.makedirs(patient_result_dir)

    input_args = helpers.create_input_args(patient_input, patient_bam_dir)

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id', ),
        value=input_args['all_samples'])

    workflow.subworkflow(
        name='align_samples',
        func=alignment.align_sample,
        axes=('sample_id', ),
        args=(
            config,
            mgd.InputFile('fastq_1', 'sample_id', fnames=input_args['fastqs_r1']),
            mgd.InputFile('fastq_2', 'sample_id', fnames=input_args['fastqs_r2']),
            mgd.InputInstance('sample_id'),
            mgd.OutputFile('sample.bam', 'sample_id', fnames=input_args['all_bams']),
            mgd.OutputFile('sample.bam.bai', 'sample_id', fnames=input_args['all_bais']),
        ))

    workflow.subworkflow(
        name='run_analyses',
        func=analysis.partition_tumour,
        args=(
            config,
            input_args,
            patient_id,
            patient_result_dir,
            mgd.InputFile('sample.bam', 'sample_id', fnames=input_args['all_bams'],
                          axes_origin=[]),
            mgd.InputFile('sample.bam.bai', 'sample_id', fnames=input_args['all_bais'],
                          axes_origin=[]),
            mgd.OutputFile(output_file),
        ))

    return workflow
# Top-level entry point: fan the inputs out per patient and run each
# patient_workflow through pypeliner.
def ctDNA_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config'])
    for arg, value in args.items():  # dict.iteritems() is Python 2-only
        config[arg] = value

    helpers.makedirs(config["bam_directory"])
    helpers.makedirs(config["results_dir"])

    inputs = helpers.load_yaml(args['input_yaml'])
    patients = list(inputs.keys())

    workflow.setobj(
        obj=mgd.OutputChunks('patient_id', ),
        value=patients)

    workflow.transform(
        name='get_input_by_patient',
        func=helpers.get_input_by_patient,
        ret=mgd.TempOutputObj('patient_input', 'patient_id'),
        axes=('patient_id', ),
        args=(
            inputs,
            mgd.InputInstance('patient_id'),
        ))

    workflow.subworkflow(
        name='patient_workflow',
        func=patient_workflow,
        axes=('patient_id', ),
        args=(
            config,
            mgd.InputInstance('patient_id'),
            mgd.TempInputObj('patient_input', 'patient_id'),
            mgd.OutputFile(
                os.path.join(config['results_dir'], '{patient_id}.log'),
                'patient_id'),
        ))

    pyp.run(workflow)
def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx,
                   second_order_closure=None):
    gradplot_savepath = H.makedirs(
        os.path.join(self.metaconf["ws_path"], "artifacts", "gradflow_plots"))
    fig = H.plot_grad_flow(self.named_parameters(), "VAE", self.global_step,
                           gradplot_savepath)
    self.logger.experiment.add_figure("gradflow_plots", fig, self.global_step)
    optimizer.step()
    optimizer.zero_grad()
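# --- Hypothetical sketch, not part of the original sources ---
# H.plot_grad_flow is a project-local helper; a minimal version that produces the
# figure logged above, assuming it plots the mean absolute gradient per named
# parameter and saves the plot into save_dir, might look like this. The name,
# signature, and behavior are assumptions made for illustration only.
import os
import matplotlib.pyplot as plt

def plot_grad_flow(named_parameters, tag, step, save_dir):
    """Bar-plot mean |grad| per trainable parameter, save it, and return the figure."""
    names, means = [], []
    for name, param in named_parameters:
        if param.requires_grad and param.grad is not None:
            names.append(name)
            means.append(param.grad.abs().mean().item())
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.bar(range(len(means)), means)
    ax.set_xticks(range(len(names)))
    ax.set_xticklabels(names, rotation=90, fontsize=6)
    ax.set_ylabel("mean |grad|")
    ax.set_title(f"{tag} gradient flow, step {step}")
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, f"{tag}_gradflow_{step}.png"))
    return fig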
def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx,
                   second_order_closure=None):
    g_norm = H.get_gradient_norm(self.parameters())
    gradplot_savepath = H.makedirs(
        os.path.join(self.metaconf["ws_path"], "artifacts", "gradflow_plots"))
    fig = H.plot_grad_flow(self.named_parameters(), "RLS", 0, gradplot_savepath)
    self.logger.experiment.add_figure("gradflow_plots", fig, self.global_step)

    if np.isnan(g_norm):
        log.warning(" gradient norm is NaN -> skip")
        optimizer.zero_grad()
        return
    elif g_norm > self.hparams.optimizer_max_grad_norm:
        log.warning(f" gradient norm is too high: {g_norm:.5f} -> clip to OPTIMIZER_MAX_GRAD_NORM")
        torch.nn.utils.clip_grad_norm_(self.parameters(),
                                       self.hparams.optimizer_max_grad_norm)
    else:
        log.info(f" gradient norm: {g_norm:.5f}")

    optimizer.step()
    optimizer.zero_grad()
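# --- Hypothetical sketch, not part of the original sources ---
# H.get_gradient_norm is a project-local helper; a minimal version, assuming it
# returns the total L2 norm over all available parameter gradients (NaN when no
# gradients exist yet), might look like this. Names and behavior are assumptions
# made for illustration only.
import torch

def get_gradient_norm(parameters):
    """Return the total L2 norm of all parameter gradients as a Python float."""
    grads = [p.grad.detach() for p in parameters if p.grad is not None]
    if not grads:
        return float("nan")
    per_param = torch.stack([torch.norm(g, 2) for g in grads])
    return torch.norm(per_param, 2).item()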