import logging
from pathlib import Path

import click

# CGConfig, DemultiplexingAPI, TrailblazerAPI, Flowcell, FlowcellError and
# DeleteDemuxAPI are imported from the surrounding cg package.

LOG = logging.getLogger(__name__)


def demultiplex_all(
    context: CGConfig, bcl_converter: str, flowcells_directory: click.Path, dry_run: bool
):
    """Demultiplex all flowcells that are ready under the flowcells_directory."""
    LOG.info("Running cg demultiplex all, using %s.", bcl_converter)
    if flowcells_directory:
        flowcells_directory = Path(str(flowcells_directory))
    else:
        flowcells_directory = Path(context.demultiplex.run_dir)

    demultiplex_api: DemultiplexingAPI = context.demultiplex_api
    demultiplex_api.set_dry_run(dry_run=dry_run)
    tb_api: TrailblazerAPI = context.trailblazer_api

    LOG.info("Searching for flowcells ready to demultiplex in %s", flowcells_directory)
    for sub_dir in flowcells_directory.iterdir():
        if not sub_dir.is_dir():
            continue
        LOG.info("Found directory %s", sub_dir)
        try:
            flowcell_obj = Flowcell(flowcell_path=sub_dir, bcl_converter=bcl_converter)
        except FlowcellError:
            continue

        if not demultiplex_api.is_demultiplexing_possible(flowcell=flowcell_obj) and not dry_run:
            continue

        if not flowcell_obj.validate_sample_sheet():
            LOG.warning(
                "Malformed sample sheet. Run cg demultiplex samplesheet validate %s",
                flowcell_obj.sample_sheet_path,
            )
            continue

        # Clear any leftover demultiplexing output before starting a new run.
        delete_demux_api: DeleteDemuxAPI = DeleteDemuxAPI(
            config=context,
            demultiplex_base=demultiplex_api.out_dir,
            dry_run=dry_run,
            run_path=sub_dir,  # sub_dir from iterdir() is already the full run path
        )
        delete_demux_api.delete_flow_cell(
            cg_stats=False,
            demultiplexing_dir=True,
            run_dir=False,
            housekeeper=True,
            init_files=False,
            status_db=False,
        )

        slurm_job_id: int = demultiplex_api.start_demultiplexing(flowcell=flowcell_obj)
        demultiplex_api.add_to_trailblazer(
            tb_api=tb_api, slurm_job_id=slurm_job_id, flowcell=flowcell_obj
        )
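# A minimal sketch of how demultiplex_all could be exposed as a click command.
# The option names, choices, and defaults below are assumptions for
# illustration; they are not taken from the actual cg CLI definition.
@click.command(name="all")
@click.option("--bcl-converter", type=click.Choice(["bcl2fastq", "dragen"]), default="bcl2fastq")
@click.option("--flowcells-directory", type=click.Path(exists=True, file_okay=False))
@click.option("--dry-run", is_flag=True, default=False)
@click.pass_obj
def demultiplex_all_cmd(
    context: CGConfig, bcl_converter: str, flowcells_directory: click.Path, dry_run: bool
):
    """Hypothetical CLI wrapper around demultiplex_all."""
    demultiplex_all(
        context=context,
        bcl_converter=bcl_converter,
        flowcells_directory=flowcells_directory,
        dry_run=dry_run,
    )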
from typing import Optional

import numpy as np
import pandas as pd

# get_file_name is a helper defined elsewhere in this module.


def convert(
    experiment_path: click.Path,
    group_name: str,
    world_name: str,
    output_path: Optional[click.Path],
    with_date: bool,
) -> None:
    experiment_path = Path(str(experiment_path))
    experiment_id = experiment_path.name
    # Coerce the optional output path to a Path so the `/` join below works.
    output_base_path = experiment_path if output_path is None else Path(str(output_path))

    dataframes = []
    for run_path in experiment_path.iterdir():
        if not run_path.is_dir():
            continue
        run_id = run_path.name

        # Wide table of measures: one column per measure, indexed by
        # (generation, genome_id); 'last' keeps the most recent value.
        df = pd.read_csv(run_path / 'measures2.txt', sep=' ')
        df.rename(columns={'genome': 'genome_id'}, inplace=True)
        df = pd.pivot_table(
            df,
            values='value',
            index=['generation', 'genome_id'],
            columns=['measures'],
            aggfunc='last',
        )

        # Lineage information for each genome.
        df_history = pd.read_csv(run_path / 'history.txt', sep=' ')
        df_history.rename(
            columns={
                'idgenome': 'genome_id',
                'idparent1': 'parent1_id',
                'idparent2': 'parent2_id',
            },
            inplace=True,
        )
        df_history.columns.rename('measures', inplace=True)
        df_history.set_index(['generation', 'genome_id'], inplace=True)
        df_history.drop(df_history.columns[-1], axis=1, inplace=True)

        # 'N' marks a missing parent (e.g. in the initial population).
        for c in ['parent1_id', 'parent2_id']:
            df_history[c] = df_history[c].replace('N', np.nan).astype(float)

        df = df.join(df_history, how='inner')
        # Prepend run_id as the outermost index level.
        df = pd.concat([df], keys=[run_id], names=['run_id'])
        dataframes.append(df)

    df = pd.concat(dataframes)
    n_rows = len(df)
    n_runs = len(dataframes)
    print(f'Found {n_rows} rows in {n_runs} runs ({n_rows // n_runs} on average)')

    output_file_path = output_base_path / get_file_name(
        group_name, world_name, experiment_id, with_date
    )
    df.to_hdf(output_file_path, key='descriptors')
    print(f'Wrote hdf5 file to {str(output_file_path)}')
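# Illustrative sketch of the get_file_name helper referenced above. The real
# helper is defined elsewhere; the naming scheme and the .h5 suffix here are
# assumptions, shown only to make the output-naming step concrete.
from datetime import date


def get_file_name(group_name: str, world_name: str, experiment_id: str, with_date: bool) -> str:
    stem = f'{group_name}_{world_name}_{experiment_id}'
    if with_date:
        stem += f'_{date.today().isoformat()}'  # e.g. ..._2021-06-01
    return f'{stem}.h5'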