Пример #1
0
def alignment_workflow(args):
    """Assemble the pypeliner workflow for the alignment step.

    Uses the ``alignment`` section of the loaded config, registers the
    ``(cell_id, lane)`` chunks taken from the keys of the first fastq
    mapping, schedules ``align.create_alignment_workflow`` as a subworkflow
    and adds two ``generate_and_upload_metadata`` transforms: one for the
    results directory and one for the per-cell bams directory.

    Args:
        args: mapping of run options. Keys read here are ``library_id``,
            ``out_dir``, ``bams_dir``, ``trim``, ``sequencing_center`` and
            ``input_yaml``; the whole mapping is also passed to
            ``inpututils.load_config``.

    Returns:
        The populated ``pypeliner.workflow.Workflow`` instance.
    """
    align_config = inpututils.load_config(args)['alignment']

    library_id = args["library_id"]
    results_dir = args["out_dir"]
    cell_bams_dir = args["bams_dir"]
    trim_flag = args['trim']
    seq_center = args['sequencing_center']

    yaml_path = args['input_yaml']
    sample_info = inpututils.get_sample_info(yaml_path)
    cell_ids = inpututils.get_samples(yaml_path)
    fastq1_by_chunk, fastq2_by_chunk = inpututils.get_fastqs(yaml_path)

    result_paths = get_output_files(results_dir, library_id)
    results_meta_yaml = os.path.join(results_dir, 'metadata.yaml')

    cell_bam_template = os.path.join(cell_bams_dir, '{cell_id}.bam')
    mt_cell_bam_template = os.path.join(cell_bams_dir, '{cell_id}_MT.bam')
    bams_meta_yaml = os.path.join(cell_bams_dir, 'metadata.yaml')

    # Chunk keys are indexed as (cell_id, lane).
    lane_ids = sorted({chunk[1] for chunk in fastq1_by_chunk})
    unique_cells = sorted({chunk[0] for chunk in fastq1_by_chunk})

    input_yaml_copy = os.path.join(results_dir, 'input.yaml')

    def build_metadata(kind):
        # A fresh dict per transform; the id lists themselves are shared.
        return {
            'library_id': library_id,
            'cell_ids': unique_cells,
            'lane_ids': lane_ids,
            'type': kind,
        }

    workflow = pypeliner.workflow.Workflow()

    workflow.setobj(
        obj=mgd.OutputChunks('cell_id', 'lane'),
        value=list(fastq1_by_chunk),
    )

    # Positional arguments of the subworkflow, built in their call order.
    fastq_inputs = tuple(
        mgd.InputFile(
            name, 'cell_id', 'lane', fnames=fnames, axes_origin=[])
        for name, fnames in (
            ('fastq_1', fastq1_by_chunk),
            ('fastq_2', fastq2_by_chunk),
        )
    )
    bam_outputs = tuple(
        mgd.OutputFile(
            name,
            'cell_id',
            template=template,
            axes_origin=[],
            extensions=['.bai'],
        )
        for name, template in (
            ('bam_markdups', cell_bam_template),
            ('mt_bam_markdups', mt_cell_bam_template),
        )
    )
    metric_outputs = tuple(
        mgd.OutputFile(result_paths[key])
        for key in (
            'alignment_metrics_csv',
            'gc_metrics_csv',
            'fastqc_metrics_csv',
            'plot_metrics_output',
        )
    )
    subworkflow_args = fastq_inputs + bam_outputs + metric_outputs + (
        align_config['ref_genome'],
        align_config,
        sample_info,
        cell_ids,
        mgd.OutputFile(result_paths['alignment_metrics_tar']),
        library_id,
        trim_flag,
        seq_center,
    )

    workflow.subworkflow(
        name='alignment_workflow',
        func=align.create_alignment_workflow,
        args=subworkflow_args,
    )

    # Metadata for the alignment results directory.
    results_meta_args = (
        sys.argv[:],
        results_dir,
        list(result_paths.values()),
        mgd.OutputFile(results_meta_yaml),
    )
    results_meta_kwargs = {
        'input_yaml_data': inpututils.load_yaml(yaml_path),
        'input_yaml': mgd.OutputFile(input_yaml_copy),
        'metadata': build_metadata('alignment'),
    }
    workflow.transform(
        name='generate_meta_files_results',
        func='single_cell.utils.helpers.generate_and_upload_metadata',
        args=results_meta_args,
        kwargs=results_meta_kwargs,
    )

    # Metadata for the per-cell bams directory.
    bams_meta_args = (
        sys.argv[:],
        cell_bams_dir,
        mgd.Template('aligned.bam', 'cell_id', template=cell_bam_template),
        mgd.OutputFile(bams_meta_yaml),
    )
    bams_meta_kwargs = {
        'metadata': build_metadata('cellbams'),
        'template': (
            mgd.InputChunks('cell_id'),
            cell_bam_template,
            'cell_id',
        ),
    }
    workflow.transform(
        name='generate_meta_files_bams',
        func='single_cell.utils.helpers.generate_and_upload_metadata',
        args=bams_meta_args,
        kwargs=bams_meta_kwargs,
    )

    return workflow
Пример #2
0
def hmmcopy_workflow(args):
    """Assemble the pypeliner workflow for the hmmcopy step.

    Uses the ``hmmcopy`` section of the loaded config, creates a workflow
    whose context carries the configured ``single_cell_pipeline`` docker
    image, registers one ``cell_id`` chunk per key of the bam mapping,
    schedules ``hmmcopy.create_hmmcopy_workflow`` as a subworkflow and adds a
    ``generate_and_upload_metadata`` transform for the output directory.

    Args:
        args: mapping of run options. Keys read here are ``input_yaml``,
            ``library_id`` and ``out_dir``; the whole mapping is also passed
            to ``inpututils.load_config``.

    Returns:
        The populated ``pypeliner.workflow.Workflow`` instance.
    """
    hmm_config = inpututils.load_config(args)['hmmcopy']

    yaml_path = args['input_yaml']
    sample_info = inpututils.get_sample_info(yaml_path)
    cell_ids = inpututils.get_samples(yaml_path)
    bams_by_cell = inpututils.get_bams(yaml_path)

    library_id = args["library_id"]

    workflow = pypeliner.workflow.Workflow(
        ctx={'docker_image': hmm_config['docker']['single_cell_pipeline']},
    )

    results_dir = args["out_dir"]
    result_paths = get_output_files(results_dir, library_id)
    results_meta_yaml = os.path.join(results_dir, 'metadata.yaml')
    input_yaml_copy = os.path.join(results_dir, 'input.yaml')

    workflow.setobj(
        obj=mgd.OutputChunks('cell_id'),
        value=list(bams_by_cell),
    )

    # Positional arguments of the subworkflow, built in their call order:
    # the per-cell bam input, the result outputs, then the plain values.
    bam_input = mgd.InputFile(
        'bam_markdups',
        'cell_id',
        fnames=bams_by_cell,
        extensions=['.bai'],
    )
    result_outputs = tuple(
        mgd.OutputFile(result_paths[key])
        for key in (
            'reads_csvs',
            'segs_csvs',
            'metrics_csvs',
            'params_csvs',
            'igv_csvs',
            'segs_pdf',
            'bias_pdf',
            'heatmap_pdf',
            'metrics_pdf',
            'kernel_density_pdf',
            'hmmcopy_data_tar',
        )
    )
    subworkflow_args = (
        (bam_input,) + result_outputs + (cell_ids, hmm_config, sample_info)
    )

    workflow.subworkflow(
        name='hmmcopy_workflow',
        func=hmmcopy.create_hmmcopy_workflow,
        args=subworkflow_args,
    )

    # Metadata for the hmmcopy results directory.
    meta_args = (
        sys.argv[:],
        results_dir,
        list(result_paths.values()),
        mgd.OutputFile(results_meta_yaml),
    )
    meta_kwargs = {
        'input_yaml_data': inpututils.load_yaml(yaml_path),
        'input_yaml': mgd.OutputFile(input_yaml_copy),
        'metadata': {
            'library_id': library_id,
            'cell_ids': list(bams_by_cell),
            'type': 'hmmcopy',
        },
    }
    workflow.transform(
        name='generate_meta_files_results',
        func='single_cell.utils.helpers.generate_and_upload_metadata',
        args=meta_args,
        kwargs=meta_kwargs,
    )

    return workflow