Example #1
def join(args, outs, chunk_defs, chunk_outs):
    if args.skip:
        return

    chunk_out = chunk_outs[0]
    cr_io.copy(chunk_out.pca_h5, outs.pca_h5)
    cr_io.copytree(chunk_out.pca_csv, outs.pca_csv)
Example #2
def main(args, outs):
    list_of_files = [
        args.protospacer_calls_summary, args.protospacer_calls_per_cell,
        args.cells_per_protospacer, args.protospacer_umi_thresholds_csv,
        args.protospacer_umi_thresholds_json,
        args.perturbation_efficiencies_by_feature,
        args.perturbations_efficiencies_by_target
    ]

    cr_io.makedirs(outs.crispr_analysis, allow_existing=True)

    for (file_path, file_name) in itertools.izip(
            list_of_files, protospacer_calling.CRISPR_ANALYSIS_FILE_NAMES):
        if file_path is None:
            continue
        cr_io.copy(file_path, os.path.join(outs.crispr_analysis, file_name))

    if os.path.isdir(args.perturbation_effects_by_feature):
        perturbation_effects_by_feature_dir = os.path.join(
            outs.crispr_analysis, 'perturbation_effects_by_feature')
        cr_io.makedirs(perturbation_effects_by_feature_dir,
                       allow_existing=True)
        cr_io.copytree(args.perturbation_effects_by_feature,
                       perturbation_effects_by_feature_dir,
                       allow_existing=True)

    if os.path.isdir(args.perturbation_effects_by_target):
        perturbation_effects_by_target_dir = os.path.join(
            outs.crispr_analysis, 'perturbation_effects_by_target')
        cr_io.makedirs(perturbation_effects_by_target_dir, allow_existing=True)
        cr_io.copytree(args.perturbation_effects_by_target,
                       perturbation_effects_by_target_dir,
                       allow_existing=True)
Example #3
def join(args, outs, chunk_defs, chunk_outs):
    if args.skip or not args.is_multi_genome:
        return

    chunk_out = chunk_outs[0]
    cr_io.copy(chunk_out.multi_genome_summary, outs.multi_genome_summary)
    cr_io.copytree(chunk_out.multi_genome_csv, outs.multi_genome_csv)
    cr_io.copytree(chunk_out.multi_genome_json, outs.multi_genome_json)
Example #4
def main(args, outs):
    if args.skip:
        return

    for h5, csv in zip(args.pca_h5_list, args.pca_csv_list):
        if h5 is not None and csv is not None:
            cr_io.copy(h5, outs.pca_h5)
            cr_io.copytree(csv, outs.pca_csv)
Example #5
def main(args, outs):
    if args.selector:
        if args.peaks1 is None:
            raise IOError("Input peaks file 1 is not present")
        cr_io.copy(args.peaks1, outs.peaks)
    else:
        if args.peaks2 is None:
            raise IOError("Input peaks file 2 is not present")
        cr_io.copy(args.peaks2, outs.peaks)
Example #6
def main(args, outs):
    parsed = parse_parameters(args.params_csv)
    for param in ANALYSIS_PARAMS:
        if param in parsed:
            setattr(outs, param, parsed[param])
        else:
            setattr(outs, param, None)

    if args.params_csv is not None:
        cr_io.copy(args.params_csv, outs.params_csv)
Example #7
def write_genome_fasta(self, out_fasta_fn):
    if len(self.genomes) > 1:
        with open(out_fasta_fn, 'w') as f:
            for genome_prefix, in_fasta_fn in itertools.izip(self.genome_prefixes, self.in_fasta_fns):
                with open(in_fasta_fn, 'r') as g:
                    for line in g:
                        line = line.strip()
                        if line.startswith('>'):
                            line = '>' + genome_prefix + '_' + line[1:]
                        f.write(line + '\n')
    else:
        cr_io.copy(self.in_fasta_fns[0], out_fasta_fn)
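For illustration, a minimal sketch of the header rewrite performed above when more than one genome is combined; the prefix and FASTA header are hypothetical values, not taken from the source:

genome_prefix = 'GRCh38'                        # hypothetical genome prefix
line = '>1 dna:chromosome chromosome:GRCh38:1'  # hypothetical FASTA header line
print('>' + genome_prefix + '_' + line[1:])
# prints: >GRCh38_1 dna:chromosome chromosome:GRCh38:1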
Example #8
def get_gem_group_index_json(args, outs):
    if args.gem_group_index_json:
        cr_io.copy(args.gem_group_index_json, outs.gem_group_index_json)
    else:
        generated_index = cr_matrix.get_gem_group_index(
            args.feature_barcode_matrix)
        if generated_index is not None:
            with open(outs.gem_group_index_json, 'w') as outfile:
                tk_json.dump_numpy({"gem_group_index": generated_index},
                                   outfile)
        else:
            outs.gem_group_index_json = None
    return outs.gem_group_index_json
Example #9
def join(args, outs, chunk_defs, chunk_outs):
    chunk_out = chunk_outs[0]

    cr_io.copy(chunk_out.web_summary, outs.web_summary)
    cr_io.copy(chunk_out.alerts, outs.alerts)
    cr_io.copy(chunk_out.metrics_summary_json, outs.metrics_summary_json)
    cr_io.copy(chunk_out.metrics_summary_csv, outs.metrics_summary_csv)
Example #10
def join(args, outs, chunk_defs, chunk_outs):
    # Copy files from single chunk to join
    for out_name in [
            'summary',
            'clonotype_assignments',
            'contig_annotations',
            'contig_annotations_csv',
            'filtered_contig_annotations_csv',
            'contig_annotations_pickle',
    ]:

        src = getattr(chunk_outs[0], out_name)
        dest = getattr(outs, out_name)
        if os.path.isfile(src):
            cr_io.copy(src, dest)
        else:
            setattr(outs, out_name, None)
Example #11
def main(args, outs):
    if args.selector:
        if args.cell_barcodes1 is None:
            raise IOError("Input barcodes file 1 is not present")
        cr_io.copy(args.cell_barcodes1, outs.cell_barcodes)
        cr_io.copy(args.metrics1, outs.metrics)
    else:
        if args.cell_barcodes2 is None:
            raise IOError("Input barcodes file 2 is not present")
        cr_io.copy(args.cell_barcodes2, outs.cell_barcodes)
        cr_io.copy(args.metrics2, outs.metrics)
Example #12
def join(args, outs, chunk_defs, chunk_outs):
    cr_io.copy(args.extract_reads_summary, outs.summary)
    cr_io.copy(args.barcode_counts, outs.barcode_counts)
    cr_io.copy(args.feature_counts, outs.feature_counts)

    outs.gem_groups = args.gem_groups
    outs.library_types = args.library_types
    outs.library_ids = args.library_ids
    outs.read_groups = args.read_groups
    outs.align = args.align
    outs.bam_comments = args.bam_comments

    outs.read1s = [co.read1s for co in chunk_outs]
    outs.read2s = [co.read2s for co in chunk_outs]
    outs.tags = [co.tags for co in chunk_outs]
Example #13
def main(args, outs):
    if args.read1 is not None:
        # Ensure same extension
        out_path, _ = cr_utils.splitexts(outs.read1s)
        _, in_ext = cr_utils.splitexts(args.read1)
        outs.read1s = out_path + in_ext
        cr_io.copy(args.read1, outs.read1s)
    if args.read2 is not None:
        out_path, _ = cr_utils.splitexts(outs.read2s)
        _, in_ext = cr_utils.splitexts(args.read2)
        outs.read2s = out_path + in_ext
        cr_io.copy(args.read2, outs.read2s)
    if args.chunk_tags is not None:
        out_path, _ = cr_utils.splitexts(outs.tags)
        _, in_ext = cr_utils.splitexts(args.chunk_tags)
        outs.tags = out_path + in_ext
        cr_io.copy(args.chunk_tags, outs.tags)
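The splitext juggling above keeps the input's compound extension (for example, a gzipped FASTQ stays .fastq.gz) on the renamed out path. A minimal sketch with a hedged stand-in for cr_utils.splitexts, assuming it splits off the full multi-part extension; all paths are hypothetical:

import os

def splitexts(path):
    # Hedged stand-in for cr_utils.splitexts: split a path into a base and its
    # full compound extension, e.g. '/tmp/reads.fastq.gz' -> ('/tmp/reads', '.fastq.gz').
    dirname, base = os.path.split(path)
    root = base.split('.', 1)[0]
    return os.path.join(dirname, root), base[len(root):]

out_path, _ = splitexts('/outs/read1s.fastq')    # hypothetical default out path
_, in_ext = splitexts('/inputs/read1.fastq.gz')  # hypothetical input path
print(out_path + in_ext)                         # /outs/read1s.fastq.gz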
Example #14
def join(args, outs, chunk_defs, chunk_outs):
    summary_files = [
        args.extract_reads_summary,
        args.correct_barcodes_summary,
        args.trim_reads_summary,
    ]

    summary_files = [
        sum_file for sum_file in summary_files if sum_file is not None
    ]

    cr_report.merge_jsons(summary_files, outs.summary)

    cr_io.copy(args.raw_barcode_counts, outs.raw_barcode_counts)
    cr_io.copy(args.corrected_barcode_counts, outs.corrected_barcode_counts)
    cr_io.copy(args.barcode_summary, outs.barcode_summary)
    outs.gem_groups = args.gem_groups
    outs.read_groups = args.read_groups
    outs.align = args.align
    outs.bam_comments = args.bam_comments

    outs.read1s = [co.read1s for co in chunk_outs]
    outs.read2s = [co.read2s for co in chunk_outs]
    outs.corrected_bcs = [co.corrected_bcs for co in chunk_outs]
Example #15
def join(args, outs, chunk_defs, chunk_outs):
    outs.chain_type = chunk_outs[0].chain_type
    cr_io.copy(chunk_outs[0].summary, outs.summary)
Example #16
def join(args, outs, chunk_defs, chunk_outs):
    for name in ('fragments', 'fragments_index', 'aggr_csv'):
        infile = getattr(args, name)
        if infile is None:
            # Rebinding a local loop variable would not clear the out field;
            # set it to None explicitly so downstream stages see no file.
            setattr(outs, name, None)
        else:
            cr_io.copy(infile, getattr(outs, name))
Example #17
def main(args, outs):
    cr_io.copy(args.trim_reads_summary, outs.summary)
Example #18
def build_reference_fasta_from_ensembl(gtf_paths, transcripts_to_remove_path,
                                       genome_fasta_path, reference_path,
                                       reference_name, ref_version, mkref_version):
    """Create cellranger-compatible vdj reference files from a list of ENSEMBL-like GTF files.

    Input files are concatenated. No attempt to merge/reconcile information
    across them is made. Providing the files in a different order might change the
    output in cases where there are multiple entries with the same transcript id
    and the same feature type (e.g. V-region).
    """

    transcripts = collections.defaultdict(list)

    if transcripts_to_remove_path:
        with open(transcripts_to_remove_path) as f:
            rm_transcripts = set([line.strip() for line in f.readlines()])
    else:
        rm_transcripts = set()

    # Note: We cannot symlink here because some filesystems in the wild
    #       do not support symlinks.
    print 'Copying genome reference sequence...'
    os.makedirs(os.path.dirname(get_vdj_reference_fasta(reference_path)))
    tmp_genome_fa_path = os.path.join(reference_path, 'genome.fasta')
    cr_io.copy(genome_fasta_path, tmp_genome_fa_path)
    print '...done.\n'

    print 'Indexing genome reference sequence...'
    tk_subproc.check_call(['samtools', 'faidx', tmp_genome_fa_path])
    print '...done.\n'

    print 'Loading genome reference sequence...'
    genome_fasta = pysam.FastaFile(tmp_genome_fa_path)
    print '...done.\n'

    print 'Computing hash of genome FASTA file...'
    fasta_hash = cr_io.compute_hash_of_file(tmp_genome_fa_path)
    print '...done.\n'

    for gtf in gtf_paths:
        print 'Reading GTF {}'.format(gtf)

        for line_no, entry in enumerate(get_gtf_iter(open(gtf))):
            if entry.feature not in [ENSEMBL_FIVE_PRIME_UTR_FEATURE, ENSEMBL_CDS_FEATURE]:
                continue
            entry = parse_attributes(entry)
            transcript_id = entry.attributes.get('transcript_id')
            transcript_biotype = entry.attributes.get('transcript_biotype')
            gene_biotype = entry.attributes.get('gene_biotype')
            gene_name = entry.attributes.get('gene_name')

            # Skip irrelevant biotypes
            if transcript_biotype not in ENSEMBL_VDJ_BIOTYPES and gene_biotype not in ENSEMBL_VDJ_BIOTYPES:
                continue

            # Skip blacklisted transcript IDs
            if transcript_id in rm_transcripts:
                continue

            # Warn and skip if transcript_id missing
            if transcript_id is None:
                print 'Warning: Entry on row %d has no transcript_id' % line_no
                continue

            # Warn and skip if gene_name missing
            if gene_name is None:
                print 'Warning: Transcript %s on row %d has biotype %s but no gene_name. Skipping.' % (transcript_id, line_no, transcript_biotype)
                continue

            # Infer region type from biotype
            if transcript_biotype in ENSEMBL_VDJ_BIOTYPES:
                vdj_feature = infer_ensembl_vdj_feature_type(entry.feature, transcript_biotype)
            else:
                vdj_feature = infer_ensembl_vdj_feature_type(entry.feature, gene_biotype)

            # Warn and skip if region type could not be inferred
            if vdj_feature is None:
                print 'Warning: Transcript %s has biotype %s. Could not infer VDJ gene type. Skipping.' % (transcript_id, transcript_biotype)
                continue

            # Features that share a transcript_id and feature type are presumably exons
            # so keep them together.
            transcripts[(transcript_id, vdj_feature)].append(entry)

        print '...done.\n'

    print 'Computing hash of genes GTF files...'
    digest = hashlib.sha1()
    # concatenate all the hashes into a string and then hash that string
    digest.update(''.join(cr_io.compute_hash_of_file(gtf) for gtf in gtf_paths))
    gtf_hash = digest.hexdigest()
    print '...done.\n'

    print 'Fetching sequences...'
    out_fasta = open(get_vdj_reference_fasta(reference_path), 'w')

    feature_id = 1
    seen_features = set()

    for (transcript_id, region_type), regions in transcripts.iteritems():
        if not all(r.chrom == regions[0].chrom for r in regions):
            chroms = sorted(list(set([r.chrom for r in regions])))
            print 'Warning: Transcript %s spans multiple contigs: %s. Skipping.' % (transcript_id, str(chroms))
            continue

        if not all(r.strand == regions[0].strand for r in regions):
            print 'Warning: Transcript %s spans multiple strands. Skipping.' % transcript_id
            continue

        chrom = regions[0].chrom
        strand = regions[0].strand
        ens_gene_name = standardize_ensembl_gene_name(regions[0].attributes['gene_name'])
        transcript_id = regions[0].attributes['transcript_id']

        if chrom not in genome_fasta:
            print 'Warning: Transcript %s is on contig "%s" which is not in the provided reference fasta. Skipping.' % (transcript_id, chrom)
            continue

        # Build sequence
        regions.sort(key=lambda r: r.start)
        seq = ''
        for region in regions:
            # GTF coordinates are 1-based
            start, end = int(region.start)-1, int(region.end)
            seq += genome_fasta.fetch(chrom, start, end)

        # Revcomp if transcript on reverse strand
        if strand == '-':
            seq = tk_seq.get_rev_comp(seq)

        # Strip Ns from termini
        if 'N' in seq:
            print 'Warning: Feature %s contains Ns. Stripping from the ends.' % str((ens_gene_name, transcript_id, region_type))
            seq = seq.strip('N')

        if len(seq) == 0:
            print 'Warning: Feature %s is all Ns. Skipping.' % str((ens_gene_name, transcript_id, region_type))
            continue

        # Infer various attributes from the Ensembl gene name
        record_id = transcript_id
        gene_name = ens_gene_name
        display_name = make_display_name(gene_name=gene_name, allele_name=None)
        chain = infer_ensembl_vdj_chain(gene_name)
        chain_type = infer_ensembl_vdj_chain_type(gene_name)
        # Ensembl doesn't encode alleles
        allele_name = '00'

        # Disallow spaces in these fields
        if ' ' in region_type:
            raise ValueError('Spaces not allowed in region type: "%s"' % region_type)
        if ' ' in gene_name:
            raise ValueError('Spaces not allowed in gene name: "%s"' % gene_name)
        if ' ' in record_id:
            raise ValueError('Spaces not allowed in record ID: "%s"' % record_id)

        # Warn on features we couldn't classify properly
        if chain_type not in vdj_constants.VDJ_CHAIN_TYPES:
            print ('Warning: Could not infer chain type for: %s. ' + \
                'Expected the first two characters of the gene name to be in %s. Feature skipped.') % \
                (str((gene_name, record_id, region_type)),
                 str(tuple(vdj_constants.VDJ_CHAIN_TYPES)))
            continue

        if region_type in vdj_constants.VDJ_C_FEATURE_TYPES and chain in vdj_constants.CHAINS_WITH_ISOTYPES:
            isotype = infer_ensembl_isotype(ens_gene_name)
        else:
            isotype = None

        feature = VdjAnnotationFeature(feature_id=feature_id,
                                       record_id=record_id,
                                       display_name=display_name,
                                       gene_name=gene_name,
                                       region_type=region_type,
                                       chain_type=chain_type,
                                       chain=chain,
                                       isotype=isotype,
                                       allele_name=allele_name,
                                       sequence=seq,
                                       )

        # Don't add duplicate entries
        feat_key = get_duplicate_feature_key(feature)
        if feat_key in seen_features:
            print 'Warning: Skipping duplicate entry for %s (%s, %s).' % (display_name,
                                                                          region_type,
                                                                          record_id)
            continue
        seen_features.add(feat_key)

        feature_id += 1

        out_fasta.write(convert_vdj_feature_to_fasta_entry(feature) + '\n')
    print '...done.\n'

    print 'Deleting copy of genome fasta...'
    os.remove(tmp_genome_fa_path)
    os.remove(tmp_genome_fa_path + '.fai')
    print '...done.\n'

    print 'Writing metadata JSON file into reference folder...'
    metadata = {
        cr_constants.REFERENCE_GENOMES_KEY: reference_name,
        cr_constants.REFERENCE_FASTA_HASH_KEY: fasta_hash,
        cr_constants.REFERENCE_GTF_HASH_KEY: gtf_hash,
        cr_constants.REFERENCE_INPUT_FASTA_KEY: os.path.basename(genome_fasta_path),
        cr_constants.REFERENCE_INPUT_GTF_KEY: ','.join([os.path.basename(gtf_path) for gtf_path in gtf_paths]),
        cr_constants.REFERENCE_VERSION_KEY: ref_version,
        cr_constants.REFERENCE_MKREF_VERSION_KEY: mkref_version,
        cr_constants.REFERENCE_TYPE_KEY: vdj_constants.REFERENCE_TYPE,
    }
    with open(os.path.join(reference_path, cr_constants.REFERENCE_METADATA_FILE), 'w') as json_file:
        json.dump(tk_safe_json.json_sanitize(metadata), json_file, sort_keys=True, indent=4)
    print '...done.\n'
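For context, a sketch of how the builder above might be invoked; every path and version string here is a hypothetical placeholder rather than a value from the source:

build_reference_fasta_from_ensembl(
    gtf_paths=['Homo_sapiens.GRCh38.94.chr.gtf'],
    transcripts_to_remove_path=None,  # or a text file listing one transcript ID per line
    genome_fasta_path='Homo_sapiens.GRCh38.dna.primary_assembly.fa',
    reference_path='vdj_GRCh38_ensembl',
    reference_name='GRCh38-vdj-ensembl',
    ref_version='94',
    mkref_version='3.0.0')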
Example #19
def join(args, outs, chunk_defs, chunk_outs):
    chunk_out = chunk_outs[0]

    cr_io.copy(chunk_out.summary, outs.summary)
Example #20
def join(args, outs, chunk_defs, chunk_outs):
    """Merge sorted, downsampled fragments from each chunk,
    emit pre and post normalization sensitivity metrics per library
    and merge input peaks if provided"""

    with open(args.library_info, 'r') as f:
        library_info = pickle.load(f)

    ctg_mgr = ReferenceManager(args.reference_path)

    # Merge cell_barcodes
    cell_barcodes = {}
    for group in library_info:
        cell_barcodes_group = get_cell_barcodes(library_info[group]['cells'],
                                                args.reference_path,
                                                with_species=True)
        group_suffix = "-{}".format(group)
        for species in cell_barcodes_group.keys():
            if species not in cell_barcodes.keys():
                cell_barcodes[species] = set()
            cell_barcodes[species].update({
                bc.split("-")[0] + group_suffix
                for bc in cell_barcodes_group[species]
            })
    with open(outs.cell_barcodes, 'w') as f:
        for species in cell_barcodes:
            f.write(species + "," + ",".join(cell_barcodes[species]) + "\n")

    # Merge peaks if provided
    input_peaks = [
        library_info[group]['peaks'] for group in library_info
        if 'peaks' in library_info[group]
    ]
    if len(input_peaks) == 1:
        cr_io.copy(input_peaks[0], outs.peaks)
        outs.skip_peakcalling = True
    if len(input_peaks) == 0:
        outs.peaks = None
        outs.skip_peakcalling = False
    if len(input_peaks) > 1:
        outs.skip_peakcalling = True
        # cat
        with open(outs.peaks, 'w') as outf:
            for ip in input_peaks:
                with open(ip, 'r') as inf:
                    for line in inf:
                        outf.write(line)
        # sort
        peaks = BedTool(outs.peaks)
        peaks = peaks.sort(faidx=ctg_mgr.fasta_index)

        # merge
        peaks = peaks.merge(d=PEAK_MERGE_DISTANCE)
        peaks.saveas(outs.peaks)

    # override the library name when aggregating a single library:
    if len(library_info) == 1:
        library_info[1]['library_info'] = ""

    # merge the metrics
    normalization_metrics = {}
    for cdef, cout in zip(chunk_defs, chunk_outs):
        with open(cout.normalization_metrics, 'r') as f:
            chunk_metrics = json.load(f)
            for key in chunk_metrics:
                normalization_metrics["{}_Library_{}".format(
                    key,
                    library_info[cdef.n]['library_id'])] = chunk_metrics[key]
                # aggregate some metrics across all libraries
                if key in [
                        'total_pre_normalization', 'total_post_normalization'
                ]:
                    if key not in normalization_metrics:
                        normalization_metrics[key] = 0
                    normalization_metrics[key] += chunk_metrics[key]
    with open(outs.normalization_metrics, 'w') as f:
        json.dump(normalization_metrics, f, indent=4)

    # merge the fragments
    base_file, extension = os.path.splitext(outs.fragments)
    if not extension == '.gz':
        raise ValueError('Expecting compressed file output')
    input_tsvs = [str(chunk.fragments) for chunk in chunk_outs]
    merge_keyed_bed(input_tsvs,
                    base_file,
                    threads=martian.get_threads_allocation())

    # index the fragments
    if os.path.getsize(base_file) == 0:
        outs.fragments = None
        outs.fragments_index = None
    else:
        # N.B. tabix_index will automatically compress the input file, adding the .gz suffix
        pysam.tabix_index(base_file, preset='bed', index=outs.fragments_index)
Example #21
def join(args, outs, chunk_defs, chunk_outs):
    contigs = []
    contig_fastqs = []
    contig_bams = []

    if len(chunk_outs) == 0:
        # No input reads
        # Create empty BAM file
        with open(outs.contig_bam, 'w') as f:
            pass
        outs.contig_bam_bai = None
        # Create empty contig FASTA
        with open(outs.contig_fasta, 'w') as f:
            pass
        outs.contig_fasta_fai = None
        # Create empty contig FASTQ
        with open(outs.contig_fastq, 'w') as f:
            pass
        outs.metrics_summary_json = None
        outs.summary_tsv = None
        outs.umi_summary_tsv = None
        return

    summary_tsvs = []
    umi_summary_tsvs = []

    for chunk_out in chunk_outs:
        if not os.path.isfile(chunk_out.contig_fasta):
            continue
        contigs.append(chunk_out.contig_fasta)

        contig_fastqs.append(chunk_out.contig_fastq)
        contig_bams.append(chunk_out.contig_bam)

        summary_tsvs.append(chunk_out.summary_tsv)
        umi_summary_tsvs.append(chunk_out.umi_summary_tsv)

    cr_io.concatenate_files(outs.contig_fasta, contigs)

    if os.path.getsize(outs.contig_fasta) > 0:
        tk_subproc.check_call('samtools faidx %s' % outs.contig_fasta,
                              shell=True)
        outs.contig_fasta_fai = outs.contig_fasta + '.fai'

    cr_io.concatenate_files(outs.contig_fastq, contig_fastqs)

    if len(summary_tsvs) > 0:
        cr_io.concatenate_headered_files(outs.summary_tsv, summary_tsvs)
    if len(umi_summary_tsvs) > 0:
        cr_io.concatenate_headered_files(outs.umi_summary_tsv,
                                         umi_summary_tsvs)

    if contig_bams:
        # Merge every N BAMs. Trying to merge them all at once
        #  risks hitting the filehandle limit.
        n_merged = 0

        while len(contig_bams) > 1:
            to_merge = contig_bams[0:MERGE_BAMS_N]

            tmp_bam = martian.make_path('merged-%04d.bam' % n_merged)
            n_merged += 1

            print "Merging %d BAMs into %s ..." % (len(to_merge), tmp_bam)
            tk_bam.merge(tmp_bam, to_merge, threads=args.__threads)

            # Delete any temporary bams that have been merged
            for in_bam in to_merge:
                if os.path.basename(in_bam).startswith('merged-'):
                    cr_io.remove(in_bam)

            # Pop the input bams and push the merged bam
            contig_bams = contig_bams[len(to_merge):] + [tmp_bam]

        if os.path.basename(contig_bams[0]).startswith('merged-'):
            # We merged at least two chunks together.
            # Rename it to the output bam.
            cr_io.move(contig_bams[0], outs.contig_bam)
        else:
            # There was only a single chunk, so copy it from the input
            cr_io.copy(contig_bams[0], outs.contig_bam)

        tk_bam.index(outs.contig_bam)

        # Make sure the Martian out matches the actual index filename
        outs.contig_bam_bai = outs.contig_bam + '.bai'

    # Merge the assembler summary jsons
    merged_summary = cr_io.merge_jsons_single_level(
        [out.metrics_summary_json for out in chunk_outs])

    with open(outs.metrics_summary_json, 'w') as f:
        json.dump(tk_safe_json.json_sanitize(merged_summary),
                  f,
                  indent=4,
                  sort_keys=True)
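The incremental BAM merge above keeps at most MERGE_BAMS_N files open at once to stay under the file-handle limit. The same batching works for any N-way merge; a standalone sketch, where merge_files and make_tmp_path are hypothetical caller-supplied helpers:

MERGE_BATCH = 64  # illustrative stand-in for MERGE_BAMS_N

def merge_in_batches(paths, merge_files, make_tmp_path):
    # Merge the first MERGE_BATCH inputs into a temporary file, re-queue the
    # result, and repeat until a single file remains. Assumes len(paths) >= 1.
    n_merged = 0
    while len(paths) > 1:
        batch, paths = paths[:MERGE_BATCH], paths[MERGE_BATCH:]
        tmp = make_tmp_path(n_merged)
        n_merged += 1
        merge_files(tmp, batch)
        paths = paths + [tmp]
    return paths[0]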
Example #22
def join(args, outs, chunk_defs, chunk_outs):
    cr_io.copy(chunk_outs[0].summary, outs.summary)
    if chunk_outs[0].report is not None:
        cr_io.copy(chunk_outs[0].report, outs.report)
    outs.chemistry_type = chunk_outs[0].chemistry_type