Example #1
    def test_snv_annot_without_rg():
        'It tests that we can do snv annotation with a bam without rg info'
        test_dir = NamedTemporaryDir()
        project_name = 'backbone'
        configuration = {'Snvs':{'default_bam_platform':'sanger'},
                         'General_settings':{'threads':THREADS}}
        settings_path = create_project(directory=test_dir.name,
                                       name=project_name,
                                       configuration=configuration)


        project_dir = join(test_dir.name, project_name)
        #the reference
        reference_dir = join(project_dir, 'mapping/reference')
        os.makedirs(reference_dir)
        reference_fpath = join(reference_dir, 'reference.fasta')
        out = open(reference_fpath, 'w')
        for line in open(join(TEST_DATA_DIR, 'blast/arabidopsis_genes')):
            out.write(line)
        out.close()

        bams_dir = join(project_dir, 'mapping', 'bams')
        os.makedirs(bams_dir)
        bam_fpath = join(bams_dir, 'merged.0.bam')

        shutil.copy(join(TEST_DATA_DIR, 'merged.0.bam'), bam_fpath)
        create_bam_index(bam_fpath)

        annot_input_dir = join(project_dir, 'annotations', 'input')
        os.makedirs(annot_input_dir)
        os.symlink(reference_fpath, join(annot_input_dir, 'reference.fasta'))
        do_analysis(project_settings=settings_path, kind='annotate_snvs', silent=True)
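
create_bam_index itself is not shown in these snippets. For context, here is a minimal sketch of what such a helper typically looks like; it assumes pysam is available and that the index file simply takes the .bai suffix, which may differ from the project's actual implementation.

import os
import pysam

def create_bam_index(bam_fpath):
    'It creates a .bai index for the given BAM file if it is missing or stale'
    index_fpath = bam_fpath + '.bai'
    # skip the work if an up-to-date index is already on disk
    if (os.path.exists(index_fpath) and
        os.path.getmtime(index_fpath) >= os.path.getmtime(bam_fpath)):
        return
    pysam.index(bam_fpath)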
Example #2
    def run(self):
        'It runs the analysis.'
        inputs, output_dirs = self._get_inputs_and_prepare_outputs()
        output_dir = output_dirs['result']
        merged_bam = inputs['merged_bam']
        create_bam_index(merged_bam.last_version)

        pipeline = 'snv_bam_annotator'
        bam_fpath = merged_bam.last_version
        configuration = {'snv_bam_annotator': {'bam_fhand':bam_fpath}}
        settings = self._project_settings
        if 'Snvs' in settings:
            snv_settings = settings['Snvs']
            # read edge conf
            read_edge_conf = self._configure_read_edge_conf(snv_settings)
            configuration['snv_bam_annotator']['read_edge_conf'] = read_edge_conf
            for config_param in ('min_quality', 'min_mapq', 'min_num_alleles',
                                 'max_maf', 'min_num_reads_for_allele'):
                if snv_settings[config_param] is not None:
                    param_value = float(snv_settings[config_param])
                else:
                    param_value = None
                configuration['snv_bam_annotator'][config_param] = param_value

            if 'default_bam_platform' in snv_settings:
                configuration['snv_bam_annotator']['default_bam_platform'] = \
                                             snv_settings['default_bam_platform']

        return self._run_annotation(pipeline=pipeline,
                                    configuration=configuration,
                                    inputs=inputs,
                                    output_dir=output_dir)
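
The Snvs section consumed by this run() method could be supplied through the project configuration, as in the test of Example #1. The sketch below only illustrates the keys the code reads; the directory name, the value of min_num_reads_for_allele and the threads setting are made up, while the other values mirror the defaults of create_snv_annotator shown in Example #3.

configuration = {
    'Snvs': {'min_quality': 45,
             'min_mapq': 15,
             'min_num_alleles': 1,
             'max_maf': None,
             'min_num_reads_for_allele': 2,
             'default_bam_platform': 'sanger'},
    'General_settings': {'threads': None},
}
settings_path = create_project(directory='my_project_dir', name='backbone',
                               configuration=configuration)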
Example #3
def create_snv_annotator(bam_fhand, min_quality=45, default_sanger_quality=25,
                         min_mapq=15, min_num_alleles=1, max_maf=None,
                         read_edge_conf=None, default_bam_platform=None,
                         min_num_reads_for_allele=None, ploidy=2):
    'It creates an annotator capable of annotating the snvs in a SeqRecord'

    # the bam should have an index; does the index exist?
    bam_fhand = get_fhand(bam_fhand)
    create_bam_index(bam_fpath=bam_fhand.name)
    read_edge_conf = _normalize_read_edge_conf(read_edge_conf)

    bam = pysam.Samfile(bam_fhand.name, 'rb')

    # default min num_reads per allele and ploidy
    if min_num_reads_for_allele is None:
        min_num_reads_for_allele = DEFAUL_MIN_NUM_READS_PER_ALLELE
    if ploidy is None:
        ploidy = DEFAULT_PLOIDY

    def annotate_snps(sequence):
        'It annotates the snvs found in the sequence'
        for snv in _snvs_in_bam(bam, reference=sequence,
                                min_quality=min_quality,
                                default_sanger_quality=default_sanger_quality,
                                min_mapq=min_mapq,
                                min_num_alleles=min_num_alleles,
                                max_maf=max_maf,
                                read_edge_conf=read_edge_conf,
                                default_bam_platform=default_bam_platform,
                             min_num_reads_for_allele=min_num_reads_for_allele):
            snv = _summarize_snv(snv)
            location = snv['ref_position']
            type_ = 'snv'

            qualifiers = {'alleles':snv['alleles'],
                          'reference_allele':snv['reference_allele'],
                          'read_groups':snv['read_groups'],
                          'mapping_quality': snv['mapping_quality'],
                          'quality': snv['quality']}
            snv_feat = SeqFeature(location=FeatureLocation(location, location),
                                  type=type_,
                                  qualifiers=qualifiers)

            annotate_pic(snv_feat)
            annotate_heterozygosity(snv_feat, ploidy=ploidy)

            sequence.features.append(snv_feat)
        return sequence
    return annotate_snps
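
A hedged usage sketch of the factory above: the file names are illustrative, but the call pattern (build the annotator once, then map it over SeqRecords) follows directly from the closure returned by create_snv_annotator.

from Bio import SeqIO

# build the annotator once; it keeps the opened BAM in its closure
annotate_snvs = create_snv_annotator(bam_fhand=open('merged.0.bam'),
                                     min_quality=45, min_mapq=15)

for record in SeqIO.parse(open('reference.fasta'), 'fasta'):
    record = annotate_snvs(record)
    # every annotated snv is appended as a SeqFeature of type 'snv'
    snv_positions = [int(feat.location.start) for feat in record.features
                     if feat.type == 'snv']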
Example #4
    def run(self):
        '''It runs the analysis.'''
        self._log({'analysis_started':True})
        inputs = self._get_input_fpaths()
        bam_path = inputs['bam']
        bam_fpath = bam_path.last_version
        reference_fpath = inputs['reference'].last_version
        out_fhand = open(bam_path.next_version, 'wb')
        cmd = ['samtools', 'calmd', '-Abr', bam_fpath, reference_fpath]
        call(cmd, raise_on_error=True, stdout=out_fhand)
        out_fhand.close()

        create_bam_index(out_fhand.name)
        self._log({'analysis_finished':True})
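
Outside the analysis framework, the same calmd-and-index step can be reproduced with the standard library alone. This is a minimal sketch, assuming samtools is on the PATH and that indexing is done with plain samtools index rather than the project's create_bam_index helper; the function name is made up.

import subprocess

def calmd_and_index(bam_fpath, reference_fpath, out_fpath):
    'It recomputes MD/NM tags with samtools calmd and indexes the result'
    out_fhand = open(out_fpath, 'wb')
    # the '-Abr' flags mirror the ones used by the analysis above
    # (BAM output plus BAQ recalculation)
    subprocess.check_call(['samtools', 'calmd', '-Abr', bam_fpath,
                           reference_fpath], stdout=out_fhand)
    out_fhand.close()
    subprocess.check_call(['samtools', 'index', out_fpath])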
Example #5
    def run(self):
        '''It runs the analysis.'''
        self._log({'analysis_started':True})
        settings = self._project_settings
        project_path = settings['General_settings']['project_path']
        tmp_dir      = settings['General_settings']['tmpdir']

        inputs = self._get_input_fpaths()
        bam_paths = inputs['bams']
        reference_path = inputs['reference']

        output_dir = self._create_output_dirs()['result']
        merged_bam_path = VersionedPath(os.path.join(output_dir,
                                        BACKBONE_BASENAMES['merged_bam']))

        merged_bam_fpath = merged_bam_path.next_version

        #Do we have to add the default qualities to the sam file?
        #do we have characters different from ACTGN?
        add_qualities = settings['Sam_processing']['add_default_qualities']
        #memory for the java programs
        java_mem = settings['Other_settings']['java_memory']
        picard_path = settings['Other_settings']['picard_path']

        if add_qualities:
            default_sanger_quality = settings['Other_settings']['default_sanger_quality']
            default_sanger_quality = int(default_sanger_quality)
        else:
            default_sanger_quality = None

        temp_dir = NamedTemporaryDir()
        for bam_path in bam_paths:
            bam_basename = bam_path.basename
            temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                          suffix='.sam')
            sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')
            bam2sam(bam_path.last_version, temp_sam.name)
            sam_fhand = open(sam_fpath, 'w')
            # First we need to create the sam with added tags and headers
            add_header_and_tags_to_sam(temp_sam, sam_fhand)
            temp_sam.close()
            sam_fhand.close()
            #the standardization
            temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                           suffix='.sam', delete=False)
            standardize_sam(open(sam_fhand.name), temp_sam2,
                            default_sanger_quality,
                            add_def_qual=add_qualities,
                            only_std_char=True)
            temp_sam2.flush()
            shutil.move(temp_sam2.name, sam_fhand.name)

            temp_sam2.close()

        def get_sam_fpaths(dir_):
            'It returns the paths of the sam files in the given directory'
            return [os.path.join(dir_, fname) for fname in os.listdir(dir_)
                    if fname.endswith('.sam')]

        # Once the headers are ready we are going to merge
        sams = get_sam_fpaths(temp_dir.name)
        sams = [open(sam) for sam in sams]

        temp_sam = NamedTemporaryFile(suffix='.sam')
        reference_fhand = open(reference_path.last_version)
        try:
            merge_sam(sams, temp_sam, reference_fhand)
        except Exception:
            if os.path.exists(merged_bam_fpath):
                os.remove(merged_bam_fpath)
            raise
        reference_fhand.close()

        # close files
        for sam in sams:
            sam.close()
        # Convert the sam into a temporary bam
        temp_bam = NamedTemporaryFile(suffix='.bam')
        sam2bam(temp_sam.name, temp_bam.name)

        # finally we need to sort the bam
        sort_bam_sam(temp_bam.name, merged_bam_fpath,
                     java_conf={'java_memory':java_mem,
                                'picard_path':picard_path}, tmp_dir=tmp_dir)
        temp_bam.close()
        temp_sam.close()
        create_bam_index(merged_bam_fpath)

        self._log({'analysis_finished':True})
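
For comparison, the final sort-and-index stage of this pipeline can be expressed with pysam alone. This is a sketch, not a drop-in replacement for sort_bam_sam (which is configured with Picard here); it assumes a reasonably recent pysam whose sort/index wrappers accept samtools-style arguments, and the function name is made up.

import pysam

def sort_and_index(unsorted_bam_fpath, sorted_bam_fpath):
    'It coordinate-sorts a BAM and creates its .bai index'
    # samtools-style invocation: sort -o <out> <in>
    pysam.sort('-o', sorted_bam_fpath, unsorted_bam_fpath)
    pysam.index(sorted_bam_fpath)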