def test_snv_annot_without_rg():
    'It tests that we can do snv calling with a bam without rg info'
    test_dir = NamedTemporaryDir()
    project_name = 'backbone'
    configuration = {'Snvs': {'default_bam_platform': 'sanger'},
                     'General_settings': {'threads': THREADS}}
    settings_path = create_project(directory=test_dir.name,
                                   name=project_name,
                                   configuration=configuration)
    project_dir = join(test_dir.name, project_name)

    # the reference
    reference_dir = join(project_dir, 'mapping/reference')
    os.makedirs(reference_dir)
    reference_fpath = join(reference_dir, 'reference.fasta')
    out = open(reference_fpath, 'w')
    for line in open(join(TEST_DATA_DIR, 'blast/arabidopsis_genes')):
        out.write(line)
    out.close()

    bams_dir = join(project_dir, 'mapping', 'bams')
    os.makedirs(bams_dir)
    bam_fpath = join(bams_dir, 'merged.0.bam')
    shutil.copy(join(TEST_DATA_DIR, 'merged.0.bam'), bam_fpath)
    create_bam_index(bam_fpath)

    annot_input_dir = join(project_dir, 'annotations', 'input')
    os.makedirs(annot_input_dir)
    os.symlink(reference_fpath, join(annot_input_dir, 'reference.fasta'))

    do_analysis(project_settings=settings_path, kind='annotate_snvs',
                silent=True)
def run(self):
    'It runs the analysis.'
    inputs, output_dirs = self._get_inputs_and_prepare_outputs()
    output_dir = output_dirs['result']
    merged_bam = inputs['merged_bam']
    create_bam_index(merged_bam.last_version)

    pipeline = 'snv_bam_annotator'
    bam_fpath = merged_bam.last_version
    configuration = {'snv_bam_annotator': {'bam_fhand': bam_fpath}}

    settings = self._project_settings
    if 'Snvs' in settings:
        snv_settings = settings['Snvs']
        # read edge configuration
        read_edge_conf = self._configure_read_edge_conf(snv_settings)
        configuration['snv_bam_annotator']['read_edge_conf'] = read_edge_conf

        for config_param in ('min_quality', 'min_mapq', 'min_num_alleles',
                             'max_maf', 'min_num_reads_for_allele'):
            if snv_settings[config_param] is not None:
                param_value = float(snv_settings[config_param])
            else:
                param_value = None
            configuration['snv_bam_annotator'][config_param] = param_value

        if 'default_bam_platform' in snv_settings:
            configuration['snv_bam_annotator']['default_bam_platform'] = \
                snv_settings['default_bam_platform']

    return self._run_annotation(pipeline=pipeline,
                                configuration=configuration,
                                inputs=inputs, output_dir=output_dir)
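# A hypothetical 'Snvs' settings section that the run() method above would
# consume. The keys mirror the config_param loop and 'default_bam_platform';
# the values are illustrative only, not the backbone defaults. Read-edge
# trimming options are picked up separately by _configure_read_edge_conf.
SNV_SETTINGS_EXAMPLE = {
    'min_quality': 45,
    'min_mapq': 15,
    'min_num_alleles': 1,
    'max_maf': None,
    'min_num_reads_for_allele': 2,
    'default_bam_platform': 'sanger',
}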
def create_snv_annotator(bam_fhand, min_quality=45, default_sanger_quality=25,
                         min_mapq=15, min_num_alleles=1, max_maf=None,
                         read_edge_conf=None, default_bam_platform=None,
                         min_num_reads_for_allele=None, ploidy=2):
    'It creates an annotator capable of annotating the snvs in a SeqRecord'

    # the bam should have an index; create it if it does not exist yet
    bam_fhand = get_fhand(bam_fhand)
    create_bam_index(bam_fpath=bam_fhand.name)

    read_edge_conf = _normalize_read_edge_conf(read_edge_conf)

    bam = pysam.Samfile(bam_fhand.name, 'rb')

    # default min number of reads per allele and ploidy
    if min_num_reads_for_allele is None:
        min_num_reads_for_allele = DEFAUL_MIN_NUM_READS_PER_ALLELE
    if ploidy is None:
        ploidy = DEFAULT_PLOIDY

    def annotate_snps(sequence):
        'It annotates the snvs found in the sequence'
        for snv in _snvs_in_bam(bam, reference=sequence,
                                min_quality=min_quality,
                                default_sanger_quality=default_sanger_quality,
                                min_mapq=min_mapq,
                                min_num_alleles=min_num_alleles,
                                max_maf=max_maf,
                                read_edge_conf=read_edge_conf,
                                default_bam_platform=default_bam_platform,
                                min_num_reads_for_allele=min_num_reads_for_allele):
            snv = _summarize_snv(snv)
            location = snv['ref_position']
            type_ = 'snv'
            qualifiers = {'alleles': snv['alleles'],
                          'reference_allele': snv['reference_allele'],
                          'read_groups': snv['read_groups'],
                          'mapping_quality': snv['mapping_quality'],
                          'quality': snv['quality']}
            snv_feat = SeqFeature(location=FeatureLocation(location, location),
                                  type=type_, qualifiers=qualifiers)
            annotate_pic(snv_feat)
            annotate_heterozygosity(snv_feat, ploidy=ploidy)
            sequence.features.append(snv_feat)
        return sequence
    return annotate_snps
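# A minimal usage sketch for create_snv_annotator. It assumes an indexed BAM
# ('merged.0.bam' is a placeholder) and a sequence object with a .features
# list, e.g. a Biopython SeqRecord whose identifier matches a reference name
# in the BAM; whether a plain SeqRecord is accepted depends on _snvs_in_bam.
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

annotate_snvs = create_snv_annotator(bam_fhand=open('merged.0.bam', 'rb'),
                                     min_quality=45,
                                     default_bam_platform='sanger')
contig = SeqRecord(Seq('ATGC' * 250), id='reference_1', name='reference_1')
contig = annotate_snvs(contig)
for feature in contig.features:
    if feature.type == 'snv':
        print feature.location.start, feature.qualifiers['alleles']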
def run(self):
    '''It runs the analysis.'''
    self._log({'analysis_started': True})
    inputs = self._get_input_fpaths()
    bam_path = inputs['bam']
    bam_fpath = bam_path.last_version
    reference_fpath = inputs['reference'].last_version

    out_fhand = open(bam_path.next_version, 'w')
    cmd = ['samtools', 'calmd', '-Abr', bam_fpath, reference_fpath]
    call(cmd, raise_on_error=True, stdout=out_fhand)
    create_bam_index(out_fhand.name)
    out_fhand.close()

    self._log({'analysis_finished': True})
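# The samtools call above is roughly equivalent to the following command line
# (paths are placeholders for the versioned files resolved at runtime):
#
#     samtools calmd -Abr merged.0.bam reference.fasta > merged.1.bam
#
# calmd recomputes the MD/NM tags against the reference; -r computes BAQ, -A
# applies it to the stored base qualities, and -b emits BAM on stdout, which
# the code redirects into the next version of the bam file.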
def run(self):
    '''It runs the analysis.'''
    self._log({'analysis_started': True})
    settings = self._project_settings
    project_path = settings['General_settings']['project_path']
    tmp_dir = settings['General_settings']['tmpdir']
    inputs = self._get_input_fpaths()
    bam_paths = inputs['bams']
    reference_path = inputs['reference']

    output_dir = self._create_output_dirs()['result']
    merged_bam_path = VersionedPath(os.path.join(output_dir,
                                                 BACKBONE_BASENAMES['merged_bam']))
    merged_bam_fpath = merged_bam_path.next_version

    # Do we have to add the default qualities to the sam file?
    # Do we have characters different from ACTGN?
    add_qualities = settings['Sam_processing']['add_default_qualities']
    # memory for the java programs
    java_mem = settings['Other_settings']['java_memory']
    picard_path = settings['Other_settings']['picard_path']

    if add_qualities:
        default_sanger_quality = settings['Other_settings']['default_sanger_quality']
        default_sanger_quality = int(default_sanger_quality)
    else:
        default_sanger_quality = None

    temp_dir = NamedTemporaryDir()
    for bam_path in bam_paths:
        bam_basename = bam_path.basename
        temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                      suffix='.sam')
        sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')
        bam2sam(bam_path.last_version, temp_sam.name)
        sam_fhand = open(sam_fpath, 'w')

        # First we need to create the sam with added tags and headers
        add_header_and_tags_to_sam(temp_sam, sam_fhand)
        temp_sam.close()
        sam_fhand.close()

        # the standardization
        temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                       suffix='.sam', delete=False)
        standardize_sam(open(sam_fhand.name), temp_sam2,
                        default_sanger_quality,
                        add_def_qual=add_qualities,
                        only_std_char=True)
        temp_sam2.flush()
        shutil.move(temp_sam2.name, sam_fhand.name)
        temp_sam2.close()

    get_sam_fpaths = lambda dir_: [os.path.join(dir_, fname)
                                   for fname in os.listdir(dir_)
                                   if fname.endswith('.sam')]

    # Once the headers are ready we are going to merge
    sams = get_sam_fpaths(temp_dir.name)
    sams = [open(sam) for sam in sams]

    temp_sam = NamedTemporaryFile(suffix='.sam')
    reference_fhand = open(reference_path.last_version)
    try:
        merge_sam(sams, temp_sam, reference_fhand)
    except Exception:
        if os.path.exists(merged_bam_fpath):
            os.remove(merged_bam_fpath)
        raise
    reference_fhand.close()

    # close files
    for sam in sams:
        sam.close()

    # Convert the sam into a bam (temporary)
    temp_bam = NamedTemporaryFile(suffix='.bam')
    sam2bam(temp_sam.name, temp_bam.name)

    # finally we need to sort the bam
    #print 'unsorted.bam', temp_bam.name
    #raw_input()
    sort_bam_sam(temp_bam.name, merged_bam_fpath,
                 java_conf={'java_memory': java_mem,
                            'picard_path': picard_path},
                 tmp_dir=tmp_dir)
    temp_bam.close()
    temp_sam.close()
    create_bam_index(merged_bam_fpath)

    self._log({'analysis_finished': True})