def get_cigar_map(cigar):
    """Return, for each position of a read, its offset along the alignment.

    Args:
        cigar: iterable of ``(operation, length)`` pairs, or None.

    Returns:
        None when ``cigar`` is None. Otherwise a list with one entry per
        read position contributed by operations 0, 1 and 4: the running
        offset for aligned bases, and None for inserted or soft-clipped
        bases. The offset starts at 0 and is advanced by aligned bases and
        by deletions.

    Raises:
        NotSupportedException: for any operation code other than
            0, 1, 2, 4 or 5.
    """
    if cigar is None:
        return None

    cigar_map = []
    offset = 0
    for categ, length in cigar:
        if categ == 0:
            # Aligned bases: each read position gets the next offset.
            # range() works on both Python 2 and 3 and avoids a per-base
            # append loop.
            cigar_map.extend(range(offset, offset + length))
            offset += length
        elif categ in (1, 4):
            # Insertion (1) or soft-clipping (4): the read positions exist
            # but have no offset.
            cigar_map.extend([None] * length)
        elif categ == 2:
            # Deletion: no read positions, but the offset moves forward.
            offset += length
        elif categ == 5:
            # Operation 5 (hard clip in SAM/pysam numbering) contributes
            # neither read positions nor offset.
            pass
        else:
            raise NotSupportedException('Cigar operation not supported: ' +
                                        str(categ))
    return cigar_map
def __init__(self,
             out_file,
             file_type='VCF4.1',
             template_file=None,
             template_reader=None,
             new_source=None,
             new_info_fields=[],
             new_format_fields=[],
             new_filters=[]):
    """Create a writer for ``out_file`` whose header comes from a template.

    The header template is chosen in this order: an explicit
    ``template_reader``; otherwise a ``vcf.Reader`` built from
    ``template_file``; otherwise an empty header. The chosen template is
    then extended in place with the new source, INFO, FORMAT and FILTER
    entries before being handed to ``vcf.Writer``.

    Args:
        out_file: passed through to ``vcf.Writer`` as the output.
        file_type: only 'VCF4.1' is accepted.
        template_file: input for ``vcf.Reader`` when no reader is given.
        template_reader: object exposing ``metadata``, ``infos``,
            ``formats`` and ``filters`` mappings; it is modified in place.
        new_source: value appended to the header's "source" metadata list,
            unless None.
        new_info_fields: iterable of 6-item entries
            ``(id, number, type, description, _, _)``; the last two items
            are ignored.
        new_format_fields: iterable of ``(id, number, type, description)``.
        new_filters: iterable of ``(id, description)``.

    Raises:
        NotSupportedException: if ``file_type`` is not 'VCF4.1'.
    """
    self.file_type = file_type
    if self.file_type != 'VCF4.1':
        raise NotSupportedException('File type unsupported: ' + file_type)

    if template_reader is None:
        if template_file is not None:
            template_reader = vcf.Reader(template_file)
        else:
            # No template at all: use a namedtuple class as a plain
            # attribute holder with one empty ordered mapping per section.
            header_sections = [
                'metadata', 'infos', 'formats', 'filters', 'alts', 'contigs'
            ]
            template_reader = namedtuple('template', header_sections)
            for section in header_sections:
                setattr(template_reader, section, OrderedDict())

    # Record the new source in the header metadata.
    if new_source is not None:
        template_reader.metadata.setdefault("source", []).append(new_source)

    # Register additional INFO fields.
    for ident, number, kind, desc, _, _ in new_info_fields:
        template_reader.infos[ident] = vcf.parser._Info(
            ident, number, kind, desc, None, None)

    # Register additional FORMAT fields.
    for ident, number, kind, desc in new_format_fields:
        template_reader.formats[ident] = vcf.parser._Format(
            ident, number, kind, desc)

    # Register additional FILTER entries.
    for ident, desc in new_filters:
        template_reader.filters[ident] = vcf.parser._Filter(ident, desc)

    self.writer = vcf.Writer(out_file, template_reader, lineterminator='\n')
def write_record(self, record):
    """Write a raw record straight through to the underlying writer.

    Handy when a record should be emitted unchanged, for example when
    combining multiple files.

    Raises:
        NotSupportedException: if this object's file type is not 'VCF4.1'.
    """
    if self.file_type != 'VCF4.1':
        raise NotSupportedException('File type unsupported: ' +
                                    self.file_type)
    self.writer.write_record(record)
def split(args):
    """Plan the chunks for variant calling and their resource requests.

    Reads ``args.vc_precalled``, ``args.variant_mode``,
    ``args.reference_path``, ``args.input`` and ``args.restrict_locus``.

    Returns:
        ``{'chunks': loci}``. When ``args.restrict_locus`` is None, ``loci``
        comes from ``tk_chunks.get_sized_bam_chunks`` and each chunk is
        given ``__mem_gb``, ``__threads`` and ``split_input`` (the path to
        the prepared pre-called VCF, or None). Otherwise it is a single
        chunk holding only the restricted locus.

    Raises:
        NotSupportedException: if a variant caller is needed and it is
            neither 'freebayes' nor 'gatk'.
    """
    vc_mode, variant_caller, precalled_filename, gatk_path = tk_io.get_vc_mode(
        args.vc_precalled, args.variant_mode)

    precalled_file = None
    if vc_mode in ("precalled", "precalled_plus"):
        mem_gb = 8
        threads = 1
        # Work on a private copy of the pre-called VCF inside the stage
        # directory, then index it.
        precalled_file = martian.make_path("precalled_vcf.vcf")
        tenkit.log_subprocess.check_call(
            ['cp', precalled_filename, precalled_file])
        tk_tabix.index_vcf(precalled_file)
        # NOTE(review): presumably index_vcf leaves a compressed copy at
        # "<path>.gz" -- confirm against tk_tabix.
        precalled_file = precalled_file + ".gz"

    if vc_mode != "precalled":
        # A caller will actually run, so its resource needs replace any
        # values set for the pre-called case above.
        if variant_caller == 'freebayes':
            mem_gb = 5
            threads = 1
        elif variant_caller == "gatk":
            mem_gb = 8
            threads = 2
            # make sure the gatk jar file exists
            if gatk_path is None:
                martian.throw(
                    "variant_caller 'gatk' selected, must supply path to gatk jar file -- e.g. \"gatk:/path/to/GenomeAnalysisTK.jar\""
                )
            if not os.path.exists(gatk_path):
                martian.throw(
                    "variant_caller 'gatk' selected, gatk jar file does not exist: %s"
                    % gatk_path)
        else:
            # Report the caller that failed the check (not the mode);
            # str() keeps the message buildable when the caller is None.
            raise NotSupportedException('Variant caller not supported: ' +
                                        str(variant_caller))

    primary_contigs = tk_reference.load_primary_contigs(args.reference_path)
    bam_chunk_size_gb = 3.0

    if args.restrict_locus is None:
        loci = tk_chunks.get_sized_bam_chunks(args.input,
                                              bam_chunk_size_gb,
                                              contig_whitelist=primary_contigs,
                                              extra_args={
                                                  '__mem_gb': mem_gb,
                                                  '__threads': threads,
                                                  'split_input': precalled_file
                                              })
    else:
        # NOTE(review): this chunk carries no '__mem_gb', '__threads' or
        # 'split_input' entries, unlike the chunks built above -- confirm
        # that is intended.
        loci = [{'locus': args.restrict_locus}]

    return {'chunks': loci}