示例#1
0
def get_cigar_map(cigar):
    """Map every position of a read to its offset along the alignment.

    Args:
        cigar: iterable of ``(operation, length)`` pairs, or None.
            Handled operation codes are 0 (aligned base), 1 (insertion),
            2 (deletion), 4 (soft clip) and 5 (hard clip in the SAM
            operation numbering).

    Returns:
        None when ``cigar`` is None.  Otherwise a list with one entry per
        aligned, inserted or soft-clipped base: the zero-based offset
        (counted from the start of the alignment) for an aligned base, and
        None for an inserted or soft-clipped base.  Deletions advance the
        offset without adding entries; operation 5 adds nothing at all.

    Raises:
        NotSupportedException: for any other operation code.
    """
    if cigar is None:
        return None

    cigar_map = []
    offset = 0
    for categ, length in cigar:
        if categ == 0:
            # Aligned bases: consecutive offsets.  ``range`` (rather than the
            # Python 2-only ``xrange``) keeps this working on Python 2 and 3.
            cigar_map.extend(range(offset, offset + length))
            offset += length
        elif categ in (1, 4):
            # Insertion or soft clip: the bases are present in the read but
            # have no offset of their own.
            cigar_map.extend([None] * length)
        elif categ == 2:
            # Deletion: no read bases, but the offset moves forward.
            offset += length
        elif categ == 5:
            # Contributes neither read positions nor offset.
            pass
        else:
            raise NotSupportedException('Cigar operation not supported: ' + str(categ))

    return cigar_map
示例#2
0
    def __init__(self,
                 out_file,
                 file_type='VCF4.1',
                 template_file=None,
                 template_reader=None,
                 new_source=None,
                 new_info_fields=[],
                 new_format_fields=[],
                 new_filters=[]):
        """Set up a writer whose header comes from a template plus additions.

        Args:
            out_file: output handed to ``vcf.Writer``.
            file_type: format to write; only 'VCF4.1' is accepted.
            template_file: opened with ``vcf.Reader`` to obtain the header
                template when ``template_reader`` is not given.
            template_reader: header template to use directly.  It is
                modified in place by the additions below.
            new_source: if not None, appended to the "source" list in the
                template metadata.
            new_info_fields: iterable of 6-item entries
                ``(id, length, type, description, _, _)``; the last two
                items are ignored and None is stored in their place.
            new_format_fields: iterable of
                ``(id, length, type, description)`` entries.
            new_filters: iterable of ``(id, description)`` entries.

        Raises:
            NotSupportedException: if ``file_type`` is not 'VCF4.1'.
        """
        self.file_type = file_type
        if self.file_type != 'VCF4.1':
            raise NotSupportedException('File type unsupported: ' + file_type)

        if template_reader is None:
            if template_file is not None:
                template_reader = vcf.Reader(template_file)
            else:
                # No template at all: build a blank one.  The namedtuple
                # class itself (not an instance) serves as the attribute
                # holder, with an empty ordered mapping per header section.
                header_sections = [
                    'metadata', 'infos', 'formats', 'filters', 'alts',
                    'contigs'
                ]
                template_reader = namedtuple('template', header_sections)
                for section in header_sections:
                    setattr(template_reader, section, OrderedDict())

        # Record the new source in the header metadata
        if new_source is not None:
            template_reader.metadata.setdefault("source", []).append(new_source)

        # Register the additional INFO fields
        for info_entry in new_info_fields:
            info_id, info_len, info_type, info_desc, _, _ = info_entry
            template_reader.infos[info_id] = vcf.parser._Info(
                info_id, info_len, info_type, info_desc, None, None)

        # Register the additional FORMAT fields
        for format_entry in new_format_fields:
            format_id, format_len, format_type, format_desc = format_entry
            template_reader.formats[format_id] = vcf.parser._Format(
                format_id, format_len, format_type, format_desc)

        # Register the additional filters
        for filter_entry in new_filters:
            filter_id, filter_desc = filter_entry
            template_reader.filters[filter_id] = vcf.parser._Filter(
                filter_id, filter_desc)

        self.writer = vcf.Writer(out_file,
                                 template_reader,
                                 lineterminator='\n')
 def write_record(self, record):
     """Pass an already-built record straight to the underlying writer.

     Handy when a record should be emitted unchanged, for example while
     combining several files.

     Raises:
         NotSupportedException: if this object's file type is not 'VCF4.1'.
     """
     if self.file_type != 'VCF4.1':
         raise NotSupportedException('File type unsupported: ' + self.file_type)
     self.writer.write_record(record)
示例#4
0
def split(args):
    """Decide how the input is chunked and what resources each chunk requests.

    The variant-calling mode, caller name, precalled VCF path and GATK jar
    path are obtained from ``tk_io.get_vc_mode(args.vc_precalled,
    args.variant_mode)``.

    Args:
        args: stage arguments.  This function reads ``vc_precalled``,
            ``variant_mode``, ``reference_path``, ``restrict_locus`` and
            ``input``.

    Returns:
        ``{'chunks': loci}``.  When ``args.restrict_locus`` is None, ``loci``
        is whatever ``tk_chunks.get_sized_bam_chunks`` returns for
        ``args.input``, limited to the primary contigs, with ``__mem_gb``,
        ``__threads`` and ``split_input`` supplied as extra arguments.
        Otherwise it is a single chunk ``{'locus': args.restrict_locus}``.

    Raises:
        NotSupportedException: if a caller is needed (mode is not
            "precalled") and it is neither 'freebayes' nor 'gatk'.
    """
    vc_mode, variant_caller, precalled_filename, gatk_path = tk_io.get_vc_mode(
        args.vc_precalled, args.variant_mode)

    precalled_file = None
    if vc_mode in ("precalled", "precalled_plus"):
        mem_gb = 8
        threads = 1
        # Copy the precalled VCF into the stage directory and index the copy.
        precalled_file = martian.make_path("precalled_vcf.vcf")
        tenkit.log_subprocess.check_call(
            ['cp', precalled_filename, precalled_file])
        tk_tabix.index_vcf(precalled_file)
        # Chunks are given the ".gz" name; presumably index_vcf leaves a
        # compressed file at that path -- confirm against tk_tabix.
        precalled_file = precalled_file + ".gz"

    if vc_mode != "precalled":
        # A caller will actually run, so its requirements replace any values
        # set above for "precalled_plus".
        if variant_caller == 'freebayes':
            mem_gb = 5
            threads = 1
        elif variant_caller == "gatk":
            mem_gb = 8
            threads = 2
            # make sure the gatk jar file exists
            if gatk_path is None:
                martian.throw(
                    "variant_caller 'gatk' selected, must supply path to gatk jar file -- e.g. \"gatk:/path/to/GenomeAnalysisTK.jar\""
                )

            if not os.path.exists(gatk_path):
                martian.throw(
                    "variant_caller 'gatk' selected, gatk jar file does not exist: %s"
                    % gatk_path)
        else:
            # Report the caller that failed the check (the previous message
            # appended vc_mode, which is not the value being tested here).
            raise NotSupportedException('Variant caller not supported: ' +
                                        str(variant_caller))

    primary_contigs = tk_reference.load_primary_contigs(args.reference_path)
    bam_chunk_size_gb = 3.0

    if args.restrict_locus is None:
        loci = tk_chunks.get_sized_bam_chunks(args.input,
                                              bam_chunk_size_gb,
                                              contig_whitelist=primary_contigs,
                                              extra_args={
                                                  '__mem_gb': mem_gb,
                                                  '__threads': threads,
                                                  'split_input': precalled_file
                                              })
    else:
        # NOTE(review): this single chunk carries no __mem_gb / __threads /
        # split_input entries, unlike the chunks built above -- confirm that
        # is intended.
        loci = [{'locus': args.restrict_locus}]

    return {'chunks': loci}