Exemplo n.º 1
0
def binned_bitsets_by_chrom(f, chrom, chrom_col=0, start_col=1, end_col=2):
    """Read the intervals of a single chromosome from ``f`` into one bitset.

    ``f`` is any iterable of text lines. Lines starting with ``#`` and
    blank / whitespace-only lines are skipped. Every other line is split on
    whitespace; when its ``chrom_col`` field equals ``chrom``, the
    ``start_col`` and ``end_col`` fields are converted with ``int`` and the
    range between them is recorded.

    Returns a ``BinnedBitSet`` created with size ``MAX``; it is returned
    untouched when no line matches ``chrom``.
    """
    bitset = BinnedBitSet(MAX)
    for line in f:
        if line.startswith("#"):
            continue
        fields = line.split()
        # A blank line splits into no fields; skip it rather than fail with
        # an IndexError on the column lookup below.
        if not fields:
            continue
        if fields[chrom_col] == chrom:
            start, end = int(fields[start_col]), int(fields[end_col])
            # set_range is given the start and the interval length.
            bitset.set_range(start, end - start)
    return bitset
Exemplo n.º 2
0
 def binned_bitsets(self, upstream_pad=0, downstream_pad=0, lens={}):
     """Collect the intervals yielded by ``self`` into per-chromosome bitsets.

     ``lens`` maps chromosome names to lengths and is used to size each new
     bitset; chromosomes missing from it get size ``MAX``. Items that are
     not ``GenomicInterval`` instances are ignored. ``upstream_pad`` and
     ``downstream_pad`` are accepted but not used by this method.

     Returns a dict mapping chromosome name to ``BinnedBitSet``. Raises
     ``Exception`` when ``BinnedBitSet`` rejects a size with ``ValueError``.
     """
     by_chrom = {}
     prev_chrom = None
     current = None
     for record in self:
         if not isinstance(record, GenomicInterval):
             continue
         name = record[self.chrom_col]
         if name != prev_chrom:
             if name not in by_chrom:
                 size = lens.get(name, MAX)
                 try:
                     by_chrom[name] = BinnedBitSet(size)
                 except ValueError as err:
                     # Only a length taken from 'lens' can end up here; the
                     # value of MAX is always an acceptable size.
                     raise Exception(
                         "Invalid chrom length %s in 'lens' dictionary. %s"
                         % (str(size), str(err)))
             prev_chrom, current = name, by_chrom[name]
         # Clamp the interval to [0, bitset size] before marking it.
         lo = max(int(record[self.start_col]), 0)
         hi = min(int(record[self.end_col]), current.size)
         current.set_range(lo, hi - lo)
     return by_chrom
Exemplo n.º 3
0
def binned_bitsets_proximity(f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream=0, downstream=0):
    """Read a file into a dictionary of bitsets, padding each interval.

    ``f`` is any iterable of whitespace-delimited text lines. Lines starting
    with ``#`` and blank / whitespace-only lines are skipped. A line is on
    the ``-`` strand only when it has a ``strand_col`` field equal to
    ``"-"``; every other line is treated as ``+``.

    On the ``+`` strand ``upstream`` is subtracted from the start and
    ``downstream`` is added to the end; on the ``-`` strand the two are
    swapped. Padded starts are clamped at 0 and padded ends at ``MAX``.
    Intervals that end up empty are not recorded, although their chromosome
    still receives a bitset.

    Returns a dict mapping chromosome name to a ``BinnedBitSet`` of size
    ``MAX``.
    """
    last_chrom = None
    last_bitset = None
    bitsets = dict()
    for line in f:
        if line.startswith("#"):
            continue
        fields = line.split()
        # A blank line splits into no fields; skip it rather than fail with
        # an IndexError on the chromosome lookup below.
        if not fields:
            continue
        if len(fields) > strand_col and fields[strand_col] == "-":
            strand = "-"
        else:
            strand = "+"
        chrom = fields[chrom_col]
        if chrom != last_chrom:
            if chrom not in bitsets:
                bitsets[chrom] = BinnedBitSet(MAX)
            last_chrom = chrom
            last_bitset = bitsets[chrom]
        start, end = int(fields[start_col]), int(fields[end_col])
        # "Upstream" lies before the start on '+' but after the end on '-'.
        if strand == "+":
            pad_before, pad_after = upstream, downstream
        else:
            pad_before, pad_after = downstream, upstream
        if pad_before:
            start = max(0, start - pad_before)
        if pad_after:
            end = min(MAX, end + pad_after)
        if end - start > 0:
            last_bitset.set_range(start, end - start)
    return bitsets
Exemplo n.º 4
0
def binned_bitsets_from_list(list=[]):
    """Build a dictionary of bitsets from a sequence of interval records.

    Each record is indexed as ``record[0]`` (chromosome), ``record[1]``
    (start) and ``record[2]`` (end); start and end are converted with
    ``int``. Returns a dict mapping chromosome name to a ``BinnedBitSet``
    of size ``MAX``.
    """
    # NOTE: the parameter name shadows the builtin ``list``; it is kept
    # because callers may pass it by keyword.
    by_chrom = {}
    prev_chrom = None
    current = None
    for record in list:
        name = record[0]
        if name != prev_chrom:
            if name not in by_chrom:
                by_chrom[name] = BinnedBitSet(MAX)
            prev_chrom, current = name, by_chrom[name]
        lo = int(record[1])
        hi = int(record[2])
        current.set_range(lo, hi - lo)
    return by_chrom
Exemplo n.º 5
0
def binned_bitsets_from_bed_file(f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream_pad=0, downstream_pad=0, lens={}):
    """
    Read a BED-like file into a dictionary of bitsets.

    - 'f' should be a file like object (or any iterable containing strings)
    - 'chrom_col', 'start_col', and 'end_col' must exist in each data line.
    - 'strand_col' is accepted but not read by this function; padding is
      applied the same way to every line.
    - 'upstream_pad' is subtracted from each start (clamped at 0) and
      'downstream_pad' is added to each end (clamped at the size of that
      chromosome's bitset).
    - if 'lens' is provided bitset sizes will be looked up from it, otherwise
      chromosomes will be assumed to be the maximum size

    Comment ('#'), blank and 'browser' lines are skipped. 'track' lines are
    skipped as well, but an 'offset=N' attribute on one is added to the
    start and end of every following data line. A warning is issued when an
    interval's start lies after its end.

    Returns a dict mapping chromosome name to its BinnedBitSet.
    """
    last_chrom = None
    last_bitset = None
    bitsets = dict()
    offset = 0
    for line in f:
        if not line.strip() or line.startswith("#"):
            continue
        # Ignore browser lines completely
        if line.startswith("browser"):
            continue
        # Need to check track lines due to the offset
        if line.startswith("track"):
            m = re.search(r"offset=(\d+)", line)
            if m:
                offset = int(m.group(1))
            continue
        fields = line.split()
        chrom = fields[chrom_col]
        if chrom != last_chrom:
            if chrom not in bitsets:
                size = lens[chrom] if chrom in lens else MAX
                bitsets[chrom] = BinnedBitSet(size)
            last_chrom = chrom
            last_bitset = bitsets[chrom]
        start, end = int(fields[start_col]) + offset, int(fields[end_col]) + offset
        if upstream_pad:
            start = max(0, start - upstream_pad)
        if downstream_pad:
            # Clamp to the size of the bitset actually being written. A
            # size remembered from bitset creation would belong to whichever
            # chromosome was created last, not necessarily this one.
            end = min(last_bitset.size, end + downstream_pad)
        if start > end:
            warn("Interval start after end!")
        last_bitset.set_range(start, end - start)
    return bitsets
Exemplo n.º 6
0
def bitset_complement(exons):
    """Return the gaps between ``exons`` as a list of ``(start, end)`` pairs.

    ``exons`` is an iterable of ``(start, end)`` pairs. Only gaps lying
    between the smallest start and the largest end are reported. An empty
    input yields an empty list.
    """
    # Materialize once: the input is traversed several times below, which
    # would exhaust a one-shot iterator after the first pass.
    exons = list(exons)
    if not exons:
        # min()/max() below would raise ValueError on an empty sequence.
        return []

    bits = BinnedBitSet(MAX)
    for start, end in exons:
        bits.set_range(start, end - start)
    # After inversion the set bits are the positions not covered by an exon.
    bits.invert()

    # only complement within the range of the list
    lower = min(a[0] for a in exons)
    upper = max(a[1] for a in exons)
    introns = []
    end = lower
    while True:
        start = bits.next_set(end)
        if start == bits.size:
            break
        # Never report past the end of the last exon.
        end = min(bits.next_clear(start), upper)
        if start != end:
            introns.append((start, end))
        if end == upper:
            break
    return introns
Exemplo n.º 7
0
def clone(bits):
    """Return a new ``BinnedBitSet`` sized like ``bits`` with ``bits`` merged in.

    The merge is done with ``ior`` (presumably an in-place OR; confirm
    against ``BinnedBitSet``), so the argument itself is not returned.
    """
    duplicate = BinnedBitSet(bits.size)
    duplicate.ior(bits)
    return duplicate
Exemplo n.º 8
0
def list2bits(ex):
    """Build a ``BinnedBitSet`` of size ``MAX`` from ``(start, end)`` pairs."""
    result = BinnedBitSet(MAX)
    for lo, hi in ex:
        # set_range is given the start and the interval length.
        length = hi - lo
        result.set_range(lo, length)
    return result
def clone(bits):
    """Return a new ``BinnedBitSet`` sized like ``bits`` with ``bits`` merged in.

    The merge is done with ``ior`` (presumably an in-place OR; confirm
    against ``BinnedBitSet``), so the argument itself is not returned.
    """
    duplicate = BinnedBitSet(bits.size)
    duplicate.ior(bits)
    return duplicate
Exemplo n.º 10
0
def copybits(binnedbits):
    """Return a new ``BinnedBitSet`` sized like ``binnedbits`` with it merged in.

    The merge is done with ``ior`` (presumably an in-place OR; confirm
    against ``BinnedBitSet``), so the argument itself is not returned.
    """
    duplicate = BinnedBitSet(binnedbits.size)
    duplicate.ior(binnedbits)
    return duplicate