Exemplo n.º 1
0
def getPerReadAverage(args, in_fh):
    """Write one tab-separated summary line per read to ``args.out``.

    Each input line is parsed into a ``MethRead``.  Calls whose ``seq``
    contains ``args.exclude`` are dropped; of the remaining calls, those with
    a ``call`` value of ``-1`` (unconfident) are ignored for the call
    statistics.  Reads left with no confident call produce no output.

    Output columns: rname, start, end, qname, mean ratio, ".", mean call,
    number of confident calls.  Note that the mean ratio is taken over every
    call that survives the ``exclude`` filter, including unconfident ones.

    Args:
        args: object providing ``exclude`` (tested with ``in`` against each
            call's ``seq``) and ``out`` (file object handed to ``print``).
        in_fh: iterable of lines, each either ``bytes`` or ``str``.
    """
    for raw in in_fh:
        # Lines may be bytes or str; str has no .decode, so an AttributeError
        # simply means the line is already text.
        try:
            raw = raw.decode('ascii')
        except AttributeError:
            pass
        read = MethRead(raw)
        entries = list(read.calldict.values())
        # positions of the calls whose sequence does not contain the
        # unwanted motif
        keep = [
            pos for pos, entry in enumerate(entries)
            if args.exclude not in entry.seq
        ]
        kept_calls = np.array([entry.call for entry in entries])[keep]
        ratios = np.array([entry.ratio for entry in entries])[keep]
        # -1 marks an unconfident call
        confident = [call for call in kept_calls if call != -1]
        if not confident:
            # nothing usable on this read
            continue
        columns = [
            read.rname, read.start, read.end, read.qname,
            np.mean(ratios), '.',
            np.mean(confident),
            len(confident),
        ]
        print('\t'.join(str(col) for col in columns), file=args.out)
Exemplo n.º 2
0
def read_tabix(fpath, window):
    """Fetch the records in ``window`` and group them by read name.

    Args:
        fpath: path handed to ``pysam.TabixFile``.
        window: region argument passed to ``TabixFile.fetch``.

    Returns:
        dict mapping each ``qname`` to the list of ``MethRead`` objects that
        carry that name, in the order they were fetched.
    """
    with pysam.TabixFile(fpath) as tabix:
        # Materialise inside the ``with`` so the file can be closed before
        # the records are parsed.
        entries = list(tabix.fetch(window))
    reads = [MethRead(x) for x in entries]
    rdict = dict()
    for read in reads:
        # setdefault replaces the former bare ``except:``, which would also
        # have swallowed unrelated errors such as KeyboardInterrupt.
        rdict.setdefault(read.qname, []).append(read)
    return rdict
Exemplo n.º 3
0
def read_tabix(fpath,window) :
    """Fetch the records in ``window`` and collect call arrays per read name.

    Args:
        fpath: path handed to ``pysam.TabixFile``.
        window: region argument passed to ``TabixFile.fetch``.

    Returns:
        dict mapping each ``qname`` to its ``callarray``.  When several
        records share a name (split reads), their arrays are joined along
        axis 0 in fetch order.
    """
    with pysam.TabixFile(fpath) as handle :
        records = [MethRead(raw) for raw in handle.fetch(window)]
    calls_by_read = dict()
    for record in records :
        name = record.qname
        if name not in calls_by_read :
            # first record seen for this read
            calls_by_read[name] = record.callarray
        else :
            # further piece of a split read: stack below the earlier rows
            calls_by_read[name] = np.append(
                calls_by_read[name], record.callarray, 0)
    return calls_by_read
Exemplo n.º 4
0
def getReadlevel(args, in_fh):
    """Write one tab-separated line per methylation call to ``args.out``.

    Each input line is parsed into a ``MethRead``; its ``calldict`` is walked
    in sorted key order.  Every output line consists of the read's ``rname``
    followed by the stringified fields of the call, with the read's ``qname``
    inserted at index 3.

    Args:
        args: object providing ``out`` (file object handed to ``print``).
        in_fh: iterable of lines, each either ``bytes`` or ``str``.
    """
    for raw in in_fh:
        # bytes are decoded; str has no .decode and is used unchanged
        try:
            raw = raw.decode('ascii')
        except AttributeError:
            pass
        read = MethRead(raw)
        for position in sorted(read.calldict):
            fields = [read.rname, *(str(item) for item in read.calldict[position])]
            # slot the read name in as the fourth column
            fields[3:3] = [read.qname]
            print("\t".join(fields), file=args.out)
def readlevelHeatmap(datapath,reg,thr=0.1,window=10,verbose=False) :
    """Build and plot a read-level heatmap for one region.

    Only reads that span the whole region (start at or before the region
    start and end at or after the region end) are added to the heatmap.

    Args:
        datapath: passed to the ``tabix`` helper together with the region
            coordinate.  NOTE(review): ``tabix`` is defined outside this
            block; it must return a sized collection of lines because
            ``len()`` is taken of its result in verbose mode.
        reg: region description handed to ``HeatmapRegion``.
        thr: forwarded to ``HeatmapRegion.plot``.
        window: forwarded to ``HeatmapRegion.makeMatrix`` and ``plot``.
        verbose: when true, progress information is printed to stderr.

    Returns:
        Whatever ``HeatmapRegion.plot`` returns.
    """
    region = HeatmapRegion(reg)
    if verbose :
        print(region.coord,file=sys.stderr)
    records = tabix(datapath,region.coord)
    for record in records :
        read = MethRead(record)
        spans_region = read.start <= region.start and read.end >= region.end
        if spans_region :
            region.addread(read)
    if verbose :
        summary = "{} reads covering the entire region out of total {} in the region".format(
            region.totreads,len(records))
        print(summary,file=sys.stderr)
    region.makeMatrix(window)
    figure = region.plot(thr,window)
    if verbose :
        print(region.title,file=sys.stderr)
    return figure
Exemplo n.º 6
0
def getFreq(args, in_fh):
    """Accumulate per-site statistics over reads and print them as they close.

    Each input line is parsed into a ``MethRead``.  Sites are kept in a dict
    keyed by the read's keys; before a read is merged in, sites that can no
    longer receive data are printed via ``SiteStats.printFreq`` and removed:
    all of them when the read's ``rname`` differs from that of the smallest
    pending key, otherwise those whose key sorts before the read's first key.
    Whatever is left after the last line is printed in sorted key order.

    NOTE(review): this flushing scheme appears to rely on the input being
    sorted by reference name and position -- confirm against the caller.

    Args:
        args: object providing ``verbose``, ``motif`` and ``out``.
        in_fh: iterable of lines, each either ``bytes`` or ``str``.
    """
    if args.verbose:
        print("getting frequency", file=sys.stderr)
    sites = dict()
    for n, raw in enumerate(in_fh, 1):
        # bytes are decoded; str has no .decode and is used unchanged
        try:
            raw = raw.decode('ascii')
        except AttributeError:
            pass
        read = MethRead(raw)
        pending = sorted(sites)
        try:
            if read.rname != sites[pending[0]].rname:
                # new reference name: everything pending is complete
                flush_count = len(pending)
            else:
                # pending keys sorting before the read's first key are complete
                flush_count = bisect.bisect_left(pending, read.keys[0])
        except IndexError:
            # either no pending sites or the read has no keys
            flush_count = 0
        for done in pending[:flush_count]:
            sites[done].printFreq(args.motif, args.out)
            del sites[done]
        for key in read.keys:
            if key not in sites:
                sites[key] = SiteStats(read.calldict[key], read.rname)
            sites[key].update(read.calldict[key])
        if args.verbose and n % 10000 == 0:
            print("parsed {} lines".format(n), file=sys.stderr)
    # flush what is still pending once the input is exhausted
    for key in sorted(sites):
        sites[key].printFreq(args.motif, args.out)
def read_tabix(fpath,window) :
    """Fetch the records in ``window`` and split call arrays by modification.

    The last entry of each record's ``fields`` selects the target: ``"CG"``
    or ``"GC"``.  Records with any other value are ignored.

    Args:
        fpath: path handed to ``pysam.TabixFile``.
        window: region argument passed to ``TabixFile.fetch``.

    Returns:
        Tuple ``(cgdict, gcdict)``; each maps ``qname`` to its ``callarray``.
        When several records share a name (split reads), their arrays are
        joined along axis 0 in fetch order.
    """
    with pysam.TabixFile(fpath) as handle :
        records = [MethRead(raw) for raw in handle.fetch(window)]
    cgdict = dict()
    gcdict = dict()
    by_mod = {"CG" : cgdict, "GC" : gcdict}
    for record in records :
        target = by_mod.get(record.fields[-1])
        if target is None :
            # neither CG nor GC: not collected
            continue
        name = record.qname
        if name not in target :
            # first record seen for this read
            target[name] = record.callarray
        else :
            # further piece of a split read: stack below the earlier rows
            target[name] = np.append(target[name],record.callarray,0)
    return cgdict,gcdict