Exemplo n.º 1
0
def main():
    ''' main scripts '''
    args = parse_argument()

    PromoterList = []
    feature = []

    print "# loading and reading the promoter of Gene ... "
    if args.genefile:
        for g in TableIO.parse(args.genefile, "genebed"):
            a = g.promoter(args.bp)
            PromoterList.append(a)
    if args.rna:
        for g in TableIO.parse(args.rna, "genebed"):
            a = g.promoter(args.bp)
            PromoterList.append(a)
    if args.feature:
        feature = TableIO.parse(args.feature, 'bed')
    print "# loading and reading Done !"

    PromoterData = readIntoBinIndex(PromoterList)
    FeatureData = readIntoBinIndex(feature)

    for i in TableIO.parse(args.bed, 'bed'):
        overlapGene, overlapFeature, overlap_string = [], [], ''
        if i.strand not in ['+', '-']: continue
        else:
            OverlapGene = getOverlapFeatures(i, PromoterData)
            if FeatureData:
                overlapFeature = getOverlapFeatures(i, FeatureData)
            for g in overlapGene + overlapFeature:
                overlap_string += g.id + ';'
            print i, "\t", overlap_string
Exemplo n.º 2
0
 def __init__(self, tabix_file_name,**dict):
     '''
     Open a tabix file and resolve its optional column header.

     wrapped in DBI.init(filename,"tabix")

     Keyword options read from **dict:
       header -- True : use the first row of the tabix file itself
                        (each field stripped);
                 list : use the list as given;
                 str  : use the first row of the file with that name.
       tabix  -- format name stored in self.tabix_format
                 (default "simple").

     If the tabix file cannot be opened a warning is written to stderr and
     self.data is left as None.
     '''
     self.tabix_file_name=tabix_file_name
     self.dict=dict
     # Define the attribute up front so a failed open does not leave the
     # object without `data`.
     self.data=None
     try:
         self.data=pysam.Tabixfile(tabix_file_name)
     except Exception:
         print >>sys.stderr,"WARNING: Can't init the tabix file",tabix_file_name
     self.header=None
     header=dict.get("header")
     if header==True:
         f=TableIO.parse(tabix_file_name)
         try:
             self.header=[field.strip() for field in f.next()]
         finally:
             f.close()
     elif isinstance(header,list):
         self.header=header
     elif isinstance(header,str):
         fh=TableIO.parse(header)
         try:
             self.header=fh.next()
         finally:
             # the original never released this iterator
             fh.close()
     self.tabix_format=dict.get("tabix","simple")
Exemplo n.º 3
0
def Main():
    '''
    Write one "chrom<TAB>start<TAB>stop<TAB>value" line per bin of every
    chromosome listed in args.chr_length_file.

    A zero-filled list of (length >> SHIFTSIZE) + 1 bins is allocated per
    chromosome in the global `data`; parseAnnotationFile(args.input) is then
    called (defined elsewhere -- presumably it fills the bins) and the bins
    are printed to `out`. The last bin's stop is clipped to the chromosome
    length.
    '''
    global args,out,SHIFTSIZE,data
    args=ParseArg()
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout

    SHIFTSIZE=args.shiftsize
    data={}
    for x in TableIO.parse(args.chr_length_file,"simple"):
        # Fix: convert the length field BEFORE shifting (the original applied
        # >> to the raw field and converted afterwards).
        nbins=(long(x[1])>>SHIFTSIZE)+1
        data[x[0].strip()]=[0]*nbins
    parseAnnotationFile(args.input)
    for x in TableIO.parse(args.chr_length_file,"simple"):
        chrom=x[0].strip()
        length=long(x[1])
        for i,value in enumerate(data[chrom]):
            start=i<<SHIFTSIZE
            stop=(i+1)<<SHIFTSIZE
            if stop > length: stop=length
            print >>out,chrom+"\t"+str(start)+"\t"+str(stop)+"\t"+str(value)
Exemplo n.º 4
0
def Main():
    '''
    Write one "chrom<TAB>start<TAB>stop<TAB>value" line per bin of every
    chromosome listed in args.chr_length_file.

    A zero-filled list of (length >> SHIFTSIZE) + 1 bins is allocated per
    chromosome in the global `data`; parseAnnotationFile(args.input) is then
    called (defined elsewhere -- presumably it fills the bins) and the bins
    are printed to `out`. The last bin's stop is clipped to the chromosome
    length.
    '''
    global args, out, SHIFTSIZE, data
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout

    SHIFTSIZE = args.shiftsize
    data = {}
    for x in TableIO.parse(args.chr_length_file, "simple"):
        # Fix: convert the length field BEFORE shifting (the original applied
        # >> to the raw field and converted afterwards).
        nbins = (long(x[1]) >> SHIFTSIZE) + 1
        data[x[0].strip()] = [0] * nbins
    parseAnnotationFile(args.input)
    for x in TableIO.parse(args.chr_length_file, "simple"):
        chrom = x[0].strip()
        length = long(x[1])
        for i, value in enumerate(data[chrom]):
            start = i << SHIFTSIZE
            stop = (i + 1) << SHIFTSIZE
            if stop > length:
                stop = length
            print >> out, chrom + "\t" + str(start) + "\t" + str(
                stop) + "\t" + str(value)
Exemplo n.º 5
0
def main():
    ''' main scripts '''
    args = parse_argument()
    
    PromoterList =[]
    feature = []

    print "# loading and reading the promoter of Gene ... "
    if args.genefile:
        for g in TableIO.parse(args.genefile, "genebed"):
            a = g.promoter(args.bp)
            PromoterList.append(a)
    if args.rna:
        for g in TableIO.parse(args.rna, "genebed"):
            a = g.promoter(args.bp)
            PromoterList.append(a)
    if args.feature:
        feature = TableIO.parse(args.feature, 'bed')
    print "# loading and reading Done !" 
    
    PromoterData = readIntoBinIndex(PromoterList)
    FeatureData  = readIntoBinIndex(feature)
    
    for i in TableIO.parse(args.bed, 'bed'):
        overlapGene, overlapFeature, overlap_string = [], [], ''
        if i.strand not in ['+','-']: continue
        else:
            OverlapGene = getOverlapFeatures(i, PromoterData)
            if FeatureData:
                overlapFeature = getOverlapFeatures(i, FeatureData)
            for g in overlapGene +  overlapFeature:
                overlap_string += g.id+';'
            print i, "\t", overlap_string
Exemplo n.º 6
0
Arquivo: DB.py Projeto: nimezhu/xplib
 def __init__(self, tabix_file_name, **dict):
     """
     Open a tabix file and resolve its optional column header.

     wrapped in DBI.init(filename,"tabix")

     Keyword options read from **dict:
       header -- True : use the first row of the tabix file itself
                        (each field stripped);
                 list : use the list as given;
                 str  : use the first row of the file with that name.
       tabix  -- format name stored in self.tabix_format
                 (default "simple").

     If the tabix file cannot be opened a warning is written to stderr and
     self.data is left as None.
     """
     self.tabix_file_name = tabix_file_name
     self.dict = dict
     # Define the attribute up front so a failed open does not leave the
     # object without `data`.
     self.data = None
     try:
         self.data = pysam.Tabixfile(tabix_file_name)
     except Exception:
         print >>sys.stderr, "WARNING: Can't init the tabix file", tabix_file_name
     self.header = None
     header = dict.get("header")
     if header == True:
         f = TableIO.parse(tabix_file_name)
         try:
             self.header = [field.strip() for field in f.next()]
         finally:
             f.close()
     elif isinstance(header, list):
         self.header = header
     elif isinstance(header, str):
         fh = TableIO.parse(header)
         try:
             self.header = fh.next()
         finally:
             # the original never released this iterator
             fh.close()
     self.tabix_format = dict.get("tabix", "simple")
Exemplo n.º 7
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args,out
    args=ParseArg()
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    '''
    END OF IO TEMPLATE 
    '''
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)
    dbi=DBI.init(args.db,Tools.guess_format(args.db))
    references=dbi.bamfiles[0].references
    for i in TableIO.parse(fin,args.format):
        print i
        n=0
        c_count=0
        reads=dbi.query(i,args.method)
        for read in reads:
            compatible=Tools.compatible_with_transcript(read,i,references=references,strand=args.strand)
            print "HT:"
            for i0,r in enumerate(TableIO.parse(read.reads,"bam2bed12",references=references)):
                print "READ"+str(i0)+"\t",r
            print "COMPATIBLE:",compatible,"\n\n"
            if compatible: c_count+=1
            n+=1
        print "COMPATIBLE / ALL OVERLAP READS =  ",c_count,"/",n
        print "RATIO\t%.4f"%float(c_count)/n
Exemplo n.º 8
0
def Main():
    '''
    Report the coverage of every bin produced from the input records.

    Input records (args.input, format args.format; "stdin" or a ".gz" file
    are accepted) are appended to a `b()` container, converted with
    b.bin2cov, and one line per bin is written to `out`:
    chr, start, stop, binindex, coverage, coverage_nt, binlevel, binsize.
    When args.genome is given, bins are clipped to the chromosome size read
    from that file and bins lying entirely beyond it are skipped.
    '''
    global args,out
    args=ParseArg()
    # ---- IO TEMPLATE ----
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    if args.input=="stdin":
        fin=sys.stdin
    else:
        try:
            if args.input.split(".")[-1]=="gz":
                fin=gzip.open(args.input,"r")
            else:
                fin=open(args.input,"r")
        except IOError:
            print >>sys.stderr,"can't read file",args.input
            fin=sys.stdin
    # ---- END OF IO TEMPLATE ----
    chr_sizes=None
    if args.genome is not None:
        chr_sizes={}
        for x in TableIO.parse(args.genome):
            chr_sizes[x[0]]=int(x[1])
    bins=b()
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#","\t".join(sys.argv)
    print >>out,"chr\tstart\tstop\tbinindex\tcoverage\tcoverage_nt\tbinlevel\tbinsize "
    header=(args.format=="metabed")

    for i,x in enumerate(TableIO.parse(fin,args.format,header=header)):
        if i%1000==0:
            print >>sys.stderr,"reading %d entries\r"%i,
        bins.append(x)
    c=b.bin2cov(bins)
    for chrom in sorted(c.keys()):
        for i,x in enumerate(c[chrom]):
            (start,end)=b.bin2range(i)
            if chr_sizes is not None:
                size=chr_sizes[chrom]
                # Fix: use >= so that a bin starting exactly at the chromosome
                # end is skipped; with > it was clipped to zero width and the
                # division below raised ZeroDivisionError.
                if start >= size:
                    continue
                if end > size:
                    end=size
            print >>out,chrom,"\t",start,"\t",end,"\t",i,"\t",float(x)/(end-start),"\t",x,"\t",b.bin2level(i),"\t",end-start
Exemplo n.º 9
0
def Main():
    '''
    Classify every "oddsratiosnp" record by coverage, odds ratio and
    centromere overlap, and print the record plus summary counts.

    Counters are indexed as table[mark][k] where mark is 1 when the record
    overlaps a centromere interval (args.centromere) and 0 otherwise, and
    k is 1 when i.odds_ratio > args.chi2 and 0 otherwise. `h` holds records
    whose A and B nucleotide totals both exceed args.t ("High" coverage),
    `l` holds the rest ("Low").
    '''
    global args,out
    args=ParseArg()
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    h=[[0,0],[0,0]]
    l=[[0,0],[0,0]]
    d=Utils.readIntoBinIndex(TableIO.parse(args.centromere,"bed"))
    print >>out,"# Coverage Threshold: ",args.t
    print >>out,"# Chi2 Threshold:",args.chi2
    for i in TableIO.parse(args.input,"oddsratiosnp"):
        # mark = 1 as soon as one centromere interval overlaps
        mark=0
        for j in Utils.iterOverlapFeature(i,d):
            mark=1
            break
        high_odds=1 if i.odds_ratio > args.chi2 else 0
        if sum(i.A_nt_dis) > args.t and sum(i.B_nt_dis) > args.t:
            h[mark][high_odds]+=1
            print >>out,i,"\tHigh\t",mark
        else:
            print >>out,i,"\tLow\t",mark
            l[mark][high_odds]+=1
    print >>out,"# HighOddsRatio:",h[0][1]+l[0][1]+h[1][1]+l[1][1]
    print >>out,"# LowOddsRatio:",h[0][0]+l[0][0]+h[1][0]+l[1][0]
    print >>out,"#"
    print >>out,"# HighCoverage:",sum(h[1])+sum(h[0])
    print >>out,"# LowCoverage :",sum(l[1])+sum(l[0])
    print >>out,"#"
    print >>out,"# HighCoverage, HighOddsRatio",h[1][1]+h[0][1]
    print >>out,"# HighCoverage, LowOddsRatio",h[1][0]+h[0][0]
    print >>out,"#"
    print >>out,"# HighCoverage, InCentromere",sum(h[1])
    # Fix: the original printed sum(l[1]) here, i.e. the LOW-coverage
    # in-centromere count, under the "HighCoverage, NotInCentromere" label.
    print >>out,"# HighCoverage, NotInCentromere",sum(h[0])
    print >>out,"#"

    print >>out,"# HighCoverage, HighOddsRatio, InCentromere",h[1][1]
    print >>out,"# HighCoverage, HighOddsRatio, NotInCentromere",h[0][1]
    print >>out,"# HighCoverage, LowOddsRatio,  InCentromere",h[1][0]
    print >>out,"# HighCoverage, LowOddsRatio, NotInCentromere",h[0][0]
    print >>out,"# LowCoverage, HighOddsRatio, InCentromere",l[1][1]
    print >>out,"# LowCoverage, HighOddsRatio, NotInCentromere",l[0][1]
    print >>out,"# LowCoverage, LowOddsRatio, InCentromere",l[1][0]
    print >>out,"# LowCoverage, LowOddsRatio, NotInCentromere",l[0][0]
Exemplo n.º 10
0
def Main():
    '''
    Classify every "oddsratiosnp" record by coverage, odds ratio and
    centromere overlap, and print the record plus summary counts.

    Counters are indexed as table[mark][k] where mark is 1 when the record
    overlaps a centromere interval (args.centromere) and 0 otherwise, and
    k is 1 when i.odds_ratio > args.chi2 and 0 otherwise. `h` holds records
    whose A and B nucleotide totals both exceed args.t ("High" coverage),
    `l` holds the rest ("Low").
    '''
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    h = [[0, 0], [0, 0]]
    l = [[0, 0], [0, 0]]
    d = Utils.readIntoBinIndex(TableIO.parse(args.centromere, "bed"))
    print >> out, "# Coverage Threshold: ", args.t
    print >> out, "# Chi2 Threshold:", args.chi2
    for i in TableIO.parse(args.input, "oddsratiosnp"):
        # mark = 1 as soon as one centromere interval overlaps
        mark = 0
        for j in Utils.iterOverlapFeature(i, d):
            mark = 1
            break
        high_odds = 1 if i.odds_ratio > args.chi2 else 0
        if sum(i.A_nt_dis) > args.t and sum(i.B_nt_dis) > args.t:
            h[mark][high_odds] += 1
            print >> out, i, "\tHigh\t", mark
        else:
            print >> out, i, "\tLow\t", mark
            l[mark][high_odds] += 1
    print >> out, "# HighOddsRatio:", h[0][1] + l[0][1] + h[1][1] + l[1][1]
    print >> out, "# LowOddsRatio:", h[0][0] + l[0][0] + h[1][0] + l[1][0]
    print >> out, "#"
    print >> out, "# HighCoverage:", sum(h[1]) + sum(h[0])
    print >> out, "# LowCoverage :", sum(l[1]) + sum(l[0])
    print >> out, "#"
    print >> out, "# HighCoverage, HighOddsRatio", h[1][1] + h[0][1]
    print >> out, "# HighCoverage, LowOddsRatio", h[1][0] + h[0][0]
    print >> out, "#"
    print >> out, "# HighCoverage, InCentromere", sum(h[1])
    # Fix: the original printed sum(l[1]) here, i.e. the LOW-coverage
    # in-centromere count, under the "HighCoverage, NotInCentromere" label.
    print >> out, "# HighCoverage, NotInCentromere", sum(h[0])
    print >> out, "#"

    print >> out, "# HighCoverage, HighOddsRatio, InCentromere", h[1][1]
    print >> out, "# HighCoverage, HighOddsRatio, NotInCentromere", h[0][1]
    print >> out, "# HighCoverage, LowOddsRatio,  InCentromere", h[1][0]
    print >> out, "# HighCoverage, LowOddsRatio, NotInCentromere", h[0][0]
    print >> out, "# LowCoverage, HighOddsRatio, InCentromere", l[1][1]
    print >> out, "# LowCoverage, HighOddsRatio, NotInCentromere", l[0][1]
    print >> out, "# LowCoverage, LowOddsRatio, InCentromere", l[1][0]
    print >> out, "# LowCoverage, LowOddsRatio, NotInCentromere", l[0][0]
Exemplo n.º 11
0
def Main():
    '''
    Query every input record against one bamlist database per
    cell-line/histone-mark pair and print the hits.

    Output (to `out`): a "QR" line per query record, then for each mark the
    mark name followed by one "HT" line per result of DBI.query.
    '''
    global args,out
    cell_lines=["H1"]
    histone_marks=("input","H3K27ac","H3K27me3","H3K36me3","H3K4me1","H3K4me3","H3K9me3")

    args=ParseArg()
    out=sys.stdout
    if args.output!="stdout":
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout

    # one DBI handle per "<cell>_<mark>" name, in cell-major order
    marks=[cell+"_"+hm for cell in cell_lines for hm in histone_marks]
    dbi={}
    for mark in marks:
        dbi[mark]=DBI.init("/data/zhuxp/bam2x/data/bamlist/"+mark+".bamlist","bamlist")

    for idx,query in enumerate(TableIO.parse(args.input,args.input_format)):
        print >>out,"QR\t",query
        if idx%100==0:
            print >>sys.stderr,"query %d entries\r"%idx,
        for mark in marks:
            print >>out,mark,"\t"
            for hit in DBI.query(query,dbi[mark]):
                print >>out,"HT\t",hit
Exemplo n.º 12
0
def parseIterChrom(fn):
    '''
    Yield (chrom, positions, x2s, coverage) for each run of consecutive
    rows of file `fn` that share the same chromosome.

    Every row is unpacked as (chrom, pos, snp, x2, x2_matrix, nt_dist);
    x2_matrix is a "( a b c ... )" string whose integers are summed to give
    the row's coverage. A final tuple is always yielded, so an empty file
    produces (None, [], [], []).
    '''
    last_chrom=None
    positions=[]
    x2s=[]
    coverage=[]
    # `with` closes the file once parsing is done (the original leaked it).
    with open(fn) as fin:
        for x in TableIO.parse(fin,'simple'):
            (chrom,pos,snp,x2,x2_matrix,nt_dist)=x
            counts=x2_matrix.replace("( ","").replace(" )","").split(" ")
            s=sum(int(y) for y in counts)

            if last_chrom is not None and chrom!=last_chrom:
                # chromosome changed: emit the finished group, start afresh
                yield last_chrom,positions,x2s,coverage
                positions=[]
                x2s=[]
                coverage=[]
            coverage.append(s)
            positions.append(pos)
            x2s.append(x2)
            last_chrom=chrom
    yield last_chrom,positions,x2s,coverage
Exemplo n.º 13
0
def Main():
    '''
    Annotate every input record and append a per-type tally.

    Each record of args.input is passed to annotation(), which returns
    [type, name, subtype]; one tab-separated line per record is written to
    `out`, followed by "#<type><TAB><count>" lines.
    '''
    global args,out
    args=ParseArg()
    out=sys.stdout
    if args.output!="stdout":
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout

    count={}
    # dbi1: bed6 file of all kinds of RNA
    dbi1=DBI.init(args.db,"bed")
    # dbi2: bed12 file of lincRNA and mRNA with intron, exon, UTR
    dbi2=DBI.init(args.db_detail,"bed")
    genome=Genome('mouse', Release=67, account=None)
    for bed in TableIO.parse(args.input,args.format):
        typ,name,subtype=annotation(bed,dbi1,dbi2,genome)
        count[typ]=count.get(typ,0)+1
        columns=[bed.chr,bed.start,bed.stop,bed.id,name,bed.strand,typ,subtype]
        print >>out, "\t".join(str(col) for col in columns)

    summary=["#"+key+"\t%d"%(count[key]) for key in count.keys()]
    print >>out, "\n".join(summary)
Exemplo n.º 14
0
def Main():
    '''
    For every VCF record print the record followed by the query results
    from each BAM file.

    A provenance header and a "#"-prefixed column header (VCF.header() plus
    one "Sample_<k>" column per BAM) are written to `out` first. Each
    record's chromosome name is prefixed with args.chr_prefix before it is
    queried.
    '''
    global args,out
    args=ParseArg()
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)

    dbi=[]
    for k,bam in enumerate(args.bams):
        print >>out,"# SAMPLE_"+str(k+1)+" BAM File:",bam
        dbi.append(DBI.init(bam,"bam"))
    # column header line
    print >>out,"#",VCF.header(),
    for k,bam in enumerate(args.bams):
        print >>out,"\t","Sample_"+str(k+1),
    print >>out,""
    for n,vcf in enumerate(TableIO.parse(fin,"vcf")):
        vcf.chr=args.chr_prefix+vcf.chr
        if n%100==0:
            print >>sys.stderr,"processing",n,"vcf\r",
        print >>out,vcf,
        for db in dbi:
            print >>out,"\t",
            for r in db.query(vcf):
                print >>out,format(r),
        print >>out,""
Exemplo n.º 15
0
def Main():
    '''
    Convert "REGION" rows of the input into scored, named intervals.

    The id prefix is args.prefix plus the input file's base name with the
    ".OddsRatio.peaks", ".LogR.Peaks" and ".out" substrings removed. For
    each row whose first field is "REGION" the score is -10*log10(x[4]),
    capped at MAX_SCORE (also used when x[4] is 0).
    '''
    global args,out
    MAX_SCORE=200
    args=ParseArg()
    out=sys.stdout
    if args.output!="stdout":
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout

    name=args.input.split("/")[-1]
    for token in (".OddsRatio.peaks",".LogR.Peaks",".out"):
        name=name.replace(token,"")
    name=args.prefix+name

    region_no=0
    for row in TableIO.parse(args.input,"simple"):
        if row[0]!="REGION":
            continue
        region_no+=1
        if row[4]==0:
            score=MAX_SCORE
        else:
            score=-10*math.log(row[4],10)
            if score > MAX_SCORE:
                score=MAX_SCORE
        ID=name+"_ORP_"+str(region_no)
        print >>out,row[1]+"\t"+str(row[2])+"\t"+str(row[3])+"\t"+ID+"\t",
        print >>out,"%.2f"%score
Exemplo n.º 16
0
def Main():
    '''
    Join two tables on a key column and print the matching row pairs.

    Rows of args.input ("stdin" or a ".gz" file are accepted) are indexed by
    column args.c1 (1-based); a later row with the same key replaces an
    earlier one. Every row of args.input2 whose column args.c2 (1-based)
    matches an indexed key is written to `out` as
    "<row from input><TAB><row from input2>".
    '''
    global args, out
    args = ParseArg()
    # ---- IO TEMPLATE ----
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    if args.input == "stdin":
        fin = sys.stdin
    else:
        try:
            if args.input.split(".")[-1] == "gz":
                fin = gzip.open(args.input, "r")
            else:
                fin = open(args.input, "r")
        except IOError:
            print >> sys.stderr, "can't read file", args.input
            fin = sys.stdin
    # ---- END OF IO TEMPLATE ----
    print >> out, "# This data was generated by program ", sys.argv[
        0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)

    h1 = {}
    for i in TableIO.parse(fin):
        h1[i[args.c1 - 1]] = i

    # The original also stored every row of input2 in a dict that was never
    # read; it is dropped so the second table is streamed instead of kept
    # in memory.
    for j in TableIO.parse(args.input2):
        key = j[args.c2 - 1]
        if key in h1:
            print >> out, TableIO.format_string(
                h1[key]) + "\t" + TableIO.format_string(j)
Exemplo n.º 17
0
def main():
    ''' main scripts '''
    args = parse_argument()
    bed  = args.bed
    gene = readIntoBinIndex(TableIO.parse( args.genefile, "genebed") )
    for i in TableIO.parse(args.bed, 'bed'):
        if i.strand not in ['+','-']: continue
        else:
            OverlapGene    = getOverlapFeatures(i, gene)
            Overlap_dict   = Classify_Overlap(i, OverlapGene)
            overlap_string = ''
            for k,v in Overlap_dict.iteritems():
                if v:
                    overlap_string += "".join([ str(k+'_'+each)+';' for each in v])
            if not overlap_string:
                overlap_string = 'intergenic'
            print i, overlap_string
Exemplo n.º 18
0
def Main():
    '''
    Classify each input interval by where it falls relative to the genes it
    overlaps, and write per-class counts to "<output base>.cisStat".

    For every hit from dbi.query(x) (consecutive hits with the same align_id
    are reported once) the location is:
      * "MiRNA" / "Non-coding" when cds_start == cds_stop (MiRNA if the
        align_id starts with "Mir");
      * "5-UTR" / "3-UTR" when the interval lies entirely before/after the
        CDS (swapped for hits that are not on the '+' strand);
      * otherwise whatever judge_exon(x, hit) returns.
    Intervals without any hit are reported as "Intergenic".
    '''
    import os  # local import: only needed to build the stats file path

    global args
    args=ParseArg()
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout

    dbi=DBI.init(args.db,"genebed")
    count={}
    count["Intergenic"]=0
    for x in TableIO.parse(args.input,args.input_format):
        flag=0
        gene=""
        for hit in dbi.query(x):
            flag=1
            if hit.align_id==gene:
                continue
            gene=hit.align_id
            if hit.cds_start==hit.cds_stop:
                if hit.align_id[0:3]=="Mir":
                    loc="MiRNA"
                else:
                    loc="Non-coding"
            else:
                # UTR labels flip for hits that are not on the '+' strand
                if hit.strand=="+":
                    before_cds,after_cds="5-UTR","3-UTR"
                else:
                    before_cds,after_cds="3-UTR","5-UTR"
                if x.stop<=hit.cds_start:
                    loc=before_cds
                elif x.start>=hit.cds_stop:
                    loc=after_cds
                else:
                    loc=judge_exon(x,hit)
            print >>out,"\t".join (str(f) for f in [x.chr,x.start,x.stop,x.id,x.score,x.strand,hit.align_id,loc])
            count[loc]=count.get(loc,0)+1

        if flag==0:
            print >>out, "\t".join (str(f) for f in [x.chr,x.start,x.stop,x.id,x.score,x.strand,"None","Intergenic"])
            count["Intergenic"]+=1

    # Fix: take the base name from the FILE part only. The original split the
    # whole path on ".", so "./run/x.out" or "a.b/x.out" produced a wrong
    # location (".cisStat", "a.cisStat").
    directory,filename=os.path.split(args.output)
    stat_path=os.path.join(directory,filename.split(".")[0]+".cisStat")
    # `with` makes sure the statistics are flushed and the file is closed
    with open(stat_path,"w") as out2:
        for key in sorted(count.keys()):
            print >>out2,key+"\t"+str(count[key])
Exemplo n.º 19
0
def Main():
    '''
    Append RPKM columns for every data set to both ends of each interaction.

    For each data file (args.data / args.name / args.format, matched by
    index) the total read count is taken from pysam.idxstats for "bam" or by
    counting BED records otherwise, and a DBI handle is opened. Each
    non-empty line of args.input is split on tabs; fields 0-2 and 7-9 give
    the two regions, whose RPKM values (one per data set) are inserted after
    fields 0-6 and 7-13 respectively. Lines involving "chrM" are skipped.
    Exits with status 1 when the numbers of data files and names differ.
    '''
    args = ParseArg()

    if len(args.data) != len(args.name):
        print >> sys.stderr, "ERROR: Number of data is not the same as number of names!"
        # Fix: signal failure to the caller (the original exited with 0)
        sys.exit(1)

    # store data information
    data = {}
    total_reads = {}
    for i, path in enumerate(args.data):
        temp_name = args.name[i]
        print >> sys.stderr, "\n Reading data file:" + temp_name + "..."
        total_reads[temp_name] = 0
        if args.format[i] == "bam":
            # third column of each idxstats line is summed
            total_reads[temp_name] = sum(
                int(l.rstrip('\n').split('\t')[2])
                for l in pysam.idxstats(path))
        else:
            Format = "bed"
            for b in TableIO.parse(path, Format):
                total_reads[temp_name] += 1
                if total_reads[temp_name] % 50000 == 0:
                    print >> sys.stderr, "  reading %d reads..\r" % (
                        total_reads[temp_name]),
        data[temp_name] = DBI.init(path, args.format[i])

    # fixed column order shared by the header and every row
    names = data.keys()

    # `with` closes both files; the input is streamed line by line instead
    # of being read into memory at once.
    with open(args.output, 'w') as output:
        with open(args.input, 'r') as Input:
            # header
            header = ["chr", "start", "end", "type", "name", "subtype",
                      "count"] + names
            print >> output, "\t".join(
                g + "_%d" % (f) for f in [1, 2]
                for g in header) + "\tinteraction\tp-value"

            num = 0
            print >> sys.stderr, "Start process interactions:"
            for l in Input:
                if l.strip() == '': continue
                l = l.strip().split('\t')
                num = num + 1
                if l[0] == "chrM" or l[7] == "chrM": continue
                C1 = Bed([l[0], int(l[1]), int(l[2])])
                C2 = Bed([l[7], int(l[8]), int(l[9])])
                rpkm1 = "\t".join(
                    str(RPKM(C1, data[n], total_reads[n], n)) for n in names)
                rpkm2 = "\t".join(
                    str(RPKM(C2, data[n], total_reads[n], n)) for n in names)
                print >> output, "\t".join(
                    str(f)
                    for f in l[:7] + [rpkm1] + l[7:14] + [rpkm2, l[14], l[15]])
                if num % 1000 == 0:
                    print >> sys.stderr, "  Output interaction: %d\r" % (num),
Exemplo n.º 20
0
def main():
    ''' main scripts '''
    args = parse_argument()
    bed = args.bed
    gene = readIntoBinIndex(TableIO.parse(args.genefile, "genebed"))
    for i in TableIO.parse(args.bed, 'bed'):
        if i.strand not in ['+', '-']: continue
        else:
            OverlapGene = getOverlapFeatures(i, gene)
            Overlap_dict = Classify_Overlap(i, OverlapGene)
            overlap_string = ''
            for k, v in Overlap_dict.iteritems():
                if v:
                    overlap_string += "".join(
                        [str(k + '_' + each) + ';' for each in v])
            if not overlap_string:
                overlap_string = 'intergenic'
            print i, overlap_string
Exemplo n.º 21
0
 def toBed12Tuple(self,chr="chr",strand="read2"):
     '''
     Return self.reads parsed with the "bam2bed12tuple" format as a list.

     `chr` is forwarded as the parser's `references` argument and `strand`
     as its `strand` argument. (Marked "test now" by the original author.)
     '''
     from xplib import TableIO
     parsed=TableIO.parse(self.reads,"bam2bed12tuple",references=chr,strand=strand)
     return list(parsed)
Exemplo n.º 22
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    if args.input == "stdin":
        fin = sys.stdin
    else:
        try:
            x = args.input.split(".")
            if x[-1] == "gz":
                fin = gzip.open(args.input, "r")
            else:
                fin = open(args.input, "r")
        except IOError:
            print >> sys.stderr, "can't read file", args.input
            fin = sys.stdin
    '''
    END OF IO TEMPLATE 
    '''
    print >> out, "# This data was generated by program ", sys.argv[
        0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)
    enhancer_dbi = DBI.init(args.enhancer_tabix,
                            "tabix",
                            tabix="metabed",
                            header=re.sub(".gz$", ".header",
                                          args.enhancer_tabix))
    promoter_dbi = DBI.init(args.promoter_tabix,
                            "tabix",
                            tabix="metabed",
                            header=re.sub(".gz$", ".header",
                                          args.promoter_tabix))

    for i in TableIO.parse(fin, args.format):
        tss = i.tss()
        tss.start -= args.size
        tss.stop += args.size
        if tss.start < 0: tss.start = 0
        tss.id += "_near" + str(args.size)
        print "QR\t", tss
        for e in enhancer_dbi.query(tss):
            print "EH\t", e
        for p in promoter_dbi.query(tss):
            print "PM\t", p
Exemplo n.º 23
0
 def query(self,x,**kwargs):
     '''
     yield the overlap feature in tabix index files

     Fetches the region x.chr:x.start-x.stop from self.data and parses the
     rows with self.tabix_format / self.header. Any error while fetching or
     parsing ends the iteration quietly (best effort).
     '''
     try:
         for item in TableIO.parse(self.data.fetch(x.chr,x.start,x.stop),format=self.tabix_format,header=self.header):
             yield item
     except Exception:
         # `except Exception` lets KeyboardInterrupt/SystemExit through, and a
         # plain `return` is the proper way to finish a generator
         # (`raise StopIteration` inside one is an error under PEP 479).
         return
Exemplo n.º 24
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args,out
    args=ParseArg()
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    if args.input=="stdin":
        fin=sys.stdin
    else:
        try:
            x=args.input.split(".")
            if x[-1]=="gz":
                fin=gzip.open(args.input,"r")
            else:
                fin=open(args.input,"r")
        except IOError:
            print >>sys.stderr,"can't read file",args.input
            fin=sys.stdin
    '''
    END OF IO TEMPLATE 
    '''
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)
    h1={}
    h2={}

    for i in TableIO.parse(fin):
        h1[i[args.c1-1]]=i
    for j in TableIO.parse(args.input2):
        h2[j[args.c2-1]]=j

        if h1.has_key(j[args.c2-1]):
            print >>out,TableIO.format_string(h1[j[args.c2-1]])+"\t"+TableIO.format_string(j)
Exemplo n.º 25
0
Arquivo: DB.py Projeto: yu68/bam2x
 def __init__(self,file,**dict):
     '''
     Wrapped in xplib.DBI.init()

     Build self.data as a binindex over `file`. A list (or list subclass)
     is indexed directly; anything else is parsed with TableIO using the
     required keyword argument `format`.
     '''
     # isinstance also accepts list subclasses, which the original exact
     # type comparison wrongly sent to the parser.
     if isinstance(file,list):
         records=file
     else:
         records=TableIO.parse(file,dict['format'])
     self.data=binindex(records)
Exemplo n.º 26
0
 def __init__(self,file,**dict):
     '''
     Wrapped in xplib.DBI.init()

     Build self.data as a binindex over `file`. A list (or list subclass)
     is indexed directly; anything else is parsed with TableIO using the
     required keyword argument `format`.
     '''
     # isinstance also accepts list subclasses, which the original exact
     # type comparison wrongly sent to the parser.
     if isinstance(file,list):
         records=file
     else:
         records=TableIO.parse(file,dict['format'])
     self.data=binindex(records)
Exemplo n.º 27
0
def test():
    if len(sys.argv)==1:
        print >>sys.stderr,"Usage: Utils.py file.bed"
        exit()
    a=TableIO.parse(sys.argv[1],'bed')
    data=readIntoBinIndex(a)
    bed=Bed(["chr1",100000,200000,".",".","."])
    g=getOverlapFeatures(bed,data)
    print "Overlap with",bed
    for i in g:
        print i
Exemplo n.º 28
0
def test():
    if len(sys.argv) == 1:
        print >> sys.stderr, "Usage: Utils.py file.bed"
        exit()
    a = TableIO.parse(sys.argv[1], 'bed')
    data = readIntoBinIndex(a)
    bed = Bed(["chr1", 100000, 200000, ".", ".", "."])
    g = getOverlapFeatures(bed, data)
    print "Overlap with", bed
    for i in g:
        print i
Exemplo n.º 29
0
Arquivo: DB.py Projeto: yu68/bam2x
 def query(self,x):
     '''
     yield the overlap feature in tabix index files

     Fetches the region x.chr:x.start-x.stop from self.data and parses the
     rows with the format given by the "tabix" option (default "simple")
     and self.header. Any error while fetching or parsing ends the
     iteration quietly (best effort).
     '''
     f=self.dict.get("tabix","simple")
     try:
         for item in TableIO.parse(self.data.fetch(x.chr,x.start,x.stop),format=f,header=self.header):
             yield item
     except Exception:
         # `except Exception` lets KeyboardInterrupt/SystemExit through, and a
         # plain `return` is the proper way to finish a generator
         # (`raise StopIteration` inside one is an error under PEP 479).
         return
Exemplo n.º 30
0
def parseIterRegion(fn):
    '''
    Yield each region (gap < args.gap) of the records stored in fn.

    fn is opened and parsed with the TableIO 'simple' format; every row
    must unpack into (chrom, pos, snp, x2, x2_matrix, nt_dist), where
    x2_matrix is text such as "( 1 2 3 4 )".

    A record is skipped when the sum of its first four counts (CV) is
    greater than args.reads, when CV is zero (this used to raise
    ZeroDivisionError), or when (count[1] + count[3]) / CV is below 0.05.
    A kept record extends the current region while it is on the same
    chromosome and closer than args.gap to the previous kept position;
    otherwise the current region is yielded and a new one is started.

    Each yielded value is (chrom, positions, x2s, matrix_x2, snps).  One
    final tuple is always yielded, so an input without any kept record
    yields (None, [], [], [], []).  The file is closed once parsing ends.
    '''
    last_chrom = None
    last_position = 0
    positions = []
    x2s = []
    snps = []
    matrix_x2 = []
    with open(fn) as fin:
        for x in TableIO.parse(fin, 'simple'):
            (chrom, pos, snp, x2, x2_matrix, nt_dist) = x

            # "( a b c d )" -> [a, b, c, d]
            cells = x2_matrix.replace("( ", "").replace(" )", "").split(" ")
            t = [int(y) for y in cells]

            # filter
            CV = t[0] + t[1] + t[2] + t[3]
            if CV == 0:
                # no coverage: the ratio below is undefined
                continue
            MR = (float(t[1]) + float(t[3])) / CV
            if CV > args.reads or MR < 0.05:
                continue
            # end of filter

            same_region = (last_chrom is None) or (
                chrom == last_chrom and pos - last_position < args.gap)
            if not same_region:
                yield last_chrom, positions, x2s, matrix_x2, snps
                # fresh lists: the consumer keeps the ones just yielded
                positions = []
                x2s = []
                matrix_x2 = []
                snps = []
            matrix_x2.append(t)
            positions.append(pos)
            x2s.append(x2)
            snps.append(snp)
            last_chrom = chrom
            last_position = pos
    yield last_chrom, positions, x2s, matrix_x2, snps
Exemplo n.º 31
0
def Main():
    '''
    Parse args.input with the "oddsratiosnp" format and write each record.

    args.output selects the destination: "stdout" means standard output,
    anything else is opened for writing.  When that file cannot be opened
    a message is written to stderr and standard output is used instead.

    Records are written to the selected destination; previously they were
    always printed to standard output and the opened file stayed empty.
    '''
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    for i in TableIO.parse(args.input, "oddsratiosnp"):
        print >> out, i
Exemplo n.º 32
0
def parseAnnotationFile(fn):
    '''
    Add every annotation of fn to the global per-bin counters in data.

    fn is parsed with args.format ("bam" is read through the "bam2bed"
    parser).  Each record increments data[chr][bin] for all bins from
    start >> SHIFTSIZE to stop >> SHIFTSIZE inclusive.  Records whose
    chromosome is not a key of data are reported on stderr and skipped.
    '''
    fmt="bam2bed" if args.format=="bam" else args.format
    for rec in TableIO.parse(fn,fmt):
        if data.has_key(rec.chr):
            first=rec.start>>SHIFTSIZE
            last=rec.stop>>SHIFTSIZE
            for idx in range(first,last+1):
                data[rec.chr][idx]+=1
        else:
            print >>sys.stderr,"ignore",rec
            print >>sys.stderr,"since this chromosome size is not in ",args.chr_length_file
Exemplo n.º 33
0
def parseAnnotationFile(fn):
    '''
    Count, bin by bin, how many annotations of fn cover each bin.

    The parser format is args.format, except that "bam" is mapped to
    "bam2bed".  For a record on a chromosome present in the global data,
    every bin index between start >> SHIFTSIZE and stop >> SHIFTSIZE
    (both included) is incremented; other records are reported on stderr
    and ignored.
    '''
    parser_format = args.format
    if parser_format == "bam":
        parser_format = "bam2bed"
    for feature in TableIO.parse(fn, parser_format):
        if not data.has_key(feature.chr):
            print >> sys.stderr, "ignore", feature
            print >> sys.stderr, "since this chromosome size is not in ", args.chr_length_file
            continue
        last_bin = feature.stop >> SHIFTSIZE
        bin_index = feature.start >> SHIFTSIZE
        while bin_index <= last_bin:
            data[feature.chr][bin_index] += 1
            bin_index += 1
Exemplo n.º 34
0
def Main():
    '''
    Write every 'transunit' record parsed from args.input to the output.

    The output is standard output when args.output is "stdout"; otherwise
    the named file is opened for writing, falling back to standard output
    (with a message on stderr) when it cannot be opened.
    '''
    global args,out
    args=ParseArg()
    out=sys.stdout
    if args.output!="stdout":
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    for unit in TableIO.parse(args.input,'transunit'):
        print >>out,unit
Exemplo n.º 35
0
def parseIterRegion(fn):
    '''
    Yield each region (gap < args.gap) of the records stored in fn.

    fn is opened and parsed with the TableIO 'simple' format; every row
    must unpack into (chrom,pos,snp,x2,x2_matrix,nt_dist), where
    x2_matrix is text such as "( 1 2 3 4 )".

    A record is skipped when the sum of its first four counts (CV) is
    greater than args.reads, when CV is zero (this used to raise
    ZeroDivisionError), or when (count[1]+count[3])/CV is below 0.05.
    A kept record extends the current region while it is on the same
    chromosome and closer than args.gap to the previous kept position;
    otherwise the current region is yielded and a new one is started.

    Each yielded value is (chrom,positions,x2s,matrix_x2,snps).  One
    final tuple is always yielded, so an input without any kept record
    yields (None,[],[],[],[]).  The file is closed once parsing ends.
    '''
    last_chrom=None
    last_position=0
    positions=[]
    x2s=[]
    snps=[]
    matrix_x2=[]
    with open(fn) as fin:
        for x in TableIO.parse(fin,'simple'):
            (chrom,pos,snp,x2,x2_matrix,nt_dist)=x

            # "( a b c d )" -> [a, b, c, d]
            cells=x2_matrix.replace("( ","").replace(" )","").split(" ")
            t=[int(y) for y in cells]

            # filter
            CV=t[0]+t[1]+t[2]+t[3]
            if CV==0:
                # no coverage: the ratio below is undefined
                continue
            MR=(float(t[1])+float(t[3]))/CV
            if CV>args.reads or MR<0.05:
                continue
            # end of filter

            same_region=(last_chrom is None) or (chrom==last_chrom and pos-last_position < args.gap)
            if not same_region:
                yield last_chrom,positions,x2s,matrix_x2,snps
                # fresh lists: the consumer keeps the ones just yielded
                positions=[]
                x2s=[]
                matrix_x2=[]
                snps=[]
            matrix_x2.append(t)
            positions.append(pos)
            x2s.append(x2)
            snps.append(snp)
            last_chrom=chrom
            last_position=pos
    yield last_chrom,positions,x2s,matrix_x2,snps
Exemplo n.º 36
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    '''
    END OF IO TEMPLATE 
    '''
    print >> out, "# This data was generated by program ", sys.argv[
        0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)
    dbi = DBI.init(args.db, Tools.guess_format(args.db))
    references = dbi.bamfiles[0].references
    for i in TableIO.parse(fin, args.format):
        print i
        n = 0
        c_count = 0
        reads = dbi.query(i, args.method)
        for read in reads:
            compatible = Tools.compatible_with_transcript(
                read, i, references=references, strand=args.strand)
            print "HT:"
            for i0, r in enumerate(
                    TableIO.parse(read.reads,
                                  "bam2bed12",
                                  references=references)):
                print "READ" + str(i0) + "\t", r
            print "COMPATIBLE:", compatible, "\n\n"
            if compatible: c_count += 1
            n += 1
        print "COMPATIBLE / ALL OVERLAP READS =  ", c_count, "/", n
        print "RATIO\t%.4f" % float(c_count) / n
Exemplo n.º 37
0
def Main():
    '''
    This program is a test for TableIO.parse(file.bam,"bam2bed")

    Every record parsed from args.input is written to args.output.  When
    args.format is "guess" it is replaced by the result of
    Tools.guess_format(args.input).  Note that the parser is given the
    path args.input itself; the handle opened as fin is not read here.
    '''
    global args,out
    args=ParseArg()
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    if args.format=="guess":
        args.format=Tools.guess_format(args.input)
    for record in TableIO.parse(args.input,args.format):
        print >>out,record
Exemplo n.º 38
0
def test():
    if len(sys.argv)==1:
        print >>sys.stderr,"Usage: Utils.py file.bed"
        exit()
    a=TableIO.parse(sys.argv[1],'genebed')
    data=readIntoBinIndex(a)
    bed=Bed( ["chr12", 54380000, 54392000, "HOXC", 0, "+"] )
    g=getOverlapFeatures(bed,data)
    Overlap_dict = Classify_Overlap(bed, g)
    overlap_string = ''
    for k, v in Overlap_dict.iteritems():
        if v:
            overlap_string += "".join([ str(k+'_'+each)+';' for each in v])
    print bed, overlap_string
Exemplo n.º 39
0
def test():
    if len(sys.argv) == 1:
        print >> sys.stderr, "Usage: Utils.py file.bed"
        exit()
    a = TableIO.parse(sys.argv[1], 'genebed')
    data = readIntoBinIndex(a)
    bed = Bed(["chr12", 54380000, 54392000, "HOXC", 0, "+"])
    g = getOverlapFeatures(bed, data)
    Overlap_dict = Classify_Overlap(bed, g)
    overlap_string = ''
    for k, v in Overlap_dict.iteritems():
        if v:
            overlap_string += "".join(
                [str(k + '_' + each) + ';' for each in v])
    print bed, overlap_string
Exemplo n.º 40
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args,out
    args=ParseArg()
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    if args.input=="stdin":
        fin=sys.stdin
    else:
        try:
            x=args.input.split(".")
            if x[-1]=="gz":
                fin=gzip.open(args.input,"r")
            else:
                fin=open(args.input,"r")
        except IOError:
            print >>sys.stderr,"can't read file",args.input
            fin=sys.stdin
    '''
    END OF IO TEMPLATE 
    '''
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)
    enhancer_dbi=DBI.init(args.enhancer_tabix,"tabix",tabix="metabed",header=re.sub(".gz$",".header",args.enhancer_tabix))
    promoter_dbi=DBI.init(args.promoter_tabix,"tabix",tabix="metabed",header=re.sub(".gz$",".header",args.promoter_tabix))

    for i in TableIO.parse(fin,args.format):
        tss=i.tss()
        tss.start-=args.size
        tss.stop+=args.size
        if tss.start<0: tss.start=0
        tss.id+="_near"+str(args.size)
        print "QR\t",tss
        for e in enhancer_dbi.query(tss):
            print "EH\t",e
        for p in promoter_dbi.query(tss):
            print "PM\t",p
Exemplo n.º 41
0
def Main():
    '''
    Append per-dataset RPKM values to both ends of every interaction.

    args.data, args.name and args.format are parallel lists describing the
    datasets.  For each dataset the total read count is obtained (third
    column of pysam.idxstats for "bam", otherwise by counting the records
    parsed as "bed") and a DBI handle is created.  Each non-empty line of
    args.input is split on tabs; lines whose first or eighth column is
    "chrM" are skipped.  For the two regions (columns 1-3 and 8-10) the
    RPKM of every dataset is computed and written to args.output between
    the original columns, after a header line.

    Changes with respect to the previous version:
      * a mismatch between the number of datasets and names now exits with
        status 1 instead of 0;
      * the progress check was indented with a tab inside a space-indented
        loop body; it now uses spaces like the rest of the function;
      * input and output files are closed when the work is done;
      * dataset names are read once, so header and value columns are
        guaranteed to share the same order.
    '''
    args=ParseArg()

    if len(args.data)!=len(args.name):
        print >> sys.stderr, "ERROR: Number of data is not the same as number of names!"
        sys.exit(1)

    # store data information
    data={}
    total_reads={}
    for i in range(len(args.data)):
        temp_name=args.name[i]
        print >> sys.stderr, "\n Reading data file:"+temp_name+"..."
        total_reads[temp_name]=0
        if args.format[i]=="bam":
            total_reads[temp_name] = sum(int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(args.data[i]))
        else:
            for b in TableIO.parse(args.data[i],"bed"):
                total_reads[temp_name]+=1
                if total_reads[temp_name]%50000==0:
                    print >> sys.stderr, "  reading %d reads..\r"%(total_reads[temp_name]),
        data[temp_name]=DBI.init(args.data[i],args.format[i])

    names=list(data.keys())

    output=open(args.output,'w')
    try:
        Input=open(args.input,'r')
        try:
            # header
            header=["chr","start","end","type","name","subtype","count"]+names
            print >> output, "\t".join(g+"_%d"%(f) for f in [1,2] for g in header)+"\tinteraction\tp-value"

            num=0
            print >> sys.stderr, "Start process interactions:"
            for raw in Input:
                if raw.strip()=='': continue
                l=raw.strip().split('\t')
                num=num+1
                if l[0]=="chrM" or l[7]=="chrM": continue
                C1=Bed([l[0],int(l[1]),int(l[2])])
                C2=Bed([l[7],int(l[8]),int(l[9])])
                rpkm1="\t".join(str(RPKM(C1,data[n],total_reads[n],n)) for n in names)
                rpkm2="\t".join(str(RPKM(C2,data[n],total_reads[n],n)) for n in names)
                print >> output, "\t".join(str(f) for f in l[:7]+[rpkm1]+l[7:14]+[rpkm2,l[14],l[15]])
                if num%1000==0:
                    print >> sys.stderr, "  Output interaction: %d\r"%(num),
        finally:
            Input.close()
    finally:
        output.close()
Exemplo n.º 42
0
 def __init__(self,bamfiles,**dict):
     '''
     Collect the bam files to query in self.bamfiles.

     bamfiles is either a list, or a str naming a file that is parsed with
     the TableIO "simple" format and whose first column gives the list
     entries.  Every str entry is opened with pysam.Samfile(..., "rb");
     when that fails a warning goes to stderr and the entry is kept as the
     original string.  Entries that are not str are stored unchanged.
     '''
     if type(bamfiles) is str:
         bamfiles=[row[0] for row in TableIO.parse(bamfiles,"simple")]
     self.bamfiles=[]
     for entry in bamfiles:
         if type(entry) is str:
             try:
                 entry=pysam.Samfile(entry,"rb")
             except:
                 print >>sys.stderr,"WARNING: Can't init the bam file",entry
         self.bamfiles.append(entry)
Exemplo n.º 43
0
Arquivo: DB.py Projeto: yu68/bam2x
 def __init__(self,bamfiles,**dict):
     '''
     Fill self.bamfiles from a list of bam files or from a list file.

     A str argument is taken as the name of a table (TableIO "simple"
     format) whose first column lists the entries; otherwise the argument
     is iterated directly.  str entries are opened through pysam.Samfile
     in "rb" mode, and an entry that cannot be opened is reported on
     stderr and stored as given.  Other entries are stored as they are.
     '''
     entries=bamfiles
     if type(bamfiles) is str:
         entries=[]
         for row in TableIO.parse(bamfiles,"simple"):
             entries.append(row[0])
     opened=[]
     self.bamfiles=opened
     for item in entries:
         if type(item) is str:
             try:
                 item=pysam.Samfile(item,"rb")
             except:
                 print >>sys.stderr,"WARNING: Can't init the bam file",item
         opened.append(item)
Exemplo n.º 44
0
def ParseInput(fin):
    '''
    Group the rows of fin into (vcf, hm, hits) tuples, one per "VCF" row.

    Rows are read with TableIO.parse(fin).  A row whose first field is
    "VCF" starts a new group built from VCF(row[1:]); a following "HM "
    row sets hm and a "DIS " row sets hits (both default to []).  A group
    is yielded when the next "VCF" row, or the end of the input, is
    reached.

    An input without any "VCF" row now yields nothing; the unconditional
    final yield used to fail on the unassigned locals in that case.
    '''
    started=False
    vcf=None
    hm=[]
    hits=[]
    for x in TableIO.parse(fin):
        tag=x[0]
        if tag=="VCF":
            if started:
                yield (vcf,hm,hits)
            vcf=VCF(x[1:])
            hm=[]
            hits=[]
            started=True
        # NOTE(review): these tags are compared with their trailing space,
        # presumably matching how the producing script writes them - confirm.
        elif tag=="HM ":
            hm=x[1:]
        elif tag=="DIS ":
            hits=x[1:]

    if started:
        yield (vcf,hm,hits)
Exemplo n.º 45
0
def Main():
    '''
    Extend every bed record of args.input by args.extend on both sides.

    Output goes to standard output when args.output is "stdout", else to
    the named file (falling back to standard output, with a message on
    stderr, when it cannot be opened).  Each record gets start-extend,
    stop+extend and the suffix "_extend_<extend>bp" appended to its id.

    A start pushed below zero is now clamped to 0, since a negative
    coordinate is not a valid bed start, and the input file is closed
    after the last record.
    '''
    global args,out
    args=ParseArg()
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    extend=args.extend
    f=open(args.input)
    try:
        for bed in TableIO.parse(f,"bed"):
            bed.start-=extend
            if bed.start<0:
                bed.start=0
            bed.stop+=extend
            bed.id+=str("_extend_"+str(extend)+"bp")
            print >>out,bed
    finally:
        f.close()
Exemplo n.º 46
0
def parse(fin):
    '''
    Group the rows of fin into (vcf, marks, hits) tuples, one per "QR" row.

    Rows are read with TableIO.parse(fin).  A "QR" row starts a new group
    built from VCF(row[1:]).  Any row that is neither "QR" nor "HT" must
    have a first field of the form "<cell>_<hm>"; its hm part is recorded
    and becomes the current mark.  An "HT" row stores the evaluated second
    field under the current mark.  A group is yielded as
    (vcf, sorted marks, hits) when the next "QR" row, or the end of the
    input, is reached.

    An input without any "QR" row now yields nothing; the unconditional
    final yield used to fail on the unassigned locals in that case.
    '''
    started=False
    for x in TableIO.parse(fin):
        if x[0]=="QR":
            if started:
                yield (vcf,sorted(HM.keys()),hits)
            vcf=VCF(x[1:])
            HM={}
            hits={}
            started=True
        elif x[0]=="HT":
            # SECURITY NOTE(review): eval executes whatever the input file
            # contains.  Only feed this parser trusted files, or switch to
            # ast.literal_eval once the producer is known to write literals.
            hit=eval(x[1])
            hits[hm]=hit
        else:
            (cell,hm)=x[0].split("_")
            if not HM.has_key(hm):
                HM[hm]=1
    if started:
        yield (vcf,sorted(HM.keys()),hits)
Exemplo n.º 47
0
def Main():
    '''
    This program is a test for TableIO.parse(file.bam,"bam2bed")

    Each record produced by the "bam2bed" parser for args.input is written
    to the output, which is standard output when args.output is "stdout"
    or when the requested file cannot be opened for writing.
    '''
    global args, out
    args = ParseArg()
    out = sys.stdout
    if args.output != "stdout":
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    for record in TableIO.parse(args.input, "bam2bed"):
        print >> out, record
Exemplo n.º 48
0
def Main():
    '''
    Extend every bed record of args.input by args.extend on both sides.

    Output goes to standard output when args.output is "stdout", else to
    the named file (falling back to standard output, with a message on
    stderr, when it cannot be opened).  Each record gets start - extend,
    stop + extend and the suffix "_extend_<extend>bp" appended to its id.

    A start pushed below zero is now clamped to 0, since a negative
    coordinate is not a valid bed start, and the input file is closed
    after the last record.
    '''
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    extend = args.extend
    f = open(args.input)
    try:
        for bed in TableIO.parse(f, "bed"):
            bed.start -= extend
            if bed.start < 0:
                bed.start = 0
            bed.stop += extend
            bed.id += str("_extend_" + str(extend) + "bp")
            print >> out, bed
    finally:
        f.close()
Exemplo n.º 49
0
def Main():
    '''
    Append weighted read counts of several bed files to a nucleosome table.

    args.beds and args.name are parallel lists; each bed file is indexed
    with DBI.init(..., 'bed').  args.nucleosome is parsed as 'metabed'
    with a header line.  For every nucleosome not on chrX, chrY or chrM,
    the features of each bed file within a window around the nucleosome
    centre are weighted by max(min(1, (ma - |distance|) / 25.0), 0) and
    summed.  The original row followed by one count per bed file is
    written to args.output, after a header built from the first line of
    args.nucleosome plus the names.

    ma is not defined in this function; it is presumably a module-level
    constant - TODO confirm.

    Changes with respect to the previous version:
      * counts are computed in args.name order, the order used for the
        header; iterating bed.keys() gave an arbitrary dictionary order,
        so values could end up under the wrong column name;
      * the output file and the handle used to read the header line are
        closed;
      * the never-read count_matrix accumulation was dropped.
    '''
    args = ParseArg()

    # store bed files with indexing and count information:
    bed = {}

    print >> sys.stderr, "Starting index bed files:"
    for i in range(len(args.beds)):
        temp_name = args.name[i]
        print >> sys.stderr, "  #Indexing for bed file of", temp_name, "\r",
        bed[temp_name] = DBI.init(args.beds[i], 'bed')
    # one column per bed file, in the same order as the header
    column_names = args.name[:len(args.beds)]

    half_len = int(args.len)
    print >> sys.stderr
    print >> sys.stderr, "Reading nucleosome peak xls file from Danpos."
    nucleosomes = TableIO.parse(args.nucleosome, 'metabed', header=True)

    print >> sys.stderr, "Start Counting..."

    out = open(args.output, "w")
    try:
        header_source = open(args.nucleosome, 'r')
        try:
            line_head = header_source.readline().strip()
        finally:
            header_source.close()
        line_head = line_head + "\t" + "\t".join(str(f) for f in args.name)
        print >> out, line_head
        for i in nucleosomes:
            chrom = i.chr

            if chrom == 'chrY' or chrom == 'chrX' or chrom == 'chrM':
                continue
            center = int(i.start + i.end) // 2
            count = np.zeros(len(args.beds), dtype="float")
            line = str(i)
            for k, name in enumerate(column_names):
                window = Bed([
                    chrom, center - ma - (half_len - 75),
                    center + ma + (half_len - 75)
                ])
                for j in bed[name].query(window):
                    j_center = find_center(j, half_len)
                    weight = max(min(1, (ma - abs(j_center - center)) / 25.0), 0)
                    count[k] += weight
            line = line + "\t" + "\t".join(str(f) for f in count)
            print >> out, line
    finally:
        out.close()
Exemplo n.º 50
0
def Main():
    '''
    Write the cDNA sequence of every input feature in FASTA form.

    IO TEMPLATE: the output is standard output when args.output is
    "stdout" or when the named file cannot be opened; the input is
    standard input when args.input is "stdin" or when the named file
    cannot be opened, and is read through gzip when the last
    dot-separated part of its name is "gz".

    After a provenance header, each feature parsed with args.format is
    written as ">" + id + "_cDNA" followed by its cDNA sequence taken from
    GenomeI(args.genome), on one line when args.line is set and passed
    through seq_wrapper otherwise.
    '''
    global args, out
    args = ParseArg()

    out = sys.stdout
    if args.output != "stdout":
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout

    fin = sys.stdin
    if args.input != "stdin":
        try:
            suffix = args.input.split(".")[-1]
            opener = gzip.open if suffix == "gz" else open
            fin = opener(args.input, "r")
        except IOError:
            print >> sys.stderr, "can't read file", args.input
            fin = sys.stdin
    # END OF IO TEMPLATE

    print >> out, "# This data was generated by program ", sys.argv[0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)
    genome = GenomeI(args.genome)
    for feature in TableIO.parse(fin, args.format):
        print >> out, ">" + feature.id + "_cDNA"
        sequence = genome.get_cdna_seq(feature)
        if not args.line:
            sequence = seq_wrapper(sequence)
        print >> out, sequence
Exemplo n.º 51
0
def Main():
    '''
    Query a database with all input features, split over args.thread threads.

    The output stream is standard output when args.output is "stdout" or
    when the named file cannot be opened.  A provenance header is written
    (sys.argv[0] is reduced to its base name first).  All features parsed
    from args.input ("stdin" means standard input) with args.input_format
    are loaded, cut into args.thread consecutive slices - the last slice
    takes the remainder - and each slice is handed to
    fquery(slice, dbi, thread_index) in its own thread.

    The workers are now threading.Thread objects that are joined before
    returning.  Threads created with thread.start_new_thread are not
    waited for, so the interpreter could exit before they had finished.
    Four counters that were assigned but never read were removed.
    '''
    import threading

    global args,out
    args=ParseArg()
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    argv=sys.argv
    argv[0]=argv[0].split("/")[-1]
    print >>out,"# This data was generated by program ",argv[0],"(version %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :\n#\t"," ".join(argv)

    dbi=DBI.init(args.db,args.dbformat)
    if args.input=="stdin":
        source=sys.stdin
    else:
        source=args.input

    query_sets=[x for x in TableIO.parse(source,args.input_format)]
    length=len(query_sets)

    size=length//args.thread
    workers=[]
    for i in range(args.thread):
        # the last worker also takes what is left after the even split
        end=length if (i+1)==args.thread else min((i+1)*size,length)
        querys=query_sets[i*size:end]
        worker=threading.Thread(target=fquery,args=(querys,dbi,i))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
Exemplo n.º 52
0
 def __init__(self,handle=None,**kwargs):
     '''
     Create an empty index and optionally load records into it.

     handle, when not None, is passed to self.read first.

     init from file
     Example:
     data=binindex(file="file.bed")
     or
     data=binindex(file="file.vcf",format="vcf")

     The "format" keyword defaults to "bed".  When "file" is given, it is
     parsed by TableIO.parse with that format and the remaining keyword
     arguments, and the result is passed to self.read.
     '''
     self.data={}
     if handle is not None:
         self.read(handle)
     fmt=kwargs.pop("format","bed")
     if "file" in kwargs:
         source=kwargs.pop("file")
         self.read(TableIO.parse(source,fmt,**kwargs))
Exemplo n.º 53
0
def Main():
    '''
    Copy every record parsed from args.input to args.output.

    IO TEMPLATE: the statements up to the "END OF IO TEMPLATE" marker
    parse the command line and open both streams through IO.fopen.
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    '''
    END OF IO TEMPLATE 
    '''
    # Provenance header; the trailing comma keeps the first two prints on
    # one output line.
    print >> out, "# This data was generated by program ", sys.argv[
        0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)

    # Records are parsed with the format named by args.format and written
    # back one per line.
    for i in TableIO.parse(fin, args.format):
        print >> out, i