Exemplo n.º 1
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args,out
    args=ParseArg()
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    '''
    END OF IO TEMPLATE 
    '''
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)
    dbi=DBI.init(args.db,Tools.guess_format(args.db))
    references=dbi.bamfiles[0].references
    for i in TableIO.parse(fin,args.format):
        print i
        n=0
        c_count=0
        reads=dbi.query(i,args.method)
        for read in reads:
            compatible=Tools.compatible_with_transcript(read,i,references=references,strand=args.strand)
            print "HT:"
            for i0,r in enumerate(TableIO.parse(read.reads,"bam2bed12",references=references)):
                print "READ"+str(i0)+"\t",r
            print "COMPATIBLE:",compatible,"\n\n"
            if compatible: c_count+=1
            n+=1
        print "COMPATIBLE / ALL OVERLAP READS =  ",c_count,"/",n
        print "RATIO\t%.4f"%float(c_count)/n
Exemplo n.º 2
0
Arquivo: IO.py Projeto: nimezhu/xplib
def fopen(file,mode="r",**kwargs):
    '''
    Open `file` according to `mode` and return the resulting handle.

    mode "r": a pysam.Samfile opened with "rb" when Tools.guess_format(file)
              is "bam", otherwise whatever open_input(file) returns.
    mode "w": whatever open_output(file) returns.
    Any other mode yields None. Extra keyword arguments are accepted but not
    used.
    '''
    # The format is guessed up front, whatever the mode is.
    is_bam=Tools.guess_format(file)=="bam"
    if mode=="r":
        if is_bam:
            return pysam.Samfile(file,"rb")
        return open_input(file)
    elif mode=="w":
        return open_output(file)
    return None
Exemplo n.º 3
0
Arquivo: IO.py Projeto: sterding/bam2x
def fopen(file, mode="r", **kwargs):
    '''
    Return a handle for `file` chosen from the requested `mode`.

    Reading ("r") a file whose guessed format is "bam" gives a pysam.Samfile
    opened with "rb"; reading anything else is delegated to open_input.
    Writing ("w") is delegated to open_output. Unknown modes give None.
    The extra keyword arguments are accepted but ignored.
    '''
    # Guess the format first, independently of the mode.
    detected = Tools.guess_format(file)
    if mode == "r" and detected == "bam":
        return pysam.Samfile(file, "rb")
    if mode == "r":
        return open_input(file)
    if mode == "w":
        return open_output(file)
    return None
Exemplo n.º 4
0
def Main():
    '''
    Test driver for TableIO.parse(file.bam,"bam2bed").

    Parses args.input with the requested format (guessed from the file name
    when the format is "guess") and prints every parsed record to the output.
    '''
    global args,out
    args=ParseArg()
    # NOTE(review): `fin` is opened but the parser below is handed args.input
    # directly -- confirm whether the handle is needed at all.
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    guess_requested=(args.format=="guess")
    if guess_requested:
        args.format=Tools.guess_format(args.input)
    for record in TableIO.parse(args.input,args.format):
        print >>out,record
Exemplo n.º 5
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    '''
    END OF IO TEMPLATE 
    '''
    print >> out, "# This data was generated by program ", sys.argv[
        0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)
    dbi = DBI.init(args.db, Tools.guess_format(args.db))
    references = dbi.bamfiles[0].references
    for i in TableIO.parse(fin, args.format):
        print i
        n = 0
        c_count = 0
        reads = dbi.query(i, args.method)
        for read in reads:
            compatible = Tools.compatible_with_transcript(
                read, i, references=references, strand=args.strand)
            print "HT:"
            for i0, r in enumerate(
                    TableIO.parse(read.reads,
                                  "bam2bed12",
                                  references=references)):
                print "READ" + str(i0) + "\t", r
            print "COMPATIBLE:", compatible, "\n\n"
            if compatible: c_count += 1
            n += 1
        print "COMPATIBLE / ALL OVERLAP READS =  ", c_count, "/", n
        print "RATIO\t%.4f" % float(c_count) / n
Exemplo n.º 6
0
def Main():
    '''
    Query a database with every entry of the input and write the hits out.

    The output starts with a commented header (program name, version, date,
    command line). Each input entry is written as a "QR" line, followed by
    "HT" lines built from what dbi.query returns for it. A commented summary
    of the counters closes the output.
    '''
    global args,out
    args=ParseArg()

    # Output destination; fall back to stdout when the file cannot be opened.
    if args.output!="stdout":
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    else:
        out=sys.stdout

    # Keep only the base name of the program (this edits sys.argv in place).
    sys.argv[0]=sys.argv[0].split("/")[-1]
    print >>out,"# This data was generated by program ",sys.argv[0],"(version %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :\n#\t"," ".join(sys.argv)

    # Resolve the database format: a "gz" suffix selects tabix, with the
    # guessed format of the file recorded as the tabix format.
    if args.dbformat=="guess":
        if Tools.suffix(args.db)=="gz":
            args.dbformat="tabix"
            args.tabix_format=Tools.guess_format(args.db)
        else:
            args.dbformat=Tools.guess_format(args.db)

    # Optional keyword arguments for the queries and for the database setup.
    query_kwargs={}
    if args.query_method:
        query_kwargs["method"]=args.query_method
    init_kwargs={}
    if args.tabix_format:
        init_kwargs["tabix"]=args.tabix_format
    dbi=DBI.init(args.db,args.dbformat,**init_kwargs)

    source=sys.stdin if args.input=="stdin" else args.input
    if args.input_format=="guess":
        args.input_format=Tools.guess_format(args.input)

    query=0          # entries processed
    hits=0           # entries with at least one hit
    query_length=0   # sum of len(entry) over all entries
    hits_number=0    # hit lines/records counted
    for idx,entry in enumerate(TableIO.parse(source,args.input_format)):
        if idx%100==0:
            print >>sys.stderr,"query ",idx," entries\r",
        print >>out,"QR\t",entry
        found=False
        query+=1
        query_length+=len(entry)
        results=dbi.query(entry,**query_kwargs)
        if isinstance(results,(numpy.ndarray,list)):
            # Array-like result: all values go on a single HT line.
            print >>out,"HT\t",
            for value in results:
                print >>out,str(value)+",",
            print >>out,""
            found=True
            hits_number+=1
        elif isinstance(results,str):
            print >>out,"HT\t",
            print >>out,results
            found=True
            hits_number+=1
        else:
            # Any other iterable: one HT line per item.
            for record in results:
                print >>out,"HT\t",record
                found=True
                hits_number+=1

        if args.dbformat=="tabix":
            # Query a second time with "chr" removed from the chromosome name.
            entry.chr=entry.chr.replace("chr","")
            for record in dbi.query(entry,**query_kwargs):
                print >>out,"HT\t",record
                found=True
                hits_number+=1
        if found:
            hits+=1
    print >>out,"# Query Number:",query,"\n# Query Have Hits:",hits
    print >>out,"# Query Length:",query_length
    print >>out,"# Hits Number:",hits_number
Exemplo n.º 7
0
def Main():
    '''
    Query a database with every entry of the input and write the hits out.

    The output starts with a commented header (program name, version, date,
    command line). Each input entry is written as a "QR" line, followed by
    "HT" lines built from what dbi.query returns for it. A commented summary
    of the counters closes the output.
    '''
    global args, out
    args = ParseArg()

    # Output destination; fall back to stdout when the file cannot be opened.
    if args.output != "stdout":
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    else:
        out = sys.stdout

    # Keep only the base name of the program (this edits sys.argv in place).
    sys.argv[0] = sys.argv[0].split("/")[-1]
    print >> out, "# This data was generated by program ", sys.argv[
        0], "(version %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :\n#\t", " ".join(sys.argv)

    # Resolve the database format: a "gz" suffix selects tabix, with the
    # guessed format of the file recorded as the tabix format.
    if args.dbformat == "guess":
        if Tools.suffix(args.db) == "gz":
            args.dbformat = "tabix"
            args.tabix_format = Tools.guess_format(args.db)
        else:
            args.dbformat = Tools.guess_format(args.db)

    # Optional keyword arguments for the queries and for the database setup.
    query_kwargs = {}
    if args.query_method:
        query_kwargs["method"] = args.query_method
    init_kwargs = {}
    if args.tabix_format:
        init_kwargs["tabix"] = args.tabix_format
    dbi = DBI.init(args.db, args.dbformat, **init_kwargs)

    source = sys.stdin if args.input == "stdin" else args.input
    if args.input_format == "guess":
        args.input_format = Tools.guess_format(args.input)

    query = 0  # entries processed
    hits = 0  # entries with at least one hit
    query_length = 0  # sum of len(entry) over all entries
    hits_number = 0  # hit lines/records counted
    for idx, entry in enumerate(TableIO.parse(source, args.input_format)):
        if idx % 100 == 0:
            print >> sys.stderr, "query ", idx, " entries\r",
        print >> out, "QR\t", entry
        found = False
        query += 1
        query_length += len(entry)
        results = dbi.query(entry, **query_kwargs)
        if isinstance(results, (numpy.ndarray, list)):
            # Array-like result: all values go on a single HT line.
            print >> out, "HT\t",
            for value in results:
                print >> out, str(value) + ",",
            print >> out, ""
            found = True
            hits_number += 1
        elif isinstance(results, str):
            print >> out, "HT\t",
            print >> out, results
            found = True
            hits_number += 1
        else:
            # Any other iterable: one HT line per item.
            for record in results:
                print >> out, "HT\t", record
                found = True
                hits_number += 1

        if args.dbformat == "tabix":
            # Query a second time with "chr" removed from the chromosome name.
            entry.chr = entry.chr.replace("chr", "")
            for record in dbi.query(entry, **query_kwargs):
                print >> out, "HT\t", record
                found = True
                hits_number += 1
        if found:
            hits += 1
    print >> out, "# Query Number:", query, "\n# Query Have Hits:", hits
    print >> out, "# Query Length:", query_length
    print >> out, "# Hits Number:", hits_number
Exemplo n.º 8
0
def Main():
    '''
    Annotate every input entry with the gene features it hits.

    Builds, from the gene table, separate databases for upstream regions,
    downstream regions, exons, introns, 3' UTRs and 5' UTRs. Each input entry
    is then printed followed by tab-separated toIDs(...) results of querying
    the gene table and each of these databases. A commented header and a
    column-name line precede the data.
    '''
    global args,out
    args=ParseArg()

    # Output: stdout on request, or as a fallback when the file cannot be opened.
    if args.output!="stdout":
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    else:
        out=sys.stdout

    # Input: stdin on request, gzip for a "gz" extension, plain file otherwise;
    # falls back to stdin when the file cannot be read.
    if args.input!="stdin":
        try:
            extension=args.input.split(".")[-1]
            if extension=="gz":
                fin=gzip.open(args.input,"r")
            else:
                fin=open(args.input,"r")
        except IOError:
            print >>sys.stderr,"can't read file",args.input
            fin=sys.stdin
    else:
        fin=sys.stdin

    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)

    # Collect the per-gene features into one list per feature type.
    gene=DBI.init(args.genetab,args.gene_format)
    upstream_list,downstream_list=[],[]
    exons_list,introns_list=[],[]
    utr3_list,utr5_list=[],[]
    for g in gene:
        upstream_list.append(g.upstream(args.upstream))
        downstream_list.append(g.downstream(args.downstream))
        exons_list.extend(g.Exons())
        introns_list.extend(g.Introns())
        if g.utr3() is not None:
            utr3_list.append(g.utr3())
        if g.utr5() is not None:
            utr5_list.append(g.utr5())
    upstream=DBI.init(upstream_list,"bed")
    downstream=DBI.init(downstream_list,"bed")
    exons=DBI.init(exons_list,"bed")
    introns=DBI.init(introns_list,"bed")
    utr3=DBI.init(utr3_list,"genebed")
    utr5=DBI.init(utr5_list,"genebed")

    if args.format=="guess":
        args.format=Tools.guess_format(args.input)
    for (i0,i) in enumerate(TableIO.parse(fin,args.format)):
        if i0==0:
            # The column-name line depends on the type of the first entry.
            if isinstance(i,Bed12):
                header="#chr\tstart\tend\tname\tscore\tstrand\tthick_start\tthick_end\titem_rgb\tblock_count\tblock_sizes\tblock_starts\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            elif isinstance(i,GeneBed):
                header="#name\tchr\tstrand\tstart\tend\tcds_start\texon_count\texon_starts\texont_ends\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            else:
                header="#chr\tstart\tend\tname\tscore\tstrand\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            print >>out,header

        # One output line per entry: the entry, then one column per database.
        print >>out,i,
        for db in (gene,upstream,downstream,exons,introns,utr3):
            print >>out,"\t",toIDs(db.query(i)),
        print >>out,"\t",toIDs(utr5.query(i))
Exemplo n.º 9
0
def Main():
    """
    Annotate every input entry with the gene features it hits.

    Builds, from the gene table, separate databases for upstream regions,
    downstream regions, exons, introns, 3' UTRs and 5' UTRs. Each input entry
    is then printed followed by tab-separated toIDs(...) results of querying
    the gene table and each of these databases. A commented header and a
    column-name line precede the data.
    """
    global args, out
    args = ParseArg()

    # Output: stdout on request, or as a fallback when the file cannot be opened.
    if args.output != "stdout":
        try:
            out = open(args.output, "w")
        except IOError:
            print >>sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    else:
        out = sys.stdout

    # Input: stdin on request, gzip for a "gz" extension, plain file otherwise;
    # falls back to stdin when the file cannot be read.
    if args.input != "stdin":
        try:
            extension = args.input.split(".")[-1]
            if extension == "gz":
                fin = gzip.open(args.input, "r")
            else:
                fin = open(args.input, "r")
        except IOError:
            print >>sys.stderr, "can't read file", args.input
            fin = sys.stdin
    else:
        fin = sys.stdin

    print >> out, "# This data was generated by program ", sys.argv[0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)

    # Collect the per-gene features into one list per feature type.
    gene = DBI.init(args.genetab, args.gene_format)
    upstream_list, downstream_list = [], []
    exons_list, introns_list = [], []
    utr3_list, utr5_list = [], []
    for g in gene:
        upstream_list.append(g.upstream(args.upstream))
        downstream_list.append(g.downstream(args.downstream))
        exons_list.extend(g.Exons())
        introns_list.extend(g.Introns())
        if g.utr3() is not None:
            utr3_list.append(g.utr3())
        if g.utr5() is not None:
            utr5_list.append(g.utr5())
    upstream = DBI.init(upstream_list, "bed")
    downstream = DBI.init(downstream_list, "bed")
    exons = DBI.init(exons_list, "bed")
    introns = DBI.init(introns_list, "bed")
    utr3 = DBI.init(utr3_list, "genebed")
    utr5 = DBI.init(utr5_list, "genebed")

    if args.format == "guess":
        args.format = Tools.guess_format(args.input)
    for (i0, i) in enumerate(TableIO.parse(fin, args.format)):
        if i0 == 0:
            # The column-name line depends on the type of the first entry.
            if isinstance(i, Bed12):
                header = "#chr\tstart\tend\tname\tscore\tstrand\tthick_start\tthick_end\titem_rgb\tblock_count\tblock_sizes\tblock_starts\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            elif isinstance(i, GeneBed):
                header = "#name\tchr\tstrand\tstart\tend\tcds_start\texon_count\texon_starts\texont_ends\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            else:
                header = "#chr\tstart\tend\tname\tscore\tstrand\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            print >> out, header

        # One output line per entry: the entry, then one column per database.
        print >> out, i,
        for db in (gene, upstream, downstream, exons, introns, utr3):
            print >> out, "\t", toIDs(db.query(i)),
        print >> out, "\t", toIDs(utr5.query(i))