示例#1
0
文件: wBedUniq.py 项目: tsznxx/ngslib
        sys.exit(p.print_help())
    args = p.parse_args()
    return args

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__=="__main__":
    # Script entry point: compare each input record with `lastbed` on
    # chrom/start/stop and write records to the output handle.
    # Get parameters
    args=argParser()
    fh = IO.mopen(args.ofname, 'w')
    # Placeholder record that the first input item is compared against.
    lastbed = Bed("chr1\t0\t0")
    cnt = 0 # count of unique items
    for item in IO.BioReader(args.ifname,args.ftype):
        if item.chrom == lastbed.chrom and item.start == lastbed.start and item.stop == lastbed.stop:
            # Same coordinates as lastbed: in names mode 'c' the item's id is
            # appended to lastbed's id, separated by ';'.
            if args.names == 'c':
                lastbed.id += ";"+item.id
        else:
            # Different coordinates: write out lastbed.
            # NOTE(review): the stop != 0 test presumably skips the initial
            # placeholder -- confirm against how Bed parses "chr1\t0\t0".
            if lastbed.stop !=0:
                cnt += 1
                # Names mode 'n' replaces the id with the running count;
                # mode 'p' replaces it with "<prefix>_<count>".
                if args.names == 'n':
                    lastbed.id = str(cnt)
                elif args.names == 'p':
                    lastbed.id = args.prefix+"_"+str(cnt)
                print >> fh, lastbed
            # NOTE(review): lastbed is never reassigned and fh is never closed
            # in the lines visible here; presumably this excerpt is cut short
            # -- confirm against the full script.
示例#2
0
                   required=False,
                   help='Output file name. Default is "stdout".')
    if len(sys.argv) == 1:
        sys.exit(p.print_help())
    args = p.parse_args()
    return args


# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    # Script entry point: for every name listed in the names file, look the
    # sequence up in the fasta file and write it out as a fasta record.
    opts = argParser()
    out = IO.mopen(opts.ofname, 'w')
    # Source of the sequences.
    fasta = FastaFile(opts.ifname)
    names = IO.mopen(opts.names)
    # One sequence name per line; trailing whitespace/newline is dropped.
    for name in (raw.rstrip() for raw in names):
        record = fasta.getSeq(name)
        # Header line first, then the formatted sequence body.
        print >> out, ">" + name
        print >> out, record.formatSeq()
    # Release the handles in the same order as they are no longer needed.
    fasta.close()
    IO.mclose(names)
    IO.mclose(out)
示例#3
0
    args = p.parse_args()
    return args


# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    # Script entry point: compare each input record with `lastbed` on
    # chrom/start/stop and write records to the output handle.
    # Get parameters
    args = argParser()
    fh = IO.mopen(args.ofname, 'w')
    # Placeholder record that the first input item is compared against.
    lastbed = Bed("chr1\t0\t0")
    cnt = 0  # count of unique items
    for item in IO.BioReader(args.ifname, args.ftype):
        if item.chrom == lastbed.chrom and item.start == lastbed.start and item.stop == lastbed.stop:
            # Same coordinates as lastbed: in names mode 'c' the item's id is
            # appended to lastbed's id, separated by ';'.
            if args.names == 'c':
                lastbed.id += ";" + item.id
        else:
            # Different coordinates: write out lastbed.
            # NOTE(review): the stop != 0 test presumably skips the initial
            # placeholder -- confirm against how Bed parses "chr1\t0\t0".
            if lastbed.stop != 0:
                cnt += 1
                # Names mode 'n' replaces the id with the running count;
                # mode 'p' replaces it with "<prefix>_<count>".
                if args.names == 'n':
                    lastbed.id = str(cnt)
                elif args.names == 'p':
                    lastbed.id = args.prefix + "_" + str(cnt)
                print >> fh, lastbed
            # NOTE(review): lastbed is never reassigned and fh is never closed
            # in the lines visible here; presumably this excerpt is cut short
            # -- confirm against the full script.
示例#4
0
        sys.exit(p.print_help())
    args = p.parse_args()
    return args


# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    # Script entry point: report the G+C fraction of every fasta record,
    # followed by one "Total" line over all records.
    opts = argParser()
    out = IO.mopen(opts.ofname, 'w')
    # Running sums over all records: G/C count and non-N length.
    total_gc = 0
    total_len = 0
    for record in IO.BioReader(opts.fname, 'fasta'):
        # Upper-case first so that lower-case bases are counted too.
        bases = str(record.seq).upper()
        gc = bases.count("G") + bases.count("C")
        # N bases are left out of the denominator.
        known = len(bases) - bases.count("N")
        # `or 1` keeps the division defined when no non-N base is present.
        ratio = float(gc) / (known or 1)
        print >> out, "%s\t%-3.3f" % (record.id, ratio)
        total_gc += gc
        total_len += known
    print >> out, "Total\t%-3.3f" % (float(total_gc) / (total_len or 1))
    IO.mclose(out)
示例#5
0
文件: wGetTSS.py 项目: tomkp75/ngslib
import sys
import string
from ngslib import IO

# ------------------------------------
# constants
# ------------------------------------

# ------------------------------------
# Misc functions
# ------------------------------------

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__=="__main__":
    if len(sys.argv)==1:
        sys.exit("Example:"+sys.argv[0]+" anno.tab/bed\n\tPrint TSS position in Bed format.")
    if sys.argv[1].endswith(".tab"):
        for item in IO.BioReader(sys.argv[1],ftype= 'genepred'):
            print item.toBed().getTSS()
    else:
        for item in IO.BioReader(sys.argv[1],ftype= 'bed'):
            print item.getTSS()

示例#6
0
        if '.tab' in sys.argv[1]:
            ftype='gene'
        else:
            ftype='bed'

        # initiation annotations.
        annos={}
        #for chrom in IO.genomeSize('hg19'):
        for chrom in Utils.genomeSize(sys.argv[2]):
            if ftype=='bed':
                annos[chrom]=BedList()
            else:
                annos[chrom]=GeneBedList()
        
        # read annotations.
        for anno in IO.BioReader(sys.argv[1],ftype=ftype):
            if annos.has_key(anno.chrom):
                annos[anno.chrom].append(anno)

        # sort
        for chrom in annos:
            annos[chrom].sort()

        # Find nearest annoations
        for item in IO.BioReader(sys.argv[3],ftype='bed'):
            if annos.has_key(item.chrom) and len(annos[item.chrom])>0:
                tanno=annos[item.chrom][annos[item.chrom].bisect(item)]
                olen=item.overlapLength(tanno)
                if olen==1:
                    annostr=tanno.id+':overlap'
                    olen=0
示例#7
0
# ------------------------------------
# Misc functions
# ------------------------------------

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    # Get parameters
    args = argParser()
    fh = IO.mopen(args.ofname, 'w')
    genome = FastaFile(args.genome)
    for i, item in enumerate(IO.BioReader(args.ifname, args.ftype)):
        try:
            strand = item.strand
        except:
            strand = "+"
        seq = item.getSeq(genome)
        if len(seq) > 0:
            print >> fh, '>' + (item.id != "NONAME" and item.id
                                or "item_" + str(i))
            if args.linelength:
                seq = seq.formatSeq(args.linelength)
            if args.case == 'u':
                seq = seq.upper()
            elif args.case == 'l':
示例#8
0
    if len(sys.argv)==1:
        sys.exit(p.print_help())
    args = p.parse_args()
    return args

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

def parseRegion(region):
    """Split a region string into its (chrom, start, end, strand) fields.

    The string is searched for the pattern ``chrom:start-end:strand``. The
    start, end and strand fields may be empty. All four fields are returned
    as strings, in that order.

    Raises:
        ValueError: if the pattern does not occur in *region*.
    """
    m = re.search(r"(\S+):(\S*)-(\S*):(\S*)", region)
    if m is None:
        # re.search returns None on a mismatch; fail with a readable message
        # instead of an AttributeError on None.groups().
        raise ValueError("invalid region %r; expected chrom:start-end:strand" % (region,))
    return m.groups()


if __name__=="__main__":
    # Script entry point: resolve the requested chromosome interval either
    # from the combined region option or from the separate options.
    args = argParser()
    fh = IO.mopen(args.ofname, 'w')
    # Parse chromosome region
    if args.region:
        chrom, start, end, strand = parseRegion(args.region)
    else:
        chrom = args.chrom
        start = args.start
        end   = args.end
        strand= args.strand
    if not chrom:
        raise ValueError("chromosome name is required.")
    # Empty/missing coordinates become None; anything else must parse as int.
    start = int(start) if start else None
    end  = int(end) if end else None
    # Any strand other than "+", "-" or "." falls back to "+".
    strand = strand if strand in ["+","-","."] else "+"
    # Get seq from Fasta file
示例#9
0
# ------------------------------------
# python modules
# ------------------------------------

import sys
import string
from ngslib import IO

# ------------------------------------
# constants
# ------------------------------------

# ------------------------------------
# Misc functions
# ------------------------------------

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    if len(sys.argv) == 1:
        sys.exit("Example:" + sys.argv[0] + " *.tab ")
    for item in IO.BioReader(sys.argv[1], 'genepred'):
        for exon in item.exons():
            print exon
示例#10
0
    p.add_argument("-n","--names",dest='names',type=str,metavar="names.lst",required=True,help='A file with sequence names. Can be "stdin".')
    p.add_argument("-o","--output",dest='ofname',type=str, default="stdout", metavar="output.fa",required=False,help='Output file name. Default is "stdout".')
    if len(sys.argv)==1:
        sys.exit(p.print_help())
    args = p.parse_args()
    return args

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    # Script entry point: for every name listed in the names file, look the
    # sequence up in the fasta file and write it out as a fasta record.
    opts = argParser()
    out = IO.mopen(opts.ofname, 'w')
    # Source of the sequences.
    fasta = FastaFile(opts.ifname)
    names = IO.mopen(opts.names)
    # One sequence name per line; trailing whitespace/newline is dropped.
    for name in (raw.rstrip() for raw in names):
        record = fasta.getSeq(name)
        # Header line first, then the formatted sequence body.
        print >> out, ">" + name
        print >> out, record.formatSeq()
    # Release the handles in the same order as they are no longer needed.
    fasta.close()
    IO.mclose(names)
    IO.mclose(out)