def main():
    """Print the spliced sequence of transcripts looked up in a SQLite database.

    Positional command-line arguments:
        args[0]: value matched against the ``tcons`` column of the
            ``transcripts`` table.
        args[1]: argument passed to ``FastA`` (presumably a FASTA path --
            confirm against the ``FastA`` class).

    For every matching row, columns 3 and 4 are parsed as comma-separated
    integer start and end coordinates.  Each list is sorted independently,
    the lists are paired position by position, and the pieces returned by
    ``fa.grabSequence(row[2], start, end)`` are concatenated and printed.

    Exits through ``OptionParser.error`` when fewer than two positional
    arguments are supplied.
    """
    p = optparse.OptionParser(__doc__)
    p.add_option("-D", "--debug", action="store_true", dest="D", help="debug")
    p.add_option("-N", "--novel", action="store_true", dest="novel",
                 help="Only check at novel loci")
    p.add_option("-i", "--identifier", dest="i",
                 help="Identifier to use")
    # NOTE: "--outupt" is a misspelling, kept verbatim so existing command
    # lines (including abbreviations such as "--out") keep working.
    p.add_option("-o", "--outupt", dest="o",
                 help="Input database")
    options, args = p.parse_args()
    if len(args) < 2:
        p.error("expected two arguments: transcript identifier and FASTA file")

    if options.o:
        conn = sqlite3.connect(options.o)
    else:
        conn = sqlite3.connect('/gen_local/hsuj/temp/masked.data')
    try:
        c = conn.cursor()
        c.execute('''SELECT * FROM transcripts WHERE tcons=?''', (args[0],))
        # fetchall() rather than fetchmany(): without an explicit size,
        # fetchmany() returns at most cursor.arraysize (default 1) rows and
        # would silently drop any further matches.
        rows = c.fetchall()
    finally:
        # Release the database handle even if the query fails.
        conn.close()

    fa = FastA(args[1])
    for row in rows:
        # sorted() works on both Python 2 and 3 (map() has no .sort() on 3);
        # empty tokens from a trailing comma are ignored.
        starts = sorted(int(tok) for tok in row[3].split(',') if tok.strip())
        ends = sorted(int(tok) for tok in row[4].split(',') if tok.strip())
        # Both lists must stay sorted so that zip() pairs matching bounds.
        seq = [fa.grabSequence(row[2], x, y) for x, y in zip(starts, ends)]
        print("".join(seq))
def main():
    """Rebuild a chr15 coding sequence from GTF features and annotate variants.

    Positional command-line arguments:
        args[0]: passed to ``GTF.GTF`` and loaded with ``load_gtf()``.
        args[1]: passed to ``VCFfile``; iterated record by record.
        args[2]: passed to ``FastA``; queried with ``grabSequence``.

    Steps:
      1. Read the reference protein from ``HCN4.pfa.txt``.
      2. For each feature in ``gtf.data``, fetch its sequence on "chr15",
         reverse-complement it and prepend it to the running sequence
         (presumably because the gene is on the minus strand -- confirm).
         The raw and reverse-complemented pieces are written to ``test.txt``.
      3. Print the sequence length, the translated length, and whether the
         translation equals the reference protein (case-insensitive).
      4. For every VCF record on chromosome 15 lying strictly inside a
         feature, print a tab-separated row with the position within the
         gene and the affected amino acid.

    Exits through ``OptionParser.error`` when fewer than three positional
    arguments are supplied.
    """
    p = optparse.OptionParser(__doc__)
    p.add_option("-D", "--debug", action="store_true", dest="debug", help="debug")
    p.add_option("-r", "--refSeq", action="store_true", dest="r",
                 help="Input GTF is from refSeq Flat File")
    p.add_option("-o", "--output", action="store", dest="o",
                 help="Output file, if not set will default to INPUT.an.bam")
    options, args = p.parse_args()
    if len(args) < 3:
        p.error("expected three arguments: GTF file, VCF file and FASTA file")

    gtf = GTF.GTF(args[0])
    gtf.load_gtf()
    vcf = VCFfile(args[1])
    fa = FastA(args[2])

    # "rU" was removed in Python 3.11; plain text mode is used instead and
    # the handle is closed as soon as the protein has been read.
    with open("HCN4.pfa.txt", "r") as handle:
        pfa = Seq(handle.read())
    print("Length of the protein sequence:%i" % len(pfa))

    sequence = ""
    first = True
    with open("test.txt", "w+") as bleh:
        for x in gtf.data:
            # Printed as a tuple so the output is the same on Python 2 and 3.
            print((x.start, x.end))
            if first:
                # Get last codon
                gstart = x.start - 3
                gend = x.end - 1
            else:
                gstart = x.start
                gend = x.end
            temp = Seq(fa.grabSequence("chr15", gstart + 1, gend + 1).rstrip("\n"))
            bleh.write(str(temp))
            temp = temp.reverse_complement()
            bleh.write(str(temp))
            bleh.write("\n")
            bleh.write("\n")
            # Prepend: later features come earlier in the assembled sequence.
            sequence = temp + sequence
            first = False

    protein = sequence.translate()
    print("Our sequence:%i " % len(sequence))
    print("pp: %i " % len(protein))
    print(str(protein.upper()) == str(pfa).upper())

    print("\t".join(["rsID", "chrom", "pos", "bp position", "ref", "alt",
                     "AA position", "AA Ref"]))
    for line in vcf:
        try:
            chrom = int(line.chr[3:])
        except ValueError:
            # Non-numeric chromosome names (e.g. chrX) can never be chr15.
            continue
        if chrom != 15:
            continue
        pos = int(line.pos)
        # Check every feature: takewhile() would stop at the first feature
        # that does not contain the position and miss all later ones.
        hits = [feat for feat in gtf.data if feat.start < pos < feat.end]
        for feat in hits:
            # NOTE(review): dist_tss comes from the GTF feature class; its
            # exact meaning and the "+ 2" offset cannot be verified here.
            position_within_gene = feat.end - pos + feat.dist_tss + 2
            # Floor division keeps the amino-acid index an int on Python 3.
            aa_num = position_within_gene // 3 + 1
            out_list = [
                line.id,
                str(chrom),
                str(pos),
                str(position_within_gene),
                line.ref,
                line.alt,
                str(aa_num),
                pfa[aa_num - 1],
            ]
            print("\t".join(out_list))