def txt_equals_csv(population, algo, reverse, count):
    """Check that every split text file has a matching CSV result file.

    For each set directory under ``<datadir><population>``:

    * ``.csv`` names are collected from sub-directories whose name contains
      ``algo + reverse + "Splitted" * count``;
    * ``.txt`` names are collected from sub-directories whose name contains
      ``"Text" + reverse + algo + "Splitted" * count``.

    A text file is "missing" when ``qlibs.find`` reports a negative index
    for its ``.csv`` counterpart.  The paths of missing files are written,
    one per line, to ``get_filename(population, algo, count)``.

    Returns:
        True when nothing is missing (no file is written), False otherwise.
    """
    suffix = "Splitted" * count
    csv_marker = algo + reverse + suffix
    txt_marker = "Text" + reverse + algo + suffix
    indir = qlibs.get_datadir() + population
    missing = []
    for set_name in os.listdir(indir):
        set_dir = indir + "/" + set_name
        if not os.path.isdir(set_dir):
            continue
        csvfiles = []
        txtfiles = []
        for sub_name in os.listdir(set_dir):
            sub_dir = set_dir + "/" + sub_name + "/"
            # The two markers are tested independently: one directory name
            # may match both (the text marker contains the csv marker when
            # ``reverse`` is empty).
            if sub_name.find(csv_marker) >= 0 and os.path.isdir(sub_dir):
                # ``> 0`` (not ``>= 0``) is kept from the original: a name
                # that *starts* with the extension is not counted.
                csvfiles.extend(
                    name for name in os.listdir(sub_dir)
                    if name.find(".csv") > 0)
            if sub_name.find(txt_marker) >= 0 and os.path.isdir(sub_dir):
                txtfiles.extend(
                    name for name in os.listdir(sub_dir)
                    if name.find(".txt") > 0)
        for txt_name in txtfiles:
            if qlibs.find(txt_name.replace(".txt", ".csv"), csvfiles) < 0:
                missing.append(
                    qlibs.get_datadir() + population + "/" + set_name +
                    "/Text" + reverse + algo + suffix + "/" + txt_name + "\n")
    if not missing:
        return True
    # Context manager guarantees the report is flushed and closed even if
    # the write fails part-way.
    with open(get_filename(population, algo, count), "w") as fout:
        fout.write("".join(missing))
    return False
def dist(root, algodir, reverse, regiondir="any", setdir="any"):
    """Build the distribution CSV for each set of ``root`` and date-stamp it.

    For every sub-directory of ``<datadir><root>/`` (or only ``setdir`` when
    it is not ``"any"``), ``dist_data`` is asked to produce
    ``dist_<algodir><reverse>_<root>_<set>_YYYYmmdd.csv``; the literal
    ``YYYYmmdd`` placeholder is then replaced by today's date by renaming
    the file.  When all sets are processed (``setdir == "any"``) the
    root-level ``dist_<algodir><reverse>_<root>_YYYYmmdd.csv`` file is
    renamed the same way.

    Args:
        root: population directory name under the data directory.
        algodir: algorithm name used in the output file names.
        reverse: suffix appended to ``algodir`` in the file names.
        regiondir: forwarded to ``dist_data``.
        setdir: a single set name, or ``"any"`` for every set.

    Returns:
        None.
    """
    rootdir = qlibs.get_datadir() + root + "/"
    # One stamp for the whole run keeps all renamed files consistent.
    stamp = str(datetime.date.today()).replace("-", "")
    prefix = "dist_" + algodir + reverse + "_" + root
    rootfile = ""
    popfirst = True
    for set_name in os.listdir(rootdir):
        if not os.path.isdir(rootdir + set_name):
            continue
        if setdir != "any" and set_name != setdir:
            continue
        set_prefix = rootdir + set_name + "/" + prefix + "_" + set_name + "_"
        outfile = set_prefix + "YYYYmmdd.csv"
        newfile = set_prefix + stamp + ".csv"
        if setdir == "any":
            rootfile = rootdir + prefix + "_YYYYmmdd.csv"
            popfirst = dist_data(rootdir, set_name, algodir, reverse, outfile,
                                 rootfile, regiondir, popfirst)
        else:
            # NOTE(review): this call passes ``regiondir`` in the position
            # where the call above passes ``rootfile``.  dist_data's
            # signature is not visible here, so the call is kept as-is --
            # confirm the intended argument order.
            dist_data(rootdir, set_name, algodir, reverse, outfile, regiondir)
        if os.path.isfile(outfile):
            # os.replace overwrites an existing target on every platform,
            # replacing the previous remove-then-rename pair.
            os.replace(outfile, newfile)
    if rootfile != "" and os.path.isfile(rootfile):
        os.replace(rootfile, rootdir + prefix + "_" + stamp + ".csv")
    return
def callback(self, event):
    """Dispatch a button press to the handler of the currently selected option.

    The pressed button is identified by its ``text`` option:

    * ``"Browse"``  -- run the browse handler for ``self.opt``, if any;
    * ``"Execute"`` -- save the configuration, make sure the population
      directory exists, run the handler for ``self.opt`` and show the
      "END!" message box;
    * ``"Confirm"`` -- save the configuration.

    Any exception raised by a handler is reported on stderr instead of
    being silently discarded; it is still not propagated, so the GUI event
    loop keeps running as before.
    """
    import traceback

    # Handlers are looked up by name so that only the selected one needs to
    # exist on ``self``.
    browse_handlers = {
        "get_seq": "browse_get_seq",
        "lost": "browse_get_seq",
        "errors": "browse_errors",
        "graph": "browse_graphics",
        "select": "browse_select",
    }
    execute_handlers = {
        "get_seq": "get_seq",
        "lost": "lost",
        "reverse": "reverse",
        "quad_search": "quad_search",
        "errors": "errors",
        "dist": "dist",
        "graph": "graphics",
        "select": "select",
        "intersect": "intersections",
    }
    try:
        label = event.widget.cget("text")
        if label == "Browse":
            handler = browse_handlers.get(self.opt)
            if handler is not None:
                getattr(self, handler)()
        elif label == "Execute":
            cfgn.save(self)
            qlibs.create_path(qlibs.get_datadir() + self.pop.get() + "/")
            handler = execute_handlers.get(self.opt)
            if handler is not None:
                getattr(self, handler)()
            messagebox.showinfo("Quad", "END!")
        elif label == "Confirm":
            cfgn.save(self)
    except Exception:
        # Best effort: keep the UI alive, but leave a trace of what failed.
        traceback.print_exc()
    return
def get_seq(population, finput, search):
    """Download one FASTA file per ``code;gene`` row of ``finput``.

    Each row of ``finput`` is split on ``;``; rows with fewer than two
    columns are ignored.  ``get_fasta(gene, code, search)`` is called once
    per row: an empty answer is traced under ``"lostfasta"``, anything else
    is written to ``<datadir><population>/<gene>__<code>.fasta``.

    Args:
        population: population directory name under the data directory.
        finput: path of the ``;``-separated input file.
        search: passed through to ``get_fasta``.

    Returns:
        An empty list (kept for interface compatibility).
    """
    result = []
    # The original referenced an undefined ``datadir`` (and overwrote
    # ``search`` from an undefined ``indata``); the population directory
    # that was computed but never used is the evident target.
    datadir = qlibs.get_datadir() + population + "/"
    with open(finput, "r") as fin:
        rows = fin.read().split("\n")
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        code = cols[0]
        gene = cols[1]
        text = get_fasta(gene, code, search)
        if text == "":
            qlibs.trace("lostfasta", gene + "\t" + code, population)
        else:
            # Write the text already fetched instead of requesting it again.
            with open(datadir + gene + "__" + code + ".fasta", "w") as fout:
                fout.write(text)
        # Pause between requests to the remote service.
        time.sleep(5)
    return result
def quad_search(self):
    """Submit the selected regions of the current set to a prediction algorithm.

    The region list is built from the region check-boxes and joined with
    ``;``.  ``self.v`` selects the algorithm: 1 = local Qgrs, 2 = QGRS
    Mapper web service, 3 = R package pqsfinder run through ``Rscript``.
    """
    population = self.pop.get()
    set_name = self.set.get()
    # Work on the reverse-complement text directory when requested.
    reverse = "ReverseComplement" if self.rev.get() == 1 else ""
    textdir = (qlibs.get_datadir() + population + "/" + set_name +
               "/Text" + reverse + "/")

    # Region types enabled in the form, in a fixed order.
    chosen = []
    if int(self.up.get()) == 1:
        chosen.append("Upstream" + self.upno.get())
    if int(self.u5.get()) == 1:
        chosen.append("UtrExon5")
    if int(self.u3.get()) == 1:
        chosen.append("UtrExon3")
    if int(self.ce.get()) == 1:
        chosen.append("CdsExon")
    if int(self.it.get()) == 1:
        chosen.append("Intron")
    if int(self.dw.get()) == 1:
        chosen.append("Downstream" + self.dwno.get())
    regions = ";".join(chosen)

    algo = ""
    choice = int(self.v.get())
    if choice == 1:
        # qgrs-cpp (https://github.com/freezer333/qgrs-cpp) recompiled as library
        algo = "Qgrs"
        qgrs.save_csv(population, textdir, regions, algo, reverse)
    elif choice == 2:
        # QGRS Mapper (http://bioinformatics.ramapo.edu/QGRS/analyze.php)
        algo = "QgrsWeb"
        qgrs.save_csv(population, textdir, regions, algo, reverse)
    elif choice == 3:
        # pqs function from the R library pqsfinder, previously installed
        # (http://www.bioconductor.org/packages/release/bioc/html/pqsfinder.html)
        try:
            algo = "Pqs"
            script = qlibs.get_scriptsdir() + "pqs.r"
            cmd = ["Rscript", script, self.libpath.get(), population,
                   textdir, regions, algo]
            subprocess.check_output(cmd, universal_newlines=True)
        except Exception as e:
            qlibs.trace("pqs", str(e), population)
    print("qgrs.py " + population + " " + textdir + " " + regions + " " +
          algo)
    return
def open(self):
    """Let the user pick a file and open it with the system default application.

    The file dialog starts in the data directory.  Nothing is launched when
    the dialog is cancelled.  The path is handed to the operating system as
    a single argument, never through a shell command line, so names with
    spaces or shell metacharacters are safe.
    """
    import subprocess
    import sys

    indir = qlibs.get_datadir()
    types = (("csv files", "*.csv"), ("all files", "*.*"))
    fname = filedialog.askopenfilename(parent=self.root,
                                       initialdir=indir,
                                       title='Please select a file',
                                       filetypes=types)
    if not fname:
        # Dialog cancelled: the original would still have run "start "/"open ''".
        return
    try:
        if os.name == 'nt':
            os.startfile(fname)
        elif os.name == 'posix':
            # "open" exists on macOS only; other POSIX desktops use xdg-open.
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.run([opener, fname], check=False)
    except OSError as err:
        # Best effort, as before: a missing opener must not break the GUI.
        print("open", fname, err)
    return
def browse_select(self):
    """Ask for a file inside the population directory and store its path.

    The chosen path (whatever the dialog returns) is written to
    ``self.file``.
    """
    start_dir = qlibs.get_datadir() + self.pop.get() + "/"
    patterns = (("csv files", "*.csv"), ("all files", "*.*"))
    chosen = filedialog.askopenfilename(
        parent=self.root,
        initialdir=start_dir,
        title='Please select a file',
        filetypes=patterns,
    )
    self.file.set(chosen)
    print("select")
    return
def browse_graphics(self):
    """Ask for a file and use the name of its parent directory as the title.

    The chosen path is stored in ``self.file``; the text between the last
    two ``/`` separators of that path is stored in ``self.tit``.
    """
    start_dir = qlibs.get_datadir() + self.pop.get() + "/"
    patterns = (("csv files", "*.csv"), ("all files", "*.*"))
    chosen = filedialog.askopenfilename(
        parent=self.root,
        initialdir=start_dir,
        title='Please select a file',
        filetypes=patterns,
    )
    self.file.set(chosen)
    # Cut at the last separator, then keep what follows the previous one.
    last_sep = chosen.rfind("/")
    parent_path = chosen[:last_sep]
    prev_sep = parent_path.rfind("/")
    self.tit.set(parent_path[prev_sep + 1:])
    return
def save_union(population, algo, reverse):
    """Call ``union`` for every CSV file of the split-result directories.

    Walks ``<datadir><population>/<set>/<dir>/`` for each set directory and
    each ``<dir>`` whose name contains ``algo + reverse + "Splitted"``, and
    hands every file whose name contains ``.csv`` (not at position 0) to
    ``union``.
    """
    base = qlibs.get_datadir() + population + "/"
    set_names = os.listdir(base)
    print(set_names)
    for set_name in set_names:
        if not os.path.isdir(base + set_name):
            continue
        for sub_name in os.listdir(base + set_name + "/"):
            if sub_name.find(algo + reverse + "Splitted") < 0:
                continue
            sub_path = base + set_name + "/" + sub_name + "/"
            if not os.path.isdir(sub_path):
                continue
            for fname in os.listdir(sub_path):
                if fname.find(".csv") > 0:
                    union(base, set_name, sub_name, fname, population, algo,
                          reverse)
    return
def getlost(infile, population, set):
    """List the ``code;gene`` rows of ``infile`` that have no text directory.

    A row is "found" when ``<gene>__<code>`` exists as a directory inside
    at least one region directory of ``<datadir><population>/<set>/Text/``.
    Rows that are not found are written as ``code;gene;`` lines to
    ``<datadir>Lost/<set>//lost_<set>.csv`` (the file is overwritten).

    Args:
        infile: path handed to ``qlibs.get_rows``; rows are ``;``-separated
            with the code in column 0 and the gene in column 1.
        population: population directory name under the data directory.
        set: set directory name (parameter name kept for compatibility even
            though it shadows the builtin).

    Returns:
        None.
    """
    outdir = qlibs.get_datadir() + "Lost/" + set + "/"
    outfile = outdir + "/lost_" + set + ".csv"
    qlibs.create_path(outdir)
    # Loop-invariant: the text directory does not depend on the row.
    indir = qlibs.get_datadir() + population + "/" + set + "/Text/"
    lost = []
    for row in qlibs.get_rows(infile):
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        code = cols[0]
        gene = cols[1]
        target = gene + "__" + code
        # any() stops at the first region that holds the directory.
        found = any(
            os.path.isdir(indir + region + "/" + target)
            for region in os.listdir(indir)
            if os.path.isdir(indir + region))
        if not found:
            lost.append(code + ";" + gene + ";\n")
    with open(outfile, "w") as ft:
        ft.write("".join(lost))
    return
def reverse_set(population, set):
    """Write the reversed version of every text file of a set.

    Expects the layout ``<datadir><population>/<set>/Text/<region>/
    <gene_code>/<file>`` and mirrors it under ``TextReverseComplement``,
    calling ``reverse_file(infile, outfile)`` for each file after tracing
    the input path under ``"reverse"``.

    Only the three directory levels above are visited.  The original
    nested recursive ``os.walk`` calls re-traversed the whole tree at every
    level and combined names found at deeper levels with the wrong parent
    directory.

    Args:
        population: population directory name under the data directory.
        set: set directory name (parameter name kept for compatibility).

    Returns:
        None.
    """
    indir = qlibs.get_datadir() + population + "/" + set + "/Text/"
    outdir = indir.replace("/Text/", "/TextReverseComplement/")
    region_names, _ = _reverse_set_children(indir)
    for region in region_names:
        gene_dirs, _ = _reverse_set_children(indir + region)
        for gene_code in gene_dirs:
            src_dir = indir + region + "/" + gene_code + "/"
            dst_dir = outdir + region + "/" + gene_code + "/"
            _, fnames = _reverse_set_children(src_dir)
            for fname in fnames:
                qlibs.create_path(dst_dir)
                qlibs.trace("reverse", src_dir + fname, population)
                reverse_file(src_dir + fname, dst_dir + fname)
    return


def _reverse_set_children(path):
    """Return (sub-directory names, file names) directly inside ``path``.

    Like ``os.walk``, a missing or unreadable ``path`` yields two empty
    lists instead of raising.
    """
    _, dirs, files = next(os.walk(path), (path, [], []))
    return dirs, files
def getlost_fromerrors(infile, population, set):
    """Append the previous version of each versioned code found in ``infile``.

    Rows are tab-separated with the gene in column 0 and a code of the form
    ``<accession>.<version>`` in column 1.  For every row whose version is
    greater than 1, ``<accession>.<version - 1>;<gene>;`` is appended to
    ``<datadir>Lost/<set>//declost_<set>.csv``.

    Rows whose code has no numeric version part are skipped instead of
    aborting the whole run.

    Args:
        infile: path handed to ``qlibs.get_rows``.
        population: not used by this function; kept for interface
            compatibility.
        set: set directory name (parameter name kept for compatibility).

    Returns:
        None.
    """
    outdir = qlibs.get_datadir() + "Lost/" + set + "/"
    outfile = outdir + "/declost_" + set + ".csv"
    qlibs.create_path(outdir)
    entries = []
    for row in qlibs.get_rows(infile):
        cols = row.split("\t")
        if len(cols) <= 1:
            continue
        gene = cols[0]
        parts = cols[1].split(".")
        try:
            version = int(parts[1])
        except (IndexError, ValueError):
            # No ".<number>" suffix: there is no earlier version to derive.
            continue
        if version > 1:
            entries.append(
                parts[0] + "." + str(version - 1) + ";" + gene + ";\n")
    # Append mode is intentional: successive runs accumulate in one file.
    with open(outfile, "a") as ft:
        ft.write("".join(entries))
    return
def get_seq(population, argv):
    """Download the FASTA text for every ``code;gene`` row of the input file.

    ``params(argv)`` supplies the input file (index 0), the search option
    (index 1) and the "third column holds the set name" flag (index 3).
    Progress is persisted in ``<datadir>Countfile.log`` as
    ``count<TAB>row<TAB>input file<TAB>date`` so that a run can resume where
    the previous one stopped and so that no more than 5000 requests are
    made per day.

    For each row the fetchers ``get_fasta``, ``get_fasta_gene``,
    ``get_fasta_alt`` and ``get_fasta_gene_alt`` are tried in that order
    until one returns non-empty text, which is written to
    ``<set dir><gene>__<code>.fasta``; when all fail the row is traced
    under ``"fasta"``.

    Args:
        population: population directory name under the data directory.
        argv: command-line style argument list understood by ``params``.

    Returns:
        The list of set names that were (or were to be) processed.
    """
    result = []
    basedir = qlibs.get_datadir() + population + "/"
    countfile = qlibs.get_datadir() + "Countfile.log"
    indata = params(argv)
    finput = indata[0]
    search = indata[1]
    col3rd = indata[3]
    datadir = None
    if col3rd == False:
        # Single set: named after the input file without its 4-char suffix.
        basename = os.path.basename(finput)
        stem = basename[:len(basename) - 4]
        result.append(stem)
        datadir = create_dir(basedir + stem, indata)
    with open(finput, "r") as fin:
        rows = fin.read().split("\n")

    count, i, day = _get_seq_load_progress(countfile, finput)
    if day is not None:
        today = time.strptime(str(datetime.date.today()), "%Y-%m-%d")
        print(day, today)
        if today > day:
            count = 0
        elif count >= 5000:
            tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
            return result
        if i >= len(rows):
            res = tk.messagebox.askyesno(
                "Quad",
                "WARNING:\n\n File already processed \n\n Do you want to force Execution?"
            )
            if res == True:
                i = 0

    fetchers = (get_fasta, get_fasta_gene, get_fasta_alt, get_fasta_gene_alt)
    set_dirs = {}
    while i < len(rows):
        cols = rows[i].split(";")
        if len(cols) > 1:
            code = cols[0]
            gene = cols[1]
            if col3rd == True:
                set_name = cols[2]
                if qlibs.find(set_name, result) < 0:
                    result.append(set_name)
                # One create_dir call per set; later rows of the same set
                # reuse its directory even when sets are interleaved.
                if set_name not in set_dirs:
                    set_dirs[set_name] = create_dir(basedir + set_name,
                                                    indata)
                datadir = set_dirs[set_name]
            count = count + 1
            print(count, gene, code)
            text = ""
            for fetch in fetchers:
                # The original discarded get_fasta_alt's answer; every
                # fallback result is now actually used.
                text = fetch(gene, code, search)
                if text != "":
                    break
            if text == "":
                qlibs.trace("fasta", gene + "\t" + code, population)
            else:
                with open(datadir + gene + "__" + code + ".fasta",
                          "w") as fout:
                    fout.write(text)
            if count % 5000 == 0:
                tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
                break
            # Pause between requests to the remote service.
            time.sleep(5)
        i = i + 1
    with open(countfile, "w") as fc:
        fc.write(
            str(count) + "\t" + str(i) + "\t" + finput + "\t" +
            str(datetime.date.today()))
    return result


def _get_seq_load_progress(countfile, finput):
    """Read the saved progress as ``(count, row index, day)``.

    The row index is 0 when the saved state belongs to another input file.
    Returns ``(0, 0, None)`` when the file is missing or malformed, which
    makes the caller start from scratch as the original did.
    """
    try:
        with open(countfile, "r") as fc:
            cols = fc.read().split("\t")
        count = int(cols[0])
        day = time.strptime(cols[3], "%Y-%m-%d")
        row = int(cols[1]) if cols[2] == finput else 0
    except (OSError, ValueError, IndexError):
        return 0, 0, None
    return count, row, day
def intersect(population, algo):
    """Report the elements shared by two or more sets of a population.

    For every set directory of ``<datadir><population>/`` the
    lexicographically greatest file named ``dist_<algo>...`` whose only
    ``.csv`` occurrence is its suffix is selected; sets without such a file
    are traced under ``"intersect"``.  ``qlibs.get_uniques`` provides the
    elements of each selected file.

    Two files are written into the population directory:

    * ``intersect_genes_<population><yyyymmdd>.csv`` -- one line per shared
      element: ``element;set;set;...;`` (or ``No intersection found``);
    * ``intersect_dists_<algo>_<population><yyyymmdd>.csv`` -- every row of
      the first sharing set's file that contains the element, followed by
      the names of the sharing sets.

    Returns:
        None.
    """
    indir = qlibs.get_datadir() + population + "/"
    stamp = str(datetime.date.today()).replace("-", "")
    prefix = "dist_" + algo
    lists = []  # unique elements of each selected file
    sets = []   # set names, parallel to ``lists``
    dists = []  # raw file contents, parallel to ``lists``
    for set_name in os.listdir(indir):
        set_dir = indir + set_name
        if not os.path.isdir(set_dir):
            continue
        lastfile = ""
        for fname in os.listdir(set_dir):
            if not os.path.isfile(set_dir + "/" + fname):
                continue
            # Same test as the former index()-based one, without relying on
            # a bare except: starts with the prefix and the first ".csv" is
            # the suffix.
            if (fname.startswith(prefix)
                    and fname.find(".csv") == len(fname) - 4
                    and fname > lastfile):
                lastfile = fname
        if lastfile == "":
            qlibs.trace("intersect", set_name, pop=population)
            continue
        lists.append(qlibs.get_uniques(set_dir + "/" + lastfile))
        sets.append(set_name)
        with open(set_dir + "/" + lastfile, "r") as f:
            dists.append(f.read())

    elems = []    # shared elements, in discovery order
    setreps = []  # names of the sets sharing each element
    setidxs = []  # indexes of those sets
    for i in range(len(sets) - 1):
        for j in range(i + 1, len(lists)):
            for elem in lists[i]:
                if qlibs.find(elem, lists[j]) < 0:
                    continue
                k = qlibs.find(elem, elems)
                if k >= 0:
                    if qlibs.find(sets[j], setreps[k]) < 0:
                        setreps[k].append(sets[j])
                        setidxs[k].append(j)
                else:
                    elems.append(elem)
                    setreps.append([sets[i], sets[j]])
                    setidxs.append([i, j])
    print(sets)
    print(setreps)
    print(setidxs)

    text = "".join(
        elem + ";" + "".join(name + ";" for name in reps) + "\n"
        for elem, reps in zip(elems, setreps))
    if text == "":
        text = "No intersection found"
    outfile = indir + "intersect_genes_" + population + stamp + ".csv"
    with open(outfile, "w") as fout:
        fout.write(text)

    lines = []
    for elem, reps, idxs in zip(elems, setreps, setidxs):
        first = idxs[0]
        print(first)
        print(elem)
        tail = "".join(name + ";" for name in reps) + "\n"
        for row in dists[first].split("\n"):
            if row.find(elem) >= 0:
                lines.append(row + tail)
    outfile = (indir + "intersect_dists_" + algo + "_" + population + stamp +
               ".csv")
    with open(outfile, "w") as fout:
        fout.write("".join(lines))
    return
# substr_not_founds(population, algo, overlap, reverse, nsplit=2, libpath="")
#
# Re-runs the prediction algorithm on the text files listed in the file
# named by get_filename(population, algo, count), then returns the result of
# txt_equals_csv(population, algo, reverse, count) for the incremented count.
#
# Steps visible in the code below:
#   * count comes from get_filecount(); start_file() is called when it is
#     <= 0, otherwise count is decremented before the list file is read.
#   * Only list lines containing "C:/" or "./" and ".txt" are processed.
#     The input path is the data directory plus the part of the line that
#     starts at the population name; the output path replaces "/Text" by
#     "/" + algo and ".txt" by ".csv".
#   * The output directory and the "...Splitted" directory are created with
#     os.mkdir when they do not exist.
#   * When the output file does not exist, the branch matching algo
#     ("QgrsWeb", "Qgrs" or "Pqs") calls the corresponding qgrs function;
#     split_file() is called in a try block whose bare except discards any
#     error.
#
# NOTE(review): the indentation of this block has been lost, so several
# pairings cannot be determined from the text alone -- in particular which
# `if` the final `else: print(outfile)` belongs to, and whether each
# `try: split_file(...)` runs only after a False result.  Confirm against
# version control before reformatting or changing this function.
# NOTE(review): `found` is assigned but never read in this block.
# NOTE(review): `libcpp` stays unbound when platform.system() contains none
# of "Windows", "Linux", "Darwin"; the Linux branch passes a ".a" file to
# cdll.LoadLibrary -- presumably a static archive; verify it can be loaded.
# NOTE(review): the "#logdir = ..." text is a commented-out statement; on a
# single physical line it would comment out everything that follows it.
def substr_not_founds(population, algo, overlap, reverse, nsplit=2, libpath=""): count = get_filecount(population, algo) if count <= 0: start_file(population, algo) else: count = count - 1 infile = get_filename(population, algo, count) #logdir = qlibs.get_logdir() + population + "/" fin = open(infile, "r") buf = fin.read() fin.close() lines = buf.split("\n") print(lines, reverse) for line in lines: if line.find("C:/") >= 0 or line.find("./") >= 0: if line.find(".txt") >= 0: infile = qlibs.get_datadir() + line[line.find(population):] outfile = infile.replace("/Text", "/" + algo).replace(".txt", ".csv") buf = outfile[outfile.find(algo):] length = buf.find("/") + 2 outdir = outfile[:outfile.find("/" + algo) + length] print(population, line, infile, outfile, outdir, buf) if not os.path.exists(outdir): os.mkdir(outdir) splittedoutfile = infile.replace( "/Text" + reverse, "/Text" + reverse + algo + "Splitted") buf = splittedoutfile[splittedoutfile.find("Text" + reverse + algo + "Splitted"):] length = buf.find("/") + 2 splittedoutdir = splittedoutfile[:splittedoutfile. 
find("/Text" + reverse + algo + "Splitted") + length] if not os.path.exists(splittedoutdir): os.mkdir(splittedoutdir) if not os.path.exists(outfile): if algo == "QgrsWeb": if qgrs.qgrs_web(population, infile, outfile) == False: found = False try: split_file(population, algo, infile, splittedoutdir, nsplit, overlap, reverse) except: pass if algo == "Qgrs": if platform.system().find("Windows") >= 0: libcpp = cdll.LoadLibrary("../lib/Qgrs.dll") if platform.system().find("Linux") >= 0: libcpp = cdll.LoadLibrary("../lib/qgrs-linux.a") if platform.system().find("Darwin") >= 0: libcpp = cdll.LoadLibrary("../lib/qgrs-mac.so") if qgrs.qgrs_local(population, infile, outfile, libcpp) == False: found = False try: split_file(population, algo, infile, splittedoutdir, nsplit, overlap, reverse) except: pass if algo == "Pqs": if qgrs.pqs(libpath, population, infile, outfile) == False: found = False try: split_file(population, algo, infile, splittedoutdir, nsplit, overlap, reverse) except: pass else: print(outfile) count = count + 1 result = txt_equals_csv(population, algo, reverse, count) return result
def get_seq(self):
    """Get transcript sequences from UCSC Genome Browser (https://genome.ucsc.edu/).

    Builds a command-line style argument list from the form (input file,
    one flag per selected region, ``-g feature|gene`` from ``self.s`` and
    ``-n`` when ``self.t`` is 1), passes it to ``gs.get_seq`` and then
    calls ``f2t.set_text`` on the Fasta directory of every returned set.

    ``self.s`` selects the output mode: 1 = one file per region
    ("feature"), 2 = whole gene ("gene").  Region names and option letters
    are only accumulated in those two modes.  When the four inner regions
    and both flanks are selected, the region name becomes ``FullGeneSeq``
    and the options ``full`` / ``full_gene``.

    The original appended ``"feature"`` twice after ``-g`` in mode 1
    (``if``/``if``/``else``); exactly one value is appended now.
    """
    argv = ["quad.exe", self.file.get()]
    print("get")
    print(self.t)
    mode = int(self.s.get())
    named = mode in (1, 2)
    regions = ""
    opts = ""
    nregs = 0  # how many of the four inner regions are selected
    u = False
    d = False

    if int(self.up.get()) == 1:
        argv.append("-u")
        argv.append(self.upno.get())
        u = True
        if named:
            regions = regions + "Upstream" + str(self.upno.get())
            opts = opts + "u"

    # (form variable, command-line flag, region name); the option letter is
    # the flag without its dash.
    inner_regions = (
        (self.u5, "-f", "UtrExon5"),
        (self.u3, "-t", "UtrExon3"),
        (self.ce, "-c", "CdsExon"),
        (self.it, "-i", "Intron"),
    )
    for var, flag, name in inner_regions:
        if int(var.get()) == 1:
            argv.append(flag)
            if named:
                nregs = nregs + 1
                regions = regions + name
                opts = opts + flag[1]

    if int(self.dw.get()) == 1:
        argv.append("-d")
        argv.append(self.dwno.get())
        d = True
        if named:
            regions = regions + "Downstream" + str(self.dwno.get())
            opts = opts + "d"

    argv.append("-g")
    argv.append("gene" if mode == 2 else "feature")
    if self.t.get() == 1:
        argv.append("-n")
    print(argv)
    sets = gs.get_seq(self.pop.get(), argv)

    if nregs == 4 and u and d:
        regions = "FullGeneSeq"
        if mode == 1:
            opts = "full"
        elif mode == 2:
            opts = "full_gene"
    # Per-region output lives in "<regions>OnePerRegion".
    regionsopr = regions + "OnePerRegion" if mode == 1 else regions
    print(str(mode))
    print("sets", sets)
    for set_name in sets:
        print(regionsopr)
        print("regionsopr")
        f2t.set_text(
            qlibs.get_datadir() + self.pop.get() + "/" + set_name +
            "/Fasta/" + regionsopr + "/", regions, opts)
    return