示例#1
0
def txt_equals_csv(population, algo, reverse, count):
    """Check that every split text file has a matching CSV result file.

    For each set directory below ``<datadir><population>``:

    * ``.csv`` file names are collected from the sub-directories whose name
      contains ``algo + reverse + "Splitted" * count``;
    * ``.txt`` file names are collected from the sub-directories whose name
      contains ``"Text" + reverse + algo + "Splitted" * count``.

    Every text file without a CSV of the same base name is reported, one
    path per line, in the file named by
    ``get_filename(population, algo, count)``.

    Returns:
        True when nothing is missing (no report file is written),
        False when a report was written.
    """
    suffix = "Splitted" * count
    csv_marker = algo + reverse + suffix
    txt_marker = "Text" + reverse + algo + suffix
    datadir = qlibs.get_datadir()
    indir = datadir + population

    missing = []
    for set_name in os.listdir(indir):
        set_path = indir + "/" + set_name
        if not os.path.isdir(set_path):
            continue
        csvfiles = []
        txtfiles = []
        for dir_name in os.listdir(set_path):
            dir_path = set_path + "/" + dir_name + "/"
            # The two markers are tested independently: with an empty
            # ``reverse`` a text directory name also contains the CSV marker.
            # ``find(...) > 0`` deliberately ignores names that *start* with
            # the extension.
            if csv_marker in dir_name and os.path.isdir(dir_path):
                csvfiles.extend(name for name in os.listdir(dir_path)
                                if name.find(".csv") > 0)
            if txt_marker in dir_name and os.path.isdir(dir_path):
                txtfiles.extend(name for name in os.listdir(dir_path)
                                if name.find(".txt") > 0)
        for name in txtfiles:
            # presumably qlibs.find returns a negative index when the item
            # is absent -- confirm against qlibs
            if qlibs.find(name.replace(".txt", ".csv"), csvfiles) < 0:
                missing.append(datadir + population + "/" + set_name + "/" +
                               txt_marker + "/" + name + "\n")

    if not missing:
        return True
    with open(get_filename(population, algo, count), "w") as fout:
        fout.writelines(missing)
    return False
示例#2
0
def dist(root, algodir, reverse, regiondir="any", setdir="any"):
    """Run ``dist_data`` on the set directories of *root* and date-stamp the output.

    Every sub-directory of ``<datadir><root>/`` is processed when *setdir*
    is ``"any"``; otherwise only the sub-directory named *setdir*.
    ``dist_data`` is given an output name ending in the placeholder
    ``_YYYYmmdd.csv``; once it returns, an existing output file is renamed
    so that the placeholder becomes today's date, replacing any file
    already carrying that name.

    When *setdir* is ``"any"`` a root-level file is also passed to
    ``dist_data`` and renamed the same way after the loop. The value
    returned by each ``dist_data`` call is fed to the next call as
    ``popfirst``.

    Args:
        root: name of the directory below the data directory.
        algodir: algorithm name used in the output file names.
        reverse: suffix appended to *algodir* in the file names.
        regiondir: forwarded to ``dist_data``.
        setdir: a set directory name, or ``"any"`` for all of them.
    """
    # One stamp for the whole run keeps all output names consistent.
    stamp = str(datetime.date.today()).replace("-", "")
    rootdir = qlibs.get_datadir() + root + "/"
    prefix = "dist_" + algodir + reverse + "_" + root
    rootfile = ""
    popfirst = True
    for set_name in os.listdir(rootdir):
        if not os.path.isdir(rootdir + set_name):
            continue
        if setdir != "any" and set_name != setdir:
            continue
        set_prefix = rootdir + set_name + "/" + prefix + "_" + set_name + "_"
        outfile = set_prefix + "YYYYmmdd.csv"
        newfile = set_prefix + stamp + ".csv"
        if setdir == "any":
            rootfile = rootdir + prefix + "_YYYYmmdd.csv"
            popfirst = dist_data(rootdir, set_name, algodir, reverse,
                                 outfile, rootfile, regiondir, popfirst)
        else:
            # NOTE(review): here ``regiondir`` lands in the positional slot
            # that receives ``rootfile`` in the call above -- confirm
            # against the signature of dist_data before relying on it.
            dist_data(rootdir, set_name, algodir, reverse, outfile,
                      regiondir)
        if os.path.isfile(outfile):
            # os.replace overwrites an existing target on every platform.
            os.replace(outfile, newfile)
    if rootfile != "" and os.path.isfile(rootfile):
        os.replace(rootfile, rootdir + prefix + "_" + stamp + ".csv")
    return
示例#3
0
 def callback(self, event):
     """Dispatch a button event to the action selected by ``self.opt``.

     The text of the widget that raised *event* decides what happens:

     * ``"Browse"``  -- run the browse method registered for ``self.opt``;
     * ``"Execute"`` -- save the configuration, create the population
       directory, run the action registered for ``self.opt`` and show the
       "END!" message box;
     * ``"Confirm"`` -- save the configuration only.

     An unknown label or option does nothing. Exceptions never propagate
     out of the callback; the traceback is printed so that failures are
     no longer silent.
     """
     import traceback

     # Option name -> name of the method to call.
     browse_actions = {
         "get_seq": "browse_get_seq",
         "lost": "browse_get_seq",
         "errors": "browse_errors",
         "graph": "browse_graphics",
         "select": "browse_select",
     }
     execute_actions = {
         "get_seq": "get_seq",
         "lost": "lost",
         "reverse": "reverse",
         "quad_search": "quad_search",
         "errors": "errors",
         "dist": "dist",
         "graph": "graphics",
         "select": "select",
         "intersect": "intersections",
     }
     try:
         label = event.widget.cget("text")
         if label == "Browse":
             action = browse_actions.get(self.opt)
             if action is not None:
                 getattr(self, action)()
         elif label == "Execute":
             cfgn.save(self)
             qlibs.create_path(qlibs.get_datadir() + self.pop.get() + "/")
             action = execute_actions.get(self.opt)
             if action is not None:
                 getattr(self, action)()
             messagebox.showinfo("Quad", "END!")
         elif label == "Confirm":
             cfgn.save(self)
     except Exception:
         # Keep the GUI alive, but leave a trace of what went wrong.
         traceback.print_exc()
     return
示例#4
0
def get_seq(population, finput, search):
    """Fetch a FASTA file for every ``code;gene`` row of *finput*.

    Each row with at least two ``;``-separated columns is passed to
    ``get_fasta(gene, code, search)``. A non-empty answer is written to
    ``<gene>__<code>.fasta``; an empty one is recorded with
    ``qlibs.trace("lostfasta", ...)``. The function sleeps five seconds
    after every processed row.

    Args:
        population: population name; selects the output directory and is
            passed to ``qlibs.trace``.
        finput: path of the input file.
        search: forwarded unchanged to ``get_fasta``.

    Returns:
        An empty list (nothing is ever appended to it).
    """
    result = []
    # NOTE(review): the original wrote to an undefined ``datadir`` and
    # overwrote ``search`` from an undefined ``indata``. The population
    # directory computed here is assumed to be the intended target --
    # confirm against the callers.
    basedir = qlibs.get_datadir() + population + "/"
    with open(finput, "r") as fin:
        rows = fin.read().split("\n")

    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        code = cols[0]
        gene = cols[1]
        text = get_fasta(gene, code, search)
        if text == "":
            qlibs.trace("lostfasta", gene + "\t" + code, population)
        else:
            # Reuse the sequence already fetched instead of asking again.
            with open(basedir + gene + "__" + code + ".fasta", "w") as fout:
                fout.write(text)
        time.sleep(5)

    return result
示例#5
0
 def quad_search(self):
     """Submit the sequences of the selected set to a prediction algorithm.

     The text directory is built from the population, the set and, when
     the reverse option equals 1, the ``ReverseComplement`` suffix. The
     enabled region check-boxes are joined with ``;`` and the algorithm is
     chosen by the integer value of ``self.v``.
     """
     pop_name = self.pop.get()
     set_name = self.set.get()
     # A value of 1 selects the reversed sequences.
     suffix = "ReverseComplement" if self.rev.get() == 1 else ""
     text_root = (qlibs.get_datadir() + pop_name + "/" + set_name +
                  "/Text" + suffix + "/")

     # Collect the region types enabled in the form, in display order.
     picked = []
     if int(self.up.get()) == 1:
         picked.append("Upstream" + self.upno.get())
     if int(self.u5.get()) == 1:
         picked.append("UtrExon5")
     if int(self.u3.get()) == 1:
         picked.append("UtrExon3")
     if int(self.ce.get()) == 1:
         picked.append("CdsExon")
     if int(self.it.get()) == 1:
         picked.append("Intron")
     if int(self.dw.get()) == 1:
         picked.append("Downstream" + self.dwno.get())
     region_spec = ";".join(picked)

     engine = ""
     choice = int(self.v.get())
     if choice == 1:
         # qgrs-cpp (https://github.com/freezer333/qgrs-cpp) recompiled as
         # a library.
         engine = "Qgrs"
         qgrs.save_csv(pop_name, text_root, region_spec, engine, suffix)
     elif choice == 2:
         # QGRS Mapper web service
         # (http://bioinformatics.ramapo.edu/QGRS/analyze.php).
         engine = "QgrsWeb"
         qgrs.save_csv(pop_name, text_root, region_spec, engine, suffix)
     elif choice == 3:
         # pqs function of the R library pqsfinder, previously installed
         # (http://www.bioconductor.org/packages/release/bioc/html/pqsfinder.html).
         try:
             engine = "Pqs"
             r_script = qlibs.get_scriptsdir() + "pqs.r"
             command = ["Rscript", r_script]
             command += [
                 self.libpath.get(), pop_name, text_root, region_spec,
                 engine
             ]
             subprocess.check_output(command, universal_newlines=True)
         except Exception as err:
             qlibs.trace("pqs", str(err), pop_name)
     print(f"qgrs.py {pop_name} {text_root} {region_spec} {engine}")
示例#6
0
 def open(self):
     """Ask for a file and open it with the system's associated application.

     The dialog starts in the data directory. Nothing is launched when
     the dialog is cancelled. On Windows the file is started through
     ``os.startfile``, which copes with spaces and shell metacharacters in
     the path; on POSIX systems the quoted path is handed to the ``open``
     command.
     """
     indir = qlibs.get_datadir()
     types = (("csv files", "*.csv"), ("all files", "*.*"))
     fname = filedialog.askopenfilename(parent=self.root,
                                        initialdir=indir,
                                        title='Please select a file',
                                        filetypes=types)
     if not fname:
         # Dialog cancelled: there is no file to open.
         return
     if os.name == 'nt':
         os.startfile(os.path.normpath(fname))
     elif os.name == 'posix':
         os.system("open " + shlex.quote(fname))
     return
示例#7
0
 def browse_select(self):
     """Pick a file in the population directory and store it in ``self.file``."""
     start_dir = qlibs.get_datadir() + self.pop.get() + "/"
     chosen = filedialog.askopenfilename(
         parent=self.root,
         initialdir=start_dir,
         title='Please select a file',
         filetypes=(("csv files", "*.csv"), ("all files", "*.*")),
     )
     self.file.set(chosen)
     print("select")
示例#8
0
 def browse_graphics(self):
     """Pick a file and use the name of its parent directory as the title.

     The chosen path goes to ``self.file``; the path component just before
     the file name goes to ``self.tit``.
     """
     start_dir = qlibs.get_datadir() + self.pop.get() + "/"
     picked = filedialog.askopenfilename(
         parent=self.root,
         initialdir=start_dir,
         title='Please select a file',
         filetypes=(("csv files", "*.csv"), ("all files", "*.*")),
     )
     self.file.set(picked)
     # Drop the file name, then keep only the last remaining component.
     folder = picked[:picked.rfind("/")]
     self.tit.set(folder.rpartition("/")[2])
示例#9
0
def save_union(population, algo, reverse):
    """Call ``union`` for every CSV file found in the split-result directories.

    Scans each set directory of the population for sub-directories whose
    name contains ``algo + reverse + "Splitted"`` and hands every file
    with ``.csv`` in its name (not at position 0) to ``union``.
    """
    base = qlibs.get_datadir() + population + "/"
    set_names = os.listdir(base)
    print(set_names)
    for set_name in set_names:
        if not os.path.isdir(base + set_name):
            continue
        set_dir = base + set_name + "/"
        for sub in os.listdir(set_dir):
            if algo + reverse + "Splitted" not in sub:
                continue
            sub_dir = set_dir + sub + "/"
            if not os.path.isdir(sub_dir):
                continue
            for fname in os.listdir(sub_dir):
                if fname.find(".csv") > 0:
                    union(base, set_name, sub, fname, population, algo,
                          reverse)
示例#10
0
def getlost(infile, population, set):
    """List the ``code;gene`` rows of *infile* that have no sequence directory.

    A row counts as found when a directory named ``<gene>__<code>`` exists
    in at least one region directory below
    ``<datadir><population>/<set>/Text/``. The rows that are not found are
    written as ``code;gene;`` lines to
    ``<datadir>Lost/<set>/lost_<set>.csv``, overwriting any previous file.

    Args:
        infile: path handed to ``qlibs.get_rows``.
        population: population directory name.
        set: set directory name (the name shadows the builtin but belongs
            to the public signature).
    """
    outdir = qlibs.get_datadir() + "Lost/" + set + "/"
    outfile = outdir + "lost_" + set + ".csv"
    qlibs.create_path(outdir)
    indir = qlibs.get_datadir() + population + "/" + set + "/Text/"

    # Listed lazily so that an input without usable rows never touches
    # the Text directory, and only once instead of once per row.
    region_dirs = None
    lost = []
    for row in qlibs.get_rows(infile):
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        code = cols[0]
        gene = cols[1]
        if region_dirs is None:
            region_dirs = [
                indir + region + "/" for region in os.listdir(indir)
                if os.path.isdir(indir + region)
            ]
        target = gene + "__" + code
        if not any(os.path.isdir(path + target) for path in region_dirs):
            lost.append(code + ";" + gene + ";\n")

    with open(outfile, "w") as ft:
        ft.writelines(lost)
    return
示例#11
0
def reverse_set(population, set):
    """Run ``reverse_file`` on every file of a set's ``Text`` tree.

    The input layout is ``Text/<region>/<gene_code>/<file>``. Each file is
    passed to ``reverse_file`` with an output path using the same
    region/gene_code/file names below ``TextReverseComplement``. The
    target directory is created with ``qlibs.create_path`` and every
    input path is recorded with ``qlibs.trace("reverse", ...)``.

    Only the three expected directory levels are visited. The previous
    recursive walk also picked up deeper entries and then built paths for
    them that did not exist. A missing ``Text`` directory is ignored.

    Args:
        population: population directory name.
        set: set directory name (the name shadows the builtin but belongs
            to the public signature).
    """
    setdir = qlibs.get_datadir() + population + "/" + set
    indir = setdir + "/Text/"
    outdir = setdir + "/TextReverseComplement/"
    if not os.path.isdir(indir):
        return
    for region in os.listdir(indir):
        region_dir = indir + region + "/"
        if not os.path.isdir(region_dir):
            continue
        for gene_code in os.listdir(region_dir):
            gene_dir = region_dir + gene_code + "/"
            if not os.path.isdir(gene_dir):
                continue
            target_dir = outdir + region + "/" + gene_code + "/"
            for fname in os.listdir(gene_dir):
                infile = gene_dir + fname
                if os.path.isdir(infile):
                    continue
                # Created per file so that empty gene directories do not
                # produce empty output directories.
                qlibs.create_path(target_dir)
                qlibs.trace("reverse", infile, population)
                reverse_file(infile, target_dir + fname)
    return
示例#12
0
def getlost_fromerrors(infile, population, set):
    """Append previous-version codes for the rows of an error file.

    Each row of *infile* is split on tabs into ``gene`` (first column) and
    a code of the form ``<accession>.<version>`` (second column). When
    the version is greater than 1, the line
    ``<accession>.<version - 1>;<gene>;`` is appended to
    ``<datadir>Lost/<set>/declost_<set>.csv``.

    Rows whose code has no numeric version are skipped instead of
    aborting the whole run.

    Args:
        infile: path handed to ``qlibs.get_rows``.
        population: not used by this function.
        set: set name (the name shadows the builtin but belongs to the
            public signature).
    """
    outdir = qlibs.get_datadir() + "Lost/" + set + "/"
    outfile = outdir + "declost_" + set + ".csv"
    qlibs.create_path(outdir)

    entries = []
    for row in qlibs.get_rows(infile):
        cols = row.split("\t")
        if len(cols) <= 1:
            continue
        gene = cols[0]
        parts = cols[1].split(".")
        try:
            version = int(parts[1])
        except (IndexError, ValueError):
            # No ".<number>" suffix: there is no previous version to ask for.
            continue
        if version > 1:
            entries.append(parts[0] + "." + str(version - 1) + ";" + gene +
                           ";\n")

    # Append mode: results of earlier runs are kept.
    with open(outfile, "a") as ft:
        ft.writelines(entries)
    return
示例#13
0
def get_seq(population, argv):
    """Download FASTA files for the rows of an input list, with resume support.

    ``params(argv)`` supplies the input file (index 0), the search option
    (index 1) and the "third column" flag (index 3). Each row
    ``code;gene[;set]`` of the input file is looked up with
    ``get_fasta``, ``get_fasta_gene``, ``get_fasta_alt`` and
    ``get_fasta_gene_alt``, in that order, until one of them returns a
    non-empty text. That text is written to ``<gene>__<code>.fasta`` in
    the directory returned by ``create_dir``. Rows for which every lookup
    is empty are recorded with ``qlibs.trace("fasta", ...)``.

    ``Countfile.log`` in the data directory stores the hit count, the
    next row index, the input file and the date. It is used to resume an
    interrupted file and to stop after 5000 hits on the same day.

    Returns:
        The list of set names touched: the input file's base name minus
        its last four characters, or the distinct third-column values
        when the flag is set.
    """
    result = []
    basedir = qlibs.get_datadir() + population + "/"
    countfile = qlibs.get_datadir() + "Countfile.log"

    indata = params(argv)
    finput = indata[0]
    search = indata[1]
    col3rd = indata[3]

    # Explicit comparisons with True/False are kept on purpose: the type
    # returned by params() is not visible here.
    if col3rd == False:
        basename = os.path.basename(finput)
        stem = basename[:len(basename) - 4]
        result.append(stem)
        datadir = create_dir(basedir + stem, indata)

    with open(finput, "r") as fin:
        rows = fin.read().split("\n")

    try:
        with open(countfile, "r") as fc:
            cols = fc.read().split("\t")
        count = int(cols[0])
        day = time.strptime(cols[3], "%Y-%m-%d")
        today = time.strptime(str(datetime.date.today()), "%Y-%m-%d")
        print(day, today)
        if today > day:
            # A new day: the hit counter starts again.
            count = 0
        elif count >= 5000:
            tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
            return result
        if cols[2] == finput:
            # Same input as last time: resume where it stopped.
            i = int(cols[1])
            if i >= len(rows):
                res = tk.messagebox.askyesno(
                    "Quad",
                    "WARNING:\n\n File already processed \n\n Do you want to force Execution?"
                )
                if res == True:
                    i = 0
        else:
            i = 0
    except Exception:
        # Missing or malformed count file: start from scratch.
        count = 0
        i = 0

    # Lookups tried in order until one yields a sequence.
    fetchers = (get_fasta, get_fasta_gene, get_fasta_alt, get_fasta_gene_alt)

    while i < len(rows):
        cols = rows[i].split(";")
        if len(cols) > 1:
            code = cols[0]
            gene = cols[1]
            if col3rd == True:
                if qlibs.find(cols[2], result) < 0:
                    result.append(cols[2])
                datadir = create_dir(basedir + cols[2], indata)
            count = count + 1
            print(count, gene, code)
            text = ""
            for fetch in fetchers:
                # Every result is kept; previously the answer of
                # get_fasta_alt was thrown away.
                text = fetch(gene, code, search)
                if text != "":
                    break
            if text == "":
                qlibs.trace("fasta", gene + "\t" + code, population)
            else:
                with open(datadir + gene + "__" + code + ".fasta",
                          "w") as fout:
                    fout.write(text)
            if count % 5000 == 0:
                tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
                break
            time.sleep(5)
        i = i + 1

    with open(countfile, "w") as fc:
        fc.write(
            str(count) + "\t" + str(i) + "\t" + finput + "\t" +
            str(datetime.date.today()))
    return result
示例#14
0
def intersect(population, algo):
    """Report the elements shared by the latest ``dist_<algo>`` files of the sets.

    For every set directory of the population, the file whose name starts
    with ``dist_<algo>``, ends with ``.csv`` and sorts last is selected.
    Sets without such a file are recorded with
    ``qlibs.trace("intersect", ...)``. The unique elements of the
    selected files (``qlibs.get_uniques``) are compared pairwise and two
    files are written in the population directory:

    * ``intersect_genes_<population><date>.csv`` -- one line per shared
      element followed by the sets containing it, or the text
      ``No intersection found``;
    * ``intersect_dists_<algo>_<population><date>.csv`` -- the rows of the
      first set's dist file that contain a shared element, each followed
      by the same list of sets.
    """
    gene_lists = []
    sets = []
    dists = []
    indir = qlibs.get_datadir() + population + "/"
    stamp = str(datetime.date.today()).replace("-", "")
    outfile = indir + "intersect_genes_" + population + stamp + ".csv"

    for set_name in os.listdir(indir):
        if not os.path.isdir(indir + set_name):
            continue
        set_dir = indir + set_name + "/"
        lastfile = ""
        for fname in os.listdir(set_dir):
            if not os.path.isfile(set_dir + fname):
                continue
            # The *first* ".csv" must be the extension, as before.
            if (fname.startswith("dist_" + algo)
                    and fname.find(".csv") == len(fname) - 4):
                if fname > lastfile:
                    lastfile = fname
        if lastfile == "":
            qlibs.trace("intersect", set_name, pop=population)
        else:
            gene_lists.append(qlibs.get_uniques(set_dir + lastfile))
            sets.append(set_name)
            with open(set_dir + lastfile, "r") as f:
                dists.append(f.read())

    # elems[k] is shared by the sets named in setreps[k], whose positions
    # in ``sets`` are stored in setidxs[k].
    elems = []
    setreps = []
    setidxs = []
    for i in range(len(sets) - 1):
        for j in range(i + 1, len(gene_lists)):
            for elem in gene_lists[i]:
                if qlibs.find(elem, gene_lists[j]) < 0:
                    continue
                k = qlibs.find(elem, elems)
                if k >= 0:
                    if qlibs.find(sets[j], setreps[k]) < 0:
                        setreps[k].append(sets[j])
                        setidxs[k].append(j)
                else:
                    elems.append(elem)
                    setreps.append([sets[i], sets[j]])
                    setidxs.append([i, j])

    print(sets)
    print(setreps)
    print(setidxs)
    lines = [
        elem + ";" + "".join(name + ";" for name in names) + "\n"
        for elem, names in zip(elems, setreps)
    ]
    with open(outfile, "w") as fout:
        fout.write("".join(lines) if lines else "No intersection found")

    lines = []
    for i, elem in enumerate(elems):
        j = setidxs[i][0]
        print(j)
        print(elem)
        suffix = "".join(name + ";" for name in setreps[i]) + "\n"
        for row in dists[j].split("\n"):
            if row.find(elem) >= 0:
                lines.append(row + suffix)
    outfile = (indir + "intersect_dists_" + algo + "_" + population + stamp +
               ".csv")
    with open(outfile, "w") as fout:
        fout.write("".join(lines))

    return
示例#15
0
def substr_not_founds(population,
                      algo,
                      overlap,
                      reverse,
                      nsplit=2,
                      libpath=""):
    """Retry the prediction on listed text files and split those that fail.

    The list is read from ``get_filename(population, algo, count)``, where
    ``count`` comes from ``get_filecount`` (minus one when positive;
    otherwise ``start_file`` is called first). Only lines containing
    ``C:/`` or ``./`` together with ``.txt`` are used. For each of them
    the result path is derived from the text path; when that result does
    not exist yet the algorithm is run, and when the run returns
    ``False`` the text file is handed to ``split_file``.

    Args:
        population: population directory name.
        algo: ``"QgrsWeb"``, ``"Qgrs"`` or ``"Pqs"``; any other value
            runs nothing.
        overlap: forwarded to ``split_file``.
        reverse: suffix of the ``Text`` directory.
        nsplit: forwarded to ``split_file``.
        libpath: forwarded to ``qgrs.pqs``.

    Returns:
        The result of ``txt_equals_csv`` for the next count.
    """
    count = get_filecount(population, algo)
    if count <= 0:
        start_file(population, algo)
    else:
        count = count - 1
    with open(get_filename(population, algo, count), "r") as fin:
        lines = fin.read().split("\n")
    print(lines, reverse)

    splitted_name = "Text" + reverse + algo + "Splitted"
    for line in lines:
        if line.find("C:/") < 0 and line.find("./") < 0:
            continue
        if line.find(".txt") < 0:
            continue
        infile = qlibs.get_datadir() + line[line.find(population):]
        outfile = infile.replace("/Text", "/" + algo).replace(".txt", ".csv")
        # Cut the result path just after the algorithm directory.
        buf = outfile[outfile.find(algo):]
        length = buf.find("/") + 2
        outdir = outfile[:outfile.find("/" + algo) + length]
        print(population, line, infile, outfile, outdir, buf)
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        # Same cut for the directory that receives the split files.
        splittedoutfile = infile.replace("/Text" + reverse,
                                         "/" + splitted_name)
        buf = splittedoutfile[splittedoutfile.find(splitted_name):]
        length = buf.find("/") + 2
        splittedoutdir = splittedoutfile[:splittedoutfile.
                                         find("/" + splitted_name) + length]
        if not os.path.exists(splittedoutdir):
            os.mkdir(splittedoutdir)

        if os.path.exists(outfile):
            print(outfile)
            continue

        if algo == "QgrsWeb":
            succeeded = qgrs.qgrs_web(population, infile, outfile)
        elif algo == "Qgrs":
            # NOTE(review): ``libcpp`` stays unbound on any other platform.
            if platform.system().find("Windows") >= 0:
                libcpp = cdll.LoadLibrary("../lib/Qgrs.dll")
            if platform.system().find("Linux") >= 0:
                libcpp = cdll.LoadLibrary("../lib/qgrs-linux.a")
            if platform.system().find("Darwin") >= 0:
                libcpp = cdll.LoadLibrary("../lib/qgrs-mac.so")
            succeeded = qgrs.qgrs_local(population, infile, outfile, libcpp)
        elif algo == "Pqs":
            succeeded = qgrs.pqs(libpath, population, infile, outfile)
        else:
            continue

        # Only an explicit False triggers the split, as before.
        if succeeded == False:
            try:
                split_file(population, algo, infile, splittedoutdir, nsplit,
                           overlap, reverse)
            except Exception as e:
                # Still best-effort, but the failure is now recorded.
                qlibs.trace("split", infile + "\t" + str(e), population)

    count = count + 1
    result = txt_equals_csv(population, algo, reverse, count)
    return result
示例#16
0
 def get_seq(self):
     """Get transcript sequences and convert the downloaded FASTA files.

     Original note: the sequences come from the UCSC Genome Browser
     (https://genome.ucsc.edu/).

     An argument list is built from the form and handed to
     ``gs.get_seq``: the input file, one flag per enabled region
     (``-u <n>``, ``-f``, ``-t``, ``-c``, ``-i``, ``-d <n>``), ``-g``
     followed by ``gene`` when ``self.s`` is 2 and by ``feature``
     otherwise, and ``-n`` when ``self.t`` is 1. ``f2t.set_text`` is then
     called for every set name returned, on
     ``<datadir><population>/<set>/Fasta/<regions>/``.

     Region names and option letters are recorded only when ``self.s`` is
     1 or 2. When all four inner regions plus upstream and downstream are
     enabled, the region name becomes ``FullGeneSeq``. With ``self.s``
     equal to 1 the directory name gets the ``OnePerRegion`` suffix.

     ``feature`` used to be appended twice when ``self.s`` was 1; it is
     now appended once.
     """
     mode = int(self.s.get())
     recorded = mode in (1, 2)
     regions = ""
     opts = ""
     nregs = 0
     argv = ["quad.exe"]
     argv.append(self.file.get())
     print("get")
     print(self.t)

     upstream = int(self.up.get()) == 1
     if upstream:
         argv.append("-u")
         argv.append(self.upno.get())
         if recorded:
             regions = regions + "Upstream" + str(self.upno.get())
             opts = opts + "u"

     # (form variable, command-line flag, region name, option letter)
     inner_regions = (
         (self.u5, "-f", "UtrExon5", "f"),
         (self.u3, "-t", "UtrExon3", "t"),
         (self.ce, "-c", "CdsExon", "c"),
         (self.it, "-i", "Intron", "i"),
     )
     for variable, flag, name, letter in inner_regions:
         if int(variable.get()) == 1:
             argv.append(flag)
             if recorded:
                 nregs = nregs + 1
                 regions = regions + name
                 opts = opts + letter

     downstream = int(self.dw.get()) == 1
     if downstream:
         argv.append("-d")
         argv.append(self.dwno.get())
         if recorded:
             regions = regions + "Downstream" + str(self.dwno.get())
             opts = opts + "d"

     argv.append("-g")
     argv.append("gene" if mode == 2 else "feature")
     if self.t.get() == 1:
         argv.append("-n")
     print(argv)
     sets = gs.get_seq(self.pop.get(), argv)

     if nregs == 4 and upstream and downstream:
         # Every region selected: the result is the full gene sequence.
         regions = "FullGeneSeq"
         opts = "full" if mode == 1 else "full_gene"
     regionsopr = regions + "OnePerRegion" if mode == 1 else regions
     print(str(mode))
     print("sets", sets)
     for set_name in sets:
         print(regionsopr)
         print("regionsopr")
         f2t.set_text(
             qlibs.get_datadir() + self.pop.get() + "/" + set_name +
             "/Fasta/" + regionsopr + "/", regions, opts)
     return