Exemplo n.º 1
0
def db_connect(config=config.DATABASE_CONF):
    """Open a database connection described by a configuration mapping.

    Args:
        config: Mapping with the required keys ``"db"``, ``"user"`` and
            ``"pw"`` and the optional keys ``"host"``, ``"port"`` and
            ``"sslmode"``. Defaults to ``config.DATABASE_CONF``, which is
            bound once, when this function is defined.

    Returns:
        Whatever ``psycopg.connect`` returns for the assembled connection
        string.

    Raises:
        KeyError: If one of the required keys is missing from ``config``.
    """

    def dsn_value(value):
        # Plain values are emitted unchanged so the connection string stays
        # identical for ordinary settings. Empty values and values holding
        # whitespace, quotes or backslashes must be single-quoted (with
        # ' and \ backslash-escaped) or the keyword/value parser of libpq
        # would split or reject them.
        text = "%s" % (value,)
        if text and not any(ch in text for ch in " \t\r\n'\\"):
            return text
        return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"

    # ``in`` replaces dict.has_key(), which was removed in Python 3.
    connstr = [
        "%s=%s" % (key, dsn_value(config[key]))
        for key in ("host", "port", "sslmode")
        if key in config
    ]
    connstr.append("dbname=%s user=%s password=%s" % (
        dsn_value(config["db"]),
        dsn_value(config["user"]),
        dsn_value(config["pw"]),
    ))
    return psycopg.connect(" ".join(connstr))
Exemplo n.º 2
0
def db_connect(config=config.DATABASE_CONF):
    """Connect to the database named in ``config``.

    Args:
        config: Mapping providing ``'db'``, ``'user'`` and ``'pw'``
            (required) and optionally ``'host'``, ``'port'`` and
            ``'sslmode'``. The default, ``config.DATABASE_CONF``, is
            evaluated once at function definition time.

    Returns:
        The object returned by ``psycopg.connect``.

    Raises:
        KeyError: If ``'db'``, ``'user'`` or ``'pw'`` is absent.
    """

    def quote(value):
        # libpq needs single quotes around empty values and around values
        # containing whitespace, ' or \ (the latter two backslash-escaped).
        # Everything else is passed through untouched.
        text = "%s" % (value,)
        needs_quotes = not text or any(ch in text for ch in " \t\r\n'\\")
        if not needs_quotes:
            return text
        return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"

    connstr = []
    for option in ('host', 'port', 'sslmode'):
        # Membership test instead of has_key(), which Python 3 dropped.
        if option in config:
            connstr.append("%s=%s" % (option, quote(config[option])))
    connstr.append("dbname=%s user=%s password=%s" % (
        quote(config['db']), quote(config['user']), quote(config['pw'])))
    return psycopg.connect(' '.join(connstr))
Exemplo n.º 3
0
def db_connect(config=config.DATABASE_CONF):
    """Build a connection string from ``config`` and connect with psycopg.

    Args:
        config: Mapping with the mandatory entries ``'db'``, ``'user'`` and
            ``'pw'``; ``'host'``, ``'port'`` and ``'sslmode'`` are added to
            the connection string only when present. Defaults to
            ``config.DATABASE_CONF`` (captured when the function is defined).

    Returns:
        The result of ``psycopg.connect``.

    Raises:
        KeyError: If a mandatory entry is missing.
    """

    def escaped(value):
        # Only values that would confuse the libpq parser (empty, or
        # containing whitespace, single quotes or backslashes) are wrapped
        # in single quotes; simple values keep their original spelling.
        text = "%s" % (value,)
        if text and not any(ch in text for ch in " \t\r\n'\\"):
            return text
        return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"

    # dict.has_key() does not exist on Python 3; ``in`` works everywhere.
    optional = [key for key in ('host', 'port', 'sslmode') if key in config]
    connstr = ["%s=%s" % (key, escaped(config[key])) for key in optional]
    connstr.append("dbname=%s user=%s password=%s" %
                   (escaped(config['db']), escaped(config['user']),
                    escaped(config['pw'])))
    return psycopg.connect(' '.join(connstr))
Exemplo n.º 4
0
    def __init__(self, config, name):
        """Initialise the repository and register its single package feed.

        Args:
            config: Repository configuration mapping. When it contains an
                ``"extensions"`` entry, that value is stored in
                ``self.valid_extensions``.
            name: Repository name, forwarded to
                ``repolib.BuildRepository.__init__``.

        Raises:
            Exception: If the path returned by ``self.repo_path()`` is not
                an existing directory.
        """
        repolib.BuildRepository.__init__(self, config, name)

        # ``in`` replaces dict.has_key(), which was removed in Python 3.
        if "extensions" in config:
            self.valid_extensions = config["extensions"]

        repo_dir = self.repo_path()
        if not os.path.isdir(repo_dir):
            raise Exception("Repository directory %s does not exists" % repo_dir)

        self.feeds = (packages_build_repository(self),)
Exemplo n.º 5
0
    def __init__(self, config, name):
        """Set up the build repository and its one ``packages`` feed.

        Args:
            config: Repository configuration mapping; an ``"extensions"``
                entry, when present, is copied to ``self.valid_extensions``.
            name: Repository name passed on to the base-class initialiser.

        Raises:
            Exception: If ``self.repo_path()`` does not name an existing
                directory.
        """
        repolib.BuildRepository.__init__(self, config, name)

        # Membership test instead of has_key(): the latter is gone in
        # Python 3 and ``in`` behaves the same on Python 2.
        if "extensions" in config:
            self.valid_extensions = config['extensions']

        repo_dir = self.repo_path()
        if not os.path.isdir(repo_dir):
            raise Exception("Repository directory %s does not exists" %
                            repo_dir)

        self.feeds = (packages_build_repository(self), )
Exemplo n.º 6
0
    def __init__(self, config, name):
        """Initialise the repository and create one feed per arch/component.

        Args:
            config: Repository configuration mapping. Must contain
                ``"architectures"`` and ``'components'``; a components value
                of ``["-"]`` means "no components".
            name: Repository name, forwarded to
                ``repolib.BuildRepository.__init__``.

        Raises:
            Exception: If no architectures are configured, or if the path
                returned by ``self.repo_path()`` is not an existing
                directory.
            KeyError: If ``config`` has no ``'components'`` entry.
        """
        repolib.BuildRepository.__init__(self, config, name)

        # ``in`` replaces dict.has_key(), which was removed in Python 3.
        if "architectures" not in config:
            raise Exception("Broken configuration : no architecture defined")

        self.architectures = config["architectures"]

        self.components = []
        if config['components'] != ["-"]:
            self.components.extend(config['components'])

        # This check used to be indented with a tab inside a space-indented
        # body, which Python 3 rejects with a TabError.
        repo_dir = self.repo_path()
        if not os.path.isdir(repo_dir):
            raise Exception("Repository directory %s does not exists" % repo_dir)

        self.feeds = [
            debian_component_repository(self, arch, compname)
            for arch in self.architectures
            for compname in self.components
        ]
Exemplo n.º 7
0
    def __init__(self, config, name):
        """Set up the repository with a feed for each architecture/component.

        Args:
            config: Repository configuration mapping providing
                ``"architectures"`` and ``'components'``. The components
                value ``["-"]`` leaves the component list empty.
            name: Repository name passed to the base-class initialiser.

        Raises:
            Exception: If ``"architectures"`` is missing, or if
                ``self.repo_path()`` is not an existing directory.
            KeyError: If ``'components'`` is missing from ``config``.
        """
        repolib.BuildRepository.__init__(self, config, name)

        # Membership test instead of has_key(), which Python 3 removed.
        if "architectures" not in config:
            raise Exception("Broken configuration : no architecture defined")

        self.architectures = config["architectures"]

        self.components = []
        if config['components'] != ["-"]:
            self.components.extend(config['components'])

        repo_dir = self.repo_path()
        if not os.path.isdir(repo_dir):
            raise Exception("Repository directory %s does not exists" %
                            repo_dir)

        self.feeds = []
        for arch in self.architectures:
            self.feeds.extend(
                debian_component_repository(self, arch, compname)
                for compname in self.components)
Exemplo n.º 8
0
def stats_fastq(path, samples, config):
    """Build the HTML table that summarises the FastQC results per sample.

    Args:
        path: Project directory; reports are read from
            ``<path>/results_fastqc``.
        samples: Mapping of sample name to a list of FASTQ paths. Only the
            first half of each list is reported (presumably the second half
            holds other per-sample data -- confirm against the caller).
        config: Mapping of enabled modules; nothing is reported unless it
            contains the key ``"fastqc"``.

    Returns:
        ``1`` when ``path`` does not exist, ``""`` when FastQC is not enabled
        or ``results_fastqc`` is absent, otherwise the ``<table>`` markup
        with one header row and one row per sample.
    """
    if not os.path.exists(path):
        return 1
    entries = os.listdir(path)
    # ``in`` instead of dict.has_key(), which no longer exists on Python 3.
    if "fastqc" not in config or "results_fastqc" not in entries:
        return ""

    results_dir = path + "/results_fastqc"
    reports = os.listdir(results_dir)

    help_url = ("http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
                "Help/3%20Analysis%20Modules/")
    # (help page, column title) of every FastQC module column, in order.
    modules = [
        ("1%20Basic%20Statistics.html", "Basic statistics"),
        ("2%20Per%20Base%20Sequence%20Quality.html", "Per base sequence quality"),
        ("3%20Per%20Sequence%20Quality%20Scores.html", "Per sequence quality scores"),
        ("4%20Per%20Base%20Sequence%20Content.html", "Per base sequence content"),
        ("5%20Per%20Sequence%20GC%20Content.html", "Per base GC content"),
        ("5%20Per%20Sequence%20GC%20Content.html", "Per sequence GC content"),
        ("6%20Per%20Base%20N%20Content.html", "Per base N content"),
        ("7%20Sequence%20Length%20Distribution.html", "Sequence Length Distribution"),
        ("8%20Duplicate%20Sequences.html", "Sequence Duplication Levels"),
        ("9%20Overrepresented%20Sequences.html", "Overrepresented sequences"),
        ("11%20Kmer%20Content.html", "Kmer Content"),
    ]
    headers = ["Sample", "Link"]
    for page, title in modules:
        headers.append('<a href="' + help_url + page +
                       '" target="_blank">' + title + '</a>')
    headers.extend(["Total sequences", "Sequence length", "%GC"])

    # Image shown for the cell at each position of a data row ("" = none).
    names = [
        "", "", "per_base_quality.png", "per_sequence_quality.png",
        "per_base_sequence_content.png", "per_base_gc_content.png",
        "per_sequence_gc_content.png", "per_base_n_content.png",
        "sequence_length_distribution.png", "duplication_levels.png", "",
        "kmer_profiles.png", "", "", "",
    ]

    table = ["<tr>" + "".join("<th bgcolor='#A8A8A8'>" + head + "</th>"
                              for head in headers) + "</tr>"]
    image_index = 0
    # items() and // give the same result on Python 2 and 3, whereas
    # iteritems() and a float slice bound from "/" fail on Python 3.
    for sample, data in sorted(samples.items()):
        reads = []
        for fastq in data[0:len(data) // 2]:
            base = fastq.split("/")[-1]
            report = base.replace(".fastq", "").replace(".gz", "") + "_fastqc"
            if report not in reports:
                reads.append(["NA"] * 15)
                continue

            report_dir = results_dir + "/" + report
            cells = ['<a href="../results_fastqc/' + report +
                     '/fastqc_report.html" target="_blank">Results</a>']
            label = sample + "_" + str(len(reads) + 1)

            # One PASS/WARN/FAIL cell per line of summary.txt. ``with``
            # closes the file even when a malformed line raises.
            with open(report_dir + "/summary.txt", 'r') as summary:
                for line in summary:
                    status = line.strip("\n").split("\t")[0]
                    image = names[len(cells)]
                    if image:
                        cell = ('<a href="../results_fastqc/' + report +
                                '/Images/' + image +
                                '" class="lytebox lytetip" data-tip="" data-lyte-options="showPrint:true tipStyle:classic" data-lightbox="image-' +
                                str(image_index) +
                                '" data-title="#TIT">#VAL</a>')
                        image_index += 1
                    else:
                        cell = "#VAL"
                    cells.append(
                        cell.replace("#VAL", status).replace("#TIT", label))

            # Total sequences, sequence length and %GC are the second field
            # of the lines at 0-based index 6, 8 and 9 of fastqc_data.txt.
            with open(report_dir + "/fastqc_data.txt", 'r') as data_file:
                for line_no, line in enumerate(data_file):
                    if line_no > 9:
                        break
                    if line_no in (6, 8, 9):
                        cells.append(line.strip("\n").split("\t")[1])
            reads.append(cells)

        row = ["<td bgcolor='#A8A8A8'>" + sample + "</td>"]
        for column in range(len(reads[0])):
            if len(reads) > 1:
                text = reads[0][column] + " / " + reads[1][column]
            else:
                text = reads[0][column]
            if ("NA" in text) or ("FAIL" in text):
                colour = "#CC3300"
            elif "WARN" in text:
                colour = "#FFCC00"
            else:
                colour = "#00CC66"
            row.append("<td bgcolor='" + colour + "'>" + text + "</td>")
        table.append("<tr>" + "".join(row) + "</tr>")
    return "<table>" + "\n".join(table) + "</table>"
Exemplo n.º 9
0
def project_process(path_base, folder):
    """Print the status of a project's main job and of its module jobs.

    Reads ``pid.txt``, ``temp/pids.txt`` and ``config.txt`` from
    ``<path_base>/<folder>`` and prints, for every enabled module, the job
    ids, their states as reported by ``manager.job_status`` and the
    percentage of samples that succeeded, exited or are still pending
    according to ``check_samples``.

    Args:
        path_base: Directory that contains the project folder.
        folder: Name of the project folder inside ``path_base``.

    Raises:
        IOError: If ``config.txt`` cannot be opened (``OSError`` on
            Python 3).
    """

    def status_label(code):
        # job_status() codes: 1 -> finished, 0 -> running, other -> error.
        if code == 1:
            return "DONE"
        if code == 0:
            return "RUN"
        return "ERROR"

    project_dir = path_base + "/" + folder
    samples = get_samples(path_base, folder, project_dir + "/samples.list")

    # Check main process. print(<one string>) behaves identically on
    # Python 2 and Python 3.
    print("## MAIN PROCESS ###########################")
    try:
        with open(project_dir + "/pid.txt", 'r') as handle:
            main_pid = handle.readline().strip("\n").split("\t")[1]
        state = status_label(manager.job_status(main_pid))
        print("- Main process (" + main_pid + ") status: " + state)
    except Exception:
        # Best effort: any failure is reported as "not found". Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
        print("- Main process not found or already finished")

    # Check subprocesses. Each line of pids.txt holds three tab-separated
    # fields: module name, "|"-joined ids and "|"-joined job ids.
    print("## SUBPROCESSES ###########################")
    pids = dict()
    try:
        with open(project_dir + "/temp/pids.txt", 'r') as handle:
            for line in handle:
                fields = line.strip("\n").split("\t")
                pids[fields[0]] = [fields[1].split("|"), fields[2].split("|")]
    except Exception:
        print("- No subprocesses file found")

    modules = ["trimgalore", "fastqc", "star", "htseq-gene", "htseq-exon",
               "sam2sortbam"]
    config = dict()
    # ``with`` closes config.txt, which was previously left open.
    with open(project_dir + "/config.txt", 'r') as handle:
        for line in handle:
            if line.startswith("%"):
                continue
            fields = line.strip("\n").split("\t")
            if fields[0] in modules:
                value = fields[1].split("/")[0]
                # A value of "0" marks the module as disabled.
                if value != "0":
                    config[fields[0]] = value

    for pg in modules:
        # ``in`` replaces dict.has_key(), which Python 3 removed.
        if pg not in config:
            continue
        if pg not in pids:
            print("- Already done or waiting for previous module output")
            continue
        pid = pids[pg]
        print("- ID:       " + "|".join(pid[0]))
        states = [status_label(manager.job_status(job)) for job in pid[1]]
        print("- Status:   " + "|".join(states))
        _samples_v, stats = check_samples(samples, path_base, folder, pg, "update")
        total = float(stats[0])
        sok = str(round(100 * float(stats[1]) / total, 2))
        sko = str(round(100 * float(stats[2]) / total, 2))
        pending = str(round(100 * float(stats[0] - stats[1] - stats[2]) / total, 2))
        print("- Progress: " + sok + "% succeeded / " + sko + "% exited / " + pending + "% pending")
Exemplo n.º 10
0
def stats_fastq(path, samples, config):
    """Return an HTML table with the FastQC verdicts of every sample.

    Args:
        path: Project directory; the reports live in
            ``<path>/results_fastqc``.
        samples: Mapping of sample name to a list of FASTQ paths, of which
            only the first half is reported (presumably the second half
            holds other per-sample data -- confirm against the caller).
        config: Mapping of enabled modules; the table is built only when it
            contains the key ``"fastqc"``.

    Returns:
        ``1`` if ``path`` does not exist, ``""`` if FastQC is not enabled or
        there is no ``results_fastqc`` directory, otherwise the ``<table>``
        markup (header row plus one row per sample).
    """
    if not os.path.exists(path):
        return 1
    entries = os.listdir(path)
    # Membership test instead of has_key(), which Python 3 removed.
    if "fastqc" not in config or "results_fastqc" not in entries:
        return ""

    results_dir = path + "/results_fastqc"
    files = os.listdir(results_dir)

    help_url = ("http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
                "Help/3%20Analysis%20Modules/")
    # (help page, column title) for each FastQC module column, in order.
    modules = [
        ("1%20Basic%20Statistics.html", "Basic statistics"),
        ("2%20Per%20Base%20Sequence%20Quality.html", "Per base sequence quality"),
        ("3%20Per%20Sequence%20Quality%20Scores.html", "Per sequence quality scores"),
        ("4%20Per%20Base%20Sequence%20Content.html", "Per base sequence content"),
        ("5%20Per%20Sequence%20GC%20Content.html", "Per base GC content"),
        ("5%20Per%20Sequence%20GC%20Content.html", "Per sequence GC content"),
        ("6%20Per%20Base%20N%20Content.html", "Per base N content"),
        ("7%20Sequence%20Length%20Distribution.html", "Sequence Length Distribution"),
        ("8%20Duplicate%20Sequences.html", "Sequence Duplication Levels"),
        ("9%20Overrepresented%20Sequences.html", "Overrepresented sequences"),
        ("11%20Kmer%20Content.html", "Kmer Content"),
    ]
    h = ["Sample", "Link"]
    h.extend('<a href="' + help_url + page + '" target="_blank">' + title +
             '</a>' for page, title in modules)
    h.extend(["Total sequences", "Sequence length", "%GC"])

    # Image linked from the cell at each position of a data row; an empty
    # string means the cell is plain text.
    names = [
        "", "", "per_base_quality.png", "per_sequence_quality.png",
        "per_base_sequence_content.png", "per_base_gc_content.png",
        "per_sequence_gc_content.png", "per_base_n_content.png",
        "sequence_length_distribution.png", "duplication_levels.png", "",
        "kmer_profiles.png", "", "", "",
    ]

    header_cells = ["<th bgcolor='#A8A8A8'>" + title + "</th>" for title in h]
    table = ["<tr>" + "".join(header_cells) + "</tr>"]

    iin = 0  # running number that gives every image link its own lightbox id
    # iteritems() and a float slice bound (from "/") break on Python 3;
    # items() and // behave the same on both major versions.
    for sample, data in sorted(samples.items()):
        m = []
        for f in data[0:len(data) // 2]:
            stem = f.split("/")[-1].replace(".fastq", "").replace(".gz", "")
            report = stem + "_fastqc"
            if report not in files:
                m.append(["NA"] * 15)
                continue

            n = ['<a href="../results_fastqc/' + report +
                 '/fastqc_report.html" target="_blank">Results</a>']
            title = sample + "_" + str(len(m) + 1)

            # summary.txt: the first tab-separated field of each line is the
            # verdict. ``with`` guarantees the handle is closed on errors.
            with open(results_dir + "/" + report + "/summary.txt", 'r') as ff:
                for line in ff:
                    verdict = line.strip("\n").split("\t")[0]
                    image = names[len(n)]
                    if len(image) > 0:
                        G = ('<a href="../results_fastqc/' + report +
                             '/Images/' + image +
                             '" class="lytebox lytetip" data-tip="" data-lyte-options="showPrint:true tipStyle:classic" data-lightbox="image-' +
                             str(iin) + '" data-title="#TIT">#VAL</a>')
                        iin += 1
                    else:
                        G = "#VAL"
                    n.append(G.replace("#VAL", verdict).replace("#TIT", title))

            # fastqc_data.txt: the lines at 0-based index 6, 8 and 9 carry
            # total sequences, sequence length and %GC in their second field.
            with open(results_dir + "/" + report + "/fastqc_data.txt", 'r') as ff:
                for k, line in enumerate(ff):
                    if k > 9:
                        break
                    if k in (6, 8, 9):
                        n.append(line.strip("\n").split("\t")[1])
            m.append(n)

        s = ["<td bgcolor='#A8A8A8'>" + sample + "</td>"]
        for i in range(len(m[0])):
            if len(m) > 1:
                g = m[0][i] + " / " + m[1][i]
            else:
                g = m[0][i]
            if ("NA" in g) or ("FAIL" in g):
                cl = "#CC3300"
            elif "WARN" in g:
                cl = "#FFCC00"
            else:
                cl = "#00CC66"
            s.append("<td bgcolor='" + cl + "'>" + g + "</td>")
        table.append("<tr>" + "".join(s) + "</tr>")
    return "<table>" + "\n".join(table) + "</table>"
Exemplo n.º 11
0
def stats_fastq(path, samples, config):
    """Build the HTML table that summarises the FastQC results per sample.

    Args:
        path: Project directory; reports are read from
            ``<path>/results_fastqc``.
        samples: Mapping of sample name to a list of FASTQ paths. Only the
            first half of each list is reported (presumably the second half
            holds other per-sample data -- confirm against the caller).
        config: Mapping of enabled modules. The table is built only when it
            contains ``"fastqc"``; when it contains ``"bowtie2"`` the
            reports are looked up under the ``_noRNA_`` file names.

    Returns:
        ``1`` when ``path`` does not exist, ``""`` when FastQC is not enabled
        or ``results_fastqc`` is absent, otherwise the ``<table>`` markup
        with one header row and one row per sample.
    """
    if not os.path.exists(path):
        return 1
    entries = os.listdir(path)
    # ``in`` instead of dict.has_key(), which no longer exists on Python 3.
    if "fastqc" not in config or "results_fastqc" not in entries:
        return ""

    results_dir = path + "/results_fastqc"
    # Names of everything inside the results_fastqc directory.
    files = os.listdir(results_dir)

    help_url = ("http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
                "Help/3%20Analysis%20Modules/")
    # (help page, column title) of every FastQC module column, in order.
    modules = [
        ("1%20Basic%20Statistics.html", "Basic statistics"),
        ("2%20Per%20Base%20Sequence%20Quality.html", "Per base sequence quality"),
        ("3%20Per%20Sequence%20Quality%20Scores.html", "Per sequence quality scores"),
        ("4%20Per%20Base%20Sequence%20Content.html", "Per base sequence content"),
        ("5%20Per%20Sequence%20GC%20Content.html", "Per sequence GC content"),
        ("6%20Per%20Base%20N%20Content.html", "Per base N content"),
        ("7%20Sequence%20Length%20Distribution.html", "Sequence Length Distribution"),
        ("8%20Duplicate%20Sequences.html", "Sequence Duplication Levels"),
        ("9%20Overrepresented%20Sequences.html", "Overrepresented sequences"),
        ("11%20Kmer%20Content.html", "Kmer Content"),
    ]
    headers = ["Sample", "Link"]
    for page, title in modules:
        headers.append('<a href="' + help_url + page +
                       '" target="_blank">' + title + '</a>')
    headers.extend(["Total sequences", "Sequence length", "%GC"])

    # Image linked from the cell at each position of a data row ("" = plain
    # text). The list used to contain "per_base_gc_content.png" even though
    # that column no longer exists, which attached the wrong image to every
    # cell from "Per sequence GC content" onwards.
    names = [
        "", "", "per_base_quality.png", "per_sequence_quality.png",
        "per_base_sequence_content.png", "per_sequence_gc_content.png",
        "per_base_n_content.png", "sequence_length_distribution.png",
        "duplication_levels.png", "", "kmer_profiles.png", "", "", "",
    ]
    # A data row has one cell per header except "Sample"; the placeholder
    # row must have the same width (it used to be one cell too long).
    missing_row = ["NA"] * (len(headers) - 1)

    # Write out the table headers
    table = ["<tr>" + "".join("<th bgcolor='#A8A8A8'>" + head + "</th>"
                              for head in headers) + "</tr>"]

    # Write out the data in rows
    image_index = 0
    # items() and // give the same result on Python 2 and 3, whereas
    # iteritems() and a float slice bound from "/" fail on Python 3.
    for sample, data in sorted(samples.items()):
        data_for_sample = []
        for fastq in data[0:len(data) // 2]:
            # Keep only the file name, without the directory path.
            file_name = fastq.split("/")[-1]
            if "bowtie2" in config:
                file_name = file_name.replace("_R1_001", "_noRNA_R1_001")
                file_name = file_name.replace("_R2_001", "_noRNA_R2_001")
            report = (file_name.replace(".fastq", "").replace(".gz", "") +
                      "_fastqc")
            if report not in files:
                data_for_sample.append(list(missing_row))
                continue

            report_dir = results_dir + "/" + report
            # First cell: link to the full FastQC report.
            data_for_read = ['<a href="../results_fastqc/' + report +
                             '/fastqc_report.html" target="_blank">Results</a>']
            label = sample + "_" + str(len(data_for_sample) + 1)

            # summary.txt has one line per module: verdict (PASS, WARN or
            # FAIL), module name, file name. "Adapter Content" and "Per tile
            # sequence quality" have no column in the table and are skipped.
            with open(report_dir + "/summary.txt", 'r') as summary_file:
                for line in summary_file:
                    if "Adapter Content" in line:
                        continue
                    if "Per tile sequence quality" in line:
                        continue
                    verdict = line.strip("\n").split("\t")[0]
                    image = names[len(data_for_read)]
                    if image:
                        cell = ('<a href="../results_fastqc/' + report +
                                '/Images/' + image +
                                '" class="lytebox lytetip" data-tip="" data-lyte-options="showPrint:true tipStyle:classic" data-lightbox="image-' +
                                str(image_index) +
                                '" data-title="#TIT">#VAL</a>')
                        image_index += 1
                    else:
                        cell = "#VAL"
                    data_for_read.append(
                        cell.replace("#VAL", verdict).replace("#TIT", label))
            # There may be no Kmer line in the summary file; show "NA" in
            # its column so the following cells stay aligned.
            if len(data_for_read) == 10:
                data_for_read.append("NA")

            # Total sequences, sequence length and %GC are the second field
            # of the lines at 0-based index 6, 8 and 9 of fastqc_data.txt.
            with open(report_dir + "/fastqc_data.txt", 'r') as data_file:
                for line_no, line in enumerate(data_file):
                    if line_no > 9:
                        break
                    if line_no in (6, 8, 9):
                        data_for_read.append(line.strip("\n").split("\t")[1])
            data_for_sample.append(data_for_read)

        row = ["<td bgcolor='#A8A8A8'>" + sample + "</td>"]
        for col_index in range(len(data_for_sample[0])):
            if len(data_for_sample) > 1:
                result_text = (data_for_sample[0][col_index] + " / " +
                               data_for_sample[1][col_index])
            else:
                result_text = data_for_sample[0][col_index]
            if "FAIL" in result_text:
                colour = "#CC3300"
            elif "WARN" in result_text:
                colour = "#FFCC00"
            elif ("NA /" in result_text) or ("/ NA" in result_text):
                colour = "#A8A8A8"
            else:
                colour = "#00CC66"
            row.append("<td bgcolor='" + colour + "'>" + result_text + "</td>")
        table.append("<tr>" + "".join(row) + "</tr>")
    return "<table>" + "\n".join(table) + "</table>"
Exemplo n.º 12
0
    def __init__(self, filename):
        """Load a translation and its descriptive details from a file.

        The file is read as UTF-8. The first ``{...}`` block found in it is
        parsed as JSON and must provide ``name``, ``name_translated``,
        ``language_code``, ``created_by`` and ``created_date``; each value is
        handed to the matching method of this object. The lines that do not
        start with ``#`` are then read as alternating key / value lines and
        stored as a dict in ``self.translation``.

        Args:
            filename: Path of the translation file.

        Raises:
            TranslationLoadError: If the file cannot be read, contains no
                parsable config block, or the block lacks a required field.
        """
        debug(u"Begin loading translation from file: %s" % filename)
        # Open file & read in contents
        try:
            # Language files should be saved as UTF-8 - this conversion is
            # done by reading the file directly as UTF-8
            f = codecs.open(filename, "r", "UTF-8")
            try:
                block = f.read()
            finally:
                # Close the handle even when read() fails.
                f.close()
        except IOError:
            debug(u"Problem loading information from file, aborting load of translation file")
            raise TranslationLoadError()
        # Convert newlines to unix style
        # NOTE(review): "u_newlines" is not a standard codec; presumably it
        # is registered elsewhere in the project - confirm.
        block = block.decode("u_newlines")
        # Scan document for block between {}, this is our config section
        dicts = re.findall("(?={).+?(?<=})", block, re.DOTALL)
        if not dicts:
            # Without this check dicts[0] below raised a bare IndexError.
            debug(u"No config section (pair of \"{}\") found in file: \"%s\", aborting load of translation" % filename)
            raise TranslationLoadError()
        if len(dicts) > 1:
            debug(u"Found more than one dict-like structure (e.g. pair of \"{}\") in file: \"%s\" - assuming config is the first one" % filename)
        configstring = dicts[0]

        debug(u"Translation file config string is: %s" % configstring)

        try:
            config = json.loads(configstring)
        except ValueError:
            # Malformed JSON is an invalid translation file like any other.
            debug(u"Error loading translation from %s, config section is not valid JSON, aborting load of translation" % filename)
            raise TranslationLoadError()
        conf_items = ["name", "name_translated", "language_code", "created_by", "created_date"]
        func_items = [self.name, self.longname, self.language_code, self.created_by, self.created_date]
        for ci, func in zip(conf_items, func_items):
            # ``in`` replaces dict.has_key(), which was removed in Python 3.
            if ci not in config:
                # Translation file invalid, error out of read process
                debug(u"Error loading translation from %s, %s field not found, aborting load of translation" % (filename, ci))
                raise TranslationLoadError()
            func(config[ci])

        # Split block up into lines and drop the comment lines (those that
        # begin with "#"); blank lines are kept for the next pass.
        content_lines = [line for line in block.split("\n")
                         if not line.startswith("#")]

        # Translation is made up of key\nvalue\n pairs, the keys must be on
        # odd-numbered lines, values on even (after comments are stripped).
        # A blank line cannot be a key, so blank lines are discarded while
        # looking for a key; the line after a key is always taken as its
        # value, even when it is blank.
        entries = []
        looking_for_key = True
        for line in content_lines:
            if looking_for_key:
                if len(line) != 0:
                    entries.append(line)
                    looking_for_key = False
            else:
                entries.append(line)
                looking_for_key = True

        # Convert escaped newlines to real ones
        entries = [entry.replace("\\n", "\n") for entry in entries]

        # Go over the entries two at a time: first the key, then its value
        translation = {}
        for i in range(0, len(entries), 2):
            if i + 1 >= len(entries):
                # A trailing key without a value line is ignored. This used
                # to be reported with a Python 2 print statement that indexed
                # the wrong list.
                debug(u"Translation key without a value in %s: %s" % (filename, entries[i]))
                break
            translation[entries[i]] = entries[i + 1]

        self.translation = translation
Exemplo n.º 13
0
def project_process(path_base, folder):
    """Print the status of a project's main job and of its module jobs.

    Reads ``pid.txt``, ``temp/pids.txt`` and ``config.txt`` from
    ``<path_base>/<folder>`` and prints, for every enabled module, the job
    ids, their states as reported by ``manager.job_status`` and the
    percentage of samples that succeeded, exited or are still pending
    according to ``check_samples``.

    Args:
        path_base: Directory that contains the project folder.
        folder: Name of the project folder inside ``path_base``.

    Raises:
        IOError: If ``config.txt`` cannot be opened (``OSError`` on
            Python 3).
    """

    def status_label(code):
        # job_status() codes: 1 -> finished, 0 -> running, other -> error.
        if code == 1:
            return "DONE"
        if code == 0:
            return "RUN"
        return "ERROR"

    project_dir = path_base + "/" + folder
    samples = get_samples(path_base, folder, project_dir + "/samples.list")

    # Check main process. print(<one string>) behaves identically on
    # Python 2 and Python 3.
    print("## MAIN PROCESS ###########################")
    try:
        with open(project_dir + "/pid.txt", 'r') as handle:
            main_pid = handle.readline().strip("\n").split("\t")[1]
        state = status_label(manager.job_status(main_pid))
        print("- Main process (" + main_pid + ") status: " + state)
    except Exception:
        # Best effort: any failure is reported as "not found". Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
        print("- Main process not found or already finished")

    # Check subprocesses. Each line of pids.txt holds three tab-separated
    # fields: module name, "|"-joined ids and "|"-joined job ids.
    print("## SUBPROCESSES ###########################")
    pids = dict()
    try:
        with open(project_dir + "/temp/pids.txt", 'r') as handle:
            for line in handle:
                fields = line.strip("\n").split("\t")
                pids[fields[0]] = [fields[1].split("|"), fields[2].split("|")]
    except Exception:
        print("- No subprocesses file found")

    # Modules that can be switched on in config.txt.
    configurable = ["trimgalore", "fastqc", "kallisto", "star", "star-fusion",
                    "picard", "htseq-gene", "htseq-exon", "varscan", "gatk"]
    config = dict()
    # ``with`` closes config.txt, which was previously left open.
    with open(project_dir + "/config.txt", 'r') as handle:
        for line in handle:
            if line.startswith("%"):
                continue
            fields = line.strip("\n").split("\t")
            if fields[0] in configurable:
                value = fields[1].split("/")[0]
                # A value of "0" marks the module as disabled.
                if value != "0":
                    config[fields[0]] = value
    # sam2sortbam is reported whenever varscan or gatk is enabled.
    # ``in`` replaces dict.has_key(), which Python 3 removed.
    if "varscan" in config or "gatk" in config:
        config["sam2sortbam"] = 1

    report_order = ["trimgalore", "fastqc", "kallisto", "star", "star-fusion",
                    "picard", "htseq-gene", "htseq-exon", "sam2sortbam",
                    "varscan", "gatk"]
    for pg in report_order:
        if pg not in config:
            continue
        print("Process:  " + pg)
        if pg not in pids:
            print("- Already done or waiting for previous module output")
            continue
        pid = pids[pg]
        print("- ID:       " + "|".join(pid[0]))
        states = [status_label(manager.job_status(job)) for job in pid[1]]
        print("- Status:   " + "|".join(states))
        _samples_v, stats = check_samples(samples, path_base, folder, pg, "update")
        total = float(stats[0])
        sok = str(round(100 * float(stats[1]) / total, 2))
        sko = str(round(100 * float(stats[2]) / total, 2))
        pending = str(round(100 * float(stats[0] - stats[1] - stats[2]) / total, 2))
        print("- Progress: " + sok + "% succeeded / " + sko + "% exited / " + pending + "% pending")