Exemplo n.º 1
0
def comparequery():
    """Handle a comparison-plot request and return the plot or a message.

    The request payload is read from ``request.data`` and parsed with
    ``ast.literal_eval``. The requested transcript (or gene) is resolved
    against the organism's annotation database, the files of every colour
    group in ``master_file_dict`` are collected, and the result is handed to
    ``riboflask_compare.generate_plot``.

    Returns:
        One of the following:

        * the value returned by ``riboflask_compare.generate_plot``;
        * a string starting with ``"TRANSCRIPTS"`` followed by one
          ``:transcript,length,cds_start,cds_length,length_after_cds,coverage``
          entry per transcript, when the input matched a gene that has
          several transcripts;
        * a plain error-message string.

    Side effects:
        May set the module-level ``user_short_passed`` flag to ``True``.
    """
    global user_short_passed
    data = ast.literal_eval(request.data)
    tran = data['transcript'].upper().strip()
    organism = data['organism']
    transcriptome = data['transcriptome']

    master_filepath_dict = {}
    # Result of the most recent fetch_file_paths() call; the multi-transcript
    # ranking further down reads its "riboseq" entry.
    file_paths = {}
    settings_row = None

    connection = sqlite3.connect('{}/trips.sqlite'.format(config.SCRIPT_LOC))
    connection.text_factory = str
    transhelve = None
    try:
        cursor = connection.cursor()
        cursor.execute(
            "SELECT owner FROM organisms WHERE organism_name = ? and transcriptome_list = ?;",
            (organism, transcriptome))
        owner_row = cursor.fetchone()
        if owner_row is None:
            return "ERROR! Could not find organism {} with transcriptome {}".format(
                organism, transcriptome)
        owner = owner_row[0]

        if owner == 1:
            sqlite_path_organism = "{0}{1}/{1}.{2}.sqlite".format(
                config.ANNOTATION_DIR, organism, transcriptome)
            if not os.path.isfile(sqlite_path_organism):
                return "Cannot find annotation file {}.{}.sqlite".format(
                    organism, transcriptome)
        else:
            sqlite_path_organism = "{0}transcriptomes/{1}/{2}/{3}/{2}_{3}.v2.sqlite".format(
                config.UPLOADS_DIR, owner, organism, transcriptome)

        # The annotation database stays open until the transcript has been
        # resolved, because the gene fallback below queries it as well.
        transhelve = sqlite3.connect(sqlite_path_organism)
        trans_cursor = transhelve.cursor()
        trans_cursor.execute(
            "SELECT * from transcripts WHERE transcript = ?", (tran,))
        # Kept under its own name so the per-file lookups below cannot
        # overwrite it.
        transcript_row = trans_cursor.fetchone()

        minread = int(data['minread'])
        maxread = int(data['maxread'])
        hili_start = int(data['hili_start'])
        hili_stop = int(data['hili_stop'])
        master_file_dict = data['master_file_dict']
        if master_file_dict == {}:
            return "Error: No files in the File list box. To add files to the file list box click on a study in the studies section above. This will populate the Ribo-seq and RNA-Seq sections with a list of files. Click on one of the files and then press the  Add button in the studies section. This will add the file to the File list box. Selecting another file and clicking Add again will add the new file to the same group in the File list. Alternatively to add a new group simply change the selected colour (by clicking on the coloured box in the studies section) and then click the Add file button."

        for color in master_file_dict:
            group = master_file_dict[color]
            group_files = {
                "filepaths": [],
                "file_ids": [],
                "file_names": [],
                "file_descs": [],
                "mapped_reads": 0,
                # Group-specific read-length bounds override the request-wide
                # ones, so groups can show different read lengths.
                "minread": int(group["minread"]) if "minread" in group else minread,
                "maxread": int(group["maxread"]) if "maxread" in group else maxread,
            }
            master_filepath_dict[color] = group_files

            for file_id in group["file_ids"]:
                # int() keeps the comparison numeric, as the unquoted literal
                # in the original statement was.
                cursor.execute(
                    "SELECT file_name,file_description,file_type from files WHERE file_id = ?;",
                    (int(file_id),))
                file_row = cursor.fetchone()
                if file_row is None:
                    return "File not found, please report this to [email protected] or via the contact page."
                file_name = group["label"]
                file_paths = fetch_file_paths([file_id], organism)

                for filetype in file_paths:
                    for path_file_id in file_paths[filetype]:
                        filepath = file_paths[filetype][path_file_id]
                        if not os.path.isfile(filepath):
                            return "File not found, please report this to [email protected] or via the contact page."

                        sqlite_db = SqliteDict(filepath, autocommit=False)
                        try:
                            has_counts = ("noncoding_counts" in sqlite_db
                                          and "coding_counts" in sqlite_db)
                            if has_counts:
                                group_files["mapped_reads"] += float(
                                    sqlite_db["noncoding_counts"])
                                group_files["mapped_reads"] += float(
                                    sqlite_db["coding_counts"])
                        finally:
                            sqlite_db.close()

                        if not has_counts and "normalize" in data:
                            return "One or more selected files is missing values for 'coding_counts' and 'non_coding_counts' so cannot normalize with these files, please report this to [email protected] or via the contact page."

                        group_files["filepaths"].append(filepath)
                        group_files["file_ids"].append(path_file_id)
                        group_files["file_names"].append(file_name)
                        group_files["file_descs"].append(file_row[1])
                        group_files["file_type"] = file_row[2]

        if transcript_row is None:
            # The input is not a transcript id: treat it as a gene name.
            trans_cursor.execute(
                "SELECT * from transcripts WHERE gene = ?", (tran,))
            gene_rows = trans_cursor.fetchall()
            if not gene_rows:
                return "ERROR! Could not find any transcript corresponding to {}".format(
                    tran)
            if len(gene_rows) == 1:
                tran = str(gene_rows[0][0])
            else:
                # NOTE(review): only the riboseq files of the last file
                # processed above are ranked, as in the original code --
                # confirm whether all selected files should be used.
                rankings = ribo_seq_read_counting(
                    tran,
                    sqlite_path_organism,
                    file_paths.get("riboseq", {}).values(),
                    count_type="asite",
                    unique=True)

                entries = ["TRANSCRIPTS"]
                for transcript in gene_rows:
                    name = transcript[0]
                    trans_cursor.execute(
                        "SELECT length,cds_start,cds_stop,principal from transcripts WHERE transcript = ?",
                        (name,))
                    tran_result = trans_cursor.fetchone()
                    tranlen = tran_result[0]
                    cds_start = tran_result[1]
                    cds_stop = tran_result[2]
                    if cds_start == "NULL" or cds_start is None:
                        cdslen = "NULL"
                        threeutrlen = "NULL"
                    else:
                        cdslen = cds_stop - cds_start
                        threeutrlen = tranlen - cds_stop
                    # Looked up for every transcript, so entries without a CDS
                    # also get a defined coverage value.
                    coverage = rankings[name] if name in rankings else "NULL"
                    entries.append("{},{},{},{},{},{}".format(
                        name, tranlen, cds_start, cdslen, threeutrlen,
                        coverage))
                return ":".join(entries)

        # Best effort: any failure to read the user name means "no user".
        try:
            user = current_user.name
        except Exception:
            user = None
        if user is not None:
            cursor.execute(
                "SELECT user_id from users WHERE username = ?;", (user,))
            user_row = cursor.fetchone()
            if user_row is not None:
                cursor.execute(
                    "SELECT background_col,comp_uga_col,comp_uag_col,comp_uaa_col,title_size,subheading_size,axis_label_size,marker_size,cds_marker_width,cds_marker_colour,legend_size from user_settings WHERE user_id = ?;",
                    (user_row[0],))
                settings_row = cursor.fetchone()
    finally:
        if transhelve is not None:
            transhelve.close()
        connection.close()

    ribocoverage = 'ribocoverage' in data
    ambiguous = "ambig" if "ambiguous" in data else "unambig"
    normalize = "normalize" in data

    html_args = data["html_args"]
    if html_args["user_short"] == "None" or user_short_passed == True:
        short_code = generate_short_code(data, organism,
                                         html_args["transcriptome"],
                                         "comparison")
    else:
        short_code = html_args["user_short"]
        user_short_passed = True

    # Site defaults, replaced below when the logged-in user has stored settings.
    background_col = config.BACKGROUND_COL
    comp_uga_col = config.UGA_COL
    comp_uag_col = config.UAG_COL
    comp_uaa_col = config.UAA_COL
    title_size = config.TITLE_SIZE
    subheading_size = config.SUBHEADING_SIZE
    axis_label_size = config.AXIS_LABEL_SIZE
    marker_size = config.MARKER_SIZE
    cds_marker_size = config.CDS_MARKER_SIZE
    cds_marker_colour = config.CDS_MARKER_COLOUR
    legend_size = config.LEGEND_SIZE
    if settings_row is not None:
        (background_col, comp_uga_col, comp_uag_col, comp_uaa_col, title_size,
         subheading_size, axis_label_size, marker_size, cds_marker_size,
         cds_marker_colour, legend_size) = settings_row

    if tran == "":
        return "ERROR! Could not find any transcript corresponding to whatever you entered"
    return riboflask_compare.generate_plot(
        tran, ambiguous, minread, maxread, master_filepath_dict, "y", {},
        ribocoverage, organism, normalize, short_code, background_col,
        hili_start, hili_stop, comp_uag_col, comp_uga_col, comp_uaa_col,
        config.ANNOTATION_DIR, title_size, subheading_size,
        axis_label_size, marker_size, cds_marker_size, cds_marker_colour,
        legend_size, transcriptome)
def _query_percent_of_max(scores):
    """Rescale a ``{transcript: score}`` mapping to percentages of its maximum.

    Returns an empty dict for an empty mapping, and 0.0 for every entry when
    the largest score is zero, so neither ``max()`` nor the division raises.
    """
    if not scores:
        return {}
    top = max(scores.values())
    if not top:
        return {name: 0.0 for name in scores}
    return {name: round((scores[name] / top) * 100, 2) for name in scores}


def query():
    """Handle a single-transcript plot request and return the plot or a message.

    The request payload is read from ``request.data`` and parsed with
    ``ast.literal_eval``. The requested transcript (or gene) is resolved
    against the organism's annotation database, the selected files are
    resolved with ``fetch_file_paths``, and everything is passed to
    ``riboflask.generate_plot``.

    Returns:
        One of the following:

        * the value returned by ``riboflask.generate_plot``;
        * a string starting with ``"TRANSCRIPTS"`` followed by one
          ``:transcript,version,length,cds_start,cds_length,length_after_cds,``
          ``OPM,ribo_TPM,rna_TPM`` entry per transcript, when the input
          matched a gene that has several transcripts (the three scores are
          percentages of the highest score in their table);
        * a plain error-message string.

    Side effects:
        May set the module-level ``user_short_passed`` flag to ``True``.
    """
    global user_short_passed
    data = ast.literal_eval(request.data)

    tran = data['transcript'].upper().strip()
    readscore = data['readscore']
    secondary_readscore = data['secondary_readscore']
    minread = int(data['minread'])
    maxread = int(data['maxread'])
    minfiles = int(data['minfiles'])
    organism = data['organism']
    seqhili = data['seqhili'].split(",")
    hili_start = int(data['hili_start'])
    hili_stop = int(data['hili_stop'])
    transcriptome = data['transcriptome']
    advanced = data["advanced"]

    # file_list holds file ids encoded as strings (they come from javascript);
    # fetch_file_paths turns them into per-seq-type dicts of file paths.
    file_paths_dict = fetch_file_paths(data["file_list"], organism)

    primetype = data["primetype"]
    user_hili_starts = data["user_hili_starts"]
    user_hili_stops = data["user_hili_stops"]

    trips_db = '{}/trips.sqlite'.format(config.SCRIPT_LOC)
    connection = sqlite3.connect(trips_db)
    connection.text_factory = str
    try:
        cursor = connection.cursor()
        cursor.execute(
            "SELECT owner FROM organisms WHERE organism_name = ? and transcriptome_list = ?;",
            (organism, transcriptome))
        owner_row = cursor.fetchone()
    finally:
        connection.close()
    if owner_row is None:
        return "ERROR! Could not find organism {} with transcriptome {}".format(
            organism, transcriptome)
    owner = owner_row[0]

    if owner == 1:
        sqlite_path_organism = "{0}{1}/{1}.{2}.sqlite".format(
            config.ANNOTATION_DIR, organism, transcriptome)
        if not os.path.isfile(sqlite_path_organism):
            return "Cannot find annotation file {}.{}.sqlite".format(
                organism, transcriptome)
    else:
        sqlite_path_organism = "{0}transcriptomes/{1}/{2}/{3}/{2}_{3}.v2.sqlite".format(
            config.UPLOADS_DIR, owner, organism, transcriptome)

    transhelve = sqlite3.connect(sqlite_path_organism)
    try:
        cursor = transhelve.cursor()
        cursor.execute(
            "SELECT * from transcripts WHERE transcript = ?", (tran,))
        if cursor.fetchone() is None:
            # The input is not a transcript id: treat it as a gene name.
            cursor.execute(
                "SELECT * from transcripts WHERE gene = ?", (tran,))
            gene_rows = cursor.fetchall()
            if not gene_rows:
                return "ERROR! Could not find any transcript corresponding to {}".format(
                    tran)

            if len(gene_rows) == 1:
                tran = str(gene_rows[0][0])
            else:
                riboseq_files = file_paths_dict["riboseq"]
                rnaseq_files = file_paths_dict["rnaseq"]

                if len(riboseq_files.values()) > 0:
                    orfQuant_res = _query_percent_of_max(
                        incl_OPM_run_orfQuant(tran, sqlite_path_organism,
                                              riboseq_files.values()))
                    TPM_Ribo = _query_percent_of_max(
                        TPM(tran, sqlite_path_organism,
                            riboseq_files.values(), "ribo"))
                else:
                    orfQuant_res = {row[0]: "Null" for row in gene_rows}
                    TPM_Ribo = {row[0]: "Null" for row in gene_rows}

                if len(rnaseq_files.values()) > 0:
                    TPM_RNA = _query_percent_of_max(
                        TPM(tran, sqlite_path_organism,
                            rnaseq_files.values(), "rna"))
                else:
                    TPM_RNA = {row[0]: "Null" for row in gene_rows}

                entries = ["TRANSCRIPTS"]
                for transcript in gene_rows:
                    name = transcript[0]
                    cursor.execute(
                        "SELECT length,cds_start,cds_stop,principal,version from transcripts WHERE transcript = ?",
                        (name,))
                    tran_result = cursor.fetchone()
                    tranlen = tran_result[0]
                    cds_start = tran_result[1]
                    cds_stop = tran_result[2]
                    version = tran_result[4]
                    if cds_start == "NULL" or cds_start is None:
                        cdslen = "NULL"
                        threeutrlen = "NULL"
                    else:
                        cdslen = cds_stop - cds_start
                        threeutrlen = tranlen - cds_stop

                    entries.append("{},{},{},{},{},{},{},{},{}".format(
                        name, version, tranlen, cds_start, cdslen,
                        threeutrlen, orfQuant_res.get(name, "NULL"),
                        TPM_Ribo.get(name, "NULL"),
                        TPM_RNA.get(name, "NULL")))
                return ":".join(entries)
    finally:
        # Runs on the early returns above as well.
        transhelve.close()

    # Checkbox-style options: a key's presence in the payload means "on".
    lite = "y" if 'varlite' in data else "n"
    uga_diff = 'uga_diff' in data
    color_readlen_dist = 'color_readlen_dist' in data
    ribocoverage = 'ribocoverage' in data
    nucseq = "nucseq" in data
    mismatches = "mismatches" in data
    ambiguous = "ambig" if "ambiguous" in data else "unambig"
    pcr = "pcr" in data
    noisered = "noisered" in data

    if data["user_short"] == "None" or user_short_passed == True:
        short_code = generate_short_code(data, organism, data["transcriptome"],
                                         "interactive_plot")
    else:
        short_code = data["user_short"]
        user_short_passed = True

    # Best effort: any failure to read the user name means "no user".
    try:
        user = current_user.name
    except Exception:
        user = None

    # Site defaults, replaced below when the logged-in user has stored settings.
    background_col = config.BACKGROUND_COL
    uga_col = config.UGA_COL
    uag_col = config.UAG_COL
    uaa_col = config.UAA_COL
    title_size = config.TITLE_SIZE
    subheading_size = config.SUBHEADING_SIZE
    axis_label_size = config.AXIS_LABEL_SIZE
    marker_size = config.MARKER_SIZE
    cds_marker_size = config.CDS_MARKER_SIZE
    cds_marker_colour = config.CDS_MARKER_COLOUR
    legend_size = config.LEGEND_SIZE
    ribo_linewidth = config.RIBO_LINEWIDTH
    # Put any publicly available seq types (apart from riboseq and rnaseq) here
    seq_rules = {
        "proteomics": {
            "frame_breakdown": 1
        },
        "conservation": {
            "frame_breakdown": 1
        },
        "tcpseq": {
            "frame_breakdown": 0
        }
    }

    if user is not None:
        connection = sqlite3.connect(trips_db)
        connection.text_factory = str
        try:
            cursor = connection.cursor()
            cursor.execute(
                "SELECT user_id from users WHERE username = ?;", (user,))
            user_row = cursor.fetchone()
            if user_row is not None:
                user_id = user_row[0]
                cursor.execute(
                    "SELECT background_col,uga_col,uag_col,uaa_col,title_size,subheading_size,axis_label_size,marker_size,cds_marker_width,cds_marker_colour,legend_size,ribo_linewidth from user_settings WHERE user_id = ?;",
                    (user_id,))
                settings_row = cursor.fetchone()
                if settings_row is not None:
                    (background_col, uga_col, uag_col, uaa_col, title_size,
                     subheading_size, axis_label_size, marker_size,
                     cds_marker_size, cds_marker_colour, legend_size,
                     ribo_linewidth) = settings_row
                # Rules for the user's custom seq types are added to, or
                # replace, the public ones above.
                cursor.execute(
                    "SELECT * from seq_rules WHERE user_id = ?;", (user_id,))
                for row in cursor.fetchall():
                    seq_rules[row[1]] = {"frame_breakdown": row[2]}
        finally:
            connection.close()

    if tran == "":
        return "ERROR! Could not find any transcript corresponding to whatever you entered"
    return riboflask.generate_plot(
        tran, ambiguous, minread, maxread, lite, ribocoverage, organism,
        readscore, noisered, primetype, minfiles, nucseq, user_hili_starts,
        user_hili_stops, uga_diff, file_paths_dict, short_code,
        color_readlen_dist, background_col, uga_col, uag_col, uaa_col,
        advanced, config.ANNOTATION_DIR, seqhili, seq_rules, title_size,
        subheading_size, axis_label_size, marker_size, transcriptome,
        config.UPLOADS_DIR, cds_marker_size, cds_marker_colour,
        legend_size, ribo_linewidth, secondary_readscore, pcr, mismatches,
        hili_start, hili_stop)