예제 #1
0
    def convert_to_mouse_gene_id(self, species=None, gene_id=None):
        """If the species is rat or human, translate the gene_id to the mouse geneid

        If there is no input gene_id or there's no corresponding mouse gene_id, return None

        """
        if not gene_id:
            return None

        mouse_gene_id = None

        if species == 'mouse':
            mouse_gene_id = gene_id

        elif species == 'rat':

            query = """SELECT mouse
                   FROM GeneIDXRef
                   WHERE rat='%s'""" % escape(gene_id)

            result = g.db.execute(query).fetchone()
            if result != None:
                mouse_gene_id = result.mouse

        elif species == 'human':

            query = """SELECT mouse
                   FROM GeneIDXRef
                   WHERE human='%s'""" % escape(gene_id)

            result = g.db.execute(query).fetchone()
            if result != None:
                mouse_gene_id = result.mouse

        return mouse_gene_id
def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params):
    input_trait_mouse_gene_id = convert_to_mouse_gene_id(
        target_dataset.group.species.lower(), this_trait.geneid)

    lit_corr_data = {}
    for trait, gene_id in list(trait_geneid_dict.items()):
        mouse_gene_id = convert_to_mouse_gene_id(
            target_dataset.group.species.lower(), gene_id)

        if mouse_gene_id and str(mouse_gene_id).find(";") == -1:
            result = g.db.execute(
                """SELECT value
                    FROM LCorrRamin3
                    WHERE GeneId1='%s' and
                            GeneId2='%s'
                """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))
            ).fetchone()
            if not result:
                result = g.db.execute("""SELECT value
                    FROM LCorrRamin3
                    WHERE GeneId2='%s' and
                            GeneId1='%s'
                """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))
                ).fetchone()
            if result:
                lit_corr = result.value
                lit_corr_data[trait] = [gene_id, lit_corr]
            else:
                lit_corr_data[trait] = [gene_id, 0]
        else:
            lit_corr_data[trait] = [gene_id, 0]

    return lit_corr_data
예제 #3
0
    def compile_final_query(self, from_clause='', where_clause=''):
        """Generates the final query string"""

        from_clause = self.normalize_spaces(from_clause)

        if self.search_term[0] == "*":
            query = (self.base_query + """%s
                        WHERE PublishXRef.InbredSetId = %s
                        and PublishXRef.PhenotypeId = Phenotype.Id
                        and PublishXRef.PublicationId = Publication.Id
                        and PublishFreeze.Id = %s
                        ORDER BY PublishXRef.Id""" %
                     (from_clause, escape(str(self.dataset.group.id)),
                      escape(str(self.dataset.id))))
        else:
            query = (
                self.base_query + """%s
                        WHERE %s
                        and PublishXRef.InbredSetId = %s
                        and PublishXRef.PhenotypeId = Phenotype.Id
                        and PublishXRef.PublicationId = Publication.Id
                        and PublishFreeze.Id = %s
                        ORDER BY PublishXRef.Id""" %
                (from_clause, where_clause, escape(str(
                    self.dataset.group.id)), escape(str(self.dataset.id))))

        return query
예제 #4
0
    def do_lit_correlation_for_trait_list(self):

        input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)

        for trait in self.correlation_results:

            if trait.geneid:
                trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid)
            else:
                trait.mouse_gene_id = None

            if trait.mouse_gene_id and str(trait.mouse_gene_id).find(";") == -1:
                result = g.db.execute(
                    """SELECT value
                       FROM LCorrRamin3
                       WHERE GeneId1='%s' and
                             GeneId2='%s'
                    """ % (escape(str(trait.mouse_gene_id)), escape(str(input_trait_mouse_gene_id)))
                ).fetchone()
                if not result:
                    result = g.db.execute("""SELECT value
                       FROM LCorrRamin3
                       WHERE GeneId2='%s' and
                             GeneId1='%s'
                    """ % (escape(str(trait.mouse_gene_id)), escape(str(input_trait_mouse_gene_id)))
                    ).fetchone()

                if result:
                    lit_corr = result.value
                    trait.lit_corr = lit_corr
                else:
                    trait.lit_corr = 0
            else:
                trait.lit_corr = 0
예제 #5
0
    def compile_final_query(self, from_clause='', where_clause=''):
        """Generates the final query string"""

        from_clause = self.normalize_spaces(from_clause)

        query = (
            self.base_query + """%s
                WHERE %s
                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
                    and ProbeSetXRef.ProbeSetFreezeId = %s
                ORDER BY ProbeSet.symbol ASC
                            """ %
            (escape(from_clause), where_clause, escape(str(self.dataset.id))))
        return query
예제 #6
0
    def get_alias_where_clause(self):
        search_string = escape(self.search_term[0])

        if self.search_term[0] != "*":
            match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (
                search_string)
        else:
            match_clause = ""

        where_clause = (match_clause + """ProbeSet.Id = ProbeSetXRef.ProbeSetId
               and ProbeSetXRef.ProbeSetFreezeId = %s
                        """ % (escape(str(self.dataset.id))))

        return where_clause
예제 #7
0
    def run_combined(self, from_clause, where_clause):
        """Generates and runs a combined search of an phenotype dataset"""

        logger.debug("Running PhenotypeSearch")

        from_clause = self.normalize_spaces(from_clause)

        query = (self.base_query + """%s
                    WHERE %s
                    PublishXRef.InbredSetId = %s and
                    PublishXRef.PhenotypeId = Phenotype.Id and
                    PublishXRef.PublicationId = Publication.Id and
                    PublishFreeze.Id = %s""" %
                 (from_clause, where_clause, escape(str(
                     self.dataset.group.id)), escape(str(self.dataset.id))))

        return self.execute(query)
예제 #8
0
    def retrieve_other_names(self):
        """This method fetches the the dataset names in search_result.

        If the data set name parameter is not found in the 'Name' field of
        the data set table, check if it is actually the FullName or
        ShortName instead.

        This is not meant to retrieve the data set info if no name at
        all is passed.

        """

        try:
            if self.type == "ProbeSet":
                query_args = tuple(
                    escape(x) for x in (self.name, self.name, self.name))

                self.id, self.name, self.fullname, self.shortname, self.data_scale, self.tissue = fetch1(
                    """
    SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, Tissue.Name
    FROM ProbeSetFreeze, ProbeFreeze, Tissue
    WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id
    AND ProbeFreeze.TissueId = Tissue.Id
    AND (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s')
                """ % (query_args), "/dataset/" + self.name + ".json",
                    lambda r: (r["id"], r["name"], r["full_name"], r[
                        "short_name"], r["data_scale"], r["tissue"]))
            else:
                query_args = tuple(
                    escape(x) for x in ((self.type + "Freeze"), self.name,
                                        self.name, self.name))

                self.tissue = "N/A"
                self.id, self.name, self.fullname, self.shortname = fetchone(
                    """
                        SELECT Id, Name, FullName, ShortName
                        FROM %s
                        WHERE (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
                    """ % (query_args))

        except TypeError:
            logger.debug(
                "Dataset {} is not yet available in GeneNetwork.".format(
                    self.name))
            pass
예제 #9
0
    def run_combined(self, from_clause='', where_clause=''):
        """Generates and runs a combined search of an mRNA expression dataset"""

        logger.debug("Running ProbeSetSearch")
        #query = self.base_query + from_clause + " WHERE " + where_clause

        from_clause = self.normalize_spaces(from_clause)

        query = (
            self.base_query + """%s
                WHERE %s
                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
                    and ProbeSetXRef.ProbeSetFreezeId = %s
                ORDER BY ProbeSet.symbol ASC
                            """ %
            (escape(from_clause), where_clause, escape(str(self.dataset.id))))

        return self.execute(query)
예제 #10
0
    def compile_final_query(self, from_clause='', where_clause=''):
        """Generates the final query string"""

        from_clause = self.normalize_spaces(from_clause)

        if self.search_term[0] == "*":
            query = (self.base_query + """WHERE Geno.Id = GenoXRef.GenoId
                        and GenoXRef.GenoFreezeId = GenoFreeze.Id
                        and GenoFreeze.Id = %s""" %
                     (escape(str(self.dataset.id))))
        else:
            query = (self.base_query + """WHERE %s
                        and Geno.Id = GenoXRef.GenoId
                        and GenoXRef.GenoFreezeId = GenoFreeze.Id
                        and GenoFreeze.Id = %s""" %
                     (where_clause, escape(str(self.dataset.id))))

        return query
예제 #11
0
    def retrieve_genes(self, column_name):
        query = """
                    select ProbeSet.Name, ProbeSet.%s
                    from ProbeSet,ProbeSetXRef
                    where ProbeSetXRef.ProbeSetFreezeId = %s and
                    ProbeSetXRef.ProbeSetId=ProbeSet.Id;
                """ % (column_name, escape(str(self.id)))
        logger.sql(query)
        results = g.db.execute(query).fetchall()

        return dict(results)
예제 #12
0
    def get_where_clause(self):
        field = 'GOterm.acc'
        go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:]

        statements = ("""%s.symbol=GOgene_product.symbol and
           GOassociation.gene_product_id=GOgene_product.id and
           GOterm.id=GOassociation.term_id""" % (escape(self.dataset.type)))

        where_clause = " %s = '%s' and %s " % (field, go_id, statements)

        return where_clause
예제 #13
0
    def get_where_clause(self):
        search_string = escape(self.search_term[0])

        if self.search_term[0] != "*":
            match_clause = """((MATCH (ProbeSet.Name,
                        ProbeSet.description,
                        ProbeSet.symbol,
                        alias,
                        GenbankId,
                        UniGeneId,
                        Probe_Target_Description)
                        AGAINST ('%s' IN BOOLEAN MODE))) AND
                                """ % (search_string)
        else:
            match_clause = ""

        where_clause = (match_clause + """ProbeSet.Id = ProbeSetXRef.ProbeSetId
               and ProbeSetXRef.ProbeSetFreezeId = %s
                        """ % (escape(str(self.dataset.id))))

        return where_clause
예제 #14
0
    def do_lit_correlation_for_all_traits(self):
        input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)

        lit_corr_data = {}
        for trait, gene_id in list(self.trait_geneid_dict.items()):
            mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id)

            if mouse_gene_id and str(mouse_gene_id).find(";") == -1:
                #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id)
                result = g.db.execute(
                    """SELECT value
                       FROM LCorrRamin3
                       WHERE GeneId1='%s' and
                             GeneId2='%s'
                    """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))
                ).fetchone()
                if not result:
                    result = g.db.execute("""SELECT value
                       FROM LCorrRamin3
                       WHERE GeneId2='%s' and
                             GeneId1='%s'
                    """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))
                    ).fetchone()
                if result:
                    #print("result:", result)
                    lit_corr = result.value
                    lit_corr_data[trait] = [gene_id, lit_corr]
                else:
                    lit_corr_data[trait] = [gene_id, 0]
            else:
                lit_corr_data[trait] = [gene_id, 0]

        lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()),
                                                           key=lambda t: -abs(t[1][1])))

        return lit_corr_data
예제 #15
0
 def retrieve_sample_data(self, trait):
     query = """
                 SELECT
                         Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
                 FROM
                         (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
                 left join ProbeSetSE on
                         (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
                 left join NStrain on
                         (NStrain.DataId = ProbeSetData.Id AND
                         NStrain.StrainId = ProbeSetData.StrainId)
                 WHERE
                         ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                         ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                         ProbeSetFreeze.Name = '%s' AND
                         ProbeSetXRef.DataId = ProbeSetData.Id AND
                         ProbeSetData.StrainId = Strain.Id
                 Order BY
                         Strain.Name
                 """ % (escape(trait), escape(self.name))
     logger.sql(query)
     results = g.db.execute(query).fetchall()
     #logger.debug("RETRIEVED RESULTS HERE:", results)
     return results
예제 #16
0
    def setup(self):

        #logger.debug("IS A PHENOTYPEDATASET")

        # Fields in the database table
        self.search_fields = [
            'Phenotype.Post_publication_description',
            'Phenotype.Pre_publication_description',
            'Phenotype.Pre_publication_abbreviation',
            'Phenotype.Post_publication_abbreviation', 'PublishXRef.mean',
            'Phenotype.Lab_code', 'Publication.PubMed_ID',
            'Publication.Abstract', 'Publication.Title', 'Publication.Authors',
            'PublishXRef.Id'
        ]

        # Figure out what display_fields is
        self.display_fields = [
            'name', 'group_code', 'pubmed_id', 'pre_publication_description',
            'post_publication_description', 'original_description',
            'pre_publication_abbreviation', 'post_publication_abbreviation',
            'mean', 'lab_code', 'submitter', 'owner', 'authorized_users',
            'authors', 'title', 'abstract', 'journal', 'volume', 'pages',
            'month', 'year', 'sequence', 'units', 'comments'
        ]

        # Fields displayed in the search results table header
        self.header_fields = [
            'Index', 'Record', 'Description', 'Authors', 'Year', 'Max LRS',
            'Max LRS Location', 'Additive Effect'
        ]

        self.type = 'Publish'

        self.query_for_group = '''
                            SELECT
                                    InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                            FROM
                                    InbredSet, PublishFreeze
                            WHERE
                                    PublishFreeze.InbredSetId = InbredSet.Id AND
                                    PublishFreeze.Name = "%s"
                    ''' % escape(self.name)
예제 #17
0
    def setup(self):
        # Fields in the database table
        self.search_fields = ['Name', 'Chr']

        # Find out what display_fields is
        self.display_fields = ['name', 'chr', 'mb', 'source2', 'sequence']

        # Fields displayed in the search results table header
        self.header_fields = ['Index', 'ID', 'Location']

        # Todo: Obsolete or rename this field
        self.type = 'Geno'

        self.query_for_group = '''
                SELECT
                        InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                FROM
                        InbredSet, GenoFreeze
                WHERE
                        GenoFreeze.InbredSetId = InbredSet.Id AND
                        GenoFreeze.Name = "%s"
                ''' % escape(self.name)
예제 #18
0
    def setup(self):
        # Fields in the database table
        self.search_fields = [
            'Name', 'Description', 'Probe_Target_Description', 'Symbol',
            'Alias', 'GenbankId', 'UniGeneId', 'RefSeq_TranscriptId'
        ]

        # Find out what display_fields is
        self.display_fields = [
            'name', 'symbol', 'description', 'probe_target_description', 'chr',
            'mb', 'alias', 'geneid', 'genbankid', 'unigeneid', 'omim',
            'refseq_transcriptid', 'blatseq', 'targetseq', 'chipid',
            'comments', 'strand_probe', 'strand_gene', 'proteinid',
            'uniprotid', 'probe_set_target_region', 'probe_set_specificity',
            'probe_set_blat_score', 'probe_set_blat_mb_start',
            'probe_set_blat_mb_end', 'probe_set_strand',
            'probe_set_note_by_rw', 'flag'
        ]

        # Fields displayed in the search results table header
        self.header_fields = [
            'Index', 'Record', 'Symbol', 'Description', 'Location', 'Mean',
            'Max LRS', 'Max LRS Location', 'Additive Effect'
        ]

        # Todo: Obsolete or rename this field
        self.type = 'ProbeSet'

        self.query_for_group = '''
                        SELECT
                                InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                        FROM
                                InbredSet, ProbeSetFreeze, ProbeFreeze
                        WHERE
                                ProbeFreeze.InbredSetId = InbredSet.Id AND
                                ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
                                ProbeSetFreeze.Name = "%s"
                ''' % escape(self.name)
예제 #19
0
    def get_where_clause(self):
        if self.search_operator == "=":
            assert isinstance(self.search_term, (list, tuple))
            self.range_min, self.range_max = self.search_term[:2]
            where_clause = """ (SELECT Pow(2, max(value) -min(value))
                                     FROM ProbeSetData
                                     WHERE ProbeSetData.Id = ProbeSetXRef.dataId) > %s AND
                                    (SELECT Pow(2, max(value) -min(value))
                                     FROM ProbeSetData
                                     WHERE ProbeSetData.Id = ProbeSetXRef.dataId) < %s
                                    """ % self.mescape(
                min(self.range_min, self.range_max),
                max(self.range_min, self.range_max))
        else:
            # Deal with >, <, >=, and <=
            where_clause = """ (SELECT Pow(2, max(value) -min(value))
                                     FROM ProbeSetData
                                     WHERE ProbeSetData.Id = ProbeSetXRef.dataId) > %s
                                    """ % (escape(self.search_term[0]))

        logger.debug("where_clause is:", pf(where_clause))

        return where_clause
예제 #20
0
    def get_where_clause(self, cis_trans):
        self.mb_buffer = 5  # default
        chromosome = None
        if cis_trans == "cis":
            the_operator = "<"
        else:
            the_operator = ">"

        if self.search_operator == "=":
            if len(self.search_term) == 2 or len(self.search_term) == 3:
                self.search_term = [float(value) for value in self.search_term]
            if len(self.search_term) == 2:
                lrs_min, lrs_max = self.search_term
                #[int(value) for value in self.search_term]
            elif len(self.search_term) == 3:
                lrs_min, lrs_max, self.mb_buffer = self.search_term
            elif len(self.search_term) == 4:
                lrs_min, lrs_max, self.mb_buffer = [
                    float(value) for value in self.search_term[:3]
                ]
                chromosome = self.search_term[3]
                if "Chr" in chromosome or "chr" in chromosome:
                    chromosome = int(chromosome[3:])
            else:
                SomeError

            if self.search_type == "CISLOD" or self.search_type == "TRANSLOD":
                lrs_min = lrs_min * 4.61
                lrs_max = lrs_max * 4.61

            sub_clause = """ %sXRef.LRS > %s and
                %sXRef.LRS < %s  and """ % (
                escape(self.dataset.type), escape(str(min(lrs_min, lrs_max))),
                escape(self.dataset.type), escape(str(max(lrs_min, lrs_max))))
        else:
            # Deal with >, <, >=, and <=
            sub_clause = """ %sXRef.LRS %s %s and """ % (escape(
                self.dataset.type), escape(
                    self.search_operator), escape(self.search_term[0]))

        if cis_trans == "cis":
            where_clause = sub_clause + """
                    ABS(%s.Mb-Geno.Mb) %s %s and
                    %sXRef.Locus = Geno.name and
                    Geno.SpeciesId = %s and
                    %s.Chr = Geno.Chr""" % (
                escape(self.dataset.type), the_operator,
                escape(str(self.mb_buffer)), escape(self.dataset.type),
                escape(str(self.species_id)), escape(self.dataset.type))
        else:
            if chromosome:
                location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (
                    escape(self.dataset.type), chromosome,
                    escape(self.dataset.type), escape(self.dataset.type),
                    the_operator, escape(str(self.mb_buffer)),
                    escape(self.dataset.type), chromosome)
            else:
                location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (
                    escape(self.dataset.type), the_operator,
                    escape(str(self.mb_buffer)), escape(
                        self.dataset.type), escape(self.dataset.type))
            where_clause = sub_clause + """
                    %sXRef.Locus = Geno.name and
                    Geno.SpeciesId = %s and
                    (%s)""" % (escape(self.dataset.type),
                               escape(str(self.species_id)), location_clause)

        return where_clause
예제 #21
0
 def mescape(self, *items):
     """Multiple escape"""
     escaped = [escape(str(item)) for item in items]
     logger.debug("escaped is:", escaped)
     return tuple(escaped)
예제 #22
0
    def get_trait_data(self, sample_list=None):
        if sample_list:
            self.samplelist = sample_list
        else:
            self.samplelist = self.group.samplelist

        if self.group.parlist != None and self.group.f1list != None:
            if (self.group.parlist + self.group.f1list) in self.samplelist:
                self.samplelist += self.group.parlist + self.group.f1list

        query = """
            SELECT Strain.Name, Strain.Id FROM Strain, Species
            WHERE Strain.Name IN {}
            and Strain.SpeciesId=Species.Id
            and Species.name = '{}'
            """.format(create_in_clause(self.samplelist),
                       *mescape(self.group.species))
        logger.sql(query)
        results = dict(g.db.execute(query).fetchall())
        sample_ids = [results[item] for item in self.samplelist]

        # MySQL limits the number of tables that can be used in a join to 61,
        # so we break the sample ids into smaller chunks
        # Postgres doesn't have that limit, so we can get rid of this after we transition
        chunk_size = 50
        number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
        trait_sample_data = []
        for sample_ids_step in chunks.divide_into_chunks(
                sample_ids, number_chunks):
            if self.type == "Publish":
                dataset_type = "Phenotype"
            else:
                dataset_type = self.type
            temp = ['T%s.value' % item for item in sample_ids_step]
            if self.type == "Publish":
                query = "SELECT {}XRef.Id,".format(escape(self.type))
            else:
                query = "SELECT {}.Name,".format(escape(dataset_type))
            data_start_pos = 1
            query += ', '.join(temp)
            query += ' FROM ({}, {}XRef, {}Freeze) '.format(
                *mescape(dataset_type, self.type, self.type))

            for item in sample_ids_step:
                query += """
                        left join {}Data as T{} on T{}.Id = {}XRef.DataId
                        and T{}.StrainId={}\n
                        """.format(
                    *mescape(self.type, item, item, self.type, item, item))

            if self.type == "Publish":
                query += """
                        WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId
                        and {}Freeze.Name = '{}'
                        and {}.Id = {}XRef.{}Id
                        order by {}.Id
                        """.format(*mescape(self.type, self.type, self.type,
                                            self.name, dataset_type, self.type,
                                            dataset_type, dataset_type))
            else:
                query += """
                        WHERE {}XRef.{}FreezeId = {}Freeze.Id
                        and {}Freeze.Name = '{}'
                        and {}.Id = {}XRef.{}Id
                        order by {}.Id
                        """.format(*mescape(
                    self.type, self.type, self.type, self.type, self.name,
                    dataset_type, self.type, self.type, dataset_type))

            #logger.debug("trait data query: ", query)

            logger.sql(query)
            results = g.db.execute(query).fetchall()
            #logger.debug("query results:", results)
            trait_sample_data.append(results)

        trait_count = len(trait_sample_data[0])
        self.trait_data = collections.defaultdict(list)

        # put all of the separate data together into a dictionary where the keys are
        # trait names and values are lists of sample values
        for trait_counter in range(trait_count):
            trait_name = trait_sample_data[0][trait_counter][0]
            for chunk_counter in range(int(number_chunks)):
                self.trait_data[trait_name] += (
                    trait_sample_data[chunk_counter][trait_counter]
                    [data_start_pos:])
예제 #23
0
def retrieve_trait_info(trait, dataset, get_qtl_info=False):
    assert dataset, "Dataset doesn't exist"

    resource_id = get_resource_id(dataset, trait.name)
    if dataset.type == 'Publish':
        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(
            resource_id, g.user_session.user_id)
    else:
        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(
            resource_id, g.user_session.user_id, trait.name)

    try:
        response = requests.get(the_url).content
        trait_info = json.loads(response)
    except:  # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait
        if dataset.type == 'Publish':
            query = """
                    SELECT
                            PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID,
                            CAST(Phenotype.Pre_publication_description AS BINARY),
                            CAST(Phenotype.Post_publication_description AS BINARY),
                            CAST(Phenotype.Original_description AS BINARY),
                            CAST(Phenotype.Pre_publication_abbreviation AS BINARY),
                            CAST(Phenotype.Post_publication_abbreviation AS BINARY), PublishXRef.mean,
                            Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
                            CAST(Publication.Authors AS BINARY), CAST(Publication.Title AS BINARY), CAST(Publication.Abstract AS BINARY),
                            CAST(Publication.Journal AS BINARY), Publication.Volume, Publication.Pages,
                            Publication.Month, Publication.Year, PublishXRef.Sequence,
                            Phenotype.Units, PublishXRef.comments
                    FROM
                            PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet
                    WHERE
                            PublishXRef.Id = %s AND
                            Phenotype.Id = PublishXRef.PhenotypeId AND
                            Publication.Id = PublishXRef.PublicationId AND
                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
                            PublishXRef.InbredSetId = InbredSet.Id AND
                            PublishFreeze.Id = %s
                    """ % (trait.name, dataset.id)

            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()

        # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
        # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
        elif dataset.type == 'ProbeSet':
            display_fields_string = ', ProbeSet.'.join(dataset.display_fields)
            display_fields_string = 'ProbeSet.' + display_fields_string
            query = """
                    SELECT %s
                    FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
                    WHERE
                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                            ProbeSetFreeze.Name = '%s' AND
                            ProbeSet.Name = '%s'
                    """ % (escape(display_fields_string), escape(
                dataset.name), escape(str(trait.name)))
            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()
        # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
        # to avoid the problem of same marker name from different species.
        elif dataset.type == 'Geno':
            display_fields_string = ',Geno.'.join(dataset.display_fields)
            display_fields_string = 'Geno.' + display_fields_string
            query = """
                    SELECT %s
                    FROM Geno, GenoFreeze, GenoXRef
                    WHERE
                            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
                            GenoXRef.GenoId = Geno.Id AND
                            GenoFreeze.Name = '%s' AND
                            Geno.Name = '%s'
                    """ % (escape(display_fields_string), escape(
                dataset.name), escape(trait.name))
            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()
        else:  # Temp type
            query = """SELECT %s FROM %s WHERE Name = %s"""
            logger.sql(query)
            trait_info = g.db.execute(query, ','.join(dataset.display_fields),
                                      dataset.type, trait.name).fetchone()

    if trait_info:
        trait.haveinfo = True
        for i, field in enumerate(dataset.display_fields):
            holder = trait_info[i]
            if isinstance(holder, bytes):
                holder = holder.decode("utf-8", errors="ignore")
            setattr(trait, field, holder)

        if dataset.type == 'Publish':
            if trait.group_code:
                trait.display_name = trait.group_code + "_" + str(trait.name)

            trait.confidential = 0
            if trait.pre_publication_description and not trait.pubmed_id:
                trait.confidential = 1

            description = trait.post_publication_description

            # If the dataset is confidential and the user has access to confidential
            # phenotype traits, then display the pre-publication description instead
            # of the post-publication description
            if not trait.pubmed_id:
                trait.abbreviation = trait.pre_publication_abbreviation
                trait.description_display = trait.pre_publication_description
            else:
                trait.abbreviation = trait.post_publication_abbreviation
                if description:
                    trait.description_display = description.strip()
                else:
                    trait.description_display = ""

            if not trait.year.isdigit():
                trait.pubmed_text = "N/A"
            else:
                trait.pubmed_text = trait.year

            if trait.pubmed_id:
                trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id

        if dataset.type == 'ProbeSet' and dataset.group:
            description_string = trait.description
            target_string = trait.probe_target_description

            if str(description_string
                   or "") != "" and description_string != 'None':
                description_display = description_string
            else:
                description_display = trait.symbol

            if (str(description_display or "") != ""
                    and description_display != 'N/A'
                    and str(target_string or "") != ""
                    and target_string != 'None'):
                description_display = description_display + '; ' + target_string.strip(
                )

            # Save it for the jinja2 template
            trait.description_display = description_display

            trait.location_repr = 'N/A'
            if trait.chr and trait.mb:
                trait.location_repr = 'Chr%s: %.6f' % (trait.chr,
                                                       float(trait.mb))

        elif dataset.type == "Geno":
            trait.location_repr = 'N/A'
            if trait.chr and trait.mb:
                trait.location_repr = 'Chr%s: %.6f' % (trait.chr,
                                                       float(trait.mb))

        if get_qtl_info:
            # LRS and its location
            trait.LRS_score_repr = "N/A"
            trait.LRS_location_repr = "N/A"
            trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = ""
            if dataset.type == 'ProbeSet' and not trait.cellid:
                trait.mean = ""
                query = """
                        SELECT
                                ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
                        FROM
                                ProbeSetXRef, ProbeSet
                        WHERE
                                ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                                ProbeSet.Name = "{}" AND
                                ProbeSetXRef.ProbeSetFreezeId ={}
                        """.format(trait.name, dataset.id)
                logger.sql(query)
                trait_qtl = g.db.execute(query).fetchone()
                if trait_qtl:
                    trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl
                    if trait.locus:
                        query = """
                            select Geno.Chr, Geno.Mb from Geno, Species
                            where Species.Name = '{}' and
                            Geno.Name = '{}' and
                            Geno.SpeciesId = Species.Id
                            """.format(dataset.group.species, trait.locus)
                        logger.sql(query)
                        result = g.db.execute(query).fetchone()
                        if result:
                            trait.locus_chr = result[0]
                            trait.locus_mb = result[1]
                        else:
                            trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
                    else:
                        trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""

            if dataset.type == 'Publish':
                query = """
                        SELECT
                                PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
                        FROM
                                PublishXRef, PublishFreeze
                        WHERE
                                PublishXRef.Id = %s AND
                                PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
                                PublishFreeze.Id =%s
                """ % (trait.name, dataset.id)
                logger.sql(query)
                trait_qtl = g.db.execute(query).fetchone()
                if trait_qtl:
                    trait.locus, trait.lrs, trait.additive = trait_qtl
                    if trait.locus:
                        query = """
                            select Geno.Chr, Geno.Mb from Geno, Species
                            where Species.Name = '{}' and
                            Geno.Name = '{}' and
                            Geno.SpeciesId = Species.Id
                            """.format(dataset.group.species, trait.locus)
                        logger.sql(query)
                        result = g.db.execute(query).fetchone()
                        if result:
                            trait.locus_chr = result[0]
                            trait.locus_mb = result[1]
                        else:
                            trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
                    else:
                        trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
                else:
                    trait.locus = trait.lrs = trait.additive = ""
            if (dataset.type == 'Publish' or dataset.type == "ProbeSet"
                ) and str(trait.locus_chr or "") != "" and str(trait.locus_mb
                                                               or "") != "":
                trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (
                    trait.locus_chr, float(trait.locus_mb))
                if str(trait.lrs or "") != "":
                    trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs
    else:
        raise KeyError(
            repr(trait.name) + ' information is not found in the database.')
    return trait
예제 #24
0
    def get_trait_info(self, trait_list=None, species=''):

        #  Note: setting trait_list to [] is probably not a great idea.
        if not trait_list:
            trait_list = []

        for this_trait in trait_list:

            if not this_trait.haveinfo:
                this_trait.retrieveInfo(QTL=1)

            if not this_trait.symbol:
                this_trait.symbol = "N/A"

            # XZ, 12/08/2008: description
            # XZ, 06/05/2009: Rob asked to add probe target description
            description_string = str(
                str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
            target_string = str(
                str(this_trait.probe_target_description).strip(
                    codecs.BOM_UTF8), 'utf-8')

            if len(description_string) > 1 and description_string != 'None':
                description_display = description_string
            else:
                description_display = this_trait.symbol

            if (len(description_display) > 1 and description_display != 'N/A'
                    and len(target_string) > 1 and target_string != 'None'):
                description_display = description_display + '; ' + target_string.strip(
                )

            # Save it for the jinja2 template
            this_trait.description_display = description_display

            if this_trait.chr and this_trait.mb:
                this_trait.location_repr = 'Chr%s: %.6f' % (
                    this_trait.chr, float(this_trait.mb))

            # Get mean expression value
            query = (
                """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
                where ProbeSetXRef.ProbeSetFreezeId = %s and
                ProbeSet.Id = ProbeSetXRef.ProbeSetId and
                ProbeSet.Name = '%s'
            """ %
                (escape(str(this_trait.dataset.id)), escape(this_trait.name)))

            #logger.debug("query is:", pf(query))
            logger.sql(query)
            result = g.db.execute(query).fetchone()

            mean = result[0] if result else 0

            if mean:
                this_trait.mean = "%2.3f" % mean

            # LRS and its location
            this_trait.LRS_score_repr = 'N/A'
            this_trait.LRS_location_repr = 'N/A'

            # Max LRS and its Locus location
            if this_trait.lrs and this_trait.locus:
                query = """
                    select Geno.Chr, Geno.Mb from Geno, Species
                    where Species.Name = '{}' and
                        Geno.Name = '{}' and
                        Geno.SpeciesId = Species.Id
                """.format(species, this_trait.locus)
                logger.sql(query)
                result = g.db.execute(query).fetchone()

                if result:
                    lrs_chr, lrs_mb = result
                    this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
                    this_trait.LRS_location_repr = 'Chr%s: %.6f' % (
                        lrs_chr, float(lrs_mb))

        return trait_list