Пример #1
0
def check_resource_availability(dataset, trait_id=None):
    """Return the privileges the current user has on a dataset/trait.

    Args:
        dataset: Either a dataset name (``str``) or a dataset object that
            exposes a ``type`` attribute.
        trait_id: Optional trait identifier, forwarded to
            ``get_resource_id`` and ``add_new_resource``.

    Returns:
        ``webqtlConfig.DEFAULT_PRIVILEGES`` for string/Temp datasets,
        ``webqtlConfig.SUPER_PRIVILEGES`` for super-users, otherwise the
        decoded JSON answer of the proxy.  If the proxy call fails, the
        resource's ``default_mask`` is returned (or the default privileges
        when no resource record is available).
    """
    # At least for now assume temporary entered traits are accessible
    if isinstance(dataset, str) or dataset.type == "Temp":
        return webqtlConfig.DEFAULT_PRIVILEGES

    resource_id = get_resource_id(dataset, trait_id)

    # ZS: resource_id should never be false, but it's technically possible
    # if a non-Temp dataset somehow had a type other than
    # Publish/ProbeSet/Geno.  resource_info is therefore initialised here so
    # that the fallback below never hits an unbound name.
    resource_info = None
    if resource_id:
        resource_info = get_resource_info(resource_id)

        # If resource isn't already in redis, add it with default
        # privileges
        if not resource_info:
            resource_info = add_new_resource(dataset, trait_id)

    # Check if super-user - we should probably come up with some
    # way to integrate this into the proxy
    if g.user_session.user_id in Redis.smembers("super_users"):
        return webqtlConfig.SUPER_PRIVILEGES

    the_url = GN_PROXY_URL + "available?resource={}&user={}".format(
        resource_id, g.user_session.user_id)

    try:
        return json.loads(requests.get(the_url).content)
    except Exception:
        # Best effort: the proxy being unreachable or answering with
        # something that is not JSON must not break the page.
        if resource_info:
            return resource_info['default_mask']
        return webqtlConfig.DEFAULT_PRIVILEGES
Пример #2
0
def check_owner(dataset=None, trait_id=None, resource_id=None):
    """Return the resource id if the current user owns it, else ``False``.

    Args:
        dataset: Dataset used to derive the resource id when
            ``resource_id`` is not given.
        trait_id: Trait identifier used together with ``dataset``.
        resource_id: Resource id to check directly; takes precedence over
            ``dataset``/``trait_id``.

    Returns:
        The (given or derived) resource id when ``g.user_session.user_id``
        equals the resource's ``owner_id``; ``False`` in every other case,
        including when no resource id or no resource record can be found.
    """
    if not resource_id:
        resource_id = get_resource_id(dataset, trait_id)
        if not resource_id:
            return False

    resource_info = get_resource_info(resource_id)
    # The lookup can come back empty (resource not registered yet); treat
    # that as "not the owner" instead of failing on the subscript.
    if resource_info and g.user_session.user_id == resource_info['owner_id']:
        return resource_id

    return False
def check_owner_or_admin(dataset=None, trait_id=None, resource_id=None):
    """Classify the current user's role on a resource.

    When ``resource_id`` is not supplied it is derived from ``dataset`` and
    ``trait_id``; Temp datasets short-circuit to ``"not-admin"``.

    Returns:
        ``"owner"`` for super-users and for the resource's owner, the
        result of ``check_admin(resource_id)`` for other users of a known
        resource, and ``"not-admin"`` when no resource record exists.
    """
    if not resource_id:
        if dataset.type == "Temp":
            return "not-admin"
        resource_id = get_resource_id(dataset, trait_id)

    current_user = g.user_session.user_id
    if current_user in Redis.smembers("super_users"):
        return "owner"

    resource_info = get_resource_info(resource_id)
    if not resource_info:
        return "not-admin"

    if current_user == resource_info['owner_id']:
        return "owner"
    return check_admin(resource_id)
Пример #4
0
    def __init__(self, user_id, kw):
        """Gather everything needed to display a single trait.

        Args:
            user_id: Id of the requesting user; passed to
                ``get_highest_user_access_role`` to determine
                ``self.admin_status``.
            kw: Mapping of request parameters.  The keys read here are
                ``trait_id`` and ``dataset`` (existing trait) or
                ``species``, ``group`` and ``trait_paste`` (newly pasted
                temporary trait).

        The results are stored on the instance; ``self.hddn`` and
        ``self.js_data`` are assembled at the end from the other
        attributes.
        """
        if 'trait_id' in kw and kw['dataset'] != "Temp":
            # Trait that lives in a regular dataset.
            # NOTE(review): get_species_dataset_trait presumably sets
            # self.dataset and self.this_trait - confirm in helper_functions.
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            # Newly pasted temporary trait: build an id that encodes
            # species, group and a timestamp.
            self.temp_trait = True
            self.trait_id = "Temp_" + kw['species'] + "_" + kw['group'] + \
                "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)

            # Put values in Redis so they can be looked up later if
            # added to a collection
            Redis.set(self.trait_id, kw['trait_paste'], ex=ONE_YEAR)
            self.trait_vals = kw['trait_paste'].split()
            self.this_trait = create_trait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
        else:
            # Existing temporary trait: species and group are recovered
            # from the id, the values from Redis.
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            id_parts = self.trait_id.split("_")
            self.temp_species = id_parts[1]
            self.temp_group = id_parts[2]
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            self.this_trait = create_trait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()
        self.resource_id = get_resource_id(self.dataset, self.trait_id)
        self.admin_status = get_highest_user_access_role(
            user_id=user_id,
            resource_id=(self.resource_id or ""),
            gn_proxy_url=GN_PROXY_URL)
        # ZS: Get verify/rna-seq link URLs
        try:
            blatsequence = self.this_trait.blatseq

            # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
            # NOTE(review): this query is assembled with string formatting
            # from the dataset and trait names; it should be switched to
            # bound parameters once the DB API behind g.db is confirmed.
            probe_query = """SELECT Probe.Sequence, Probe.Name
                            FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                            WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                                    ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                                    ProbeSetFreeze.Name = '%s' AND
                                    ProbeSet.Name = '%s' AND
                                    Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (
                self.this_trait.dataset.name, self.this_trait.name)

            seqs = None
            if not blatsequence:
                seqs = g.db.execute(probe_query).fetchall()
                if not seqs:
                    # Nothing to BLAT; handled by the except below.
                    raise ValueError
                # Only probes whose name ends in an odd digit are used.
                blatsequence = ''.join(
                    seqt[0].strip() for seqt in seqs
                    if int(seqt[1][-1]) % 2 == 1)

            # --------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe
            blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'

            # The same rows are needed again for the per-probe entries;
            # only query if they were not already fetched above.
            if seqs is None:
                seqs = g.db.execute(probe_query).fetchall()
            for seqt in seqs:
                if int(seqt[1][-1]) % 2 == 1:
                    blatsequence += '%3EProbe_' + \
                        seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A'

            species = self.dataset.group.species
            if species == "rat":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn6',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = ""
            elif species == "mouse":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('mouse', 'mm10',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % (
                    'mouse', 'mm10', blatsequence)
            elif species == "human":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('human', 'hg38',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = ""
            else:
                self.UCSC_BLAT_URL = ""
                self.UTHSC_BLAT_URL = ""
        except Exception:
            # Best effort: traits without probe data (or any lookup
            # failure) simply get no BLAT links.
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""

        if self.dataset.type == "ProbeSet":
            self.show_probes = "True"

        trait_units = get_trait_units(self.this_trait)
        self.get_external_links()
        self.build_correlation_tools()

        self.ncbi_summary = get_ncbi_summary(self.this_trait)

        # Get nearest marker for composite mapping
        if not self.temp_trait:
            if check_if_attr_exists(
                    self.this_trait, 'locus_chr'
            ) and self.dataset.type not in ("Geno", "Publish"):
                self.nearest_marker = get_nearest_marker(
                    self.this_trait, self.dataset)
            else:
                self.nearest_marker = ""

        self.make_sample_lists()

        trait_vals_by_group = [
            get_trait_vals(sample_type.sample_list)
            for sample_type in self.sample_groups
        ]

        self.max_digits_by_group = get_max_digits(trait_vals_by_group)

        self.qnorm_vals = quantile_normalize_vals(self.sample_groups,
                                                  trait_vals_by_group)
        self.z_scores = get_z_scores(self.sample_groups, trait_vals_by_group)

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        self.sample_group_types['samples_primary'] = self.dataset.group.name
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_other'] = "Other"
            self.sample_group_types['samples_all'] = "All"
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.categorical_var_list = []
        self.numerical_var_list = []
        if not self.temp_trait:
            # ZS: Only using first samplelist, since I think mapping only uses those samples
            self.categorical_var_list = get_categorical_variables(
                self.this_trait, self.sample_groups[0])
            self.numerical_var_list = get_numerical_variables(
                self.this_trait, self.sample_groups[0])

        # ZS: Get list of chromosomes to select for mapping
        chromosomes = self.dataset.species.chromosomes.chromosomes
        self.chr_list = [["All", -1]]
        for i, this_chr in enumerate(chromosomes):
            self.chr_list.append([chromosomes[this_chr].name, i])

        self.genofiles = self.dataset.group.get_genofiles()
        study_samplelist_json = self.dataset.group.get_study_samplelists()
        self.study_samplelists = [
            study["title"] for study in study_samplelist_json
        ]

        # ZS: No need to grab scales from .geno file unless it's using
        # a mapping method that reads .geno files.
        # The previous test (`"QTLReaper" or "R/qtl" in ...`) was always
        # true because a non-empty string literal is truthy; each name is
        # now tested for membership separately.
        # NOTE(review): mapping_names is assumed to be a collection of
        # method names on the group; if the attribute is missing the old
        # "always load the scales" behaviour is kept.
        mapping_names = getattr(self.dataset.group, "mapping_names", None)
        if (mapping_names is None or "QTLReaper" in mapping_names
                or "R/qtl" in mapping_names):
            if self.genofiles:
                self.scales_in_geno = get_genotype_scales(self.genofiles)
            else:
                self.scales_in_geno = get_genotype_scales(
                    self.dataset.group.name + ".geno")
        else:
            self.scales_in_geno = {}

        self.has_num_cases = has_num_cases(self.this_trait)

        # ZS: Needed to know whether to display bar chart + get max
        # sample name length in order to set table column width
        self.num_values = 0
        # ZS: So it knows whether to display the Binary R/qtl mapping
        # method, which doesn't work unless all values are 0 or 1
        self.binary = "true"
        # ZS: Since we don't want to show log2 transform option for
        # situations where it doesn't make sense
        self.negative_vals_exist = "false"
        max_samplename_width = 1
        for group in self.sample_groups:
            for sample in group.sample_list:
                max_samplename_width = max(max_samplename_width,
                                           len(sample.name))
                if sample.display_value != "x":
                    self.num_values += 1
                    # The old `!= 0 or != 1` test was true for every
                    # value.  The numeric value is compared here, as in
                    # the sign check below.
                    if sample.value not in (0, 1):
                        self.binary = "false"
                    if sample.value < 0:
                        self.negative_vals_exist = "true"

        # ZS: Check whether any attributes have few enough distinct
        # values to show the "Block samples by group" option
        self.categorical_attr_exists = "false"
        primary_attributes = self.sample_groups[0].attributes
        for attribute in primary_attributes:
            if len(primary_attributes[attribute].distinct_values) <= 10:
                self.categorical_attr_exists = "true"
                break

        sample_column_width = max_samplename_width * 8

        self.stats_table_width, self.trait_table_width = get_table_widths(
            self.sample_groups, sample_column_width, self.has_num_cases)

        if self.num_values >= 5000:
            self.maf = 0.01
        else:
            self.maf = 0.05

        trait_symbol = None
        short_description = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol
                short_description = trait_symbol

            elif hasattr(self.this_trait, 'post_publication_abbreviation'):
                short_description = self.this_trait.post_publication_abbreviation

            elif hasattr(self.this_trait, 'pre_publication_abbreviation'):
                short_description = self.this_trait.pre_publication_abbreviation

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = ','.join(self.dataset.group.allsamples)
        hddn['primary_samples'] = ','.join(self.primary_sample_names)
        hddn['trait_id'] = self.trait_id
        hddn['trait_display_name'] = self.this_trait.display_name
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = self.temp_trait
        if self.temp_trait:
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        else:
            hddn['group'] = self.dataset.group.name
            hddn['species'] = self.dataset.group.species
        hddn['use_outliers'] = False
        hddn['method'] = "gemma"
        hddn['selected_chr'] = -1
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['study_samplelists'] = json.dumps(study_samplelist_json)
        hddn['num_perm'] = 0
        hddn['categorical_vars'] = ""
        if self.categorical_var_list:
            hddn['categorical_vars'] = ",".join(self.categorical_var_list)
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(
                    self.this_trait, 'locus_chr'
            ) and self.this_trait.locus_chr != "" and self.dataset.type not in (
                    "Geno", "Publish"):
                hddn['control_marker'] = self.nearest_marker
        hddn['do_control'] = False
        hddn['maf'] = 0.05
        hddn['mapping_scale'] = "physic"
        hddn['compare_traits'] = []
        hddn['export_data'] = ""
        hddn['export_format'] = "excel"
        # With exactly one genotype file entry, use its first scale
        # instead of the default.
        if len(self.scales_in_geno) == 1:
            only_key = next(iter(self.scales_in_geno))
            hddn['mapping_scale'] = self.scales_in_geno[only_key][0][0]

        # We'll need access to this_trait and hddn in the Jinja2
        # Template, so we put it inside self
        self.hddn = hddn

        self.js_data = dict(
            trait_id=self.trait_id,
            trait_symbol=trait_symbol,
            max_digits=self.max_digits_by_group,
            short_description=short_description,
            unit_type=trait_units,
            dataset_type=self.dataset.type,
            species=self.dataset.group.species,
            scales_in_geno=self.scales_in_geno,
            data_scale=self.dataset.data_scale,
            sample_group_types=self.sample_group_types,
            sample_lists=sample_lists,
            se_exists=self.sample_groups[0].se_exists,
            has_num_cases=self.has_num_cases,
            attributes=self.sample_groups[0].attributes,
            categorical_attr_exists=self.categorical_attr_exists,
            categorical_vars=",".join(self.categorical_var_list),
            num_values=self.num_values,
            qnorm_values=self.qnorm_vals,
            zscore_values=self.z_scores,
            sample_column_width=sample_column_width,
            temp_uuid=self.temp_uuid)
Пример #5
0
def retrieve_trait_info(trait, dataset, get_qtl_info=False):
    """Fill ``trait`` with its descriptive fields and return it.

    The trait record is requested from the access proxy first; if that
    request fails for any reason the record is read straight from the
    database with a query that depends on ``dataset.type``.  The values
    are copied onto ``trait`` using ``dataset.display_fields`` as attribute
    names, followed by type-specific display attributes.

    Args:
        trait: Trait object to populate; ``trait.name`` identifies it.
        dataset: Dataset the trait belongs to (``type``, ``id``, ``name``,
            ``display_fields`` and ``group`` are read).
        get_qtl_info: When true, also look up the LRS score, the peak
            locus and its position.

    Returns:
        The same ``trait`` object.

    Raises:
        AssertionError: if ``dataset`` is falsy.
        KeyError: if no information could be found for the trait.
    """
    assert dataset, "Dataset doesn't exist"

    resource_id = get_resource_id(dataset, trait.name)
    # NOTE(review): the proxy address is hard-coded here; consider sharing
    # the configured proxy URL used elsewhere in the project.
    if dataset.type == 'Publish':
        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(
            resource_id, g.user_session.user_id)
    else:
        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(
            resource_id, g.user_session.user_id, trait.name)

    try:
        response = requests.get(the_url).content
        trait_info = json.loads(response)
    except Exception:  # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait
        if dataset.type == 'Publish':
            query = """
                    SELECT
                            PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID,
                            CAST(Phenotype.Pre_publication_description AS BINARY),
                            CAST(Phenotype.Post_publication_description AS BINARY),
                            CAST(Phenotype.Original_description AS BINARY),
                            CAST(Phenotype.Pre_publication_abbreviation AS BINARY),
                            CAST(Phenotype.Post_publication_abbreviation AS BINARY), PublishXRef.mean,
                            Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
                            CAST(Publication.Authors AS BINARY), CAST(Publication.Title AS BINARY), CAST(Publication.Abstract AS BINARY),
                            CAST(Publication.Journal AS BINARY), Publication.Volume, Publication.Pages,
                            Publication.Month, Publication.Year, PublishXRef.Sequence,
                            Phenotype.Units, PublishXRef.comments
                    FROM
                            PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet
                    WHERE
                            PublishXRef.Id = %s AND
                            Phenotype.Id = PublishXRef.PhenotypeId AND
                            Publication.Id = PublishXRef.PublicationId AND
                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
                            PublishXRef.InbredSetId = InbredSet.Id AND
                            PublishFreeze.Id = %s
                    """ % (trait.name, dataset.id)

            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()

        # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
        # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
        elif dataset.type == 'ProbeSet':
            display_fields_string = ', ProbeSet.'.join(dataset.display_fields)
            display_fields_string = 'ProbeSet.' + display_fields_string
            query = """
                    SELECT %s
                    FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
                    WHERE
                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                            ProbeSetFreeze.Name = '%s' AND
                            ProbeSet.Name = '%s'
                    """ % (escape(display_fields_string), escape(
                dataset.name), escape(str(trait.name)))
            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()
        # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
        # to avoid the problem of same marker name from different species.
        elif dataset.type == 'Geno':
            display_fields_string = ',Geno.'.join(dataset.display_fields)
            display_fields_string = 'Geno.' + display_fields_string
            query = """
                    SELECT %s
                    FROM Geno, GenoFreeze, GenoXRef
                    WHERE
                            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
                            GenoXRef.GenoId = Geno.Id AND
                            GenoFreeze.Name = '%s' AND
                            Geno.Name = '%s'
                    """ % (escape(display_fields_string), escape(
                dataset.name), escape(trait.name))
            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()
        else:  # Temp type
            # NOTE(review): column and table names are passed as bound
            # parameters here, which a DB driver will normally quote as
            # string values - verify that this branch works as intended.
            query = """SELECT %s FROM %s WHERE Name = %s"""
            logger.sql(query)
            trait_info = g.db.execute(query, ','.join(dataset.display_fields),
                                      dataset.type, trait.name).fetchone()

    if not trait_info:
        raise KeyError(
            repr(trait.name) + ' information is not found in the database.')

    trait.haveinfo = True
    for i, field in enumerate(dataset.display_fields):
        holder = trait_info[i]
        if isinstance(holder, bytes):
            holder = holder.decode("utf-8", errors="ignore")
        setattr(trait, field, holder)

    if dataset.type == 'Publish':
        if trait.group_code:
            trait.display_name = trait.group_code + "_" + str(trait.name)

        trait.confidential = 0
        if trait.pre_publication_description and not trait.pubmed_id:
            trait.confidential = 1

        description = trait.post_publication_description

        # If the dataset is confidential and the user has access to confidential
        # phenotype traits, then display the pre-publication description instead
        # of the post-publication description
        if not trait.pubmed_id:
            trait.abbreviation = trait.pre_publication_abbreviation
            trait.description_display = trait.pre_publication_description
        else:
            trait.abbreviation = trait.post_publication_abbreviation
            if description:
                trait.description_display = description.strip()
            else:
                trait.description_display = ""

        # The year may be missing or non-textual; coerce before testing
        # so that such records show "N/A" or the year instead of crashing.
        if not str(trait.year or "").isdigit():
            trait.pubmed_text = "N/A"
        else:
            trait.pubmed_text = trait.year

        if trait.pubmed_id:
            trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id

    if dataset.type == 'ProbeSet' and dataset.group:
        description_string = trait.description
        target_string = trait.probe_target_description

        if str(description_string
               or "") != "" and description_string != 'None':
            description_display = description_string
        else:
            description_display = trait.symbol

        if (str(description_display or "") != ""
                and description_display != 'N/A'
                and str(target_string or "") != ""
                and target_string != 'None'):
            description_display = description_display + '; ' + target_string.strip(
            )

        # Save it for the jinja2 template
        trait.description_display = description_display

        trait.location_repr = 'N/A'
        if trait.chr and trait.mb:
            trait.location_repr = 'Chr%s: %.6f' % (trait.chr,
                                                   float(trait.mb))

    elif dataset.type == "Geno":
        trait.location_repr = 'N/A'
        if trait.chr and trait.mb:
            trait.location_repr = 'Chr%s: %.6f' % (trait.chr,
                                                   float(trait.mb))

    if get_qtl_info:
        # LRS and its location
        trait.LRS_score_repr = "N/A"
        trait.LRS_location_repr = "N/A"
        trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = ""
        if dataset.type == 'ProbeSet' and not trait.cellid:
            trait.mean = ""
            query = """
                    SELECT
                            ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
                    FROM
                            ProbeSetXRef, ProbeSet
                    WHERE
                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                            ProbeSet.Name = "{}" AND
                            ProbeSetXRef.ProbeSetFreezeId ={}
                    """.format(trait.name, dataset.id)
            logger.sql(query)
            trait_qtl = g.db.execute(query).fetchone()
            if trait_qtl:
                trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl
                _fill_locus_position(trait, dataset)

        if dataset.type == 'Publish':
            query = """
                    SELECT
                            PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
                    FROM
                            PublishXRef, PublishFreeze
                    WHERE
                            PublishXRef.Id = %s AND
                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
                            PublishFreeze.Id =%s
            """ % (trait.name, dataset.id)
            logger.sql(query)
            trait_qtl = g.db.execute(query).fetchone()
            if trait_qtl:
                trait.locus, trait.lrs, trait.additive = trait_qtl
                _fill_locus_position(trait, dataset)
            else:
                trait.locus = trait.lrs = trait.additive = ""

        if (dataset.type == 'Publish' or dataset.type == "ProbeSet"
            ) and str(trait.locus_chr or "") != "" and str(trait.locus_mb
                                                           or "") != "":
            trait.LRS_location_repr = 'Chr%s: %.6f' % (
                trait.locus_chr, float(trait.locus_mb))
            if str(trait.lrs or "") != "":
                trait.LRS_score_repr = '%3.1f' % trait.lrs

    return trait


def _fill_locus_position(trait, dataset):
    """Set ``trait.locus_chr``/``locus_mb`` for ``trait.locus``.

    Looks the locus up in the Geno table for the dataset's species.  When
    the trait has no locus, or the marker is not found, the locus, its
    position and the additive effect are all reset to empty strings.
    """
    if trait.locus:
        query = """
            select Geno.Chr, Geno.Mb from Geno, Species
            where Species.Name = '{}' and
            Geno.Name = '{}' and
            Geno.SpeciesId = Species.Id
            """.format(dataset.group.species, trait.locus)
        logger.sql(query)
        result = g.db.execute(query).fetchone()
        if result:
            trait.locus_chr = result[0]
            trait.locus_mb = result[1]
            return

    trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""