Example #1
def heatmap_page():
    logger.info("In heatmap, request.form is:", pf(request.form))
    logger.info(request.url)

    start_vars = request.form
    temp_uuid = uuid.uuid4()

    traits = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    if traits[0] != "":
        version = "v5"
        key = "heatmap:{}:".format(version) + json.dumps(start_vars,
                                                         sort_keys=True)
        logger.info("key is:", pf(key))
        with Bench("Loading cache"):
            result = Redis.get(key)

        if result:
            logger.info("Cache hit!!!")
            with Bench("Loading results"):
                result = pickle.loads(result)

        else:
            logger.info("Cache miss!!!")

            template_vars = heatmap.Heatmap(request.form, temp_uuid)
            template_vars.js_data = json.dumps(template_vars.js_data,
                                               default=json_default_handler,
                                               indent="   ")

            result = template_vars.__dict__

            for item in list(template_vars.__dict__.keys()):
                logger.info("  ---**--- {}: {}".format(
                    type(template_vars.__dict__[item]), item))

            pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
            logger.info("pickled result length:", len(pickled_result))
            Redis.set(key, pickled_result)
            Redis.expire(key, 60 * 60)

        with Bench("Rendering template"):
            rendered_template = render_template("heatmap.html", **result)

    else:
        rendered_template = render_template("empty_collection.html",
                                            **{'tool': 'Heatmap'})

    return rendered_template
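Example #1 above, like Examples #2, #3, #8, #9 and #12 below, follows a cache-aside pattern: build a deterministic key from the request, try Redis, and on a miss compute the result, pickle it, and store it with a one-hour expiry. A minimal stand-alone sketch of that pattern, assuming a local Redis server and a hypothetical compute_result callable:

import json
import pickle

import redis

Redis = redis.Redis()   # assumes a Redis server on localhost:6379
CACHE_TTL = 60 * 60     # one hour, matching the expire() calls in the examples


def cached(prefix, params, compute_result):
    """Cache-aside helper: params must be JSON-serializable and
    compute_result any callable returning a picklable value."""
    key = "{}:v1:".format(prefix) + json.dumps(params, sort_keys=True)
    raw = Redis.get(key)
    if raw is not None:                       # cache hit
        return pickle.loads(raw)
    result = compute_result()                 # cache miss: do the work
    Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
    Redis.expire(key, CACHE_TTL)
    return result

A call site would look roughly like cached("heatmap", dict(start_vars), lambda: heatmap.Heatmap(request.form, temp_uuid).__dict__), with the lambda standing in for the real template-building code.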
Example #2
def create_datasets_list():
    result = None  # make sure this exists even when the Redis cache is skipped
    if USE_REDIS:
        key = "all_datasets"
        result = Redis.get(key)

        if result:
            logger.debug("Redis cache hit")
            datasets = pickle.loads(result)

    if result is None:
        datasets = list()
        with Bench("Creating DataSets object"):
            type_dict = {'Publish': 'PublishFreeze',
                         'ProbeSet': 'ProbeSetFreeze',
                         'Geno': 'GenoFreeze'}

            for dataset_type in type_dict:
                query = "SELECT Name FROM {}".format(type_dict[dataset_type])
                for result in fetchall(query):
                    # The query at the beginning of this function isn't
                    # necessary here, but still would rather just reuse it
                    # logger.debug("type: {}\tname: {}".format(dataset_type, result.Name))
                    dataset = create_dataset(result.Name, dataset_type)
                    datasets.append(dataset)

        if USE_REDIS:
            Redis.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60*60)

    return datasets
Example #3
def search_page():
    logger.info("in search_page")
    logger.info(request.url)
    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            key = "search_results:v1:" + \
                json.dumps(request.args, sort_keys=True)
            logger.debug("key is:", pf(key))
            result = Redis.get(key)
            if result:
                logger.info("Redis cache hit on search results!")
                result = pickle.loads(result)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")

    logger.info("request.args is", request.args)
    the_search = SearchResultPage(request.args)
    result = the_search.__dict__
    valid_search = result['search_term_exists']

    if USE_REDIS and valid_search:
        Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
        Redis.expire(key, 60 * 60)

    if valid_search:
        return render_template("search_result_page.html", **result)
    else:
        return render_template("search_error.html")
Example #4
    def quick_search(self):
        #search_terms = ""
        #for term in self.search_terms.split():
        #    search_terms += '+{} '.format(term)

        search_terms = ' '.join('+{}'.format(escape(term))
                                for term in self.search_terms.split())
        print("search_terms are:", search_terms)

        query = """ SELECT table_name, the_key, result_fields
                    FROM QuickSearch
                    WHERE MATCH (terms)
                          AGAINST ('{}' IN BOOLEAN MODE) """.format(
            search_terms)

        with Bench("Doing QuickSearch Query: "):
            dbresults = g.db.execute(query, no_parameters=True).fetchall()
        #print("results: ", pf(results))

        self.results = collections.defaultdict(list)

        type_dict = {
            'PublishXRef': 'phenotype',
            'ProbeSetXRef': 'mrna_assay',
            'GenoXRef': 'genotype'
        }

        self.species_groups = {}

        for dbresult in dbresults:
            this_result = {}
            this_result['table_name'] = dbresult.table_name
            if self.trait_type == type_dict[
                    dbresult.table_name] or self.trait_type == 'all':
                this_result['key'] = dbresult.the_key
                this_result['result_fields'] = json.loads(
                    dbresult.result_fields)
                this_species = this_result['result_fields']['species']
                this_group = this_result['result_fields']['group_name']
                if this_species not in self.species_groups:
                    self.species_groups[this_species] = {}
                if type_dict[dbresult.table_name] not in self.species_groups[
                        this_species]:
                    self.species_groups[this_species][type_dict[
                        dbresult.table_name]] = []
                if this_group not in self.species_groups[this_species][
                        type_dict[dbresult.table_name]]:
                    self.species_groups[this_species][type_dict[
                        dbresult.table_name]].append(this_group)
                #if type_dict[dbresult.table_name] not in self.species_groups:
                #    self.species_groups[type_dict[dbresult.table_name]] = {}
                #if this_species not in self.species_groups[type_dict[dbresult.table_name]]:
                #    self.species_groups[type_dict[dbresult.table_name]][this_species] = []
                #if this_group not in self.species_groups[type_dict[dbresult.table_name]][this_species]:
                #    self.species_groups[type_dict[dbresult.table_name]][this_species].append(this_group)
                self.results[type_dict[dbresult.table_name]].append(
                    this_result)

Example #5
import redis
Redis = redis.Redis()
def gn_server(path):
    """Return JSON record by calling GN_SERVER

    """
    with Bench("GN_SERVER", LOG_SQL):
        res = urllib2.urlopen(GN_SERVER_URL + path)
        rest = res.read()
        res2 = json.loads(rest)
        logger.debug(res2)
        return res2
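Bench appears in nearly every example here but its definition is not shown. A plausible minimal equivalent is a timing context manager along the following lines; the real utility in the codebase may log differently, and the enabled flag only mirrors the LOG_SQL-style argument seen in some call sites:

import time
from contextlib import contextmanager


@contextmanager
def bench(name, enabled=True):
    """Time the enclosed block and report how long it took."""
    start = time.time()
    try:
        yield
    finally:
        if enabled:
            print("Bench: {} took {:.3f} seconds".format(name, time.time() - start))

Used as with bench("SQL"): ..., just like the Bench calls above.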
Example #6
def fetchall(query):
    """Return row iterator by calling SQL directly (the
original fetchall, but with logging)

    """
    with Bench("SQL", LOG_SQL):
        def helper(query):
            res = g.db.execute(query)
            return res.fetchall()
        return logger.sql(query, helper)
Example #7
def fetchone(query):
    """Return tuple containing one row by calling SQL directly (the
original fetchone, but with logging)

    """
    with Bench("SQL", LOG_SQL):
        def helper(query):
            res = g.db.execute(query)
            return res.fetchone()
        return logger.sql(query, helper)
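Call sites use these wrappers exactly like the underlying cursor methods and gain the timing and SQL logging for free. A hypothetical usage example (the table name is borrowed from Example #2; the queries themselves are illustrative):

# Hypothetical call sites for the fetchall/fetchone wrappers above.
rows = fetchall("SELECT Name FROM ProbeSetFreeze")
for row in rows:
    print(row.Name)    # rows expose columns as attributes, as in Example #2

count = fetchone("SELECT COUNT(*) FROM ProbeSetFreeze")[0]
print("number of ProbeSet datasets:", count)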
Example #8
def search_page():
    print("in search_page")
    if 'info_database' in request.args:
        print("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if template_vars.redirect_url:
            print("Going to redirect")
            return flask.redirect(template_vars.redirect_url)
        else:
            return render_template("data_sharing.html",
                                   **template_vars.__dict__)
    else:
        key = "search_results:v1:" + json.dumps(request.args, sort_keys=True)
        print("key is:", pf(key))
        with Bench("Loading cache"):
            result = Redis.get(key)

        if result:
            print("Cache hit!!!")
            with Bench("Loading results"):
                result = pickle.loads(result)
        else:
            print("calling search_results.SearchResultPage")
            print("request.args is", request.args)
            the_search = search_results.SearchResultPage(request.args)
            result = the_search.__dict__

            print("result: ", pf(result))
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60 * 60)

        if result['quick']:
            return render_template("quick_search.html", **result)
        elif result['search_term_exists']:
            return render_template("search_result_page.html", **result)
        else:
            return render_template("search_error.html")
Example #9
def search_page():
    logger.info("in search_page")
    if 'info_database' in request.args:
        logger.info("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if template_vars.redirect_url:
            logger.info("Going to redirect")
            return flask.redirect(template_vars.redirect_url)
        else:
            return render_template("data_sharing.html",
                                   **template_vars.__dict__)
    else:
        result = None
        if USE_REDIS:
            with Bench("Trying Redis cache"):
                key = "search_results:v1:" + json.dumps(request.args,
                                                        sort_keys=True)
                logger.debug("key is:", pf(key))
                result = Redis.get(key)
                if result:
                    logger.info("Redis cache hit on search results!")
                    result = pickle.loads(result)
        else:
            logger.info("Skipping Redis cache (USE_REDIS=False)")

        logger.info("request.args is", request.args)
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__

        logger.debugf("result", result)

        if USE_REDIS:
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60 * 60)

        if result['search_term_exists']:
            return render_template("search_result_page.html", **result)
        else:
            return render_template("search_error.html")
Example #10
    def __init__(self, kw):
        self.type = kw['type']
        self.terms = kw['terms']
        if self.type == "gene":
            sql = """
                SELECT
                Species.`Name` AS species_name,
                InbredSet.`Name` AS inbredset_name,
                Tissue.`Name` AS tissue_name,
                ProbeSetFreeze.Name AS probesetfreeze_name,
                ProbeSet.Name AS probeset_name,
                ProbeSet.Symbol AS probeset_symbol,
                ProbeSet.`description` AS probeset_description,
                ProbeSet.Chr AS chr,
                ProbeSet.Mb AS mb,
                ProbeSetXRef.Mean AS mean,
                ProbeSetXRef.LRS AS lrs,
                ProbeSetXRef.`Locus` AS locus,
                ProbeSetXRef.`pValue` AS pvalue,
                ProbeSetXRef.`additive` AS additive
                FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
                WHERE InbredSet.`SpeciesId`=Species.`Id`
                AND ProbeFreeze.InbredSetId=InbredSet.`Id`
                AND ProbeFreeze.`TissueId`=Tissue.`Id`
                AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
                AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
                AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
                AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
                AND ProbeSetFreeze.confidentiality < 1
                AND ProbeSetFreeze.public > 0
                ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
                LIMIT 6000
                """ % (self.terms)
            with Bench("Running query"):
                logger.sql(sql)
                re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    dataset = create_dataset(line[3],
                                             "ProbeSet",
                                             get_samplelist=False)
                    trait_id = line[4]
                    #with Bench("Building trait object"):
                    this_trait = GeneralTrait(dataset=dataset,
                                              name=trait_id,
                                              get_qtl_info=True,
                                              get_sample_info=False)
                    self.trait_list.append(this_trait)

        elif self.type == "phenotype":
            sql = """
                SELECT
                Species.`Name`,
                InbredSet.`Name`,
                PublishFreeze.`Name`,
                PublishXRef.`Id`,
                Phenotype.`Post_publication_description`,
                Publication.`Authors`,
                Publication.`Year`,
                PublishXRef.`LRS`,
                PublishXRef.`Locus`,
                PublishXRef.`additive`
                FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
                WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
                AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
                AND InbredSet.`SpeciesId`=Species.`Id`
                AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
                AND PublishXRef.`PublicationId`=Publication.`Id`
                AND	  (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]"
                    OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]")
                ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
                LIMIT 6000
                """ % (self.terms, self.terms, self.terms, self.terms,
                       self.terms, self.terms, self.terms, self.terms,
                       self.terms, self.terms)
            logger.sql(sql)
            re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    dataset = create_dataset(line[2], "Publish")
                    trait_id = line[3]
                    this_trait = GeneralTrait(dataset=dataset,
                                              name=trait_id,
                                              get_qtl_info=True,
                                              get_sample_info=False)
                    self.trait_list.append(this_trait)
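Examples #4 and #10 interpolate the search terms straight into the SQL string. A safer variant, which is not what the original code does, would bind the terms as a parameter through the database layer; a sketch using SQLAlchemy's text() construct, assuming g.db is a SQLAlchemy connection as the execute()/fetchall() calls suggest:

from sqlalchemy import text


def match_probesets(db, terms):
    """Bound-parameter version of the MATCH ... AGAINST clause from
    Example #10's gene branch (column list taken from that query)."""
    sql = text("""
        SELECT ProbeSet.Name, ProbeSet.Symbol, ProbeSet.Chr, ProbeSet.Mb
        FROM ProbeSet
        WHERE MATCH (ProbeSet.Name, ProbeSet.description, ProbeSet.symbol,
                     alias, GenbankId, UniGeneId, Probe_Target_Description)
              AGAINST (:terms IN BOOLEAN MODE)
        LIMIT 6000
    """)
    return db.execute(sql, {"terms": terms}).fetchall()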
Example #11
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        #tempdata = temp_data.TempData(temp_uuid)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = []  # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars['method']
        if start_vars['manhattan_plot'] == "true":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars['maf']  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.bootstrap_results = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(
                    start_vars['selected_chr']
            ) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
                else:
                    self.num_perm = 0
            except:
                self.num_perm = 0

            self.LRSCheck = self.score_type
            self.permCheck = "ON"
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "LOD"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                included_markers, p_values = gemma_mapping.run_gemma(
                    self.dataset, self.samples, self.vals)
            with Bench("Getting markers from csv"):
                marker_obs = get_markers_from_csv(included_markers, p_values,
                                                  self.dataset.group.name)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            results = self.run_rqtl_geno()
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            results = self.gen_reaper_results()
        elif self.mapping_method == "plink":
            results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")

        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker[
                            'chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                qtl_results=self.qtl_results,
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker[
                        'chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker[
                            'chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value'
                                                          in marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = trim_markers_for_table(results)

            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for index, qtl in enumerate(self.qtl_results):
                #if index<40:
                #    print("lod score is:", qtl['lod_score'])
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    #print("changing to X")
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

            #Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data['chrnames'] = []
            for key in self.species.chromosomes.chromosomes.keys():
                self.json_data['chrnames'].append([
                    self.species.chromosomes.chromosomes[key].name,
                    self.species.chromosomes.chromosomes[key].mb_length
                ])
                chromosome_mb_lengths[
                    key] = self.species.chromosomes.chromosomes[key].mb_length

            # print("json_data:", self.json_data)

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
            )
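Much of the constructor above is defensive parsing of optional form fields. The repeated if "x" in start_vars blocks can be condensed with dict.get() and explicit defaults; a sketch (not the original code) covering the display settings:

def parse_display_settings(start_vars):
    """Condense the repeated membership checks above. Defaults mirror
    Example #11: the checkboxes default to "ON" on the first page load
    (detected by startMb being absent), otherwise a missing checkbox
    means it was left unticked."""
    first_load = "startMb" not in start_vars
    settings = {}
    for field in ("showSNP", "showGenes", "viewLegend"):
        settings[field] = "ON" if first_load else start_vars.get(field, False)
    for field in ("startMb", "endMb", "graphWidth", "lrsMax",
                  "haplotypeAnalystCheck"):
        if field in start_vars:
            settings[field] = start_vars[field]
    return settings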
Example #12
def mapping_results_page():
    initial_start_vars = request.form
    logger.info(request.url)
    temp_uuid = initial_start_vars['temp_uuid']
    wanted = ('trait_id', 'dataset', 'group', 'species', 'samples', 'vals',
              'sample_vals', 'vals_hash', 'first_run', 'output_files',
              'geno_db_exists', 'method', 'mapping_results_path',
              'trimmed_markers', 'selected_chr', 'chromosomes',
              'mapping_scale', 'plotScale', 'score_type', 'suggestive',
              'significant', 'num_perm', 'permCheck', 'perm_strata',
              'categorical_vars', 'perm_output', 'num_bootstrap', 'bootCheck',
              'bootstrap_results', 'LRSCheck', 'covariates', 'maf', 'use_loco',
              'manhattan_plot', 'color_scheme', 'manhattan_single_color',
              'control_marker', 'do_control', 'genofile', 'genofile_string',
              'pair_scan', 'startMb', 'endMb', 'graphWidth', 'lrsMax',
              'additiveCheck', 'showSNP', 'showGenes', 'viewLegend',
              'haplotypeAnalystCheck', 'mapmethod_rqtl_geno',
              'mapmodel_rqtl_geno', 'temp_trait', 'n_samples', 'transform')
    start_vars = {}
    for key, value in list(initial_start_vars.items()):
        if key in wanted:
            start_vars[key] = value

    version = "v3"
    key = "mapping_results:{}:".format(version) + json.dumps(start_vars,
                                                             sort_keys=True)
    with Bench("Loading cache"):
        result = None  # Just for testing
        #result = Redis.get(key)

    #logger.info("************************ Starting result *****************")
    #logger.info("result is [{}]: {}".format(type(result), result))
    #logger.info("************************ Ending result ********************")

    if result:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(result)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in RunMapping"):
            try:
                template_vars = run_mapping.RunMapping(start_vars, temp_uuid)
                if template_vars.no_results:
                    rendered_template = render_template("mapping_error.html")
                    return rendered_template
            except:
                rendered_template = render_template("mapping_error.html")
                return rendered_template

            template_vars.js_data = json.dumps(template_vars.js_data,
                                               default=json_default_handler,
                                               indent="   ")

            result = template_vars.__dict__

            if result['pair_scan']:
                with Bench("Rendering template"):
                    img_path = result['pair_scan_filename']
                    logger.info("img_path:", img_path)
                    initial_start_vars = request.form
                    logger.info("initial_start_vars:", initial_start_vars)
                    with open(TEMPDIR + img_path, 'rb') as imgfile:
                        imgdata = imgfile.read()
                    imgB64 = base64.b64encode(imgdata)
                    bytesarray = array.array('B', imgB64)
                    result['pair_scan_array'] = bytesarray
                    rendered_template = render_template(
                        "pair_scan_results.html", **result)
            else:
                gn1_template_vars = display_mapping_results.DisplayMappingResults(
                    result).__dict__

                rendered_template = render_template("mapping_results.html",
                                                    **gn1_template_vars)

    return rendered_template
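In the pair-scan branch the view reads the generated plot from disk, base64-encodes it and hands the bytes to the template wrapped in an array.array. A stand-alone version of that step which returns a plain str instead, so a template could inline it directly; the function name and the dictionary key in the usage comment are placeholders:

import base64


def encode_plot_for_template(image_path):
    """Read a generated plot and return it base64-encoded, ready to be
    inlined by a template (for example inside a data: URI)."""
    with open(image_path, 'rb') as imgfile:
        imgdata = imgfile.read()
    return base64.b64encode(imgdata).decode('ascii')

# e.g. result['pair_scan_b64'] = encode_plot_for_template(TEMPDIR + img_path)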
Example #13
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = []  # Want only ones with values
        self.vals = []

        all_samples_ordered = self.dataset.group.all_samples_ordered()
        primary_sample_names = list(all_samples_ordered)

        for sample in self.dataset.group.samplelist:
            # sample is actually the name of an individual
            in_trait_data = False
            for item in self.this_trait.data:
                if self.this_trait.data[item].name == sample:
                    value = start_vars['value:' +
                                       self.this_trait.data[item].name]
                    self.samples.append(self.this_trait.data[item].name)
                    self.vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                value = start_vars.get('value:' + sample)
                if value:
                    self.samples.append(sample)
                    self.vals.append(value)

        #ZS: Check if genotypes exist in the DB in order to create links for markers
        if "geno_db_exists" in start_vars:
            self.geno_db_exists = start_vars['geno_db_exists']
        else:
            try:
                self.geno_db_exists = "True"
            except:
                self.geno_db_exists = "False"

        self.mapping_method = start_vars['method']
        if "results_path" in start_vars:
            self.mapping_results_path = start_vars['results_path']
        else:
            mapping_results_filename = self.dataset.group.name + "_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            self.mapping_results_path = "{}{}.csv".format(
                webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

        if start_vars['manhattan_plot'] == "True":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars['maf']  # Minor allele frequency
        if "use_loco" in start_vars:
            self.use_loco = start_vars['use_loco']
        else:
            self.use_loco = None
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []
        self.covariates = start_vars[
            'covariates'] if "covariates" in start_vars else None

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(
                    start_vars['selected_chr']
            ) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except:
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        if 'genofile' in start_vars:
            if start_vars['genofile'] != "":
                self.genofile_string = start_vars['genofile']
                self.dataset.group.genofile = self.genofile_string.split(
                    ":")[0]
        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                marker_obs = gemma_mapping.run_gemma(self.dataset,
                                                     self.samples, self.vals,
                                                     self.covariates,
                                                     self.use_loco)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            #if start_vars['pair_scan'] == "true":
            #    self.pair_scan = True
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
            else:
                results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            logger.info("Running qtlreaper")
            results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.json_data, self.num_perm, self.bootCheck,
                self.num_bootstrap, self.do_control, self.control_marker,
                self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
            #results = self.run_plink()
        elif self.mapping_method == "pylmm":
            logger.debug("RUNNING PYLMM")
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            logger.debug("RUNNING NOTHING")

        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker[
                            'chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(json_data=self.json_data,
                                this_trait=self.this_trait.name,
                                data_set=self.dataset.name,
                                maf=self.maf,
                                manhattan_plot=self.manhattan_plot,
                                mapping_scale=self.mapping_scale,
                                qtl_results=self.qtl_results)

        else:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker[
                        'chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker[
                            'chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value'
                                                          in marker.keys()):
                        self.qtl_results.append(marker)

            with Bench("Exporting Results"):
                export_mapping_results(self.dataset, self.this_trait,
                                       self.qtl_results,
                                       self.mapping_results_path,
                                       self.mapping_scale, self.score_type)

            with Bench("Trimming Markers for Figure"):
                if len(self.qtl_results) > 30000:
                    self.qtl_results = trim_markers_for_figure(
                        self.qtl_results)

            with Bench("Trimming Markers for Table"):
                self.trimmed_markers = trim_markers_for_table(results)

            if self.mapping_method != "gemma":
                self.json_data['chr'] = []
                self.json_data['pos'] = []
                self.json_data['lod.hk'] = []
                self.json_data['markernames'] = []

                self.json_data['suggestive'] = self.suggestive
                self.json_data['significant'] = self.significant

                #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
                for index, qtl in enumerate(self.qtl_results):
                    #if index<40:
                    #    logger.debug("lod score is:", qtl['lod_score'])
                    if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                        #logger.debug("changing to X")
                        self.json_data['chr'].append("X")
                    else:
                        self.json_data['chr'].append(str(qtl['chr']))
                    self.json_data['pos'].append(qtl['Mb'])
                    if 'lrs_value' in qtl.keys():
                        self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                    else:
                        self.json_data['lod.hk'].append(str(qtl['lod_score']))
                    self.json_data['markernames'].append(qtl['name'])

                #Get chromosome lengths for drawing the interval map plot
                chromosome_mb_lengths = {}
                self.json_data['chrnames'] = []
                for key in self.species.chromosomes.chromosomes.keys():
                    self.json_data['chrnames'].append([
                        self.species.chromosomes.chromosomes[key].name,
                        self.species.chromosomes.chromosomes[key].mb_length
                    ])
                    chromosome_mb_lengths[
                        key] = self.species.chromosomes.chromosomes[
                            key].mb_length

                self.js_data = dict(
                    result_score_type=self.score_type,
                    json_data=self.json_data,
                    this_trait=self.this_trait.name,
                    data_set=self.dataset.name,
                    maf=self.maf,
                    manhattan_plot=self.manhattan_plot,
                    mapping_scale=self.mapping_scale,
                    chromosomes=chromosome_mb_lengths,
                    qtl_results=self.qtl_results,
                    num_perm=self.num_perm,
                    perm_results=self.perm_output,
                )
Example #14
    def __init__(self, start_vars, temp_uuid):
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        #ZS: Needed to zoom in or remap temp traits like PCA traits
        if "temp_trait" in start_vars and start_vars['temp_trait'] != "False":
            self.temp_trait = "True"
            self.group = self.dataset.group.name

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        #ZS: Sometimes a group may have a genofile that only includes a subset of samples
        genofile_samplelist = []
        if 'genofile' in start_vars:
            if start_vars['genofile'] != "":
                self.genofile_string = start_vars['genofile']
                self.dataset.group.genofile = self.genofile_string.split(
                    ":")[0]
                genofile_samplelist = get_genofile_samplelist(self.dataset)

        all_samples_ordered = self.dataset.group.all_samples_ordered()

        self.vals = []
        self.samples = []
        self.sample_vals = start_vars['sample_vals']
        sample_val_dict = json.loads(self.sample_vals)
        samples = sample_val_dict.keys()
        if (len(genofile_samplelist) != 0):
            for sample in genofile_samplelist:
                self.samples.append(sample)
                if sample in samples:
                    self.vals.append(sample_val_dict[sample])
                else:
                    self.vals.append("x")
        else:
            for sample in self.dataset.group.samplelist:
                if sample in samples:
                    self.vals.append(sample_val_dict[sample])
                    self.samples.append(sample)

        if 'n_samples' in start_vars:
            self.n_samples = start_vars['n_samples']
        else:
            self.n_samples = len([val for val in self.vals if val != "x"])

        #ZS: Check if genotypes exist in the DB in order to create links for markers

        self.geno_db_exists = geno_db_exists(self.dataset)

        self.mapping_method = start_vars['method']
        if "results_path" in start_vars:
            self.mapping_results_path = start_vars['results_path']
        else:
            mapping_results_filename = self.dataset.group.name + "_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            self.mapping_results_path = "{}{}.csv".format(
                webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

        self.manhattan_plot = False
        if 'manhattan_plot' in start_vars:
            if start_vars['manhattan_plot'].lower() != "false":
                self.color_scheme = "alternating"
                if "color_scheme" in start_vars:
                    self.color_scheme = start_vars['color_scheme']
                    if self.color_scheme == "single":
                        self.manhattan_single_color = start_vars[
                            'manhattan_single_color']
                self.manhattan_plot = True

        self.maf = start_vars['maf']  # Minor allele frequency
        if "use_loco" in start_vars:
            self.use_loco = start_vars['use_loco']
        else:
            self.use_loco = None
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        if 'transform' in start_vars:
            self.transform = start_vars['transform']
        else:
            self.transform = ""
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        if "mapping_scale" in start_vars:
            self.mapping_scale = start_vars['mapping_scale']
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []
        self.covariates = start_vars[
            'covariates'] if "covariates" in start_vars else ""
        self.categorical_vars = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(
                    start_vars['selected_chr']
            ) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except:
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        #self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.first_run = True
            self.output_files = None
            if 'output_files' in start_vars:
                self.output_files = start_vars['output_files']
            if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                self.first_run = False
            self.score_type = "-logP"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                if self.use_loco == "True":
                    marker_obs, self.output_files = gemma_mapping.run_gemma(
                        self.this_trait, self.dataset, self.samples, self.vals,
                        self.covariates, self.use_loco, self.maf,
                        self.first_run, self.output_files)
                else:
                    marker_obs, self.output_files = gemma_mapping.run_gemma(
                        self.this_trait, self.dataset, self.samples, self.vals,
                        self.covariates, self.use_loco, self.maf,
                        self.first_run, self.output_files)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            perm_strata = []
            if "perm_strata" in start_vars and "categorical_vars" in start_vars:
                self.categorical_vars = start_vars["categorical_vars"].split(
                    ",")
                if len(self.categorical_vars
                       ) and start_vars["perm_strata"] == "True":
                    primary_samples = SampleList(dataset=self.dataset,
                                                 sample_names=self.samples,
                                                 this_trait=self.this_trait)

                    perm_strata = get_perm_strata(self.this_trait,
                                                  primary_samples,
                                                  self.categorical_vars,
                                                  self.samples)
            self.score_type = "LOD"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            if 'mapmethod_rqtl_geno' in start_vars:
                self.method = start_vars['mapmethod_rqtl_geno']
            else:
                self.method = "em"
            self.model = start_vars['mapmodel_rqtl_geno']
            #if start_vars['pair_scan'] == "true":
            #    self.pair_scan = True
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.samples, self.dataset, self.mapping_scale,
                    self.method, self.model, self.permCheck, self.num_perm,
                    perm_strata, self.do_control, self.control_marker,
                    self.manhattan_plot, self.pair_scan, self.covariates)
            else:
                results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.samples, self.dataset, self.mapping_scale,
                    self.method, self.model, self.permCheck, self.num_perm,
                    perm_strata, self.do_control, self.control_marker,
                    self.manhattan_plot, self.pair_scan, self.covariates)
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.reaper_version = start_vars['reaper_version']

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            logger.info("Running qtlreaper")

            if self.reaper_version == "new":
                self.first_run = True
                self.output_files = None
                if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                    self.first_run = False
                    if 'output_files' in start_vars:
                        self.output_files = start_vars['output_files'].split(
                            ",")

                results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.json_data, self.num_perm, self.bootCheck,
                    self.num_bootstrap, self.do_control, self.control_marker,
                    self.manhattan_plot, self.first_run, self.output_files)
            else:
                results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.json_data, self.num_perm, self.bootCheck,
                    self.num_bootstrap, self.do_control, self.control_marker,
                    self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-logP"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
            #results = self.run_plink()
        else:
            logger.debug("RUNNING NOTHING")

        self.no_results = False
        if len(results) == 0:
            self.no_results = True
        else:
            if self.pair_scan == True:
                self.qtl_results = []
                highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
                for marker in results:
                    if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                        if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                            highest_chr = marker['chr1']
                        if 'lod_score' in list(marker.keys()):
                            self.qtl_results.append(marker)

                self.trimmed_markers = results

                for qtl in self.qtl_results:
                    self.json_data['chr1'].append(str(qtl['chr1']))
                    self.json_data['chr2'].append(str(qtl['chr2']))
                    self.json_data['Mb'].append(qtl['Mb'])
                    self.json_data['markernames'].append(qtl['name'])

                self.js_data = dict(json_data=self.json_data,
                                    this_trait=self.this_trait.name,
                                    data_set=self.dataset.name,
                                    maf=self.maf,
                                    manhattan_plot=self.manhattan_plot,
                                    mapping_scale=self.mapping_scale,
                                    qtl_results=self.qtl_results)

            else:
                self.qtl_results = []
                self.results_for_browser = []
                self.annotations_for_browser = []
                highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
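                # Build the marker dicts the genome browser track expects: chromosome,
                # position in base pairs, and a p-value-like score derived below.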
                for marker in results:
                    if 'Mb' in marker:
                        this_ps = marker['Mb'] * 1000000
                    else:
                        this_ps = marker['cM'] * 1000000

                    browser_marker = dict(chr=str(marker['chr']),
                                          rs=marker['name'],
                                          ps=this_ps,
                                          url="/show_trait?trait_id=" +
                                          marker['name'] + "&dataset=" +
                                          self.dataset.group.name + "Geno")

                    if self.geno_db_exists == "True":
                        annot_marker = dict(name=str(marker['name']),
                                            chr=str(marker['chr']),
                                            rs=marker['name'],
                                            pos=this_ps,
                                            url="/show_trait?trait_id=" +
                                            marker['name'] + "&dataset=" +
                                            self.dataset.group.name + "Geno")
                    else:
                        annot_marker = dict(name=str(marker['name']),
                                            chr=str(marker['chr']),
                                            rs=marker['name'],
                                            pos=this_ps)

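                    # The browser wants a p-value, so convert LRS to LOD (LRS / 4.61, since
                    # LRS = 2 ln(10) * LOD) and use 10^-LOD as an approximate p_wald.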
                    if 'lrs_value' in marker and marker['lrs_value'] > 0:
                        browser_marker['p_wald'] = 10**-(marker['lrs_value'] /
                                                         4.61)
                    elif 'lod_score' in marker and marker['lod_score'] > 0:
                        browser_marker['p_wald'] = 10**-(marker['lod_score'])
                    else:
                        browser_marker['p_wald'] = 0

                    self.results_for_browser.append(browser_marker)
                    self.annotations_for_browser.append(annot_marker)
                    if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y":
                        if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y":
                            highest_chr = marker['chr']
                        if 'lod_score' in marker or 'lrs_value' in marker:
                            if 'Mb' in marker:
                                marker['display_pos'] = "Chr{}: {:.6f}".format(marker['chr'], marker['Mb'])
                            elif 'cM' in marker:
                                marker['display_pos'] = "Chr{}: {:.3f}".format(marker['chr'], marker['cM'])
                            else:
                                marker['display_pos'] = "N/A"
                            self.qtl_results.append(marker)

                total_markers = len(self.qtl_results)

                with Bench("Exporting Results"):
                    export_mapping_results(self.dataset, self.this_trait,
                                           self.qtl_results,
                                           self.mapping_results_path,
                                           self.mapping_scale, self.score_type)

                with Bench("Trimming Markers for Figure"):
                    if len(self.qtl_results) > 30000:
                        self.qtl_results = trim_markers_for_figure(
                            self.qtl_results)
                        self.results_for_browser = trim_markers_for_figure(
                            self.results_for_browser)
                        filtered_annotations = []
                        for marker in self.results_for_browser:
                            for annot_marker in self.annotations_for_browser:
                                if annot_marker['rs'] == marker['rs']:
                                    filtered_annotations.append(annot_marker)
                                    break
                        self.annotations_for_browser = filtered_annotations
                        browser_files = write_input_for_browser(
                            self.dataset, self.results_for_browser,
                            self.annotations_for_browser)
                    else:
                        browser_files = write_input_for_browser(
                            self.dataset, self.results_for_browser,
                            self.annotations_for_browser)

                with Bench("Trimming Markers for Table"):
                    self.trimmed_markers = trim_markers_for_table(results)

                chr_lengths = get_chr_lengths(self.mapping_scale,
                                              self.mapping_method,
                                              self.dataset, self.qtl_results)

                #ZS: For zooming into genome browser, need to pass chromosome name instead of number
                if self.dataset.group.species == "mouse":
                    if self.selected_chr == 20:
                        this_chr = "X"
                    else:
                        this_chr = str(self.selected_chr)
                elif self.dataset.group.species == "rat":
                    if self.selected_chr == 21:
                        this_chr = "X"
                    else:
                        this_chr = str(self.selected_chr)
                else:
                    if self.selected_chr == 22:
                        this_chr = "X"
                    elif self.selected_chr == 23:
                        this_chr = "Y"
                    else:
                        this_chr = str(self.selected_chr)

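                # The browser threshold is on the LOD scale, so LRS significance values
                # are divided by 4.61 before being passed along.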
                if self.mapping_method != "gemma":
                    if self.score_type == "LRS":
                        significant_for_browser = self.significant / 4.61
                    else:
                        significant_for_browser = self.significant

                    self.js_data = dict(
                        #result_score_type = self.score_type,
                        #this_trait = self.this_trait.name,
                        #data_set = self.dataset.name,
                        #maf = self.maf,
                        #manhattan_plot = self.manhattan_plot,
                        #mapping_scale = self.mapping_scale,
                        #chromosomes = chromosome_mb_lengths,
                        #qtl_results = self.qtl_results,
                        categorical_vars=self.categorical_vars,
                        chr_lengths=chr_lengths,
                        num_perm=self.num_perm,
                        perm_results=self.perm_output,
                        significant=significant_for_browser,
                        browser_files=browser_files,
                        selected_chr=this_chr,
                        total_markers=total_markers)
                else:
                    self.js_data = dict(chr_lengths=chr_lengths,
                                        browser_files=browser_files,
                                        selected_chr=this_chr,
                                        total_markers=total_markers)
Example #15
    def __init__(self, start_vars):
        # get trait list from db (database name)
        # calculate correlation with Base vector and targets

        # Check parameters
        assert('corr_type' in start_vars)
        assert(is_str(start_vars['corr_type']))
        assert('dataset' in start_vars)
        # assert('group' in start_vars) permitted to be empty?
        assert('corr_sample_method' in start_vars)
        assert('corr_samples_group' in start_vars)
        assert('corr_dataset' in start_vars)
        assert('corr_return_results' in start_vars)
        if 'loc_chr' in start_vars:
            assert('min_loc_mb' in start_vars)
            assert('max_loc_mb' in start_vars)

        with Bench("Doing correlations"):
            if start_vars['dataset'] == "Temp":
                self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
                self.trait_id = start_vars['trait_id']
                self.this_trait = create_trait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            else:
                helper_functions.get_species_dataset_trait(self, start_vars)

            corr_samples_group = start_vars['corr_samples_group']

            self.sample_data = {}
            self.corr_type = start_vars['corr_type']
            self.corr_method = start_vars['corr_sample_method']
            self.min_expr = get_float(start_vars, 'min_expr')
            self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0)
            self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0)

            if ('loc_chr' in start_vars and
                'min_loc_mb' in start_vars and
                'max_loc_mb' in start_vars):

                self.location_type = get_string(start_vars, 'location_type')
                self.location_chr = get_string(start_vars, 'loc_chr')
                self.min_location_mb = get_int(start_vars, 'min_loc_mb')
                self.max_location_mb = get_int(start_vars, 'max_loc_mb')
            else:
                self.location_type = self.location_chr = self.min_location_mb = self.max_location_mb = None

            self.get_formatted_corr_type()
            self.return_number = int(start_vars['corr_return_results'])

            # The two if statements below append samples to the sample list based upon whether the user
            # selected Primary Samples Only, Other Samples Only, or All Samples

            primary_samples = self.dataset.group.samplelist
            if self.dataset.group.parlist != None:
                primary_samples += self.dataset.group.parlist
            if self.dataset.group.f1list != None:
                primary_samples += self.dataset.group.f1list

            #If either BXD/whatever Only or All Samples, append all of that group's samplelist
            if corr_samples_group != 'samples_other':
                self.process_samples(start_vars, primary_samples)

            #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
            #exclude the primary samples (because they would have been added in the previous
            #if statement if the user selected All Samples)
            if corr_samples_group != 'samples_primary':
                if corr_samples_group == 'samples_other':
                    primary_samples = [x for x in primary_samples if x not in (
                                    self.dataset.group.parlist + self.dataset.group.f1list)]
                self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples)

            self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
            self.target_dataset.get_trait_data(list(self.sample_data.keys()))

            self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method)

            if self.target_dataset.type == "ProbeSet":
                self.filter_cols = [7, 6]
            elif self.target_dataset.type == "Publish":
                self.filter_cols = [6, 0]
            else:
                self.filter_cols = [4, 0]

            self.correlation_results = []

            self.correlation_data = {}

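            # Dispatch on correlation type: tissue and literature correlations rank the
            # target traits first and sample r/p values are then computed for the top hits;
            # plain sample correlation scans every trait in the target dataset.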
            if self.corr_type == "tissue":
                self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

                tissue_corr_data = self.do_tissue_correlation_for_all_traits()
                if tissue_corr_data != None:
                    for trait in list(tissue_corr_data.keys())[:self.return_number]:
                        self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
                else:
                    for trait, values in list(self.target_dataset.trait_data.items()):
                        self.get_sample_r_and_p_values(trait, values)

            elif self.corr_type == "lit":
                self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
                lit_corr_data = self.do_lit_correlation_for_all_traits()

                for trait in list(lit_corr_data.keys())[:self.return_number]:
                    self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])

            elif self.corr_type == "sample":
                for trait, values in list(self.target_dataset.trait_data.items()):
                    self.get_sample_r_and_p_values(trait, values)

            self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
                                                                   key=lambda t: -abs(t[1][0])))


            #ZS: Convert min/max chromosome to an int for the location range option
            range_chr_as_int = None
            for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
                if 'loc_chr' in start_vars:
                    if chr_info.name == self.location_chr:
                        range_chr_as_int = order_id

            for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
                trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
                if not trait_object:
                    continue

                chr_as_int = 0
                for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
                    if self.location_type == "highest_lod":
                        if chr_info.name == trait_object.locus_chr:
                            chr_as_int = order_id
                    else:
                        if chr_info.name == trait_object.chr:
                            chr_as_int = order_id

                if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
                    float(self.correlation_data[trait][0]) <= self.p_range_upper):

                    if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
                        if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
                            continue

                    if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
                        continue
                    if self.location_type == "highest_lod":
                        if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
                            continue
                        if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
                            continue
                    else:
                        if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
                            continue
                        if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
                            continue

                    (trait_object.sample_r,
                    trait_object.sample_p,
                    trait_object.num_overlap) = self.correlation_data[trait]

                    # Set some sane defaults
                    trait_object.tissue_corr = 0
                    trait_object.tissue_pvalue = 0
                    trait_object.lit_corr = 0
                    if self.corr_type == "tissue" and tissue_corr_data != None:
                        trait_object.tissue_corr = tissue_corr_data[trait][1]
                        trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                    elif self.corr_type == "lit":
                        trait_object.lit_corr = lit_corr_data[trait][1]

                    self.correlation_results.append(trait_object)

            if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_lit_correlation_for_trait_list()

            if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_tissue_correlation_for_trait_list()

        self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
Example #16
    def __init__(self, kw):
        assert ('type' in kw)
        assert ('terms' in kw)

        self.type = kw['type']
        self.terms = kw['terms']
        assert (is_str(self.type))

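        # Gene search: full-text MATCH ... AGAINST over the ProbeSet annotation columns,
        # limited to public, non-confidential ProbeSet freezes.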
        if self.type == "gene":
            sql = """
                SELECT
                Species.`Name` AS species_name,
                InbredSet.`Name` AS inbredset_name,
                Tissue.`Name` AS tissue_name,
                ProbeSetFreeze.Name AS probesetfreeze_name,
                ProbeSet.Name AS probeset_name,
                ProbeSet.Symbol AS probeset_symbol,
                ProbeSet.`description` AS probeset_description,
                ProbeSet.Chr AS chr,
                ProbeSet.Mb AS mb,
                ProbeSetXRef.Mean AS mean,
                ProbeSetXRef.LRS AS lrs,
                ProbeSetXRef.`Locus` AS locus,
                ProbeSetXRef.`pValue` AS pvalue,
                ProbeSetXRef.`additive` AS additive
                FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
                WHERE InbredSet.`SpeciesId`=Species.`Id`
                AND ProbeFreeze.InbredSetId=InbredSet.`Id`
                AND ProbeFreeze.`TissueId`=Tissue.`Id`
                AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
                AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
                AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
                AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
                AND ProbeSetFreeze.confidentiality < 1
                AND ProbeSetFreeze.public > 0
                ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
                LIMIT 6000
                """ % (self.terms)
            with Bench("Running query"):
                logger.sql(sql)
                re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    this_trait = {}
                    this_trait['name'] = line[4]
                    this_trait['dataset'] = line[3]
                    this_trait['species'] = line[0]
                    this_trait['group'] = line[1]
                    this_trait['tissue'] = line[2]
                    this_trait['symbol'] = line[5]
                    this_trait['description'] = line[6]
                    this_trait['location_repr'] = 'N/A'
                    if (line[7] != "NULL"
                            and line[7] != "") and (line[8] != 0):
                        this_trait['location_repr'] = 'Chr%s: %.6f' % (
                            line[7], float(line[8]))
                    this_trait['mean'] = line[9]
                    this_trait['LRS_score_repr'] = "N/A"
                    if line[10] != "" and line[10] != None:
                        this_trait['LRS_score_repr'] = '%3.1f' % line[10]
                    this_trait['additive'] = "N/A"
                    if line[13] != "":
                        this_trait['additive'] = line[13]

                    #dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False)
                    #trait_id = line[4]
                    #with Bench("Building trait object"):
                    #this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=False, get_sample_info=False)
                    self.trait_list.append(this_trait)

        elif self.type == "phenotype":
            sql = """
                SELECT
                Species.`Name`,
                InbredSet.`Name`,
                PublishFreeze.`Name`,
                PublishXRef.`Id`,
                Phenotype.`Post_publication_description`,
                Publication.`Authors`,
                Publication.`Year`,
                Publication.`PubMed_ID`,
                PublishXRef.`LRS`,
                PublishXRef.`additive`
                FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
                WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
                AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
                AND InbredSet.`SpeciesId`=Species.`Id`
                AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
                AND PublishXRef.`PublicationId`=Publication.`Id`
                AND	  (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]"
                    OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]")
                ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
                LIMIT 6000
                """ % (self.terms, self.terms, self.terms, self.terms,
                       self.terms, self.terms, self.terms, self.terms,
                       self.terms, self.terms)
            logger.sql(sql)
            re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    this_trait = {}
                    this_trait['name'] = line[3]
                    this_trait['dataset'] = line[2]
                    this_trait['species'] = line[0]
                    this_trait['group'] = line[1]
                    this_trait['description'] = line[4]
                    this_trait['authors'] = line[5]
                    this_trait['year'] = line[6]
                    if this_trait['year'].isdigit():
                        this_trait['pubmed_text'] = this_trait['year']
                    else:
                        this_trait['pubmed_text'] = "N/A"
                    if line[7] != "" and line[7] != None:
                        this_trait['pubmed_link'] = webqtlConfig.PUBMEDLINK_URL % line[7]
                    else:
                        this_trait['pubmed_link'] = "N/A"
                    this_trait['LRS_score_repr'] = "N/A"
                    if line[8] != "" and line[8] != None:
                        this_trait['LRS_score_repr'] = '%3.1f' % line[8]
                    this_trait['additive'] = "N/A"
                    if line[9] != "":
                        this_trait['additive'] = line[9]

                    #dataset = create_dataset(line[2], "Publish")
                    #trait_id = line[3]
                    #this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
                    self.trait_list.append(this_trait)
Example #17
    def __init__(self, kw):
        assert('type' in kw)
        assert('terms' in kw)

        self.type = kw['type']
        self.terms = kw['terms']
        assert(is_str(self.type))

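        # Same gene search as the previous example, but written with explicit JOINs and a
        # LEFT JOIN to Geno so the peak-LRS locus position (geno_chr/geno_mb) is available.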
        if self.type == "gene":
            sql = """
                SELECT
                Species.`Name` AS species_name,
                InbredSet.`Name` AS inbredset_name,
                Tissue.`Name` AS tissue_name,
                ProbeSetFreeze.Name AS probesetfreeze_name,
                ProbeSetFreeze.FullName AS probesetfreeze_fullname,
                ProbeSet.Name AS probeset_name,
                ProbeSet.Symbol AS probeset_symbol,
                CAST(ProbeSet.`description` AS BINARY) AS probeset_description,
                ProbeSet.Chr AS chr,
                ProbeSet.Mb AS mb,
                ProbeSetXRef.Mean AS mean,
                ProbeSetXRef.LRS AS lrs,
                ProbeSetXRef.`Locus` AS locus,
                ProbeSetXRef.`pValue` AS pvalue,
                ProbeSetXRef.`additive` AS additive,
                ProbeSetFreeze.Id AS probesetfreeze_id,
                Geno.Chr as geno_chr,
                Geno.Mb as geno_mb
                FROM Species 
                INNER JOIN InbredSet ON InbredSet.`SpeciesId`=Species.`Id` 
                INNER JOIN ProbeFreeze ON ProbeFreeze.InbredSetId=InbredSet.`Id` 
                INNER JOIN Tissue ON ProbeFreeze.`TissueId`=Tissue.`Id` 
                INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id 
                INNER JOIN ProbeSetXRef ON ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id 
                INNER JOIN ProbeSet ON ProbeSet.Id = ProbeSetXRef.ProbeSetId 
                LEFT JOIN Geno ON ProbeSetXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id
                WHERE ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,ProbeSet.alias,ProbeSet.GenbankId, ProbeSet.UniGeneId, ProbeSet.Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
                AND ProbeSetFreeze.confidentiality < 1
                AND ProbeSetFreeze.public > 0
                ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
                LIMIT 6000
                """ % (self.terms)
            with Bench("Running query"):
                logger.sql(sql)
                re = g.db.execute(sql).fetchall()

            trait_list = []
            dataset_to_permissions = {}
            with Bench("Creating trait objects"):
                for i, line in enumerate(re):
                    this_trait = {}
                    this_trait['index'] = i + 1
                    this_trait['name'] = line[5]
                    this_trait['dataset'] = line[3]
                    this_trait['dataset_fullname'] = line[4]
                    this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[5], line[3]))
                    this_trait['species'] = line[0]
                    this_trait['group'] = line[1]
                    this_trait['tissue'] = line[2]
                    this_trait['symbol'] = line[6]
                    if line[7]:
                        this_trait['description'] = line[7].decode('utf-8', 'replace')
                    else:
                        this_trait['description'] = "N/A"
                    this_trait['location_repr'] = 'N/A'
                    if (line[8] != "NULL" and line[8] != "") and (line[9] != 0):
                        this_trait['location_repr'] = 'Chr%s: %.6f' % (line[8], float(line[9]))
                    try:
                        this_trait['mean'] = '%.3f' % line[10]
                    except (TypeError, ValueError):
                        this_trait['mean'] = "N/A"
                    this_trait['LRS_score_repr'] = "N/A"
                    if line[11] != "" and line[11] != None:
                        this_trait['LRS_score_repr'] = '%3.1f' % line[11]
                    this_trait['additive'] = "N/A"
                    if line[14] != "" and line[14] != None:
                        this_trait['additive'] = '%.3f' % line[14]
                    this_trait['dataset_id'] = line[15]
                    this_trait['locus_chr'] = line[16]
                    this_trait['locus_mb'] = line[17]

                    dataset_ob = SimpleNamespace(id=this_trait["dataset_id"], type="ProbeSet",species=this_trait["species"])
                    if dataset_ob.id not in dataset_to_permissions:
                        permissions = check_resource_availability(dataset_ob)
                        dataset_to_permissions[dataset_ob.id] = permissions
                    else:
                        permissions = dataset_to_permissions[dataset_ob.id]
                    if "view" not in permissions['data']:
                        continue

                    max_lrs_text = "N/A"
                    if this_trait['locus_chr'] != None and this_trait['locus_mb'] != None:
                        max_lrs_text = "Chr" + str(this_trait['locus_chr']) + ": " + str(this_trait['locus_mb'])
                    this_trait['max_lrs_text'] = max_lrs_text

                    trait_list.append(this_trait)

            self.trait_count = len(trait_list)
            self.trait_list = json.dumps(trait_list)

            self.header_fields = ['Index',
                                'Record',
                                'Species',
                                'Group',
                                'Tissue',
                                'Dataset',
                                'Symbol',
                                'Description',
                                'Location',
                                'Mean',
                                'Max LRS',
                                'Max LRS Location',
                                'Additive Effect']

        elif self.type == "phenotype":
            search_term = self.terms
            group_clause = ""
            if "_" in self.terms:
                if len(self.terms.split("_")[0]) == 3:
                    search_term = self.terms.split("_")[1]
                    group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format(self.terms.split("_")[0])
            sql = """
                SELECT
                Species.`Name`,
                InbredSet.`Name`,
                PublishFreeze.`Name`,
                PublishFreeze.`FullName`,
                PublishXRef.`Id`,
                CAST(Phenotype.`Pre_publication_description` AS BINARY),
                CAST(Phenotype.`Post_publication_description` AS BINARY),
                Publication.`Authors`,
                Publication.`Year`,
                Publication.`PubMed_ID`,
                PublishXRef.`LRS`,
                PublishXRef.`additive`,
                InbredSet.`InbredSetCode`,
                PublishXRef.`mean`
                FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
                WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
                AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
                AND InbredSet.`SpeciesId`=Species.`Id`
                {0}
                AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
                AND PublishXRef.`PublicationId`=Publication.`Id`
                AND	  (Phenotype.Post_publication_description REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Pre_publication_description REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Lab_code REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.PubMed_ID REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.Abstract REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.Title REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.Authors REGEXP "[[:<:]]{1}[[:>:]]"
                    OR PublishXRef.Id REGEXP "[[:<:]]{1}[[:>:]]")
                ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
                LIMIT 6000
                """.format(group_clause, search_term)
            logger.sql(sql)
            re = g.db.execute(sql).fetchall()
            trait_list = []
            with Bench("Creating trait objects"):
                for i, line in enumerate(re):
                    this_trait = {}
                    this_trait['index'] = i + 1
                    this_trait['name'] = str(line[4])
                    if len(str(line[12])) == 3:
                        this_trait['display_name'] = str(line[12]) + "_" + this_trait['name']
                    else:
                        this_trait['display_name'] = this_trait['name']
                    this_trait['dataset'] = line[2]
                    this_trait['dataset_fullname'] = line[3]
                    this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[4], line[2]))
                    this_trait['species'] = line[0]
                    this_trait['group'] = line[1]
                    if line[9] != None and line[6] != None:
                        this_trait['description'] = line[6].decode('utf-8', 'replace')
                    elif line[5] != None:
                        this_trait['description'] = line[5].decode('utf-8', 'replace')
                    else:
                        this_trait['description'] = "N/A"
                    if line[13] != None and line[13] != "":
                        this_trait['mean'] = line[13]
                    else:
                        this_trait['mean'] = "N/A"
                    this_trait['authors'] = line[7]
                    this_trait['year'] = line[8]
                    if this_trait['year'].isdigit():
                        this_trait['pubmed_text'] = this_trait['year']
                    else:
                        this_trait['pubmed_text'] = "N/A"
                    if line[9] != "" and line[9] != None:
                        this_trait['pubmed_link'] = webqtlConfig.PUBMEDLINK_URL % line[9]  # line[9] is Publication.PubMed_ID
                    else:
                        this_trait['pubmed_link'] = "N/A"
                        if line[12]:
                            this_trait['display_name'] = line[12] + "_" + str(this_trait['name'])
                    this_trait['LRS_score_repr'] = "N/A"
                    if line[10] != "" and line[10] != None:
                        this_trait['LRS_score_repr'] = '%3.1f' % line[10]
                    this_trait['additive'] = "N/A"
                    if line[11] != "" and line[11] != None:
                        this_trait['additive'] = '%.3f' % line[11]

                    this_trait['max_lrs_text'] = "N/A"
                    trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False)
                    if not trait_ob:
                        continue
                    if this_trait['dataset'] == this_trait['group'] + "Publish":
                        try:
                            if trait_ob.locus_chr != "" and trait_ob.locus_mb != "":
                                this_trait['max_lrs_text'] = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb)
                        except AttributeError:
                            this_trait['max_lrs_text'] = "N/A"

                    trait_list.append(this_trait)

            self.trait_count = len(trait_list)
            self.trait_list = json.dumps(trait_list)

            self.header_fields = ['Index',
                                'Species',
                                'Group',
                                'Record',
                                'Description',
                                'Authors',
                                'Year',
                                'Max LRS',
                                'Max LRS Location',
                                'Additive Effect']
Example #18
def marker_regression_page():
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:",
                 initial_start_vars.items())
    temp_uuid = initial_start_vars['temp_uuid']
    wanted = ('trait_id', 'dataset', 'method', 'trimmed_markers',
              'selected_chr', 'chromosomes', 'mapping_scale', 'score_type',
              'suggestive', 'significant', 'num_perm', 'permCheck',
              'perm_output', 'num_bootstrap', 'bootCheck', 'bootstrap_results',
              'LRSCheck', 'maf', 'manhattan_plot', 'control_marker',
              'control_marker_db', 'do_control', 'genofile', 'pair_scan',
              'startMb', 'endMb', 'graphWidth', 'lrsMax', 'additiveCheck',
              'showSNP', 'showGenes', 'viewLegend', 'haplotypeAnalystCheck',
              'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno')
    start_vars = {}
    for key, value in initial_start_vars.items():
        if key in wanted or key.startswith('value:'):
            start_vars[key] = value
    logger.debug("Marker regression called with start_vars:", start_vars)

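    # The cache key combines a version tag with the sorted start_vars, so changing either
    # the parameters or the cache format yields a distinct Redis key.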
    version = "v3"
    key = "marker_regression:{}:".format(version) + json.dumps(start_vars,
                                                               sort_keys=True)
    logger.info("key is:", pf(key))
    with Bench("Loading cache"):
        result = None  # Just for testing
        #result = Redis.get(key)

    #logger.info("************************ Starting result *****************")
    #logger.info("result is [{}]: {}".format(type(result), result))
    #logger.info("************************ Ending result ********************")

    if result:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(result)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in MarkerRegression"):
            template_vars = marker_regression.MarkerRegression(
                start_vars, temp_uuid)

        if template_vars.mapping_method != "gemma" and template_vars.mapping_method != "plink":
            template_vars.js_data = json.dumps(template_vars.js_data,
                                               default=json_default_handler,
                                               indent="   ")

        result = template_vars.__dict__

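        # Pair-scan results are rendered as a base64-encoded image; single-locus scans are
        # passed through the GN1-style marker regression template instead.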
        if result['pair_scan']:
            with Bench("Rendering template"):
                img_path = result['pair_scan_filename']
                logger.info("img_path:", img_path)
                initial_start_vars = request.form
                logger.info("initial_start_vars:", initial_start_vars)
                with open(TEMPDIR + img_path, 'rb') as imgfile:
                    imgdata = imgfile.read()
                imgB64 = base64.b64encode(imgdata)  # assumes `import base64`; bytes.encode("base64") is Python 2 only
                bytesarray = array.array('B', imgB64)
                result['pair_scan_array'] = bytesarray
                rendered_template = render_template("pair_scan_results.html",
                                                    **result)
        else:
            #for item in template_vars.__dict__.keys():
            #    logger.info("  ---**--- {}: {}".format(type(template_vars.__dict__[item]), item))

            gn1_template_vars = marker_regression_gn1.MarkerRegression(
                result).__dict__
            #pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
            #logger.info("pickled result length:", len(pickled_result))
            #Redis.set(key, pickled_result)
            #Redis.expire(key, 1*60)

            with Bench("Rendering template"):
                if gn1_template_vars['mapping_method'] in ("gemma", "plink"):
                    gn1_template_vars.pop('qtlresults', None)
                print("TEMPLATE KEYS:", list(gn1_template_vars.keys()))
                rendered_template = render_template(
                    "marker_regression_gn1.html", **gn1_template_vars)

    # with Bench("Rendering template"):
    # if result['pair_scan'] == True:
    # img_path = result['pair_scan_filename']
    # logger.info("img_path:", img_path)
    # initial_start_vars = request.form
    # logger.info("initial_start_vars:", initial_start_vars)
    # imgfile = open(TEMPDIR + '/' + img_path, 'rb')
    # imgdata = imgfile.read()
    # imgB64 = imgdata.encode("base64")
    # bytesarray = array.array('B', imgB64)
    # result['pair_scan_array'] = bytesarray
    # rendered_template = render_template("pair_scan_results.html", **result)
    # else:
    # rendered_template = render_template("marker_regression.html", **result)
    # rendered_template = render_template("marker_regression_gn1.html", **gn1_template_vars)

    return rendered_template