def __init__(self, start_vars, temp_uuid):
        """Gather the submitted sample values, call gen_data and build js_data.

        start_vars -- mapping of request variables; it is indexed with
                      'value:<sample>' for every sample in
                      self.dataset.group.samplelist
        temp_uuid  -- passed to temp_data.TempData
        """
        # presumably sets self.dataset / self.species, which are read below -- TODO confirm
        helper_functions.get_species_dataset_trait(self, start_vars)

        temp_store = temp_data.TempData(temp_uuid)

        # Parallel lists: sample names (as str) and the value submitted for each
        self.samples = []
        self.vals = []
        for sample_name in self.dataset.group.samplelist:
            submitted_value = start_vars['value:' + sample_name]
            self.samples.append(str(sample_name))
            self.vals.append(submitted_value)

        self.gen_data(temp_store)

        # Chromosome lengths, needed for drawing the manhattan plot
        chromosomes = self.species.chromosomes.chromosomes
        chromosome_mb_lengths = {
            chr_key: chromosomes[chr_key].mb_length
            for chr_key in chromosomes.keys()
        }

        self.js_data = {
            'chromosomes': chromosome_mb_lengths,
            'qtl_results': self.qtl_results,
        }
    def __init__(self, start_vars, temp_uuid):
        """Collect sample values, run the reaper mapping step and build js_data.

        start_vars -- mapping of request variables; it is indexed with
                      'value:<sample>' for every sample in
                      self.dataset.group.samplelist and is also handed to
                      self.set_options
        temp_uuid  -- passed to temp_data.TempData

        Besides setting attributes on self, this writes self.json_data as a
        JSON file named 'intmap_<random>' under webqtlConfig.TMPDIR.
        """
        # Currently only getting trait data for one trait, but will need
        # to change this to accept multiple traits once the collection page is implemented
        helper_functions.get_species_dataset_trait(self, start_vars)

        tempdata = temp_data.TempData(temp_uuid)

        self.samples = []  # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        print("start_vars:", start_vars)

        self.set_options(start_vars)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']
        self.gen_reaper_results(tempdata)

        # Get chromosome names/lengths for drawing the interval map plot
        chromosomes = self.species.chromosomes.chromosomes
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        for key in chromosomes.keys():
            this_chr = chromosomes[key]
            self.json_data['chrnames'].append([this_chr.name, this_chr.mb_length])
            chromosome_mb_lengths[key] = this_chr.mb_length

        print("JSON DATA:", self.json_data)

        # The previous json.dumps(obj, path) call treated the path as the
        # `skipkeys` flag and threw the serialized string away, so no file was
        # ever produced. json.dump with an open file is what writes to disk.
        json_filename = webqtlUtil.genRandStr(prefix="intmap_")
        with open(webqtlConfig.TMPDIR + json_filename, "w") as json_file:
            json.dump(self.json_data, json_file)

        self.js_data = dict(
            manhattan_plot=self.manhattan_plot,
            additive=self.additive,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
            json_data=self.json_data,
        )
    def __init__(self, start_vars, temp_uuid):
        """Collect sample values, run the reaper mapping step and build js_data.

        start_vars -- mapping of request variables; it is indexed with
                      'value:<sample>' for every sample in
                      self.dataset.group.samplelist and is also handed to
                      self.set_options
        temp_uuid  -- passed to temp_data.TempData

        Besides setting attributes on self, this writes self.json_data as a
        JSON file named 'intmap_<random>' under webqtlConfig.TMPDIR.
        """
        # Currently only getting trait data for one trait, but will need
        # to change this to accept multiple traits once the collection page is implemented
        helper_functions.get_species_dataset_trait(self, start_vars)

        tempdata = temp_data.TempData(temp_uuid)

        self.samples = []  # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars["value:" + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        print("start_vars:", start_vars)

        self.set_options(start_vars)

        self.score_type = "LRS"
        self.cutoff = 3

        self.json_data = {}
        self.json_data["lodnames"] = ["lod.hk"]
        self.gen_reaper_results(tempdata)

        # Get chromosome names/lengths for drawing the interval map plot
        chromosomes = self.species.chromosomes.chromosomes
        chromosome_mb_lengths = {}
        self.json_data["chrnames"] = []
        for key in chromosomes.keys():
            this_chr = chromosomes[key]
            self.json_data["chrnames"].append([this_chr.name, this_chr.mb_length])
            chromosome_mb_lengths[key] = this_chr.mb_length

        print("JSON DATA:", self.json_data)

        # The previous json.dumps(obj, path) call treated the path as the
        # `skipkeys` flag and threw the serialized string away, so no file was
        # ever produced. json.dump with an open file is what writes to disk.
        json_filename = webqtlUtil.genRandStr(prefix="intmap_")
        with open(webqtlConfig.TMPDIR + json_filename, "w") as json_file:
            json.dump(self.json_data, json_file)

        self.js_data = dict(
            result_score_type=self.score_type,
            manhattan_plot=self.manhattan_plot,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
            json_data=self.json_data,
        )
    def __init__(self, start_vars, temp_uuid):
        """Run the mapping method named in start_vars and build js_data.

        start_vars -- mapping of request variables; reads 'method', 'maf' and
                      a 'value:<sample>' entry for every sample in
                      self.dataset.group.samplelist
        temp_uuid  -- stringified and passed to self.gen_data when the method
                      is "pylmm"

        'method' selects between "gemma", "plink" and "pylmm". Any other value
        runs nothing and yields an empty result set.
        """
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.samples = []  # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars['method']
        self.maf = start_vars['maf']  # Minor allele frequency
        print("self.maf:", self.maf)

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            qtl_results = self.run_gemma()
        elif self.mapping_method == "plink":
            qtl_results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            qtl_results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")
            # Without this the filter below would hit an unbound local
            qtl_results = []

        self.lod_cutoff = 2
        # Keep only markers whose 'chr' compares greater than 0
        self.filtered_markers = [
            marker for marker in qtl_results if marker['chr'] > 0
        ]

        # Get chromosome lengths for drawing the manhattan plot
        chromosomes = self.species.chromosomes.chromosomes
        chromosome_mb_lengths = {}
        for key in chromosomes.keys():
            chromosome_mb_lengths[key] = chromosomes[key].mb_length

        self.js_data = dict(
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.filtered_markers,
        )
    def __init__(self, start_vars, temp_uuid):
        """Gather the submitted sample values, call gen_qtl_results and build js_data.

        start_vars -- mapping of request variables; it is indexed with
                      'value:<sample>' for every sample in
                      self.dataset.group.samplelist and is also handed to
                      self.set_options
        temp_uuid  -- passed to temp_data.TempData
        """
        print("TESTING!!!")

        # Only a single trait is handled for now; this will have to accept
        # several traits once the collection page is implemented
        helper_functions.get_species_dataset_trait(self, start_vars)

        temp_store = temp_data.TempData(temp_uuid)

        # Parallel lists: sample names (as str) and the value submitted for each
        self.samples = []
        self.vals = []
        for sample_name in self.dataset.group.samplelist:
            submitted_value = start_vars['value:' + sample_name]
            self.samples.append(str(sample_name))
            self.vals.append(submitted_value)

        print("start_vars:", start_vars)

        self.set_options(start_vars)
        self.gen_qtl_results(temp_store)

        # Chromosome lengths, needed for drawing the interval map plot
        chromosomes = self.species.chromosomes.chromosomes
        chromosome_mb_lengths = {
            chr_key: chromosomes[chr_key].mb_length
            for chr_key in chromosomes.keys()
        }

        self.js_data = {
            'chromosomes': chromosome_mb_lengths,
            'qtl_results': self.qtl_results,
        }
    def __init__(self, start_vars, temp_uuid):
        """Run the selected mapping method and build json_data / js_data.

        start_vars -- mapping of request variables. Always read: 'method',
                      'manhattan_plot', 'maf' and a 'value:<sample>' entry for
                      every sample in self.dataset.group.samplelist. Read
                      depending on the method: 'num_perm', 'control_marker',
                      'do_control', 'mapmethod_rqtl_geno',
                      'mapmodel_rqtl_geno', 'pair_scan'.
        temp_uuid  -- stringified and passed to self.run_permutations /
                      self.gen_data for the "pylmm" method

        'method' selects between "gemma", "rqtl_plink", "rqtl_geno", "plink"
        and "pylmm". Any other value runs nothing and yields empty results.
        """
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.json_data = {}
        self.json_data["lodnames"] = ["lod.hk"]

        self.samples = []  # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars["value:" + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars["method"]
        self.manhattan_plot = start_vars["manhattan_plot"] == "true"

        self.maf = start_vars["maf"]  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  # ZS: LRS or LOD

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "LOD"
            included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
            self.dataset.group.get_specified_markers(markers=included_markers)
            self.dataset.group.markers.add_pvalues(p_values)
            results = self.dataset.group.markers.markers
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            if start_vars["num_perm"] == "":
                self.num_perm = 0
            else:
                self.num_perm = start_vars["num_perm"]
            self.control = start_vars["control_marker"]
            self.do_control = start_vars["do_control"]
            print("StartVars:", start_vars)
            self.method = start_vars["mapmethod_rqtl_geno"]
            self.model = start_vars["mapmodel_rqtl_geno"]

            if start_vars["pair_scan"] == "true":
                self.pair_scan = True

            results = self.run_rqtl_geno()
            print("qtl_results:", results)
        elif self.mapping_method == "plink":
            results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            self.num_perm = start_vars["num_perm"]
            if self.num_perm != "":
                if int(self.num_perm) > 0:
                    self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")
            # Without this the loops below would hit an unbound local
            results = []

        if self.pair_scan:
            # Keep markers on a usable first chromosome that carry a LOD score
            self.qtl_results = []
            for marker in results:
                chr1 = marker["chr1"]
                if chr1 > 0 or chr1 == "X" or chr1 == "X/Y":
                    if "lod_score" in marker:
                        self.qtl_results.append(marker)

            # These lists have to exist before they can be appended to
            self.json_data["chr1"] = []
            self.json_data["chr2"] = []
            self.json_data["Mb"] = []
            self.json_data["markernames"] = []

            for qtl in self.qtl_results:
                self.json_data["chr1"].append(str(qtl["chr1"]))
                self.json_data["chr2"].append(str(qtl["chr2"]))
                self.json_data["Mb"].append(qtl["Mb"])
                self.json_data["markernames"].append(qtl["name"])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                qtl_results=self.qtl_results,
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
            for marker in results:
                marker_chr = marker["chr"]
                if marker_chr > 0 or marker_chr == "X" or marker_chr == "X/Y":
                    if marker_chr > highest_chr or marker_chr == "X" or marker_chr == "X/Y":
                        highest_chr = marker_chr
                    if "lod_score" in marker:
                        self.qtl_results.append(marker)

            self.json_data["chr"] = []
            self.json_data["pos"] = []
            self.json_data["lod.hk"] = []
            self.json_data["markernames"] = []

            self.json_data["suggestive"] = self.suggestive
            self.json_data["significant"] = self.significant

            # Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for index, qtl in enumerate(self.qtl_results):
                if index < 40:
                    print("lod score is:", qtl["lod_score"])
                # A numeric highest chromosome is reported as "X"
                if qtl["chr"] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    print("changing to X")
                    self.json_data["chr"].append("X")
                else:
                    self.json_data["chr"].append(str(qtl["chr"]))
                self.json_data["pos"].append(qtl["Mb"])
                # Prefer the LRS value when the marker has one
                if "lrs_value" in qtl:
                    self.json_data["lod.hk"].append(str(qtl["lrs_value"]))
                else:
                    self.json_data["lod.hk"].append(str(qtl["lod_score"]))
                self.json_data["markernames"].append(qtl["name"])

            # Get chromosome names/lengths for drawing the interval map plot
            chromosomes = self.species.chromosomes.chromosomes
            chromosome_mb_lengths = {}
            self.json_data["chrnames"] = []
            for key in chromosomes.keys():
                this_chr = chromosomes[key]
                self.json_data["chrnames"].append([this_chr.name, this_chr.mb_length])
                chromosome_mb_lengths[key] = this_chr.mb_length

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
            )
示例#7
0
    def __init__(self, kw):
        """Prepare the attributes, hidden form fields and js_data for a trait page.

        kw -- mapping of request variables. Three shapes are handled:
              * 'trait_id' present and kw['dataset'] != "Temp": a stored trait,
                loaded through helper_functions.get_species_dataset_trait
              * 'group' present: a new temporary trait built from
                kw['species'], kw['group'] and kw['trait_paste']; the pasted
                values are stored in Redis under the generated trait id
              * otherwise: an existing temporary trait whose id is
                kw['trait_id'] and whose values are read back from Redis
        """
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_" + kw['species'] + "_" + kw[
                'group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
            self.trait_vals = kw['trait_paste'].split()
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            # Temp trait ids have the form "Temp_<species>_<group>_<timestamp>"
            self.temp_species = self.trait_id.split("_")[1]
            self.temp_group = self.trait_id.split("_")[2]
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        #ZS: Get verify/rna-seq link URLs
        # Best effort: any failure in here just leaves both BLAT URLs empty.
        try:
            blatsequence = self.this_trait.blatseq

            #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
            # NOTE(review): the dataset and trait names are interpolated straight
            # into the SQL text; if g.db supports bound parameters this should
            # use them instead -- confirm against the DB layer.
            probe_query = """SELECT Probe.Sequence, Probe.Name
                        FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                        WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                              ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                              ProbeSetFreeze.Name = '%s' AND
                              ProbeSet.Name = '%s' AND
                              Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (
                self.this_trait.dataset.name, self.this_trait.name)
            # The same rows serve both the fallback sequence and the per-probe
            # entries, so the query only needs to run once.
            probe_rows = g.db.execute(probe_query).fetchall()
            # Only probes whose name ends in an odd digit are used
            odd_probes = [row for row in probe_rows if int(row[1][-1]) % 2 == 1]

            if not blatsequence:
                if not probe_rows:
                    raise ValueError("no probe sequences found for this trait")
                blatsequence = ''.join(row[0].strip() for row in odd_probes)

            #--------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe
            blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'
            blatsequence += ''.join(
                '%3EProbe_' + row[1].strip() + '%0A' + row[0].strip() + '%0A'
                for row in odd_probes)

            if self.dataset.group.species == "rat":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn6',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = ""
            elif self.dataset.group.species == "mouse":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('mouse', 'mm10',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % (
                    'mouse', 'mm10', blatsequence)
            elif self.dataset.group.species == "human":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('human', 'hg38',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = ""
            else:
                self.UCSC_BLAT_URL = ""
                self.UTHSC_BLAT_URL = ""
        except Exception:
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""

        if self.dataset.type == "ProbeSet":
            self.show_probes = "True"

        trait_units = get_trait_units(self.this_trait)
        self.get_external_links()
        self.build_correlation_tools()

        self.ncbi_summary = get_ncbi_summary(self.this_trait)

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            if check_if_attr_exists(
                    self.this_trait, 'locus_chr'
            ) and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                self.nearest_marker = get_nearest_marker(
                    self.this_trait, self.dataset)
            else:
                self.nearest_marker = ""

        self.make_sample_lists()

        self.qnorm_vals = quantile_normalize_vals(self.sample_groups)
        self.z_scores = get_z_scores(self.sample_groups)

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        self.sample_group_types['samples_primary'] = self.dataset.group.name
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_other'] = "Other"
            self.sample_group_types['samples_all'] = "All"
        sample_lists = [group.sample_list for group in self.sample_groups]

        categorical_var_list = []
        if not self.temp_trait:
            categorical_var_list = get_categorical_variables(
                self.this_trait, self.sample_groups[0]
            )  #ZS: Only using first samplelist, since I think mapping only uses those samples

        #ZS: Get list of chromosomes to select for mapping
        self.chr_list = [["All", -1]]
        for i, this_chr in enumerate(
                self.dataset.species.chromosomes.chromosomes):
            self.chr_list.append([
                self.dataset.species.chromosomes.chromosomes[this_chr].name, i
            ])

        self.genofiles = self.dataset.group.get_genofiles()

        self.has_num_cases = has_num_cases(self.this_trait)

        self.stats_table_width, self.trait_table_width = get_table_widths(
            self.sample_groups, self.has_num_cases)

        #ZS: Needed to know whether to display bar chart + get max sample name length in order to set table column width
        self.num_values = 0
        self.binary = "true"  #ZS: So it knows whether to display the Binary R/qtl mapping method, which doesn't work unless all values are 0 or 1
        self.negative_vals_exist = "false"  #ZS: Since we don't want to show log2 transform option for situations where it doesn't make sense
        max_samplename_width = 1
        for group in self.sample_groups:
            for sample in group.sample_list:
                if len(sample.name) > max_samplename_width:
                    max_samplename_width = len(sample.name)
                if sample.display_value != "x":
                    self.num_values += 1
                    # The old test (!= 0 or != 1) was true for every value, so
                    # binary was always switched off.
                    # NOTE(review): if display_value is a formatted string this
                    # still never matches 0/1 -- confirm its type.
                    if sample.display_value not in (0, 1):
                        self.binary = "false"
                    if sample.value < 0:
                        self.negative_vals_exist = "true"

        sample_column_width = max_samplename_width * 8

        if self.num_values >= 5000:
            self.maf = 0.01
        else:
            self.maf = 0.05

        trait_symbol = None
        short_description = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol
                short_description = trait_symbol

            elif hasattr(self.this_trait, 'post_publication_abbreviation'):
                short_description = self.this_trait.post_publication_abbreviation

            elif hasattr(self.this_trait, 'pre_publication_abbreviation'):
                short_description = self.this_trait.pre_publication_abbreviation

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)
        hddn['primary_samples'] = ','.join(self.primary_sample_names)
        hddn['trait_id'] = self.trait_id
        hddn['trait_display_name'] = self.this_trait.display_name
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = False
        if self.temp_trait:
            hddn['temp_trait'] = True
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "gemma"
        hddn['selected_chr'] = -1
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['categorical_vars'] = ""
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(
                    self.this_trait, 'locus_chr'
            ) and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                hddn['control_marker'] = self.nearest_marker
        hddn['do_control'] = False
        hddn['maf'] = 0.05
        hddn['compare_traits'] = []
        hddn['export_data'] = ""
        hddn['export_format'] = "excel"

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        js_data = dict(trait_id=self.trait_id,
                       trait_symbol=trait_symbol,
                       short_description=short_description,
                       unit_type=trait_units,
                       dataset_type=self.dataset.type,
                       data_scale=self.dataset.data_scale,
                       sample_group_types=self.sample_group_types,
                       sample_lists=sample_lists,
                       attribute_names=self.sample_groups[0].attributes,
                       categorical_vars=",".join(categorical_var_list),
                       num_values=self.num_values,
                       qnorm_values=self.qnorm_vals,
                       zscore_values=self.z_scores,
                       sample_column_width=sample_column_width,
                       temp_uuid=self.temp_uuid)
        self.js_data = js_data
示例#8
0
    def __init__(self, kw):
        """Prepare the attributes, hidden form fields and js_data for a trait page.

        kw -- mapping of request variables. Three shapes are handled:
              * 'trait_id' present and kw['dataset'] != "Temp": a stored trait,
                loaded through helper_functions.get_species_dataset_trait
              * 'group' present: a new temporary trait built from
                kw['species'], kw['group'] and kw['trait_paste']; the pasted
                values are stored in Redis under the generated trait id
              * otherwise: an existing temporary trait whose id is
                kw['trait_id'] and whose values are read back from Redis
        """
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_"+kw['species']+ "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = kw['trait_paste'].split()

            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            # Temp trait ids have the form "Temp_<species>_<group>_<timestamp>"
            self.temp_species = self.trait_id.split("_")[1]
            self.temp_group = self.trait_id.split("_")[2]
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        self.build_correlation_tools()

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
            else:
                self.nearest_marker = ""

        self.make_sample_lists()

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            # str.join replaces the Python-2-only string.join helper
            hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)

        hddn['trait_id'] = self.trait_id
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = False
        if self.temp_trait:
            hddn['temp_trait'] = True
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "pylmm"
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                hddn['control_marker'] = self.nearest_marker
        hddn['do_control'] = False
        hddn['maf'] = 0.01
        hddn['compare_traits'] = []
        hddn['export_data'] = ""

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        self.temp_uuid = uuid.uuid4()

        # Labels for the sample groups; the extra two only exist when there
        # is more than one group
        self.sample_group_types = OrderedDict()
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only"
            self.sample_group_types['samples_other'] = "Non-" + self.dataset.group.name
            self.sample_group_types['samples_all'] = "All Cases"
        else:
            self.sample_group_types['samples_primary'] = self.dataset.group.name
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.get_mapping_methods()

        self.trait_table_width = get_trait_table_width(self.sample_groups)

        trait_symbol = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol

        js_data = dict(trait_id = self.trait_id,
                       trait_symbol = trait_symbol,
                       dataset_type = self.dataset.type,
                       data_scale = self.dataset.data_scale,
                       sample_group_types = self.sample_group_types,
                       sample_lists = sample_lists,
                       attribute_names = self.sample_groups[0].attributes,
                       temp_uuid = self.temp_uuid)
        self.js_data = js_data
示例#9
0
    def __init__(self, start_vars, temp_uuid):
        """Run the requested mapping method and package its results for display.

        Reads the mapping options from ``start_vars``, dispatches on
        ``start_vars['method']`` to one of the mapping back ends, then fills
        ``self.qtl_results``, ``self.json_data`` and ``self.js_data`` from the
        markers that back end returned.

        Args:
            start_vars: Mapping of request values. ``method``,
                ``manhattan_plot``, ``maf`` and one ``value:<sample>`` entry
                per sample in ``self.dataset.group.samplelist`` are always
                read; the remaining keys are optional or only read for a
                particular mapping method.
            temp_uuid: Identifier stored on the instance; for the ``pylmm``
                method it is also passed (as a string) to
                ``run_permutations`` and ``gen_data``.

        Raises:
            KeyError: If a required key is missing from ``start_vars``
                (assuming it uses dict-style lookups).
            ValueError: If ``selected_chr`` or, when ``startMb`` is present,
                ``num_perm`` / ``num_bootstrap`` is not an integer string.
        """
        helper_functions.get_species_dataset_trait(self, start_vars)

        #tempdata = temp_data.TempData(temp_uuid)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        # Every sample of the group is used, so start_vars must hold a
        # 'value:<sample>' entry for each of them.
        self.samples = []
        self.vals = []
        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars['method']
        self.manhattan_plot = start_vars['manhattan_plot'] == "true"

        self.maf = start_vars['maf']  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.bootstrap_results = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            selected_chr = int(start_vars['selected_chr'])
            #ZS: Needs to be -1 if showing full map; any other value is shifted up by one
            if selected_chr != -1:
                selected_chr += 1
            self.selected_chr = selected_chr

        # Optional display settings are copied verbatim, and only when present.
        for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                       "haplotypeAnalystCheck"):
            if option in start_vars:
                setattr(self, option, start_vars[option])

        if "startMb" in start_vars:
            #ZS: startMb is only missing on the first load, so from here on
            # the check boxes reflect exactly what was submitted.
            self.permCheck = "ON" if "permCheck" in start_vars else False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            self.showSNP = start_vars['showSNP'] if "showSNP" in start_vars else False
            self.showGenes = start_vars['showGenes'] if "showGenes" in start_vars else False
            self.viewLegend = start_vars['viewLegend'] if "viewLegend" in start_vars else False
        else:
            # First load: a missing, malformed or non-positive permutation
            # count means "no permutations"; every display option defaults to ON.
            try:
                self.num_perm = max(int(start_vars['num_perm']), 0)
            except (KeyError, ValueError, TypeError):
                self.num_perm = 0

            self.LRSCheck = self.score_type
            self.permCheck = "ON"
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "LOD"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                included_markers, p_values = gemma_mapping.run_gemma(
                    self.dataset, self.samples, self.vals)
            with Bench("Getting markers from csv"):
                marker_obs = get_markers_from_csv(included_markers, p_values,
                                                  self.dataset.group.name)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            results = self.run_rqtl_geno()
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                self.additiveCheck = start_vars['additiveCheck'] if "additiveCheck" in start_vars else False
                self.bootCheck = "ON" if "bootCheck" in start_vars else False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                # Bootstrapping is switched on only by a positive, parseable count.
                try:
                    self.num_bootstrap = max(int(start_vars['num_bootstrap']), 0)
                except (KeyError, ValueError, TypeError):
                    self.num_bootstrap = 0
                self.bootCheck = "ON" if self.num_bootstrap > 0 else False

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            results = self.gen_reaper_results()
        elif self.mapping_method == "plink":
            results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")
            # Without this, `results` was unbound below and an unknown
            # method ended in a NameError.
            results = []

        if self.pair_scan:
            # Keep markers on a positive-numbered chromosome or on X / X/Y
            # that carry a LOD score.
            self.qtl_results = []
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            # These lists were never created before being appended to, and the
            # loop indexed the (index, marker) tuples produced by enumerate(),
            # so any non-empty pair scan result raised an exception.
            self.json_data['chr1'] = []
            self.json_data['chr2'] = []
            self.json_data['Mb'] = []
            self.json_data['markernames'] = []
            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                qtl_results=self.qtl_results,
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker[
                        'chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker[
                            'chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value'
                                                          in marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = trim_markers_for_table(results)

            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for qtl in self.qtl_results:
                # The highest numbered chromosome is reported as "X" unless
                # the results already name the chromosome X or X/Y.
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                # An LRS value takes precedence over a LOD score when both exist.
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

            #Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data['chrnames'] = []
            chromosomes = self.species.chromosomes.chromosomes
            for key in chromosomes.keys():
                chromosome = chromosomes[key]
                self.json_data['chrnames'].append(
                    [chromosome.name, chromosome.mb_length])
                chromosome_mb_lengths[key] = chromosome.mb_length

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
            )
    def __init__(self, start_vars, temp_uuid):
        """Run the requested mapping method and prepare results for the browser.

        Reads the mapping options from ``start_vars``, dispatches on
        ``start_vars['method']`` to GEMMA, R/qtl, qtlreaper or PLINK, and then
        builds the result lists (``qtl_results``, ``results_for_browser``,
        ``annotations_for_browser``, ``trimmed_markers``) and ``js_data``.
        When the back end returns nothing, only ``self.no_results`` is set to
        True and none of those attributes are created.

        Args:
            start_vars: Mapping of request values. ``sample_vals`` (a JSON
                object of sample name to value), ``method`` and ``maf`` are
                always read; the remaining keys are optional or only read for
                a particular mapping method.
            temp_uuid: Identifier stored on the instance as ``temp_uuid``.

        Raises:
            KeyError: If a required key is missing from ``start_vars``
                (assuming it uses dict-style lookups).
            ValueError: If ``selected_chr`` or, when ``startMb`` is present,
                ``num_perm`` / ``num_bootstrap`` is not an integer string.
        """
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        #ZS: Needed to zoom in or remap temp traits like PCA traits
        if "temp_trait" in start_vars and start_vars['temp_trait'] != "False":
            self.temp_trait = "True"
            self.group = self.dataset.group.name

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        #ZS: Sometimes a group may have a genofile that only includes a subset of samples
        genofile_samplelist = []
        if 'genofile' in start_vars and start_vars['genofile'] != "":
            self.genofile_string = start_vars['genofile']
            self.dataset.group.genofile = self.genofile_string.split(":")[0]
            genofile_samplelist = get_genofile_samplelist(self.dataset)

        # The return value is not used; the call is kept in case it has side
        # effects on the group -- NOTE(review): confirm and drop if it has none.
        self.dataset.group.all_samples_ordered()

        self.vals = []
        self.samples = []
        self.sample_vals = start_vars['sample_vals']
        sample_val_dict = json.loads(self.sample_vals)
        submitted_samples = sample_val_dict.keys()
        if len(genofile_samplelist) != 0:
            # Keep every genofile sample, using "x" as the placeholder for
            # samples that have no submitted value.
            for sample in genofile_samplelist:
                self.samples.append(sample)
                if sample in submitted_samples:
                    self.vals.append(sample_val_dict[sample])
                else:
                    self.vals.append("x")
        else:
            # Without a genofile sample list only samples with a submitted
            # value are kept.
            for sample in self.dataset.group.samplelist:
                if sample in submitted_samples:
                    self.vals.append(sample_val_dict[sample])
                    self.samples.append(sample)

        if 'n_samples' in start_vars:
            self.n_samples = start_vars['n_samples']
        else:
            self.n_samples = len([val for val in self.vals if val != "x"])

        #ZS: Check if genotypes exist in the DB in order to create links for markers
        self.geno_db_exists = geno_db_exists(self.dataset)

        self.mapping_method = start_vars['method']
        if "results_path" in start_vars:
            self.mapping_results_path = start_vars['results_path']
        else:
            # New export file: group name plus six random upper-case/digit characters.
            mapping_results_filename = self.dataset.group.name + "_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            self.mapping_results_path = "{}{}.csv".format(
                webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

        self.manhattan_plot = False
        if 'manhattan_plot' in start_vars:
            if start_vars['manhattan_plot'].lower() != "false":
                self.color_scheme = "alternating"
                if "color_scheme" in start_vars:
                    self.color_scheme = start_vars['color_scheme']
                    if self.color_scheme == "single":
                        self.manhattan_single_color = start_vars[
                            'manhattan_single_color']
                self.manhattan_plot = True

        self.maf = start_vars['maf']  # Minor allele frequency
        self.use_loco = start_vars['use_loco'] if "use_loco" in start_vars else None
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.transform = start_vars['transform'] if 'transform' in start_vars else ""
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        if "mapping_scale" in start_vars:
            self.mapping_scale = start_vars['mapping_scale']
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []
        self.covariates = start_vars[
            'covariates'] if "covariates" in start_vars else ""
        self.categorical_vars = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            selected_chr = int(start_vars['selected_chr'])
            #ZS: Needs to be -1 if showing full map; any other value is shifted up by one
            if selected_chr != -1:
                selected_chr += 1
            self.selected_chr = selected_chr

        # Optional display settings are copied verbatim, and only when present.
        for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                       "haplotypeAnalystCheck"):
            if option in start_vars:
                setattr(self, option, start_vars[option])

        if "startMb" in start_vars:
            #ZS: startMb is only missing on the first load, so from here on
            # the check boxes reflect exactly what was submitted.
            self.permCheck = "ON" if "permCheck" in start_vars else False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            self.showSNP = start_vars['showSNP'] if "showSNP" in start_vars else False
            self.showGenes = start_vars['showGenes'] if "showGenes" in start_vars else False
            self.viewLegend = start_vars['viewLegend'] if "viewLegend" in start_vars else False
        else:
            # First load: a missing, malformed or non-positive permutation
            # count means "no permutations"; the display options default to ON.
            try:
                self.num_perm = max(int(start_vars['num_perm']), 0)
            except (KeyError, ValueError, TypeError):
                self.num_perm = 0

            self.permCheck = "ON" if self.num_perm > 0 else False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        #self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.first_run = True
            self.output_files = None
            if 'output_files' in start_vars:
                self.output_files = start_vars['output_files']
            if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                self.first_run = False
            self.score_type = "-logP"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                # use_loco is forwarded as-is; the LOCO and non-LOCO cases
                # used to be two branches making this identical call.
                marker_obs, self.output_files = gemma_mapping.run_gemma(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.covariates, self.use_loco, self.maf,
                    self.first_run, self.output_files)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            perm_strata = []
            if "perm_strata" in start_vars and "categorical_vars" in start_vars:
                self.categorical_vars = start_vars["categorical_vars"].split(
                    ",")
                if len(self.categorical_vars
                       ) and start_vars["perm_strata"] == "True":
                    primary_samples = SampleList(dataset=self.dataset,
                                                 sample_names=self.samples,
                                                 this_trait=self.this_trait)

                    perm_strata = get_perm_strata(self.this_trait,
                                                  primary_samples,
                                                  self.categorical_vars,
                                                  self.samples)
            self.score_type = "LOD"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            if 'mapmethod_rqtl_geno' in start_vars:
                self.method = start_vars['mapmethod_rqtl_geno']
            else:
                self.method = "em"
            self.model = start_vars['mapmodel_rqtl_geno']
            #if start_vars['pair_scan'] == "true":
            #    self.pair_scan = True
            rqtl_output = rqtl_mapping.run_rqtl_geno(
                self.vals, self.samples, self.dataset, self.mapping_scale,
                self.method, self.model, self.permCheck, self.num_perm,
                perm_strata, self.do_control, self.control_marker,
                self.manhattan_plot, self.pair_scan, self.covariates)
            # With permutations the call returns a 4-tuple that also carries
            # the permutation output and thresholds; otherwise just the results.
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_output
            else:
                results = rqtl_output
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                self.additiveCheck = start_vars['additiveCheck'] if "additiveCheck" in start_vars else False
                self.bootCheck = "ON" if "bootCheck" in start_vars else False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                # Bootstrapping is switched on only by a positive, parseable count.
                try:
                    self.num_bootstrap = max(int(start_vars['num_bootstrap']), 0)
                except (KeyError, ValueError, TypeError):
                    self.num_bootstrap = 0
                self.bootCheck = "ON" if self.num_bootstrap > 0 else False

            self.reaper_version = start_vars['reaper_version']

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            logger.info("Running qtlreaper")

            if self.reaper_version == "new":
                self.first_run = True
                self.output_files = None
                if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                    self.first_run = False
                    if 'output_files' in start_vars:
                        self.output_files = start_vars['output_files'].split(
                            ",")

                results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.json_data, self.num_perm, self.bootCheck,
                    self.num_bootstrap, self.do_control, self.control_marker,
                    self.manhattan_plot, self.first_run, self.output_files)
            else:
                results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.json_data, self.num_perm, self.bootCheck,
                    self.num_bootstrap, self.do_control, self.control_marker,
                    self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-logP"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
            #results = self.run_plink()
        else:
            logger.debug("RUNNING NOTHING")
            # Without this, `results` was unbound below and an unknown method
            # ended in a NameError; it is now reported as "no results".
            results = []

        self.no_results = len(results) == 0
        if self.no_results:
            return

        if self.pair_scan:
            # pair_scan is never switched on in this method (the line that
            # did so is commented out above), so this branch is currently
            # unreachable; it is kept working for when pair scans return.
            self.qtl_results = []
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if 'lod_score' in marker:
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            # These lists were never created before being appended to, and the
            # loop indexed the (index, marker) tuples produced by enumerate().
            self.json_data['chr1'] = []
            self.json_data['chr2'] = []
            self.json_data['Mb'] = []
            self.json_data['markernames'] = []
            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(json_data=self.json_data,
                                this_trait=self.this_trait.name,
                                data_set=self.dataset.name,
                                maf=self.maf,
                                manhattan_plot=self.manhattan_plot,
                                mapping_scale=self.mapping_scale,
                                qtl_results=self.qtl_results)
            return

        self.qtl_results = []
        self.results_for_browser = []
        self.annotations_for_browser = []
        for marker in results:
            # The position is scaled by 1e6 whether it comes from 'Mb' or 'cM'.
            if 'Mb' in marker:
                this_ps = marker['Mb'] * 1000000
            else:
                this_ps = marker['cM'] * 1000000

            chr_name = str(marker['chr'])
            marker_url = ("/show_trait?trait_id=" + marker['name'] +
                          "&dataset=" + self.dataset.group.name + "Geno")

            browser_marker = dict(chr=chr_name,
                                  rs=marker['name'],
                                  ps=this_ps,
                                  url=marker_url)

            annot_marker = dict(name=str(marker['name']),
                                chr=chr_name,
                                rs=marker['name'],
                                pos=this_ps)
            # Annotations only link to a marker page when genotypes are in the DB.
            if self.geno_db_exists == "True":
                annot_marker['url'] = marker_url

            # Convert the score into a p-value for the browser; an LRS value
            # is first divided by 4.61 to bring it onto the LOD scale.
            if 'lrs_value' in marker and marker['lrs_value'] > 0:
                browser_marker['p_wald'] = 10**-(marker['lrs_value'] / 4.61)
            elif 'lod_score' in marker and marker['lod_score'] > 0:
                browser_marker['p_wald'] = 10**-(marker['lod_score'])
            else:
                browser_marker['p_wald'] = 0

            self.results_for_browser.append(browser_marker)
            self.annotations_for_browser.append(annot_marker)

            # Only scored markers whose chromosome name sorts after '0'
            # (string comparison) or is X / X/Y go into the QTL results.
            if chr_name > '0' or chr_name == "X" or chr_name == "X/Y":
                if 'lod_score' in marker or 'lrs_value' in marker:
                    if 'Mb' in marker:
                        marker['display_pos'] = "Chr" + chr_name + ": " + "{:.6f}".format(
                            marker['Mb'])
                    elif 'cM' in marker:
                        marker['display_pos'] = "Chr" + chr_name + ": " + "{:.3f}".format(
                            marker['cM'])
                    else:
                        marker['display_pos'] = "N/A"
                    self.qtl_results.append(marker)

        # Counted before trimming so the page can report the full total.
        total_markers = len(self.qtl_results)

        with Bench("Exporting Results"):
            export_mapping_results(self.dataset, self.this_trait,
                                   self.qtl_results,
                                   self.mapping_results_path,
                                   self.mapping_scale, self.score_type)

        with Bench("Trimming Markers for Figure"):
            if len(self.qtl_results) > 30000:
                self.qtl_results = trim_markers_for_figure(self.qtl_results)
                self.results_for_browser = trim_markers_for_figure(
                    self.results_for_browser)
                # Keep, in browser order, the first annotation sharing each
                # remaining marker's rs. A dictionary replaces the previous
                # nested scan over all annotations per marker.
                first_annotation_by_rs = {}
                for annot_marker in self.annotations_for_browser:
                    first_annotation_by_rs.setdefault(annot_marker['rs'],
                                                      annot_marker)
                self.annotations_for_browser = [
                    first_annotation_by_rs[marker['rs']]
                    for marker in self.results_for_browser
                    if marker['rs'] in first_annotation_by_rs
                ]
            browser_files = write_input_for_browser(
                self.dataset, self.results_for_browser,
                self.annotations_for_browser)

        with Bench("Trimming Markers for Table"):
            self.trimmed_markers = trim_markers_for_table(results)

        chr_lengths = get_chr_lengths(self.mapping_scale,
                                      self.mapping_method,
                                      self.dataset, self.qtl_results)

        #ZS: For zooming into genome browser, need to pass chromosome name instead of number
        named_chromosomes = {
            "mouse": {20: "X"},
            "rat": {21: "X"},
        }.get(self.dataset.group.species, {22: "X", 23: "Y"})
        this_chr = named_chromosomes.get(self.selected_chr,
                                         str(self.selected_chr))

        if self.mapping_method != "gemma":
            # self.significant stays "" when the back end supplied no
            # threshold; dividing that by 4.61 raised a TypeError, so the
            # LRS -> LOD conversion is only applied to an actual value.
            if self.score_type == "LRS" and self.significant != "":
                significant_for_browser = self.significant / 4.61
            else:
                significant_for_browser = self.significant

            self.js_data = dict(
                categorical_vars=self.categorical_vars,
                chr_lengths=chr_lengths,
                num_perm=self.num_perm,
                perm_results=self.perm_output,
                significant=significant_for_browser,
                browser_files=browser_files,
                selected_chr=this_chr,
                total_markers=total_markers)
        else:
            self.js_data = dict(chr_lengths=chr_lengths,
                                browser_files=browser_files,
                                selected_chr=this_chr,
                                total_markers=total_markers)
    def __init__(self, start_vars, temp_uuid):
        """Run the selected mapping method and package its results for plotting.

        Args:
            start_vars: Mapping of submitted form values. ``method``,
                ``manhattan_plot``, ``maf`` and one ``value:<sample>`` entry
                for every sample in ``self.dataset.group.samplelist`` are
                always read; the ``rqtl_geno`` and ``pylmm`` methods read
                additional keys (``num_perm``, ``control_marker``,
                ``mapmethod_rqtl_geno``, ``mapmodel_rqtl_geno``,
                ``pair_scan``).
            temp_uuid: Identifier handed, as a string, to the pylmm
                permutation and data-generation steps.

        Raises:
            KeyError: If a key read from ``start_vars`` is missing.
        """
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        # One submitted value per sample of the group, in samplelist order.
        self.samples = []
        self.vals = []
        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars['method']
        self.manhattan_plot = start_vars['manhattan_plot'] == "true"

        self.maf = start_vars['maf'] # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False # Initializing this since it is checked in views to determine which template to use

        self.dataset.group.get_markers()

        # Start from an empty result set so that an unrecognised method gives
        # empty output instead of a NameError in the filtering loop below.
        qtl_results = []
        if self.mapping_method == "gemma":
            qtl_results = self.run_gemma()
        elif self.mapping_method == "rqtl_plink":
            qtl_results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            if start_vars['num_perm'] == "":
                self.num_perm = 0
            else:
                self.num_perm = start_vars['num_perm']
            self.control = start_vars['control_marker']
            print("StartVars:", start_vars)
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']

            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            print("pair scan:", self.pair_scan)

            print("DOING RQTL GENO")
            qtl_results = self.run_rqtl_geno()
            print("qtl_results:", qtl_results)
        elif self.mapping_method == "plink":
            qtl_results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            self.num_perm = start_vars['num_perm']
            # Permutations only run for a non-empty, positive count.
            if self.num_perm != "" and int(self.num_perm) > 0:
                self.run_permutations(str(temp_uuid))
            qtl_results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")

        sex_chr_labels = ("X", "X/Y")

        self.lod_cutoff = 2
        self.filtered_markers = []
        highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
        for marker in qtl_results:
            chrom = marker['chr']
            # Test the string labels first so that a sex-chromosome label is
            # never ordered against an int (a TypeError on Python 3).
            is_sex_chr = chrom in sex_chr_labels
            if not (is_sex_chr or chrom > 0):
                continue
            # Once a sex chromosome is recorded as the highest, a numbered
            # chromosome never replaces it.
            if is_sex_chr or (highest_chr not in sex_chr_labels
                              and chrom > highest_chr):
                highest_chr = chrom
            if 'lod_score' in marker:
                self.filtered_markers.append(marker)

        self.json_data['chr'] = []
        self.json_data['pos'] = []
        self.json_data['lod.hk'] = []
        self.json_data['markernames'] = []

        self.json_data['suggestive'] = self.suggestive
        self.json_data['significant'] = self.significant

        #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
        self.qtl_results = []
        for qtl in self.filtered_markers:
            print("lod score is:", qtl['lod_score'])
            # The highest numbered chromosome is relabelled "X" for display.
            if qtl['chr'] == highest_chr and highest_chr not in sex_chr_labels:
                print("changing to X")
                self.json_data['chr'].append("X")
            else:
                self.json_data['chr'].append(str(qtl['chr']))
            self.json_data['pos'].append(qtl['Mb'])
            self.json_data['lod.hk'].append(str(qtl['lod_score']))
            self.json_data['markernames'].append(qtl['name'])

        #Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        for key, chromosome in self.species.chromosomes.chromosomes.items():
            self.json_data['chrnames'].append([chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        print("json_data:", self.json_data)

        self.js_data = dict(
            json_data = self.json_data,
            this_trait = self.this_trait.name,
            data_set = self.dataset.name,
            maf = self.maf,
            manhattan_plot = self.manhattan_plot,
            chromosomes = chromosome_mb_lengths,
            qtl_results = self.filtered_markers,
        )
示例#12
0
    def __init__(self, start_vars):
        """Correlate the base trait against every trait of a target dataset.

        Validates the request, gathers the sample values to correlate on,
        runs the requested correlation type ("sample", "tissue" or "lit"),
        filters the strongest results by correlation range, mean expression
        and genomic location, and stores them in ``self.correlation_results``
        and ``self.json_results``.

        Args:
            start_vars: Mapping of submitted form values. ``corr_type``,
                ``dataset``, ``corr_sample_method``, ``corr_samples_group``,
                ``corr_dataset`` and ``corr_return_results`` are required;
                ``loc_chr`` additionally requires ``min_loc_mb`` and
                ``max_loc_mb``. For the "Temp" dataset, ``group`` and
                ``trait_id`` are read as well.

        Raises:
            AssertionError: If a required key is missing or ``corr_type`` is
                not a string.
        """
        # Check parameters
        assert('corr_type' in start_vars)
        assert(is_str(start_vars['corr_type']))
        assert('dataset' in start_vars)
        # assert('group' in start_vars) permitted to be empty?
        assert('corr_sample_method' in start_vars)
        assert('corr_samples_group' in start_vars)
        assert('corr_dataset' in start_vars)
        assert('corr_return_results' in start_vars)
        if 'loc_chr' in start_vars:
            assert('min_loc_mb' in start_vars)
            assert('max_loc_mb' in start_vars)

        with Bench("Doing correlations"):
            if start_vars['dataset'] == "Temp":
                self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
                self.trait_id = start_vars['trait_id']
                self.this_trait = create_trait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            else:
                helper_functions.get_species_dataset_trait(self, start_vars)

            corr_samples_group = start_vars['corr_samples_group']

            self.sample_data = {}
            self.corr_type = start_vars['corr_type']
            self.corr_method = start_vars['corr_sample_method']
            self.min_expr = get_float(start_vars, 'min_expr')
            self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0)
            self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0)

            if ('loc_chr' in start_vars and
                'min_loc_mb' in start_vars and
                'max_loc_mb' in start_vars):

                self.location_type = get_string(start_vars, 'location_type')
                self.location_chr = get_string(start_vars, 'loc_chr')
                self.min_location_mb = get_int(start_vars, 'min_loc_mb')
                self.max_location_mb = get_int(start_vars, 'max_loc_mb')
            else:
                self.location_type = self.location_chr = self.min_location_mb = self.max_location_mb = None

            self.get_formatted_corr_type()
            self.return_number = int(start_vars['corr_return_results'])

            # Samples are added to self.sample_data depending on whether the
            # user selected Primary Samples Only, Other Samples Only, or
            # All Samples.
            group = self.dataset.group
            # parlist / f1list may be None; treat that as "no such samples".
            parents_and_f1s = list(group.parlist or ()) + list(group.f1list or ())
            # Work on a copy: extending the group's own samplelist in place
            # would leak the parents/F1s into every later user of the group.
            primary_samples = list(group.samplelist)
            primary_samples.extend(parents_and_f1s)

            #If either BXD/whatever Only or All Samples, append all of that group's samplelist
            if corr_samples_group != 'samples_other':
                self.process_samples(start_vars, primary_samples)

            #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
            #exclude the primary samples (because they would have been added in the previous
            #if statement if the user selected All Samples)
            if corr_samples_group != 'samples_primary':
                if corr_samples_group == 'samples_other':
                    # Parents and F1s count as "other" samples here, so they
                    # are taken back out of the exclusion list.
                    primary_samples = [x for x in primary_samples
                                       if x not in parents_and_f1s]
                self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples)

            self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
            self.target_dataset.get_trait_data(list(self.sample_data.keys()))

            self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method)

            if self.target_dataset.type == "ProbeSet":
                self.filter_cols = [7, 6]
            elif self.target_dataset.type == "Publish":
                self.filter_cols = [6, 0]
            else:
                self.filter_cols = [4, 0]

            self.correlation_results = []

            self.correlation_data = {}

            if self.corr_type == "tissue":
                self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

                tissue_corr_data = self.do_tissue_correlation_for_all_traits()
                if tissue_corr_data is not None:
                    for trait in list(tissue_corr_data.keys())[:self.return_number]:
                        self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
                else:
                    for trait, values in list(self.target_dataset.trait_data.items()):
                        self.get_sample_r_and_p_values(trait, values)

            elif self.corr_type == "lit":
                self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
                lit_corr_data = self.do_lit_correlation_for_all_traits()

                for trait in list(lit_corr_data.keys())[:self.return_number]:
                    self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])

            elif self.corr_type == "sample":
                for trait, values in list(self.target_dataset.trait_data.items()):
                    self.get_sample_r_and_p_values(trait, values)

            # Strongest absolute correlation first.
            self.correlation_data = collections.OrderedDict(
                sorted(self.correlation_data.items(),
                       key=lambda t: -abs(t[1][0])))

            # Chromosome name -> order id, built once instead of rescanning
            # the chromosome table for every trait. A later entry overrides
            # an earlier one with the same name, as the scan did.
            chr_order_by_name = {
                chr_info.name: order_id
                for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.items()
            }

            #ZS: Convert min/max chromosome to an int for the location range option
            range_chr_as_int = None
            if 'loc_chr' in start_vars:
                range_chr_as_int = chr_order_by_name.get(self.location_chr)

            # Location filters use the peak-LOD locus or the trait's own
            # position, depending on the requested location type.
            use_locus = self.location_type == "highest_lod"
            mb_attr = 'locus_mb' if use_locus else 'mb'

            for trait in list(self.correlation_data.keys())[:self.return_number]:
                trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
                if not trait_object:
                    continue

                trait_chr = trait_object.locus_chr if use_locus else trait_object.chr
                chr_as_int = chr_order_by_name.get(trait_chr, 0)

                sample_r = float(self.correlation_data[trait][0])
                if not (self.p_range_lower <= sample_r <= self.p_range_upper):
                    continue

                if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
                    if (self.min_expr is not None) and (float(trait_object.mean) < self.min_expr):
                        continue

                if range_chr_as_int is not None and (chr_as_int != range_chr_as_int):
                    continue
                if (self.min_location_mb is not None and
                        float(getattr(trait_object, mb_attr)) < float(self.min_location_mb)):
                    continue
                if (self.max_location_mb is not None and
                        float(getattr(trait_object, mb_attr)) > float(self.max_location_mb)):
                    continue

                (trait_object.sample_r,
                trait_object.sample_p,
                trait_object.num_overlap) = self.correlation_data[trait]

                # Set some sane defaults
                trait_object.tissue_corr = 0
                trait_object.tissue_pvalue = 0
                trait_object.lit_corr = 0
                if self.corr_type == "tissue" and tissue_corr_data is not None:
                    trait_object.tissue_corr = tissue_corr_data[trait][1]
                    trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                elif self.corr_type == "lit":
                    trait_object.lit_corr = lit_corr_data[trait][1]

                self.correlation_results.append(trait_object)

            if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_lit_correlation_for_trait_list()

            if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_tissue_correlation_for_trait_list()

        self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
示例#13
0
    def __init__(self, kw):
        """Load a stored or temporary trait and prepare the trait page data.

        Three request shapes are handled:

        * ``trait_id`` present and ``dataset`` is not "Temp": a stored trait
          is loaded through ``helper_functions.get_species_dataset_trait``.
        * ``group`` present: a new temporary trait is created from the pasted
          values in ``trait_paste`` and those values are saved in Redis under
          the generated trait id.
        * otherwise: an existing temporary trait is re-opened; species and
          group are parsed out of ``trait_id`` and the values are read back
          from Redis.

        On return ``self.hddn`` holds the hidden form defaults and
        ``self.js_data`` the values handed to the page's JavaScript.

        Args:
            kw: Mapping of request values (see the three cases above).
        """
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_" + kw['species'] + "_" + kw[
                'group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = kw['trait_paste'].split()

            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            # Temporary ids have the form "Temp_<species>_<group>_<timestamp>".
            id_parts = self.trait_id.split("_")
            self.temp_species = id_parts[1]
            self.temp_group = id_parts[2]
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            # NOTE(review): this fails if the key is no longer in Redis --
            # confirm whether that case needs a friendlier error.
            self.trait_vals = Redis.get(self.trait_id).split()

        self.build_correlation_tools()

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            has_mappable_locus = (
                hasattr(self.this_trait, 'locus_chr')
                and self.this_trait.locus_chr != ""
                and self.dataset.type not in ("Geno", "Publish"))
            if has_mappable_locus:
                self.nearest_marker = get_nearest_marker(
                    self.this_trait, self.dataset)
            else:
                self.nearest_marker = ""

        self.make_sample_lists()

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            # str.join works on both Python 2 and 3; string.join() was
            # removed in Python 3.
            hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)

        hddn['trait_id'] = self.trait_id
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = self.temp_trait
        if self.temp_trait:
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "pylmm"
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['manhattan_plot'] = ""
        # Temporary traits have no nearest marker; stored traits reuse the
        # value computed above (which is "" when there is no usable locus).
        hddn['control_marker'] = "" if self.temp_trait else self.nearest_marker
        hddn['do_control'] = False
        hddn['maf'] = 0.01
        hddn['compare_traits'] = []
        hddn['export_data'] = ""

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        group_name = self.dataset.group.name
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_primary'] = group_name + " Only"
            self.sample_group_types['samples_other'] = "Non-" + group_name
            self.sample_group_types['samples_all'] = "All Cases"
        else:
            self.sample_group_types['samples_primary'] = group_name
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.get_mapping_methods()

        self.trait_table_width = get_trait_table_width(self.sample_groups)

        trait_symbol = None
        if not self.temp_trait and self.this_trait.symbol:
            trait_symbol = self.this_trait.symbol

        self.js_data = dict(trait_id=self.trait_id,
                            trait_symbol=trait_symbol,
                            dataset_type=self.dataset.type,
                            data_scale=self.dataset.data_scale,
                            sample_group_types=self.sample_group_types,
                            sample_lists=sample_lists,
                            attribute_names=self.sample_groups[0].attributes,
                            temp_uuid=self.temp_uuid)
示例#14
0
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = []  # Want only ones with values
        self.vals = []

        all_samples_ordered = self.dataset.group.all_samples_ordered()
        primary_sample_names = list(all_samples_ordered)

        for sample in self.dataset.group.samplelist:
            # sample is actually the name of an individual
            in_trait_data = False
            for item in self.this_trait.data:
                if self.this_trait.data[item].name == sample:
                    value = start_vars['value:' +
                                       self.this_trait.data[item].name]
                    self.samples.append(self.this_trait.data[item].name)
                    self.vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                value = start_vars.get('value:' + sample)
                if value:
                    self.samples.append(sample)
                    self.vals.append(value)

        #ZS: Check if genotypes exist in the DB in order to create links for markers
        if "geno_db_exists" in start_vars:
            self.geno_db_exists = start_vars['geno_db_exists']
        else:
            try:
                self.geno_db_exists = "True"
            except:
                self.geno_db_exists = "False"

        self.mapping_method = start_vars['method']
        if "results_path" in start_vars:
            self.mapping_results_path = start_vars['results_path']
        else:
            mapping_results_filename = self.dataset.group.name + "_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            self.mapping_results_path = "{}{}.csv".format(
                webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

        if start_vars['manhattan_plot'] == "True":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars['maf']  # Minor allele frequency
        if "use_loco" in start_vars:
            self.use_loco = start_vars['use_loco']
        else:
            self.use_loco = None
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []
        self.covariates = start_vars[
            'covariates'] if "covariates" in start_vars else None

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(
                    start_vars['selected_chr']
            ) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except:
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        if 'genofile' in start_vars:
            if start_vars['genofile'] != "":
                self.genofile_string = start_vars['genofile']
                self.dataset.group.genofile = self.genofile_string.split(
                    ":")[0]
        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                marker_obs = gemma_mapping.run_gemma(self.dataset,
                                                     self.samples, self.vals,
                                                     self.covariates,
                                                     self.use_loco)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            #if start_vars['pair_scan'] == "true":
            #    self.pair_scan = True
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
            else:
                results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            logger.info("Running qtlreaper")
            results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.json_data, self.num_perm, self.bootCheck,
                self.num_bootstrap, self.do_control, self.control_marker,
                self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
            #results = self.run_plink()
        elif self.mapping_method == "pylmm":
            logger.debug("RUNNING PYLMM")
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            logger.debug("RUNNING NOTHING")

        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker[
                            'chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            for qtl in enumerate(self.qtl_results):
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(json_data=self.json_data,
                                this_trait=self.this_trait.name,
                                data_set=self.dataset.name,
                                maf=self.maf,
                                manhattan_plot=self.manhattan_plot,
                                mapping_scale=self.mapping_scale,
                                qtl_results=self.qtl_results)

        else:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker[
                        'chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker[
                            'chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value'
                                                          in marker.keys()):
                        self.qtl_results.append(marker)

            with Bench("Exporting Results"):
                export_mapping_results(self.dataset, self.this_trait,
                                       self.qtl_results,
                                       self.mapping_results_path,
                                       self.mapping_scale, self.score_type)

            with Bench("Trimming Markers for Figure"):
                if len(self.qtl_results) > 30000:
                    self.qtl_results = trim_markers_for_figure(
                        self.qtl_results)

            with Bench("Trimming Markers for Table"):
                self.trimmed_markers = trim_markers_for_table(results)

            if self.mapping_method != "gemma":
                self.json_data['chr'] = []
                self.json_data['pos'] = []
                self.json_data['lod.hk'] = []
                self.json_data['markernames'] = []

                self.json_data['suggestive'] = self.suggestive
                self.json_data['significant'] = self.significant

                #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
                for index, qtl in enumerate(self.qtl_results):
                    #if index<40:
                    #    logger.debug("lod score is:", qtl['lod_score'])
                    if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                        #logger.debug("changing to X")
                        self.json_data['chr'].append("X")
                    else:
                        self.json_data['chr'].append(str(qtl['chr']))
                    self.json_data['pos'].append(qtl['Mb'])
                    if 'lrs_value' in qtl.keys():
                        self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                    else:
                        self.json_data['lod.hk'].append(str(qtl['lod_score']))
                    self.json_data['markernames'].append(qtl['name'])

                #Get chromosome lengths for drawing the interval map plot
                chromosome_mb_lengths = {}
                self.json_data['chrnames'] = []
                for key in self.species.chromosomes.chromosomes.keys():
                    self.json_data['chrnames'].append([
                        self.species.chromosomes.chromosomes[key].name,
                        self.species.chromosomes.chromosomes[key].mb_length
                    ])
                    chromosome_mb_lengths[
                        key] = self.species.chromosomes.chromosomes[
                            key].mb_length

                self.js_data = dict(
                    result_score_type=self.score_type,
                    json_data=self.json_data,
                    this_trait=self.this_trait.name,
                    data_set=self.dataset.name,
                    maf=self.maf,
                    manhattan_plot=self.manhattan_plot,
                    mapping_scale=self.mapping_scale,
                    chromosomes=chromosome_mb_lengths,
                    qtl_results=self.qtl_results,
                    num_perm=self.num_perm,
                    perm_results=self.perm_output,
                )
    def __init__(self, start_vars, temp_uuid):
        """Run the selected mapping method and assemble the plot payload.

        Args:
            start_vars: mapping of submitted form fields. 'method',
                'manhattan_plot', 'maf' and one 'value:<sample>' entry for
                every sample in self.dataset.group.samplelist are always
                read; 'selected_chr' is optional. The rqtl_geno, reaper and
                pylmm branches read further keys ('num_perm',
                'control_marker', 'do_control', 'mapmethod_rqtl_geno',
                'mapmodel_rqtl_geno', 'pair_scan').
            temp_uuid: stored as self.temp_uuid and handed, as a string, to
                the pylmm helpers.

        On return self.qtl_results holds the markers kept for display and
        self.js_data the dictionary built from them.
        """
        # Presumably fills in self.dataset, self.this_trait and self.species,
        # which are all read below -- confirm against helper_functions.
        helper_functions.get_species_dataset_trait(self, start_vars)

        # Needed to pass temp_uuid to the GN1 mapping code
        # (marker_regression_gn1.py).
        self.temp_uuid = temp_uuid

        self.json_data = {'lodnames': ['lod.hk']}

        # Every sample of the group is used, so start_vars must carry a
        # 'value:<sample>' entry for each of them.
        self.samples = []
        self.vals = []
        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars['method']
        self.manhattan_plot = start_vars['manhattan_plot'] == "true"

        self.maf = start_vars['maf']  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        # Initialised here because the views check it to choose a template.
        self.pair_scan = False
        self.score_type = "LRS"  # ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0

        # ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            self.selected_chr = int(start_vars['selected_chr'])

        self.dataset.group.get_markers()

        # Default for an unrecognised method, so the post-processing below
        # sees an empty result set instead of an unbound name.
        results = []
        if self.mapping_method == "gemma":
            self.score_type = "LOD"
            included_markers, p_values = gemma_mapping.run_gemma(
                self.dataset, self.samples, self.vals)
            self.dataset.group.get_specified_markers(markers=included_markers)
            self.dataset.group.markers.add_pvalues(p_values)
            results = self.dataset.group.markers.markers
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            # NOTE(review): unlike the reaper branch, a non-empty value is
            # kept exactly as submitted (no int() conversion) -- confirm
            # what run_rqtl_geno() expects before changing it.
            if start_vars['num_perm'] == "":
                self.num_perm = 0
            else:
                self.num_perm = start_vars['num_perm']
            self.control = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']

            if start_vars['pair_scan'] == "true":
                self.pair_scan = True

            results = self.run_rqtl_geno()
        elif self.mapping_method == "reaper":
            if start_vars['num_perm'] == "":
                self.num_perm = 0
            else:
                self.num_perm = int(start_vars['num_perm'])
            self.additive = False
            self.control = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            results = self.gen_reaper_results()
        elif self.mapping_method == "plink":
            results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            self.num_perm = start_vars['num_perm']
            if self.num_perm != "" and int(self.num_perm) > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")

        if self.pair_scan:
            # Keep markers on a positive or X / X/Y chromosome that carry a
            # LOD score.
            self.qtl_results = []
            for marker in results:
                chrom = marker['chr1']
                if chrom > 0 or chrom == "X" or chrom == "X/Y":
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            # The per-column lists have to exist before they are appended to;
            # setdefault keeps any list an earlier step may have created.
            for column in ('chr1', 'chr2', 'Mb', 'markernames'):
                self.json_data.setdefault(column, [])

            # Iterate the markers themselves: enumerate() would yield
            # (index, marker) tuples, which cannot be indexed by key.
            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                qtl_results=self.qtl_results,
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            # This is needed in order to convert the highest chr to X/Y
            highest_chr = 1
            for marker in results:
                chrom = marker['chr']
                if chrom > 0 or chrom == "X" or chrom == "X/Y":
                    if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                        highest_chr = chrom
                    if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                        self.qtl_results.append(marker)

            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            # Need to convert the QTL objects that qtl reaper returns into a
            # json serializable dictionary
            for qtl in self.qtl_results:
                # A numeric highest chromosome is reported as "X".
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    print("changing to X")
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                # An LRS value, when present, takes precedence over the LOD
                # score.
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

            # Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data['chrnames'] = []
            chromosomes = self.species.chromosomes.chromosomes
            for key in chromosomes.keys():
                chromosome = chromosomes[key]
                self.json_data['chrnames'].append(
                    [chromosome.name, chromosome.mb_length])
                chromosome_mb_lengths[key] = chromosome.mb_length

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
            )
示例#16
0
    def __init__(self, start_vars, temp_uuid):
        """Run the selected mapping method and assemble the display state.

        Args:
            start_vars: mapping of submitted form fields. 'method',
                'manhattan_plot' and 'maf' are always read, plus a
                'value:<sample>' entry per sample. Optional display fields:
                'selected_chr', 'startMb', 'endMb', 'graphWidth', 'lrsMax',
                'haplotypeAnalystCheck', 'permCheck', 'num_perm', 'LRSCheck',
                'showSNP', 'showGenes', 'viewLegend'. Method-specific fields:
                'control_marker', 'do_control', 'genofile',
                'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno', 'pair_scan',
                'additiveCheck', 'bootCheck', 'num_bootstrap'.
            temp_uuid: stored as self.temp_uuid and handed, as a string, to
                the pylmm helpers.

        On return self.qtl_results and self.trimmed_markers are set.
        self.js_data is set for pair scans and for every method other than
        "gemma".
        """
        # Presumably fills in self.dataset, self.this_trait and self.species,
        # which are all read below -- confirm against helper_functions.
        helper_functions.get_species_dataset_trait(self, start_vars)

        # Needed to pass temp_uuid to the GN1 mapping code
        # (marker_regression_gn1.py).
        self.temp_uuid = temp_uuid

        self.json_data = {'lodnames': ['lod.hk']}

        self.samples = []  # Want only ones with values
        self.vals = []

        # NOTE(review): neither of these two values is used later in this
        # method; they are kept only in case all_samples_ordered() has side
        # effects -- confirm and remove.
        all_samples_ordered = self.dataset.group.all_samples_ordered()
        primary_sample_names = list(all_samples_ordered)

        for sample in self.dataset.group.samplelist:
            # sample is actually the name of an individual
            for item in self.this_trait.data:
                trait_sample_name = self.this_trait.data[item].name
                if trait_sample_name == sample:
                    # Samples present in the trait data must have a
                    # submitted value.
                    value = start_vars['value:' + trait_sample_name]
                    self.samples.append(trait_sample_name)
                    self.vals.append(value)
                    break
            else:
                # Samples absent from the trait data are used only when a
                # non-empty value was submitted for them.
                value = start_vars.get('value:' + sample)
                if value:
                    self.samples.append(sample)
                    self.vals.append(value)

        self.mapping_method = start_vars['method']
        self.manhattan_plot = start_vars['manhattan_plot'] == "True"

        self.maf = start_vars['maf']  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        # Initialised here because the views check it to choose a template.
        self.pair_scan = False
        self.score_type = "LRS"  # ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []

        # ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            # ZS: Needs to be -1 if showing full map; there's probably a
            # better way to fix this. Any other value is shifted up by one.
            selected_chr = int(start_vars['selected_chr'])
            if selected_chr != -1:
                selected_chr += 1
            self.selected_chr = selected_chr

        # Optional display settings are copied over only when submitted.
        for field in ("startMb", "endMb", "graphWidth", "lrsMax",
                      "haplotypeAnalystCheck"):
            if field in start_vars:
                setattr(self, field, start_vars[field])

        # ZS: This is to ensure showGenes, Legend, etc are checked the first
        # time you open the mapping page, since startMb will only not be set
        # during the first load
        if "startMb" in start_vars:
            self.permCheck = "ON" if "permCheck" in start_vars else False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            for field in ("showSNP", "showGenes", "viewLegend"):
                if field in start_vars:
                    setattr(self, field, start_vars[field])
                else:
                    setattr(self, field, False)
        else:
            # A missing or non-numeric permutation count means none.
            try:
                requested_perm = int(start_vars['num_perm'])
            except (KeyError, TypeError, ValueError):
                requested_perm = 0
            if requested_perm > 0:
                self.num_perm = requested_perm

            self.permCheck = "ON" if self.num_perm > 0 else False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        self.dataset.group.get_markers()

        # Default for an unrecognised method, so the post-processing below
        # sees an empty result set instead of an unbound name.
        # NOTE(review): confirm trim_markers_for_table() accepts an empty
        # list.
        results = []
        if self.mapping_method == "gemma":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                results = gemma_mapping.run_gemma(
                    self.dataset, self.samples, self.vals)
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            rqtl_output = rqtl_mapping.run_rqtl_geno(
                self.vals, self.dataset, self.method, self.model,
                self.permCheck, self.num_perm, self.do_control,
                self.control_marker, self.manhattan_plot, self.pair_scan)
            # With permutations requested the call's return value is unpacked
            # into four parts; otherwise it is used as the results directly.
            if self.permCheck and self.num_perm > 0:
                (self.perm_output, self.suggestive, self.significant,
                 results) = rqtl_output
            else:
                results = rqtl_output
        elif self.mapping_method == "reaper":
            # ZS: Check if first time page loaded, so it can default to ON
            if "startMb" in start_vars:
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                self.bootCheck = "ON" if "bootCheck" in start_vars else False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                # A missing or non-numeric bootstrap count means none.
                try:
                    requested_bootstrap = int(start_vars['num_bootstrap'])
                except (KeyError, TypeError, ValueError):
                    requested_bootstrap = 0
                if requested_bootstrap > 0:
                    self.bootCheck = "ON"
                    self.num_bootstrap = requested_bootstrap
                else:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            logger.info("Running qtlreaper")
            reaper_output = qtlreaper_mapping.gen_reaper_results(
                self.this_trait,
                self.dataset,
                self.samples,
                self.json_data,
                self.num_perm,
                self.bootCheck,
                self.num_bootstrap,
                self.do_control,
                self.control_marker,
                self.manhattan_plot)
            (results, self.json_data, self.perm_output, self.suggestive,
             self.significant, self.bootstrap_results) = reaper_output
        elif self.mapping_method == "plink":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(
                self.this_trait, self.dataset, self.species, self.vals,
                self.maf)
        elif self.mapping_method == "pylmm":
            logger.debug("RUNNING PYLMM")
            self.dataset.group.genofile = start_vars['genofile']
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            logger.debug("RUNNING NOTHING")

        if self.pair_scan:
            # Keep markers on a positive or X / X/Y chromosome that carry a
            # LOD score.
            self.qtl_results = []
            for marker in results:
                chrom = marker['chr1']
                if chrom > 0 or chrom == "X" or chrom == "X/Y":
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            # The per-column lists have to exist before they are appended to;
            # setdefault keeps any list an earlier step may have created.
            for column in ('chr1', 'chr2', 'Mb', 'markernames'):
                self.json_data.setdefault(column, [])

            # Iterate the markers themselves: enumerate() would yield
            # (index, marker) tuples, which cannot be indexed by key.
            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                qtl_results=self.qtl_results,
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            # This is needed in order to convert the highest chr to X/Y
            highest_chr = 1
            for marker in results:
                chrom = marker['chr']
                if chrom > 0 or chrom == "X" or chrom == "X/Y":
                    if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                        highest_chr = chrom
                    if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = trim_markers_for_table(results)

            # No json_data/js_data payload is built here for "gemma".
            if self.mapping_method != "gemma":
                self.json_data['chr'] = []
                self.json_data['pos'] = []
                self.json_data['lod.hk'] = []
                self.json_data['markernames'] = []

                self.json_data['suggestive'] = self.suggestive
                self.json_data['significant'] = self.significant

                # Need to convert the QTL objects that qtl reaper returns
                # into a json serializable dictionary
                for qtl in self.qtl_results:
                    # A numeric highest chromosome is reported as "X".
                    if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                        self.json_data['chr'].append("X")
                    else:
                        self.json_data['chr'].append(str(qtl['chr']))
                    self.json_data['pos'].append(qtl['Mb'])
                    # An LRS value, when present, takes precedence over the
                    # LOD score.
                    if 'lrs_value' in qtl.keys():
                        self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                    else:
                        self.json_data['lod.hk'].append(str(qtl['lod_score']))
                    self.json_data['markernames'].append(qtl['name'])

                # Get chromosome lengths for drawing the interval map plot
                chromosome_mb_lengths = {}
                self.json_data['chrnames'] = []
                chromosomes = self.species.chromosomes.chromosomes
                for key in chromosomes.keys():
                    chromosome = chromosomes[key]
                    self.json_data['chrnames'].append(
                        [chromosome.name, chromosome.mb_length])
                    chromosome_mb_lengths[key] = chromosome.mb_length

                self.js_data = dict(
                    result_score_type=self.score_type,
                    json_data=self.json_data,
                    this_trait=self.this_trait.name,
                    data_set=self.dataset.name,
                    maf=self.maf,
                    manhattan_plot=self.manhattan_plot,
                    mapping_scale=self.mapping_scale,
                    chromosomes=chromosome_mb_lengths,
                    qtl_results=self.qtl_results,
                    num_perm=self.num_perm,
                    perm_results=self.perm_output,
                )
示例#17
0
    def __init__(self, kw):
        """Load one trait and prepare its BLAT links, form defaults and JS data.

        Args:
            kw: mapping of request fields. Three cases are handled:
                * 'trait_id' present and kw['dataset'] != "Temp": a stored
                  trait, loaded through helper_functions.
                * otherwise, 'group' present: a new temporary trait built
                  from kw['species'], kw['group'] and the pasted values in
                  kw['trait_paste'], which are also saved in Redis under the
                  generated trait id.
                * otherwise: an existing temporary trait whose species and
                  group are parsed out of kw['trait_id'] and whose values are
                  read back from Redis.

        Sets, among others, self.UCSC_BLAT_URL, self.UTHSC_BLAT_URL,
        self.hddn, self.temp_uuid, self.sample_group_types and self.js_data.
        """
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_" + kw['species'] + "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = kw['trait_paste'].split()

            # Put values in Redis so they can be looked up later if added to
            # a collection
            Redis.set(self.trait_id, kw['trait_paste'])
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            # The id has the form "Temp_<species>_<group>_<timestamp>" (see
            # the branch above).
            id_parts = self.trait_id.split("_")
            self.temp_species = id_parts[1]
            self.temp_group = id_parts[2]
            self.dataset = data_set.create_dataset(dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        # ZS: Get verify/rna-seq link URLs
        # Best effort: any failure while building the links (missing
        # attribute, no probe rows, DB error) leaves both URLs empty.
        try:
            blatsequence = self.this_trait.blatseq

            # XZ, 06/03/2009: ProbeSet name is not unique among platforms.
            # We should use ProbeSet Id instead.
            # NOTE(review): the dataset and trait names are interpolated
            # straight into the SQL text; switch to bound parameters once
            # the parameter style accepted by g.db.execute() is confirmed.
            probe_query = """SELECT Probe.Sequence, Probe.Name
                             FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                             WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                                   ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                                   ProbeSetFreeze.Name = '%s' AND
                                   ProbeSet.Name = '%s' AND
                                   Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (self.this_trait.dataset.name, self.this_trait.name)

            seqs = None
            if not blatsequence:
                seqs = g.db.execute(probe_query).fetchall()
                if not seqs:
                    # Nothing to BLAT; handled by the except clause below.
                    raise ValueError
                # Only probes whose name ends in an odd digit contribute.
                blatsequence = ''.join(
                    seqt[0].strip() for seqt in seqs
                    if int(seqt[1][-1]) % 2 == 1)

            # --------Hongqiang add this part in order to not only blat
            # ProbeSet, but also blat Probe
            blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'

            # Reuse the rows fetched above when there are any; the query is
            # identical.
            if seqs is None:
                seqs = g.db.execute(probe_query).fetchall()
            for seqt in seqs:
                if int(seqt[1][-1]) % 2 == 1:
                    blatsequence += '%3EProbe_' + seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A'

            if self.dataset.group.species == "rat":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn3', blatsequence)
                self.UTHSC_BLAT_URL = ""
            elif self.dataset.group.species == "mouse":
                self.UCSC_BLAT_URL = webqtlConfig.UTHSC_BLAT2 % ('mouse', 'mm10', blatsequence)
                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10', blatsequence)
            elif self.dataset.group.species == "human":
                self.UCSC_BLAT_URL = webqtlConfig.UTHSC_BLAT2 % ('human', 'hg19', blatsequence)
                self.UTHSC_BLAT_URL = ""
            else:
                self.UCSC_BLAT_URL = ""
                self.UTHSC_BLAT_URL = ""
        except Exception:
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""

        self.build_correlation_tools()

        # Get nearest marker for composite mapping. Only stored traits with
        # a locus chromosome in a dataset that is neither "Geno" nor
        # "Publish" get one.
        has_locus_marker = False
        if not self.temp_trait:
            has_locus_marker = (hasattr(self.this_trait, 'locus_chr')
                                and self.this_trait.locus_chr != ""
                                and self.dataset.type != "Geno"
                                and self.dataset.type != "Publish")
            if has_locus_marker:
                self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
            else:
                self.nearest_marker = ""

        self.make_sample_lists()

        # Todo: Add back in the ones we actually need from below, as we
        # discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)

        hddn['trait_id'] = self.trait_id
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = self.temp_trait
        if self.temp_trait:
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "pylmm"
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if has_locus_marker:
            hddn['control_marker'] = self.nearest_marker
        hddn['do_control'] = False
        hddn['maf'] = 0.01
        hddn['compare_traits'] = []
        hddn['export_data'] = ""

        # We'll need access to this_trait and hddn in the Jinja2 Template,
        # so we put it inside self
        self.hddn = hddn

        self.temp_uuid = uuid.uuid4()

        # self.sample_groups is not assigned in this method; presumably
        # make_sample_lists() sets it -- confirm.
        self.sample_group_types = OrderedDict()
        self.sample_group_types['samples_primary'] = self.dataset.group.name
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_other'] = "Other"
            self.sample_group_types['samples_all'] = "All"
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.get_mapping_methods()

        self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups)

        trait_symbol = None
        if not self.temp_trait and self.this_trait.symbol:
            trait_symbol = self.this_trait.symbol

        self.js_data = dict(trait_id=self.trait_id,
                            trait_symbol=trait_symbol,
                            dataset_type=self.dataset.type,
                            data_scale=self.dataset.data_scale,
                            sample_group_types=self.sample_group_types,
                            sample_lists=sample_lists,
                            attribute_names=self.sample_groups[0].attributes,
                            temp_uuid=self.temp_uuid)
    def __init__(self, start_vars):
        """Compute correlations for one trait against a target dataset.

        Reads the request parameters from ``start_vars``, builds the list of
        samples to correlate over, computes the correlations of the requested
        type and stores the filtered, ranked results on the instance.

        Keys read from ``start_vars``:
            corr_type           -- compared against "sample", "tissue", "lit"
            dataset             -- "Temp" selects the temporary-trait path
            group               -- read only when dataset == "Temp"
            corr_sample_method  -- stored as ``self.corr_method``
            corr_samples_group  -- "samples_primary", "samples_other" or
                                   anything else (treated as all samples)
            corr_dataset        -- name of the dataset to correlate against
            min_expr            -- minimum mean expression filter
            corr_return_results -- maximum number of ranked traits considered
            p_range_lower / p_range_upper -- optional, default -1.0 / 1.0
            loc_chr / min_loc_mb / max_loc_mb -- optional location filter,
                                   only used when all three are present

        Results are left in ``self.correlation_data`` (ordered by descending
        absolute first value), ``self.correlation_results`` (trait objects
        that passed the filters) and ``self.json_results``.

        Raises:
            AssertionError: if a required key is missing from ``start_vars``.
        """
        # Check parameters
        assert 'corr_type' in start_vars
        assert is_str(start_vars['corr_type'])
        assert 'dataset' in start_vars
        # assert('group' in start_vars) permitted to be empty?
        assert 'corr_sample_method' in start_vars
        assert 'corr_samples_group' in start_vars
        assert 'corr_dataset' in start_vars
        assert 'min_expr' in start_vars
        assert 'corr_return_results' in start_vars
        if 'loc_chr' in start_vars:
            assert 'min_loc_mb' in start_vars
            assert 'max_loc_mb' in start_vars

        with Bench("Doing correlations"):
            if start_vars['dataset'] == "Temp":
                self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
                self.trait_id = "Temp"
                self.this_trait = GeneralTrait(dataset=self.dataset,
                                               name=self.trait_id,
                                               cellid=None)
            else:
                # NOTE(review): presumably sets self.dataset, self.trait_id and
                # self.this_trait, which are used below -- confirm in helper_functions
                helper_functions.get_species_dataset_trait(self, start_vars)

            self.dataset.group.read_genotype_file()

            corr_samples_group = start_vars['corr_samples_group']

            self.sample_data = {}
            self.corr_type = start_vars['corr_type']
            self.corr_method = start_vars['corr_sample_method']
            self.min_expr = get_float(start_vars, 'min_expr')
            self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0)
            self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0)

            if ('loc_chr' in start_vars and
                'min_loc_mb' in start_vars and
                'max_loc_mb' in start_vars):

                self.location_chr = get_string(start_vars, 'loc_chr')
                self.min_location_mb = get_int(start_vars, 'min_loc_mb')
                self.max_location_mb = get_int(start_vars, 'max_loc_mb')
            else:
                # These attributes are read unconditionally by the filters
                # below, so they must exist even when no location range was
                # requested; None disables the corresponding filter.
                self.location_chr = None
                self.min_location_mb = None
                self.max_location_mb = None

            self.get_formatted_corr_type()
            self.return_number = int(start_vars['corr_return_results'])

            #The two if statements below append samples to the sample list based upon whether the user
            #selected Primary Samples Only, Other Samples Only, or All Samples

            group = self.dataset.group

            # Parent and F1 samples of the group (either list may be None)
            founder_samples = []
            if group.parlist is not None:
                founder_samples += group.parlist
            if group.f1list is not None:
                founder_samples += group.f1list

            # Work on a copy: extending group.samplelist in place would leak
            # the parent/F1 samples into the group object itself.
            primary_samples = list(group.samplelist) + founder_samples

            #If either BXD/whatever Only or All Samples, append all of that group's samplelist
            if corr_samples_group != 'samples_other':
                self.process_samples(start_vars, primary_samples)

            #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
            #exclude the primary samples (because they would have been added in the previous
            #if statement if the user selected All Samples)
            if corr_samples_group != 'samples_primary':
                if corr_samples_group == 'samples_other':
                    primary_samples = [x for x in primary_samples
                                       if x not in founder_samples]
                self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)

            self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
            self.target_dataset.get_trait_data(self.sample_data.keys())

            self.correlation_results = []

            self.correlation_data = {}

            # Only populated for the matching corr_type; may stay None
            tissue_corr_data = None
            lit_corr_data = None

            if self.corr_type == "tissue":
                self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

                tissue_corr_data = self.do_tissue_correlation_for_all_traits()
                if tissue_corr_data is not None:
                    for trait in list(tissue_corr_data.keys())[:self.return_number]:
                        self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
                else:
                    # No tissue data available: fall back to every target trait
                    for trait, values in self.target_dataset.trait_data.iteritems():
                        self.get_sample_r_and_p_values(trait, values)

            elif self.corr_type == "lit":
                self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
                lit_corr_data = self.do_lit_correlation_for_all_traits()

                for trait in list(lit_corr_data.keys())[:self.return_number]:
                    self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])

            elif self.corr_type == "sample":
                for trait, values in self.target_dataset.trait_data.iteritems():
                    self.get_sample_r_and_p_values(trait, values)

            # Rank by descending absolute value of the first stored element
            self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
                                                                   key=lambda t: -abs(t[1][0])))

            # Location/expression filters only apply to these target types
            has_location_info = self.target_dataset.type in ("ProbeSet", "Geno")

            if has_location_info:
                #ZS: Map chromosome names to their order id once, so both the
                #requested range and each trait's chromosome can be compared as ints
                chr_order_by_name = dict(
                    (chr_info.name, order_id)
                    for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems())
                range_chr_as_int = chr_order_by_name.get(self.location_chr)

            for trait in list(self.correlation_data.keys())[:self.return_number]:
                trait_object = GeneralTrait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)

                sample_r = float(self.correlation_data[trait][0])
                if not (sample_r >= self.p_range_lower and sample_r <= self.p_range_upper):
                    continue

                if has_location_info:
                    #ZS: Convert trait chromosome to an int for the location range option
                    chr_as_int = chr_order_by_name.get(trait_object.chr, 0)

                    if (self.min_expr is not None) and (float(trait_object.mean) < self.min_expr):
                        continue
                    if range_chr_as_int is not None and (chr_as_int != range_chr_as_int):
                        continue
                    if (self.min_location_mb is not None) and (float(trait_object.mb) < float(self.min_location_mb)):
                        continue
                    if (self.max_location_mb is not None) and (float(trait_object.mb) > float(self.max_location_mb)):
                        continue

                (trait_object.sample_r,
                 trait_object.sample_p,
                 trait_object.num_overlap) = self.correlation_data[trait]

                # Set some sane defaults
                trait_object.tissue_corr = 0
                trait_object.tissue_pvalue = 0
                trait_object.lit_corr = 0
                if self.corr_type == "tissue" and tissue_corr_data is not None:
                    trait_object.tissue_corr = tissue_corr_data[trait][1]
                    trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                elif self.corr_type == "lit":
                    trait_object.lit_corr = lit_corr_data[trait][1]
                self.correlation_results.append(trait_object)

            self.target_dataset.get_trait_info(self.correlation_results, self.target_dataset.group.species)

            # Fill in the correlation types that were not the primary one,
            # which is only done when both datasets are ProbeSet
            if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_lit_correlation_for_trait_list()

            if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_tissue_correlation_for_trait_list()

        self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)