Exemplo n.º 1
0
    def read_genotype_file(self):
        """Load this group's genotypes from a .geno flat file, not the database.

        The file named by ``self.genofile`` (or ``<self.name>.geno`` when that
        is unset) is located in the 'genotype' directory and read into a
        reaper Dataset.  The chosen dataset's ``prgy`` entries are stored on
        ``self.samplelist`` and the dataset itself is returned.
        """
        plain_dataset = reaper.Dataset()

        # reaper barfs on unicode filenames, so the located path is forced to str
        geno_name = self.genofile if self.genofile else self.name + '.geno'
        plain_dataset.read(str(locate(geno_name, 'genotype')))

        # Dataset extended with the first two entries of the parent list as
        # Mat/Pat (no F1 is added); falls back to the plain dataset otherwise
        with_parents = plain_dataset
        if plain_dataset.type == "group" and self.parlist:
            with_parents = plain_dataset.add(Mat=self.parlist[0],
                                             Pat=self.parlist[1])

        # Pick the default genotype object; intercrosses never get the
        # parent-extended dataset and the flag is reset when it is not used
        use_parents = self.incparentsf1 and plain_dataset.type != "intercross"
        if not use_parents:
            self.incparentsf1 = 0
        genotype = with_parents if use_parents else plain_dataset

        self.samplelist = list(genotype.prgy)
        return genotype
Exemplo n.º 2
0
    def read_genotype_file(self, use_reaper=False):
        """Load this group's genotypes from a .geno flat file, not the database.

        ``use_reaper`` is accepted but not consulted in this method; the file
        is always parsed with ``gen_geno_ob.genotype``.  The chosen genotype
        object's ``prgy`` entries are stored on ``self.samplelist`` and the
        object itself is returned.
        """
        if not self.genofile:
            geno_name = self.name + '.geno'
        elif "RData" in self.genofile:
            # ZS: This is a temporary fix; I need to change the way the JSON
            # files that point to multiple genotype files are structured to
            # point to other file types like RData
            geno_name = self.genofile.split(".")[0] + ".geno"
        else:
            geno_name = self.genofile

        # The located path is coerced to a plain string before parsing
        plain_genotype = gen_geno_ob.genotype(str(locate(geno_name, 'genotype')))

        # Genotype extended with the first two entries of the parent list as
        # Mat/Pat (no F1 is added); falls back to the plain one otherwise
        with_parents = plain_genotype
        if plain_genotype.type == "group" and self.parlist:
            with_parents = plain_genotype.add(Mat=self.parlist[0],
                                              Pat=self.parlist[1])

        # Pick the default genotype object; intercrosses never get the
        # parent-extended one and the flag is reset when it is not used
        use_parents = self.incparentsf1 and plain_genotype.type != "intercross"
        if not use_parents:
            self.incparentsf1 = 0
        genotype = with_parents if use_parents else plain_genotype

        self.samplelist = list(genotype.prgy)
        return genotype
Exemplo n.º 3
0
    def __init__(self, name):
        """Load the marker list for the genotype set called *name*.

        Markers are read from the SNP annotation file ``<name>_snps.txt``.
        Each marker is a dict with ``name``, ``Mb`` and ``chr`` keys; the
        resulting list is stored on ``self.markers``.  Reading is best effort:
        if the file cannot be located or parsed, ``self.markers`` is empty.

        The previous implementation could never produce markers: it indexed
        the file handle, evaluated ``marker['Mb']`` without assigning it,
        passed ``'r'`` to ``locate`` instead of ``open`` and reused one dict
        for every line, with a bare ``except`` hiding all of it.
        """
        # Opened only so that a missing JSON file still raises here, as
        # before; its contents are not used.
        with open(locate(name + ".json", 'genotype/json')):
            pass

        markers = []
        try:
            # NOTE(review): the 'genotype/bimbam' directory is assumed to be
            # where the *_snps.txt files live -- confirm.
            with open(locate(name + "_snps.txt", 'genotype/bimbam'),
                      'r') as bimbam_fh:
                # Sniff the delimiter from the first line, then rewind so
                # that line is parsed as a marker too.
                first_line = bimbam_fh.readline()
                if len(first_line.split(", ")) > 2:
                    delimiter = ", "
                elif len(first_line.split(",")) > 2:
                    delimiter = ","
                elif len(first_line.split("\t")) > 2:
                    delimiter = "\t"
                else:
                    delimiter = " "
                bimbam_fh.seek(0)

                for line in bimbam_fh:
                    if not line.strip():
                        continue  # ignore blank lines
                    fields = line.split(delimiter)
                    # A fresh dict per line; columns are name, position, chr.
                    # NOTE(review): position is assumed to be in base pairs.
                    markers.append({
                        'name': fields[0].rstrip(),
                        'Mb': float(fields[1].rstrip()) / 1000000,
                        'chr': fields[2].rstrip(),
                    })
        except Exception:
            # Deliberate best effort (was a bare except): no usable SNP file
            # means no markers.
            markers = []

        for marker in markers:
            if (marker['chr'] != "X") and (marker['chr'] != "Y"):
                marker['chr'] = int(marker['chr'])
            marker['Mb'] = float(marker['Mb'])

        self.markers = markers
Exemplo n.º 4
0
    def run_analysis(self, requestform):
        """Initialise an ePheWAS run for the trait named in *requestform*.

        Reads ``requestform["trait_id"]`` and ``requestform["dataset"]``,
        stores them (and the created dataset) on ``self``, parses the BXD
        genotype file and builds the R SNP-aligner matrix and the phenotype
        aligner.

        The debug output uses the function-call form of ``print`` throughout,
        which prints the same text on Python 2 and is valid on Python 3 (the
        former ``print "..."`` statements were a SyntaxError there).
        """
        print("Starting ePheWAS analysis on dataset")
        # Get the location of the BXD genotypes
        genofilelocation = locate("BXD.geno", "genotype")
        # Get the location of the Tissue_color_aligner
        tissuealignerloc = locate("Tissue_color_aligner.csv", "auwerx")

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)

        # Print some debug
        print("self.trait_id:" + self.trait_id + "\n")
        print("self.datasetname:" + self.datasetname + "\n")
        print("self.dataset.type:" + self.dataset.type + "\n")

        # Load in the genotypes file *sigh* to make the markermap
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        # Flat list of name, chr, Mb triples; reshaped row-wise by R below
        snpinfo = []
        for marker in parser.markers:
            snpinfo.append(marker["name"])
            snpinfo.append(marker["chr"])
            snpinfo.append(marker["Mb"])

        rnames = r_seq(1, len(parser.markers))
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo,
                                 nrow=len(parser.markers),
                                 dimnames=r_list(rnames,
                                                 r_c("SNP", "Chr", "Pos")),
                                 ncol=3,
                                 byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        print("Initialization of ePheWAS done !")
Exemplo n.º 5
0
    def read_genotype_file(self):
        """Read the group's genotypes from a .geno file instead of the database.

        Uses ``self.genofile`` when set, otherwise ``<self.name>.geno``, both
        looked up in the 'genotype' directory.  Stores the selected dataset's
        ``prgy`` entries on ``self.samplelist`` and returns that dataset.
        """
        base_dataset = reaper.Dataset()

        # reaper barfs on unicode filenames, so the located path is forced to str
        if self.genofile:
            geno_name = self.genofile
        else:
            geno_name = self.name + '.geno'
        base_dataset.read(str(locate(geno_name, 'genotype')))

        # For "group" files with a parent list, build a second dataset with
        # the first two parents added as Mat/Pat (no F1 is added)
        extended_dataset = base_dataset
        if base_dataset.type == "group" and self.parlist:
            extended_dataset = base_dataset.add(Mat=self.parlist[0],
                                                Pat=self.parlist[1])

        # Determine the default genotype object: the extended dataset is used
        # only when requested and the file is not an intercross
        if self.incparentsf1 and base_dataset.type != "intercross":
            chosen = extended_dataset
        else:
            self.incparentsf1 = 0
            chosen = base_dataset

        self.samplelist = list(chosen.prgy)
        return chosen
Exemplo n.º 6
0
    def __init__(self, name):
        """Load the marker list for the genotype set called *name*.

        Markers are read from the SNP annotation file ``<name>_snps.txt``.
        Each marker is a dict with ``name``, ``Mb`` and ``chr`` keys; the
        resulting list is stored on ``self.markers``.  Reading is best effort:
        if the file cannot be located or parsed, ``self.markers`` is empty.

        The previous implementation could never produce markers: it indexed
        the file handle, evaluated ``marker['Mb']`` without assigning it,
        passed ``'r'`` to ``locate`` instead of ``open`` and reused one dict
        for every line, with a bare ``except`` hiding all of it.
        """
        # Opened only so that a missing JSON file still raises here, as
        # before; its contents are not used.
        with open(locate(name + ".json", 'genotype/json')):
            pass

        markers = []
        try:
            # NOTE(review): the 'genotype/bimbam' directory is assumed to be
            # where the *_snps.txt files live -- confirm.
            with open(locate(name + "_snps.txt", 'genotype/bimbam'),
                      'r') as bimbam_fh:
                # Sniff the delimiter from the first line, then rewind so
                # that line is parsed as a marker too.
                first_line = bimbam_fh.readline()
                if len(first_line.split(", ")) > 2:
                    delimiter = ", "
                elif len(first_line.split(",")) > 2:
                    delimiter = ","
                elif len(first_line.split("\t")) > 2:
                    delimiter = "\t"
                else:
                    delimiter = " "
                bimbam_fh.seek(0)

                for line in bimbam_fh:
                    if not line.strip():
                        continue  # ignore blank lines
                    fields = line.split(delimiter)
                    # A fresh dict per line; columns are name, position, chr.
                    # NOTE(review): position is assumed to be in base pairs.
                    markers.append({
                        'name': fields[0].rstrip(),
                        'Mb': float(fields[1].rstrip()) / 1000000,
                        'chr': fields[2].rstrip(),
                    })
        except Exception:
            # Deliberate best effort (was a bare except): no usable SNP file
            # means no markers.
            markers = []

        for marker in markers:
            if (marker['chr'] != "X") and (marker['chr'] != "Y"):
                marker['chr'] = int(marker['chr'])
            marker['Mb'] = float(marker['Mb'])

        self.markers = markers
Exemplo n.º 7
0
    def __init__(self, name):
        """Load the marker list for the genotype set called *name*.

        Markers are read from ``<name>_snps.txt`` in the 'genotype/bimbam'
        flat-file directory.  Each marker is a dict with ``name``, ``Mb``
        (second column divided by 1,000,000) and ``chr`` (int unless it is
        "X", "Y" or "M"); the list is stored on ``self.markers``.

        Raises whatever ``open``/``locate`` raise when the JSON or SNP file is
        missing, and ``ValueError``/``IndexError`` on malformed lines.
        """
        # Opened only so that a missing JSON file still raises here, as
        # before; its contents are not used.
        with open(locate(name + ".json", 'genotype/json')):
            pass

        markers = []
        with open("%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name),
                  'r') as bimbam_fh:
            # Sniff the delimiter from the first line only.  Each test used
            # to call readline() again, so one to three lines were consumed
            # and never turned into markers.
            # NOTE(review): assumes the file has no header row -- confirm.
            first_line = bimbam_fh.readline()
            if len(first_line.split(", ")) > 2:
                delimiter = ", "
            elif len(first_line.split(",")) > 2:
                delimiter = ","
            elif len(first_line.split("\t")) > 2:
                delimiter = "\t"
            else:
                delimiter = " "
            bimbam_fh.seek(0)  # rewind so the sniffed line is parsed as well

            for line in bimbam_fh:
                if not line.strip():
                    continue  # ignore blank lines
                fields = line.split(delimiter)
                markers.append({
                    'name': fields[0].rstrip(),
                    'Mb': float(fields[1].rstrip()) / 1000000,
                    'chr': fields[2].rstrip(),
                })

        for marker in markers:
            # Only the X, Y and M chromosomes keep their string label
            if marker['chr'] not in ("X", "Y", "M"):
                marker['chr'] = int(marker['chr'])
            marker['Mb'] = float(marker['Mb'])

        self.markers = markers
Exemplo n.º 8
0
    def gen_human_results(self, pheno_vector, key, temp_uuid):
        """Run the external pylmm command for a human dataset.

        The phenotype vector, an all-ones covariate column, the kinship matrix
        read from ``<file_base>.kin`` and the SNP file name are serialised to
        JSON and stored in Redis under *key* (expiring after one hour).  The
        pylmm command is then run and its result is popped from the Redis
        list ``pylmm:results:<temp_uuid>`` (waiting up to 45 minutes).

        Returns:
            ``(p_values, t_stats)`` taken from the result JSON.
        """
        file_base = locate(self.dataset.group.name, "mapping")

        plink_input = input.plink(file_base, type='b')
        input_file_name = os.path.join(webqtlConfig.SNP_PATH,
                                       self.dataset.group.name + ".snps.gz")

        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
        covariate_matrix = np.ones((pheno_vector.shape[0], 1))
        # Close the kinship file once it is read (the handle used to leak)
        with open(file_base + '.kin', 'r') as kinship_fh:
            kinship_matrix = np.fromfile(kinship_fh, sep=" ")
        # Flat values -> square matrix, one row/column per individual
        kinship_matrix.resize(
            (len(plink_input.indivs), len(plink_input.indivs)))

        logger.debug("Before creating params")

        params = dict(
            pheno_vector=pheno_vector.tolist(),
            covariate_matrix=covariate_matrix.tolist(),
            input_file_name=input_file_name,
            kinship_matrix=kinship_matrix.tolist(),
            refit=False,
            temp_uuid=temp_uuid,

            # meta data
            timestamp=datetime.datetime.now().isoformat(),
        )

        logger.debug("After creating params")

        json_params = json.dumps(params)
        Redis.set(key, json_params)
        Redis.expire(key, 60 * 60)

        logger.debug("Before creating the command")

        # NOTE(review): key is interpolated into a shell command line; this
        # presumes callers generate it themselves -- confirm.
        command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "human")

        logger.debug("command is:", command)

        os.system(command)

        # NOTE(review): if the pop times out and returns None, the indexing
        # below raises TypeError -- confirm whether that should be handled.
        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
        results = json.loads(json_results[1])
        t_stats = results['t_stats']
        p_values = results['p_values']

        return p_values, t_stats
Exemplo n.º 9
0
def generate_cross_from_rdata(dataset):
    """Define the R function ``generate_cross_from_rdata`` for *dataset*.

    The located ``<group name>.RData`` path (from 'genotype/rdata') is baked
    in as the default ``filename`` argument of the R function, which loads
    that file and returns the ``cunique`` object.
    """
    rdata_name = dataset.group.name + ".RData"
    r_definition = """
       generate_cross_from_rdata <- function(filename = '%s') {
           load(file=filename)
           cross = cunique
           return(cross)
       }
    """ % (locate(rdata_name, "genotype/rdata"))
    ro.r(r_definition)
Exemplo n.º 10
0
    def gen_human_results(self, pheno_vector, key, temp_uuid):
        """Run the external pylmm command for a human dataset.

        The phenotype vector, an all-ones covariate column, the kinship matrix
        read from ``<file_base>.kin`` and the SNP file name are serialised to
        JSON and stored in Redis under *key* (expiring after one hour).  The
        pylmm command is then run and its result is popped from the Redis
        list ``pylmm:results:<temp_uuid>`` (waiting up to 45 minutes).

        Returns:
            ``(p_values, t_stats)`` taken from the result JSON.
        """
        file_base = locate(self.dataset.group.name, "mapping")

        plink_input = input.plink(file_base, type='b')
        input_file_name = os.path.join(webqtlConfig.SNP_PATH,
                                       self.dataset.group.name + ".snps.gz")

        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
        covariate_matrix = np.ones((pheno_vector.shape[0], 1))
        # Close the kinship file once it is read (the handle used to leak)
        with open(file_base + '.kin', 'r') as kinship_fh:
            kinship_matrix = np.fromfile(kinship_fh, sep=" ")
        # Flat values -> square matrix, one row/column per individual
        n_indivs = len(plink_input.indivs)
        kinship_matrix.resize((n_indivs, n_indivs))

        logger.debug("Before creating params")

        params = dict(
            pheno_vector=pheno_vector.tolist(),
            covariate_matrix=covariate_matrix.tolist(),
            input_file_name=input_file_name,
            kinship_matrix=kinship_matrix.tolist(),
            refit=False,
            temp_uuid=temp_uuid,

            # meta data
            timestamp=datetime.datetime.now().isoformat(),
        )

        logger.debug("After creating params")

        json_params = json.dumps(params)
        Redis.set(key, json_params)
        Redis.expire(key, 60 * 60)

        logger.debug("Before creating the command")

        # NOTE(review): key is interpolated into a shell command line; this
        # presumes callers generate it themselves -- confirm.
        command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "human")

        logger.debug("command is:", command)

        os.system(command)

        # NOTE(review): if the pop times out and returns None, the indexing
        # below raises TypeError -- confirm whether that should be handled.
        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
        results = json.loads(json_results[1])
        t_stats = results['t_stats']
        p_values = results['p_values']

        return p_values, t_stats
Exemplo n.º 11
0
    def run_analysis(self, requestform):
        """Initialise an ePheWAS run for the trait named in *requestform*.

        Reads ``requestform["trait_id"]`` and ``requestform["dataset"]``,
        stores them (and the created dataset) on ``self``, parses the BXD
        genotype file and builds the R SNP-aligner matrix and the phenotype
        aligner.

        The debug output uses the function-call form of ``print`` throughout,
        which prints the same text on Python 2 and is valid on Python 3 (the
        former ``print "..."`` statements were a SyntaxError there).
        """
        print("Starting ePheWAS analysis on dataset")
        genofilelocation = locate(
            "BXD.geno", "genotype")  # Get the location of the BXD genotypes
        tissuealignerloc = locate(
            "Tissue_color_aligner.csv",
            "auwerx")  # Get the location of the Tissue_color_aligner

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)

        # Print some debug
        print("self.trait_id:" + self.trait_id + "\n")
        print("self.datasetname:" + self.datasetname + "\n")
        print("self.dataset.type:" + self.dataset.type + "\n")

        # Load in the genotypes file *sigh* to make the markermap
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        # Flat list of name, chr, Mb triples; reshaped row-wise by R below
        snpinfo = []
        for marker in parser.markers:
            snpinfo.append(marker["name"])
            snpinfo.append(marker["chr"])
            snpinfo.append(marker["Mb"])

        rnames = r_seq(1, len(parser.markers))
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo,
                                 nrow=len(parser.markers),
                                 dimnames=r_list(rnames,
                                                 r_c("SNP", "Chr", "Pos")),
                                 ncol=3,
                                 byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        print("Initialization of ePheWAS done !")
Exemplo n.º 12
0
def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method,
             num_perm, perm_strata_list, do_control, control_marker,
             manhattan_plot, cofactors):
    """Post an R/qtl job to the GN3 endpoint and return its parsed output.

    A phenotype file is written first, the group's genotype file is located,
    and both are posted together with the mapping options.  With
    ``num_perm > 0`` the return value is the tuple
    ``(perm_results, suggestive, significant, results)``; otherwise only
    ``results`` is returned.
    """
    pheno_file = write_phenotype_file(trait_name, samples, vals, dataset,
                                      cofactors, perm_strata_list)

    # Prefer the group's explicit genotype file, else "<group name>.geno"
    group = dataset.group
    geno_name = group.genofile if group.genofile else group.name + ".geno"
    geno_file = locate(geno_name, "genotype")

    post_data = dict(
        pheno_file=pheno_file,
        geno_file=geno_file,
        model=model,
        method=method,
        nperm=num_perm,
        scale=mapping_scale,
    )

    if do_control == "true" and control_marker:
        post_data["control"] = control_marker

    # Boolean switches, each sent only when its condition is truthy
    optional_flags = (
        ("interval", not manhattan_plot),
        ("addcovar", cofactors),
        ("pstrata", perm_strata_list),
    )
    for flag, enabled in optional_flags:
        if enabled:
            post_data[flag] = True

    response = requests.post(GN3_LOCAL_URL + "api/rqtl/compute",
                             data=post_data)
    rqtl_output = response.json()

    if not num_perm > 0:
        return rqtl_output['results']
    return (rqtl_output['perm_results'], rqtl_output['suggestive'],
            rqtl_output['significant'], rqtl_output['results'])
Exemplo n.º 13
0
    def __init__(self, name):
        """Load the marker list for *name* from ``<name>.json``.

        The file is located in 'genotype/json'.  Each marker's ``chr`` is
        converted to int unless it is "X" or "Y", and ``Mb`` to float; the
        list is stored on ``self.markers``.  JSON that cannot be parsed
        yields an empty marker list (best effort); a missing file still
        raises from ``locate``/``open``.
        """
        # Context manager so the handle is closed (it used to leak)
        with open(locate(name + '.json', 'genotype/json')) as json_data_fh:
            try:
                markers = json.load(json_data_fh)
            except ValueError:
                # JSON decode errors are ValueErrors; the former bare except
                # also swallowed unrelated failures such as KeyboardInterrupt
                markers = []

        for marker in markers:
            if (marker['chr'] != "X") and (marker['chr'] != "Y"):
                marker['chr'] = int(marker['chr'])
            marker['Mb'] = float(marker['Mb'])

        self.markers = markers
Exemplo n.º 14
0
    def __init__(self, name):
        """Load the marker list for *name* from ``<name>.json``.

        The file is located in 'genotype/json'.  Each marker's ``chr`` is
        converted to int unless it is "X" or "Y", and ``Mb`` to float; the
        list is stored on ``self.markers``.  JSON that cannot be parsed
        yields an empty marker list (best effort); a missing file still
        raises from ``locate``/``open``.
        """
        # Context manager so the handle is closed (it used to leak)
        with open(locate(name + ".json", 'genotype/json')) as json_data_fh:
            try:
                markers = json.load(json_data_fh)
            except ValueError:
                # JSON decode errors are ValueErrors; the former bare except
                # also swallowed unrelated failures such as KeyboardInterrupt
                markers = []

        for marker in markers:
            if (marker['chr'] != "X") and (marker['chr'] != "Y"):
                marker['chr'] = int(marker['chr'])
            marker['Mb'] = float(marker['Mb'])

        self.markers = markers
Exemplo n.º 15
0
 def __init__(self, name, specified_markers = []):
     """Load markers from the whitespace-separated file ``<name>.bim``.

     Each kept line becomes a dict with ``chr`` (int, column 1), ``name``
     (column 2) and ``Mb`` (column 4 divided by 1,000,000), appended to
     ``self.markers``.  When *specified_markers* is non-empty, only markers
     whose name is in it are kept.  The default list is never mutated.
     """
     # Set for O(1) membership tests; empty means "keep every marker"
     wanted = set(specified_markers)
     # Context manager so the handle is closed (it used to leak)
     with open(locate('genotype') + '/' + name + '.bim') as marker_data_fh:
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
             if not splat:
                 continue  # blank lines used to raise IndexError
             if wanted and splat[1] not in wanted:
                 continue
             self.markers.append({
                 'chr': int(splat[0]),
                 'name': splat[1],
                 'Mb': float(splat[3]) / 1000000,
             })
Exemplo n.º 16
0
 def __init__(self, name, specified_markers = []):
     """Load markers from the whitespace-separated file ``<name>.bim``.

     Each kept line becomes a dict with ``chr`` (int, column 1), ``name``
     (column 2) and ``Mb`` (column 4 divided by 1,000,000), appended to
     ``self.markers``.  When *specified_markers* is non-empty, only markers
     whose name is in it are kept.  The default list is never mutated.
     """
     # Set for O(1) membership tests; empty means "keep every marker"
     wanted = set(specified_markers)
     # Context manager so the handle is closed (it used to leak)
     with open(locate('genotype') + '/' + name + '.bim') as marker_data_fh:
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
             if not splat:
                 continue  # blank lines used to raise IndexError
             if wanted and splat[1] not in wanted:
                 continue
             self.markers.append({
                 'chr': int(splat[0]),
                 'name': splat[1],
                 'Mb': float(splat[3]) / 1000000,
             })
Exemplo n.º 17
0
    def run_analysis(self, requestform):
        """Run a CTL scan for the traits listed in *requestform* and plot it.

        Reads ``trait_list`` (comma-separated ``name:dataset`` entries),
        ``strategy``, ``nperm``, ``parametric`` and ``significance`` from
        *requestform*, builds R genotype and phenotype data frames, runs the
        scan and stores the significant interactions, the request form and
        the generated image names/paths in ``self.results``.
        """
        print("Starting CTL analysis on dataset")
        # Split the trait list on commas, trim whitespace, drop empty entries
        self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
        self.trait_db_list = [x for x in self.trait_db_list if x]

        print("strategy:", requestform.get("strategy"))
        strategy = requestform.get("strategy")

        print("nperm:", requestform.get("nperm"))
        nperm = int(requestform.get("nperm"))

        print("parametric:", requestform.get("parametric"))
        # NOTE(review): bool() of any non-empty string is True, including
        # "False" -- confirm what values the form actually sends.
        parametric = bool(requestform.get("parametric"))

        print("significance:", requestform.get("significance"))
        significance = float(requestform.get("significance"))

        # Get the name of the .geno file belonging to the first phenotype
        datasetname = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(datasetname)

        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()

        # Create a genotype matrix
        individuals = parser.individuals
        markers = []
        markernames = []
        for marker in parser.markers:
          markernames.append(marker["name"])
          markers.append(marker["genotypes"])

        # Flatten the per-marker genotype lists into one list
        genotypes = list(itertools.chain(*markers))
        # Debug: values per individual, printed next to the marker count
        print(len(genotypes) / len(individuals), "==", len(parser.markers))

        # Filled row-wise as markers x individuals, then passed through r_t
        rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True))

        # Create a phenotype matrix
        traits = []
        for trait in self.trait_db_list:
          print("retrieving data for", trait)
          if trait != "":
            ts = trait.split(':')
            gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1])
            gt.retrieve_sample_data(individuals)
            for ind in individuals:
              if ind in gt.data.keys():
                traits.append(gt.data[ind].value)
              else:
                # Placeholder for individuals without a value for this trait
                traits.append("-999")

        # Filled row-wise as traits x individuals, then passed through r_t
        rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True))

        # Use a data frame to store the objects
        rPheno = r_data_frame(rPheno)
        rGeno = r_data_frame(rGeno)

        # Debug: Print the genotype and phenotype files to disk
        #r_write_table(rGeno, "~/outputGN/geno.csv")
        #r_write_table(rPheno, "~/outputGN/pheno.csv")

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, ncores = 6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance = significance)

        # Create an image for output
        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        self.results['requestform'] = requestform             # Store the user specified parameters for the output page

        # Create the lineplot
        r_png(self.results['imgloc1'], width=1000, height=600)
        self.r_lineplot(res, significance = significance)
        r_dev_off()

        # One further plot per trait, stored under imgurl2/imgloc2 onwards
        n = 2
        for trait in self.trait_db_list:
          # Create the QTL like CTL plots
          self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
          self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)]
          r_png(self.results['imgloc' + str(n)], width=1000, height=600)
          # (n-1) is passed as the index of the phenotype to plot
          self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait)
          r_dev_off()
          n = n + 1

        # Flush any output from R
        sys.stdout.flush()
    def run_rqtl_geno(self):
        """Run an R/qtl scan on this dataset's .geno file through rpy2.

        The group's genotype file is converted to an R/qtl cross object,
        genotype probabilities are calculated and the phenotype is added.
        With ``self.pair_scan`` set, ``scantwo`` is run, its plot is written
        to a PNG in TMPDIR and the processed pair-scan results are returned.
        Otherwise ``scanone`` is run (plus a permutation run when
        ``self.num_perm`` > 0) and the processed scan results are returned.
        """
        print("Calling R/qtl")

        self.geno_to_rqtl_function()

        ## Get pointers to some common R functions
        r_library     = ro.r["library"]                 # Map the library function
        r_c           = ro.r["c"]                       # Map the c function
        r_sum         = ro.r["sum"]                     # Map the sum function
        plot          = ro.r["plot"]                    # Map the plot function
        postscript    = ro.r["postscript"]              # Map the postscript function
        png           = ro.r["png"]              # Map the png function
        dev_off       = ro.r["dev.off"]                 # Map the device off function

        print(r_library("qtl"))                         # Load R/qtl

        ## Get pointers to some R/qtl functions
        scanone         = ro.r["scanone"]               # Map the scanone function
        scantwo         = ro.r["scantwo"]               # Map the scantwo function
        calc_genoprob   = ro.r["calc.genoprob"]         # Map the calc.genoprob function
        read_cross      = ro.r["read.cross"]            # Map the read.cross function
        write_cross     = ro.r["write.cross"]           # Map the write.cross function
        GENOtoCSVR      = ro.r["GENOtoCSVR"]            # Map the local GENOtoCSVR function

        crossname = self.dataset.group.name
        genofilelocation  = locate(crossname + ".geno", "genotype")
        crossfilelocation = TMPDIR + crossname + ".cross"

        print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
        cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)                                  # TODO: Add the SEX if that is available

        # For the non-manhattan view, probabilities are calculated with
        # step=1 and stepwidth="max"; otherwise the defaults are used
        if self.manhattan_plot:
            cross_object = calc_genoprob(cross_object)
        else:
            cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")

        cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())                 # Add the phenotype

        # for debug: write_cross(cross_object, "csvr", "test.csvr")

        # Scan for QTLs
        covar = self.create_covariates(cross_object)                                                    # Create the additive covariate matrix

        if self.pair_scan:
            if self.do_control == "true":                                                # Covariates are used only when do_control is the string "true"
                print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=self.model, method=self.method, n_cluster = 16)
            else:
                print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=self.model, method=self.method, n_cluster = 16)

            print("Pair scan results:", result_data_frame)

            # Plot the pair-scan result into a randomly named PNG in TMPDIR
            self.pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
            png(file=TMPDIR+self.pair_scan_filename)
            plot(result_data_frame)
            dev_off()

            return self.process_pair_scan_results(result_data_frame)

        else:
            if self.do_control == "true":
                print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=self.model, method=self.method)
            else:
                print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=self.model, method=self.method)

            if int(self.num_perm) > 0:                                                                   # Do permutation (if requested by user)
                # NOTE(review): the permutation calls pass pheno_col while the
                # scans above pass pheno -- confirm both reach the intended
                # R/qtl argument.
                if self.do_control == "true":
                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = int(self.num_perm), model=self.model, method=self.method)
                else:
                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = int(self.num_perm), model=self.model, method=self.method)

                self.process_rqtl_perm_results(perm_data_frame)                                          # Function that sets the thresholds for the web interface

            return self.process_rqtl_results(result_data_frame)
Exemplo n.º 19
0
    def run_analysis(self, requestform):
        """Run a PheWAS analysis for the trait named in *requestform*.

        Reads ``trait_id``, ``dataset``, ``num_region`` and ``sel_mtadjust``
        from *requestform*, takes the chromosome and position from the trait,
        builds the R SNP aligner from the BXD genotype file and calls
        ``self.r_PheWASManhattan``.  The image name/path, the multiple-testing
        setting and the four elements of the R result are stored in
        ``self.results``.
        """
        logger.info("Starting PheWAS analysis on dataset")
        genofilelocation = locate(
            "BXD.geno", "genotype")  # Get the location of the BXD genotypes
        precompfile = locate_phewas(
            "PheWAS_pval_EMMA_norm.RData",
            "auwerx")  # Get the location of the pre-computed EMMA results

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)
        self.region = int(requestform["num_region"])
        self.mtadjust = str(requestform["sel_mtadjust"])

        # Log some debug information
        logger.info("self.trait_id:" + self.trait_id + "\n")
        logger.info("self.datasetname:" + self.datasetname + "\n")
        logger.info("self.dataset.type:" + self.dataset.type + "\n")

        # Build the trait object; QTL and sample info are not requested
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       get_qtl_info=False,
                                       get_sample_info=False)
        logger.info(vars(self.this_trait))

        # Set the values we need
        self.chr = str(self.this_trait.chr)
        self.mb = int(self.this_trait.mb)

        # Log the location and region that will be analysed
        logger.info("location:" + self.chr + ":" + str(self.mb) + "+/-" +
                    str(self.region) + "\n")

        # Load in the genotypes file *sigh* to make the markermap
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        # Flat list of name, chr, Mb triples; reshaped row-wise by R below
        snpinfo = []
        for marker in parser.markers:
            snpinfo.append(marker["name"])
            snpinfo.append(marker["chr"])
            snpinfo.append(marker["Mb"])

        rnames = r_seq(1, len(parser.markers))
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo,
                                 nrow=len(parser.markers),
                                 dimnames=r_list(rnames,
                                                 r_c("SNP", "Chr", "Pos")),
                                 ncol=3,
                                 byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("phewas_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
        self.results['mtadjust'] = self.mtadjust
        logger.info("IMAGE AT:", self.results['imgurl1'])
        logger.info("IMAGE AT:", self.results['imgloc1'])
        # Create the PheWAS plot (The gene/probe name, chromosome and gene/probe positions should come from the user input)
        # TODO: generate the PDF in the temp folder, with a unique name
        # NOTE(review): these asserts are skipped when Python runs with -O
        assert (precompfile)
        assert (phenoaligner)
        assert (snpaligner)
        phewasres = self.r_PheWASManhattan("Test", precompfile, phenoaligner,
                                           snpaligner, "None", self.chr,
                                           self.mb, self.region,
                                           self.results['imgloc1'],
                                           self.mtadjust)
        # The R result is indexed positionally; elements 0-3 are stored
        self.results['phewas1'] = phewasres[0]
        self.results['phewas2'] = phewasres[1]
        self.results['tabulardata'] = phewasres[2]
        self.results['R_debuglog'] = phewasres[3]

        #self.r_PheWASManhattan(allpvalues)
        #self.r_Stop()

        logger.info("Initialization of PheWAS done !")
Exemplo n.º 20
0
def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm,
                  do_control, control_marker, manhattan_plot, pair_scan):
    """Run an R/qtl single-locus or two-locus scan for one phenotype.

    The group's ``.geno`` file is converted into an R/qtl cross through the
    local R function ``GENOtoCSVR``, genotype probabilities are calculated,
    the phenotype in ``vals`` is attached and either ``scantwo`` (when
    ``pair_scan`` is truthy) or ``scanone`` is executed through rpy2.

    Args:
        vals: Phenotype values; passed through ``sanitize_rqtl_phenotype``.
        dataset: Object whose ``group.name`` selects the ``.geno`` file and
            names the temporary ``.cross`` file.
        method: Forwarded to R as the ``method`` argument of the scan.
        model: Forwarded to R as the ``model`` argument of the scan.
        permCheck: Permutations are only run when this equals ``"ON"``.
        num_perm: Number of permutations; must be greater than zero for the
            permutation scan to run.
        do_control: The covariate matrix is only used when this equals the
            string ``"true"``.
        control_marker: Passed to ``create_covariates`` to build the
            additive covariate matrix.
        manhattan_plot: When truthy ``calc.genoprob`` runs with its defaults,
            otherwise with ``step=1`` and ``stepwidth="max"``.
        pair_scan: When truthy a ``scantwo`` pair scan is run instead of
            ``scanone``.

    Returns:
        * pair scan: the value of ``process_pair_scan_results``;
        * single scan with permutations: the tuple
          ``(perm_output, suggestive, significant, results)``;
        * single scan without permutations: the value of
          ``process_rqtl_results``.
    """
    geno_to_rqtl_function(dataset)

    # Handles to the generic R functions this routine actually calls.
    r_library = ro.r["library"]
    plot = ro.r["plot"]
    png = ro.r["png"]
    dev_off = ro.r["dev.off"]

    print(r_library("qtl"))  # Load R/qtl

    # Handles to the R/qtl functions (available once the library is loaded).
    scanone = ro.r["scanone"]
    scantwo = ro.r["scantwo"]
    calc_genoprob = ro.r["calc.genoprob"]
    GENOtoCSVR = ro.r["GENOtoCSVR"]  # Local (non-R/qtl) conversion function

    crossname = dataset.group.name
    genofilelocation = locate(crossname + ".geno", "genotype")
    crossfilelocation = TMPDIR + crossname + ".cross"

    # TODO: Add the SEX if that is available
    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)

    if manhattan_plot:
        cross_object = calc_genoprob(cross_object)
    else:
        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")

    # Add the phenotype
    cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals))

    # Create the additive covariate matrix
    covar = create_covariates(control_marker, cross_object)

    # Keyword arguments shared by every scan below; the covariate matrix is
    # only handed to R when the caller explicitly asked for a control marker.
    scan_args = {"model": model, "method": method}
    if do_control == "true":
        print("Using covariate")
        scan_args["addcovar"] = covar
    else:
        print("No covariates")

    if pair_scan:
        result_data_frame = scantwo(cross_object,
                                    pheno="the_pheno",
                                    n_cluster=16,
                                    **scan_args)

        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
        png(file=TEMPDIR + pair_scan_filename)
        try:
            plot(result_data_frame)
        finally:
            # Always close the graphics device, even when plotting fails,
            # so that it does not stay open in the R session.
            dev_off()

        return process_pair_scan_results(result_data_frame)

    result_data_frame = scanone(cross_object, pheno="the_pheno", **scan_args)

    # Do permutation (if requested by user)
    if num_perm > 0 and permCheck == "ON":
        perm_data_frame = scanone(cross_object,
                                  pheno_col="the_pheno",
                                  n_perm=num_perm,
                                  **scan_args)

        # Sets the thresholds for the web interface
        perm_output, suggestive, significant = process_rqtl_perm_results(
            num_perm, perm_data_frame)
        return perm_output, suggestive, significant, process_rqtl_results(
            result_data_frame)

    return process_rqtl_results(result_data_frame)
Exemplo n.º 21
0
    def run_analysis(self, requestform):
        """Run the PheWAS analysis for the trait selected in ``requestform``.

        Reads ``trait_id``, ``dataset``, ``num_region`` and ``sel_mtadjust``
        from ``requestform``, stores them on ``self``, builds the SNP and
        phenotype aligner objects and calls ``self.r_PheWASManhattan``.
        The outcome is stored in ``self.results`` under the keys
        ``imgurl1``, ``imgloc1``, ``mtadjust``, ``phewas1``, ``phewas2``,
        ``tabulardata`` and ``R_debuglog``.
        """
        logger.info("Starting PheWAS analysis on dataset")
        # Location of the BXD genotypes
        genofilelocation = locate("BXD.geno", "genotype")
        # Location of the pre-computed EMMA results
        precompfile = locate_phewas("PheWAS_pval_EMMA_norm.RData", "auwerx")

        # User parameters, stored/updated on self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)
        self.region = int(requestform["num_region"])
        self.mtadjust = str(requestform["sel_mtadjust"])

        # Debug output of the received parameters
        logger.info("self.trait_id:" + self.trait_id + "\n")
        logger.info("self.datasetname:" + self.datasetname + "\n")
        logger.info("self.dataset.type:" + self.dataset.type + "\n")

        # Look up the trait without QTL or sample information
        self.this_trait = GeneralTrait(
            dataset=self.dataset,
            name=self.trait_id,
            get_qtl_info=False,
            get_sample_info=False,
        )
        logger.info(vars(self.this_trait))

        # Position of the trait, used to centre the analysed region
        self.chr = str(self.this_trait.chr)
        self.mb = int(self.this_trait.mb)

        logger.info("location:" + self.chr + ":" + str(self.mb) + "+/-" + str(self.region) + "\n")

        # The genotype file is parsed only to obtain the marker map
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()

        # Flat list: name, chr, Mb of marker 1, then of marker 2, ...
        marker_fields = ("name", "chr", "Mb")
        snpinfo = [
            marker[field]
            for marker in parser.markers
            for field in marker_fields
        ]

        rnames = r_seq(1, len(parser.markers))
        # SNP aligner: one row per marker, filled row by row from snpinfo
        snpaligner = ro.r.matrix(
            snpinfo,
            nrow=len(parser.markers),
            dimnames=r_list(rnames, r_c("SNP", "Chr", "Pos")),
            ncol=3,
            byrow=True,
        )

        # Phenotype aligner, created in R
        phenoaligner = self.r_create_Pheno_aligner()

        image_name = webqtlUtil.genRandStr("phewas_") + ".png"
        self.results = {
            'imgurl1': image_name,
            'imgloc1': GENERATED_IMAGE_DIR + image_name,
            'mtadjust': self.mtadjust,
        }
        logger.info("IMAGE AT:", self.results['imgurl1'])
        logger.info("IMAGE AT:", self.results['imgloc1'])

        # Create the PheWAS plot (the gene/probe name, chromosome and
        # gene/probe positions should come from the user input)
        # TODO: generate the PDF in the temp folder, with a unique name
        assert precompfile
        assert phenoaligner
        assert snpaligner
        phewasres = self.r_PheWASManhattan(
            "Test",
            precompfile,
            phenoaligner,
            snpaligner,
            "None",
            self.chr,
            self.mb,
            self.region,
            self.results['imgloc1'],
            self.mtadjust,
        )

        # The first four elements of the R result, in this order
        result_keys = ('phewas1', 'phewas2', 'tabulardata', 'R_debuglog')
        for position, key in enumerate(result_keys):
            self.results[key] = phewasres[position]

        logger.info("Initialization of PheWAS done !")
Exemplo n.º 22
0
def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
    """Run an R/qtl scan with optional trait cofactors and stratified permutations.

    The group's genotype file is converted into an R/qtl cross through the
    local R function ``GENOtoCSVR``, genotype probabilities are calculated,
    the phenotype is attached as ``the_pheno`` and either ``scantwo`` (when
    ``pair_scan`` is truthy) or ``scanone`` is executed through rpy2.

    Args:
        vals: Phenotype values; passed through ``sanitize_rqtl_phenotype``.
        samples: Passed to ``add_cofactors`` when cofactors are given.
        dataset: Object providing ``group.name``, ``group.genofile`` and
            ``group.species``.
        method: Forwarded to R as the ``method`` argument of the scan.
        model: Forwarded to R as the ``model`` argument of the scan.
        permCheck: Permutations are only run when this equals ``"ON"``.
        num_perm: Number of permutations; must be greater than zero for the
            permutation scan to run. Converted with ``int()`` before being
            handed to R.
        perm_strata_list: When non-empty, permutations are stratified via
            ``add_perm_strata``.
        do_control: The string ``"true"`` enables the covariates.
        control_marker: Passed to ``create_marker_covariates``.
        manhattan_plot: When truthy ``calc.genoprob`` runs with its defaults,
            otherwise with ``step=1`` and ``stepwidth="max"``.
        pair_scan: When truthy a ``scantwo`` pair scan is run instead of
            ``scanone``.
        cofactors: Any value other than ``""`` adds trait covariates through
            ``add_cofactors``.

    Returns:
        * pair scan: the value of ``process_pair_scan_results``;
        * single scan with permutations: the tuple
          ``(perm_output, suggestive, significant, results, the_scale)``;
        * single scan without permutations: ``(results, the_scale)``.
    """
    # Handles to the generic R functions this routine actually calls.
    r_library = ro.r["library"]
    plot = ro.r["plot"]
    png = ro.r["png"]
    dev_off = ro.r["dev.off"]

    print(r_library("qtl"))                         # Load R/qtl

    # Handles to the R/qtl functions (available once the library is loaded).
    scanone = ro.r["scanone"]
    scantwo = ro.r["scantwo"]
    calc_genoprob = ro.r["calc.genoprob"]

    crossname = dataset.group.name
    # An RData-based cross loader was sketched here but is disabled; the
    # cross is always generated from the genotype file.
    generate_cross_from_geno(dataset)
    GENOtoCSVR = ro.r["GENOtoCSVR"]                 # Local conversion function
    crossfilelocation = TMPDIR + crossname + ".cross"
    if dataset.group.genofile:
        genofilelocation = locate(dataset.group.genofile, "genotype")
    else:
        genofilelocation = locate(crossname + ".geno", "genotype")
    # TODO: Add the SEX if that is available
    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)

    if manhattan_plot:
        cross_object = calc_genoprob(cross_object)
    else:
        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")

    pheno_string = sanitize_rqtl_phenotype(vals)

    # Add the phenotype
    cross_object = add_phenotype(cross_object, pheno_string, "the_pheno")

    # Create the additive covariate markers.
    # NOTE(review): the R snippets below refer to R-side variables named
    # marker_covars / trait_covars - presumably the helpers create them in
    # the R global environment; confirm against the helper definitions.
    marker_covars = create_marker_covariates(control_marker, cross_object)

    has_cofactors = cofactors != ""
    if has_cofactors:
        # Create the covariates from selected traits
        cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples)
        ro.r('all_covars <- cbind(marker_covars, trait_covars)')
    else:
        ro.r('all_covars <- marker_covars')

    covars = ro.r['all_covars']

    # Keyword arguments shared by every scan below.
    scan_args = {"model": model, "method": method}

    if pair_scan:
        # NOTE(review): unlike the single scan, the pair scan only uses the
        # covariates when do_control is "true" and ignores cofactors -
        # behaviour kept as found; confirm whether this is intended.
        if do_control == "true":
            logger.info("Using covariate")
            scan_args["addcovar"] = covars
        else:
            logger.info("No covariates")
        result_data_frame = scantwo(cross_object, pheno="the_pheno", n_cluster=16, **scan_args)

        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
        png(file=TEMPDIR + pair_scan_filename)
        try:
            plot(result_data_frame)
        finally:
            # Always close the graphics device, even when plotting fails,
            # so that it does not stay open in the R session.
            dev_off()

        return process_pair_scan_results(result_data_frame)

    if do_control == "true" or has_cofactors:
        logger.info("Using covariate")
        scan_args["addcovar"] = covars
    else:
        logger.info("No covariates")
    result_data_frame = scanone(cross_object, pheno="the_pheno", **scan_args)

    perm_results = ()
    # Do permutation (if requested by user)
    if num_perm > 0 and permCheck == "ON":
        # n_perm is converted to int in every branch so R always receives
        # an integer count, regardless of covariates or strata.
        perm_args = dict(scan_args, n_perm=int(num_perm))
        # ZS: The strata list would only be populated if "Stratified" was
        # checked on before mapping
        if len(perm_strata_list) > 0:
            cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list)
            perm_args["perm_strata"] = strata_ob
        perm_data_frame = scanone(cross_object, pheno_col="the_pheno", **perm_args)

        # Sets the thresholds for the web interface
        perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)
        perm_results = (perm_output, suggestive, significant)

    the_scale = check_mapping_scale(genofilelocation)
    # With permutations the three permutation values are prepended to the
    # (results, scale) pair; without them only the pair is returned.
    return perm_results + (process_rqtl_results(result_data_frame, dataset.group.species), the_scale)
    def run_analysis(self, requestform):
        """Run a CTL scan for the traits listed in ``requestform``.

        Reads ``trait_list`` (comma separated ``name:dataset`` entries),
        ``strategy``, ``nperm``, ``parametric`` and ``significance`` from
        ``requestform``; builds genotype and phenotype data frames in R;
        runs ``self.r_CTLscan`` and ``self.r_CTLsignificant``; renders one
        line plot plus one CTL plot per trait; and fills ``self.results``
        and ``self.elements`` (JSON of ``self.nodes_list`` plus
        ``self.edges_list``).

        The genotype file is taken from the dataset of the first trait.
        """
        print("Starting CTL analysis on dataset")
        # Split the trait list and drop empty entries
        stripped_names = (trait.strip() for trait in requestform['trait_list'].split(','))
        self.trait_db_list = [name for name in stripped_names if name]

        print("strategy:", requestform.get("strategy"))
        strategy = requestform.get("strategy")

        print("nperm:", requestform.get("nperm"))
        nperm = int(requestform.get("nperm"))

        print("parametric:", requestform.get("parametric"))
        # bool() on form text is True for any non-empty string, including
        # "False"; interpret textual values explicitly instead.
        raw_parametric = requestform.get("parametric")
        if isinstance(raw_parametric, str):
            parametric = raw_parametric.strip().lower() not in ("", "0", "false", "no", "off", "none")
        else:
            parametric = bool(raw_parametric)

        print("significance:", requestform.get("significance"))
        significance = float(requestform.get("significance"))

        # Get the name of the .geno file belonging to the first phenotype
        datasetname = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(datasetname)

        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        print(dataset.group)

        # Create a genotype matrix (markers as rows, transposed afterwards)
        individuals = parser.individuals
        markernames = [marker["name"] for marker in parser.markers]
        markers = [marker["genotypes"] for marker in parser.markers]

        genotypes = list(itertools.chain.from_iterable(markers))
        print(len(genotypes) / len(individuals), "==", len(parser.markers))

        rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames=r_list(markernames, individuals), byrow=True))

        # Create a phenotype matrix: one value per (trait, individual);
        # individuals without data get the placeholder "-999".
        traits = []
        for trait in self.trait_db_list:
            print("retrieving data for", trait)
            ts = trait.split(':')
            gt = TRAIT.GeneralTrait(name=ts[0], dataset_name=ts[1])
            gt = TRAIT.retrieve_sample_data(gt, dataset, individuals)
            for ind in individuals:
                if ind in gt.data:
                    traits.append(gt.data[ind].value)
                else:
                    traits.append("-999")

        rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True))

        print(rPheno)

        # Use a data frame to store the objects
        rPheno = r_data_frame(rPheno, check_names=False)
        rGeno = r_data_frame(rGeno, check_names=False)

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm, parametric=parametric, ncores=6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance=significance)

        # Create an image for output
        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        # Store the user specified parameters for the output page
        self.results['requestform'] = requestform

        # Create the lineplot; the device is closed even if plotting fails
        r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png')
        try:
            self.r_lineplot(res, significance=significance)
        finally:
            r_dev_off()

        # Image slots start at 2 (slot 1 is the line plot); the phenotype
        # index handed to R is therefore n - 1, i.e. it starts at 1.
        for n, trait in enumerate(self.trait_db_list, start=2):
            url_key = 'imgurl' + str(n)
            loc_key = 'imgloc' + str(n)
            # Create the QTL like CTL plots
            self.results[url_key] = webqtlUtil.genRandStr("CTL_") + ".png"
            self.results[loc_key] = GENERATED_IMAGE_DIR + self.results[url_key]
            r_png(self.results[loc_key], width=1000, height=600, type='cairo-png')
            try:
                self.r_plotCTLobject(res, (n - 1), significance=significance, main='Phenotype ' + trait)
            finally:
                r_dev_off()

        # Flush any output from R
        sys.stdout.flush()

        # Create the interactive graph for cytoscape visualization (Nodes and Edges)
        print(type(significant))
        if not isinstance(significant, ri.RNULLType):
            for x in range(len(significant[0])):
                print(significant[0][x], significant[1][x], significant[2][x])    # Debug to console
                tsS = significant[0][x].split(':')                                # Source
                tsT = significant[2][x].split(':')                                # Target
                gtS = TRAIT.GeneralTrait(name=tsS[0], dataset_name=tsS[1])        # Retrieve Source info from the DB
                gtT = TRAIT.GeneralTrait(name=tsT[0], dataset_name=tsT[1])        # Retrieve Target info from the DB
                self.addNode(gtS)
                self.addNode(gtT)
                self.addEdge(gtS, gtT, significant, x)

                # Update the trait names for the displayed table
                significant[0][x] = gtS.symbol + " (" + gtS.name + ")"
                significant[2][x] = gtT.symbol + " (" + gtT.name + ")"

        self.elements = json.dumps(self.nodes_list + self.edges_list)
Exemplo n.º 24
0
def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
    """Run an R/qtl single-locus or two-locus scan for one phenotype.

    The group's ``.geno`` file is converted into an R/qtl cross through the
    local R function ``GENOtoCSVR``, genotype probabilities are calculated,
    the phenotype in ``vals`` is attached and either ``scantwo`` (when
    ``pair_scan`` is truthy) or ``scanone`` is executed through rpy2.

    Args:
        vals: Phenotype values; passed through ``sanitize_rqtl_phenotype``.
        dataset: Object whose ``group.name`` selects the ``.geno`` file and
            names the temporary ``.cross`` file.
        method: Forwarded to R as the ``method`` argument of the scan.
        model: Forwarded to R as the ``model`` argument of the scan.
        permCheck: Permutations are only run when this equals ``"ON"``.
        num_perm: Number of permutations; must be greater than zero for the
            permutation scan to run.
        do_control: The covariate matrix is only used when this equals the
            string ``"true"``.
        control_marker: Passed to ``create_covariates`` to build the
            additive covariate matrix.
        manhattan_plot: When truthy ``calc.genoprob`` runs with its defaults,
            otherwise with ``step=1`` and ``stepwidth="max"``.
        pair_scan: When truthy a ``scantwo`` pair scan is run instead of
            ``scanone``.

    Returns:
        * pair scan: the value of ``process_pair_scan_results``;
        * single scan with permutations: the tuple
          ``(perm_output, suggestive, significant, results)``;
        * single scan without permutations: the value of
          ``process_rqtl_results``.
    """
    geno_to_rqtl_function(dataset)

    # Handles to the generic R functions this routine actually calls.
    r_library = ro.r["library"]
    plot = ro.r["plot"]
    png = ro.r["png"]
    dev_off = ro.r["dev.off"]

    print(r_library("qtl"))                         # Load R/qtl

    # Handles to the R/qtl functions (available once the library is loaded).
    scanone = ro.r["scanone"]
    scantwo = ro.r["scantwo"]
    calc_genoprob = ro.r["calc.genoprob"]
    GENOtoCSVR = ro.r["GENOtoCSVR"]                 # Local conversion function

    crossname = dataset.group.name
    genofilelocation = locate(crossname + ".geno", "genotype")
    crossfilelocation = TMPDIR + crossname + ".cross"

    # TODO: Add the SEX if that is available
    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)

    if manhattan_plot:
        cross_object = calc_genoprob(cross_object)
    else:
        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")

    # Add the phenotype
    cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals))

    # Create the additive covariate matrix
    covar = create_covariates(control_marker, cross_object)

    # Keyword arguments shared by every scan below; the covariate matrix is
    # only handed to R when the caller explicitly asked for a control marker.
    scan_args = {"model": model, "method": method}
    if do_control == "true":
        logger.info("Using covariate")
        scan_args["addcovar"] = covar
    else:
        logger.info("No covariates")

    if pair_scan:
        result_data_frame = scantwo(cross_object, pheno="the_pheno", n_cluster=16, **scan_args)

        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
        png(file=TEMPDIR + pair_scan_filename)
        try:
            plot(result_data_frame)
        finally:
            # Always close the graphics device, even when plotting fails,
            # so that it does not stay open in the R session.
            dev_off()

        return process_pair_scan_results(result_data_frame)

    result_data_frame = scanone(cross_object, pheno="the_pheno", **scan_args)

    # Do permutation (if requested by user)
    if num_perm > 0 and permCheck == "ON":
        perm_data_frame = scanone(cross_object, pheno_col="the_pheno", n_perm=num_perm, **scan_args)

        # Sets the thresholds for the web interface
        perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)
        return perm_output, suggestive, significant, process_rqtl_results(result_data_frame)

    return process_rqtl_results(result_data_frame)