예제 #1
0
def gen_pheno_txt_file(this_dataset, genofile_name, vals, method):
    """Write the phenotype values in ``vals`` to the file GEMMA reads.

    When ``method`` is ``"gemma"`` the group's ``.fam`` file under
    ``flat_files('mapping')`` is rewritten in place: every row keeps its
    columns 1-4, column 0 is written as ``"0"`` and the last column becomes
    the matching entry of ``vals`` (``-9`` where that entry is ``"x"``).

    For any other ``method`` the file ``<genofile_name>_pheno.txt`` is written
    under ``flat_files('genotype/bimbam')`` with one value per line and ``NA``
    where the entry is ``"x"``.

    Args:
        this_dataset: object exposing ``group.name``; only read by the
            ``"gemma"`` branch.
        genofile_name: base name of the phenotype file; only read by the
            non-``"gemma"`` branch.
        vals: sequence of phenotype values, ``"x"`` marking a missing value.
        method: ``"gemma"`` selects the ``.fam`` rewrite.

    Raises:
        IndexError: if ``vals`` has fewer entries than the ``.fam`` file has
            rows, or a row has fewer than five columns.
    """
    if method == "gemma":
        fam_path = "{}/{}.fam".format(flat_files('mapping'),
                                      this_dataset.group.name)

        # Load every row first: the same file is truncated and rewritten below.
        with open(fam_path, "r") as fam_file:
            fam_rows = [line.split() for line in fam_file]

        with open(fam_path, "w") as fam_file:
            for i, row in enumerate(fam_rows):
                this_val = -9 if vals[i] == "x" else vals[i]
                fam_file.write(" ".join(
                    ["0", row[1], row[2], row[3], row[4],
                     str(this_val)]) + "\n")
    else:
        pheno_path = "{}/{}_pheno.txt".format(flat_files('genotype/bimbam'),
                                              genofile_name)
        with open(pheno_path, "w") as outfile:
            for value in vals:
                if value == "x":
                    outfile.write("NA\n")
                else:
                    # str() keeps numeric values from raising TypeError,
                    # matching what the "gemma" branch already does.
                    outfile.write(str(value) + "\n")
예제 #2
0
def gen_pheno_txt_file(this_dataset, genofile_name, vals, method):
    """Generate the phenotype file for GEMMA from ``vals``.

    With ``method == "gemma"`` the group's ``.fam`` file in
    ``flat_files('mapping')`` is read and then rewritten: column 0 is written
    as ``"0"``, columns 1-4 are copied and the final column is taken from
    ``vals`` (``"x"`` becomes ``-9``).

    With any other ``method`` the values are written one per line to
    ``<genofile_name>_pheno.txt`` in ``flat_files('genotype/bimbam')``, with
    ``"x"`` written as ``NA``.

    Args:
        this_dataset: object exposing ``group.name`` (``"gemma"`` branch only).
        genofile_name: base name of the phenotype file (other branch only).
        vals: sequence of phenotype values; ``"x"`` marks a missing value.
        method: mapping method; ``"gemma"`` selects the ``.fam`` rewrite.

    Raises:
        IndexError: if ``vals`` is shorter than the ``.fam`` file or a row
            has fewer than five columns.
    """
    if method != "gemma":
        pheno_path = "{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), genofile_name)
        with open(pheno_path, "w") as outfile:
            # str() so that numeric values are written rather than raising
            # TypeError on concatenation (the "gemma" branch already does this).
            outfile.writelines(
                "NA\n" if value == "x" else str(value) + "\n" for value in vals)
        return

    fam_path = "{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name)

    # The file is rewritten in place, so all rows are read before it is reopened.
    with open(fam_path, "r") as fam_file:
        fam_rows = [line.split() for line in fam_file]

    with open(fam_path, "w") as fam_file:
        for i, row in enumerate(fam_rows):
            this_val = -9 if vals[i] == "x" else vals[i]
            fam_file.write(" ".join(["0", row[1], row[2], row[3], row[4], str(this_val)]) + "\n")
예제 #3
0
def gen_covariates_file(this_dataset, covariates):
    """Write the covariate values for the group's samples to a text file.

    ``covariates`` is a comma-separated string whose entries have the form
    ``trait_name:dataset_name``.  For each entry a ``GeneralTrait`` is built
    and, for every sample in ``this_dataset.group.samplelist``, the trait's
    value is collected, or the string "-9" when the trait has no data for
    that sample.

    The output file is ``<group name>_covariates.txt`` under
    ``flat_files('mapping')``: one line per sample, one column per covariate,
    every value followed by a tab.
    """
    columns = []
    for entry in covariates.split(","):
        entry_parts = entry.split(":")
        trait_name = entry_parts[0]
        covariate_dataset = create_dataset(entry_parts[1])
        covariate_trait = GeneralTrait(dataset=covariate_dataset,
                                       name=trait_name,
                                       cellid=None)

        # get_samplelist() is called before reading group.samplelist,
        # presumably to populate it -- verify against the group class.
        this_dataset.group.get_samplelist()
        sample_names = this_dataset.group.samplelist
        logger.debug("SAMPLES:", sample_names)
        values_by_sample = covariate_trait.data
        logger.debug("SAMPLE DATA:", values_by_sample)

        columns.append([
            values_by_sample[name].value if name in values_by_sample else "-9"
            for name in sample_names
        ])

    covariates_path = "{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name)
    with open(covariates_path, "w") as outfile:
        # The first covariate's length decides how many rows are written.
        for row_index in range(len(columns[0])):
            row_text = "".join(str(column[row_index]) + "\t" for column in columns)
            outfile.write(row_text + "\n")
예제 #4
0
def gen_covariates_file(this_dataset, covariates):
    """Generate the covariates file for the group of ``this_dataset``.

    Each comma-separated entry of ``covariates`` is ``trait_name:dataset_name``.
    Every entry yields one column holding, per sample of
    ``this_dataset.group.samplelist``, the trait's value or "-9" when the
    trait has no data for the sample.  Columns are written side by side to
    ``<group name>_covariates.txt`` in ``flat_files('mapping')``, one sample
    per line, each value followed by a tab.
    """
    per_covariate_values = []
    for spec in covariates.split(","):
        spec_fields = spec.split(":")
        name_of_trait = spec_fields[0]
        source_dataset = create_dataset(spec_fields[1])
        trait = GeneralTrait(dataset=source_dataset,
                             name=name_of_trait,
                             cellid=None)

        # Called ahead of reading group.samplelist; presumably it fills the
        # list -- confirm against the group implementation.
        this_dataset.group.get_samplelist()
        group_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", group_samples)
        trait_data = trait.data
        logger.debug("SAMPLE DATA:", trait_data)

        values = []
        for sample_name in group_samples:
            if sample_name not in trait_data:
                values.append("-9")
                continue
            values.append(trait_data[sample_name].value)
        per_covariate_values.append(values)

    target_path = "{}/{}_covariates.txt".format(flat_files('mapping'),
                                                this_dataset.group.name)
    with open(target_path, "w") as outfile:
        # Row count is taken from the first covariate's column.
        row_count = len(per_covariate_values[0])
        for row in range(row_count):
            cells = [str(values[row]) + "\t" for values in per_covariate_values]
            outfile.write("".join(cells) + "\n")
def gen_pheno_txt_file(this_dataset, vals):
    """Rewrite the group's ``.fam`` file with the phenotypes in ``vals``.

    The ``.fam`` file under ``flat_files('mapping')`` is read fully, then
    rewritten row by row: column 1 is written twice (as the first and second
    field), columns 2-4 are copied, and the last field is ``vals[i]`` for row
    ``i``, with ``-9`` written where the value is ``"x"``.
    """
    fam_path = "{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name)

    # Everything is read up front because the same path is truncated below.
    with open(fam_path, "r") as fam_in:
        rows = [raw_line.split() for raw_line in fam_in]

    with open(fam_path, "w") as fam_out:
        for row_number, row in enumerate(rows):
            phenotype = vals[row_number]
            if phenotype == "x":
                phenotype = -9
            fields = [row[1], row[1], row[2], row[3], row[4], str(phenotype)]
            fam_out.write(" ".join(fields) + "\n")
예제 #6
0
    def __init__(self, name):
        """Load the markers for ``name`` from its ``_snps.txt`` file.

        Reads ``<name>_snps.txt`` under ``flat_files('genotype/bimbam')`` and
        stores one dict per line in ``self.markers`` with the keys ``name``
        (first column), ``Mb`` (second column divided by 1,000,000) and
        ``chr`` (third column, converted to ``int`` unless it is "X", "Y" or
        "M").

        The column delimiter is detected from the first line of the file by
        trying ", ", "," and tab in that order, falling back to a single
        space.  Previously each candidate was tested against a *different*
        line (one ``readline()`` per test), so files not delimited by ", "
        silently lost up to three markers.

        Raises:
            IndexError: if a line has fewer than three columns.
            ValueError: if a position or a non-X/Y/M chromosome is not numeric.
        """
        # Opened only so that a missing JSON genotype file still fails here;
        # its contents are not read, so the handle is closed immediately.
        with open(locate(name + ".json", 'genotype/json')):
            pass

        markers = []
        snps_path = "%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name)
        with open(snps_path, 'r') as bimbam_fh:
            # NOTE(review): the first line is consumed by the detection and is
            # not added as a marker, as before for ", "-delimited files --
            # confirm whether that line is a header or a real marker.
            first_line = bimbam_fh.readline()
            for candidate in (", ", ",", "\t"):
                if len(first_line.split(candidate)) > 2:
                    delimiter = candidate
                    break
            else:
                delimiter = " "

            for line in bimbam_fh:
                fields = line.split(delimiter)
                marker_name = fields[0].rstrip()
                position_mb = float(fields[1].rstrip()) / 1000000
                chromosome = fields[2].rstrip()
                if chromosome not in ("X", "Y", "M"):
                    chromosome = int(chromosome)
                markers.append({
                    'name': marker_name,
                    'Mb': position_mb,
                    'chr': chromosome,
                })

        self.markers = markers
예제 #7
0
def gen_pheno_txt_file(this_dataset, vals):
    """Rewrite the group's ``.fam`` file so its last column holds ``vals``.

    The ``.fam`` file under ``flat_files('mapping')`` is read completely and
    then written back: the first field becomes "0", columns 1-4 are copied,
    and the final field is ``vals[i]`` for row ``i`` (``-9`` where the value
    is ``"x"``).
    """
    fam_path = "{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name)

    # Read before writing: opening the same path with "w" truncates it.
    with open(fam_path, "r") as fam_in:
        rows = [raw_line.split() for raw_line in fam_in]

    with open(fam_path, "w") as fam_out:
        for row_number, row in enumerate(rows):
            phenotype = -9 if vals[row_number] == "x" else vals[row_number]
            fields = ["0", row[1], row[2], row[3], row[4], str(phenotype)]
            fam_out.write(" ".join(fields) + "\n")
예제 #8
0
 def check_plink_gemma():
     """Tell whether the group's mapping files are present.

     Returns True only when ``flat_file_exists("mapping")`` holds and the
     mapping directory contains ``<group name>.bed`` together with either
     ``<group name>.map`` or ``<group name>.bim``; otherwise False.

     ``self`` is not a parameter here; it is taken from the enclosing scope.
     """
     if not flat_file_exists("mapping"):
         return False

     file_prefix = flat_files("mapping") + "/" + self.dataset.group.name
     return os.path.isfile(file_prefix + ".bed") and (
         os.path.isfile(file_prefix + ".map")
         or os.path.isfile(file_prefix + ".bim"))
예제 #9
0
 def check_plink_gemma():
     """Report whether the ``.bed`` and ``.map``/``.bim`` files exist.

     False is returned when ``flat_file_exists("mapping")`` is false, when
     ``<group name>.bed`` is missing from the mapping directory, or when
     neither ``<group name>.map`` nor ``<group name>.bim`` is there.

     ``self`` comes from the enclosing scope, not from a parameter.
     """
     if flat_file_exists("mapping"):
         base_path = flat_files("mapping") + "/" + self.dataset.group.name
         bed_present = os.path.isfile(base_path + ".bed")
         if bed_present:
             return (os.path.isfile(base_path + ".map")
                     or os.path.isfile(base_path + ".bim"))
     return False
예제 #10
0
def gen_pheno_txt_file(this_dataset, genofile_name, vals):
    """Generate the phenotype file for GEMMA.

    Writes ``<genofile_name>_pheno.txt`` under
    ``flat_files('genotype/bimbam')`` with one entry of ``vals`` per line;
    entries equal to ``"x"`` are written as ``NA``.

    Args:
        this_dataset: not used by this function; kept so existing callers
            keep working.
        genofile_name: base name of the phenotype file.
        vals: iterable of phenotype values, ``"x"`` marking a missing value.
    """
    pheno_path = "{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), genofile_name)
    with open(pheno_path, "w") as outfile:
        for value in vals:
            if value == "x":
                outfile.write("NA\n")
            else:
                # str() so numeric values are written instead of raising
                # TypeError on string concatenation.
                outfile.write(str(value) + "\n")
예제 #11
0
def run_gemma(this_dataset, samples, vals):
    """Run GEMMA on the group's mapping files and return the parsed results.

    The phenotype file is regenerated from ``vals``, the GEMMA command line
    is built from the group's files in ``flat_files('mapping')`` and executed
    through ``os.system``, and whatever ``parse_gemma_output`` returns for
    ``this_dataset`` is handed back.  ``samples`` is not used here.
    """
    print("INSIDE GEMMA_MAPPING")

    gen_pheno_txt_file(this_dataset, vals)

    # use GEMMA_RUN in the next one, create a unique temp file

    command_args = (
        flat_files('mapping'),
        this_dataset.group.name,
        flat_files('mapping'),
        this_dataset.group.name,
        webqtlConfig.GENERATED_IMAGE_DIR,
        this_dataset.group.name,
    )
    gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -outdir %s -o %s_output' % command_args
    print("gemma_command:" + gemma_command)

    os.system(gemma_command)

    return parse_gemma_output(this_dataset)
예제 #12
0
def gen_pheno_txt_file(this_dataset, genofile_name, vals):
    """Generate the phenotype file for GEMMA.

    One entry of ``vals`` is written per line to
    ``<genofile_name>_pheno.txt`` under ``flat_files('genotype/bimbam')``;
    the marker ``"x"`` is written as ``NA``.

    Args:
        this_dataset: unused here; retained for compatibility with callers.
        genofile_name: base name of the phenotype file to write.
        vals: iterable of phenotype values, ``"x"`` meaning missing.
    """
    pheno_path = "{}/{}_pheno.txt".format(flat_files('genotype/bimbam'),
                                          genofile_name)
    with open(pheno_path, "w") as outfile:
        # str() lets numeric values through; plain concatenation raised
        # TypeError for anything that was not already a string.
        outfile.writelines(
            "NA\n" if value == "x" else str(value) + "\n" for value in vals)
def get_samples_from_ped_file(dataset):
    """Return the sample names listed in the group's ``.ped`` file.

    The file ``<group name>.ped`` under ``flat_files('mapping')`` is read
    line by line; each line is stripped, split on tabs, and the stripped
    first field is appended to the result.

    Args:
        dataset: object exposing ``group.name``.

    Returns:
        list of str: one entry per line of the file, in file order.
    """
    # A "/" now separates directory and file name, as every other
    # flat_files('mapping') path in this code does; it was missing before.
    ped_path = "{}/{}.ped".format(flat_files('mapping'), dataset.group.name)

    sample_list = []
    # ``with`` closes the handle, which was previously left open.
    with open(ped_path, "r") as ped_file:
        for line in ped_file:
            # str methods replace the Python-2-only string.split/string.strip.
            fields = [field.strip() for field in line.strip().split('\t')]
            sample_list.append(fields[0])

    return sample_list
예제 #14
0
def get_samples_from_ped_file(dataset):
    """Collect the first column of the group's ``.ped`` file.

    Every line of ``<group name>.ped`` in ``flat_files('mapping')`` is
    stripped and split on tabs; the stripped first field is taken as the
    sample name.

    Args:
        dataset: object exposing ``group.name``.

    Returns:
        list of str: the sample names in file order, one per line.
    """
    # The original format string had no "/" between directory and file name,
    # unlike the other flat_files('mapping') paths; it is added here.
    ped_path = "{}/{}.ped".format(flat_files('mapping'), dataset.group.name)

    # The context manager closes the file (it used to be left open), and str
    # methods stand in for the Python-2-only string.split/string.strip.
    with open(ped_path, "r") as ped_file:
        return [line.strip().split('\t')[0].strip() for line in ped_file]
예제 #15
0
    def gen_reaper_results(self):
        """Run qtlreaper for each trait and store signed LRS values.

        For every entry of ``self.trait_list`` the trait's informative samples
        are exported, restricted to samples present in the group's sample
        list, and written with ``gen_pheno_txt_file``.  The reaper command is
        then run through ``os.system`` and its output parsed.
        ``self.trait_results`` maps each trait name to a list with one LRS
        value per parsed result, negated when that result's ``additive``
        value is greater than 0.
        """
        self.trait_results = {}
        for trait_db in self.trait_list:
            self.dataset.group.get_markers()
            this_trait = trait_db[0]

            # The return values `genotype`, `variances` and `sample_aliases`
            # are not used below; the calls themselves are kept.
            genotype = self.dataset.group.read_genotype_file(use_reaper=False)
            samples, values, variances, sample_aliases = this_trait.export_informative()

            if self.dataset.group.genofile is None:
                genofile_name = self.dataset.group.name
            else:
                # Drop the last five characters of the genofile name.
                genofile_name = self.dataset.group.genofile[:-5]

            kept_samples = []
            kept_values = []
            for position, sample in enumerate(samples):
                if sample in self.dataset.group.samplelist:
                    kept_samples.append(str(sample))
                    kept_values.append(values[position])

            trait_filename = "_".join(
                [str(this_trait.name), str(self.dataset.name), "pheno"])
            gen_pheno_txt_file(kept_samples, kept_values, trait_filename)

            alphabet = string.ascii_uppercase + string.digits
            random_suffix = ''.join(random.choice(alphabet) for _ in range(6))
            output_filename = self.dataset.group.name + "_GWA_" + random_suffix

            command_args = (flat_files('genotype'), genofile_name, TEMPDIR,
                            trait_filename, webqtlConfig.GENERATED_IMAGE_DIR,
                            output_filename)
            reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt -n 1000 -o {4}{5}.txt'.format(*command_args)

            os.system(reaper_command)

            reaper_results = parse_reaper_output(output_filename)

            # Not used afterwards; kept so a non-numeric lrs_value raises
            # before self.trait_results is updated for this trait.
            lrs_values = [float(qtl['lrs_value']) for qtl in reaper_results]

            signed_lrs = []
            self.trait_results[this_trait.name] = signed_lrs
            for qtl in reaper_results:
                if qtl['additive'] > 0:
                    signed_lrs.append(-float(qtl['lrs_value']))
                else:
                    signed_lrs.append(float(qtl['lrs_value']))
예제 #16
0
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan and return the group's markers.

    The phenotype file is regenerated from ``vals``, PLINK is executed through
    ``os.system`` against the group's files in ``flat_files('mapping')`` with
    the given ``maf``, and the p-values parsed from its output are added to
    ``dataset.group.markers`` before its ``markers`` attribute is returned.
    """
    output_prefix = "%s_%s_" % (dataset.group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(output_prefix)
    gen_pheno_txt_file(dataset, vals)

    command_args = (flat_files('mapping'), dataset.group.name, maf, TMPDIR,
                    plink_output_filename)
    plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % command_args
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    # Only the p-values are used; the count is discarded.
    _count, p_values = parse_plink_output(plink_output_filename, species)

    logger.debug("p_values:", p_values)
    group_markers = dataset.group.markers
    group_markers.add_pvalues(p_values)

    return dataset.group.markers.markers
예제 #17
0
def run_plink(this_trait, dataset, species, vals, maf):
    """Execute PLINK for ``dataset`` and attach the resulting p-values.

    Steps: pick a random output file name prefixed with the group and trait
    names, regenerate the phenotype file from ``vals``, run PLINK via
    ``os.system`` on the group's files in ``flat_files('mapping')``, parse
    the output for ``species`` and add the p-values to the group's markers.

    Returns ``dataset.group.markers.markers``.
    """
    name_prefix = "%s_%s_" % (dataset.group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(name_prefix)
    gen_pheno_txt_file(dataset, vals)

    format_values = (
        flat_files('mapping'),
        dataset.group.name,
        maf,
        TMPDIR,
        plink_output_filename,
    )
    plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % format_values
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    parsed = parse_plink_output(plink_output_filename, species)
    # parse_plink_output yields a (count, p_values) pair; count is unused.
    count, p_values = parsed

    logger.debug("p_values:", p_values)
    dataset.group.markers.add_pvalues(p_values)

    return dataset.group.markers.markers
예제 #18
0
 def __init__(self, name, specified_markers=None):
     """Load markers from the ``<name>.bim`` file in ``flat_files('mapping')``.

     Each whitespace-separated line contributes one dict to ``self.markers``
     with ``chr`` (column 0 as ``int``), ``name`` (column 1) and ``Mb``
     (column 3 divided by 1,000,000).

     Args:
         name: base name of the ``.bim`` file.
         specified_markers: optional collection of marker names; when it is
             non-empty only lines whose column 1 is in it are kept.  The
             default is now ``None`` instead of a shared mutable ``[]``;
             an empty or omitted value still means "keep every marker".

     Raises:
         IndexError: if a line has fewer than four columns.
         ValueError: if column 0 is not an integer or column 3 not a number.
     """
     # A set makes the per-line membership test constant time.
     wanted = set(specified_markers) if specified_markers else None

     # ``with`` closes the file, which was previously left open.
     with open(flat_files('mapping') + '/' + name + '.bim') as marker_data_fh:
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
             if wanted is not None and splat[1] not in wanted:
                 continue
             self.markers.append({
                 'chr': int(splat[0]),
                 'name': splat[1],
                 'Mb': float(splat[3]) / 1000000,
             })
예제 #19
0
 def __init__(self, name, specified_markers=None):
     """Read ``<name>.bim`` from ``flat_files('mapping')`` into ``self.markers``.

     One dict is stored per kept line, holding ``chr`` (column 0 converted
     to ``int``), ``name`` (column 1) and ``Mb`` (column 3 divided by
     1,000,000).  Columns are split on whitespace.

     Args:
         name: base name of the ``.bim`` file.
         specified_markers: optional collection of marker names used as a
             filter; empty or ``None`` keeps every line.  ``None`` replaces
             the previous mutable ``[]`` default with the same meaning.

     Raises:
         IndexError: if a line has fewer than four columns.
         ValueError: if column 0 or column 3 cannot be converted.
     """
     # Build the filter once; a set avoids a linear scan for every line.
     wanted = set(specified_markers) if specified_markers else None

     # The context manager closes the handle that used to be leaked.
     with open(flat_files('mapping') + '/' + name + '.bim') as marker_data_fh:
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
             if wanted is not None and splat[1] not in wanted:
                 continue
             marker = {}
             marker['chr'] = int(splat[0])
             marker['name'] = splat[1]
             marker['Mb'] = float(splat[3]) / 1000000
             self.markers.append(marker)
def run_plink(this_trait, dataset, species, vals, maf):
    """Run PLINK with ``-9`` as the missing phenotype and return the markers.

    A random output name is derived from the group and trait names, the
    phenotype file is regenerated from ``vals``, and PLINK is run through
    ``os.system`` on the group's files in ``flat_files('mapping')``.  The
    p-values parsed from the output are added to ``dataset.group.markers``,
    whose ``markers`` attribute is returned.
    """
    name_prefix = "%s_%s_" % (dataset.group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(name_prefix)

    gen_pheno_txt_file(dataset, vals)

    command_args = (flat_files('mapping'), dataset.group.name, maf, TMPDIR,
                    plink_output_filename)
    plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-fid --no-parents --no-sex --maf %s --missing-phenotype -9 --out %s/%s --assoc ' % command_args
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    # The marker count returned alongside the p-values is not used.
    _count, p_values = parse_plink_output(plink_output_filename, species)

    logger.debug("p_values:", p_values)
    dataset.group.markers.add_pvalues(p_values)

    return dataset.group.markers.markers
예제 #21
0
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA

    The phenotype file (and, when ``covariates`` is not empty, the covariates
    file) is generated first.  Then one of three command lines is built and
    run through ``os.system``:

    * ``method == "gemma"``: GEMMA on the group's ``-bfile`` in
      ``flat_files('mapping')``;
    * otherwise with ``use_loco == "True"``: the GEMMA wrapper, first to
      compute the kinship JSON, then for the LOCO scan;
    * otherwise: GEMMA on the files in ``flat_files('genotype/bimbam')``.

    Args:
        this_dataset: dataset exposing ``group`` and ``species``.
        samples: not used by this function.
        vals: phenotype values passed on to ``gen_pheno_txt_file``.
        covariates: comma-separated covariate string, or "" for none.
        method: "gemma" selects the ``-bfile`` mode.
        use_loco: the string "True" enables the LOCO wrapper run.

    Returns:
        Whatever ``parse_loco_output`` (after a LOCO wrapper run) or
        ``parse_gemma_output`` (otherwise) returns.
    """
    if this_dataset.group.genofile is not None:
        # Drop the last five characters of the genofile name.
        genofile_name = this_dataset.group.genofile[:-5]
    else:
        genofile_name = this_dataset.group.name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    # Make sure an association output file exists, creating an empty one if
    # needed; the handle is closed right away instead of being leaked.
    assoc_path = "{}{}_output.assoc.txt".format(
        webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
    if not os.path.isfile(assoc_path):
        with open(assoc_path, "w+"):
            pass

    # Chromosomes are looked up under the keys 1..len, hence the ``i + 1``.
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ",".join(
        this_chromosomes[i + 1].name for i in range(len(this_chromosomes)))

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    group_name = this_dataset.group.name
    image_dir = webqtlConfig.GENERATED_IMAGE_DIR
    suffix_chars = string.ascii_uppercase + string.digits

    # Set only when the LOCO wrapper actually runs.  The final parsing step
    # keys off this, so method == "gemma" combined with use_loco == "True"
    # no longer hits an unbound ``gwa_output_filename``.
    gwa_output_filename = None

    if method == "gemma":
        mapping_dir = flat_files('mapping')
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            mapping_dir, group_name, mapping_dir, group_name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (
                mapping_dir, group_name, image_dir, group_name)
        else:
            gemma_command += ' -outdir %s -o %s_output' % (image_dir,
                                                           group_name)
    else:
        bimbam_dir = flat_files('genotype/bimbam')
        if use_loco == "True":
            # Step 1: compute the kinship matrices into a JSON file.
            k_output_filename = group_name + "_K_" + ''.join(
                random.choice(suffix_chars) for _ in range(6))
            generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (
                bimbam_dir, genofile_name, bimbam_dir, genofile_name,
                bimbam_dir, genofile_name, TEMPDIR, k_output_filename)
            logger.debug("k_command:" + generate_k_command)
            os.system(generate_k_command)

            # Step 2: run the scan using that kinship JSON as input.
            gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (
                TEMPDIR, k_output_filename, bimbam_dir, genofile_name,
                bimbam_dir, genofile_name)

            gwa_output_filename = group_name + "_GWA_" + ''.join(
                random.choice(suffix_chars) for _ in range(6))
            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    flat_files('mapping'), group_name, bimbam_dir,
                    genofile_name, TEMPDIR, gwa_output_filename)
            else:
                gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    bimbam_dir, genofile_name, TEMPDIR, gwa_output_filename)
        else:
            gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
                bimbam_dir, genofile_name, bimbam_dir, genofile_name,
                bimbam_dir, genofile_name, bimbam_dir, genofile_name)

            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (
                    flat_files('mapping'), group_name, image_dir,
                    genofile_name)
            else:
                gemma_command += ' -outdir %s -debug -o %s_output' % (
                    image_dir, genofile_name)

    logger.debug("gemma_command:" + gemma_command)
    os.system(gemma_command)

    if gwa_output_filename is not None:
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs
예제 #22
0
 def check_plink_gemma():
     """Tell whether ``<self.name>.bed`` exists in the mapping directory.

     Returns False when ``flat_file_exists("mapping")`` is false or the
     ``.bed`` file is absent, True otherwise.  ``self`` is taken from the
     enclosing scope rather than passed in.
     """
     if not flat_file_exists("mapping"):
         return False

     bed_path = flat_files("mapping") + "/" + self.name + ".bed"
     return os.path.isfile(bed_path)
예제 #23
0
# Directory layout used at run time.  Everything generated is placed under
# TEMPDIR/gn2/; the statements run in order because later paths are built
# from earlier ones.
assert_writable_dir(TEMPDIR)

# NOTE(review): mk_dir presumably creates the directory and returns its path
# -- confirm against its definition.
TMPDIR               = mk_dir(TEMPDIR+'/gn2/')
assert_writable_dir(TMPDIR)

# NOTE(review): TMPDIR is built with a trailing '/', so '/cache/' yields a
# doubled slash if mk_dir returns its argument unchanged -- confirm.
CACHEDIR             = mk_dir(TMPDIR+'/cache/')
# We can no longer write into the git tree:
# (these two are appended without a leading '/', so they rely on TMPDIR
# ending in one)
GENERATED_IMAGE_DIR  = mk_dir(TMPDIR+'generated/')
GENERATED_TEXT_DIR   = mk_dir(TMPDIR+'generated_text/')

# Make sure we have permissions to access these
assert_writable_dir(CACHEDIR)
assert_writable_dir(GENERATED_IMAGE_DIR)
assert_writable_dir(GENERATED_TEXT_DIR)

# Flat file directories
GENODIR              = flat_files('genotype')+'/'
assert_dir(GENODIR)

# JSON genotypes are OBSOLETE
JSON_GENODIR         = flat_files('genotype/json')+'/'
if not valid_path(JSON_GENODIR):
    # fall back on old location (move the dir, FIXME)
    # NOTE(review): unlike the primary value above, this fallback has no
    # trailing '/' appended -- confirm that users of JSON_GENODIR cope.
    JSON_GENODIR = flat_files('json')

# Are we using the following...?
PORTADDR = "http://50.16.251.170"
INFOPAGEHREF = '/dbdoc/%s.html'
CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
SCRIPTFILE = 'main.py'
예제 #24
0
 def check_plink_gemma():
     """Report whether the mapping directory holds ``<self.name>.bed``.

     True requires both ``flat_file_exists("mapping")`` and the presence of
     the ``.bed`` file; anything else gives False.  ``self`` is resolved
     from the enclosing scope, not from a parameter.
     """
     if flat_file_exists("mapping"):
         mapping_dir = flat_files("mapping") + "/"
         return os.path.isfile(mapping_dir + self.name + ".bed")
     return False
def run_reaper(this_trait,
               this_dataset,
               samples,
               vals,
               json_data,
               num_perm,
               boot_check,
               num_bootstrap,
               do_control,
               control_marker,
               manhattan_plot,
               first_run=True,
               output_files=None):
    """Generates p-values for each marker using qtlreaper

    On a first run the phenotype file is written, the reaper command line is
    assembled (bootstrap, permutation, control-marker and interval options
    are added as requested) and executed through ``os.system``.  On later
    runs (``first_run`` false) nothing is executed and the file names are
    taken from ``output_files``.  In both cases the output files are parsed.

    Args:
        this_trait: trait whose ``name`` goes into the phenotype file name.
        this_dataset: dataset exposing ``name`` and ``group``.
        samples: sample names passed to ``gen_pheno_txt_file``.
        vals: phenotype values passed to ``gen_pheno_txt_file``.
        json_data: not used by this function.
        num_perm: number of permutations; options are added when > 0.
        boot_check: truthy to request bootstrapping.
        num_bootstrap: number of bootstrap runs; used when > 0.
        do_control: the string "true" enables the control marker.
        control_marker: marker name passed with ``-c`` when enabled.
        manhattan_plot: anything other than ``True`` adds ``--interval 1``.
        first_run: when false, reuse the files named in ``output_files``.
        output_files: ``[output, permutation, bootstrap]`` file names from a
            previous call; required when ``first_run`` is false.

    Returns:
        tuple: ``(marker_obs, permu_vals, suggestive, significant,
        bootstrap_vals, [output_filename, permu_filename,
        bootstrap_filename])``.  ``suggestive`` and ``significant`` are 0
        when there are no permutation values.
    """

    def random_suffix():
        # Six random uppercase letters/digits to make file names unique.
        return ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(6))

    if first_run:
        if this_dataset.group.genofile is not None:
            # Drop the last five characters of the genofile name.
            genofile_name = this_dataset.group.genofile[:-5]
        else:
            genofile_name = this_dataset.group.name

        trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno"
        gen_pheno_txt_file(samples, vals, trait_filename)

        output_filename = f"{this_dataset.group.name}_GWA_" + random_suffix()
        bootstrap_filename = None
        permu_filename = None

        opt_list = []
        if boot_check and num_bootstrap > 0:
            bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" +
                                  random_suffix())

            opt_list.append("-b")
            opt_list.append(f"--n_bootstrap {str(num_bootstrap)}")
            opt_list.append(
                f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt"
            )
        if num_perm > 0:
            # This literal lacked its ``f`` prefix, so the file was named
            # "{this_dataset.group.name}_PERM_..." verbatim.
            permu_filename = (f"{this_dataset.group.name}_PERM_" +
                              random_suffix())
            opt_list.append("-n " + str(num_perm))
            opt_list.append("--permu_output " +
                            webqtlConfig.GENERATED_IMAGE_DIR + permu_filename +
                            ".txt")
        if control_marker != "" and do_control == "true":
            opt_list.append("-c " + control_marker)
        # Compared against True on purpose: other truthy values must still
        # add the interval option, as before.
        if manhattan_plot != True:
            opt_list.append("--interval 1")

        reaper_command = (
            REAPER_COMMAND +
            ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.
            format(flat_files('genotype'), genofile_name, TEMPDIR,
                   trait_filename, " ".join(opt_list),
                   webqtlConfig.GENERATED_IMAGE_DIR, output_filename))

        logger.debug("reaper_command:" + reaper_command)
        os.system(reaper_command)
    else:
        output_filename, permu_filename, bootstrap_filename = output_files

    marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(
        output_filename, permu_filename, bootstrap_filename)

    suggestive = 0
    significant = 0
    if len(permu_vals) > 0:
        # Thresholds are read at the 37% and 95% positions of permu_vals.
        # NOTE(review): this assumes permu_vals is sorted ascending and has
        # num_perm entries -- confirm in parse_reaper_output.
        suggestive = permu_vals[int(num_perm * 0.37 - 1)]
        significant = permu_vals[int(num_perm * 0.95 - 1)]

    return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals,
            [output_filename, permu_filename, bootstrap_filename])
예제 #26
0
# Directory layout used at run time.  Everything generated is placed under
# TEMPDIR/gn2/; the statements run in order because later paths are built
# from earlier ones.
# NOTE(review): mk_dir presumably creates the directory and returns its path
# -- confirm against its definition.
TMPDIR = mk_dir(TEMPDIR + '/gn2/')
assert_writable_dir(TMPDIR)

# NOTE(review): TMPDIR is built with a trailing '/', so '/cache/' yields a
# doubled slash if mk_dir returns its argument unchanged -- confirm.
CACHEDIR = mk_dir(TMPDIR + '/cache/')
# We can no longer write into the git tree:
# (these two are appended without a leading '/', so they rely on TMPDIR
# ending in one)
GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/')
GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/')

# Make sure we have permissions to access these
assert_writable_dir(CACHEDIR)
assert_writable_dir(GENERATED_IMAGE_DIR)
assert_writable_dir(GENERATED_TEXT_DIR)

# Flat file directories
GENODIR = flat_files('genotype') + '/'
assert_dir(GENODIR)
assert_dir(GENODIR + 'bimbam')  # for gemma

# JSON genotypes are OBSOLETE
JSON_GENODIR = flat_files('genotype/json') + '/'
if not valid_path(JSON_GENODIR):
    # fall back on old location (move the dir, FIXME)
    # NOTE(review): unlike the primary value above, this fallback has no
    # trailing '/' appended -- confirm that users of JSON_GENODIR cope.
    JSON_GENODIR = flat_files('json')

# Are we using the following...?
PORTADDR = "http://50.16.251.170"
INFOPAGEHREF = '/dbdoc/%s.html'
CGIDIR = '/webqtl/'  #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
SCRIPTFILE = 'main.py'
예제 #27
0
def get_genotypes(group_name, file_format="csv", dataset_name=None):
    """Return the genotypes of ``group_name`` as a downloadable response.

    Three formats are supported:

    * ``"csv"`` / ``"geno"``: the group's ``.geno`` file from
      ``flat_files("genotype")``, re-emitted tab-separated; lines starting
      with "#" or "@" are passed through as single cells.
    * ``"rqtl2"``: a zip archive with the JSON control file, the geno and
      gmap CSV files from ``flat_files("genotype/rqtl2")`` and the
      phenotypes fetched from the sample_data API.
    * anything else: the ``_geno.txt`` file from
      ``flat_files("genotype/bimbam")``, re-emitted comma-separated.

    The request argument ``limit_to`` (digits only) caps the number of data
    lines returned for the two text formats; a value of 0 means no cap.

    Args:
        group_name: name of the group whose genotypes are requested.
        file_format: "csv", "geno", "rqtl2" or anything else for BIMBAM.
        dataset_name: optional dataset used to name the rqtl2 files and to
            pick the phenotypes; defaults to ``<group_name>Publish`` data.

    Returns:
        The response object, or the result of ``return_error`` with code 204
        when the genotype file for the requested format does not exist.
    """

    def no_results():
        # Shared "nothing to return" response for all three formats.
        return return_error(code=204,
                            source=request.url_rule.rule,
                            title="No Results",
                            details="")

    limit_num = None
    if 'limit_to' in request.args:
        if request.args['limit_to'].isdigit():
            limit_num = int(request.args['limit_to'])

    si = StringIO.StringIO()
    if file_format == "csv" or file_format == "geno":
        filename = group_name + ".geno"
        geno_path = "{0}/{1}.geno".format(flat_files("genotype"), group_name)

        if not os.path.isfile(geno_path):
            return no_results()

        output_lines = []
        with open(geno_path) as genofile:
            # Only data lines count towards the limit; "#"/"@" lines do not.
            i = 0
            for line in genofile:
                if line[0] == "#" or line[0] == "@":
                    output_lines.append([line.strip()])
                else:
                    if limit_num and i >= limit_num:
                        break
                    output_lines.append(line.split())
                    i += 1

        csv_writer = csv.writer(si,
                                delimiter="\t",
                                escapechar="\\",
                                quoting=csv.QUOTE_NONE)
    elif file_format == "rqtl2":
        memory_file = io.BytesIO()
        if dataset_name:
            filename = dataset_name
        else:
            filename = group_name

        rqtl2_dir = flat_files("genotype/rqtl2")
        if not os.path.isfile("{0}/{1}_geno.csv".format(rqtl2_dir,
                                                        group_name)):
            return no_results()

        # Each input file is read inside ``with`` so its handle is closed;
        # the handles used to be left open.
        with open("{0}/{1}.json".format(rqtl2_dir, group_name)) as config_fh:
            yaml_file = json.load(config_fh)
        yaml_file["geno"] = filename + "_geno.csv"
        yaml_file["gmap"] = filename + "_gmap.csv"
        yaml_file["pheno"] = filename + "_pheno.csv"

        with open("{0}/{1}_geno.csv".format(rqtl2_dir,
                                            group_name)) as geno_fh:
            geno_data = geno_fh.read()
        with open("{0}/{1}_gmap.csv".format(rqtl2_dir,
                                            group_name)) as gmap_fh:
            gmap_data = gmap_fh.read()

        if dataset_name:
            phenotypes = requests.get(
                "http://gn2.genenetwork.org/api/v_pre1/sample_data/" +
                dataset_name)
        else:
            phenotypes = requests.get(
                "http://gn2.genenetwork.org/api/v_pre1/sample_data/" +
                group_name + "Publish")

        with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf:
            zf.writestr(filename + ".json", json.dumps(yaml_file))
            zf.writestr(filename + "_geno.csv", geno_data)
            zf.writestr(filename + "_gmap.csv", gmap_data)
            zf.writestr(filename + "_pheno.csv", phenotypes.content)

        memory_file.seek(0)

        return send_file(memory_file,
                         attachment_filename=filename + ".zip",
                         as_attachment=True)
    else:
        filename = group_name + ".bimbam"
        bimbam_path = "{0}/{1}_geno.txt".format(
            flat_files("genotype/bimbam"), group_name)

        # Check the file that is actually opened.  The test used to look for
        # <group>.geno in the genotype directory, so a missing BIMBAM file
        # raised instead of yielding the 204 response.
        if not os.path.isfile(bimbam_path):
            return no_results()

        output_lines = []
        with open(bimbam_path) as genofile:
            for i, line in enumerate(genofile):
                if limit_num and i >= limit_num:
                    break
                output_lines.append(
                    [field.strip() for field in line.split(",")])

        csv_writer = csv.writer(si, delimiter=",")

    csv_writer.writerows(output_lines)
    output = make_response(si.getvalue())
    output.headers["Content-Disposition"] = "attachment; filename=" + filename
    output.headers["Content-type"] = "text/csv"

    return output
예제 #28
0
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA

    Writes the phenotype file through gen_pheno_txt_file, optionally writes
    the covariates file, builds the GEMMA command line for the requested
    method, runs it through the shell and parses the result.

    Parameters:
      this_dataset -- dataset object; this function reads ``group.name``,
                      ``group.genofile`` and
                      ``species.chromosomes.chromosomes`` from it.
      samples      -- not used by this function (kept for callers).
      vals         -- phenotype values, forwarded to gen_pheno_txt_file.
      covariates   -- "" for none; anything else is forwarded to
                      gen_covariates_file and adds a ``-c`` option.
      method       -- "gemma" runs on the ``-bfile`` input under
                      flat_files('mapping'); any other value runs on the
                      BIMBAM files under flat_files('genotype/bimbam').
      use_loco     -- the string "True" requests a LOCO run through
                      GEMMA_WRAPPER_COMMAND; it is only honoured when
                      method is not "gemma".

    Returns the value of parse_loco_output when a LOCO run was performed,
    otherwise the value of parse_gemma_output.
    """

    if this_dataset.group.genofile is not None:
        # Drop the last five characters of the genotype file name
        # (presumably a ".geno" extension -- confirm against callers).
        genofile_name = this_dataset.group.genofile[:-5]
    else:
        genofile_name = this_dataset.group.name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    # Make sure an (empty) association output file exists before GEMMA runs.
    # The handle is closed immediately instead of being leaked.
    assoc_path = "{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
    if not os.path.isfile(assoc_path):
        with open(assoc_path, "w+"):
            pass

    # The chromosome collection is indexed starting from 1.
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ",".join(this_chromosomes[i + 1].name
                               for i in range(len(this_chromosomes)))

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    # Stays None unless a LOCO run is actually set up below; the final parse
    # step keys off this instead of re-testing use_loco, which previously
    # raised UnboundLocalError for method == "gemma" with use_loco == "True".
    gwa_output_filename = None

    # NOTE(review): the commands below are shell strings handed to os.system
    # (the LOCO ones rely on ">" redirection). Group and genotype file names
    # are interpolated unescaped -- confirm they can never be user-controlled.
    if method == "gemma":
        mapping_dir = flat_files('mapping')
        group_name = this_dataset.group.name
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            mapping_dir, group_name, mapping_dir, group_name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (
                mapping_dir, group_name,
                webqtlConfig.GENERATED_IMAGE_DIR, group_name)
        else:
            gemma_command += ' -outdir %s -o %s_output' % (
                webqtlConfig.GENERATED_IMAGE_DIR, group_name)
    else:
        bimbam_dir = flat_files('genotype/bimbam')
        if use_loco == "True":
            # Step 1: compute the kinship matrices; the wrapper's JSON
            # output is redirected into a randomly named file.
            k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
            generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (
                bimbam_dir, genofile_name,
                bimbam_dir, genofile_name,
                bimbam_dir, genofile_name,
                TEMPDIR, k_output_filename)
            logger.debug("k_command:" + generate_k_command)
            os.system(generate_k_command)

            # Step 2: run the association, feeding in the step 1 JSON.
            gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (
                TEMPDIR, k_output_filename,
                bimbam_dir, genofile_name,
                bimbam_dir, genofile_name)

            gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    flat_files('mapping'), this_dataset.group.name,
                    bimbam_dir, genofile_name,
                    TEMPDIR, gwa_output_filename)
            else:
                gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    bimbam_dir, genofile_name,
                    TEMPDIR, gwa_output_filename)
        else:
            gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
                bimbam_dir, genofile_name,
                bimbam_dir, genofile_name,
                bimbam_dir, genofile_name,
                bimbam_dir, genofile_name)

            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (
                    flat_files('mapping'), this_dataset.group.name,
                    webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
            else:
                gemma_command += ' -outdir %s -debug -o %s_output' % (
                    webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)

    logger.debug("gemma_command:" + gemma_command)
    os.system(gemma_command)

    if gwa_output_filename is not None:
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs
예제 #29
0
# URL templates for external genome resources; callers fill the %s slots.
# NOTE(review): the UCSC_RUDI_TRACK_URL literal starts with a space --
# confirm whether that is intentional before relying on it as a bare URL.
UCSC_RUDI_TRACK_URL = " http://genome.cse.ucsc.edu/cgi-bin/hgTracks?org=%s&db=%s&hgt.customText=http://gbic.biol.rug.nl/~ralberts/tracks/%s/%s"
GENOMEBROWSER_URL = "http://ucscbrowser.genenetwork.org/cgi-bin/hgTracks?clade=mammal&org=Mouse&db=mm9&position=%s&hgt.suggest=&pix=800&Submit=submit"
ENSEMBLETRANSCRIPT_URL = "http://useast.ensembl.org/Mus_musculus/Lucene/Details?species=Mus_musculus;idx=Transcript;end=1;q=%s"

# HTMLPATH = GNROOT + 'genotype_files/'
# PYLMM_PATH
# IMGDIR = GNROOT + '/wqflask/wqflask/static/output/'

# Scratch locations, all rooted at TEMPDIR (mk_dir is defined elsewhere).
TMPDIR = mk_dir(TEMPDIR + '/gn2/')
CACHEDIR = mk_dir(TEMPDIR + '/cache/')
# Generated output lives under TMPDIR because the git tree is no longer
# writable.
GENERATED_IMAGE_DIR = mk_dir(TMPDIR + '/generate/')
GENERATED_TEXT_DIR = mk_dir(TMPDIR + '/generate_text/')

# Genotype flat-file locations; GENODIR carries a trailing slash.
GENODIR = flat_files('genotype') + '/'
JSON_GENODIR = assert_dir(GENODIR + 'json/')

# SITENAME = 'GN'
# PORTADDR = "http://50.16.251.170"
# BASEHREF = '<base href="http://50.16.251.170/">'

INFOPAGEHREF = '/dbdoc/%s.html'
# XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
CGIDIR = '/webqtl/'
SCRIPTFILE = 'main.py'

# GLOSSARYFILE = "/glossary.html"
# REFRESHSTR = '<meta http-equiv="refresh" content="5;url=%s' + SCRIPTFILE +'?sid=%s">'
# REFRESHDIR = '%s' + SCRIPTFILE +'?sid=%s'