示例#1
0
    def buildWork(self):
        """Assemble the beta step and the z-score step from the state on ``self``.

        Reads the paths and the ``*_value`` / ``*_on`` variables held on
        ``self`` (each queried through ``.get()``), packs them into two plain
        argument objects, and builds an ``M03_betas.GetBetas`` step followed
        by an ``M04_zscores.CalculateZScores`` step.

        Returns:
            An object whose ``run()`` executes both steps in order and drops
            its reference to each step as soon as that step has finished.
        """

        def optional_column(value_var, enabled_var):
            # Column name only when its toggle is on; None otherwise.
            return value_var.get() if enabled_var.get() else None

        def non_empty(value_var):
            # An empty value means "not specified".
            value = value_var.get()
            return value if len(value) else None

        class BetaWorkArgs(object):
            """Argument holder handed to ``M03_betas.GetBetas``."""

            def __init__(self, source):
                self.verbosity = "10"
                self.weight_db_path = source.weight_db_path
                self.gwas_folder = source.gwas_folder
                self.output_folder = source.beta_folder

                # These three columns are always taken from the source.
                self.snp_column = source.snp_value.get()
                self.a1_column = source.a1_value.get()
                self.a2_column = source.a2_value.get()

                # The remaining columns are only set when their toggle is on.
                self.or_column = optional_column(source.or_value, source.or_on)
                self.beta_column = optional_column(source.beta_value, source.beta_on)
                self.beta_sign_column = optional_column(source.beta_sign_value, source.beta_sign_on)
                self.beta_zscore_column = optional_column(source.beta_z_value, source.beta_z_on)
                self.frequency_column = optional_column(source.frequency_value, source.frequency_on)
                self.se_column = optional_column(source.se_value, source.se_on)
                self.pvalue_column = optional_column(source.p_value, source.p_on)
                self.compressed = source.compressed_on.get()
                self.gwas_file_pattern = non_empty(source.gwas_file_pattern_value)
                self.separator = non_empty(source.separator_value)
                self.scheme = GWASUtilities.BETA_P
                # TODO: implement this
                self.skip_until_header = None

        beta_args = BetaWorkArgs(source=self)
        beta_work = M03_betas.GetBetas(beta_args)

        class ZScoresWorkArgs(object):
            """Argument holder handed to ``M04_zscores.CalculateZScores``."""

            def __init__(self, source):
                self.verbosity = "10"
                self.keep_ens_version = False
                self.beta_folder = source.beta_folder
                self.weight_db_path = source.weight_db_path
                self.output_file = source.output_path
                self.covariance = source.covariance_file
                self.zscore_scheme = ZScoreCalculation.BETA_Z_SIGMA_REF
                self.normalization_scheme = Normalization.NONE
                self.input_format = Formats.FlatFile
                self.selected_dosage_folder = "intermediate/filtered_1000GP_Phase3"

        zscore_args = ZScoresWorkArgs(source=self)
        zscore_work = M04_zscores.CalculateZScores(zscore_args)
        # TODO: maybe connect stuff together so that M03 passes stuff to M04

        class WorkWrapper(object):
            """Runs a list of work items one after another."""

            def __init__(self, works):
                self.works = works

            def run(self):
                """Run the pending work items in order, releasing each when done.

                Best effort: the first exception is logged and the remaining
                items are not run. The failed item and everything after it
                stay in ``self.works``, still in their original order.
                """
                try:
                    # Work on a private copy so the list passed to the
                    # constructor is never mutated.
                    pending = list(self.works)
                    self.works = pending
                    while pending:
                        pending[0].run()
                        # Delete as we go so that a finished item is no
                        # longer referenced from here before the next starts.
                        del pending[0]
                except Exception as e:
                    logging.info("Exception when running task: %s", str(e))

        work = WorkWrapper([beta_work, zscore_work])
        return work
示例#2
0
    def buildBetas(self, db_filename):
        """Compute betas and then z-scores for one weight database.

        Derives the covariance file path and the output locations from
        ``db_filename`` and ``self.args``, stores them back on ``self.args``,
        runs ``buildBetas`` of an ``M03_betas.GetBetas`` instance on every
        name matched in ``self.args.gwas_folder``, and finally runs
        ``M04_zscores.CalculateZScores``.

        Args:
            db_filename: Path to the weight database file.

        Side effects:
            Overwrites ``weight_db_path``, ``covariance``, ``output_folder``
            and ``output_file`` on ``self.args``; creates the beta output
            folder when it does not exist yet.
        """
        filebase = os.path.basename(db_filename).replace(".db", "")
        output_folder = os.path.abspath(self.args.output_directory)

        logging.info("Processing betas for %s", db_filename)
        self.args.weight_db_path = os.path.abspath(db_filename)

        # "SAME" (any letter case) means: look for the covariance file in the
        # folder that holds the weight database.
        cov_directory = self.args.covariance_directory
        if cov_directory.upper() == "SAME":
            cov_directory = "/".join(self.args.weight_db_path.split("/")[0:-1])

        # A suffix of the form "<cov ext>..<db ext>" asks for <db ext> to be
        # removed from the database file name and <cov ext> to be appended.
        # Without "..", the suffix is appended to the database base name.
        extComponents = self.args.covariance_suffix.split("..")
        if len(extComponents) > 1:
            covext = "..".join(extComponents[0:-1])
            dbext = extComponents[-1]
            filebase = db_filename.replace(dbext, "")
            cov_suffix = covext
        else:
            cov_suffix = self.args.covariance_suffix
        # Use the last path component of filebase in both cases. The original
        # single-suffix branch called strip("/")[-1], which yields only the
        # last character of the name.
        self.args.covariance = "%s/%s%s" % (cov_directory, filebase.split("/")[-1], cov_suffix)

        file_prefix = filebase.split("/")[-1].split(".")[0]
        beta_output = os.path.join(output_folder, file_prefix)
        logging.info("Writing betas to %s", beta_output)

        self.args.output_folder = beta_output

        logging.info("Loading weight model")
        weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.args.weight_db_path)

        betaScript = M03_betas.GetBetas(self.args)
        names = Utilities.contentsWithRegexpFromFolder(self.args.gwas_folder, betaScript.gwas_regexp)

        if not os.path.exists(beta_output):
            os.makedirs(beta_output)
        betaScript.output_folder = beta_output

        # The report is named after the first GWAS name seen in the loop.
        report_prefix = None
        for name in names:
            # NOTE(review): presumably the matched names come back without
            # the ".gz" extension -- confirm against
            # Utilities.contentsWithRegexpFromFolder.
            name = name + ".gz"
            if report_prefix is None:
                report_prefix = name.split("/")[-1].split(".")[0]
            try:
                betaScript.buildBetas(weight_db_logic, name)
            # This just means that there is some extra stuff inside that directory,
            # so I'm thinking we want to ignore it.
            except Exceptions.BadFilename as e:
                logging.info("Wrong file name: %s, skipping", e.msg)

        # NOTE(review): when no name matched, report_prefix is still None and
        # the concatenation below raises TypeError -- confirm whether an empty
        # GWAS folder needs dedicated handling.
        suffix = ".csv"
        self.args.output_file = os.path.join(output_folder,
                                             report_prefix + "-" + file_prefix + suffix)

        # ZScores
        logging.info("Calculating ZScores for %s", filebase)
        zscoreScript = M04_zscores.CalculateZScores(self.args)
        zscoreScript.folder_beta = betaScript.output_folder
        zscoreScript.run()