Exemplo n.º 1
0
    def buildBetas(self, weight_db_logic, name):
        """Process one GWAS input file into a gzip-compressed beta file.

        The output goes to ``self.output_folder`` under ``name``, with ".gz"
        appended when the path does not already end in it. If that file
        already exists the method logs a message and returns without doing
        any work.

        Args:
            weight_db_logic: Object handed to the callback chooser. When it is
                falsy the GWAS file is streamed directly to the output;
                otherwise the file is loaded first and the loaded columns are
                written out as a tab-separated table.
            name: File name of the GWAS input inside ``self.gwas_folder``;
                also used as the base name of the output file.

        Returns:
            None.
        """
        output_path = os.path.join(self.output_folder, name)
        # Check the suffix only. A substring test would also match ".gz"
        # appearing in the output folder or in the middle of the file name,
        # and the compressed output would then be written without ".gz".
        if not output_path.endswith(".gz"):
            output_path += ".gz"
        if os.path.exists(output_path):
            logging.info(
                "%s already exists, delete it if you want it to be done again",
                output_path)
            return

        logging.info("Building beta for %s and %s", name, self.weight_db_path)
        input_path = os.path.join(self.gwas_folder, name)
        file_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(
            input_path, self.args)

        scheme = MethodGuessing.chooseGWASProcessingScheme(
            self.args, input_path)
        callback = MethodGuessing.chooseGWASCallback(file_format, scheme,
                                                     weight_db_logic)
        if not weight_db_logic:
            # No weight db: stream the input straight into the output file.
            GWASUtilities.loadGWASAndStream(input_path, output_path,
                                            self.compressed_gwas,
                                            self.args.separator,
                                            self.args.skip_until_header,
                                            callback)
        else:
            dosage_loader = GWASUtilities.GWASDosageFileLoader(
                input_path, self.compressed_gwas, self.args.separator,
                self.args.skip_until_header, callback)
            results, column_order = dosage_loader.load()

            # Nothing is written when the loader returned no results; only a
            # log line records that case.
            if len(results):
                with gzip.open(output_path, "wb") as out_file:
                    # Header line first, then one line per row.
                    out_file.write("\t".join(column_order) + "\n")
                    # The row count is taken from the first column; every
                    # other column is indexed up to that same length.
                    row_count = len(results[column_order[0]])
                    for i in xrange(0, row_count):
                        cells = [str(results[c][i]) for c in column_order]
                        out_file.write("%s\n" % "\t".join(cells))
            else:
                logging.info(
                    "No snps from the tissue model found in the GWAS file")
        logging.info("Successfully ran GWAS input processing")
def run(args):
    """Load the weight db, covariance and betas, then pre-process one gene.

    Reads ``args.weight_db_path``, ``args.covariance`` and every entry found
    in ``args.beta_folder``. When ``args.gene_name`` is set, the data for that
    gene is gathered and passed to ``ZScoreCalculation.preProcess``. With
    ``args.interactive`` set, ``embed()`` is called after a successful
    pre-processing; it is also called whenever the gene processing raises.

    NOTE(review): ``embed`` is presumably IPython's interactive shell, which
    exposes this function's local variables. Local names are therefore left
    unchanged, including ones that look unused. Confirm before renaming.

    Args:
        args: Parsed command-line arguments providing ``weight_db_path``,
            ``covariance``, ``beta_folder``, ``gene_name`` and ``interactive``.
    """
    logging.info("Loading weight db")
    weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(args.weight_db_path)

    logging.info("Loading covariance file")
    covariance_contents = MatrixUtilities.loadMatrixFromFile(args.covariance)

    logging.info("Choosing method")
    beta_contents = Utilities.contentsWithPatternsFromFolder(args.beta_folder, [])
    zscore_calculation, normalization = MethodGuessing.chooseZscoreSchemeFromFiles(args.beta_folder, beta_contents, covariance_contents, weight_db_logic)

    logging.info("Processing")
    betas = {}
    for content in beta_contents:
        logging.info("Loading betas")
        beta_path = os.path.join(args.beta_folder, content)
        beta_sets = KeyedDataSet.KeyedDataSetFileUtilities.loadDataSetsFromCompressedFile(beta_path, header="")
        # Index the loaded sets by name; the loop variable deliberately
        # avoids shadowing the builtin ``set``.
        beta_sets = {data_set.name: data_set for data_set in beta_sets}
        betas[content] = beta_sets

    if args.gene_name:
        try:
            gene_data, weights, covariance_matrix, valid_rsids, beta_sets = get_gene_data(args.gene_name, weight_db_logic, covariance_contents, betas)
            weight_values, variances = ZScoreCalculation.preProcess(covariance_matrix, valid_rsids, weights, beta_sets)
            if args.interactive:
                embed()
            logging.info("Processed gene data")
        except Exception as e:
            # Log the message together with the traceback so the cause of the
            # failure is not lost; ``e`` stays bound for inspection.
            logging.exception("Couldn't get gene data")
            embed()
Exemplo n.º 3
0
    def buildBetas(self, weight_db_logic, name):
        """Build the beta file for one GWAS input unless it already exists.

        The GWAS file ``name`` in ``self.gwas_folder`` is loaded through a
        ``GWASWeightDBFilteredBetaLineCollector`` callback, and the resulting
        sets are saved to ``self.output_folder`` under the same name. When the
        loader returns nothing, only a log message is produced.

        Args:
            weight_db_logic: Object passed on to the scheme chooser and to the
                line collector.
            name: File name of the GWAS input; also the output file name.

        Returns:
            None.
        """
        destination = os.path.join(self.output_folder, name)
        if os.path.exists(destination):
            logging.info(
                "%s already exists, delete it if you want it to be done again",
                destination)
            return

        logging.info("Building beta for %s and %s", name, self.weight_db_path)
        source = os.path.join(self.gwas_folder, name)
        gwas_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(
            source, self.args)
        processing_scheme = MethodGuessing.chooseGWASProcessingScheme(
            gwas_format, weight_db_logic, self.args, source)

        collector = GWASUtilities.GWASWeightDBFilteredBetaLineCollector(
            gwas_format, processing_scheme, weight_db_logic)
        loader = GWASUtilities.GWASDosageFileLoader(
            source, self.compressed, self.args.separator,
            self.args.skip_until_header, collector)
        collected_sets = loader.load()

        # "saveSetsToCompressedFile" reportedly performs the same emptiness
        # check; it is repeated here only to log a more specific message.
        if len(collected_sets) == 0:
            logging.info(
                "No snps from the tissue model found in the GWAS file")
            return

        KeyedDataSet.KeyedDataSetFileUtilities.saveSetsToCompressedFile(
            destination, collected_sets, "rsid")
Exemplo n.º 4
0
    def selectMethod(self, folder, beta_contents, covariance_entries, weight_db_logic):
        """Choose the z-score calculation and the normalization to use.

        An explicitly configured ``self.zscore_scheme`` is used when present
        and then requires ``self.normalization_scheme`` as well. Otherwise
        both are chosen by ``MethodGuessing.chooseZscoreSchemeFromFiles``. A
        configured ``self.normalization_scheme`` always replaces the chosen
        normalization.

        Returns:
            A ``(zscore_calculation, normalization)`` tuple.

        Raises:
            Exception: a z-score scheme is configured without a normalization
                scheme.
        """
        if not self.zscore_scheme:
            # Nothing configured: derive both pieces from the input files.
            zscore_calculation, normalization = MethodGuessing.chooseZscoreSchemeFromFiles(folder, beta_contents, covariance_entries, weight_db_logic)
        else:
            zscore_calculation = ZScoreCalculation.ZScoreScheme(self.zscore_scheme)
            if not self.normalization_scheme:
                raise Exception("Normalization scheme is required")
            normalization = None

        if self.normalization_scheme:
            normalization = Normalization.normalizationScheme(self.normalization_scheme, covariance_entries, weight_db_logic)

        return zscore_calculation, normalization
Exemplo n.º 5
0
    def buildBetas(self, weight_db_logic, name):
        """Process one GWAS input file into a gzip-compressed beta file.

        The output path is ``self.output_folder`` joined with ``name``, with
        ".gz" appended unless the path already ends in it. An existing output
        file is never overwritten: the method logs and returns instead.

        Args:
            weight_db_logic: Object handed to the callback chooser. If falsy,
                the GWAS file is streamed directly to the output; otherwise it
                is loaded and the loaded columns are written as a
                tab-separated table.
            name: File name of the GWAS input inside ``self.gwas_folder``;
                also used as the base name of the output file.

        Returns:
            None.
        """
        output_path = os.path.join(self.output_folder, name)
        # Only the suffix matters. A substring test would also be satisfied by
        # ".gz" in the folder name or in the middle of the file name, leaving
        # the compressed output without its extension.
        if not output_path.endswith(".gz"):
            output_path += ".gz"
        if os.path.exists(output_path):
            logging.info("%s already exists, delete it if you want it to be done again", output_path)
            return

        logging.info("Building beta for %s and %s", name, self.weight_db_path or "no database")
        input_path = os.path.join(self.gwas_folder, name)
        file_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(input_path, self.args)

        scheme = MethodGuessing.chooseGWASProcessingScheme(self.args, input_path)
        callback = MethodGuessing.chooseGWASCallback(file_format, scheme, weight_db_logic)
        if not weight_db_logic:
            # No weight db: stream the input straight into the output file.
            GWASUtilities.loadGWASAndStream(input_path, output_path, self.compressed_gwas, self.args.separator, self.args.skip_until_header, callback)
        else:
            dosage_loader = GWASUtilities.GWASDosageFileLoader(input_path, self.compressed_gwas, self.args.separator, self.args.skip_until_header, callback)
            results, column_order = dosage_loader.load()

            # An empty result writes no file at all; it is only logged.
            if len(results):
                with gzip.open(output_path, "wb") as out_file:
                    # Header line first, then one line per row.
                    out_file.write("\t".join(column_order) + "\n")
                    # Rows are counted on the first column; the remaining
                    # columns are indexed up to that same length.
                    row_count = len(results[column_order[0]])
                    for i in xrange(0, row_count):
                        cells = [str(results[c][i]) for c in column_order]
                        out_file.write("%s\n" % "\t".join(cells))
            else:
                logging.info("No snps from the tissue model found in the GWAS file")
        logging.info("Successfully ran GWAS input processing")
Exemplo n.º 6
0
    def selectMethod(self, folder, beta_contents, covariance_entries,
                     weight_db_logic):
        """Return the z-score calculation and normalization for this run.

        With ``self.zscore_scheme`` set, that scheme is instantiated and
        ``self.normalization_scheme`` becomes mandatory. Without it, both
        values come from ``MethodGuessing.chooseZscoreSchemeFromFiles``. In
        either case a configured ``self.normalization_scheme`` determines the
        returned normalization.

        Returns:
            A ``(zscore_calculation, normalization)`` tuple.

        Raises:
            Exception: ``self.zscore_scheme`` is set but
                ``self.normalization_scheme`` is not.
        """
        zscore_calculation, normalization = None, None
        if self.zscore_scheme:
            zscore_calculation = ZScoreCalculation.ZScoreScheme(
                self.zscore_scheme)
            if not self.normalization_scheme:
                raise Exception("Normalization scheme is required")
        else:
            guessed = MethodGuessing.chooseZscoreSchemeFromFiles(
                folder, beta_contents, covariance_entries, weight_db_logic)
            zscore_calculation, normalization = guessed

        # Without an explicit normalization scheme, keep whatever was chosen
        # above (possibly None).
        if not self.normalization_scheme:
            return zscore_calculation, normalization

        normalization = Normalization.normalizationScheme(
            self.normalization_scheme, covariance_entries, weight_db_logic)
        return zscore_calculation, normalization
Exemplo n.º 7
0
    def buildBetas(self, weight_db_logic, name):
        """Create the beta file for GWAS input ``name`` if it is not there yet.

        Loads ``name`` from ``self.gwas_folder`` using a
        ``GWASWeightDBFilteredBetaLineCollector`` callback and saves the
        resulting sets under the same name in ``self.output_folder``. An
        existing output file, or an empty load result, only produces a log
        message.

        Args:
            weight_db_logic: Object passed on to the scheme chooser and to the
                line collector.
            name: File name of the GWAS input; also the output file name.

        Returns:
            None.
        """
        target_path = os.path.join(self.output_folder, name)
        already_built = os.path.exists(target_path)
        if already_built:
            logging.info("%s already exists, delete it if you want it to be done again", target_path)
            return

        logging.info("Building beta for %s and %s", name, self.weight_db_path)
        gwas_path = os.path.join(self.gwas_folder, name)
        gwas_format = GWASUtilities.GWASFileFormat.fileFormatFromArgs(gwas_path, self.args)
        chosen_scheme = MethodGuessing.chooseGWASProcessingScheme(gwas_format, weight_db_logic, self.args, gwas_path)

        line_collector = GWASUtilities.GWASWeightDBFilteredBetaLineCollector(gwas_format, chosen_scheme, weight_db_logic)
        loaded_sets = GWASUtilities.GWASDosageFileLoader(gwas_path, self.compressed, self.args.separator, line_collector).load()

        # "saveSetsToCompressedFile" reportedly checks for emptiness too; the
        # check is duplicated here only to emit a more specific log message.
        if not len(loaded_sets):
            logging.info("No snps from the tissue model found in the GWAS file")
        else:
            KeyedDataSet.KeyedDataSetFileUtilities.saveSetsToCompressedFile(target_path, loaded_sets, "rsid")