def run(args, _gwas=None):
    """Run the association over the context's genes and save the table as CSV.

    When ``args.output_file`` already exists and ``args.overwrite`` is falsy,
    a notice is logged and nothing else happens.

    Args:
        args: Options object. This function reads ``overwrite``,
            ``output_file`` and ``keep_ens_version`` and forwards the whole
            object to ``MetaxcanUtilities.build_context``.
        _gwas: Optional value forwarded to ``MetaxcanUtilities.build_context``.

    Returns:
        None. The formatted results are written to ``args.output_file``.
    """
    started_at = timer()

    # Guard clause: keep an existing output unless overwriting was requested.
    if not args.overwrite:
        if os.path.exists(args.output_file):
            logging.info(
                "%s already exists, move it or delete it if you want it done again",
                args.output_file)
            return

    logging.info("Started metaxcan association")
    context = MetaxcanUtilities.build_context(args, _gwas)

    # Progress is reported relative to the number of snps in the model.
    progress = Utilities.PercentReporter(
        logging.INFO, len(context.get_model_snps()))
    used_snps = set()

    genes, _ = context.get_data_intersection()
    rows = []
    for gene in genes:
        row, gene_snps = AssociationCalculation.association(
            gene, context, return_snps=True)
        rows.append(row)
        used_snps.update(gene_snps)
        progress.update(
            len(used_snps),
            "%d %% of model's snps found so far in the gwas study")

    Utilities.ensure_requisite_folders(args.output_file)
    progress.update(len(used_snps), "%d %% of model's snps used", force=True)

    # The per-gene rows are transposed before being turned into a data frame.
    table = AssociationCalculation.dataframe_from_results(zip(*rows))
    table = MetaxcanUtilities.format_output(
        table, context, args.keep_ens_version)
    table.to_csv(args.output_file, index=False)

    elapsed = str(timer() - started_at)
    logging.info(
        "Sucessfully processed metaxcan association in %s seconds" % elapsed)
def run_metaxcan(args, context):
    """Compute association results for each gene in the context's data intersection.

    Args:
        args: Options object. ``remove_ens_version`` is passed to
            ``MetaxcanUtilities.format_output``; ``output_file``, when truthy,
            is the CSV path the results are saved to.
        context: Object providing ``get_model_snps()`` and
            ``get_data_intersection()``; it is also handed to
            ``AssociationCalculation.association`` and to the formatter.

    Returns:
        The object returned by ``MetaxcanUtilities.format_output``.
    """
    logging.info("Started metaxcan association")

    # Progress is reported relative to the number of snps in the model.
    progress = Utilities.PercentReporter(
        logging.INFO, len(context.get_model_snps()))
    used_snps = set()

    genes, _ = context.get_data_intersection()
    rows = []
    for gene in genes:
        logging.log(7, "Processing gene %s", gene)
        row, gene_snps = AssociationCalculation.association(
            gene, context, return_snps=True)
        rows.append(row)
        used_snps.update(gene_snps)
        progress.update(
            len(used_snps),
            "%d %% of model's snps found so far in the gwas study")

    progress.update(len(used_snps), "%d %% of model's snps used", force=True)

    table = MetaxcanUtilities.format_output(
        AssociationCalculation.dataframe_from_results(rows),
        context,
        args.remove_ens_version)

    # Saving is optional: it only happens when an output path was given.
    if args.output_file:
        Utilities.ensure_requisite_folders(args.output_file)
        table.to_csv(args.output_file, index=False)

    return table
def run_additional(args, context):
    """Collect the additional statistics of each gene and optionally save them.

    Args:
        args: Options object. ``remove_ens_version`` is passed to
            ``MetaxcanUtilities.format_additional_output``;
            ``additional_output``, when truthy, is the CSV path to write.
        context: Object providing ``get_data_intersection()``; it is also
            handed to ``AssociationCalculation.additional_stats`` and to the
            formatter.

    Returns:
        The object returned by ``MetaxcanUtilities.format_additional_output``.
    """
    logging.info("Started metaxcan additional stats")

    genes, _ = context.get_data_intersection()
    per_gene_stats = [
        AssociationCalculation.additional_stats(gene, context)
        for gene in genes
    ]

    table = AssociationCalculation.dataframe_from_aditional_stats(
        per_gene_stats)
    table = MetaxcanUtilities.format_additional_output(
        table, context, args.remove_ens_version)

    # Saving is optional: it only happens when an output path was given.
    destination = args.additional_output
    if destination:
        Utilities.ensure_requisite_folders(destination)
        table.to_csv(destination, index=False)

    return table
def test_build_context(self):
    """Check ``AssociationCalculation.association`` for genes A through G.

    Each gene is evaluated against the context built by ``_context()`` and the
    returned result is compared with its expected tuple via
    ``assert_equal_tuple``, in the order listed below.
    """
    c = _context()
    nan = numpy.nan

    expectations = [
        ("A", ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3)),
        ("B", ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6)),
        ("C", ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1)),
        ("D", ('D', nan, nan, nan, 2, nan, 0)),
        ("E", ('E', nan, nan, nan, 1, nan, 0)),
        # The following are cases of "wrong" data
        ("F", ('F', nan, nan, 0, 2, 2, 2)),
        ("G", ('G', nan, nan, 0, 1, 1, 1)),
    ]

    for gene, expected in expectations:
        # Only the result tuple is checked; the returned snps are ignored.
        result, _ = AssociationCalculation.association(
            gene, c, return_snps=True)
        assert_equal_tuple(self, result, expected)
def run_metaxcan(args, context):
    """Compute association results, optionally merged with additional stats.

    Genes come from ``context.get_data_intersection()``. Processing stops
    early once ``args.MAX_R`` genes have been handled (when ``MAX_R`` is
    truthy).

    Args:
        args: Options object. This function reads ``MAX_R``,
            ``additional_output``, ``remove_ens_version`` and ``output_file``.
        context: Object providing ``get_model_snps()`` and
            ``get_data_intersection()``; it is also handed to the
            ``AssociationCalculation`` and ``MetaxcanUtilities`` helpers.

    Returns:
        The formatted results; when ``args.additional_output`` is truthy this
        is the output of ``MetaxcanUtilities.merge_additional_output``.
    """
    logging.info("Started metaxcan association")
    model_snps = context.get_model_snps()
    total_snps = len(model_snps)
    snps_found = set()
    reporter = Utilities.PercentReporter(logging.INFO, total_snps)

    i_genes, i_snps = context.get_data_intersection()

    results = []
    additional = []
    for i, gene in enumerate(i_genes):
        if args.MAX_R and i + 1 > args.MAX_R:
            # logging.log() needs an explicit level as its first argument;
            # the previous one-argument call raised TypeError on this branch.
            logging.info("Early exit condition met")
            break

        logging.log(9, "Processing gene %i:%s", i, gene)
        r, snps = AssociationCalculation.association(
            gene, context, return_snps=True)
        results.append(r)
        snps_found.update(snps)
        reporter.update(
            len(snps_found),
            "%d %% of model's snps found so far in the gwas study")

        # Additional stats are only computed when they were requested.
        if args.additional_output:
            stats_ = AssociationCalculation.additional_stats(gene, context)
            additional.append(stats_)

    reporter.update(len(snps_found), "%d %% of model's snps used", force=True)

    results = AssociationCalculation.dataframe_from_results(results)
    results = MetaxcanUtilities.format_output(
        results, context, args.remove_ens_version)

    if args.additional_output:
        additional = AssociationCalculation.dataframe_from_aditional_stats(
            additional)
        results = MetaxcanUtilities.merge_additional_output(
            results, additional, context, args.remove_ens_version)

    # Saving is optional: it only happens when an output path was given.
    if args.output_file:
        Utilities.ensure_requisite_folders(args.output_file)
        results.to_csv(args.output_file, index=False)

    return results
def test_dataframe_from_results(self):
    """Check that ``dataframe_from_results`` keeps each result field in its column.

    The input rows are transposed into per-field columns and compared with the
    data frame column stored under the matching ``ARF.K_*`` key.
    """
    rows = [
        ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3),
        ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6),
        ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1),
    ]
    frame = AssociationCalculation.dataframe_from_results(rows)

    fields = AssociationCalculation.ARF
    columns = list(zip(*rows))

    # For every checked field, ARF.<NAME> is the position inside a result row
    # and ARF.K_<NAME> is the key of the corresponding data frame column.
    checked = (
        "GENE",
        "ZSCORE",
        "EFFECT_SIZE",
        "N_SNPS_IN_MODEL",
        "N_SNPS_IN_COV",
        "N_SNPS_USED",
    )
    for name in checked:
        numpy.testing.assert_array_equal(
            frame[getattr(fields, "K_" + name)],
            columns[getattr(fields, name)])
def test_dataframe_from_results(self):
    """Check that ``dataframe_from_results`` keeps each result field in its column.

    The input rows are transposed into per-field columns and compared with the
    data frame column stored under the matching ``ARF.K_*`` key.
    """
    results = [
        ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3),
        ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6),
        ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1)]
    d = AssociationCalculation.dataframe_from_results(results)

    A = AssociationCalculation.ARF
    # zip() returns a one-shot iterator on Python 3 and cannot be indexed;
    # materialise it so the columns can be looked up by position below.
    r_ = list(zip(*results))

    numpy.testing.assert_array_equal(d[A.K_GENE], r_[A.GENE])
    numpy.testing.assert_array_equal(d[A.K_ZSCORE], r_[A.ZSCORE])
    numpy.testing.assert_array_equal(d[A.K_EFFECT_SIZE], r_[A.EFFECT_SIZE])
    numpy.testing.assert_array_equal(d[A.K_N_SNPS_IN_MODEL], r_[A.N_SNPS_IN_MODEL])
    numpy.testing.assert_array_equal(d[A.K_N_SNPS_IN_COV], r_[A.N_SNPS_IN_COV])
    numpy.testing.assert_array_equal(d[A.K_N_SNPS_USED], r_[A.N_SNPS_USED])
def test_build_context(self):
    """Check ``AssociationCalculation.association`` for genes A through G.

    Each gene is evaluated against the context built by ``_context()`` and the
    returned result is compared with its expected tuple via
    ``self.assertEqual``, in the order listed below.
    """
    c = _context()
    nan = numpy.nan

    # NOTE(review): tuple equality accepts a NaN entry only when both sides
    # hold the very same object, so the NaN rows rely on ``association``
    # returning ``numpy.nan`` itself -- confirm against the implementation.
    expectations = [
        ("A", ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3)),
        ("B", ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6)),
        ("C", ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1)),
        ("D", ('D', nan, nan, nan, 2, nan, 0)),
        ("E", ('E', nan, nan, nan, 1, nan, 0)),
        # The following are cases of "wrong" data
        ("F", ('F', nan, nan, 0, 2, 2, 2)),
        ("G", ('G', nan, nan, 0, 1, 1, 1)),
    ]

    for gene, expected in expectations:
        # Only the result tuple is checked; the returned snps are ignored.
        result, _ = AssociationCalculation.association(
            gene, c, return_snps=True)
        self.assertEqual(result, expected)