def test_single_snp(args):
    """Run FaST-LMM single-SNP association tests, one per trait.

    Reads the phenotype table, loads the test SNPs and the K0 SNPs through
    ``get_snpdata``, and then, for each of ``trait1``..``trait3``, writes a
    tab-separated phenotype file, runs ``single_snp`` and stores the result
    table in HDF5 format under ``args.output_dir``. When ``args.manhattan``
    is truthy a Manhattan plot is saved next to each result file.

    Args:
        args: Namespace-like object providing ``phenotype_file``,
            ``sample_indices_file`` (may be ``None``), ``snp_file``,
            ``k0_file``, ``output_dir``, ``GB_goal`` and ``manhattan``.
            The phenotype table must contain the columns ``id``, ``type``
            and one column per trait.
    """
    from pysnptools.snpreader import Pheno
    from fastlmm.association import single_snp
    from utils import read_hdf5_dataset
    # The backend has to be selected before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import fastlmm.util.util as flutil

    logger.info('read phenotypes from file: ' + args.phenotype_file)
    phenotypes = pd.read_table(args.phenotype_file)
    is_training = phenotypes['type'] == 'training'
    # Two identical columns of sample ids, mirroring the fid/iid columns
    # written to the phenotype files below.
    iid = np.repeat(
        phenotypes['id'].values.astype('S')[:, np.newaxis], 2, axis=1)

    if args.sample_indices_file is not None:
        logger.info('read indices from file: ' + args.sample_indices_file)
        sample_indices = read_hdf5_dataset(args.sample_indices_file)
    else:
        # Default to the rows flagged as training samples.
        sample_indices = np.nonzero(is_training.values)[0]

    logger.info('read SNP file (for test): ' + args.snp_file)
    test_snps = get_snpdata(iid, args.snp_file, sample_indices=sample_indices)
    logger.info('read SNP file (for K0): ' + args.k0_file)
    K0 = get_snpdata(iid, args.k0_file)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    df_pheno = phenotypes[is_training].copy()
    df_pheno['fid'] = df_pheno['id']
    df_pheno['iid'] = df_pheno['id']

    traits = ('trait1', 'trait2', 'trait3')
    for trait in traits:
        pheno_file = os.path.join(args.output_dir, 'pheno.%s.txt' % trait)
        logger.info('create Pheno file: ' + pheno_file)
        df_pheno[['fid', 'iid', trait]].to_csv(
            pheno_file, index=False, sep='\t', header=False)
        pheno = Pheno(pheno_file)

        logger.info('run FastLMM for single SNP test for %s' % trait)
        results_df = single_snp(test_snps, pheno, K0=K0, count_A1=True,
                                GB_goal=args.GB_goal)

        result_file = os.path.join(args.output_dir, 'single_snp.' + trait)
        logger.info('save results to file: ' + result_file)
        # Pass the key by keyword: newer pandas deprecates it as a
        # positional argument.
        results_df.to_hdf(result_file, key=trait)

        if args.manhattan:
            plot_file = os.path.join(args.output_dir,
                                     'manhattan.%s.pdf' % trait)
            logger.info('create Manhattan plot: ' + plot_file)
            # Start from an empty figure so plots of earlier traits do not
            # leak into this one.
            plt.clf()
            # DataFrame.as_matrix() was removed in pandas 1.0; selecting the
            # columns and taking .values works on old and new versions.
            flutil.manhattan_plot(
                results_df[["Chr", "ChrPos", "PValue"]].values,
                pvalue_line=1e-5, xaxis_unit_bp=False)
            plt.savefig(plot_file)
# Single-SNP GWAS with FaST-LMM followed by Manhattan and QQ plots.
#
# Paths to the bed and phenotype files come from ``args``. Example values
# used previously:
#   bed_file = "/birl2/users/cbe453/arabidopsis-association/PLINK_manipulation/Seed_Oil_Composition_maf_ge_05_Fully_Merged_391_Subset_Final"
#   pheno_file = "/birl2/data/P2IRC/GE2P/GWAS/arabidopsis/arabidopsis-pheno-files/BC16_0/bioBC_FA-BC16_0_plink.pheno"

# Perform the single_snp GWAS analysis.
# By default, FaST-LMM does not generate a proper output file, so the
# output_file_name option is required. An arbitrary RAM cap of 10G was set
# based on previous tests.
results_df = single_snp(args.bed_file, args.pheno_file, GB_goal=10,
                        count_A1=True, output_file_name=args.out_file)

# Plotting tools. The figures are written to files rather than shown (the
# original author's desktop machine had no Xquartz).
import matplotlib.pyplot as plt
import fastlmm.util.util as flutil

# Draw the Manhattan plot.
# DataFrame.as_matrix() was removed in pandas 1.0; selecting the columns and
# taking .values works on old and new versions.
flutil.manhattan_plot(results_df[["Chr", "ChrPos", "PValue"]].values,
                      pvalue_line=1e-5, xaxis_unit_bp=False)
plt.title(args.plot_title)
plt.savefig(args.out_file + '.png')

# Draw the QQ plot.
import os
from fastlmm.util.stats.plotp import qqplot

# Prefix only the file name with 'qq_': prepending it to the whole path would
# point into a different directory whenever out_file contains one. For a bare
# file name the result is the same as 'qq_' + out_file + '.png'.
out_dir, out_name = os.path.split(args.out_file)
qq_file = os.path.join(out_dir, 'qq_' + out_name + '.png')
qqplot(results_df["PValue"].values, fileout=qq_file,
       title=args.plot_title.replace('Manhattan', 'Quantile-quantile'))

# Displays the first rows when run in a notebook; in a plain script the value
# of this expression is discarded.
results_df.head()
# Run a basic FaST-LMM single-SNP association test on a PED file and save a
# Manhattan plot of the results.
#
# Usage: SCRIPT INPED_FILE INPHENO_FILE RESULTS_DATAFRAME OUTPUT_MANHATTAN

# Load FaST-LMM basic association test:
from fastlmm.association import single_snp
from pysnptools.snpreader import Ped
from pysnptools.snpreader import Pheno
from pysnptools.snpreader import wrap_plink_parser
import numpy as np
from sys import argv
import matplotlib
# The backend has to be selected before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import fastlmm.util.util as flutil

# Fail with a readable message instead of an unpacking ValueError.
if len(argv) != 5:
    raise SystemExit(
        "usage: %s INPED_FILE INPHENO_FILE RESULTS_DATAFRAME OUTPUT_MANHATTAN"
        % argv[0])
script, inped_file, inpheno_file, results_dataframe, output_manhattan = argv

# Load snp data:
# print(...) with a single string argument prints the same text on Python 2
# and Python 3.
print("Loading variant data...")
ped_file = Ped(inped_file)
print("Loading phenotype data...")
pheno_fn = Pheno(inpheno_file)

# Run basic association test:
print("Running FaST-LMM single_snp test...")
results_df = single_snp(test_snps=ped_file, pheno=pheno_fn,
                        leave_out_one_chrom=0,
                        output_file_name=results_dataframe)

# DataFrame.as_matrix() was removed in pandas 1.0; selecting the columns and
# taking .values works on old and new versions.
chromosome_starts = flutil.manhattan_plot(
    results_df[["Chr", "ChrPos", "PValue"]].values,
    pvalue_line=4.4e-7, xaxis_unit_bp=True)

# The Agg backend cannot open a window, so plt.show() displayed nothing and
# the plot was lost; write it to the path given on the command line instead.
plt.savefig(output_manhattan)