# Smoke test for the gpugwas I/O helpers: exercises both VCF loaders and the
# annotation loader, printing whatever each one returns.
import gpugwas.io as gwasio
import cudf  # not referenced in the visible script; import kept as in the original

# Both VCF loaders below read the same small fixture file.
_TEST_VCF = "data/test.vcf"

# --- VCF loader, method 1 ---
# "*" is passed for both key lists (presumably a wildcard for "all keys" -
# confirm against gwasio.load_vcf).
print("Test loading VCF to DF")
vcf_df = gwasio.load_vcf(
    _TEST_VCF,
    info_keys=["*"],
    format_keys=["*"],
)
# Disabled alternative kept from the original: load a larger local VCF and
# dump it to parquet.
# vcf_df = gwasio.load_vcf("/home/jdaw/1kg-data/1kg.vcf", info_keys=["*"], format_keys=["*"])
# vcf_df.to_parquet(path="1kg_full_jdaw_v2.pqt", compression="auto", index=False)
print(vcf_df)

# --- Annotation file loader ---
print("Test loading annotation file")
ann_df = gwasio.load_annotations("data/1kg_annotations.txt")
print(ann_df)

# --- VCF loader, method 2 (variantworks-based entry point) ---
# Here only the named INFO keys (AC, AF) and FORMAT key (GT) are requested.
print("Test loading VCF to DF (Method 2)")
vcf_df_2 = gwasio.load_vcf_variantworks(
    _TEST_VCF,
    info_keys=["AC", "AF"],
    format_keys=["GT"],
)
print(vcf_df_2)

# Reaching this line only means no call above raised; nothing is asserted
# about the loaded contents.
print("===== TEST PASSED ====")
# Suppress all warnings for the remainder of the run.
warnings.simplefilter('ignore')

# Command-line interface: every option has a default, so the script can be
# launched with no arguments.
parser = argparse.ArgumentParser(description='Run GPU GWAS Pipeline')
for _flag, _default in (
    ('--vcf_path', './data/test.vcf'),
    ('--annotation_path', './data/1kg_annotations.txt'),
    ('--workdir', './temp/'),
):
    parser.add_argument(_flag, default=_default)
args = parser.parse_args()

# Initialize Memory Pool to 10GB
# (1e10 is passed as a float, exactly as in the original call.)
cudf.set_allocator(pool=True, initial_pool_size=1e10)
# Hand the RMM allocator to cp (presumably CuPy - confirm against the
# file's imports) so it allocates through RMM as well.
cp.cuda.set_allocator(rmm.rmm_cupy_allocator)

# ---- Load data ----
print("Loading data")
# In this script load_vcf is unpacked into two values: the dataframe and a
# feature mapping (the latter is not used in the visible part of the script).
vcf_df, feature_mapping = gwasio.load_vcf(
    args.vcf_path,
    info_keys=["AF"],
    format_keys=["GT", "DP"],
)
print(vcf_df.head())

print("Loading annotations")
ann_df = gwasio.load_annotations(args.annotation_path)
# Disabled debug output:
# print(ann_df)

# Start benchmarking after I/O: the timer deliberately excludes the loads above.
t0 = time.time()

# ---- Filter data ----
# Each filter's return value replaces vcf_df; samples first, then variants.
print("Filtering samples")
vcf_df = gwasfilter.filter_samples(vcf_df)
print(vcf_df.head())

print("Filtering variants")
vcf_df = gwasfilter.filter_variants(vcf_df)
import gpugwas.filter as gwasfilter
import gpugwas.algorithms as algos
import gpugwas.viz as viz
# Disabled import kept from the original:
# import gpugwas.processing as gwasproc

# Command-line interface: every option has a default, so the script can be
# launched with no arguments.
parser = argparse.ArgumentParser(description='Run GPU GWAS Pipeline')
for _flag, _default in (
    ('--vcf_path', './data/test.vcf'),
    ('--annotation_path', './data/1kg_annotations.txt'),
    ('--workdir', './temp/'),
):
    parser.add_argument(_flag, default=_default)
args = parser.parse_args()

# ---- Load data ----
print("Loading data")
# Only the AF INFO key and the GT / DP FORMAT keys are requested.
vcf_df = gwasio.load_vcf(
    args.vcf_path,
    info_keys=["AF"],
    format_keys=["GT", "DP"],
)
print(vcf_df.head())

print("Loading annotations")
ann_df = gwasio.load_annotations(args.annotation_path)
# Disabled debug output:
# print(ann_df)

# Start benchmarking after I/O: the timer deliberately excludes the loads above.
t0 = time.time()

# ---- Filter data ----
# Each filter's return value replaces vcf_df; samples first, then variants.
print("Filtering samples")
vcf_df = gwasfilter.filter_samples(vcf_df)
print(vcf_df.head())

print("Filtering variants")
vcf_df = gwasfilter.filter_variants(vcf_df)