import sys


def check_and_set_gpu(use_gpu, gpu_lib, quit_on_fail=False):
    """Check GPU libraries can be loaded and set managed memory.

    Args:
        use_gpu (bool)
            Whether GPU packages have been requested
        gpu_lib (bool)
            Whether GPU packages are available
        quit_on_fail (bool)
            Whether to exit if GPU packages cannot be loaded

    Returns:
        use_gpu (bool)
            Whether GPU packages can be used
    """
    # Load CUDA libraries
    if use_gpu and not gpu_lib:
        if quit_on_fail:
            sys.stderr.write('Unable to load GPU libraries; exiting\n')
            sys.exit(1)
        else:
            sys.stderr.write(
                'Unable to load GPU libraries; using CPU libraries '
                'instead\n')
            use_gpu = False

    # Set memory management for large networks
    if use_gpu:
        cudf.set_allocator("managed")

    return use_gpu
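# A minimal usage sketch (an assumption, not from the original source): the GPU
# stack is probed with a guarded import, and the result is passed in as gpu_lib,
# so cudf is only referenced when it actually loaded.
try:
    import cudf  # noqa: F401
    gpu_lib = True
except ImportError:
    gpu_lib = False

use_gpu = check_and_set_gpu(use_gpu=True, gpu_lib=gpu_lib, quit_on_fail=False)
print("Running with GPU libraries" if use_gpu else "Running with CPU libraries")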
import os
import random

import cudf
import netifaces as ni

# cio (the engine bindings) and checkSocket() are defined elsewhere in the
# surrounding module.


def initializeBlazing(ralId=0, networkInterface='lo', singleNode=False):
    # Resolve the worker IP address of the requested network interface
    workerIp = ni.ifaddresses(networkInterface)[ni.AF_INET][0]['addr']

    # Pick a free communication port, offset by the RAL id
    ralCommunicationPort = random.randint(10000, 32000) + ralId
    while not checkSocket(ralCommunicationPort):
        ralCommunicationPort = random.randint(10000, 32000) + ralId

    cudf.set_allocator(
        allocator="managed",
        pool=True,
        initial_pool_size=None,  # Default is 1/2 total GPU memory
        enable_logging=False)

    cio.initializeCaller(ralId, 0, networkInterface.encode(),
                         workerIp.encode(), ralCommunicationPort, singleNode)
    cwd = os.getcwd()

    return ralCommunicationPort, workerIp, cwd
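# checkSocket() is assumed to look roughly like the following minimal sketch
# (not from the original source): report True when the port can be bound,
# i.e. when it is free for the worker to use.
import socket


def checkSocket(port, host='127.0.0.1'):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        try:
            s.bind((host, port))
            return True   # port is free
        except OSError:
            return False  # port is already in use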
import argparse
import time
import warnings

import cudf
import cupy as cp
import rmm

from gpugwas.vizb import show_qq_plot, show_manhattan_plot
#import gpugwas.processing as gwasproc
# gwasio (the gpugwas I/O module) is imported elsewhere in the original script.

warnings.filterwarnings('ignore', 'Expected ')
warnings.simplefilter('ignore')

parser = argparse.ArgumentParser(description='Run GPU GWAS Pipeline')
parser.add_argument('--vcf_path', default='./data/test.vcf')
parser.add_argument('--annotation_path', default='./data/1kg_annotations.txt')
parser.add_argument('--workdir', default='./temp/')
args = parser.parse_args()

# Initialize memory pool to 10 GB and route CuPy allocations through RMM
cudf.set_allocator(pool=True, initial_pool_size=1e10)
cp.cuda.set_allocator(rmm.rmm_cupy_allocator)

# Load data
print("Loading data")
vcf_df, feature_mapping = gwasio.load_vcf(args.vcf_path, info_keys=["AF"],
                                          format_keys=["GT", "DP"])
print(vcf_df.head())

print("Loading annotations")
ann_df = gwasio.load_annotations(args.annotation_path)
#print(ann_df)

# Start benchmarking after I/O
t0 = time.time()
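# An alternative setup sketch (an assumption, not part of the original script):
# more recent cuDF releases deprecate cudf.set_allocator in favour of configuring
# RMM directly, with CuPy still routed through the same pool.
import cupy as cp
import rmm

rmm.reinitialize(pool_allocator=True,           # pre-allocate a memory pool
                 initial_pool_size=10 * 2**30)  # ~10 GiB, matching the setup above
cp.cuda.set_allocator(rmm.rmm_cupy_allocator)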
import os
import sys
import timeit

import cudf as cu
import dask_cudf as dc

# data_name is defined earlier in the original benchmark script.

src_grp = os.path.join("data", data_name + ".csv")
print("loading dataset %s" % data_name, flush=True)

na_flag = int(data_name.split("_")[3])
if na_flag > 0:
    print("skip due to na_flag>0: #221", flush=True, file=sys.stderr)
    exit(0)  # not yet implemented #221

on_vmem = data_name.split("_")[1] == "1e7"  # small enough to keep in video memory
on_disk = not on_vmem  # not really disk, just the variable name used for logging below
print("using video and main memory data storage" if on_disk
      else "using only video memory data storage", flush=True)
if on_disk:
    cu.set_allocator("managed")  # allow spilling from video memory to main memory

x = dc.read_csv(src_grp, header=0,
                dtype=['str', 'str', 'str', 'int32', 'int32', 'int32',
                       'int32', 'int32', 'float64'])
x['id1'] = x['id1'].astype('category')
x['id2'] = x['id2'].astype('category')
x['id3'] = x['id3'].astype('category')
x = x.persist()

in_rows = len(x.index)
print(in_rows, flush=True)

task_init = timeit.default_timer()
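# A sketch of a first grouping query on x (an assumption, not from this snippet;
# the real benchmark questions follow later in the original script). The value
# column name 'v1' is hypothetical here.
ans = x.groupby('id1').agg({'v1': 'sum'}).compute()  # 'v1' assumed to exist in the CSV
print(ans.head())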
import cudf as dd

from feature_engineering_2 import (
    pos_cash, process_unified, process_bureau_and_balance,
    process_previous_applications, installments_payments,
    credit_card_balance
)

# Initiating memory management:
# this allows for spilling out of the GPU RAM
dd.set_allocator("managed")

### Load datasets
print("loading data")
bureau_balance = dd.read_parquet('raw_data/bureau_balance.parquet')
bureau = dd.read_parquet('raw_data/bureau.parquet')
cc_balance = dd.read_parquet('raw_data/cc_balance.parquet')
payments = dd.read_parquet('raw_data/payments.parquet')
pc_balance = dd.read_parquet('raw_data/pc_balance.parquet')
prev = dd.read_parquet('raw_data/prev.parquet')
train = dd.read_parquet('raw_data/train.parquet')
test = dd.read_parquet('raw_data/test.parquet')

train_index = train.index
test_index = test.index
train_target = train['TARGET']
unified = dd.concat([train.drop('TARGET', axis=1), test])

print("starting processing")
unified_feat = process_unified(unified, dd)
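# A follow-up sketch (an assumption, not from the original script): split the
# processed frame back into train and test rows using the indices saved above,
# assuming process_unified preserves the row index, and reattach the target.
train_feat = unified_feat.loc[train_index]
test_feat = unified_feat.loc[test_index]
train_feat['TARGET'] = train_target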