Example #1
0
def check_and_set_gpu(use_gpu, gpu_lib, quit_on_fail=False):
    """Check GPU libraries can be loaded and set managed memory.

    Args:
        use_gpu (bool)
            Whether GPU packages have been requested
        gpu_lib (bool)
            Whether GPU packages are available
        quit_on_fail (bool)
            If True, exit the program (status 1) when GPU packages were
            requested but cannot be loaded; otherwise fall back to CPU
    Returns:
        use_gpu (bool)
            Whether GPU packages can be used
    """
    # GPU was requested but the CUDA libraries failed to import
    if use_gpu and not gpu_lib:
        if quit_on_fail:
            sys.stderr.write('Unable to load GPU libraries; exiting\n')
            sys.exit(1)
        else:
            sys.stderr.write(
                'Unable to load GPU libraries; using CPU libraries '
                'instead\n')
            use_gpu = False

    # Set memory management for large networks
    if use_gpu:
        # Managed (unified) memory lets cuDF allocations spill to host RAM
        cudf.set_allocator("managed")

    return use_gpu
Example #2
0
def initializeBlazing(ralId=0, networkInterface='lo', singleNode=False):
    """Bring up a BlazingSQL RAL worker on the given network interface.

    Picks a randomized TCP port (offset by ralId), switches cuDF to
    managed (unified) GPU memory, and starts the native communication
    layer via cio.initializeCaller.

    Returns:
        tuple: (communication port, worker IP address, current working dir)
    """
    # First IPv4 address bound to the requested interface.
    worker_ip = ni.ifaddresses(networkInterface)[ni.AF_INET][0]['addr']

    # Keep drawing random ports until checkSocket reports one as usable.
    while True:
        comm_port = random.randint(10000, 32000) + ralId
        if checkSocket(comm_port) != False:
            break

    # Managed memory allows cuDF device allocations to spill to host RAM.
    cudf.set_allocator(
        allocator="managed",
        pool=True,
        initial_pool_size=None,  # Default is 1/2 total GPU memory
        enable_logging=False)

    cio.initializeCaller(ralId, 0, networkInterface.encode(),
                         worker_ip.encode(), comm_port, singleNode)

    return comm_port, worker_ip, os.getcwd()
Example #3
0
from gpugwas.vizb import show_qq_plot, show_manhattan_plot
#import gpugwas.processing as gwasproc

import warnings
# Silence noisy library warnings so pipeline output stays readable.
warnings.filterwarnings('ignore', 'Expected ')
warnings.simplefilter('ignore')

# Command-line arguments: input VCF, sample annotations file, scratch dir.
parser = argparse.ArgumentParser(description='Run GPU GWAS Pipeline')
parser.add_argument('--vcf_path', default='./data/test.vcf')
parser.add_argument('--annotation_path', default='./data/1kg_annotations.txt')
parser.add_argument('--workdir', default='./temp/')
args = parser.parse_args()

# Initialize Memory Pool to 10GB
# (1e10 bytes; CuPy allocations are routed through RMM so cuDF and CuPy
# share the same pool.)
cudf.set_allocator(pool=True, initial_pool_size=1e10)
cp.cuda.set_allocator(rmm.rmm_cupy_allocator)

# Load data
# Only the AF info field and GT/DP format fields are pulled from the VCF.
print("Loading data")
vcf_df, feature_mapping = gwasio.load_vcf(args.vcf_path,
                                          info_keys=["AF"],
                                          format_keys=["GT", "DP"])
print(vcf_df.head())
print("Loading annotations")
ann_df = gwasio.load_annotations(args.annotation_path)
#print(ann_df)

# Start benchmarking after I/O
t0 = time.time()
Example #4
0
# Benchmark dataset CSV path; data_name is set earlier in the script
# and encodes the dataset parameters in underscore-separated fields.
src_grp = os.path.join("data", data_name + ".csv")
print("loading dataset %s" % data_name, flush=True)

# 4th field of the dataset name flags the presence of NA values.
na_flag = int(data_name.split("_")[3])
if na_flag > 0:
    print("skip due to na_flag>0: #221", flush=True, file=sys.stderr)
    exit(0)  # not yet implemented #221

on_vmem = data_name.split("_")[1] == "1e7"  # spilling vmem to mem
on_disk = not (
    on_vmem)  # no really disk, just variable name used to log in script below
print("using video and main memory data storage"
      if on_disk else "using only video memory data storage",
      flush=True)
if on_disk:
    # Managed (unified) memory lets the dataset spill from GPU to host RAM.
    cu.set_allocator("managed")

# NOTE(review): dtype is supplied as a positional list (one entry per
# column) — confirm the reader (presumably dask_cudf) accepts this form.
x = dc.read_csv(src_grp,
                header=0,
                dtype=[
                    'str', 'str', 'str', 'int32', 'int32', 'int32', 'int32',
                    'int32', 'float64'
                ])
# Categorical id columns reduce memory and speed up grouping.
x['id1'] = x['id1'].astype('category')
x['id2'] = x['id2'].astype('category')
x['id3'] = x['id3'].astype('category')
x = x.persist()
in_rows = len(x.index)
print(in_rows, flush=True)

# Timing starts after the data is loaded and persisted.
task_init = timeit.default_timer()
Example #5
0
# NOTE(review): cudf is aliased as `dd` — a name conventionally used for
# dask.dataframe; every `dd.*` call below is plain cuDF.
import cudf as dd
from feature_engineering_2 import (
    pos_cash, process_unified, process_bureau_and_balance, 
    process_previous_applications, installments_payments,
    credit_card_balance
    )

# initiating mem management
# this allows for spilling out of the gpu ram 
dd.set_allocator("managed")

### Load datasets
print("loading data")
bureau_balance = dd.read_parquet('raw_data/bureau_balance.parquet')
bureau = dd.read_parquet('raw_data/bureau.parquet')
cc_balance = dd.read_parquet('raw_data/cc_balance.parquet')
payments = dd.read_parquet('raw_data/payments.parquet')
pc_balance = dd.read_parquet('raw_data/pc_balance.parquet')
prev = dd.read_parquet('raw_data/prev.parquet')
train = dd.read_parquet('raw_data/train.parquet')
test = dd.read_parquet('raw_data/test.parquet')

# Keep the original indexes so results can be split back later.
train_index = train.index
test_index = test.index

train_target = train['TARGET']
# Stack train (minus the target column) on top of test so feature
# engineering is applied to both sets consistently.
unified = dd.concat([train.drop('TARGET', axis=1), test])
print("starting processing")

unified_feat = process_unified(unified, dd)