def get_local_files(data_dir="/Users/karande1/Benchmarks/Pilot2/common/generate_datasets"):
    """Gather the non-autoencoder ``.npz`` files found in a local directory.

    Parameters
    ----------
    data_dir : str
        Directory scanned for ``*.npz`` files.

    Returns
    -------
    tuple
        ``(filelist, fields)`` where ``filelist`` is the sorted list of
        matching paths whose names do not contain ``'AE'`` and ``fields``
        is the dict returned by ``pilot2_datasets.gen_data_set_dict()``.
    """
    # Skip any file with 'AE' in its name (presumably autoencoder
    # artifacts rather than raw data — confirm with dataset owners).
    candidates = glob.glob('%s/*.npz' % data_dir)
    filelist = sorted(path for path in candidates if 'AE' not in path)

    import pilot2_datasets as p2
    fields = p2.gen_data_set_dict()
    return (filelist, fields)
def get_list_of_data_files(GP):
    """Resolve the selected benchmark data set and list its ``.npz`` files.

    Downloads (and untars) the archive from the ANL FTP mirror if it is
    not already cached locally, verifying it against the recorded MD5.

    Parameters
    ----------
    GP : dict
        Global parameters; ``GP['set_sel']`` keys into
        ``pilot2_datasets.data_sets``, whose entries look like
        ``(archive_name, md5_hash)``.

    Returns
    -------
    tuple
        ``(data_files, fields)`` — the ``.npz`` paths inside the fetched
        data directory, and the dict from ``gen_data_set_dict()``.
    """
    # BUG FIX: the bare 'reload' builtin only exists in Python 2; this
    # file otherwise uses Python-3 print() calls, so use importlib.reload.
    import importlib
    import pilot2_datasets as p2
    importlib.reload(p2)

    print('Reading Data...')
    # Identify the data set selected
    data_set = p2.data_sets[GP['set_sel']][0]
    # Get the MD5 hash for the proper data set
    data_hash = p2.data_sets[GP['set_sel']][1]
    print('Reading Data Files... %s->%s' % (GP['set_sel'], data_set))

    # Check if the data files are in the data directory, otherwise fetch from FTP
    data_file = get_file(
        data_set,
        origin='http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/Pilot2/' + data_set + '.tar.gz',
        untar=True,
        md5_hash=data_hash)
    data_dir = os.path.join(os.path.dirname(data_file), data_set)

    # Make a list of all of the data files in the data set
    data_files = glob.glob('%s/*.npz' % data_dir)
    fields = p2.gen_data_set_dict()
    return (data_files, fields)
def get_local_files(data_tag="3k_run16",
                    data_dir_prefix="/p/gscratchr/brainusr/datasets/cancer/pilot2"):
    """Gather the non-autoencoder ``.npz`` files for a known local run.

    Parameters
    ----------
    data_tag : str
        Short identifier of the run; one of ``'3k_run16'``, ``'3k_run10'``,
        ``'3k_run32'``.
    data_dir_prefix : str
        Root directory under which the run directories live.

    Returns
    -------
    tuple
        ``(filelist, fields)`` — sorted ``.npz`` paths whose names do not
        contain ``'AE'``, and the dict from
        ``pilot2_datasets.gen_data_set_dict()``.

    Raises
    ------
    ValueError
        If ``data_tag`` is not one of the recognized run tags.

    NOTE(review): this shadows the earlier ``get_local_files`` definition
    if both live in the same module — confirm whether that is intentional.
    """
    # Map each short tag to its full run directory name.
    tag_to_dir = {
        '3k_run16': '/3k_run16_10us.35fs-DPPC.20-DIPC.60-CHOL.20.dir/',
        '3k_run10': '/3k_run10_10us.35fs-DPPC.10-DOPC.70-CHOL.20.dir/',
        '3k_run32': '/3k_run32_10us.35fs-DPPC.50-DOPC.10-CHOL.40.dir/',
    }
    try:
        data_dir = data_dir_prefix + tag_to_dir[data_tag]
    except KeyError:
        # BUG FIX: the original if/elif chain had no else branch, so an
        # unknown tag left 'data_dir' unbound and crashed later with a
        # confusing NameError. Fail fast with an actionable message.
        raise ValueError('Unknown data_tag %r; expected one of %s'
                         % (data_tag, sorted(tag_to_dir)))

    # Skip files with 'AE' in the name (presumably autoencoder artifacts
    # rather than raw data — confirm with dataset owners).
    data_files = glob.glob('%s/*.npz' % data_dir)
    filelist = sorted(d for d in data_files if 'AE' not in d)

    import pilot2_datasets as p2
    fields = p2.gen_data_set_dict()
    return (filelist, fields)