def test_astropy_time_array():
    times = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00']
    t1 = Time(times, format='isot', scale='utc')
    hkl.dump(t1, "test_ap2.h5")
    t2 = hkl.load("test_ap2.h5")

    print(t1)
    print(t2)
    assert t1.value.shape == t2.value.shape
    for ii in range(len(t1)):
        assert t1.value[ii] == t2.value[ii]
    assert t1.format == t2.format
    assert t1.scale == t2.scale

    times = [58264, 58265, 58266]
    t1 = Time(times, format='mjd', scale='utc')
    hkl.dump(t1, "test_ap2.h5")
    t2 = hkl.load("test_ap2.h5")

    print(t1)
    print(t2)
    assert t1.value.shape == t2.value.shape
    assert np.allclose(t1.value, t2.value)
    assert t1.format == t2.format
    assert t1.scale == t2.scale
def safe_load_h(path, num_objs=None):
    if not os.path.exists(path):
        return None
    with open(path, "r") as f:
        if num_objs is None:
            return hkl.load(f)
        return [hkl.load(f) for _ in xrange(num_objs)]
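# A minimal usage sketch for safe_load_h() above; "features.hkl" is an assumed
# path, not a file referenced elsewhere here. The helper returns None when the
# file is missing and a single loaded object otherwise.
feats = safe_load_h("features.hkl")
if feats is None:
    raise IOError("features.hkl not found")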
def getWeakClassifiers(Npos):
    feature_index_list = hkl.load('feature_index_list' + str(Npos) + ".hkl")
    alpha_list = hkl.load('alpha_list' + str(Npos) + ".hkl")
    theta_list = hkl.load('theta_list' + str(Npos) + ".hkl")
    polarity_list = hkl.load('polarity_list' + str(Npos) + ".hkl")
    best_result_list = hkl.load('best_result_list' + str(Npos) + ".hkl")
    return feature_index_list, alpha_list, theta_list, polarity_list, best_result_list
def __init__(self, images_hkl, labels_hkl, one_hot=False):
    images = hkl.load(images_hkl)
    labels = hkl.load(labels_hkl)
    if one_hot:
        labels = dense_to_one_hot(labels, 102)
    images = images.astype(numpy.float32)
    self._num_examples = images.shape[0]
    self._images = images
    self._labels = labels
    self._epochs_completed = 0
    self._index_in_epoch = 0
    self.train_name_file = "obj101/train.txt"
    self.vid_name_file = "obj101/test.txt"
def run(model_name):
    # load training data
    print "==> loading train data from %s" % (model_name + "_train_(features|labels).hkl")
    train_features = hkl.load(model_name + "_train_features.hkl")
    train_labels = hkl.load(model_name + "_train_labels.hkl")
    print "train_features.shape =", train_features.shape
    print "train_labels.shape =", train_labels.shape

    svm = LinearSVC(C=1.0)

    # print "==> training and test"
    # X_train = train_features[-1000:]
    # T_train = train_labels[-1000:]
    # X_test = train_features[:-1000]
    # T_test = train_labels[:-1000]
    # svm.fit(X_train, T_train)
    # Y_test = svm.predict(X_test)
    # print confusion_matrix(T_test, Y_test)
    # print accuracy_score(T_test, Y_test)
    # print classification_report(T_test, Y_test)

    # 10-fold cross-validation
    print "==> cross validation"
    scores = cross_val_score(svm, train_features, train_labels, cv=10)
    print "Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std())

    # train on the full training set
    svm.fit(train_features, train_labels)

    # load test data
    print "==> loading test data from %s" % (model_name + "_test_(features|labels).hkl")
    test_features = hkl.load(model_name + "_test_features.hkl")

    # write predictions to CSV
    print "==> predicting and writing"
    predicted_labels = svm.predict(test_features)

    with open("test.txt") as fr:
        lines = fr.readlines()

    image_ids = []
    for line in lines:
        image_path = line.split()[0]
        image_name = line.split("/")[-1]
        image_id = image_name.split(".")[0]
        image_id = int(image_id)
        image_ids.append(image_id)

    assert len(image_ids) == len(predicted_labels)

    with open(model_name + "_predict.txt", "w") as fw:
        fw.write("id,label\n")
        for i in xrange(len(image_ids)):
            fw.write("%d,%d\n" % (image_ids[i], predicted_labels[i]))
def _load_batch(path, pixels, dtype='float64', model='VGG_16'):
    """ load a batch of images """
    batch = hkl.load(path + '_data.hpy')
    if model in MODELS[0:1]:
        data = _preprocess_vgg(batch.astype(dtype), pixels)
    elif model in MODELS[2]:
        data = _preprocess_googlenet(batch.astype(dtype), pixels)
    elif model in MODELS[3]:
        data = _preprocess_inception_v3(batch.astype(dtype))
    else:
        raise ValueError
    labels = one_hot(hkl.load(path + '_labels.hpy'), n=5)  # convert labels to one-hot representation with 5 classes
    return data, labels.astype(dtype)
def load_coco_data(data_path='./data', split='train'):
    data_path = os.path.join(data_path, split)
    start_t = time.time()
    data = {}

    data['features'] = hickle.load(os.path.join(data_path, '%s.features.hkl' % split))
    with open(os.path.join(data_path, '%s.file.names.pkl' % split), 'rb') as f:
        data['file_names'] = pickle.load(f)
    with open(os.path.join(data_path, '%s.captions.pkl' % split), 'rb') as f:
        data['captions'] = pickle.load(f)
    with open(os.path.join(data_path, '%s.image.idxs.pkl' % split), 'rb') as f:
        data['image_idxs'] = pickle.load(f)

    if split == 'train':
        with open(os.path.join(data_path, 'word_to_idx.pkl'), 'rb') as f:
            data['word_to_idx'] = pickle.load(f)

    for k, v in data.iteritems():
        if type(v) == np.ndarray:
            print k, type(v), v.shape, v.dtype
        else:
            print k, type(v), len(v)

    end_t = time.time()
    print "Elapse time: %.2f" % (end_t - start_t)
    return data
def load(filepath, fmt='pkl'):
    """ Load data from a pickle- or hickle-format file.

    Input
        filepath - file path
        fmt      - format, {'pkl'} | 'hkl' | 'h5'
    Output
        data     - loaded data
    """
    if fmt == 'pkl':
        # use pickle
        import cPickle
        with open(filepath, 'r') as fo:
            data = cPickle.load(fo)
    elif fmt == 'hkl':
        # use hickle, which is faster for large-scale data
        import hickle
        with open(filepath, "r") as fo:
            data = hickle.load(fo)
    else:
        raise Exception('unknown fmt: {}'.format(fmt))
    return data
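# Usage sketch for the load() helper above; "cache.hkl" is an assumed filename,
# not one referenced elsewhere in this code.
cached = load("cache.hkl", fmt='hkl')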
def fun_load(config, sock_data=5000):
    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']
    # recv_queue and send_queue are multiprocessing.Queue
    # recv_queue is only for receiving
    # send_queue is only for sending

    # if need to do random crop and mirror
    flag_randproc = not config['use_data_layer']
    flag_batch = config['batch_crop_mirror']

    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()

    sock = zmq.Context().socket(zmq.PAIR)
    sock.bind('tcp://*:{0}'.format(sock_data))

    shape, dtype, h = sock.recv_pyobj()
    print 'shared_x information received', shape, dtype
    shape = (3, 255, 255, 256)  # TODO remove fix

    gpu_data_remote = gpuarray.GPUArray(shape, dtype, gpudata=drv.IPCMemoryHandle(h))
    gpu_data = gpuarray.GPUArray(shape, dtype)

    img_mean = recv_queue.get()
    print 'img_mean received'

    # The first time, do the set ups and other stuff

    # receive information for loading
    while True:
        # getting the hkl file name to load
        hkl_name = recv_queue.get()
        # print hkl_name
        # data = pickle.load(open(hkl_name)) - img_mean
        data = hkl.load(hkl_name) - img_mean
        # print 'load ', time.time() - bgn_time

        if flag_randproc:
            param_rand = recv_queue.get()
            data = crop_and_mirror(data, param_rand, flag_batch=flag_batch)

        gpu_data.set(data)

        # wait for computation on last minibatch to finish
        msg = recv_queue.get()
        assert msg == 'calc_finished'

        drv.memcpy_peer(gpu_data_remote.ptr,
                        gpu_data.ptr,
                        gpu_data.dtype.itemsize * gpu_data.size,
                        ctx, ctx)
        ctx.synchronize()

        send_queue.put('copy_finished')
def test_astropy_skycoord():
    ra = Angle(['1d20m', '1d21m'], unit='degree')
    dec = Angle(['33d0m0s', '33d01m'], unit='degree')
    radec = SkyCoord(ra, dec)
    hkl.dump(radec, "test_ap.h5")
    radec2 = hkl.load("test_ap.h5")
    assert np.allclose(radec.ra.value, radec2.ra.value)
    assert np.allclose(radec.dec.value, radec2.dec.value)

    ra = Angle(['1d20m', '1d21m'], unit='hourangle')
    dec = Angle(['33d0m0s', '33d01m'], unit='degree')
    radec = SkyCoord(ra, dec)
    hkl.dump(radec, "test_ap.h5")
    radec2 = hkl.load("test_ap.h5")
    assert np.allclose(radec.ra.value, radec2.ra.value)
    assert np.allclose(radec.dec.value, radec2.dec.value)
def test_astropy_quantity():
    for uu in ['m^3', 'm^3 / s', 'kg/pc']:
        a = Quantity(7, unit=uu)

        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")

        assert a == b
        assert a.unit == b.unit

        a *= a
        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")
        assert a == b
        assert a.unit == b.unit
def test():
    # im_file = caffe_root + 'examples/images/cat.jpg'
    im_file = '/home/bill/Dropbox/Cox_Lab/Illusions/images/T_illusion.jpg'
    layer1 = 'pool2'
    layer2 = 'conv2'
    save_file = '/home/bill/Dropbox/Cox_Lab/Illusions/misc/T_feats2_' + layer1 + '_notoversample.hkl'
    save_file2 = '/home/bill/Dropbox/Cox_Lab/Illusions/misc/T_recon_' + layer1 + '_0-2tran.jpg'

    if not os.path.isfile(save_file):
        feats = get_features(im_file, layer1)
        hkl.dump(feats, open(save_file, 'w'))
    else:
        feats = hkl.load(open(save_file))

    # feats = feats.reshape((10, 256, 6, 6))
    recon_im = get_recon(feats, layer2)
    # img = Image.fromarray(recon_im, 'RGB')
    pdb.set_trace()

    # recon_im[recon_im < 0] = 0
    # recon_im = recon_im / 255
    plt.imshow(recon_im)  # , cmap='Greys_r')
    plt.show(block=False)
    plt.savefig(save_file2)
    pdb.set_trace()
def build_dataset(self):
    # data_set = scipy.io.loadmat(self.dataset_path)
    data_set = hickle.load(file(self.dataset_path, 'r'))
    self.trainset = [data_set['train_video'].astype(config.floatX),
                     data_set['train_audio'].astype(config.floatX)]
    self.testset = [data_set['test_video'].astype(config.floatX),
                    data_set['test_audio'].astype(config.floatX)]
    self.tuning = self.testset
def train_model_wrap(train_model, shared_x, shared_y, rand_arr, img_mean,
                     count, minibatch_index, minibatch_range, batch_size,
                     train_filenames, train_labels, flag_para_load,
                     flag_datalayer, send_queue=None, recv_queue=None):
    if flag_para_load:
        # load by self or the other process

        # wait for the copying to finish
        msg = recv_queue.get()
        assert msg == 'copy_finished'

        if count < len(minibatch_range):
            ind_to_read = minibatch_range[count]
            name_to_read = str(train_filenames[ind_to_read])
            send_queue.put(name_to_read)
            if not flag_datalayer:
                send_queue.put(get_rand3d())
    else:
        batch_img = hkl.load(str(train_filenames[minibatch_index])) - img_mean
        shared_x.set_value(batch_img)

    batch_label = train_labels[minibatch_index * batch_size:
                               (minibatch_index + 1) * batch_size]
    shared_y.set_value(batch_label)

    if flag_datalayer:
        rand_arr.set_value(get_rand3d())

    cost_ij = train_model()

    return cost_ij
def do_all(runname='brownseds_highz', outfolder=None, regenerate=False, regenerate_stack=False, **opts):

    if outfolder is None:
        outfolder = os.getenv('APPS') + '/prospector_alpha/plots/' + runname + '/pcomp/'
        if not os.path.isdir(outfolder):
            os.makedirs(outfolder)
            os.makedirs(outfolder + 'data/')

    stack_opts = {
        'sigma_sf': 0.3,                  # scatter in the star-forming sequence, in dex
        'nbins_horizontal': 3,            # number of bins in horizontal stack
        'nbins_vertical': 4,              # number of bins in vertical stack
        'horizontal_bin_colors': ['#45ADA8', '#FC913A', '#FF4E50'],
        'vertical_bin_colors': ['red', '#FC913A', '#45ADA8', '#323299'],
        'low_mass_cutoff': 9.5,           # log(M) where we stop stacking and plotting
        'high_mass_cutoff': 11.5,
        'ylim_horizontal_sfr': (-0.8, 3),
        'ylim_horizontal_ssfr': (1e-13, 1e-9),
        'ylim_vertical_sfr': (-3, 3),
        'ylim_vertical_ssfr': (1e-13, 1e-9),
        'xlim_t': (1e7, 1.4e10),
        'show_disp': [0.16, 0.84]         # percentile of population distribution to show on plot
    }

    filename = outfolder + 'data/single_sfh_stack.h5'
    if os.path.isfile(filename) and regenerate_stack == False:
        with open(filename, "r") as f:
            stack = hickle.load(f)
    else:
        data = collate_data(runname, filename=outfolder + 'data/stacksfh.h5', regenerate=regenerate, **opts)
        stack = stack_sfh(data, regenerate_stack=regenerate_stack, **stack_opts)
        hickle.dump(stack, open(filename, "w"))

    plot_stacked_sfh(stack, outfolder, **stack_opts)
def set_data(filenames, file_count, subb, config, count, cur, img_mean,
             gpu_data, gpu_data_remote, ctx, icomm, img_batch_empty):

    load_time = time.time()
    data = None
    # aa = config['rank'] + count / subb * size
    # img_list = range(aa * config['file_batch_size'], (aa + 1) * config['file_batch_size'], 1)
    # print rank, img_list

    if config['data_source'] in ['hkl', 'both']:
        data_hkl = hkl.load(str(filenames[file_count]))  # c01b
        data = data_hkl

    if config['data_source'] in ['lmdb', 'both']:
        data_lmdb = lmdb_load_cur(cur, config, img_batch_empty)
        data = data_lmdb

    if config['data_source'] == 'both':
        if config['rank'] == 0:
            print (rank, (data_hkl - data_lmdb)[1, 0:3, 1, 1].tolist())

    load_time = time.time() - load_time

    sub_time = time.time()
    data = data - img_mean
    sub_time = time.time() - sub_time

    crop_time = time.time()
    for minibatch_index in range(subb):
        count += 1
        batch_data = data[:, :, :, minibatch_index * config['batch_size']:(minibatch_index + 1) * batch_size]

        if mode == 'train':
            rand_arr = get_rand3d(config['random'], count + (rank + 1) * n_files * subb)
        else:
            rand_arr = np.float32([0.5, 0.5, 0])

        batch_data = crop_and_mirror(batch_data, rand_arr,
                                     flag_batch=config['batch_crop_mirror'],
                                     cropsize=config['input_width'])
        gpu_data[minibatch_index].set(batch_data)
    crop_time = time.time() - crop_time

    # print 'load_time: %f (load %f, sub %f, crop %f)' % (load_time + crop_time + sub_time, load_time, sub_time, crop_time)

    # wait for computation on last file to finish
    msg = icomm.recv(source=MPI.ANY_SOURCE, tag=35)
    assert msg == "calc_finished"

    for minibatch_index in range(subb):
        # copy from preload area
        drv.memcpy_dtod(gpu_data_remote[minibatch_index].ptr,
                        gpu_data[minibatch_index].ptr,
                        gpu_data[minibatch_index].dtype.itemsize * gpu_data[minibatch_index].size)

    ctx.synchronize()

    icomm.isend("copy_finished", dest=0, tag=55)

    return count
def test_astropy_angle():
    for uu in ['radian', 'degree']:
        a = Angle(1.02, unit=uu)

        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")
        assert a == b
        assert a.unit == b.unit
def test_astropy_angle_array():
    a = Angle([1, 2, 3], unit='degree')

    hkl.dump(a, "test_ap.h5")
    b = hkl.load("test_ap.h5")
    assert np.allclose(a.value, b.value)
    assert a.unit == b.unit
def load_hkl_file(filename):
    hkl_filename = filename + '.hkl'
    if os.path.isfile(hkl_filename):
        start = time.get_seconds()
        data = hkl.load(hkl_filename)
        print 'Loaded %s in %ds' % (hkl_filename, time.get_seconds() - start)
        return data
    return None
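# Usage sketch for load_hkl_file(); 'features' is an assumed base name and
# recompute_features() is a hypothetical fallback, not defined in this code.
data = load_hkl_file('features')
if data is None:
    data = recompute_features()
    hkl.dump(data, 'features.hkl')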
def test_astropy_quantity_array():
    a = Quantity([1, 2, 3], unit='m')

    hkl.dump(a, "test_ap.h5")
    b = hkl.load("test_ap.h5")
    assert np.allclose(a.value, b.value)
    assert a.unit == b.unit
def show_batch(filenames, labels, minibatch_index, label_dict, batch_size):
    import hickle as hkl
    batch_img = hkl.load(str(filenames[minibatch_index]))  # c01b
    batch_label = labels[minibatch_index * batch_size:(minibatch_index + 1) * batch_size]
    print minibatch_index
    for show_range in range(6):
        show_batch_part(batch_img, batch_label, show_range, label_dict)
def LoadBigDict(filename):
    if filename[-4:] != ".hkl":
        filename += ".hkl"
    data = hkl.load(filename)
    if "WWGammaW" in data.keys():
        gammaw = data["WWGammaW"]
        if "SmoothT" in gammaw.keys():
            gammaw["SmoothT"] = array(gammaw["SmoothT"], dtype=complex)
    return data
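# Usage sketch for LoadBigDict(); "run_output" is an assumed base name.
# The ".hkl" suffix is appended automatically when it is missing.
big = LoadBigDict("run_output")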
def load_data():
    # load your data using this function
    d = hkl.load(config.data_path)
    data = d['trainFeatures']
    labels = d['trainLabels']
    lz = d['labels']
    data = data.reshape(data.shape[0], 3, 227, 227)
    # data = data.transpose(0, 2, 3, 1)
    return data, labels, lz
def load_data():
    # load your data using this function
    d = hkl.load('../dataset/myfood100-227.hkl')
    data = d['trainFeatures']
    labels = d['trainLabels']
    lz = d['labels']
    data = data.reshape(data.shape[0], 3, 227, 227)
    # data = data.transpose(0, 2, 3, 1)
    return data, labels, lz
def load_dataset(self, embedding_method, test_flag=False):
    if embedding_method == 'skip':
        skip_dim = 4800
        filename = '/data/movieQA/skip_split.hkl'
        skip_embed = hickle.load(filename)
        skip_embed['zsl_train'], skip_embed['zsl_val'] = sc.sent_clip(self.nstory, skip_embed, skip_dim)

        self.zq = skip_embed['zq_train']
        self.zsl = skip_embed['zsl_train']
        self.zaj = skip_embed['zaj_train']
        self.ground_truth = skip_embed['ground_truth_train']

        self.zq_val = skip_embed['zq_val']
        self.zsl_val = skip_embed['zsl_val']
        self.zaj_val = skip_embed['zaj_val']
        self.ground_truth_val = skip_embed['ground_truth_val']

        self.num_train_examples = self.zq.shape[0]
        self.num_val_examples = self.zq_val.shape[0]

    if embedding_method == 'word2vec':
        w2v_embed = hickle.load('/data/movieQA/hickle_dump/w2v_plot_official.hkl')
        w2v_dim = 300
        w2v_embed['zsl_train'], w2v_embed['zsl_val'] = sc.sent_clip(self.nstory, w2v_embed, w2v_dim)

        self.zq = w2v_embed['zq_train']
        self.zsl = w2v_embed['zsl_train']
        self.zaj = w2v_embed['zaj_train']
        self.ground_truth = w2v_embed['ground_truth_train']

        self.zq_val = w2v_embed['zq_val']
        self.zsl_val = w2v_embed['zsl_val']
        self.zaj_val = w2v_embed['zaj_val']
        self.ground_truth_val = w2v_embed['ground_truth_val']

        print self.zq.shape
        print self.zsl.shape
        print self.zaj.shape
        print self.ground_truth.shape

        assert self.zq.shape == (9566, 1, w2v_dim)
        assert self.zsl.shape == (9566, self.nstory, w2v_dim)
        assert self.zaj.shape == (9566, 5, w2v_dim)
        assert self.ground_truth.shape == (9566, )

        self.num_train_examples = self.zq.shape[0]
        self.num_val_examples = self.zq_val.shape[0]
def get_val_error_loss(rand_arr, shared_x, shared_y, val_filenames, val_labels,
                       flag_datalayer, flag_para_load, batch_size,
                       validate_model, send_queue=None, recv_queue=None):
    if flag_datalayer:
        rand_arr.set_value(np.float32([0.5, 0.5, 0]))

    validation_losses = []
    validation_errors = []

    n_val_batches = len(val_filenames)

    if flag_para_load:
        # send the initial message to load data, before each epoch
        send_queue.put(str(val_filenames[0]))
        if not flag_datalayer:
            send_queue.put(np.float32([0.5, 0.5, 0]))
        send_queue.put('calc_finished')

    for val_index in range(n_val_batches):
        if flag_para_load:
            # load by self or the other process

            # wait for the copying to finish
            msg = recv_queue.get()
            assert msg == 'copy_finished'

            if val_index + 1 < n_val_batches:
                name_to_read = str(val_filenames[val_index + 1])
                send_queue.put(name_to_read)
                if not flag_datalayer:
                    send_queue.put(np.float32([0.5, 0.5, 0]))
        else:
            val_img = hkl.load(str(val_filenames[val_index]))
            shared_x.set_value(val_img)

        shared_y.set_value(val_labels[val_index * batch_size:
                                      (val_index + 1) * batch_size])
        loss, error = validate_model()

        if flag_para_load and (val_index + 1 < n_val_batches):
            send_queue.put('calc_finished')

        # print loss, error
        validation_losses.append(loss)
        validation_errors.append(error)

    this_validation_loss = np.mean(validation_losses)
    this_validation_error = np.mean(validation_errors)

    return this_validation_error, this_validation_loss
def collate_data(runname, filename=None, regenerate=False, **opts):
    """
    pull out all of the necessary information from the individual data files
    this takes awhile, so this data is saved to disk.
    """

    # if it's already made, load it and give it back
    # else, start with the making!
    if os.path.isfile(filename) and regenerate == False:
        print 'loading all data'
        with open(filename, "r") as f:
            outdict = hickle.load(f)
        return outdict

    # define output containers
    outvar = ['stellar_mass', 'sfr_30', 'sfr_100', 'half_time']
    outdict = {q: {f: [] for f in ['q50', 'q84', 'q16']} for q in outvar}
    for f in ['objname', 'agebins', 'weights', 'z_fraction']:
        outdict[f] = []

    # we want MASS, SFR_100, Z_FRACTION CHAIN, and AGEBINS for each galaxy
    pfile.run_params['zred'] = None  # make sure this is reset
    basenames = find_all_prospector_results(runname)
    for i, name in enumerate(basenames):

        # load output from fit
        try:
            res, _, model, prosp = load_prospector_data(name)
        except:
            print name.split('/')[-1] + ' failed to load. skipping.'
            continue
        if (res is None) or (prosp is None):
            continue

        outdict['objname'] += [name.split('/')[-1]]
        print 'loaded ' + outdict['objname'][-1]

        # agebins (and generate model)
        pfile.run_params['objname'] = outdict['objname'][-1]
        model = pfile.load_model(**pfile.run_params)
        outdict['agebins'] += [model.params['agebins']]

        # zfraction
        zidx = model.theta_index['z_fraction']
        outdict['z_fraction'] += [res['chain'][prosp['sample_idx'], zidx]]
        outdict['weights'] += [prosp['weights']]

        # extra variables
        for v in outvar:
            for f in ['q50', 'q84', 'q16']:
                outdict[v][f] += [prosp['extras'][v][f]]

    # dump files and return
    hickle.dump(outdict, open(filename, "w"))
    return outdict
def fun_load(config, sock_data=5000): send_queue = config["queue_l2t"] recv_queue = config["queue_t2l"] # recv_queue and send_queue are multiprocessing.Queue # recv_queue is only for receiving # send_queue is only for sending # if need to do random crop and mirror flag_batch = config["batch_crop_mirror"] drv.init() dev = drv.Device(int(config["gpu"][-1])) ctx = dev.make_context() sock = zmq.Context().socket(zmq.PAIR) sock.bind("tcp://*:{0}".format(sock_data)) shape, dtype, h = sock.recv_pyobj() print "shared_x information received" gpu_data_remote = gpuarray.GPUArray(shape, dtype, gpudata=drv.IPCMemoryHandle(h)) gpu_data = gpuarray.GPUArray(shape, dtype) img_mean = recv_queue.get() print "img_mean received" # The first time, do the set ups and other stuff # receive information for loading while True: # getting the hkl file name to load hkl_name = recv_queue.get() # print hkl_name data = hkl.load(hkl_name) - img_mean # print 'load ', time.time() - bgn_time param_rand = recv_queue.get() data = crop_and_mirror(data, param_rand, flag_batch=flag_batch) gpu_data.set(data) # wait for computation on last minibatch to finish msg = recv_queue.get() assert msg == "calc_finished" drv.memcpy_peer(gpu_data_remote.ptr, gpu_data.ptr, gpu_data.dtype.itemsize * gpu_data.size, ctx, ctx) ctx.synchronize() send_queue.put("copy_finished")
def create_moving_line(nt, line_len, nx, x0, y0, speed):
    X = np.zeros((nt, nx, nx)).astype(np.float32)
    for i in range(nt):
        xt = x0 + i * speed
        X[i, y0:y0 + line_len, xt] = 1

    file_name = 'line.hkl'
    hkl.dump(X, open(file_name, 'w'))
    X = hkl.load(open(file_name))
def test_embedded_array(): """ See https://github.com/telegraphic/hickle/issues/24 """ d_orig = [[np.array([10., 20.]), np.array([10, 20, 30])], [np.array([10, 2]), np.array([1.])]] hickle.dump(d_orig, 'test.h5') d_hkl = hickle.load('test.h5') for ii, xx in enumerate(d_orig): for jj, yy in enumerate(xx): assert np.allclose(d_orig[ii][jj], d_hkl[ii][jj]) print d_hkl print d_orig
def test_embedded_array(): """ See https://github.com/telegraphic/hickle/issues/24 """ d_orig = [[np.array([10., 20.]), np.array([10, 20, 30])], [np.array([10, 2]), np.array([1.])]] hickle.dump(d_orig, 'test.h5') d_hkl = hickle.load('test.h5') for ii, xx in enumerate(d_orig): for jj, yy in enumerate(xx): assert np.allclose(d_orig[ii][jj], d_hkl[ii][jj]) print(d_hkl) print(d_orig)
def imageLoader_pred(files, batch_size):

    L = len(files)

    # this loop makes the generator infinite; keras needs that
    while True:

        batch_start = 0
        batch_end = batch_size

        while batch_start < L:
            limit = min(batch_end, L)
            filename = files[batch_start]
            X = hkl.load(filename)
            Y = X  # self.output_mode == 'prediction': output actual pixels

            yield (X, Y)  # a tuple with two numpy arrays with batch_size samples

            batch_start += batch_size
            batch_end += batch_size
def load(self, fname=None):
    if fname is None:
        fname = self.get_filename()
    data = hickle.load(fname)
    for (k, v) in data.items():
        if k == "args":
            self.args = v
        elif k == "dirs":
            self.dirs = []
            for d in v:
                self.dirs.append(str(pathlib.Path(d)))
        elif k == "indexer":
            pass
        else:
            setattr(self, k, v)
def preprocess(self, X):
    file_to_load, frame_number = X[0].split("-")
    frame_number = int(frame_number)
    try:
        first_video = hkl.load(self.vlog_dir + file_to_load + "/clip.hkl")
        num_of_frames_to_use_from_first_vid = first_video.shape[0] - frame_number
        if (num_of_frames_to_use_from_first_vid >= self.nt):
            return first_video[frame_number:frame_number + self.nt].astype(np.float32) / 255
        else:
            clip = np.zeros((self.nt, ) + self.im_shape, np.uint8)
            clip[0:num_of_frames_to_use_from_first_vid] = first_video[
                frame_number:frame_number + num_of_frames_to_use_from_first_vid]
            second_file_to_load, second_frame_number = X[9].split("-")
            second_frame_number = int(second_frame_number)
            second_video = hkl.load(self.vlog_dir + second_file_to_load + "/clip.hkl")
            clip[num_of_frames_to_use_from_first_vid:self.nt] = second_video[:second_frame_number]
            return clip.astype(np.float32) / 255
    except:
        print "data is corrupt:", X
def get_gt_masks(gt_mask_file, size):
    """
    This function loads cached gt_masks from .hkl
    :param roidb:
    :return:
    """
    assert os.path.exists(gt_mask_file), '{} does not exist'.format(gt_mask_file)
    gt_masks = hkl.load(gt_mask_file)
    num_mask = gt_masks.shape[0]
    processed_masks = np.zeros((num_mask, size[0], size[1]))
    for i in range(num_mask):
        processed_masks[i, :, :] = cv2.resize(gt_masks[i].astype('float'), (size[1], size[0]))
    return processed_masks
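# Usage sketch for get_gt_masks(); the cache path and target (height, width)
# are assumptions for illustration only.
masks = get_gt_masks('cache/gt_masks.hkl', size=(600, 800))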
def test_load():
    a = set([1, 2, 3, 4])
    b = set([5, 6, 7, 8])
    c = set([9, 10, 11, 12])
    z = (a, b, c)
    z = [z, z]
    z = (z, z, z, z, z)

    print("Original:")
    pprint(z)
    dump(z, 'test.hkl', mode='w')

    print("\nReconstructed:")
    z = load('test.hkl')
    pprint(z)
def __init__(self, story_filename, qa_filelist, capacity, batch_size, num_threads):
    self.qa_data = {}
    for qa_file in qa_filelist:
        imdb_key = qa_file.split('/')[-1].split('.h5')[0]
        self.qa_data[imdb_key] = h5py.File(qa_file)

    if FLAGS.video_features == True or FLAGS.sub_with_video_features == True:
        self.story = load(story_filename)
    else:
        self.story = h5py.File(story_filename)

    self.queue = Queue.Queue(capacity)
    self.batch_size = batch_size
    self.num_threads = num_threads
    self.num_movie = len(self.qa_data)
def load_bball_data(self, data_dir, dt=0.1):
    '''This function is taken from https://github.com/cagatayyildiz/ODE2VAE with X and Y flipped'''
    Ytr = hkl.load(os.path.join(data_dir, "training.hkl"))
    Xtr = dt * np.arange(0, Ytr.shape[1], dtype=np.float32)
    Xtr = np.tile(Xtr, [Ytr.shape[0], 1])

    Xval = Yval = Xtest = Ytest = None
    # Yval = hkl.load(os.path.join(data_dir, "val.hkl"))
    # Xval = dt*np.arange(0, Yval.shape[1], dtype=np.float32)
    # Xval = np.tile(Xval, [Yval.shape[0], 1])
    # Ytest = hkl.load(os.path.join(data_dir, "test.hkl"))
    # Xtest = dt*np.arange(0, Ytest.shape[1], dtype=np.float32)
    # Xtest = np.tile(Xtest, [Ytest.shape[0], 1])

    return Xtr, Ytr, Xval, Yval, Xtest, Ytest
def test_masked():
    """ Test masked numpy array """
    filename, mode = 'test.h5', 'w'
    a = np.ma.array([1, 2, 3, 4], dtype='float32', mask=[0, 1, 0, 0])

    dump(a, filename, mode)
    a_hkl = load(filename)

    try:
        assert a_hkl.dtype == a.dtype
        assert np.ma.allclose(a_hkl, a)
    except AssertionError:
        print(a_hkl)
        print(a)
        raise
def __getitem__(self, idx):
    if self.split == 'train':
        pos_data = hickle.load(self.pos_data_list[idx])
        neg_data = hickle.load(random.choice(self.neg_data_list))

        pos_gt = [pos_data['gt_im'][i] for i in range(min(len(pos_data['gt_im']), 11))] + \
                 [pos_data['gt_im'][-1] for _ in range(11 - len(pos_data['gt_im']))]
        pos_pred = [pos_data['gt_im'][0]] + [pos_data['pred_im'][i] for i in range(10)]
        neg_gt = [neg_data['gt_im'][i] for i in range(min(len(neg_data['gt_im']), 11))] + \
                 [neg_data['gt_im'][-1] for _ in range(11 - len(neg_data['gt_im']))]
        neg_pred = [neg_data['gt_im'][0]] + [neg_data['pred_im'][i] for i in range(10)]

        pos_gt = torch.from_numpy(np.array(pos_gt))
        pos_pred = torch.from_numpy(np.array(pos_pred))
        neg_gt = torch.from_numpy(np.array(neg_gt))
        neg_pred = torch.from_numpy(np.array(neg_pred))
        return pos_gt, pos_pred, neg_gt, neg_pred

    if self.split == 'test':
        if idx < len(self.pos_data_list):
            data = hickle.load(self.pos_data_list[idx])
        else:
            data = hickle.load(self.neg_data_list[idx - len(self.pos_data_list)])

        gt = [data['gt_im'][i] for i in range(min(len(data['gt_im']), 11))] + \
             [data['gt_im'][-1] for _ in range(11 - len(data['gt_im']))]
        pred = [data['gt_im'][0]] + [data['pred_im'][i] for i in range(10)]

        gt = torch.from_numpy(np.array(gt))
        pred = torch.from_numpy(np.array(pred))
        return gt, pred, torch.ones(1) if idx < len(self.pos_data_list) else torch.zeros(1)
def create_KLAB16_predicted_classes():
    feature_tag = 'fc7_noRelu'

    for t in ['orig', 'occl']:
        f_name = '/home/bill/Projects/Occlusion_RNN/runs/' + feature_tag + '_KLAB16/pred_classes_test_train' + t + '.hkl'
        f = open(f_name, 'r')
        pred_classes = hkl.load(f)
        f.close()

        for j in range(-1, 7):
            if np.sum(pred_classes[str(j)] == -1) > 0:
                print 'THERE ARE SOME NONpredictions'
                pdb.set_trace()
            else:
                save_file = '/home/bill/Projects/Occlusion_RNN/files/pred_classes_' + feature_tag + '_train' + t + '_t' + str(j) + '_KLAB16.mat'
                spio.savemat(save_file, {'pred_classes': pred_classes[str(j)]})
def data_split(inputfile, reads_count):
    data = hkl.load(inputfile)
    reads_count = hkl.load(reads_count)
    X = data['mat']
    X_kspec = data['kmer']
    reads_count = np.array(reads_count)
    y = np.mean(reads_count, axis=1)
    y = np.log(y + 1e-3)

    rs = ShuffleSplit(len(y), n_iter=1, random_state=1)
    X_kspec = X_kspec.reshape((X_kspec.shape[0], 1024, 4))
    X = np.concatenate((X, X_kspec), axis=1)
    X = X[:, np.newaxis]
    X = X.transpose((0, 1, 3, 2))
    for train_idx, test_idx in rs:
        X_train = X[train_idx, :]
        y_train = y[train_idx]
        X_test = X[test_idx, :]
        y_test = y[test_idx]
    X_train = X_train.astype('float32')
    y_train = y_train.astype('float32')
    X_test = X_test.astype('float32')
    y_test = y_test.astype('float32')

    print 'Data preparation done!'
    return [X_train, y_train, X_test, y_test]
def wideresnet50(pooling):
    dir_models = os.path.join(expanduser("~"), '.torch/wideresnet')
    path_hkl = os.path.join(dir_models, 'wideresnet50.hkl')
    if not os.path.isfile(path_hkl):
        os.system('mkdir -p ' + dir_models)
        os.system('wget {} -O {}'.format(model_urls['wideresnet50'], path_hkl))
    params = hkl.load(path_hkl)
    # convert numpy arrays to torch Variables
    for k, v in sorted(params.items()):
        print (k, v.shape)
        params[k] = Variable(torch.from_numpy(v), requires_grad=True)
    f = define_model(params)
    model = WideResNet(pooling)
    return model
def load_model(load_path):
    # loads the model and its history and settings, and then returns it
    new_model = Model()
    new_model.model = keras.models.load_model(os.path.join(load_path, "model.hdf5"))
    new_model.epochs, new_model.batch_size, new_model.history = \
        hickle.load(os.path.join(load_path, "model_data.hkl"))
    new_model.status = 'ready'
    new_model.use_generator = False
    return new_model
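# Usage sketch for load_model(); "saved_runs/mnist" is an assumed directory
# that would contain model.hdf5 and model_data.hkl, and x_test is an assumed
# array of held-out inputs.
m = load_model("saved_runs/mnist")
predictions = m.model.predict(x_test)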
def create(self, *args, **kwargs):
    # this function will be called once to create this waveform object

    # one neat property of Object-Oriented Programming (OOP) structure is that
    # you can create some field-value pairs that can be called and updated
    # in all functions of the object, if you specify the function properly.
    # The only thing that you need to do is to instantiate those fields in
    # this function with the prefix 'self.', then you can call them and
    # edit them in all the other functions that have the first input argument
    # being 'self'
    #
    # For example, if you instantiate a field-value pair:
    #   self.name = IronMan
    #
    # You can then call or edit it in other functions:
    #   def get_name(self):
    #       print(self.name)
    #
    #   def set_name(self, new_name):
    #       self.name = new_name
    #
    # In this way, you don't need to return and pass in so many arguments
    # across different functions anymore :)

    pwd = os.path.normpath(os.getcwd())
    # 'channelxxx', where xxx is the number of the channel
    self.channel_filename = [os.path.basename(pwd)]
    aname = DPT.levels.normpath(os.path.dirname(pwd))
    self.array_dict = dict()
    self.array_dict[aname] = 0
    self.numSets = 1
    self.current_plot_type = None

    template_filename = os.path.join(
        DPT.levels.resolve_level('day', self.channel_filename[0]),
        'mountains', self.channel_filename[0], 'output', 'templates.hkl')
    templates = hkl.load(template_filename)
    self.data = [np.squeeze(templates)]

    # check on the mountainsort template data and create a DPT object accordingly
    # Example:
    if len(self.data):
        # create object if data is not empty
        DPT.DPObject.create(self, *args, **kwargs)
    else:
        # create empty object if data is empty
        DPT.DPObject.create(self, dirs=[], *args, **kwargs)
def build_dual_model(model_type='vision', pretrained=True, low_dim=128, n_cluster=100, freeze=True):
    if model_type == 'vision':
        encoder = torchvision.models.googlenet(pretrained)
        encoder = nn.Sequential(*list(encoder.children())[:-2])
    else:
        encoder = inception_v1_encoder()
        if pretrained:
            base_model_weights_path = 'models/googlenet.h5'
            if os.path.exists(base_model_weights_path):
                encoder.load_state_dict(
                    {k: torch.from_numpy(v).cuda() for k, v in hickle.load(base_model_weights_path).items()})
        encoder = nn.Sequential(*list(encoder.children())[:-1])
    # decoder = inception_v1_decoder()
    decoder = Decoder()
    model = DualModel(encoder, decoder, low_dim, n_cluster)
    return model
def load_weights(_cls, f_weights):
    if not os.path.isfile(f_weights):
        return None, None, None

    from_hickle = hickle.load(f_weights)
    W = theano.shared(value=from_hickle['W'], name='W', borrow=True)
    b = theano.shared(value=from_hickle['b'], name='b', borrow=True)
    b_prime = theano.shared(value=from_hickle['b_prime'], name='b_prime', borrow=True)

    print "Loaded Weights from disk"
    print "Loaded : dtype of W : ", W.dtype
    print "Loaded : dtype of b : ", b.dtype
    print "Loaded : dtype of b_prime : ", b_prime.dtype

    return W, b, b_prime
def data_load(i, test_filenames):
    """Loads one batch and flattens it to (#examples, #pixels per image).

    Args:
        i: batch index
    Returns:
        test_data, flattened to (#examples, #pixels per image)
    """
    import hickle as hkl
    test_data = hkl.load(test_filenames[i])
    test_data = test_data.astype('float32').transpose([3, 0, 1, 2])
    a, b, c, d = test_data.shape
    test_data = test_data.reshape(a, b * c * d)
    return test_data
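# Usage sketch for data_load(); test_filenames is assumed to be a list of
# hickle batch files produced elsewhere in the pipeline.
flat_batch = data_load(0, test_filenames)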
def test_tuple_numpy():
    """ Test dumping and loading a tuple of numpy arrays """

    filename, mode = 'test.h5', 'w'
    a = np.ones(1024)
    b = np.zeros(1000)
    c = (a, b, a)

    dump(c, filename, mode)
    dd_hkl = load(filename)
    print(dd_hkl)

    assert isinstance(dd_hkl, tuple)
    assert isinstance(dd_hkl[0], np.ndarray)
def markers_tst(self):
    if self._markers_tst is None:
        if not os.path.exists(self._hdf5_file):
            if self._snp_list is None:
                if self._pvals is None:
                    self._pvals = pd.read_csv(self.pval_path)[self.trait_name].values
                    self._pvals[self._pvals != self._pvals] = 100
                snp_list = self._pvals.ravel().argsort()
                snp_list = snp_list[0:self._k]
                snp_list = np.sort(snp_list)
                self._snp_list = snp_list
            self._markers_tst = readgbinfile(self.test_markers_path, snp_list=self._snp_list, std=False)
        else:
            aux = hkl.load(self._hdf5_file)
            self._markers_tst = aux["x_tst"]
    return self._markers_tst
def __init__(self, filename):
    # Models are HDF files via hickle with 4 item tuple
    # (intercept, data, indices, indptr)
    # where the latter three items form a csr_matrix sparse
    # representation of the model coefficients
    # This is immediately converted to the dense representation
    # to speed up prediction (the .A1 bit returns the data
    # contents of the numpy matrix as a numpy array, making
    # calculations much quicker)
    raw_data = hickle.load(filename)
    self.coef = csr_matrix((raw_data[1], raw_data[2], raw_data[3]),
                           shape=(1, 67108864)).todense().A1
    self.intercept = raw_data[0]
def test_numpy():
    """ Dumping and loading numpy array """
    filename, mode = 'test.h5', 'w'
    dtypes = ['float32', 'float64', 'complex64', 'complex128']

    for dt in dtypes:
        array_obj = np.ones(8, dtype=dt)
        dump(array_obj, filename, mode)
        array_hkl = load(filename)
        try:
            assert array_hkl.dtype == array_obj.dtype
            assert np.allclose(array_hkl, array_obj)
        except AssertionError:
            print(array_hkl)
            print(array_obj)
            raise
def get_data_sampler(train):
    d = hickle.load('{}/dataset.hkl'.format(DATA_DIR))

    # GROUPS = d['GROUPS']
    GROUPS = {'Normal': 0, 'Abnormal': 1}

    labels = []
    images = []
    for y, x in zip(d['y'], d['X']):
        if y not in GROUPS:
            continue
        labels.append(GROUPS[y])
        images.append(x)
    labels = np.array(labels)
    images = np.array(images)

    sss = sklearn.cross_validation.StratifiedShuffleSplit(
        labels,
        n_iter=1,
        test_size=0.2,
        random_state=RANDOM_SEED,
    )
    if train:
        ix, _ = tuple(sss)[0]
    else:
        _, ix = tuple(sss)[0]
    labels = labels[ix]
    images = images[ix]

    # IMAGE_MEAN = images.mean(0).mean(0).mean(0)
    # images = (images - IMAGE_MEAN)
    images = images / 255. - 0.5

    def sample(seed, N, p=0.5):
        rng = np.random.RandomState(seed)
        X = np.zeros((N, NUM_CHANNELS, IMAGE_W, IMAGE_W))
        Y = np.zeros(N)
        idx = np.random.randint(0, len(images), N)
        for n, i in enumerate(idx):
            X[n] = patch(images[i], rng=rng)
            Y[n] = labels[i]
        return X, Y

    return sample
def test_multi_hickle():
    """ Dumping to and loading from the same file several times
    https://github.com/telegraphic/hickle/issues/20"""

    a = {'a': 123, 'b': [1, 2, 4]}

    if os.path.exists("test.hkl"):
        os.remove("test.hkl")
    dump(a, "test.hkl", path="/test", mode="w")
    dump(a, "test.hkl", path="/test2", mode="r+")
    dump(a, "test.hkl", path="/test3", mode="r+")
    dump(a, "test.hkl", path="/test4", mode="r+")

    load("test.hkl", path="/test")
    load("test.hkl", path="/test2")
    load("test.hkl", path="/test3")
    load("test.hkl", path="/test4")
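# Minimal sketch extending the test above: read one sub-path back and check a
# scalar field survives the round trip (assumes the same test.hkl layout).
a2 = load("test.hkl", path="/test2")
assert a2['a'] == 123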
def calc_peaks():
    counter = 0
    surrogate_repeat = 1000

    # overwrite hkl_files and running directions with specific test files_______________________
    hkl_files = ['10528_2015-04-01_VR_GCend_linTrack1_TT4_SS_06_PF_info.hkl',
                 '10528_2015-04-16_VR_GCend_Dark_linTrack1_TT2_SS_25_PF_info.hkl',
                 '10537_2015-10-22_VR_GCend_linTrack1_TT1_SS_09_PF_info_normalised.hkl',
                 '10823_2015-07-24_VR_GCend_linTrack1_TT3_SS_04_PF_info_normalised.hkl']
    run_direc = ['right', 'left', 'right', 'left']

    for i, file in enumerate(hkl_files):
        a = hickle.load(path + file)  # '10353_2014-06-17_VR_GCend_linTrack1_GC_TT3_SS_07_PF_info_normalised.hkl'

        for gain in ['1.5']:  # ['0.5', '1.5']:
            print 'counter: ', counter, ' out of ', len(hkl_files) - 1  # *2
            counter += 1
            # run_direc[i] = 'right'
            fr = a[run_direc[i] + 'FR_x_y_gain_' + gain]
            x = fr[0]
            y = fr[1]

            # correct x-axis for leftwards runs -- they were always saved as a FR array from traj.xlim[0] to
            # traj.xlim[1], which goes for x from [0 .. e.g. 2] no matter what the running direction of the animal was!
            # For leftward runs spikes at e.g. x=2 would be at the beginning of the run for the animal, therefore need
            # to be corrected to be x=0.
            if run_direc[i] == 'left':  # for leftward runs plot absolute x-value from start position
                # sys.exit()
                vis_track_length = 2.
                if file.endswith('normalised.hkl'):
                    start = vis_track_length / float(gain)
                else:
                    start = vis_track_length
                x = abs(x - start)

            fig22, ax22 = fit_gaussians_etc(x=x, y=y, surrogate_repeat=surrogate_repeat, gain=gain,
                                            run_direc=run_direc[i], file=file, savefig=True)

            if gain == '0.5':
                g = '05'
            else:
                g = '15'

            fig22.savefig(pathGauss + file.split('.hkl')[0] + '_' + run_direc[i] + '_gain_' + g + '.pdf', format='pdf')
def markers_tr(self):
    if self._markers_tr is None:
        if not os.path.exists(self._hdf5_file):
            if self._snp_list is None:
                if self._pvals is None:
                    params = {'index_col': None, 'sep': ','}
                    self._pvals = pd.read_csv(self.pval_path, **params)[self.trait_name].values
                    self._pvals[self._pvals != self._pvals] = 100
                snp_list = self._pvals.ravel().argsort()
                snp_list = snp_list[0:self._k]
                snp_list = np.sort(snp_list)
                self._snp_list = snp_list
            self._markers_tr = readgbinfile(self.train_markers_path, snp_list=self._snp_list, std=False)
        else:
            aux = hkl.load(self._hdf5_file)
            self._markers_tr = aux["x_tr"]
    return self._markers_tr
def load_activation_data(path):
    try:
        a = hickle.load(path)
        Files = a["Files"]
        Y = a["Y"]
        Y_catmat = a["Y_catmat"]
        activations = a["activations"]
    except ValueError:
        print "Error reading pickle file (corrupted?): {0}".format(path)
        Files = []
        Y = []
        Y_catmat = []
        activations = []
    return Files, activations, Y, Y_catmat
def build_auto_enc_model(model_type, pretrained=True):
    if model_type == 'vision':
        encoder = models.googlenet(pretrained)
        encoder = nn.Sequential(*list(encoder.children())[:-2])
    else:
        encoder = inception_v1_encoder()
        if pretrained:
            base_model_weights_path = 'models/googlenet.h5'
            if os.path.exists(base_model_weights_path):
                encoder.load_state_dict({
                    k: torch.from_numpy(v).cuda()
                    for k, v in hickle.load(base_model_weights_path).items()
                })
        encoder = nn.Sequential(*list(encoder.children())[:-1])
    decoder = inception_v1_decoder()
    model = AutoEncoderModel(encoder, decoder)
    return model
def main(resume=None):
    l = 300
    dataset = './data/ubiquitous_train.hkl'
    print('Loading dataset {}...'.format(dataset))
    X_train, y_train = hkl.load(dataset)
    X_train = X_train.reshape(-1, 4, 1, l).astype(floatX)
    y_train = np.array(y_train, dtype='int32')
    indice = np.arange(X_train.shape[0])
    np.random.shuffle(indice)
    X_train = X_train[indice]
    y_train = y_train[indice]
    print('X_train shape: {}, y_train shape: {}'.format(X_train.shape, y_train.shape))

    layers = [
        (InputLayer, {'shape': (None, 4, 1, l)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (DenseLayer, {'num_units': 64}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 64}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]

    lr = theano.shared(np.float32(1e-4))

    net = NeuralNet(
        layers=layers,
        max_epochs=100,
        update=adam,
        update_learning_rate=lr,
        train_split=TrainSplit(eval_size=0.1),
        on_epoch_finished=[
            AdjustVariable(lr, target=1e-8, half_life=20)],
        verbose=4)

    if resume is not None:
        net.load_params_from(resume)

    net.fit(X_train, y_train)
    net.save_params_to('./models/net_params.pkl')
def test_astropy_table():
    t = Table([[1, 2], [3, 4]], names=('a', 'b'), meta={'name': 'test_thing'})

    hkl.dump({'a': t}, "test_ap.h5")
    t2 = hkl.load("test_ap.h5")['a']

    print(t)
    print(t.meta)
    print(t2)
    print(t2.meta)

    print(t.dtype, t2.dtype)
    assert t.meta == t2.meta
    assert t.dtype == t2.dtype

    assert np.allclose(t['a'].astype('float32'), t2['a'].astype('float32'))
    assert np.allclose(t['b'].astype('float32'), t2['b'].astype('float32'))