def dump(self, input_, target):
    """Serialize the model's input, target, initial LSTM states, and all
    parameters to two files: ``<filename>.json`` (nested lists) and
    ``<filename>.in`` (Futhark binary data).

    The Futhark file additionally contains a trailing ``loss_adj`` scalar,
    and state/weight arrays are transposed to the layout the Futhark
    program expects.
    """
    # exist_ok=True avoids the check-then-create race of the old
    # `if not os.path.exists(...): os.makedirs(...)` pattern.
    os.makedirs(os.path.dirname(self.filename), exist_ok=True)
    d = {
        'input': input_,
        'target': target,
        'hidn_st0': self.hidn_st0,
        'cell_st0': self.cell_st0,
    }
    # NOTE(review): if self.lstm and self.linear expose a parameter with the
    # same name (e.g. both have 'weight'), the later one silently overwrites
    # the earlier entry — confirm the parameter names are disjoint.
    for name, p in chain(self.lstm.named_parameters(), self.linear.named_parameters()):
        d[name] = p
    d_futhark = {}
    for name, p in d.items():
        xs = p.cpu().detach().numpy()
        if name in ('hidn_st0', 'cell_st0'):
            # Both initial states get the same treatment: drop the leading
            # axis and transpose (the two branches were duplicated before).
            d_futhark[name] = xs[0, :, :].T
        elif name == 'weight':
            d_futhark[name] = xs.T
        else:
            d_futhark[name] = xs
    # Trailing scalar consumed by the Futhark program.
    d_futhark['loss_adj'] = np.float32(1.0)
    with open(self.filename + ".json", 'w') as f:
        json.dump({name: p.tolist() for name, p in d.items()}, f)
    with open(self.filename + ".in", 'wb') as f:
        for xs in d_futhark.values():
            futhark_data.dump(xs, f, True)
def main():
    """Parse command-line options, generate the requested datasets, and dump
    them (dense or CSR-sparse) to the configured output file in Futhark
    format.

    Exits with status 2 on an unrecognized data format.
    """
    opts, args = get_params()
    config = parse_opts(opts)
    inputs = parse_args(args)
    with open(config["output"], config["mode"]) as file:
        for sizes, dtype, dform in inputs:
            if dform is None:
                # Dense: a single array.
                values = generate_dense(sizes, dtype)
                fd.dump(values, file, config["binary"])
            elif dform == "csr":
                # Compressed sparse row: values + column indices + row
                # pointers + column count, dumped in that order.
                values, col_idx, row_ptr, num_col = generate_sparse(sizes, dtype)
                fd.dump(values, file, config["binary"])
                fd.dump(col_idx, file, config["binary"])
                fd.dump(row_ptr, file, config["binary"])
                fd.dump(num_col, file, config["binary"])
            else:
                print("illegal data format", "\n")
                print_usage()
                sys.exit(2)
def dump_output(self):
    """Write the scalar loss to ``<filename>.F`` and the stored gradients to
    ``<filename>.J``, both in Futhark binary format.

    The gradient named 'weight' is transposed to match the Futhark-side
    layout; all others are written as-is.
    """
    # exist_ok=True avoids the check-then-create race of the old
    # `if not os.path.exists(...): os.makedirs(...)` pattern.
    os.makedirs(os.path.dirname(self.filename), exist_ok=True)
    with open(self.filename + ".F", 'wb') as f:
        futhark_data.dump(self.loss.cpu().detach().numpy(), f, True)
    with open(self.filename + ".J", 'wb') as f:
        for n, g in self.grads.items():
            arr = g.cpu().detach().numpy()
            # Hoisted the common conversion; only the transpose differs.
            futhark_data.dump(arr.T if n == 'weight' else arr, f, True)
import numpy as np
import futhark_data
import pandas as pd

# Number of leading rows of HIGGS.csv kept as the training subset.
test_size = 5 * 10**6

data = pd.read_csv("data/HIGGS.csv", delimiter=",", header=None)
data = data[:test_size].astype("float32")
data.to_csv("data/HIGGS_training.csv", header=False, index=False)

data = data.to_numpy()
# -999.0 appears to be the dataset's missing-value sentinel; map it to NaN.
# NOTE(review): confirm this sentinel against the HIGGS dataset description.
data = np.where(data == -999.0, np.nan, data)

# Column 0 is the target; the remaining columns are the features.
target = data[:, 0]
print(target.shape)
data = data[:, 1:]
print(data.shape)

# `with` guarantees the file is flushed and closed (the original handle
# was opened and never closed).
with open("data/HIGGS_training", "wb") as fileHandler:
    futhark_data.dump(data, fileHandler, True)
    futhark_data.dump(target, fileHandler, True)
def dump_fit(self, X, y, fn):
    """Dump training data and hyperparameters to *fn* in Futhark binary
    format.

    X is cast to float32 and y to int32. The scalars follow in the fixed
    order the Futhark program expects: C, n_ws, max_t, max_t_in,
    max_t_out, eps, gamma, coef0, degree.
    """
    # 'auto' resolves gamma to 1 / n_features, mirroring the sklearn SVM
    # convention; otherwise the user-supplied value is used directly.
    if self.gamma == 'auto':
        self.__gamma = 1.0 / X.shape[1]
    else:
        self.__gamma = self.gamma
    # `with` guarantees the handle is closed even if a dump raises
    # (the original `f = open(...) ... f.close()` leaked on error).
    with open(fn, 'wb') as f:
        dump(X.astype(np.float32), f, binary=True)
        dump(y.astype(np.int32), f, binary=True)
        # Table-driven scalar dumps; order matters and must not change.
        for value, np_type in (
            (self.C, np.float32),
            (self.n_ws, np.int32),
            (self.max_t, np.int32),
            (self.max_t_in, np.int32),
            (self.max_t_out, np.int32),
            (self.eps, np.float32),
            (self.__gamma, np.float32),
            (self.coef0, np.float32),
            (self.degree, np.float32),
        ):
            dump(np_type(value), f, binary=True)
def dump_predict(self, X, fn, n_ws=64):
    """Dump prediction inputs and the trained model state to *fn* in
    Futhark binary format.

    Raises Exception if the model has not been trained. The arrays
    A, I, S, Z, R are converted from their Futhark-side representation
    before writing; scalars follow in the order n_c, n_ws, gamma,
    coef0, degree.
    """
    if not self.trained:
        raise Exception('Not trained')
    # `with` guarantees the handle is closed even if a dump raises
    # (the original `f = open(...) ... f.close()` leaked on error).
    with open(fn, 'wb') as f:
        dump(X.astype(np.float32), f, binary=True)
        # Trained model arrays, in the order the Futhark program reads them.
        for arr in (self.__A, self.__I, self.__S, self.__Z, self.__R):
            dump(fsvm.from_futhark(arr), f, binary=True)
        dump(np.dtype('int32').type(self.__n_c), f, binary=True)
        dump(np.dtype('int32').type(n_ws), f, binary=True)
        dump(np.dtype('float32').type(self.gamma), f, binary=True)
        dump(np.dtype('float32').type(self.coef0), f, binary=True)
        dump(np.dtype('float32').type(self.degree), f, binary=True)
#print(data.shape, data.dtype) rnd_shape = np.random.multinomial(num, np.ones(size) / size, size=1)[0].astype("int64") #print(rnd_shape.shape, rnd_shape.dtype) #print("making "+str_size+" matrix with name "+ name) print("Making: " + name) gen_data_command = "futhark dataset -b --u16-bounds=" + str( 0) + ":" + str(size) + " -g " + str_size + "u16 > " + name gen_gis_command = "futhark dataset -b -g" + arr_size + "f32 >>" + name gen_his_command = "futhark dataset -b -g" + arr_size + "f32 >>" + name os.system(gen_data_command) os.system(gen_gis_command) os.system(gen_his_command) fileHandler = open(name, "ab") futhark_data.dump(rnd_shape, fileHandler, True) futhark_data.dump(size, fileHandler, True) #print ("done") SEGS = np.array([2**4, 2**5, 2**6, 2**7, 2**8, 2**9, 2**10, 2**11, 2**12]).astype("int64") num = 10**7 for size in SEGS: str_size = matsize_to_str(num, 20) arr_size = size_to_str(num) name = path_name(path, prefix, size, num) # #print(file_name(prefix, num, size) not in datasets) if file_name(prefix, size, num) not in datasets: #data = np.random.rand(num,size).astype("float32") #print(data.shape, data.dtype) rnd_shape = np.random.multinomial(num, np.ones(size) / size,
# #print(file_name(prefix, num, size) not in datasets) if file_name(prefix, num, size) not in datasets: #data = np.random.rand(num,size).astype("float32") #print(data.shape, data.dtype) rnd_shape = np.random.multinomial(num, np.ones(n)/n, size=1)[0].astype("int64") #print(rnd_shape.shape, rnd_shape.dtype) conds = np.random.rand(n).astype("float32") #print(conds.shape, conds.dtype) split_idxs = np.random.randint(size-1, size=n).astype("int64") #print(split_idxs.shape, split_idxs.dtype) #print("making "+str_size+" matrix with name "+ name) print("Making: "+name) os.system("futhark dataset -b -g "+str_size+"f32 > "+name) fileHandler = open(name, "ab") #futhark_data.dump(data, fileHandler, True) futhark_data.dump(rnd_shape, fileHandler, True) futhark_data.dump(conds, fileHandler, True) futhark_data.dump(split_idxs, fileHandler, True) # os.system("futhark dataset -b -g "+str_size+"f32 > "+name) # print ("done") SEGS = [2**4, 2**5, 2**6, 2**7, 2**8, 2**9, 2**10, 2**11] size = 20 num = 10**6 #number of elements prefix = "seg" for seg in SEGS: str_size = matsize_to_str(num, size) name = path_name(path, prefix, seg, num) # #print(file_name(prefix, num, size) not in datasets) if file_name(prefix, seg, num) not in datasets: #print("writing: "+name)