def SaveModel(self, file):
    print 'Saving model parameters'
    model = []
    for param in self.params:
        model.append(param.get_value())
    with open(file, 'wb') as f:
        pkl_utils.dump(model, f)
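# A loading counterpart is not shown in the original; a minimal sketch,
# assuming theano.misc.pkl_utils is imported as pkl_utils (as above) and that
# self.params holds Theano shared variables:
def LoadModel(self, file):
    with open(file, 'rb') as f:
        model = pkl_utils.load(f)
    # Copy each saved array back into the matching shared variable.
    for param, value in zip(self.params, model):
        param.set_value(value)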
def test_dump_load_mrg(self):
    rng = MRG_RandomStreams()

    with open('test', 'wb') as f:
        dump(rng, f)

    with open('test', 'rb') as f:
        rng = load(f)

    assert type(rng) == MRG_RandomStreams
def test_dump_load_mrg(self):
    rng = MRG_RandomStreams(use_cuda=cuda_ndarray.cuda_enabled)

    with open('test', 'wb') as f:
        dump(rng, f)

    with open('test', 'rb') as f:
        rng = load(f)

    assert type(rng) == MRG_RandomStreams
def test_dump_load_mrg(self):
    rng = MRG_RandomStream()

    with open("test", "wb") as f:
        dump(rng, f)

    with open("test", "rb") as f:
        rng = load(f)

    assert type(rng) == MRG_RandomStream
def test_dump_zip_names():
    foo_1 = theano.shared(0, name='foo')
    foo_2 = theano.shared(1, name='foo')

    with open('model.zip', 'wb') as f:
        dump((foo_1, foo_2, numpy.array(2)), f)

    keys = numpy.load('model.zip').keys()
    assert keys == ['foo', 'foo_2', 'array_0', 'pkl']

    foo = numpy.load('model.zip')['foo']
    assert foo == numpy.array(0)

    with open('model.zip', 'rb') as f:
        foo_1, foo_2, array = load(f)

    assert array == numpy.array(2)
def test_dump_zip_names(self): foo_1 = theano.shared(0, name="foo") foo_2 = theano.shared(1, name="foo") foo_3 = theano.shared(2, name="foo") with open("model.zip", "wb") as f: dump((foo_1, foo_2, foo_3, np.array(3)), f) keys = list(np.load("model.zip").keys()) assert keys == ["foo", "foo_2", "foo_3", "array_0", "pkl"] foo_3 = np.load("model.zip")["foo_3"] assert foo_3 == np.array(2) with open("model.zip", "rb") as f: foo_1, foo_2, foo_3, array = load(f) assert array == np.array(3)
def test_dump_zip_names(self):
    foo_1 = theano.shared(0, name='foo')
    foo_2 = theano.shared(1, name='foo')
    foo_3 = theano.shared(2, name='foo')

    with open('model.zip', 'wb') as f:
        dump((foo_1, foo_2, foo_3, np.array(3)), f)

    keys = list(np.load('model.zip').keys())
    assert keys == ['foo', 'foo_2', 'foo_3', 'array_0', 'pkl']

    foo_3 = np.load('model.zip')['foo_3']
    assert foo_3 == np.array(2)

    with open('model.zip', 'rb') as f:
        foo_1, foo_2, foo_3, array = load(f)

    assert array == np.array(3)
def test_dump_load():
    x = GpuArraySharedVariable(
        'x',
        GpuArrayType('float32', (1, 1), name='x', context_name=test_ctx_name),
        [[1]], False)

    with open('test', 'wb') as f:
        dump(x, f)

    with open('test', 'rb') as f:
        x = load(f)

    assert x.name == 'x'
    np.testing.assert_allclose(x.get_value(), [[1]])
def test_dump_load(self):
    if not cuda_ndarray.cuda_enabled:
        raise SkipTest('Optional package cuda disabled')

    x = CudaNdarraySharedVariable('x', CudaNdarrayType((1, 1), name='x'),
                                  [[1]], False)

    with open('test', 'wb') as f:
        dump(x, f)

    with open('test', 'rb') as f:
        x = load(f)

    assert x.name == 'x'
    assert_allclose(x.get_value(), [[1]])
def test_dump_load(): x = GpuArraySharedVariable( "x", GpuArrayType("float32", (1, 1), name="x", context_name=test_ctx_name), [[1]], False, ) with open("test", "wb") as f: dump(x, f) with open("test", "rb") as f: x = load(f) assert x.name == "x" np.testing.assert_allclose(x.get_value(), [[1]])
def post(self, *args, **kwargs):
    """Trains and saves an Ordinal Perceptron model.

    :param dataset: Dataset to be trained on
    :param results: Results for supervised training
    :return: Dict with the model_id to be used later for prediction
    """
    # Receive the request files
    try:
        dataset = request.files['dataset']
        results = request.files['results']
    except KeyError:
        raise exceptions.NotAcceptable(
            "You need to send a dataset and a result set.")

    # Connect to the database
    session = kwargs["db_connection"]

    ord_model = creation_and_training(dataset, results)

    # Create variables for the database entry
    model_id = str(uuid.uuid4())
    timestamp = datetime.datetime.now()

    # Save the model for future use
    with open("models/" + model_id + ".zip", "wb") as f:
        pkl.dump(ord_model, f)

    # Create a new row and add it to the database
    try:
        new_model = Model(MODEL=model_id,
                          AI="ordinal_perceptron",
                          TIMESTAMP=timestamp)
        session.add(new_model)
    finally:
        session.commit()

    # Create the response
    response = dict()
    response['model_id'] = model_id
    return response
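# In the matching prediction endpoint, the stored model can later be restored
# by its model_id; a minimal sketch (not part of the original handler),
# assuming `pkl` is the same module that was used for dumping above:
with open("models/" + model_id + ".zip", "rb") as f:
    ord_model = pkl.load(f)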
def dump_weights_pickle(classifier, file_name='../weights/weight_3DCNN.zip'):
    W0 = classifier.params[0]
    W1 = classifier.params[2]
    W2 = classifier.params[4]
    W3 = classifier.params[6]
    W4 = classifier.params[8]
    W5 = classifier.params[10]

    b0 = classifier.params[1]
    b1 = classifier.params[3]
    b2 = classifier.params[5]
    b3 = classifier.params[7]
    b4 = classifier.params[9]
    b5 = classifier.params[11]

    with open(file_name, 'wb') as f:
        dump((W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5), f)
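# A loading counterpart is not shown in the original; a minimal sketch,
# assuming `load` from theano.misc.pkl_utils (the usual pairing for `dump`)
# is in scope and the file was written by dump_weights_pickle:
def load_weights_pickle(file_name='../weights/weight_3DCNN.zip'):
    with open(file_name, 'rb') as f:
        # Returns the (W0..W5, b0..b5) tuple in the order it was dumped.
        return load(f)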
        mrrs.append(metrics_results['mrr_at_n'])

        eval_sessions_metrics_log.append({
            'hitrate_at_n_gru4rec': metrics_results['hitrate_at_n'],
            'mrr_at_n_gru4rec': metrics_results['mrr_at_n'],
            'clicks_count': len(test_df),
            'sessions_count': test_df['SessionId'].nunique()
        })

        save_eval_benchmark_metrics_csv(
            eval_sessions_metrics_log, temp_folder,
            training_hours_for_each_eval=ARGS.training_hours_for_each_eval,
            output_csv=EVAL_METRICS_FILE)

finally:
    print("AVG HitRate: {}".format(sum(hit_rates) / len(hit_rates)))
    print("AVG MRR: {}".format(sum(mrrs) / len(mrrs)))

    # Export trained model
    gru_file = open(os.path.join(temp_folder, MODEL_FILE), "wb+")
    try:
        pkl.dump(gru, gru_file)
    finally:
        gru_file.close()

    print('Trained model and eval results exported to temporary folder: {}'
          .format(temp_folder))
batch_size = 10
session_ids = valid.SessionId.values[0:batch_size]
input_item_ids = valid.ItemId.values[0:batch_size]
out_idx = valid.ItemId.values[0:batch_size]
uniq_out = np.unique(np.array(out_idx, dtype=np.int32))
# predict_for_item_ids = np.hstack([data, uniq_out[~np.in1d(uniq_out, data)]])
# LP: comment this if above works!
predict_for_item_ids = None

print('session_ids: {}'.format(session_ids))
print('input_item_ids: {}'.format(input_item_ids))
print('uniq_out: {}'.format(uniq_out))
print('predict_for_item_ids: {}'.format(predict_for_item_ids))

preds = gru.predict_next_batch(session_ids, input_item_ids,
                               predict_for_item_ids, batch_size)
preds.fillna(0, inplace=True)
if break_ties:
    preds += np.random.rand(*preds.values.shape) * 1e-8
print('Preds: {}'.format(preds))

# save model (os.path.join avoids relying on a trailing slash in $HOME)
model_path = os.path.join(os.environ['HOME'], 'model.theano')
with open(model_path, 'wb') as fd:
    dump(gru, fd)
print('Model saved to: {}'.format(model_path))
def save_model(params, path, name, suffix=''):
    from theano.misc import pkl_utils
    with open(path + name + suffix, 'wb') as fout:
        for param in params:
            pkl_utils.dump(param.get_value(), fout)
def train(self, dataset, lr=1., gamma=.9, beta1=0.9, beta2=0.999,
          min_batch_size=100, max_batch_size=None, num_epochs=200,
          save_as=None, early_stopping=-1E+6):
    """Train the RNN-RBM via stochastic gradient descent (SGD) using
    MIDI files converted to piano-rolls.

    dataset : list of numpy arrays
    min_batch_size, max_batch_size : integer
        Training sequences will be split into subsequences whose sizes are
        drawn between these bounds before applying the SGD updates.
    num_epochs : integer
        Number of epochs (passes over the training set) performed.

    The user can safely interrupt training with Ctrl+C at any time."""
    if self.optimizer == 'adadelta':
        hyperparams = (lr, gamma)
    elif self.optimizer == 'adam':
        hyperparams = (lr, beta1, beta2)
    else:
        hyperparams = (lr, )

    if type(dataset) is not list:
        dataset = [dataset]
    self.dataset = dataset
    nsamples = len(dataset)

    # flatten all parameters into an array for FLANN NN computation:
    # TODO: not sure if this is very useful, since the parameter space
    # is very high dimensional... basically parameters can bounce around
    # the minimum and NN will not converge to zero in a long time... take e.g.
    # large dim. arrays with random 0, 1's and compute nn after generating
    # a new one each time! But hmm I think it should be steadily decreasing...
    # anyway think about it
    # import pyflann
    # flann = pyflann.FLANN()
    # pyflann.set_distance_type('euclidean')
    param_vec = self.param_vec
    if param_vec is None:
        param_vec = np.array([])
        for param in self.parameters:
            param_vec = np.concatenate(
                (param_vec, param.get_value().flatten()))
        param_vec = param_vec[None, :]

    if max_batch_size is None:
        max_batch_size = min_batch_size
        mean_batch_size = min_batch_size
    else:
        mean_batch_size = int((min_batch_size + max_batch_size) / 2)

    best_monitor = 1E+6
    done = False
    try:
        for epoch in xrange(num_epochs):
            if done:
                break
            start_time = time.time()
            costs = []
            monitors = []

            # shuffle dataset:
            shuffle(dataset)
            for sample_number, sample in enumerate(dataset):
                # split to batches:
                sample_size = len(sample)
                idx = np.random.randint(
                    min_batch_size, max_batch_size + 1,
                    int(sample_size / min_batch_size)).cumsum()
                idx = idx[idx < sample_size - min_batch_size]
                batches = np.split(sample, idx)
                shuffle(batches)
                nbatches = len(batches)

                for n, batch in enumerate(batches):
                    # don't train with an almost empty batch:
                    if np.sum(batch) < min_steps_in_batch:
                        continue
                    if batch.shape[0] < 3:  # just in case...
                        continue
                    monitor, cost = self.train_function(batch, *hyperparams)
                    # TODO: revert to saved parameters in case of nans (?)
                    if np.isnan(cost):
                        raise ValueError('\nNaN encountered, breaking out!')
                    if np.abs(cost) > 1E+9:
                        raise ValueError('\nCost blew up, breaking out!')
                    costs.append(cost)
                    monitors.append(monitor)

                    pct_progress = int(100 * n / nbatches)
                    print('\rSample: {:6}/{} -- Progress: {:3}% -- '
                          'Cost={:6.3f} -- Monitor={:6.3f}'.format(
                              sample_number + 1, nsamples, pct_progress,
                              float(cost), float(monitor)), end='')
                    sys.stdout.flush()

                    if monitor < early_stopping:
                        print('\nEarly stop.')
                        done = True
                        break

            costs = np.asarray(costs)
            monitors = np.asarray(monitors)
            costs[costs > 1e+6] = 1e+6  # getting rid of infs
            monitors[monitors > 1e+6] = 1e+6  # getting rid of infs
            self.costs = costs
            self.monitors = monitors
            avg_cost = np.round(np.mean(costs), 4)
            std_cost = np.round(np.std(costs), 4)
            avg_monitor = np.round(np.mean(monitors), 4)
            std_monitor = np.round(np.std(monitors), 4)
            time_elapsed = time.time() - start_time

            # Nearest neighbors in parameter space:
            # param_vec_next = np.array([])
            # for param in self.parameters:
            #     param_vec_next = np.concatenate(
            #         (param_vec_next, param.get_value().flatten()))
            # flann_params = flann.build_index(param_vec, target_precision=.9)
            # nn_dist = np.sqrt(flann.nn(param_vec, param_vec_next, 1)[1][0])
            # add to previous parameter vectors:
            # param_vec = np.vstack((param_vec, param_vec_next))

            print('\rEpoch {:4}/{} | Cost mean={:6.3f}, std={:6.3f} | '
                  'Monitor mean={:6.4f}, std={:6.3f} | '
                  'Time={} s\n'.format(epoch + 1, num_epochs, avg_cost,
                                       std_cost, avg_monitor, std_monitor,
                                       np.round(time_elapsed, 0)), end='')
            sys.stdout.flush()

            if save_as is not None and avg_monitor < best_monitor:
                # print('Saving results...')
                best_monitor = avg_monitor
                # save full state, not just parameters:
                # param_list = []
                # gtm1_list = []
                # stm1_list = []
                # for n in range(len(self.parameters)):
                #     try:
                #         param_list.append(self.parameters[n])
                #         gtm1_list.append(self.gtm1[n])
                #         stm1_list.append(self.stm1[n])
                #     except:
                #         break
                saved_state = self.parameters + self.gtm1 + self.stm1
                with open(save_as + '.zip', 'wb') as f:
                    dump(saved_state, f)
                # savefile = file(save_as + '.save', mode='wb')
                # cPickle.dump(saved_state, savefile)
                # savefile.close()
    except KeyboardInterrupt:
        self.costs = costs
        self.monitors = monitors
        print('\nInterrupted by user.')
x, l = list(res_buys.items())[list(res_buys.keys()).index(k)]
print("Buys Validation:")
for e in l:
    print(x, ':', e[0], ' ', e[1])

if export_csv is not None:
    with open(export_csv, 'w+') as file:
        file.write('Metrics;')
        for k, l in res.items():
            for e in l:
                file.write(e[0])
                file.write(';')
            break
        file.write('\n')
        for k, l in res.items():
            file.write(k)
            file.write(';')
            for e in l:
                file.write(str(e[1]))
                file.write(';')
            file.write('\n')

# comment out when not using gru
with open("mdl/test-gru.mdl", "wb+") as file:
    pkl.dump(gru, file)
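# The exported GRU model can later be restored for scoring; a minimal sketch,
# assuming `pkl` is the same module that was used for dumping above:
with open("mdl/test-gru.mdl", "rb") as f:
    gru = pkl.load(f)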
def save_model_params(model, model_path):
    with open(os.path.join(model_path, "params.zip"), 'wb') as f:
        pkl_utils.dump((model.parameters, model.update_rule.parameters), f)
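# Hypothetical counterpart (not part of the original): restores the two
# parameter collections written by save_model_params.
def load_model_params(model_path):
    with open(os.path.join(model_path, "params.zip"), 'rb') as f:
        parameters, update_rule_parameters = pkl_utils.load(f)
    return parameters, update_rule_parameters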
def save(self, filename):
    with open(filename, 'wb') as f:
        dump(self.get_save(), f)
    print "\nSuccessfully saved architecture to file: %s" % filename
def dumps(self, file_path):
    with open(file_path, 'wb') as f:
        dump(self.params, f)
def save_feature_maps(self, filename):
    cwd = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(cwd, filename + '.zip'), 'wb') as f:
        params = []
        for k in self.conv_layers:
            params += [param.get_value() for param in self.params[k]]
        dump(params, f)
def save_params(self, filename):
    cwd = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(cwd, filename + '.zip'), 'wb') as f:
        params = [param.get_value() for param in self.full_params]
        dump(params, f)
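# Hypothetical counterpart (not in the original class): reads the list of
# arrays written by save_params and copies each value back into the matching
# shared variable in self.full_params, assuming `load` from
# theano.misc.pkl_utils is in scope.
def load_params(self, filename):
    cwd = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(cwd, filename + '.zip'), 'rb') as f:
        params = load(f)
    for shared_var, value in zip(self.full_params, params):
        shared_var.set_value(value)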
def dump_params_pickle(file, params_to_pickle):
    with open(file, 'wb') as f:
        dump(params_to_pickle, f)