def next_fold(self):
    """Advance to the next cross-validation fold.

    Builds Batcher objects over the training and held-out index sets for
    the current fold, optionally recomputing the training-set mean first,
    and increments the fold counter.

    Returns:
        tuple: ``(train, test)`` Batcher instances for this fold's
        training and validation splits.
    """
    train_inds, test_inds = self.get_train_val_inds()

    # Mean defaults to 0; it is only recomputed when a fresh batch pass
    # has been requested via `new_batch`.
    train_mean = 0
    if self.new_batch:
        print('calculating train mean')
        # NOTE(review): `root_dir` is referenced unqualified here and below —
        # presumably a module-level global; confirm it is not meant to be
        # `self.root_dir`.
        mean_src = Batcher.Batcher(self.batch_sz, self.metadata, train_inds,
                                   self.mass_headers, self.calc_headers,
                                   root_dir, self.attr2onehot,
                                   new_batch=self.new_batch)
        train_mean = mean_src.get_train_mean()
        print(train_mean)

    train = Batcher.Batcher(self.batch_sz, self.metadata, train_inds,
                            self.mass_headers, self.calc_headers, root_dir,
                            self.attr2onehot, mean=train_mean,
                            new_batch=self.new_batch)
    # The held-out batcher reuses the training-set mean; it is constructed
    # WITHOUT the `new_batch` flag, exactly as in the original code.
    test = Batcher.Batcher(self.batch_sz, self.metadata, test_inds,
                           self.mass_headers, self.calc_headers, root_dir,
                           self.attr2onehot, mean=train_mean)

    self.curr_fold += 1
    return train, test
cost = map["cost"] model = map["model"] inputs = map["inputs"] params = map["params"] print "Computing cost..." cost_func = theano.function(inputs=inputs, outputs=cost) print "Computing update..." updates = model.cmp_grad(alpha, cost) print "Compution gradient step..." sgd_step = theano.function(inputs=inputs, updates=updates) print "Descent..." batcher = Batcher(params) batch = batcher.get_batch(batch_size) last_cost = cost_func(batch[0], batch[1]) best_cost = last_cost while batcher.epoch < max_epoch: batch = batcher.get_batch(batch_size) if batcher.epoch_percentage == 0: last_cost = cost_func(batch[0], batch[1]) map["curve"] += [last_cost] if best_cost > last_cost: best_cost = last_cost file = open(name + ".pkl", 'wb') pickle.dump(map, file, -1) sgd_step(batch[0], batch[1]) sys.stdout.write( '\r%d%% of epoch %d completed. Best cost: %f, last cost %f' %
for i in range(10): batch = batcher.get_batch(batch_size) c, memory = cost_func(batch[0], memory, batch[1]) compilation += [c] map["curve"] += [np.asarray(compilation)] time = batcher.get_time() sys.stdout.write('\r%d mins %d secs: valid cost = %s' % (time[0], time[1], np.asarray(compilation)[:4].tolist())) if np.mean(compilation) < best: best = np.mean(compilation) file = open(name + ".pkl", 'wb') pickle.dump(map, file, -1) file.close() return best print "Descent..." batcher = Batcher() valid = batcher.valid best = 10000 for n in range(max_epoch): best = valid_cost(best) batcher.reset() memory = np.zeros((model.depth, batch_size, params[0]), dtype=config.floatX) for i in range(10): batch = batcher.get_batch(batch_size) memory = sgd_step(batch[0], memory, batch[1]) print ""