def train(hyper):
    """Train a network according to the settings in ``hyper``.

    The function seeds apollo, picks the cpu or gpu device, builds a net by
    running ``forward`` once, draws the net to an image file, optionally
    loads weights, and then iterates from ``hyper['start_iter']`` up to (but
    not including) ``hyper['max_iter']``.  On each iteration it runs
    ``forward``/``backward``, applies an update with a step-decayed learning
    rate, and periodically logs the mean loss, saves a snapshot and saves a
    smoothed loss graph.

    Keys read from ``hyper``: ``random_seed``, ``gpu``, ``loglevel``,
    ``schematic_prefix``, ``weights`` (optional), ``start_iter``,
    ``max_iter``, ``base_lr``, ``gamma``, ``stepsize``, ``momentum``,
    ``clip_gradients``, ``display_interval``, ``snapshot_interval``,
    ``snapshot_prefix``, ``graph_interval``, ``graph_prefix`` and ``sub``
    (optional, defaults to 100).

    Returns nothing.
    """
    apollo.set_random_seed(hyper['random_seed'])

    # A ``None`` gpu entry selects cpu mode; anything else is used as the
    # device id.
    if hyper['gpu'] is None:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    else:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    apollo.set_logging_verbosity(hyper['loglevel'])

    # A single forward pass is run before the schematic is drawn.
    model = apollo.Net()
    forward(model, hyper)
    schematic_file = '%s/network.jpg' % hyper['schematic_prefix']
    model.draw_to_file(schematic_file)
    logging.info('Drawing network to %s' % schematic_file)
    model.reset_forward()

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        model.load(hyper['weights'])

    losses = []
    for step in xrange(hyper['start_iter'], hyper['max_iter']):
        losses.append(forward(model, hyper))
        model.backward()

        # Step decay: base_lr * gamma ** (number of completed stepsize
        # periods).
        base_rate = hyper['base_lr']
        decay_base = hyper['gamma']
        learning_rate = base_rate * decay_base ** (step // hyper['stepsize'])
        model.update(
            lr=learning_rate,
            momentum=hyper['momentum'],
            clip_gradients=hyper['clip_gradients'],
        )

        if step % hyper['display_interval'] == 0:
            # Average over (at most) the last display_interval losses.
            recent = losses[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (step, np.mean(recent)))

        # Snapshots and graphs are skipped on the very first iteration.
        if step % hyper['snapshot_interval'] == 0 and step > hyper['start_iter']:
            snapshot_file = '%s/%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % snapshot_file)
            model.save(snapshot_file)

        if step % hyper['graph_interval'] == 0 and step > hyper['start_iter']:
            window = hyper.get('sub', 100)
            kernel = np.ones(window) / window
            smoothed = np.convolve(losses, kernel)
            # Trim ``window`` samples from both ends of the full convolution.
            plt.plot(smoothed[window:-window])
            graph_file = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % graph_file)
            plt.savefig(graph_file)
def train(hyper):
    """Run the apollo training loop configured by the ``hyper`` dict.

    Steps, in order: seed apollo, select the device, set apollo's logging
    verbosity, build the net through one ``forward`` call, draw it to
    ``<schematic_prefix>/network.jpg``, load ``hyper['weights']`` when that
    key is present, then iterate over ``[start_iter, max_iter)``.  Each
    iteration records the loss returned by ``forward``, calls ``backward``
    and ``update``, and fires the periodic display / snapshot / graph
    actions when their interval divides the iteration number.

    ``hyper`` must provide ``random_seed``, ``gpu``, ``loglevel``,
    ``schematic_prefix``, ``start_iter``, ``max_iter``, ``base_lr``,
    ``gamma``, ``stepsize``, ``momentum``, ``clip_gradients``,
    ``display_interval``, ``snapshot_interval``, ``snapshot_prefix``,
    ``graph_interval`` and ``graph_prefix``; ``weights`` and ``sub`` are
    optional.

    Returns nothing.
    """
    apollo.set_random_seed(hyper['random_seed'])

    if hyper['gpu'] is not None:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    else:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    apollo.set_logging_verbosity(hyper['loglevel'])

    net = apollo.Net()
    forward(net, hyper)
    drawing_path = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(drawing_path)
    logging.info('Drawing network to %s' % drawing_path)
    net.reset_forward()

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])

    loss_history = []

    def _due_after_start(iteration, interval_key):
        # True when the interval divides the iteration number and the
        # iteration is strictly past start_iter.
        return (iteration % hyper[interval_key] == 0
                and iteration > hyper['start_iter'])

    def _save_snapshot(iteration):
        # Write the net to <snapshot_prefix>/<iteration>.h5.
        target = '%s/%d.h5' % (hyper['snapshot_prefix'], iteration)
        logging.info('Saving net to: %s' % target)
        net.save(target)

    def _save_loss_graph():
        # Plot the moving average of the loss history and save the figure.
        width = hyper.get('sub', 100)
        averaged = np.convolve(loss_history, np.ones(width) / width)
        plt.plot(averaged[width:-width])
        target = '%s/train_loss.jpg' % hyper['graph_prefix']
        logging.info('Saving figure to: %s' % target)
        plt.savefig(target)

    for iteration in xrange(hyper['start_iter'], hyper['max_iter']):
        loss_history.append(forward(net, hyper))
        net.backward()

        # Learning rate shrinks by a factor of gamma every ``stepsize``
        # iterations.
        initial_rate = hyper['base_lr']
        rate = initial_rate * hyper['gamma'] ** (iteration // hyper['stepsize'])
        net.update(lr=rate,
                   momentum=hyper['momentum'],
                   clip_gradients=hyper['clip_gradients'])

        if iteration % hyper['display_interval'] == 0:
            window_mean = np.mean(loss_history[-hyper['display_interval']:])
            logging.info('Iteration %d: %s' % (iteration, window_mean))

        if _due_after_start(iteration, 'snapshot_interval'):
            _save_snapshot(iteration)

        if _due_after_start(iteration, 'graph_interval'):
            _save_loss_graph()
def solve(self, train_data, val_data=None):
    """Run the optimisation loop over batches drawn from ``train_data``.

    Both data arguments are wrapped with ``make_batch_generator`` using
    ``self.batch_size``.  For every iteration in
    ``[self.start_iter, self.max_iter)``:

    * when ``val_data`` was given and ``self.val_interval`` divides the
      iteration number, ``self.val_iter`` validation batches are forwarded
      in the ``"val"`` phase and their losses recorded;
    * one training batch is forwarded in the ``"train"`` phase, its loss is
      recorded, ``backward`` is called and the net is updated with a
      step-decayed learning rate;
    * every logger in ``self.loggers`` receives the iteration number and a
      dict holding the loss histories, the net and ``self.start_iter``.

    Args:
        train_data: source handed to ``make_batch_generator`` for training.
        val_data: optional source for validation batches; validation is
            skipped entirely when it is ``None``.

    Returns nothing.
    """
    seed = self.random_seed
    if seed is not None:
        apollo.set_random_seed(seed)

    train_batches = make_batch_generator(train_data, self.batch_size)
    val_batches = None
    if val_data is not None:
        val_batches = make_batch_generator(val_data, self.batch_size)

    train_losses = []
    val_losses = []

    for step in xrange(self.start_iter, self.max_iter):
        if step % self.val_interval == 0 and val_batches is not None:
            for _ in xrange(self.val_iter):
                self.net.forward("val", val_batches.next())
                val_losses.append(self.net.apollo_net.loss)
                self.net.apollo_net.clear_forward()

        self.net.forward("train", train_batches.next())
        train_losses.append(self.net.apollo_net.loss)
        self.net.backward()

        # base_lr scaled by gamma_lr once per completed gamma_stepsize
        # period.
        completed_periods = step // self.gamma_stepsize
        learning_rate = self.base_lr * self.gamma_lr ** completed_periods
        self.net.apollo_net.update(
            learning_rate,
            momentum=self.momentum,
            clip_gradients=self.clip_gradients,
            weight_decay=self.weight_decay,
        )

        for logger in self.loggers:
            # A fresh dict per logger, as each call receives its own
            # mapping; the loss lists themselves are shared.
            logger.log(step, dict(train_loss=train_losses,
                                  val_loss=val_losses,
                                  net=self.net,
                                  start_iter=self.start_iter))
def default_train(hyper, forward, test_forward=None):
    """Train (and periodically test) a network with default hyperparameters.

    ``hyper`` is overlaid on ``default_hyper()`` and checked with
    ``validate_hyper``.  The function then seeds apollo, selects the device,
    builds the training net with one ``forward`` call, draws it to
    ``<schematic_prefix>/network.jpg``, optionally builds a separate test
    net, optionally loads weights, and iterates over
    ``[start_iter, max_iter)``.  Every iteration runs ``forward`` /
    ``backward`` / ``update``; display, snapshot, graph and test actions run
    when their interval divides the iteration number.

    Args:
        hyper: dict of settings that override the defaults.  Optional keys
            with fall-backs used here: ``separate_test_net`` (True),
            ``gamma`` (1.0), ``stepsize`` (``sys.maxint``), ``momentum``
            (0.0), ``clip_gradients`` (-1), ``weight_decay`` (0.0), ``sub``
            (100), ``weights`` (no loading when absent).
        forward: callable ``forward(net, hyper)`` whose return value is
            recorded as the training loss.
        test_forward: callable with the same signature used for the test
            net; defaults to ``forward``.

    Returns nothing.
    """
    if test_forward is None:
        test_forward = forward

    # The backend is selected before pyplot is imported.
    import matplotlib
    matplotlib.use("Agg", warn=False)
    import matplotlib.pyplot as plt

    d = default_hyper()
    d.update(hyper)
    hyper = d
    validate_hyper(hyper)

    apollo.set_random_seed(hyper["random_seed"])
    if hyper["gpu"] is None:
        apollo.set_mode_cpu()
        logging.info("Using cpu device (pass --gpu X to train on the gpu)")
    else:
        apollo.set_mode_gpu()
        apollo.set_device(hyper["gpu"])
        logging.info("Using gpu device %d" % hyper["gpu"])
    apollo.set_logging_verbosity(hyper["loglevel"])

    net = apollo.Net()
    forward(net, hyper)
    network_path = "%s/network.jpg" % hyper["schematic_prefix"]
    net.draw_to_file(network_path)
    logging.info("Drawing network to %s" % network_path)
    net.reset_forward()

    # The comparison against True is kept as-is so that only values equal to
    # True select a separate test net.
    if hyper.get("separate_test_net", True) == True:
        test_net = apollo.Net()
        test_forward(test_net, hyper)
        test_net.reset_forward()
    else:
        test_net = net

    if "weights" in hyper:
        # Fix: the path lives in hyper["weights"]; the bare name ``weights``
        # was never defined and raised NameError here.
        logging.info("Loading weights from %s" % hyper["weights"])
        net.load(hyper["weights"])

    train_loss_hist = []
    for i in xrange(hyper["start_iter"], hyper["max_iter"]):
        train_loss_hist.append(forward(net, hyper))
        net.backward()

        # Step decay; with the fall-backs (gamma 1.0, stepsize sys.maxint)
        # the rate stays at base_lr.
        lr = hyper["base_lr"] * hyper.get("gamma", 1.0) ** (
            i // hyper.get("stepsize", sys.maxint))
        net.update(
            lr=lr,
            momentum=hyper.get("momentum", 0.0),
            clip_gradients=hyper.get("clip_gradients", -1),
            weight_decay=hyper.get("weight_decay", 0.0),
        )

        if i % hyper["display_interval"] == 0:
            # Mean over (at most) the last display_interval losses.
            logging.info("Iteration %d: %s" % (
                i, np.mean(train_loss_hist[-hyper["display_interval"]:])))

        # Snapshots and graphs are skipped on the very first iteration.
        if i % hyper["snapshot_interval"] == 0 and i > hyper["start_iter"]:
            filename = "%s/%d.h5" % (hyper["snapshot_prefix"], i)
            logging.info("Saving net to: %s" % filename)
            net.save(filename)

        if i % hyper["graph_interval"] == 0 and i > hyper["start_iter"]:
            sub = hyper.get("sub", 100)
            # Moving average of width ``sub``, trimmed by ``sub`` samples at
            # both ends of the full convolution.
            plt.plot(np.convolve(train_loss_hist, np.ones(sub) / sub)[sub:-sub])
            filename = "%s/train_loss.jpg" % hyper["graph_prefix"]
            logging.info("Saving figure to: %s" % filename)
            plt.savefig(filename)

        if hyper["test_interval"] is not None and i % hyper["test_interval"] == 0:
            test_loss = []
            accuracy = []
            test_net.phase = "test"
            test_net.copy_params_from(net)
            for j in xrange(hyper["test_iter"]):
                test_loss.append(test_forward(test_net, hyper))
                test_net.reset_forward()
                if "accuracy" in test_net.tops:
                    accuracy.append(test_net.tops["accuracy"].data.flatten()[0])
            # Accuracy is only reported when the net exposed an "accuracy"
            # top during the test passes.
            if accuracy:
                logging.info("Accuracy: %.5f" % np.mean(accuracy))
            logging.info("Test loss: %f" % np.mean(test_loss))
            test_net.phase = "train"
def default_train(hyper, forward, test_forward=None):
    """Train a net with ``forward`` and evaluate it with ``test_forward``.

    The caller's ``hyper`` dict is merged over ``default_hyper()`` and
    passed through ``validate_hyper``.  After seeding apollo and selecting
    the device, the training net is built with one ``forward`` call and
    drawn to ``<schematic_prefix>/network.jpg``.  A second net is built with
    ``test_forward`` unless ``separate_test_net`` is set to something not
    equal to True, in which case the training net doubles as the test net.
    Weights are loaded when ``hyper`` has a ``weights`` key.

    The loop covers ``[start_iter, max_iter)``.  Each iteration records the
    loss returned by ``forward``, calls ``backward`` and ``update``, and
    runs the display / snapshot / graph / test actions whenever their
    interval divides the iteration number (snapshot and graph additionally
    require the iteration to be past ``start_iter``).

    Args:
        hyper: settings overriding the defaults.
        forward: callable ``forward(net, hyper)`` returning the loss.
        test_forward: callable used on the test net; ``forward`` is used
            when it is ``None``.

    Returns nothing.
    """
    if test_forward is None:
        test_forward = forward

    import matplotlib
    matplotlib.use('Agg', warn=False)
    import matplotlib.pyplot as plt

    merged = default_hyper()
    merged.update(hyper)
    hyper = merged
    validate_hyper(hyper)

    apollo.set_random_seed(hyper['random_seed'])
    if hyper['gpu'] is not None:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    else:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    apollo.set_logging_verbosity(hyper['loglevel'])

    train_net = apollo.Net()
    forward(train_net, hyper)
    schematic = '%s/network.jpg' % hyper['schematic_prefix']
    train_net.draw_to_file(schematic)
    logging.info('Drawing network to %s' % schematic)
    train_net.reset_forward()

    wants_separate_net = hyper.get('separate_test_net', True) == True
    if wants_separate_net:
        eval_net = apollo.Net()
        test_forward(eval_net, hyper)
        eval_net.reset_forward()
    else:
        eval_net = train_net

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        train_net.load(hyper['weights'])

    losses = []
    for step in xrange(hyper['start_iter'], hyper['max_iter']):
        losses.append(forward(train_net, hyper))
        train_net.backward()

        # base_lr * gamma ** (completed stepsize periods); the fall-backs
        # leave the rate constant.
        base_rate = hyper['base_lr']
        decay_base = hyper.get('gamma', 1.)
        periods = step // hyper.get('stepsize', sys.maxint)
        train_net.update(
            lr=base_rate * decay_base ** periods,
            momentum=hyper.get('momentum', 0.0),
            clip_gradients=hyper.get('clip_gradients', -1),
            weight_decay=hyper.get('weight_decay', 0.0),
        )

        if step % hyper['display_interval'] == 0:
            recent_mean = np.mean(losses[-hyper['display_interval']:])
            logging.info('Iteration %d: %s' % (step, recent_mean))

        if step % hyper['snapshot_interval'] == 0 and step > hyper['start_iter']:
            snapshot_file = '%s/%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % snapshot_file)
            train_net.save(snapshot_file)

        if step % hyper['graph_interval'] == 0 and step > hyper['start_iter']:
            width = hyper.get('sub', 100)
            averaged = np.convolve(losses, np.ones(width) / width)
            plt.plot(averaged[width:-width])
            graph_file = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % graph_file)
            plt.savefig(graph_file)

        if hyper['test_interval'] is not None and step % hyper['test_interval'] == 0:
            eval_losses = []
            accuracies = []
            eval_net.phase = 'test'
            eval_net.copy_params_from(train_net)
            for _ in xrange(hyper['test_iter']):
                eval_losses.append(test_forward(eval_net, hyper))
                eval_net.reset_forward()
                if 'accuracy' in eval_net.tops:
                    top = eval_net.tops['accuracy']
                    accuracies.append(top.data.flatten()[0])
            if accuracies:
                logging.info('Accuracy: %.5f' % np.mean(accuracies))
            logging.info('Test loss: %f' % np.mean(eval_losses))
            eval_net.phase = 'train'