def __call__(self, epoch: int, sym: mxnet.symbol.Symbol, arg: Dict[str, np.ndarray], aux: Dict[str, np.ndarray]):
    """Ray Tune callback: persist an MXNet checkpoint every ``self._frequency`` epochs.

    Epochs whose number is not a multiple of the frequency are skipped
    entirely; otherwise the model is saved into a Tune-managed directory.
    """
    if epoch % self._frequency:
        # Not a checkpointing epoch -- nothing to do.
        return
    # Ask Tune for the checkpoint directory for this step and save there.
    with tune.checkpoint_dir(step=epoch) as ckpt_dir:
        target = os.path.join(ckpt_dir, self._filename)
        save_checkpoint(target, epoch, sym, arg, aux)
def save_best_model(self):
    """Save the best-performing parameters recorded so far as an MXNet checkpoint.

    The output prefix embeds the best accuracy and the epoch it was reached
    at. If no best model has been recorded yet (no params, or accuracy still
    zero), prints a notice and returns without saving.
    """
    if self.best_param is None or self.best_acc == 0:
        # Fixed: was Python 2 statement syntax `print 'No Best Model'`,
        # a SyntaxError under Python 3.
        print('No Best Model')
        return
    from mxnet.model import save_checkpoint
    # best_param is the (epoch, sym, arg_params, aux_params) tuple captured
    # when the best accuracy was observed; element 0 is the epoch number.
    save_checkpoint("%s[ACC-%0.5f E%d]" % (self.path, self.best_acc, self.best_param[0]),
                    *self.best_param)
def _callback(epoch, sym, arg, aux):
    """Checkpoint callback: save the model under ROOT_DIR and stage the
    resulting files for S3 upload.

    NOTE(review): the trigger is ``epoch % period == 4`` (fires only when
    the remainder is 4, never for period <= 4) rather than the conventional
    ``== 0`` -- confirm this offset is intentional.
    """
    if epoch % period != 4:
        return
    root = os.environ['ROOT_DIR']
    save_checkpoint(os.path.join(root, prefix), epoch, sym, arg, aux)
    # MXNet writes <prefix>-symbol.json and <prefix>-<epoch>.params;
    # stage both artifacts for upload.
    for artifact in ('%s-symbol.json' % prefix, '%s-%04d.params' % (prefix, epoch)):
        setup_upload_from_s3(artifact, recursive=False)
def _callback(iter_no, sym, arg, aux):
    """The checkpoint function.

    Saves an MXNet checkpoint for the just-finished iteration; on-disk
    epoch numbering is 1-based, hence ``iter_no + 1``.
    """
    # Removed a long commented-out block that folded precomputed bbox
    # mean/std normalization back into 'bbox_pred_weight'/'bbox_pred_bias'
    # before saving; the live version of that logic exists in the sibling
    # callback elsewhere in this file if it ever needs to be restored here.
    save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
def _callback(iter_no, sym, arg, aux):
    """The checkpoint function.

    If bbox-target normalization statistics were precomputed, fold the
    means/stds back into the bbox regression parameters before saving, so
    the checkpoint predicts in un-normalized coordinates. Then save an
    MXNet checkpoint (on-disk epoch numbering is 1-based).
    """
    if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
        # Fixed: was Python 2 statement syntax `print "..."`.
        print("save model with mean/std")
        # Fixed: use floor division -- plain `/` yields a float in Python 3
        # and np.tile then rejects the non-integer repetition count.
        num_classes = len(arg['bbox_pred_bias'].asnumpy()) // 4
        means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (1, num_classes))
        stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (1, num_classes))
        # Undo target normalization: w' = w * std ; b' = b * std + mean.
        arg['bbox_pred_weight'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T
        arg['bbox_pred_bias'] = arg['bbox_pred_bias'] * mx.nd.array(np.squeeze(stds)) + \
            mx.nd.array(np.squeeze(means))
    save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
def do(self, loop):
    """Persist the training loop's current model as an MXNet checkpoint.

    The checkpoint is written under ``self.prefix``, tagged with the epoch
    count taken from ``loop.status['epochs']``.
    """
    # Use lazy %-style logging arguments instead of eager string
    # interpolation, so formatting is skipped when INFO is disabled.
    logging.info("Saving model to %s @ %d", self.prefix, loop.status['epochs'])
    save_checkpoint(self.prefix, loop.status['epochs'], loop.sym,
                    loop.model.arg_params, loop.model.aux_params)
def main(args):
    """Train the CNN+LSTM image-captioning model.

    Builds a frozen VGG16 feature extractor and a trainable LSTM caption
    head, runs a manual forward/backward/update loop over the training
    iterator, optionally evaluates on the validation split every
    ``args.freq_val`` epochs, and checkpoints after each epoch.
    """
    learning_rate = args.lr
    epoches = args.epoches
    batch_size = args.batch_size
    num_hidden = args.num_hidden
    num_embed = args.num_embed
    num_lstm_layer = args.num_lstm_layer
    freq_val = args.freq_val
    # Fixed: was the anti-idiom `True if args.freq_val > 0 else False`.
    val_flag = args.freq_val > 0
    ctx = mx.cpu(0) if args.gpu is None else mx.gpu(int(args.gpu))
    prefix = args.prefix
    period = args.period

    with open(config.text_root, 'r') as f:
        captions = json.load(f)
    # Removed unused local `buckets = [10, 20, 30]` and its commented-out
    # alternative -- nothing in this function reads it.
    train_data = caption_dataIter(captions=captions, batch_size=batch_size,
                                  mode='train')
    val_data = caption_dataIter(captions=captions, batch_size=batch_size,
                                mode='val')

    # ------------------------------------------------------------------
    # Custom train process: bind CNN (frozen) and LSTM (trainable) executors
    # ------------------------------------------------------------------
    cnn_shapes = {'image_data': (batch_size, 3, 224, 224)}
    cnn_sym = vgg16_fc7('image_data')
    cnn_exec = cnn_sym.simple_bind(ctx=ctx, is_train=False, **cnn_shapes)

    # +2 on the sequence length presumably accounts for BOS/EOS tokens --
    # confirm against caption_dataIter.
    lstm = caption_module(num_lstm_layer=num_lstm_layer,
                          seq_len=train_data.sent_length + 2,
                          vocab_size=train_data.vocab_size,
                          num_hidden=num_hidden,
                          num_embed=num_embed,
                          batch_size=batch_size)
    lstm_shapes = {
        'image_feature': (batch_size, 4096),
        'word_data': (batch_size, train_data.sent_length + 2),
        'softmax_label': (batch_size, train_data.sent_length + 2)
    }
    lstm_exec = lstm.simple_bind(ctx=ctx, is_train=True, **lstm_shapes)

    # init params: load pretrained VGG weights into the frozen CNN executor
    pretrain = mx.nd.load(config.vgg_pretrain)
    init_cnn(cnn_exec, pretrain)

    # init optimizer (fixed local-name typo "optimazer")
    optimizer = mx.optimizer.create('adam')
    optimizer.lr = learning_rate
    updater = mx.optimizer.get_updater(optimizer)

    # init metric
    perplexity = mx.metric.Perplexity(ignore_label=-1)
    perplexity.reset()

    # callback
    params = callbacks(nbatch=0, eval_metric=perplexity, epoch=0)
    speedometer = mx.callback.Speedometer(batch_size=batch_size, frequent=20)

    for epoch in range(epoches):
        for batch in train_data:
            # cnn forward, get image_feature
            cnn_exec.arg_dict['image_data'] = batch.data[0]
            cnn_exec.forward()
            image_feature = cnn_exec.outputs[0]

            # lstm forward
            lstm_exec.arg_dict['image_feature'] = image_feature
            lstm_exec.arg_dict['word_data'] = batch.data[1]
            lstm_exec.arg_dict['softmax_label'] = batch.label
            lstm_exec.forward(is_train=True)
            # Removed leftover Python 2 debug statement `print batch.label`
            # (a SyntaxError under Python 3).
            params.eval_metric.update(labels=batch.label, preds=lstm_exec.outputs)
            lstm_exec.backward()
            params.epoch = epoch
            params.nbatch += 1
            speedometer(params)

            # Update every trainable LSTM argument; names present in
            # lstm_shapes are data/label inputs and must be skipped.
            for j, name in enumerate(lstm.list_arguments()):
                if name not in lstm_shapes.keys():
                    updater(j, lstm_exec.grad_dict[name], lstm_exec.arg_dict[name])
        train_data.reset()
        params.nbatch = 0

        if val_flag and epoch % freq_val == 0:
            for batch in val_data:
                # cnn forward, get image_feature
                cnn_exec.arg_dict['image_data'] = batch.data[0]
                cnn_exec.forward()
                image_feature = cnn_exec.outputs[0]

                # lstm forward only (no gradients on the validation pass)
                lstm_exec.arg_dict['image_feature'] = image_feature
                lstm_exec.arg_dict['word_data'] = batch.data[1]
                lstm_exec.arg_dict['softmax_label'] = batch.label
                lstm_exec.forward(is_train=False)
                params.eval_metric.update(labels=batch.label, preds=lstm_exec.outputs)
                params.epoch = epoch
                params.nbatch += 1
                speedometer(params)
            params.nbatch = 0
            val_data.reset()

        # NOTE(review): `if period:` checkpoints every epoch whenever period
        # is truthy; `epoch % period == 0` may have been intended -- confirm.
        if period:
            save_checkpoint(prefix=prefix, epoch=epoch, symbol=lstm,
                            arg_params=lstm_exec.arg_dict,
                            aux_params=lstm_exec.aux_dict)
def checkpoint_if_only_best(self, eval_metric, sym, arg, aux):
    """Checkpoint the model only when ``eval_metric`` beats the best seen so far.

    The checkpoint is always written at epoch 0 under ``self._prefix``, so a
    single rotating "best" file is kept. ``is_best`` is queried with
    ``update_value=True``, which refreshes the stored best as a side effect.
    """
    if not self.is_best(eval_metric, update_value=True):
        return
    save_checkpoint(self._prefix, 0, sym, arg, aux)