import os

import track


def experiment(args):
    """Run do_training inside a track trial, logging locally and optionally to a remote store."""
    track_local_dir = os.path.join(args.logroot, args.experimentname)
    if args.remote:
        track_remote_dir = os.path.join(args.remote, args.projectname, args.experimentname)
    else:
        track_remote_dir = None
    with track.trial(track_local_dir, track_remote_dir, param_map=vars(args)):
        track.debug("Starting trial")
        do_training(args)
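# Example invocation (a minimal sketch): the argparse flags below mirror the
# attributes experiment() reads (logroot, experimentname, remote, projectname).
# The default values and the standalone-script shape are assumptions, not part
# of the original code.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Run a tracked training trial")
    parser.add_argument("--logroot", default="./logs")
    parser.add_argument("--experimentname", default="baseline")
    parser.add_argument("--projectname", default="my-project")
    parser.add_argument("--remote", default=None,
                        help="optional remote root for mirroring trial logs")
    experiment(parser.parse_args())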
if batch_size % num_gpu != 0:
    raise Exception('num_gpu should be a positive divisor of batch_size')

if mode == "predict":
    data_train, args = load_data(args)
elif mode == "train" or mode == "load":
    data_train, data_val, args = load_data(args)

# log the current config
config_logger = ConfigLogger(log)
config_logger(args.config)

model_loaded, model_num_epoch = load_model(args, contexts, data_train)

# if mode is 'train', train the model from scratch
if mode == 'train':
    data_names = [x[0] for x in data_train.provide_data]
    label_names = [x[0] for x in data_train.provide_label]

    module = mx.mod.Module(model_loaded, context=contexts,
                           data_names=data_names, label_names=label_names)
    do_training(args=args, module=module, data_train=data_train, data_val=data_val)
# if mode is 'load', load the model from the checkpoint and continue training
elif mode == 'load':
    do_training(args=args, module=model_loaded, data_train=data_train, data_val=data_val,
                begin_epoch=model_num_epoch + 1)
# if mode is 'predict', predict labels for the input data with the loaded model
elif mode == 'predict':
    # run prediction over the data and score it
    model_loaded.bind(for_training=False,
                      data_shapes=data_train.provide_data,
                      label_shapes=data_train.provide_label)
    max_t_count = args.config.getint('arch', 'max_t_count')
    eval_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, seq_length=max_t_count)
    model_loaded.score(eval_data=data_train, num_batch=None,
                       eval_metric=eval_metric, reset=True)
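# Context for the excerpt above (a hedged sketch, not the original script): the
# free variables 'mode', 'contexts', 'batch_size', and 'num_gpu' are expected to
# be prepared earlier, roughly like this. The '[common]' section and option
# names are assumptions; only 'max_t_count' under '[arch]' appears in the excerpt.
import mxnet as mx

batch_size = args.config.getint('common', 'batch_size')   # assumed option name
num_gpu = args.config.getint('common', 'num_gpu')          # assumed option name
mode = args.config.get('common', 'mode')                   # 'train', 'load', or 'predict'
contexts = [mx.gpu(i) for i in range(num_gpu)] if num_gpu > 0 else [mx.cpu()]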
def train_it(self, _unused=None):
    # Delegate to the train module's training routine; the optional second
    # argument is not used.
    train.do_training()
    return "trained!"