default=None, required=False) args = parser.parse_args() if args.plot is not None or args.sample is not None: import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt if args.sample is not None: checkpoint_file = args.sample else: checkpoint_file = args.plot if not os.path.exists(checkpoint_file): raise ValueError("Checkpoint file path %s" % checkpoint_file, " does not exist!") print(checkpoint_file) checkpoint_dict = load_checkpoint(checkpoint_file) train_costs = checkpoint_dict["train_costs"] valid_costs = checkpoint_dict["valid_costs"] plt.plot(train_costs) plt.plot(valid_costs) plt.savefig("costs.png") X_mb, X_mb_mask, c_mb, c_mb_mask = next(valid_itr) valid_itr.reset() prev_h1, prev_h2, prev_h3 = [ np_zeros((minibatch_size, n_hid)) for i in range(3) ] prev_kappa = np_zeros((minibatch_size, att_size)) prev_w = np_zeros((minibatch_size, n_chars)) if args.sample is not None: predict_function = checkpoint_dict["predict_function"]
# Build gradients (clipped to a max norm of 10) and Adam updates.
learning_rate = 1E-4
grads = tensor.grad(reg_cost, params)
grads = gradient_clipping(grads, 10.)
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

if args.cont is not None:
    # Resuming: reuse the compiled functions pickled in the checkpoint
    # instead of recompiling the graphs.
    print("Continuing training from saved model")
    continue_path = args.cont
    if not os.path.exists(continue_path):
        raise ValueError("Continue model %s, path not found"
                         % continue_path)
    saved_checkpoint = load_checkpoint(continue_path)
    checkpoint_dict = saved_checkpoint
    train_function = checkpoint_dict["train_function"]
    cost_function = checkpoint_dict["cost_function"]
    predict_function = checkpoint_dict["predict_function"]
else:
    # Fresh run: compile train (with updates), cost-only, and predict
    # functions over the masked input symbols.
    train_function = theano.function([X_sym, X_mask_sym], [cost],
                                     updates=updates,
                                     on_unused_input='warn')
    cost_function = theano.function([X_sym, X_mask_sym], [cost],
                                    on_unused_input='warn')
    predict_function = theano.function([X_sym, X_mask_sym], [pred],
                                       on_unused_input='warn')
default=None, required=False) args = parser.parse_args() if args.plot is not None or args.sample is not None: import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt if args.sample is not None: checkpoint_file = args.sample else: checkpoint_file = args.plot if not os.path.exists(checkpoint_file): raise ValueError("Checkpoint file path %s" % checkpoint_file, " does not exist!") print(checkpoint_file) checkpoint_dict = load_checkpoint(checkpoint_file) train_costs = checkpoint_dict["overall_train_costs"] valid_costs = checkpoint_dict["overall_valid_costs"] plt.plot(train_costs) plt.plot(valid_costs) plt.savefig("costs.png") X_mb, X_mb_mask, c_mb, c_mb_mask = next(valid_itr) valid_itr.reset() prev_h1, prev_h2, prev_h3 = [np_zeros((minibatch_size, n_hid)) for i in range(3)] prev_kappa = np_zeros((minibatch_size, att_size)) prev_w = np_zeros((minibatch_size, n_chars)) bias = args.bias if args.sample is not None:
# Build gradients (clipped to a max norm of 10) and Adam updates.
learning_rate = 1E-4
grads = tensor.grad(reg_cost, params)
grads = gradient_clipping(grads, 10.)
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

if args.cont is not None:
    # Resuming: reuse the compiled functions pickled in the checkpoint
    # instead of recompiling the graphs.
    print("Continuing training from saved model")
    continue_path = args.cont
    if not os.path.exists(continue_path):
        raise ValueError("Continue model %s, path not found"
                         % continue_path)
    saved_checkpoint = load_checkpoint(continue_path)
    checkpoint_dict = saved_checkpoint
    train_function = checkpoint_dict["train_function"]
    cost_function = checkpoint_dict["cost_function"]
    predict_function = checkpoint_dict["predict_function"]
else:
    # Fresh run: compile train (with updates), cost-only, and predict
    # functions over the masked input symbols.
    train_function = theano.function([X_sym, X_mask_sym], [cost],
                                     updates=updates,
                                     on_unused_input='warn')
    cost_function = theano.function([X_sym, X_mask_sym], [cost],
                                    on_unused_input='warn')
    predict_function = theano.function([X_sym, X_mask_sym], [pred],
                                       on_unused_input='warn')

print("Beginning training loop")
# Rebuild the checkpoint dict unconditionally; the compiled functions are
# re-registered so saving works for both fresh and continued runs.
checkpoint_dict = {}
checkpoint_dict["train_function"] = train_function