def init_model(vocab_size, char_type_size):
    model = FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        # dict_embed = F.Linear(12, dict_embed_units),
        hidden1=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        i_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        f_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        o_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        output=F.Linear(hidden_units + 12, label_num),
    )

    if opt_selection == 'Adagrad':
        opt = optimizers.AdaGrad(lr=learning_rate)
    elif opt_selection == 'SGD':
        opt = optimizers.SGD()
    elif opt_selection == 'Adam':
        opt = optimizers.Adam()
    else:
        opt = optimizers.AdaGrad(lr=learning_rate)
        print('AdaGrad is chosen as default')
    opt.setup(model)
    return model, opt
def setup_optimizer(self, optimizer_name, gradient_clipping=3,
                    weight_decay=0.00001, **kwargs):
    # set optimizer
    if optimizer_name == "Adam":
        self.opt = optimizers.Adam(**kwargs)
    elif optimizer_name == "AdaDelta":
        self.opt = optimizers.AdaDelta(**kwargs)
    elif optimizer_name == "AdaGrad":
        self.opt = optimizers.AdaGrad(**kwargs)
    elif optimizer_name == "RMSprop":
        self.opt = optimizers.RMSprop(**kwargs)
    elif optimizer_name == "RMSpropGraves":
        self.opt = optimizers.RMSpropGraves(**kwargs)
    elif optimizer_name == "SGD":
        self.opt = optimizers.SGD(**kwargs)
    elif optimizer_name == "MomentumSGD":
        self.opt = optimizers.MomentumSGD(**kwargs)
    else:
        raise ValueError('unknown optimizer: %s' % optimizer_name)
    # self.opt.use_cleargrads()
    self.opt.setup(self)
    self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
    self.opt.add_hook(optimizer.WeightDecay(weight_decay))
    self.opt_params = {
        "optimizer_name": optimizer_name,
        "gradient_clipping": gradient_clipping,
        "weight_decay": weight_decay,
    }
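# A minimal usage sketch for setup_optimizer above: it calls
# optimizer.setup(self), so `self` must be a chainer.Link/Chain. TinyNet and
# its layer sizes below are illustrative assumptions, not part of the
# original code.
import chainer
import chainer.links as L
from chainer import optimizer, optimizers

class TinyNet(chainer.Chain):
    def __init__(self):
        super(TinyNet, self).__init__()
        with self.init_scope():
            self.fc = L.Linear(4, 2)

    def __call__(self, x):
        return self.fc(x)

TinyNet.setup_optimizer = setup_optimizer  # attach the helper as a method

net = TinyNet()
net.setup_optimizer("Adam", gradient_clipping=3, weight_decay=1e-5)
print(net.opt_params)  # {'optimizer_name': 'Adam', ...}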
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(word_lists, args.vocab)
    phrase_set = set()
    semi_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semi_set |= set(extract_semi_labels(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set),
                                  add_special_tokens=False)
    semi_vocab = Vocabulary.new([list(semi_set)], len(semi_set),
                                add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.queue, args.stack,
        len(phrase_set), len(semi_set),
    )
    if USE_GPU:
        parser.to_gpu()
    opt = optimizers.AdaGrad(lr=0.005)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(5))

    for epoch in range(args.epoch):
        n = 0
        for samples in batch(zip(word_lists, op_lists), args.minibatch):
            parser.zerograds()
            loss = my_zeros((), np.float32)
            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0)
                n += 1
            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semi_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

    trace('finished.')
def get_model_optimizer(result_folder, cfg_mod):
    model_fn = path.basename(cfg_mod.SRC_MODEL)
    src_model = imp.load_source(
        model_fn.split('.')[0],
        path.join(result_folder, cfg_mod.SRC_MODEL)).src_model

    if cfg_mod.OPT_PARAM == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=cfg_mod.TRAIN_RATE, eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=cfg_mod.TRAIN_RATE,
                                           momentum=cfg_mod.MOMENTUM)
    elif cfg_mod.OPT_PARAM == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=cfg_mod.TRAIN_RATE, eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'ADAM':
        optimizer = optimizers.Adam(alpha=cfg_mod.TRAIN_RATE,
                                    beta1=cfg_mod.BETA1,
                                    beta2=cfg_mod.BETA2,
                                    eps=cfg_mod.EPS)
    else:
        raise Exception('No optimizer is selected')

    optimizer.setup(src_model)
    if cfg_mod.WEIGHT_DECAY:
        optimizer.add_hook(chainer.optimizer.WeightDecay(cfg_mod.WEIGHT_DECAY))
    return src_model, optimizer
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0, beta1=alpha1,
                               beta2=alpha2, eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
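# Illustrative calls to get_opt: the args object only needs the attributes
# the function reads, so an argparse.Namespace (used here as a stand-in)
# works. A zero means "use this optimizer's default value".
from argparse import Namespace

adam = get_opt(Namespace(opt_model="Adam", alpha0=0, alpha1=0, alpha2=0, alpha3=0))
# -> optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08)
nag = get_opt(Namespace(opt_model="NAG", alpha0=0.05, alpha1=0))
# -> optimizers.NesterovAG(lr=0.05, momentum=0.9)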
def train(self, epoch):
    trace('making vocabularies ...')
    self.trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(self.encdec)
    opt.add_hook(optimizer.GradientClipping(5))

    gen1 = gens.word_list(self.target)
    gen = gens.batch(gen1, self.minibatch)
    for trg_batch in gen:
        self.batch_size = len(trg_batch)
        self.trg_batch = fill_batch(trg_batch)
        if len(trg_batch) != self.minibatch:
            break
        self.encdec.clear(self.batch_size)
        self.__forward_img()
        self.encdec.reset(self.batch_size)
        loss, hyp_batch = self.__forward_word(self.trg_batch, self.encdec, True, 0)
        loss.backward()
        opt.update()
        K = len(self.trg_batch) - 2
        self.print_out(K, hyp_batch, epoch)
def main():
    w2v_dict = TransVecotr(MODEL_PATH)
    dataset, height, width = w2v_dict(WAKATI_PATH)
    feat_data = dataset["vec"]
    label_data = xp.array([LAB_DIC[i] for i in dataset["lab"]], dtype=xp.int32)
    x_train, x_test, y_train, y_test = train_test_split(
        feat_data, label_data, test_size=0.15)

    input_channel = 1
    x_train = xp.array(x_train, dtype=xp.float32).reshape(
        len(x_train), input_channel, height, width)
    x_test = xp.array(x_test, dtype=xp.float32).reshape(
        len(x_test), input_channel, height, width)
    train = tuple_dataset.TupleDataset(x_train, y_train)
    test = tuple_dataset.TupleDataset(x_test, y_test)
    train_iter = iterators.SerialIterator(train, N_BATCH)
    test_iter = iterators.SerialIterator(test, N_BATCH, repeat=False, shuffle=False)

    model = L.Classifier(SimpleCNN(input_channel, N_OUTPUT, FILTER_H, width,
                                   MID_UNITS, N_UNITS, N_LABEL))
    if GPU >= 0:
        model.to_gpu()

    optimizer = optimizers.AdaGrad()
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, device=GPU)
    trainer = training.Trainer(updater, (N_EPOCH, 'epoch'), out="result")
    trainer.extend(extensions.Evaluator(test_iter, model, device=GPU))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def main():
    w2v_dict = TransVecotr(MODEL_PATH)
    dataset, height, width = w2v_dict(WAKATI_PATH)
    feat_data = dataset["vec"]
    label_data = xp.array([LAB_DIC[i] for i in dataset["lab"]], dtype=xp.int32)

    input_channel = 1
    x_train = xp.array(feat_data, dtype=xp.float32).reshape(
        len(feat_data), input_channel, height, width)
    train = tuple_dataset.TupleDataset(x_train, label_data)
    train_iter = iterators.SerialIterator(train, N_BATCH)

    model = L.Classifier(SimpleCNN(input_channel, N_OUTPUT, FILTER_H, width,
                                   MID_UNITS, N_UNITS, N_LABEL))
    if GPU >= 0:
        model.to_gpu()

    optimizer = optimizers.AdaGrad()
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, device=GPU)
    trainer = training.Trainer(updater, (1, 'epoch'), out="result")
    serializers.load_npz(TRAINER_PATH, trainer)

    while True:
        input_text = input('input text :')
        if input_text == "exit":
            break
        pred_vec = w2v_dict.gen_pred_vec(input_text, height)
        # wrap twice: first adds the channel axis, then the batch axis
        pred_vec = xp.array([pred_vec], dtype=xp.float32)
        pred_data = xp.array([pred_vec], dtype=xp.float32)
        hyp_data = model.predictor(pred_data)
        res_dict = {v: k for k, v in LAB_DIC.items()}
        if res_dict[hyp_data.data.argmax()] == "yakiu":
            print("彡(゚)(゚) やきう民")   # "baseball fan"
        else:
            print("(´・ω・`) 原住民")     # "regular resident"
        print()
def max_ent_deep_irl(feature_matrix, trans_probs, trajs, gamma=0.9, n_epoch=30):
    n_states, d_states = feature_matrix.shape
    _, n_actions, _ = trans_probs.shape
    reward_func = Reward(d_states, 64)

    optimizer = optimizers.AdaGrad(lr=0.01)
    optimizer.setup(reward_func)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))
    optimizer.add_hook(chainer.optimizer.GradientClipping(100.0))

    # empirical feature expectations from the demonstration trajectories
    feature_exp = np.zeros((d_states))
    for episode in trajs:
        for step in episode:
            feature_exp += feature_matrix[step[0], :]
    feature_exp = feature_exp / len(trajs)

    fmat = chainer.Variable(feature_matrix.astype(np.float32))
    for _ in range(n_epoch):
        reward_func.zerograds()
        r = reward_func(fmat)
        v = value_iteration(trans_probs, r.data.reshape((n_states,)), gamma)
        pi = best_policy(trans_probs, v)
        exp_svf = expected_svf(trans_probs, trajs, pi)
        # inject the analytic max-ent gradient by hand and backpropagate
        grad_r = feature_exp - exp_svf
        r.grad = -grad_r.reshape((n_states, 1)).astype(np.float32)
        r.backward()
        optimizer.update()
    return reward_func(fmat).data.reshape((n_states,))
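# max_ent_deep_irl above never builds a scalar loss; it assigns the analytic
# max-ent gradient to r.grad and backpropagates from there. A self-contained
# sketch of that Chainer mechanism (toy shapes, unrelated to IRL):
import numpy as np
import chainer
import chainer.links as L

lin = L.Linear(3, 1)
x = chainer.Variable(np.ones((4, 3), dtype=np.float32))
y = lin(x)                                        # non-scalar output, shape (4, 1)
y.grad = np.full((4, 1), -1.0, dtype=np.float32)  # hand-computed upstream gradient
y.backward()                                      # propagates into lin.W and lin.b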
def get_model_optimizer(args):
    model = get_model(args)

    if 'opt' in args:
        # prepare optimizer
        if args.opt == 'AdaGrad':
            optimizer = optimizers.AdaGrad(lr=args.lr)
        elif args.opt == 'MomentumSGD':
            optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
        elif args.opt == 'Adam':
            optimizer = optimizers.Adam()
        else:
            raise Exception('No optimizer is selected')
        optimizer.setup(model)

        if args.resume_opt is not None:
            serializers.load_hdf5(args.resume_opt, optimizer)
            args.epoch_offset = int(
                re.search('epoch-([0-9]+)', args.resume_opt).groups()[0])
        return model, optimizer
    else:
        print('No optimizer generated.')
        return model
def optimizer(opt_str):
    """Infer the optimizer from the given string."""
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
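# Example of the helper above. Unknown strings fall back to Adam(amsgrad=True)
# and emit a warning through the module's `logger` (assumed to be configured):
opt = optimizer('m_sgd')   # -> optimizers.MomentumSGD()
opt = optimizer('typo')    # -> optimizers.Adam(amsgrad=True), plus a warning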
def get_model_optimizer(result_dir, args):
    model_fn = os.path.basename(args.model)
    model_name = model_fn.split('.')[0]
    module = imp.load_source(model_fn.split('.')[0], args.model)
    Net = getattr(module, model_name)

    dst = '%s/%s' % (result_dir, model_fn)
    if not os.path.exists(dst):
        shutil.copy(args.model, dst)
    dst = '%s/%s' % (result_dir, os.path.basename(__file__))
    if not os.path.exists(dst):
        shutil.copy(__file__, dst)

    # prepare model
    model = Net()
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
    if args.restart_from is not None:
        model = pickle.load(open(args.restart_from, 'rb'))
    if args.gpu >= 0:
        model.to_gpu()

    # prepare optimizer
    if args.opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.0005)
    elif args.opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=0.0005, momentum=0.9)
    elif args.opt == 'Adam':
        optimizer = optimizers.Adam()
    else:
        raise Exception('No optimizer is selected')
    optimizer.setup(model)
    return model, optimizer
def get_optimizer(model, opt, lr=None, adam_alpha=None, adam_beta1=None,
                  adam_beta2=None, adam_eps=None, weight_decay=None):
    if opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9)
    elif opt == 'Adam':
        optimizer = optimizers.Adam(alpha=adam_alpha, beta1=adam_beta1,
                                    beta2=adam_beta2, eps=adam_eps)
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=lr)
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop(lr=lr)
    else:
        raise Exception('No optimizer is selected')

    # The first model as the master model
    optimizer.setup(model)
    if opt == 'MomentumSGD':
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    return optimizer
def optimizer(opt_str):
    """Infer the optimizer from the given string."""
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0]))

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
def which_is_best_optimizer(k=10, model=CNN()):
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.Adam(), tag='Adam')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.SGD(), tag='SGD')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSpropGraves(), tag='RMSpropGraves')
    # k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSprop(), tag='RMSprop')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.AdaDelta(), tag='AdaDelta')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.AdaGrad(), tag='AdaGrad')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.MomentumSGD(), tag='MomentumSGD')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.NesterovAG(), tag='NesterovAG')
def init_model():
    # Build the models.
    if use_pre2 == 'pre':
        pre_unit = 4
    else:
        pre_unit = 0
    if use_null == 'null':
        null_unit = 6
    else:
        null_unit = 0

    if args.phrase == 'phrase':
        phrase_unit = 4
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 2 + null_unit * 2, n_label),
                ww0=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
                ww1=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )
    else:
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 4 + null_unit * 4, n_label),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )

    if opt_name == 'SGD':
        optimizer = optimizers.SGD(lr=0.02)        # (lr=opt_score) # lr=0.01
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.001)   # (lr=opt_score) # lr=0.001
    elif opt_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=0.9)   # (rho=opt_score) # rho=0.9
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam(alpha=0.0001)  # (alpha=opt_score) # alpha=0.0001
    else:
        raise ValueError('unknown optimizer: %s' % opt_name)
    optimizer.setup(model)  # .collect_parameters()
    return model, optimizer
def setup_optimizer(self):
    if self.opt_type == 'sgd':
        self.optimizer = optimizers.SGD(lr=self.opt_lr)
    elif self.opt_type == 'adagrad':
        self.optimizer = optimizers.AdaGrad(lr=self.opt_lr)
    elif self.opt_type == 'adam':
        self.optimizer = optimizers.Adam(alpha=self.opt_lr)
    else:
        raise ValueError('unknown optimizer type: %s' % self.opt_type)
    self.optimizer.setup(self.network.collect_parameters())
def main():
    opts = {}
    optimizer = optimizers.AdaGrad()
    opts['optimizer'] = optimizer
    opts['model'] = CNN
    train(opts)
def init_model(vocab_size):
    model = chainer.FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        hidden1=F.Linear(window * embed_units, hidden_units),
        output=F.Linear(hidden_units, label_num),
    )
    opt = optimizers.AdaGrad(lr=learning_rate)
    opt.setup(model)
    return model, opt
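# Both init_model variants in this file read their hyperparameters from
# module globals, so those must exist before the call. A sketch with
# illustrative values, followed by one FunctionSet-era (Chainer v1) update
# step; the loss construction from model.embed/hidden1/output is omitted:
embed_units, window, hidden_units = 100, 3, 200
label_num, learning_rate = 5, 0.01
model, opt = init_model(vocab_size=5000)
# per minibatch:
# opt.zero_grads(); loss.backward(); opt.update()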
def get_model_optimizer(result_dir, args):
    model = pickle.load(open('models/%s' % NETS[args.net][1], 'rb'))
    model.to_gpu()

    # prepare optimizer
    optimizer = optimizers.AdaGrad(lr=0.0005)
    optimizer.setup(model)
    return model, optimizer
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * args.minibatch),
            args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(attmt)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab,
                                      trg_vocab, attmt, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' %
                      (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
            trained += K

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)

    trace('finished.')
def train(self):
    """Train method.

    If a word2vec model is given, its weights are copied into the embedding
    and decoder layers. AdaGrad is used as the optimizer.
    """
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    self.attention_dialogue = AttentionDialogue(self.vocab, self.embed,
                                                self.hidden, self.XP)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, self.attention_dialogue.emb)
        self.copy_model(self.word2vec, self.attention_dialogue.dec, dec_flag=True)

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
            self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.attention_dialogue)
        opt.add_hook(optimizer.GradientClipping(5))

        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = self.forward_implement(
                src_batch, trg_batch, src_vocab, trg_vocab,
                self.attention_dialogue, True, 0)
            loss.backward()
            opt.update()
            self.print_out(random_number, epoch, trained,
                           src_batch, trg_batch, hyp_batch)
            trained += K

        trace('saving model ...')
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab.save(model_path + '.srcvocab')
        trg_vocab.save(model_path + '.trgvocab')
        self.attention_dialogue.save_spec(model_path + '.spec')
        serializers.save_hdf5(model_path + '.weights', self.attention_dialogue)

    trace('finished.')
def initializeOptimizer(self, optimizerAlgorithm):
    # Note: "SGD" intentionally maps to MomentumSGD here.
    if optimizerAlgorithm == "Adam":
        self.optimizer = optimizers.Adam()
    elif optimizerAlgorithm == "AdaGrad":
        self.optimizer = optimizers.AdaGrad()
    elif optimizerAlgorithm == "SGD":
        self.optimizer = optimizers.MomentumSGD()
    else:
        raise ValueError('could not find %s in optimizers {"Adam", "AdaGrad", "SGD"}'
                         % (optimizerAlgorithm))
    self.optimizer.setup(self.model)
def __init__(self, outputdim, minval, optimizer=None):
    if optimizer is None:
        self.optimizer = chainer.optimizers.Adam()
    else:
        self.optimizer = optimizer
    self.model = GoogLeNetBN(outputdim)
    self.optimizer.setup(self.model)
    self.myOptimizers = [optimizers.Adam(),
                         optimizers.AdaGrad(),
                         optimizers.AdaDelta()]
    self.mindata = -minval[0]
    print(self.mindata)
def initialize_optimizer(self, lr=0.5):
    if self.algorithm == 'SGD':
        self.optimizer = optimizers.SGD(lr=lr)
    elif self.algorithm == 'Adam':
        self.optimizer = optimizers.Adam()
    elif self.algorithm == 'Adagrad':
        self.optimizer = optimizers.AdaGrad()
    elif self.algorithm == 'Adadelta':
        self.optimizer = optimizers.AdaDelta()
    else:
        raise AssertionError('this algorithm is not available')
    self.optimizer.setup(self.model)
def init_optimizer(self):
    # Replaces the optimizer-name string in self.optimizer with the object.
    if self.optimizer == 'SGD':
        self.optimizer = optimizers.MomentumSGD(lr=self.learning_rate,
                                                momentum=self.momentum)
    elif self.optimizer == 'AdaDelta':
        self.optimizer = optimizers.AdaDelta()
    elif self.optimizer == 'AdaGrad':
        self.optimizer = optimizers.AdaGrad()
    elif self.optimizer == 'Adam':
        self.optimizer = optimizers.Adam()
    elif self.optimizer == 'RMSprop':
        self.optimizer = optimizers.RMSprop()
    else:
        raise ValueError('unknown optimizer: %s' % self.optimizer)
def get_optimizer(opt):
    # prepare optimizer
    if opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=0.7)
    elif opt == 'Adam':
        optimizer = optimizers.Adam(alpha=args.alpha)
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=args.lr)
    else:
        raise Exception('No optimizer is selected')
    return optimizer
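# get_optimizer above reads lr/alpha from a module-level `args` and, unlike
# most helpers in this section, does not call setup(); the caller binds it:
optimizer = get_optimizer('MomentumSGD')
optimizer.setup(model)  # `model` is whatever chainer.Link the caller built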
def init(args):
    def parse(line):
        attr, pos_id = line.split()
        attr = tuple(attr.split(','))
        return (attr, int(pos_id))

    model = md.Analyzer(
        md.BidirectionalRecognizer(md.Recognizer(256, 100, 100, 100),
                                   md.Recognizer(256, 100, 100, 100)),
        md.Tagger(md.BiClassifier(100), chainer.ChainList()))
    optimizer = optimizers.AdaGrad(lr=0.01)
    optimizer.setup(model)
    return Storage(model, optimizer)
def cross_optimizers(opt):
    if opt == 'SGD':
        optimizer = optimizers.SGD()
    elif opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD()
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad()
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop()
    elif opt == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif opt == 'Adam':
        optimizer = optimizers.Adam()
    else:
        raise ValueError('unknown optimizer: %s' % opt)
    return copy.deepcopy(optimizer)
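# cross_optimizers returns a deep copy so each model in a comparison sweep
# gets an optimizer with independent state. Illustrative use:
opts = {name: cross_optimizers(name)
        for name in ('SGD', 'MomentumSGD', 'AdaGrad',
                     'RMSprop', 'AdaDelta', 'Adam')}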
def train(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
            self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))

        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            # If you use the IPython notebook, you have to use the forward function:
            # hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            hyp_batch, loss = self.forward_implement(
                src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            self.print_out(random_number, epoch, trained,
                           src_batch, trg_batch, hyp_batch)
            trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')