def setup_optimizer(self, optimizer_name, gradient_clipping=3,
                    weight_decay=0.00001, **kwargs):
    # set optimizer
    if optimizer_name == "Adam":
        self.opt = optimizers.Adam(**kwargs)
    elif optimizer_name == "AdaDelta":
        self.opt = optimizers.AdaDelta(**kwargs)
    elif optimizer_name == "AdaGrad":
        self.opt = optimizers.AdaGrad(**kwargs)
    elif optimizer_name == "RMSprop":
        self.opt = optimizers.RMSprop(**kwargs)
    elif optimizer_name == "RMSpropGraves":
        self.opt = optimizers.RMSpropGraves(**kwargs)
    elif optimizer_name == "SGD":
        self.opt = optimizers.SGD(**kwargs)
    elif optimizer_name == "MomentumSGD":
        self.opt = optimizers.MomentumSGD(**kwargs)

    # self.opt.use_cleargrads()
    self.opt.setup(self)
    self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
    self.opt.add_hook(optimizer.WeightDecay(weight_decay))
    self.opt_params = {
        "optimizer_name": optimizer_name,
        "gradient_clipping": gradient_clipping,
        "weight_decay": weight_decay
    }
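# Hedged, self-contained sketch of the pattern used by setup_optimizer above:
# build an optimizer by name, attach it to a link, and register gradient
# clipping and weight decay hooks. The Linear link and the hyperparameter
# values below are illustrative assumptions, not part of the original snippet.
import chainer
from chainer import optimizer, optimizers

model = chainer.links.Linear(2, 3)             # any chainer.Link/Chain works
opt = optimizers.Adam(alpha=1e-3)
opt.setup(model)                               # bind optimizer to parameters
opt.add_hook(optimizer.GradientClipping(3.0))  # clip gradient L2 norm at 3
opt.add_hook(optimizer.WeightDecay(1e-5))      # L2 penalty applied per update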
def optimizer(opt_str):
    """ Infer the optimizer from an input string. """
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0]))

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
def get_model_optimizer(result_folder, cfg_mod):
    model_fn = path.basename(cfg_mod.SRC_MODEL)
    src_model = imp.load_source(
        model_fn.split('.')[0],
        path.join(result_folder, cfg_mod.SRC_MODEL)).src_model

    if cfg_mod.OPT_PARAM == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=cfg_mod.TRAIN_RATE, eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=cfg_mod.TRAIN_RATE,
                                           momentum=cfg_mod.MOMENTUM)
    elif cfg_mod.OPT_PARAM == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=cfg_mod.TRAIN_RATE, eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'ADAM':
        optimizer = optimizers.Adam(alpha=cfg_mod.TRAIN_RATE,
                                    beta1=cfg_mod.BETA1,
                                    beta2=cfg_mod.BETA2,
                                    eps=cfg_mod.EPS)
    else:
        raise Exception('No optimizer is selected')

    optimizer.setup(src_model)
    if cfg_mod.WEIGHT_DECAY:
        optimizer.add_hook(chainer.optimizer.WeightDecay(cfg_mod.WEIGHT_DECAY))
    return src_model, optimizer
def setUp(self):
    if self.file_type == 'filename':
        fd, path = tempfile.mkstemp()
        os.close(fd)
        self.file = path
    elif self.file_type == 'bytesio':
        self.file = six.BytesIO()
    else:
        assert False

    child = link.Chain()
    with child.init_scope():
        child.linear = links.Linear(2, 3)
        child.Wc = chainer.Parameter(shape=(2, 3))

    self.parent = link.Chain()
    with self.parent.init_scope():
        self.parent.child = child
        self.parent.Wp = chainer.Parameter(shape=(2, 3))

    self.optimizer = optimizers.AdaDelta()
    self.optimizer.setup(self.parent)

    self.parent.cleargrads()
    self.optimizer.update()  # init all states

    self.savez = numpy.savez_compressed if self.compress else numpy.savez
def set_optimiser(self, opt_type, lr_rate=None, enable_decay=False,
                  decay=1.0, start_decay_at=-1):
    if opt_type == "SGD":
        if lr_rate is None:
            lr_rate = 1.0
        self.enable_decay = enable_decay
        self.decay = decay
        self.start_decay_at = start_decay_at
        self.optimizer = optimizers.SGD(lr_rate)
    elif opt_type == "Adam":
        if lr_rate is None:
            lr_rate = 0.001
        print("disable lr decay")
        self.enable_decay = False
        self.optimizer = optimizers.Adam(alpha=lr_rate)
    elif opt_type == "AdaDelta":
        if lr_rate is None:
            lr_rate = 0.95
        print("disable lr decay")
        self.enable_decay = False
        self.optimizer = optimizers.AdaDelta(rho=lr_rate)
    else:
        raise Exception("Invalid optimizer type: " + opt_type)
def which_is_best_optimizer(k=10, model=CNN()):
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.Adam(), tag='Adam')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.SGD(), tag='SGD')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSpropGraves(), tag='RMSpropGraves')
    # k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSprop(), tag='RMSprop')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.AdaDelta(), tag='AdaDelta')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.AdaGrad(), tag='AdaGrad')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.MomentumSGD(), tag='MomentumSGD')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.NesterovAG(), tag='NesterovAG')
def optimizer(opt_str):
    """ Infer the optimizer from an input string. """
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
def __init__(self, specs, njobs):
    self.specs = specs
    assert "layers_specs" in specs, "Please specify specs of each layer"
    layers_specs = self.specs["layers_specs"]
    self.layers = CNN(layers_specs)

    #rev_layers_specs = copy.deepcopy([spec for spec in reversed(specs["layers_specs"])])
    #for spec in rev_layers_specs:
    #    if spec["type"] == "full":
    #        spec["dimensions"] = (spec["dimensions"][1], spec["dimensions"][0])
    #    elif spec["type"] == "conv":
    #        spec["type"] = "deconv"
    #        spec["filter_shape"] = (spec["filter_shape"][1], spec["filter_shape"][0], spec["filter_shape"][2], spec["filter_shape"][3])
    #self.dec_layers = CNN(rev_layers_specs)

    if njobs == -1:
        self.device_id = [-1]
    else:
        self.device_id = [4]
    self.layers.to_gpu(self.device_id[0])

    if self.specs["learning_rule"]["type"] == "adam":
        self.optimizer = optimizers.Adam()
    elif self.specs["learning_rule"]["type"] == "adadelta":
        self.optimizer = optimizers.AdaDelta()
    elif self.specs["learning_rule"]["type"] == "momentum":
        self.optimizer = optimizers.MomentumSGD()
    else:
        raise ValueError("Unsupported rule: " +
                         str(self.specs["learning_rule"]["type"]))
    self.optimizer.setup(self.layers)
def Init(self):
    TFunctionApprox.Init(self)
    L = self.Locate
    if self.Params['nn_data_x'] is not None:
        self.DataX = np.array(
            pickle.load(open(L(self.Params['nn_data_x']), 'rb')), np.float32)
    else:
        self.DataX = np.array([], np.float32)
    if self.Params['nn_data_y'] is not None:
        self.DataY = np.array(
            pickle.load(open(L(self.Params['nn_data_y']), 'rb')), np.int32)
    else:
        self.DataY = np.array([], np.int32)

    self.CreateNNs()

    if self.Params['nn_params'] is not None:
        #self.model.copy_parameters_from(map(lambda e: np.array(e, np.float32), self.Params['nn_params']))
        self.model.copy_parameters_from(
            [np.array(e, np.float32)
             for e in pickle.load(open(L(self.Params['nn_params']), 'rb'))])
        self.is_predictable = True
    else:
        if self.Options['init_bias_randomly']:
            self.InitBias(m='mean')

    if self.Options['gpu'] >= 0:
        cuda.init(self.Options['gpu'])
        self.model.to_gpu()
        self.model_err.to_gpu()

    self.optimizer = optimizers.AdaDelta(rho=self.Options['AdaDelta_rho'])
    self.optimizer.setup(self.model.collect_parameters())
def setOptimizer(args, EncDecAtt):
    # build the optimizer
    if args.optimizer == 'SGD':
        optimizer = optimizers.SGD(lr=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.lr))
    elif args.optimizer == 'Adam':
        # assert 0, "Currently Adam is not supported for asynchronous update"
        optimizer = optimizers.Adam(alpha=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.alpha))
    elif args.optimizer == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.lr))
    elif args.optimizer == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.rho))
    else:
        assert 0, "ERROR"

    optimizer.setup(EncDecAtt.model)  # attach the model to the optimizer here

    if args.optimizer == 'Adam':
        optimizer.t = 1  # small hack to avoid a warning; not something one should normally do
    return optimizer
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0, beta1=alpha1, beta2=alpha2,
                               eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
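# Hedged usage sketch for get_opt above; the Namespace fields mirror the
# attributes the function reads, and the values here are assumptions.
from argparse import Namespace

args = Namespace(opt_model="Adam", alpha0=0, alpha1=0, alpha2=0, alpha3=0)
opt = get_opt(args)  # zeros fall back to the per-optimizer defaults above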
def init_model():
    # Make models
    if use_pre2 == 'pre':
        pre_unit = 4
    else:
        pre_unit = 0
    if use_null == 'null':
        null_unit = 6
    else:
        null_unit = 0

    if args.phrase == 'phrase':
        phrase_unit = 4
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 2 + null_unit * 2, n_label),
                ww0=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
                ww1=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )
    else:
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 4 + null_unit * 4, n_label),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )

    if opt_name == 'SGD':
        optimizer = optimizers.SGD(lr=0.02)  # (lr=opt_score)  # lr=0.01
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.001)  # (lr=opt_score)  # lr=0.001
    elif opt_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=0.9)  # (rho=opt_score)  # rho=0.9
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam(alpha=0.0001)  # (alpha=opt_score)  # alpha=0.0001

    optimizer.setup(model)  # .collect_parameters()
    return model, optimizer
def __init__(self, link, epoch=10, batch_size=100, visualize=False):
    ChainList.__init__(self, link)
    self.optimizer = optimizers.AdaDelta()
    self.optimizer.setup(self)
    self.loss_function = F.mean_squared_error
    self.epoch = epoch
    self.batch_size = batch_size
    self.visualize = visualize
def train(args):
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size, args.maxout_hidden_size,
                      args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()
    if args.source_validation:
        if not os.path.exists(PLOT_DIR):
            os.mkdir(PLOT_DIR)
        fp_loss = open(PLOT_DIR + "loss", "w")
        fp_loss_val = open(PLOT_DIR + "loss_val", "w")

    opt = optimizers.AdaDelta(args.rho, args.eps)
    opt.setup(att_encdec)
    opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
    opt.add_hook(optimizer.GradientClipping(CLIP_THR))

    for epoch in range(args.epochs):
        print("--- epoch: %s/%s ---" % (epoch + 1, args.epochs))
        source_gen = word_list(args.source)
        target_gen = word_list(args.target)
        batch_gen = batch(sort(source_gen, target_gen, 100 * args.minibatch),
                          args.minibatch)
        n = 0
        total_loss = 0.0
        for source_batch, target_batch in batch_gen:
            n += len(source_batch)
            source_batch = fill_batch_end(source_batch)
            target_batch = fill_batch_end(target_batch)
            hyp_batch, loss = forward(source_batch, target_batch, source_vocab,
                                      target_vocab, att_encdec, True, 0)
            total_loss += loss.data * len(source_batch)
            closed_test(source_batch, target_batch, hyp_batch)
            loss.backward()
            opt.update()
            print("[n=%s]" % (n))
        print("[total=%s]" % (n))

        prefix = args.model_path + '%s' % (epoch + 1)
        serializers.save_hdf5(prefix + '.attencdec', att_encdec)
        if args.source_validation:
            total_loss_val, n_val = validation_test(args, att_encdec,
                                                    source_vocab, target_vocab)
            fp_loss.write("\t".join([str(epoch), str(total_loss / n) + "\n"]))
            fp_loss_val.write("\t".join(
                [str(epoch), str(total_loss_val / n_val) + "\n"]))
            fp_loss.flush()
            fp_loss_val.flush()
        hyp_params = att_encdec.get_hyper_params()
        Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
        source_vocab.save(args.model_path + SRC_VOCAB_NAME)
        target_vocab.save(args.model_path + TAR_VOCAB_NAME)

    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
    source_vocab.save(args.model_path + SRC_VOCAB_NAME)
    target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    if args.source_validation:
        fp_loss.close()
        fp_loss_val.close()
def TrainConvnetExtractor(trainidx, epoch=20, saveas="convnet.model"):
    cqtfilelist = np.array(find_files(const.PATH_MIDIHCQT, ext="npz"))[trainidx]
    # midifilelist = find_files(const.PATH_MIDI, ext="mid")[:filecnt]
    config.train = True
    config.enable_backprop = True
    convnet = networks.FullCNNFeatExtractor()
    model = networks.ConvnetPredictor(convnet)
    model.to_gpu(0)
    opt = optimizers.AdaDelta()
    opt.setup(model)
    print("train set length: %d" % trainidx.size)
    print("start epochs...")

    S = []
    T = []
    for cqtfile in cqtfilelist:
        dat = np.load(cqtfile)
        spec = utils.PreprocessSpec(dat["spec"])[:const.CQT_H, :, :]
        targ = GetConvnetTargetFromPianoroll(dat["target"]).astype(np.int32)
        assert spec.shape[1] == targ.shape[0]
        S.append(spec)
        T.append(targ)
    S = np.concatenate(S, axis=1)
    T = np.concatenate(T, axis=0)

    for ep in range(epoch):
        sum_loss = 0
        assert S.shape[1] == T.shape[0]
        randidx = np.random.randint(0, S.shape[1] - const.CONV_TRAIN_SEQLEN - 1,
                                    S.shape[1] // const.CONV_TRAIN_SEQLEN * 4)
        for i in range(0, randidx.size - const.CONV_TRAIN_BATCH,
                       const.CONV_TRAIN_BATCH):
            x_batch = np.stack([
                S[:, randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            t_batch = np.stack([
                T[randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            x_in = cp.asarray(x_batch)
            t_in = cp.asarray(t_batch)
            model.cleargrads()
            loss = model(x_in, t_in)
            loss.backward()
            opt.update()
            sum_loss += loss.data
        convnet.save(saveas)
        print("epoch: %d/%d loss:%.04f" % (ep + 1, epoch,
                                           sum_loss / const.CONV_TRAIN_BATCH))
    convnet.save(saveas)
def __init__(self, n_units, pool_size, epoch=10, batch_size=100,
             dropout_rate=(), optimizer=optimizers.AdaDelta()):
    super(MaxoutClassifier, self).__init__(n_units, epoch, batch_size,
                                           dropout_rate, optimizer)
    self.pool_size = pool_size
def __init__(self, outputdim, minval, optimizer=None):
    if optimizer is None:
        self.optimizer = chainer.optimizers.Adam()
    else:
        self.optimizer = optimizer
    self.model = GoogLeNetBN(outputdim)
    self.optimizer.setup(self.model)
    self.myOptimizers = [optimizers.Adam(),
                         optimizers.AdaGrad(),
                         optimizers.AdaDelta()]
    self.mindata = -minval[0]
    print(self.mindata)
def initialize_optimizer(self, lr=0.5):
    if self.algorithm == 'SGD':
        self.optimizer = optimizers.SGD(lr=lr)
    elif self.algorithm == 'Adam':
        self.optimizer = optimizers.Adam()
    elif self.algorithm == 'Adagrad':
        self.optimizer = optimizers.AdaGrad()
    elif self.algorithm == 'Adadelta':
        self.optimizer = optimizers.AdaDelta()
    else:
        raise AssertionError('this algorithm is not available')
    self.optimizer.setup(self.model)
def init_optimizer(self):
    if self.optimizer == 'SGD':
        self.optimizer = optimizers.MomentumSGD(lr=self.learning_rate,
                                                momentum=self.momentum)
    elif self.optimizer == 'AdaDelta':
        self.optimizer = optimizers.AdaDelta()
    elif self.optimizer == 'AdaGrad':
        self.optimizer = optimizers.AdaGrad()
    elif self.optimizer == 'Adam':
        self.optimizer = optimizers.Adam()
    elif self.optimizer == 'RMSprop':
        self.optimizer = optimizers.RMSprop()
def setUp(self):
    fd, path = tempfile.mkstemp()
    os.close(fd)
    self.temp_file_path = path

    child = link.Chain(linear=links.Linear(2, 3))
    child.add_param('Wc', (2, 3))
    self.parent = link.Chain(child=child)
    self.parent.add_param('Wp', (2, 3))
    self.optimizer = optimizers.AdaDelta()
    self.optimizer.setup(self.parent)
def main():
    state = make_initial_state()
    accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))

    for epoch in range(args.n_epoch):
        print('epoch %d/%d: ' % (epoch + 1, args.n_epoch))
        log_ppl = 0.0
        trained = 0

        opt = optimizers.AdaDelta()
        opt.setup(model)

        for batch in generate_batch(args.corpus, args.batchsize):
            batch = [[vocab[x] for x in words] for words in batch]
            K = len(batch)
            if K != args.batchsize:
                break
            L = len(batch[0]) - 1

            opt.zero_grads()
            for l in range(L):
                x_batch = xp.array([batch[k][l] for k in range(K)],
                                   dtype=np.int32)
                y_batch = xp.array([batch[k][l + 1] for k in range(K)],
                                   dtype=np.int32)
                state, loss_i = forward_one_step(x_batch, y_batch, state)
                accum_loss += loss_i

            accum_loss.backward()
            log_ppl += accum_loss.data.reshape(()) * K
            accum_loss.unchain_backward()  # truncate
            accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))

            opt.clip_grads(grad_clip)
            opt.update()

            trained += K
            sys.stderr.write('\r %d/%d' % (trained, num_lines))
            sys.stderr.flush()

        log_ppl /= float(num_words)
        print(' log(PPL) = %.10f' % log_ppl)
        # print(' PPL = %.10f' % math.exp(log_ppl))

        if (epoch + 1) % 5 == 0:
            print("save model")
            model_name = "%s/kokkai_lstm_lm.epoch%d" % (args.model, epoch + 1)
            cPickle.dump(copy.deepcopy(model).to_cpu(), open(model_name, 'wb'))

    print('training finished.')
def cross_optimizers(opt):
    if opt == 'SGD':
        optimizer = optimizers.SGD()
    elif opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD()
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad()
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop()
    elif opt == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif opt == 'Adam':
        optimizer = optimizers.Adam()
    return copy.deepcopy(optimizer)
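# Why cross_optimizers returns a deepcopy: a chainer optimizer accumulates
# per-parameter state once setup() is called, so each cross-validation fold
# should get its own instance. A minimal sketch (the Linear models are
# illustrative assumptions):
import copy
import chainer

base = cross_optimizers('Adam')
for fold_model in [chainer.links.Linear(2, 3) for _ in range(3)]:
    opt = copy.deepcopy(base)  # fresh, state-free optimizer per fold
    opt.setup(fold_model)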
def __init__(self, n_units, epoch=10, batch_size=100, dropout_rate=(),
             optimizer=optimizers.AdaDelta()):
    self.n_units = n_units
    self.epoch = epoch
    self.batch_size = batch_size
    self.__constructed = False
    self.pre_trained = False
    self.optimizer = optimizer
    if dropout_rate:
        self.dropout_rate = dropout_rate
    else:
        self.dropout_rate = [.5 for i in n_units]
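# Caveat on the signature above: optimizer=optimizers.AdaDelta() is evaluated
# once at function-definition time, so all instances constructed without an
# explicit optimizer share the same AdaDelta object (and its state after
# setup()). A safer idiom, sketched here as an assumption rather than the
# original author's code:
from chainer import optimizers

def default_optimizer(optimizer=None):
    # create a fresh optimizer per call instead of sharing one default object
    return optimizer if optimizer is not None else optimizers.AdaDelta()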
def select_optimizer(name):
    if name == "AdaGrad":
        optimizer = optimizers.AdaGrad(lr=0.001)
    elif name == "Adam":
        optimizer = chainer.optimizers.Adam(alpha=0.0001)
    elif name == "MomentumSGD":
        optimizer = optimizers.MomentumSGD(lr=0.01)
    elif name == "RMSprop":
        optimizer = optimizers.RMSprop(lr=0.01)
    elif name == "SGD":
        optimizer = optimizers.SGD(lr=0.01)
    elif name == "AdaDelta":
        optimizer = optimizers.AdaDelta(rho=0.9)
    else:
        raise Exception("Unknown network optimizer: " + name)
    return optimizer
def _build_optimizer(self, trial, model):
    # options for the optimizer function
    optimizer_name = trial.suggest_categorical(
        'optimizer', ['Adam', 'AdaDelta', 'RMSprop'])
    if optimizer_name == 'Adam':
        adam_alpha = trial.suggest_loguniform('adam_alpha', 1e-5, 1e-1)
        optimizer = optimizers.Adam(alpha=adam_alpha)
    elif optimizer_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif optimizer_name == 'RMSprop':
        optimizer = optimizers.RMSprop()

    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    return optimizer
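# Hedged sketch of how _build_optimizer above is typically driven by Optuna;
# the objective body, build_model, and train_and_evaluate are hypothetical
# helpers, not part of the original snippet.
#
#     import optuna
#
#     def objective(trial):
#         model = build_model()                       # hypothetical
#         opt = tuner._build_optimizer(trial, model)  # method defined above
#         return train_and_evaluate(model, opt)       # hypothetical
#
#     study = optuna.create_study(direction='minimize')
#     study.optimize(objective, n_trials=100)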
def setUp(self):
    fd, path = tempfile.mkstemp()
    os.close(fd)
    self.temp_file_path = path

    child = link.Chain(linear=links.Linear(2, 3))
    child.add_param('Wc', (2, 3))
    self.parent = link.Chain(child=child)
    self.parent.add_param('Wp', (2, 3))
    self.optimizer = optimizers.AdaDelta()
    self.optimizer.setup(self.parent)

    self.parent.zerograds()
    self.optimizer.update()  # init all states

    self.savez = numpy.savez_compressed if self.compress else numpy.savez
def get_optimizer(self, name, lr, momentum=0.9):
    if name.lower() == "adam":
        return optimizers.Adam(alpha=lr, beta1=momentum)
    if name.lower() == "smorms3":
        return optimizers.SMORMS3(lr=lr)
    if name.lower() == "adagrad":
        return optimizers.AdaGrad(lr=lr)
    if name.lower() == "adadelta":
        return optimizers.AdaDelta(rho=momentum)
    if name.lower() == "nesterov" or name.lower() == "nesterovag":
        return optimizers.NesterovAG(lr=lr, momentum=momentum)
    if name.lower() == "rmsprop":
        return optimizers.RMSprop(lr=lr, alpha=momentum)
    if name.lower() == "momentumsgd":
        return optimizers.MomentumSGD(lr=lr, momentum=momentum)
    if name.lower() == "sgd":
        return optimizers.SGD(lr=lr)
def get_optimizer(name):
    """
    :type name: str
    :rtype: chainer.Optimizer
    """
    if name == "adadelta":
        opt = optimizers.AdaDelta()
    elif name == "adagrad":
        opt = optimizers.AdaGrad()
    elif name == "adam":
        opt = optimizers.Adam()
    elif name == "rmsprop":
        opt = optimizers.RMSprop()
    elif name == "smorms3":
        opt = optimizers.SMORMS3()
    else:
        raise ValueError("Unknown optimizer_name=%s" % name)
    return opt
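# Usage sketch for get_optimizer above (the Linear link is an illustrative
# assumption); note that unknown names raise ValueError rather than silently
# falling back to a default.
import chainer

opt = get_optimizer("adadelta")
opt.setup(chainer.links.Linear(2, 3))  # binds the optimizer to the parameters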
def create_classifier(n_vocab, doc_length, wv_size, filter_sizes, hidden_units,
                      output_channel, initialW, non_static, batch_size, epoch,
                      gpu):
    model = NNModel(n_vocab=n_vocab,
                    doc_length=doc_length,
                    wv_size=wv_size,
                    filter_sizes=filter_sizes,
                    hidden_units=hidden_units,
                    output_channel=output_channel,
                    initialW=initialW,
                    non_static=non_static)
    # optimizer = optimizers.Adam()
    optimizer = optimizers.AdaDelta()
    return (model,
            ChainerEstimator(model=SoftmaxCrossEntropyClassifier(model),
                             optimizer=optimizer,
                             batch_size=batch_size,
                             device=gpu,
                             stop_trigger=(epoch, 'epoch')))
def setUp(self):
    fd, path = tempfile.mkstemp()
    os.close(fd)
    self.temp_file_path = path

    child = link.Chain()
    with child.init_scope():
        child.linear = links.Linear(2, 3)
        child.Wc = chainer.Parameter(shape=(2, 3))

    self.parent = link.Chain()
    with self.parent.init_scope():
        self.parent.child = child
        self.parent.Wp = chainer.Parameter(shape=(2, 3))

    self.optimizer = optimizers.AdaDelta()
    self.optimizer.setup(self.parent)

    self.parent.cleargrads()
    self.optimizer.update()  # init states