Example #1
    def setup_optimizer(self,
                        optimizer_name,
                        gradient_clipping=3,
                        weight_decay=0.00001,
                        **kwargs):
        # set optimizer
        if optimizer_name == "Adam":
            self.opt = optimizers.Adam(**kwargs)
        elif optimizer_name == "AdaDelta":
            self.opt = optimizers.AdaDelta(**kwargs)
        elif optimizer_name == "AdaGrad":
            self.opt = optimizers.AdaGrad(**kwargs)
        elif optimizer_name == "RMSprop":
            self.opt = optimizers.RMSprop(**kwargs)
        elif optimizer_name == "RMSpropGraves":
            self.opt = optimizers.RMSpropGraves(**kwargs)
        elif optimizer_name == "SGD":
            self.opt = optimizers.SGD(**kwargs)
        elif optimizer_name == "MomentumSGD":
            self.opt = optimizers.MomentumSGD(**kwargs)

        # self.opt.use_cleargrads()
        self.opt.setup(self)
        self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
        self.opt.add_hook(optimizer.WeightDecay(weight_decay))

        self.opt_params = {
            "optimizer_name": optimizer_name,
            "gradient_clipping": gradient_clipping,
            "weight_decay": weight_decay
        }
Example #2
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if(opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif(opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif(opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif(opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif(opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif(opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif(opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif(opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif(opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0])
        )

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
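
A minimal usage sketch for the factory above, assuming `O` is `chainer.optimizers`; the linear model and the weight-decay value are placeholders for illustration, not part of the original project. The returned optimizer still has to be attached to a link with `setup()` before it can update parameters.

import chainer
import chainer.links as L
from chainer import optimizers as O

model = L.Linear(3, 2)    # placeholder model, illustration only
opt = optimizer('ada_d')  # resolves to O.AdaDelta()
opt.setup(model)          # attach the model before training
opt.add_hook(chainer.optimizer.WeightDecay(1e-5))  # optional regularization hook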
Example #3
def get_model_optimizer(result_folder, cfg_mod):
    model_fn = path.basename(cfg_mod.SRC_MODEL)
    src_model = imp.load_source(
        model_fn.split('.')[0], path.join(result_folder,
                                          cfg_mod.SRC_MODEL)).src_model

    if cfg_mod.OPT_PARAM == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=cfg_mod.TRAIN_RATE, eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=cfg_mod.TRAIN_RATE,
                                           momentum=cfg_mod.MOMENTUM)
    elif cfg_mod.OPT_PARAM == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=cfg_mod.TRAIN_RATE,
                                        eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'ADAM':
        optimizer = optimizers.Adam(alpha=cfg_mod.TRAIN_RATE,
                                    beta1=cfg_mod.BETA1,
                                    beta2=cfg_mod.BETA2,
                                    eps=cfg_mod.EPS)
    else:
        raise Exception('No optimizer is selected')
    optimizer.setup(src_model)

    if cfg_mod.WEIGHT_DECAY:
        optimizer.add_hook(chainer.optimizer.WeightDecay(cfg_mod.WEIGHT_DECAY))

    return src_model, optimizer
Example #4
    def setUp(self):
        if self.file_type == 'filename':
            fd, path = tempfile.mkstemp()
            os.close(fd)
            self.file = path
        elif self.file_type == 'bytesio':
            self.file = six.BytesIO()
        else:
            assert False

        child = link.Chain()
        with child.init_scope():
            child.linear = links.Linear(2, 3)
            child.Wc = chainer.Parameter(shape=(2, 3))

        self.parent = link.Chain()
        with self.parent.init_scope():
            self.parent.child = child
            self.parent.Wp = chainer.Parameter(shape=(2, 3))

        self.optimizer = optimizers.AdaDelta()
        self.optimizer.setup(self.parent)

        self.parent.cleargrads()
        self.optimizer.update()  # init all states

        self.savez = numpy.savez_compressed if self.compress else numpy.savez
Example #5
    def set_optimiser(self,
                      opt_type,
                      lr_rate=None,
                      enable_decay=False,
                      decay=1.0,
                      start_decay_at=-1):
        if opt_type == "SGD":
            if (lr_rate is None):
                lr_rate = 1.0
            self.enable_decay = enable_decay
            self.decay = decay
            self.start_decay_at = start_decay_at
            self.optimizer = optimizers.SGD(lr_rate)

        elif opt_type == "Adam":
            if (lr_rate is None):
                lr_rate = 0.001
            print("disable lr decay")
            self.enable_decay = False
            self.optimizer = optimizers.Adam(alpha=lr_rate)

        elif opt_type == "AdaDelta":
            if (lr_rate is None):
                lr_rate = 0.95
            print("disable lr decay")
            self.enable_decay = False
            self.optimizer = optimizers.AdaDelta(rho=lr_rate)
        else:
            raise Exception("Invalid optimizer type" + opt_type)
Example #6
def which_is_best_optimizer(k=10, model=CNN()):
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.Adam(),
                      tag='Adam')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.SGD(),
                      tag='SGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.RMSpropGraves(),
                      tag='RMSpropGraves')
    #    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSprop(), tag='RMSprop')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaDelta(),
                      tag='AdaDelta')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaGrad(),
                      tag='AdaGrad')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.MomentumSGD(),
                      tag='MomentumSGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.NesterovAG(),
                      tag='NesterovAG')
Example #7
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if (opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif (opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif (opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif (opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif (opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif (opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif (opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif (opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif (opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
Example #8
    def __init__(self, specs, njobs):
        self.specs = specs
        assert "layers_specs" in specs, "Please specify specs of each layer"
        layers_specs = self.specs["layers_specs"]
        self.layers = CNN(layers_specs)
        #rev_layers_specs = copy.deepcopy([spec for spec in reversed(specs["layers_specs"])])
        #for spec in rev_layers_specs:
        #    if spec["type"] == "full":
        #        spec["dimensions"] = (spec["dimensions"][1], spec["dimensions"][0])
        #    elif spec["type"] == "conv":
        #        spec["type"] = "deconv"
        #        spec["filter_shape"] = (spec["filter_shape"][1], spec["filter_shape"][0], spec["filter_shape"][2], spec["filter_shape"][3])
        #self.dec_layers= CNN(rev_layers_specs)
        if njobs == -1:
            self.device_id = [-1]
        else:
            self.device_id = [4]
            self.layers.to_gpu(self.device_id[0])

        if self.specs["learning_rule"]["type"] == "adam":
            self.optimizer = optimizers.Adam()
        elif self.specs["learning_rule"]["type"] == "adadelta":
            self.optimizer = optimizers.AdaDelta()
        elif self.specs["learning_rule"]["type"] == "momentum":
            self.optimizer = optimizers.MomentumSGD()
        else:
            raise ValueError("Unsupported rule" +
                             str(self.specs["learning_rule"]["type"]))
        self.optimizer.setup(self.layers)
Example #9
    def Init(self):
        TFunctionApprox.Init(self)
        L = self.Locate
        if self.Params['nn_data_x'] != None:
            self.DataX = np.array(
                pickle.load(open(L(self.Params['nn_data_x']), 'rb')),
                np.float32)
        else:
            self.DataX = np.array([], np.float32)
        if self.Params['nn_data_y'] != None:
            self.DataY = np.array(
                pickle.load(open(L(self.Params['nn_data_y']), 'rb')), np.int32)
        else:
            self.DataY = np.array([], np.int32)

        self.CreateNNs()

        if self.Params['nn_params'] != None:
            #self.model.copy_parameters_from(map(lambda e:np.array(e,np.float32),self.Params['nn_params']))
            self.model.copy_parameters_from(
                map(lambda e: np.array(e, np.float32),
                    pickle.load(open(L(self.Params['nn_params']), 'rb'))))
            self.is_predictable = True
        else:
            if self.Options['init_bias_randomly']:
                self.InitBias(m='mean')

        if self.Options['gpu'] >= 0:
            cuda.init(self.Options['gpu'])
            self.model.to_gpu()
            self.model_err.to_gpu()

        self.optimizer = optimizers.AdaDelta(rho=self.Options['AdaDelta_rho'])
        self.optimizer.setup(self.model.collect_parameters())
Example #10
def setOptimizer(args, EncDecAtt):
    # build the optimizer
    if args.optimizer == 'SGD':
        optimizer = optimizers.SGD(lr=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.lr))
    elif args.optimizer == 'Adam':
        # assert 0, "Currently Adam is not supported for asynchronous update"
        optimizer = optimizers.Adam(alpha=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.alpha))
    elif args.optimizer == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.lr))
    elif args.optimizer == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=args.learning_rate)
        sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                         (args.optimizer, optimizer.rho))
    else:
        assert 0, "ERROR"

    optimizer.setup(EncDecAtt.model)  # attach the model to the optimizer here
    if args.optimizer == 'Adam':
        optimizer.t = 1  # small hack to avoid a warning; not something that should normally be done

    return optimizer
Example #11
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0,
                               beta1=alpha1,
                               beta2=alpha2,
                               eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
Example #12
def init_model():
    #Make models
    if use_pre2 == 'pre': pre_unit = 4
    else: pre_unit = 0
    if use_null == 'null': null_unit = 6
    else: null_unit = 0
    if args.phrase == 'phrase':
        phrase_unit = 4
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 2 + null_unit * 2, n_label),
                ww0=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
                ww1=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #target output
            embed_f=F.EmbedID(vocab_f['len_vocab'],
                              n_units),  #source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'],
                              n_units),  #target word embedding
        )
    else:
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(w0=F.Linear(
                n_units * 4 + null_unit * 4, n_label), ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #target output
            embed_f=F.EmbedID(vocab_f['len_vocab'],
                              n_units),  #source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'],
                              n_units),  #target word embedding 
        )
    if opt_name == 'SGD':
        optimizer = optimizers.SGD(lr=0.02)  # (lr=opt_score)  # lr=0.01
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.001)  # (lr=opt_score)  # lr=0.001
    elif opt_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=0.9)  # (rho=opt_score)  # rho=0.9
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam(
            alpha=0.0001)  # (alpha=opt_score)  # alpha=0.0001
    optimizer.setup(model)  # .collect_parameters()
    return model, optimizer
Example #13
 def __init__(self, link, epoch=10, batch_size=100, visualize=False):
     ChainList.__init__(self, link)
     self.optimizer = optimizers.AdaDelta()
     self.optimizer.setup(self)
     self.loss_function = F.mean_squared_error
     self.epoch = epoch
     self.batch_size = batch_size
     self.visualize = visualize
Example #14
def train(args):
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size, args.maxout_hidden_size,
                      args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()
    if args.source_validation:
        if os.path.exists(PLOT_DIR) == False: os.mkdir(PLOT_DIR)
        fp_loss = open(PLOT_DIR + "loss", "w")
        fp_loss_val = open(PLOT_DIR + "loss_val", "w")

    opt = optimizers.AdaDelta(args.rho, args.eps)
    opt.setup(att_encdec)
    opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
    opt.add_hook(optimizer.GradientClipping(CLIP_THR))
    for epoch in xrange(args.epochs):
        print "--- epoch: %s/%s ---" % (epoch + 1, args.epochs)
        source_gen = word_list(args.source)
        target_gen = word_list(args.target)
        batch_gen = batch(sort(source_gen, target_gen, 100 * args.minibatch),
                          args.minibatch)
        n = 0
        total_loss = 0.0
        for source_batch, target_batch in batch_gen:
            n += len(source_batch)
            source_batch = fill_batch_end(source_batch)
            target_batch = fill_batch_end(target_batch)
            hyp_batch, loss = forward(source_batch, target_batch, source_vocab,
                                      target_vocab, att_encdec, True, 0)
            total_loss += loss.data * len(source_batch)
            closed_test(source_batch, target_batch, hyp_batch)

            loss.backward()
            opt.update()
            print "[n=%s]" % (n)
        print "[total=%s]" % (n)
        prefix = args.model_path + '%s' % (epoch + 1)
        serializers.save_hdf5(prefix + '.attencdec', att_encdec)
        if args.source_validation:
            total_loss_val, n_val = validation_test(args, att_encdec,
                                                    source_vocab, target_vocab)
            fp_loss.write("\t".join([str(epoch), str(total_loss / n) + "\n"]))
            fp_loss_val.write("\t".join(
                [str(epoch), str(total_loss_val / n_val) + "\n"]))
            fp_loss.flush()
            fp_loss_val.flush()
        hyp_params = att_encdec.get_hyper_params()
        Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
        source_vocab.save(args.model_path + SRC_VOCAB_NAME)
        target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
    source_vocab.save(args.model_path + SRC_VOCAB_NAME)
    target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    if args.source_validation:
        fp_loss.close()
        fp_loss_val.close()
Example #15
def TrainConvnetExtractor(trainidx, epoch=20, saveas="convnet.model"):
    cqtfilelist = np.array(find_files(const.PATH_MIDIHCQT,
                                      ext="npz"))[trainidx]
    #midifilelist = find_files(const.PATH_MIDI,ext="mid")[:filecnt]
    config.train = True
    config.enable_backprop = True
    convnet = networks.FullCNNFeatExtractor()
    model = networks.ConvnetPredictor(convnet)
    model.to_gpu(0)
    opt = optimizers.AdaDelta()
    opt.setup(model)
    print("train set length: %d" % trainidx.size)
    print("start epochs...")
    S = []
    T = []

    for cqtfile in cqtfilelist:
        dat = np.load(cqtfile)
        spec = utils.PreprocessSpec(dat["spec"])[:const.CQT_H, :, :]
        targ = GetConvnetTargetFromPianoroll(dat["target"]).astype(np.int32)
        assert (spec.shape[1] == targ.shape[0])
        S.append(spec)
        T.append(targ)
    S = np.concatenate(S, axis=1)
    T = np.concatenate(T, axis=0)

    for ep in range(epoch):
        sum_loss = 0

        assert (S.shape[1] == T.shape[0])
        randidx = np.random.randint(0,
                                    S.shape[1] - const.CONV_TRAIN_SEQLEN - 1,
                                    S.shape[1] // const.CONV_TRAIN_SEQLEN * 4)
        for i in range(0, randidx.size - const.CONV_TRAIN_BATCH,
                       const.CONV_TRAIN_BATCH):
            x_batch = np.stack([
                S[:, randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            t_batch = np.stack([
                T[randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            x_in = cp.asarray(x_batch)
            t_in = cp.asarray(t_batch)
            model.cleargrads()
            loss = model(x_in, t_in)
            loss.backward()
            opt.update()
            sum_loss += loss.data

        convnet.save(saveas)
        print("epoch: %d/%d  loss:%.04f" %
              (ep + 1, epoch, sum_loss / const.CONV_TRAIN_BATCH))

    convnet.save(saveas)
Example #16
 def __init__(self,
              n_units,
              pool_size,
              epoch=10,
              batch_size=100,
              dropout_rate=(),
              optimizer=optimizers.AdaDelta()):
     super(MaxoutClassifier, self).__init__(n_units, epoch, batch_size,
                                            dropout_rate, optimizer)
     self.pool_size = pool_size
Example #17
 def __init__(self, outputdim, minval, optimizer=None):
     if optimizer is None:
         self.optimizer = chainer.optimizers.Adam()
     else:
         self.optimizer = optimizer
     self.model = GoogLeNetBN(outputdim)
     self.optimizer.setup(self.model)
     self.myOptimizers = [optimizers.Adam(), optimizers.AdaGrad(), optimizers.AdaDelta()]
     self.mindata = -minval[0]
     print(self.mindata)
Example #18
 def initialize_optimizer(self, lr=0.5):
     if self.algorithm == 'SGD':
         self.optimizer = optimizers.SGD(lr=lr)
     elif self.algorithm == 'Adam':
         self.optimizer = optimizers.Adam()
     elif self.algorithm == 'Adagrad':
         self.optimizer = optimizers.AdaGrad()
     elif self.algorithm == 'Adadelta':
         self.optimizer = optimizers.AdaDelta()
     else:
         raise AssertionError('this algorithm is not available')
     self.optimizer.setup(self.model)
Example #19
 def init_optimizer(self):
     if self.optimizer == 'SGD':
         self.optimizer = optimizers.MomentumSGD(lr=self.learning_rate,
                                                 momentum=self.momentum)
     elif self.optimizer == 'AdaDelta':
         self.optimizer = optimizers.AdaDelta()
     elif self.optimizer == 'AdaGrad':
         self.optimizer = optimizers.AdaGrad()
     elif self.optimizer == 'Adam':
         self.optimizer = optimizers.Adam()
     elif self.optimizer == 'RMSprop':
         self.optimizer = optimizers.RMSprop()
Example #20
    def setUp(self):
        fd, path = tempfile.mkstemp()
        os.close(fd)
        self.temp_file_path = path

        child = link.Chain(linear=links.Linear(2, 3))
        child.add_param('Wc', (2, 3))
        self.parent = link.Chain(child=child)
        self.parent.add_param('Wp', (2, 3))

        self.optimizer = optimizers.AdaDelta()
        self.optimizer.setup(self.parent)
Example #21
def main():
    state = make_initial_state()
    accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))
    for epoch in range(args.n_epoch):
        print('epoch %d/%d: ' % (epoch + 1, args.n_epoch))
        log_ppl = 0.0
        trained = 0

        opt = optimizers.AdaDelta()
        opt.setup(model)

        for batch in generate_batch(args.corpus, args.batchsize):
            batch = [[vocab[x] for x in words] for words in batch]
            K = len(batch)
            if K != args.batchsize:
                break
            L = len(batch[0]) - 1

            opt.zero_grads()

            for l in range(L):
                x_batch = xp.array([batch[k][l] for k in range(K)],
                                   dtype=np.int32)
                y_batch = xp.array([batch[k][l + 1] for k in range(K)],
                                   dtype=np.int32)
                state, loss_i = forward_one_step(x_batch, y_batch, state)
                accum_loss += loss_i
                accum_loss.backward()

                log_ppl += accum_loss.data.reshape(()) * K

            accum_loss.unchain_backward()  # truncate
            accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))

            opt.clip_grads(grad_clip)
            opt.update()

            trained += K
            sys.stderr.write('\r  %d/%d' % (trained, num_lines))
            sys.stderr.flush()

        log_ppl /= float(num_words)
        print('  log(PPL) = %.10f' % log_ppl)
        # print('  PPL      = %.10f' % math.exp(log_ppl))

        if (epoch + 1) % 5 == 0:
            print("save model")
            model_name = "%s/kokkai_lstm_lm.epoch%d" % (args.model, epoch + 1)
            cPickle.dump(copy.deepcopy(model).to_cpu(), open(model_name, 'wb'))

    print('training finished.')
Example #22
def cross_optimizers(opt):
    if opt == 'SGD':
        optimizer = optimizers.SGD()
    elif opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD()
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad()
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop()
    elif opt == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif opt == 'Adam':
        optimizer = optimizers.Adam()
    return copy.deepcopy(optimizer)
Example #23
 def __init__(self,
              n_units,
              epoch=10,
              batch_size=100,
              dropout_rate=(),
              optimizer=optimizers.AdaDelta()):
     self.n_units = n_units
     self.epoch = epoch
     self.batch_size = batch_size
     self.__constructed = False
     self.pre_trained = False
     self.optimizer = optimizer
     if dropout_rate:
         self.dropout_rate = dropout_rate
     else:
         self.dropout_rate = [.5 for i in n_units]
Example #24
def select_optimizer(name):
    if name == "AdaGrad":
        optimizer = optimizers.AdaGrad(lr=0.001)
    elif name == "Adam":
        optimizer = chainer.optimizers.Adam(alpha=0.0001)
    elif name == "MomentumSGD":
        optimizer = optimizers.MomentumSGD(lr=0.01)
    elif name == "RMSprop":
        optimizer = optimizers.RMSprop(lr=0.01)
    elif name == "SGD":
        optimizer = optimizers.SGD(lr=0.01)
    elif name == "AdaDelta":
        optimizer = optimizers.AdaDelta(rho=0.9)
    else:
        raise Exception("Unknown network optimizer: " + args.optimizer)
    return optimizer
Example #25
    def _build_optimizer(self, trial, model):
        # choice of optimizer function
        optimizer_name = trial.suggest_categorical(
            'optimizer', ['Adam', 'AdaDelta', 'RMSprop'])
        if optimizer_name == 'Adam':
            adam_alpha = trial.suggest_loguniform('adam_alpha', 1e-5, 1e-1)
            optimizer = optimizers.Adam(alpha=adam_alpha)
        elif optimizer_name == "AdaDelta":
            optimizer = optimizers.AdaDelta()
        elif optimizer_name == "RMSprop":
            optimizer = optimizers.RMSprop()

        weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        return optimizer
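
For context, a self-contained sketch of how a trial-driven optimizer choice like the one above plugs into an Optuna study; the toy linear model, the search space, and the constant score returned by `objective` are illustrative assumptions, not part of the original code.

import optuna
import chainer.links as L
from chainer import optimizers

def objective(trial):
    model = L.Linear(4, 2)  # toy model, placeholder only
    name = trial.suggest_categorical('optimizer', ['Adam', 'AdaDelta', 'RMSprop'])
    if name == 'Adam':
        opt = optimizers.Adam(alpha=trial.suggest_loguniform('adam_alpha', 1e-5, 1e-1))
    elif name == 'AdaDelta':
        opt = optimizers.AdaDelta()
    else:
        opt = optimizers.RMSprop()
    opt.setup(model)
    # a real objective would train the model here and return a validation metric
    return 0.0

study = optuna.create_study()
study.optimize(objective, n_trials=5)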
Example #26
    def setUp(self):
        fd, path = tempfile.mkstemp()
        os.close(fd)
        self.temp_file_path = path

        child = link.Chain(linear=links.Linear(2, 3))
        child.add_param('Wc', (2, 3))
        self.parent = link.Chain(child=child)
        self.parent.add_param('Wp', (2, 3))

        self.optimizer = optimizers.AdaDelta()
        self.optimizer.setup(self.parent)

        self.parent.zerograds()
        self.optimizer.update()  # init all states

        self.savez = numpy.savez_compressed if self.compress else numpy.savez
Example #27
 def get_optimizer(self, name, lr, momentum=0.9):
     if name.lower() == "adam":
         return optimizers.Adam(alpha=lr, beta1=momentum)
     if name.lower() == "smorms3":
         return optimizers.SMORMS3(lr=lr)
     if name.lower() == "adagrad":
         return optimizers.AdaGrad(lr=lr)
     if name.lower() == "adadelta":
         return optimizers.AdaDelta(rho=momentum)
     if name.lower() == "nesterov" or name.lower() == "nesterovag":
         return optimizers.NesterovAG(lr=lr, momentum=momentum)
     if name.lower() == "rmsprop":
         return optimizers.RMSprop(lr=lr, alpha=momentum)
     if name.lower() == "momentumsgd":
          return optimizers.MomentumSGD(lr=lr, momentum=momentum)
     if name.lower() == "sgd":
         return optimizers.SGD(lr=lr)
Example #28
File: utils.py  Project: arne-cl/utils
def get_optimizer(name):
    """
    :type name: str
    :rtype: chainer.Optimizer
    """
    if name == "adadelta":
        opt = optimizers.AdaDelta()
    elif name == "adagrad":
        opt = optimizers.AdaGrad()
    elif name == "adam":
        opt = optimizers.Adam()
    elif name == "rmsprop":
        opt = optimizers.RMSprop()
    elif name == "smorms3":
        opt = optimizers.SMORMS3()
    else:
        raise ValueError("Unknown optimizer_name=%s" % name)
    return opt
Example #29
def create_classifier(n_vocab, doc_length, wv_size, filter_sizes, hidden_units,
                      output_channel, initialW, non_static, batch_size, epoch,
                      gpu):
    model = NNModel(n_vocab=n_vocab,
                    doc_length=doc_length,
                    wv_size=wv_size,
                    filter_sizes=filter_sizes,
                    hidden_units=hidden_units,
                    output_channel=output_channel,
                    initialW=initialW,
                    non_static=non_static)
    #    optimizer = optimizers.Adam()
    optimizer = optimizers.AdaDelta()
    return (model,
            ChainerEstimator(model=SoftmaxCrossEntropyClassifier(model),
                             optimizer=optimizer,
                             batch_size=batch_size,
                             device=gpu,
                             stop_trigger=(epoch, 'epoch')))
Example #30
    def setUp(self):
        fd, path = tempfile.mkstemp()
        os.close(fd)
        self.temp_file_path = path

        child = link.Chain()
        with child.init_scope():
            child.linear = links.Linear(2, 3)
            child.Wc = chainer.Parameter(shape=(2, 3))

        self.parent = link.Chain()
        with self.parent.init_scope():
            self.parent.child = child
            self.parent.Wp = chainer.Parameter(shape=(2, 3))

        self.optimizer = optimizers.AdaDelta()
        self.optimizer.setup(self.parent)

        self.parent.cleargrads()
        self.optimizer.update()  # init states