def setup_optimizers(self):
    params = self.params

    self.causal_conv_optimizers = []
    for layer in self.causal_conv_layers:
        opt = optimizers.NesterovAG(lr=params.learning_rate, momentum=params.gradient_momentum)
        opt.setup(layer)
        opt.add_hook(optimizer.WeightDecay(params.weight_decay))
        opt.add_hook(GradientClipping(params.gradient_clipping))
        self.causal_conv_optimizers.append(opt)

    self.residual_conv_optimizers = []
    for layer in self.residual_conv_layers:
        opt = optimizers.NesterovAG(lr=params.learning_rate, momentum=params.gradient_momentum)
        opt.setup(layer)
        opt.add_hook(optimizer.WeightDecay(params.weight_decay))
        opt.add_hook(GradientClipping(params.gradient_clipping))
        self.residual_conv_optimizers.append(opt)

    self.softmax_conv_optimizers = []
    for layer in self.softmax_conv_layers:
        opt = optimizers.NesterovAG(lr=params.learning_rate, momentum=params.gradient_momentum)
        opt.setup(layer)
        opt.add_hook(optimizer.WeightDecay(params.weight_decay))
        opt.add_hook(GradientClipping(params.gradient_clipping))
        self.softmax_conv_optimizers.append(opt)
def setup_optimizers(self):
    params = self.params

    self.causal_conv_optimizers = []
    for layer in self.causal_conv_layers:
        opt = optimizers.Adam(alpha=params.learning_rate, beta1=params.gradient_momentum)
        opt.setup(layer)
        opt.add_hook(optimizer.WeightDecay(params.weight_decay))
        opt.add_hook(GradientClipping(params.gradient_clipping))
        self.causal_conv_optimizers.append(opt)

    self.residual_conv_optimizers = []
    for block in self.residual_blocks:
        for layer in block:
            opt = optimizers.Adam(alpha=params.learning_rate, beta1=params.gradient_momentum)
            opt.setup(layer)
            opt.add_hook(optimizer.WeightDecay(params.weight_decay))
            opt.add_hook(GradientClipping(params.gradient_clipping))
            self.residual_conv_optimizers.append(opt)

    self.softmax_conv_optimizers = []
    for layer in self.softmax_conv_layers:
        opt = optimizers.Adam(alpha=params.learning_rate, beta1=params.gradient_momentum)
        opt.setup(layer)
        opt.add_hook(optimizer.WeightDecay(params.weight_decay))
        opt.add_hook(GradientClipping(params.gradient_clipping))
        self.softmax_conv_optimizers.append(opt)
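# The two setup_optimizers variants above repeat the same optimizer-per-layer
# block for every layer group. A minimal sketch of a factored helper, assuming
# the same chainer imports used above; the name make_layer_optimizer is
# hypothetical, not from the original code.
from chainer import optimizer, optimizers


def make_layer_optimizer(layer, learning_rate, momentum, weight_decay, clipping):
    # one optimizer per layer, with weight-decay and gradient-clipping hooks
    opt = optimizers.Adam(alpha=learning_rate, beta1=momentum)
    opt.setup(layer)
    opt.add_hook(optimizer.WeightDecay(weight_decay))
    opt.add_hook(optimizer.GradientClipping(clipping))
    return opt

# usage, following the pattern above:
#   self.causal_conv_optimizers = [
#       make_layer_optimizer(layer, params.learning_rate, params.gradient_momentum,
#                            params.weight_decay, params.gradient_clipping)
#       for layer in self.causal_conv_layers]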
def load(cls, name):
    params = Trainer.load_params(name)
    print('nz: {}'.format(params['nz']))
    print('epoch: {} / {}'.format(params['current_epoch'], params['epoch']))
    print('train: {}'.format(params['train']))
    print('batchsize: {}'.format(params['batchsize']))

    dcgan = DCGAN(params['nz'])
    opt_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_gen.setup(dcgan.gen)
    opt_dis.setup(dcgan.dis)
    opt_gen.add_hook(optimizer.WeightDecay(0.00001))
    opt_dis.add_hook(optimizer.WeightDecay(0.00001))

    filenames = Trainer.get_model_filenames(name, params['current_epoch'])
    model_dir = os.path.join(Trainer.MODEL_DIR, name)
    serializers.load_hdf5(os.path.join(model_dir, filenames['model_gen']), dcgan.gen)
    serializers.load_hdf5(os.path.join(model_dir, filenames['model_dis']), dcgan.dis)
    serializers.load_hdf5(os.path.join(model_dir, filenames['opt_gen']), opt_gen)
    serializers.load_hdf5(os.path.join(model_dir, filenames['opt_dis']), opt_dis)
    return cls(name, params, dcgan, opt_gen, opt_dis)
def setLR(self, lr=0.002):
    self.gen_opt = optimizers.Adam(alpha=lr)
    self.gen_opt.setup(self.generator)
    self.gen_opt.add_hook(optimizer.WeightDecay(0.0001))

    self.dis_opt = optimizers.Adam(alpha=lr)
    self.dis_opt.setup(self.discriminator)
    self.dis_opt.add_hook(optimizer.WeightDecay(0.0001))
def test_call_hooks_uninitialized_param(self):
    target = UninitializedChain()
    opt = optimizers.MomentumSGD()
    opt.setup(target)
    opt.add_hook(optimizer.WeightDecay(rate=0.0005))
    target(np.ones((4, 10), dtype=np.float32))
    opt.call_hooks()
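# The test above depends on an UninitializedChain whose parameters are only
# allocated on the first forward pass, so call_hooks() must tolerate
# uninitialized parameters. A minimal sketch of such a chain, assuming standard
# Chainer lazy initialization (in_size=None); the real test fixture may differ.
import numpy as np
import chainer
import chainer.links as L


class UninitializedChain(chainer.Chain):
    def __init__(self):
        super(UninitializedChain, self).__init__()
        with self.init_scope():
            # in_size=None defers weight allocation until the first call
            self.linear = L.Linear(None, 3)

    def __call__(self, x):
        return self.linear(x)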
def setup_optimizer(self, optimizer_name, gradient_clipping=3, weight_decay=0.00001, **kwargs):
    # set optimizer
    if optimizer_name == "Adam":
        self.opt = optimizers.Adam(**kwargs)
    elif optimizer_name == "AdaDelta":
        self.opt = optimizers.AdaDelta(**kwargs)
    elif optimizer_name == "AdaGrad":
        self.opt = optimizers.AdaGrad(**kwargs)
    elif optimizer_name == "RMSprop":
        self.opt = optimizers.RMSprop(**kwargs)
    elif optimizer_name == "RMSpropGraves":
        self.opt = optimizers.RMSpropGraves(**kwargs)
    elif optimizer_name == "SGD":
        self.opt = optimizers.SGD(**kwargs)
    elif optimizer_name == "MomentumSGD":
        self.opt = optimizers.MomentumSGD(**kwargs)
    # self.opt.use_cleargrads()
    self.opt.setup(self)
    self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
    self.opt.add_hook(optimizer.WeightDecay(weight_decay))
    self.opt_params = {
        "optimizer_name": optimizer_name,
        "gradient_clipping": gradient_clipping,
        "weight_decay": weight_decay
    }
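# A shorter dispatch with the same effect resolves the optimizer class by name,
# as the tagger script further below does with getattr(optimizers, args.opt)().
# A sketch, assuming name matches a class in chainer.optimizers and kwargs match
# that class's constructor (e.g. alpha for Adam, lr for MomentumSGD):
from chainer import optimizers


def make_optimizer(name, **kwargs):
    # e.g. make_optimizer("Adam", alpha=1e-3) or make_optimizer("MomentumSGD", lr=0.01)
    return getattr(optimizers, name)(**kwargs)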
def train(args):
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size, args.maxout_hidden_size, args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()
    if args.source_validation:
        if os.path.exists(PLOT_DIR) == False:
            os.mkdir(PLOT_DIR)
        fp_loss = open(PLOT_DIR + "loss", "w")
        fp_loss_val = open(PLOT_DIR + "loss_val", "w")

    opt = optimizers.AdaDelta(args.rho, args.eps)
    opt.setup(att_encdec)
    opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
    opt.add_hook(optimizer.GradientClipping(CLIP_THR))

    for epoch in xrange(args.epochs):
        print "--- epoch: %s/%s ---" % (epoch + 1, args.epochs)
        source_gen = word_list(args.source)
        target_gen = word_list(args.target)
        batch_gen = batch(sort(source_gen, target_gen, 100 * args.minibatch), args.minibatch)
        n = 0
        total_loss = 0.0
        for source_batch, target_batch in batch_gen:
            n += len(source_batch)
            source_batch = fill_batch_end(source_batch)
            target_batch = fill_batch_end(target_batch)
            hyp_batch, loss = forward(source_batch, target_batch, source_vocab,
                                      target_vocab, att_encdec, True, 0)
            total_loss += loss.data * len(source_batch)
            closed_test(source_batch, target_batch, hyp_batch)
            loss.backward()
            opt.update()
            print "[n=%s]" % (n)
        print "[total=%s]" % (n)

        prefix = args.model_path + '%s' % (epoch + 1)
        serializers.save_hdf5(prefix + '.attencdec', att_encdec)
        if args.source_validation:
            total_loss_val, n_val = validation_test(args, att_encdec, source_vocab, target_vocab)
            fp_loss.write("\t".join([str(epoch), str(total_loss / n) + "\n"]))
            fp_loss_val.write("\t".join([str(epoch), str(total_loss_val / n_val) + "\n"]))
            fp_loss.flush()
            fp_loss_val.flush()
        hyp_params = att_encdec.get_hyper_params()
        Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
        source_vocab.save(args.model_path + SRC_VOCAB_NAME)
        target_vocab.save(args.model_path + TAR_VOCAB_NAME)

    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
    source_vocab.save(args.model_path + SRC_VOCAB_NAME)
    target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    if args.source_validation:
        fp_loss.close()
        fp_loss_val.close()
def create(cls, name, params):
    merged_params = {}
    merged_params.update(Trainer.DEFAULT_PARAMS)
    merged_params.update(params)
    assert merged_params['nz'] >= 0
    assert merged_params['epoch'] >= 0
    assert merged_params['train'] >= 0
    assert merged_params['batchsize'] >= 0

    dcgan = DCGAN(merged_params['nz'])
    opt_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_gen.setup(dcgan.gen)
    opt_dis.setup(dcgan.dis)
    opt_gen.add_hook(optimizer.WeightDecay(0.00001))
    opt_dis.add_hook(optimizer.WeightDecay(0.00001))
    return cls(name, merged_params, dcgan, opt_gen, opt_dis)
def check_weight_decay(self):
    w = self.target.param.data
    g = self.target.param.grad

    decay = 0.2
    expect = w - g - decay * w

    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(optimizer.WeightDecay(decay))
    opt.update()

    testing.assert_allclose(expect, w)
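# The expectation above follows from plain SGD with lr=1: the WeightDecay hook
# adds decay * w to each gradient before the update, so
#   w_new = w - lr * (g + decay * w) = w - g - decay * w.
# A self-contained numeric version of the same check, using a hypothetical
# one-parameter link rather than the test's self.target fixture:
import numpy as np
import chainer
from chainer import optimizer, optimizers, testing


class OneParam(chainer.Link):
    def __init__(self):
        super(OneParam, self).__init__()
        with self.init_scope():
            self.param = chainer.Parameter(np.array([1.0, -2.0], dtype=np.float32))


target = OneParam()
target.param.grad = np.array([0.5, 0.5], dtype=np.float32)
decay = 0.2
expect = target.param.data - target.param.grad - decay * target.param.data

opt = optimizers.SGD(lr=1)
opt.setup(target)
opt.add_hook(optimizer.WeightDecay(decay))
opt.update()  # runs the hook, then applies the SGD step in place
testing.assert_allclose(expect, target.param.data)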
def train(X, t, hidden_n, weight_decay):
    print(hidden_n, weight_decay)
    model = AnimeChain(X.shape[1], hidden_n)
    optimizer = OS.AdaGrad()
    optimizer.setup(model)
    if weight_decay:
        optimizer.add_hook(O.WeightDecay(weight_decay))
    for e in range(1500):
        V_X = Variable(X)
        V_t = Variable(np.array(t, dtype='int32'))
        V_y = model(V_X)
        model.zerograds()
        loss = F.softmax_cross_entropy(V_y, V_t)
        loss.backward()
        optimizer.update()
        # print(e, loss.data)
        if loss.data < 0.001:
            break
    return model
def train(args):
    if args.gpu > -1:
        cuda.get_device(args.gpu).use()
        xp = cuda.cupy
    else:
        xp = np

    if args.log:
        log_dir = args.log
    else:
        log_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                               '{}_{}'.format(DIR_NAME, datetime.now().strftime('%Y%m%d_%H:%M')))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # setting for logging
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(log_dir, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    logger.info('Arguments...')
    for arg, val in vars(args).items():
        logger.info('{} : {}'.format(arg, val))

    logger.info('Loading Vocab...')
    vocab = Vocab()
    vocab.load(args.vocab, args.lowercase)
    vocab.add_special_token()
    sufvocab = Vocab()
    sufvocab.load(args.sufvocab, args.lowercase)
    sufvocab.add_special_token(['s>', '<UNK>'])
    pos2id = Vocab()
    pos2id.load(args.poslist)

    logger.info('preparation for training data...')
    out_path = making_data(args.train_data, args.window)

    model = WordCSnnTagger(args.wembed, args.fembed, args.hidden, len(vocab),
                           len(sufvocab), len(pos2id), args.window, args.objct, args.alpha)
    model.save_model_config(log_dir)
    if args.gpu > -1:
        model.to_gpu()

    opt = getattr(optimizers, args.opt)()
    opt.setup(model)
    opt.add_hook(optimizer.GradientClipping(args.gclip))
    opt.add_hook(optimizer.WeightDecay(args.wdecay))

    for epoch in range(args.epoch):
        logger.info('START epoch {}/{}'.format(epoch + 1, args.epoch))
        start = time.time()
        sum_loss = xp.zeros((), dtype=xp.float32)
        n_data = 0
        n_correct = 0
        for i, [tags, contexts] in enumerate(line_iter(out_path, args.minibatch)):
            batch_ts = xp.array([pos2id[tag] for tag in tags], dtype=xp.int32)
            # capitalization features must be taken before lowercasing
            # (the original recomputed batch_caps after lowercasing, which
            # would have erased the feature; the duplicate is removed here)
            batch_caps = xp.array([[get_capf(word) for word in context]
                                   for context in contexts], dtype=xp.int32)
            if args.lowercase:
                contexts = [[word.lower() for word in context] for context in contexts]
            batch_xs = xp.array([[vocab[word] for word in context]
                                 for context in contexts], dtype=xp.int32)
            batch_sufs = xp.array([[sufvocab[word[-2:]] for word in context]
                                   for context in contexts], dtype=xp.int32)
            batch_features = [batch_xs, batch_sufs, batch_caps]
            cur_batch_size = batch_ts.shape[0]
            ys, loss = model(batch_features, batch_ts)
            sum_loss += loss.data * cur_batch_size
            model.zerograds()
            loss.backward()
            opt.update()
            pred_labels = ys.data.argmax(1)
            n_correct += sum(1 for j in range(cur_batch_size) if pred_labels[j] == batch_ts[j])
            n_data += cur_batch_size
            logger.info('done {} batches'.format(i + 1))

        logger.info('{} epoch train loss = {}'.format(epoch + 1, sum_loss))
        logger.info('{} epoch train accuracy = {}'.format(epoch + 1, float(n_correct / n_data)))
        logger.info('{} sec for training per epoch'.format(time.time() - start))

        if args.valid_data:
            start = time.time()
            valid_loss, valid_accuracy = evaluation(model, args.valid_data, pos2id, vocab, sufvocab, args)
            logger.info('{} epoch valid loss = {}'.format(epoch + 1, valid_loss))
            logger.info('{} epoch valid accuracy = {}'.format(epoch + 1, valid_accuracy))
            logger.info('{} sec for validation per epoch'.format(time.time() - start))

        if args.test_data:
            start = time.time()
            test_loss, test_accuracy = evaluation(model, args.test_data, pos2id, vocab, sufvocab, args)
            logger.info('{} epoch test loss = {}'.format(epoch + 1, test_loss))
            logger.info('{} epoch test accuracy = {}'.format(epoch + 1, test_accuracy))
            logger.info('{} sec for testing per epoch'.format(time.time() - start))

        logger.info('serializing...')
        prefix = '{}_{}ep_{}wembed_{}fembed_{}hidden_{}window_{}minibatch_{}opt'.format(
            DIR_NAME, epoch + 1, args.wembed, args.fembed, args.hidden,
            args.window, args.minibatch, args.opt)
        model_path = os.path.join(log_dir, prefix + '.model')
        model.save(model_path)

    logger.info('done training')
if __name__ == "__main__": set_seed() log_tracer = LogTracer(nn_type, sep_mode) log_tracer("get train data") train, test, n_vocab = get_train_data(pad, sep_mode) log_tracer.trace_label("train", train) log_tracer.trace_label("test", test) if nn_type == "lstm": mlp = LSTM(n_vocab, n_units, N_OUT) elif nn_type == "cnn": mlp = CNN(n_vocab, n_units, N_OUT) opt = optimizers.Adam() opt.setup(mlp) opt.add_hook(optimizer.WeightDecay(w_decay)) opt.add_hook(optimizer.GradientClipping(g_clip)) log_tracer("start train") for epoch in range(n_epoch): for x, t in generate_bath(train, n_batch): mlp.cleargrads() loss, acc = mlp(x, t, train=True) loss.backward() opt.update() log_tracer.trace_train(epoch, loss.data, acc.data) x_v, t_v = parse_batch(test) loss_v, acc_v = mlp(x_v, t_v) log_tracer.trace_test(epoch, loss_v.data, acc_v.data, True) mlp.save(sep_mode)
validate_iter = iterators.SerialIterator(validate_dataset, args.batchsize,
                                         repeat=False, shuffle=False)

# model
model = model.make_model(V, args.embed_dim, args.channel_num, args.rnn_dim, args.fc_dim, C)
classifier = L.Classifier(model)
if args.gpu >= 0:
    classifier.to_gpu()

# optimizer (the local name shadows the chainer.optimizer module,
# hence the optimizer_ alias for the hook)
optimizer = optimizers.Adam()
optimizer.setup(classifier)
optimizer.add_hook(optimizer_.WeightDecay(1e-3))

# trainer
updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

# extensions
log_report = E.LogReport(trigger=(10, 'iteration'))
print_report = E.PrintReport([
    'epoch', 'iteration', 'main/loss', 'main/accuracy',
    'validation/main/loss', 'validation/main/accuracy', 'elapsed_time'
])
evaluator = evaluator_.Evaluator(validate_iter, classifier, device=args.gpu)
trainer.extend(log_report)
# get the correct total length
n_train = train_data._length

# the discriminator is tasked with classifying examples as real or fake
Disc = CustomClassifier(predictor=d.Discriminator(latent_dim), lossfun=f.sigmoid_cross_entropy)
Disc.compute_accuracy = False
Gen = g.Generator()
Enc = e.Encoder(latent_dim)

# Use Adam optimizer
# learning rate, beta1, beta2
disc_optimizer = optimizers.Adam(initial_alpha, beta1=beta_1, beta2=beta_2)
disc_optimizer.setup(Disc)
# using those parameters for all optimizers: 0.36 --> 0.61
disc_optimizer.add_hook(optimizer.WeightDecay(rate=weight_decay))

gen_optimizer = optimizers.Adam(initial_alpha, beta1=beta_1, beta2=beta_2)
gen_optimizer.setup(Gen)
gen_optimizer.add_hook(optimizer.WeightDecay(rate=weight_decay))

enc_optimizer = optimizers.Adam(initial_alpha, beta1=beta_1, beta2=beta_2)
enc_optimizer.setup(Enc)
enc_optimizer.add_hook(optimizer.WeightDecay(rate=weight_decay))

# Define iterator
train_iter_X = i.SerialIterator(train_data, batch_size=batchsize, repeat=True, shuffle=True)
def main():
    args = parse_args()
    XP.set_library(args)

    date = time.localtime()[:6]
    D = []
    for i in date:
        D.append(str(i))
    D = "_".join(D)

    save_path = args.save_path
    if os.path.exists(save_path) == False:
        os.mkdir(save_path)

    if args.model_path != None:
        print("continue existed model!! load recipe of {}".format(args.model_path))
        with open(args.model_path + '/recipe.json', 'r') as f:
            recipe = json.load(f)
        vae_enc = recipe["network"]["IM"]["vae_enc"]
        vae_z = recipe["network"]["IM"]["vae_z"]
        vae_dec = recipe["network"]["IM"]["vae_dec"]
        times = recipe["network"]["IM"]["times"]
        alpha = recipe["network"]["IM"]["KLcoefficient"]
        batchsize = recipe["setting"]["batchsize"]
        maxepoch = args.maxepoch
        weightdecay = recipe["setting"]["weightdecay"]
        grad_clip = recipe["setting"]["grad_clip"]
        cur_epoch = recipe["setting"]["cur_epoch"] + 1
        ini_lr = recipe["setting"]["initial_learningrate"]
        cur_lr = recipe["setting"]["cur_lr"]
        with open(args.model_path + "/../trainloss.json", 'r') as f:
            trainloss_dic = json.load(f)
        with open(args.model_path + "/../valloss.json", 'r') as f:
            valloss_dic = json.load(f)
    else:
        vae_enc = args.vae_enc
        vae_z = args.vae_z
        vae_dec = args.vae_dec
        times = args.times
        alpha = args.alpha
        batchsize = args.batchsize
        maxepoch = args.maxepoch
        weightdecay = args.weightdecay
        grad_clip = 5
        cur_epoch = 0
        ini_lr = args.lr
        cur_lr = ini_lr
        trainloss_dic = {}
        valloss_dic = {}

    print('this experiment started at :{}'.format(D))
    print('***Experiment settings***')
    print('[IM]vae encoder hidden size :{}'.format(vae_enc))
    print('[IM]vae hidden layer size :{}'.format(vae_z))
    print('[IM]vae decoder hidden layer size :{}'.format(vae_dec))
    print('[IM]sequence length:{}'.format(times))
    print('max epoch :{}'.format(maxepoch))
    print('mini batch size :{}'.format(batchsize))
    print('initial learning rate :{}'.format(cur_lr))
    print('weight decay :{}'.format(weightdecay))
    print("optimization by :{}".format("Adam"))
    print("VAE KL coefficient:", alpha)
    print('*************************')

    vae = VAE_bernoulli_noattention(vae_enc, vae_z, vae_dec, 28, 28, 1)
    opt = optimizers.Adam(alpha=cur_lr)
    opt.setup(vae)
    if args.model_path != None:
        print('loading model ...')
        serializers.load_npz(args.model_path + '/VAEweights', vae)
        serializers.load_npz(args.model_path + '/optimizer', opt)
    else:
        print('making [[new]] model ...')
        for param in vae.params():
            data = param.data
            data[:] = np.random.uniform(-0.1, 0.1, data.shape)
    opt.add_hook(optimizer.GradientClipping(grad_clip))
    opt.add_hook(optimizer.WeightDecay(weightdecay))

    if args.gpu >= 0:
        vae.to_gpu()

    mnist = MNIST(binarize=True)
    train_size = mnist.train_size
    test_size = mnist.test_size
    eps = 1e-8

    for epoch in range(cur_epoch + 1, maxepoch + 1):
        print('\nepoch {}'.format(epoch))
        LX = 0.0
        LZ = 0.0
        counter = 0
        for iter, (img_array, label_array) in enumerate(mnist.gen_train(batchsize, Random=True)):
            B = img_array.shape[0]
            Lz = XP.fzeros(())
            vae.reset(img_array)
            # first to T-1 step
            for j in range(times - 1):
                y, kl = vae.free_energy_onestep()
                Lz_i = alpha * kl
                Lz += Lz_i
            # last step
            j += 1
            y, kl = vae.free_energy_onestep()
            Lz_i = alpha * kl
            Lz += Lz_i
            Lx = Bernoulli_nll_wesp(vae.x, y, eps)
            LZ += Lz.data
            LX += Lx.data
            loss = (Lx + Lz) / batchsize
            loss.backward()
            opt.update()
            counter += B
            sys.stdout.write('\rnow training ... epoch {}, {}/{} '.format(epoch, counter, mnist.train_size))
            sys.stdout.flush()
            if (iter + 1) % 100 == 0:
                print("({}-th batch mean loss) Lx:%03.3f Lz:%03.3f".format(counter) % (Lx.data / B, Lz.data / B))

        img_array = cuda.to_cpu(y.data)
        im_array = img_array.reshape(batchsize * 28, 28)
        img = im_array[:28 * 5]
        plt.clf()
        plt.imshow(img, cmap=cm.gray)
        plt.colorbar(orientation='horizontal')
        plt.savefig(save_path + "/" + "img{}.png".format(epoch))

        trace(save_path + "/trainloss.txt",
              "epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch, LX / train_size, LZ / train_size,
                                                     (LX + LZ) / train_size))
        trainloss_dic[str(epoch).zfill(3)] = {
            "Lx": float(LX / train_size),
            "Lz": float(LZ / train_size),
            "Lx+Lz": float((LX + LZ) / train_size)}
        with open(save_path + "/trainloss.json", 'w') as f:
            json.dump(trainloss_dic, f, indent=4)

        print('save model ...')
        prefix = save_path + "/" + str(epoch).zfill(3)
        if os.path.exists(prefix) == False:
            os.mkdir(prefix)
        serializers.save_npz(prefix + '/VAEweights', vae)
        serializers.save_npz(prefix + '/optimizer', opt)

        print('save recipe...')
        recipe_dic = {
            "date": D,
            "setting": {
                "maxepoch": maxepoch,
                "batchsize": batchsize,
                "weightdecay": weightdecay,
                "grad_clip": grad_clip,
                "opt": "Adam",
                "initial_learningrate": ini_lr,
                "cur_epoch": epoch,
                "cur_lr": cur_lr},
            "network": {
                "IM": {
                    "x_size": 784,
                    "vae_enc": vae_enc,
                    "vae_z": vae_z,
                    "vae_dec": vae_dec,
                    "times": times,
                    "KLcoefficient": alpha},
            },
        }
        with open(prefix + '/recipe.json', 'w') as f:
            json.dump(recipe_dic, f, indent=4)

        if epoch % 1 == 0:
            print("\nvalidation step")
            LX = 0.0
            LZ = 0.0
            counter = 0
            for iter, (img_array, label_array) in enumerate(mnist.gen_test(batchsize)):
                B = img_array.shape[0]
                Lz = XP.fzeros(())
                vae.reset(img_array)
                # first to T-1 step
                for j in range(times - 1):
                    y, kl = vae.free_energy_onestep()
                    Lz_i = alpha * kl
                    Lz += Lz_i
                # last step
                j += 1
                y, kl = vae.free_energy_onestep()
                Lz_i = alpha * kl
                Lz += Lz_i
                Lx = Bernoulli_nll_wesp(vae.x, y, eps)
                LZ += Lz.data.reshape(())
                LX += Lx.data.reshape(())
                counter += B
                sys.stdout.write('\rnow testing ... epoch {}, {}/{} '.format(epoch, counter, test_size))
                sys.stdout.flush()
            print("")

            trace(save_path + "/valloss.txt",
                  "epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch, LX / test_size, LZ / test_size,
                                                         (LX + LZ) / test_size))
            valloss_dic[str(epoch).zfill(3)] = {
                "Lx": float(LX / test_size),
                "Lz": float(LZ / test_size),
                "Lx+Lz": float((LX + LZ) / test_size)}
            with open(save_path + "/valloss.json", 'w') as f:
                json.dump(valloss_dic, f, indent=4)

            img_array = cuda.to_cpu(y.data)
            im_array = img_array.reshape(batchsize * 28, 28)
            img = im_array[:28 * 5]
            plt.clf()
            plt.imshow(img, cmap=cm.gray)
            plt.colorbar(orientation='horizontal')
            plt.savefig(save_path + "/" + "img_test{}.png".format(epoch))
    print('finished.')
def for_one_batch_training(self):
    loss_list = []
    text_count = 0
    model_list = glob.glob("_".join(self.OUTPUT_PATH.format(
        "model", self.FEATURE_TYPE, self.USE_DROPOUT,
        self.num_of_middle_layer, "*", 0).split("_")[:-1]))
    model = Att_Seq2TF(emb_size=self.EMBED_SIZE,
                       fnn_size=self.FNN_SIZE,
                       hidden_size=self.HIDDEN_SIZE,
                       num_of_middle_layer=self.num_of_middle_layer,
                       use_dropout=self.USE_DROPOUT,
                       flag_gpu=self.FLAG_GPU)
    if len(model_list) != 0:
        for model_cand in sorted(model_list, key=lambda x: int(x.split("_")[-2][9:])):
            loss_list.append(float(model_cand[model_cand.find("loss") + 4:model_cand.rfind(".")]))
        serializers.load_hdf5(model_cand, model)
        text_count = int(model_cand.split("_")[-2][9:])
        print(model_cand)
        print(text_count)
        print(loss_list)
    if self.FLAG_GPU:
        model.to_gpu(0)
    model.reset()

    opt = optimizers.Adam()
    opt.setup(model)
    opt.add_hook(optimizer.WeightDecay(0.0005))
    opt.add_hook(optimizer.GradientClipping(5))
    opt_list = glob.glob("_".join(self.OUTPUT_PATH.format(
        "opt", self.FEATURE_TYPE, self.USE_DROPOUT,
        self.num_of_middle_layer, "*", 0).split("_")[:-1]))
    if len(opt_list) != 0:
        opt_list = sorted(opt_list, key=lambda x: int(x.split("_")[-2][9:]))
        serializers.load_hdf5(opt_list[-1], opt)
        print(opt_list[-1])

    print("start...")
    start_time = time.time()
    # start training
    # Note: the original contained large blocks of commented-out code
    # (nine extra training workers minibatch_maker1..9 started via
    # epoch_factory with text_div=1..9, a retry loop around q.get(), and a
    # sentence-vector cache); they are condensed into this note.
    q = Queue(100)
    q_valid = Queue(500)
    q_valid1 = Queue(500)
    minibatch_maker = MinibatchMaker(self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 0)
    p = Process(target=minibatch_maker.epoch_pickle, args=(q, ))
    p.start()
    minibatch_maker_valid = MinibatchMaker(self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=0)
    p_valid = Process(target=minibatch_maker_valid.epoch_pickle, args=(q_valid, ))
    p_valid.start()
    minibatch_maker_valid1 = MinibatchMaker(self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=1)
    p_valid1 = Process(target=minibatch_maker_valid1.epoch_pickle, args=(q_valid1, ))
    p_valid1.start()
    valid_len = q_valid.get()
    valid_len1 = q_valid1.get()
    print("altsvm" + str(valid_len))
    print("altsvm" + str(valid_len1))

    waited_count = 0
    verb_data_count = 0
    pseudo_epoch_count = 0
    while waited_count < 100 and (len(loss_list) <= 10 or min(loss_list[-10:]) != loss_list[-10]):
        if not q.empty():
            text_count += self.EPOCH_TEXT
            enc_words, fnn_inputs, dec_scores = q.get()
            N = len(dec_scores)
            verb_data_count += N
            if N != 0:
                # training
                start_time_train = time.time()
                perm = np.random.permutation(N)
                sum_loss = 0
                for i in range(0, N, self.BATCH_SIZE):
                    enc_words_batch = [enc_words[x] for x in perm[i:i + self.BATCH_SIZE]]
                    if self.FLAG_GPU:
                        fnn_inputs_batch = cuda.to_gpu(fnn_inputs[perm[i:i + self.BATCH_SIZE]], device=0)
                        dec_scores_batch = cuda.to_gpu(dec_scores[perm[i:i + self.BATCH_SIZE]], device=0)
                    else:
                        fnn_inputs_batch = fnn_inputs[perm[i:i + self.BATCH_SIZE]]
                        dec_scores_batch = dec_scores[perm[i:i + self.BATCH_SIZE]]
                    # reset the model
                    model.reset()
                    # forward pass
                    model.encode(enc_words_batch)
                    # decoder computation
                    loss = model.decode(fnn_inputs_batch, dec_scores_batch)
                    sum_loss += loss.data * len(dec_scores_batch)
                    loss.backward()
                    opt.update()
                average_loss = sum_loss / N
                interval = int(time.time() - start_time_train)
                # print("training time: {}sec, N: {}".format(interval, N))
                if text_count % 100 == 0:
                    print("text_count: {} train loss: {} verb_data_count: {} time: {}".format(
                        text_count, average_loss, verb_data_count, time.ctime()))

                if verb_data_count // self.EPOCH_LIMIT > pseudo_epoch_count:
                    pseudo_epoch_count += 1
                    total_loss = 0
                    total_count = 0
                    model.mode_change("test")
                    valid_count = 0
                    valid1_count = 0
                    while (valid_count + valid1_count) < (valid_len + valid_len1):
                        if valid_count < valid_len and not q_valid.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid.get()
                            valid_count += 1
                        elif valid1_count < valid_len1 and not q_valid1.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid1.get()
                            valid1_count += 1
                        else:
                            print("waiting valid " + str(valid_count) + " " + str(valid1_count))
                            time.sleep(10)
                            continue
                        if len(dec_scores) == 0:
                            continue
                        N = len(dec_scores)
                        for i in range(0, N, self.BATCH_SIZE):
                            enc_words_batch = list(enc_words[i:i + self.BATCH_SIZE])
                            if self.FLAG_GPU:
                                fnn_inputs_batch = cuda.to_gpu(fnn_inputs[i:i + self.BATCH_SIZE], device=0)
                                dec_scores_batch = cuda.to_gpu(dec_scores[i:i + self.BATCH_SIZE], device=0)
                            else:
                                fnn_inputs_batch = fnn_inputs[i:i + self.BATCH_SIZE]
                                dec_scores_batch = dec_scores[i:i + self.BATCH_SIZE]
                            # reset the model
                            model.reset()
                            if len(enc_words_batch) == 0:
                                print(len(enc_words))
                                print(len(dec_scores_batch))
                                print(i)
                                exit()
                            with chainer.no_backprop_mode():
                                # forward pass
                                model.encode(enc_words_batch)
                                # decoder computation
                                loss_data = model.decode(fnn_inputs_batch, dec_scores_batch).data
                            if not self.ARR.isnan(loss_data):
                                total_loss += loss_data * len(dec_scores_batch)
                                total_count += len(dec_scores_batch)
                            else:
                                print(loss_data)
                    if total_count == 0:
                        print("skipped")
                        continue
                    valid_loss = float(total_loss / total_count)
                    model.mode_change("train")
                    print("valid_count: {} valid loss: {} time: {}".format(
                        verb_data_count // self.EPOCH_LIMIT, valid_loss, time.ctime()))
                    try:
                        serializers.save_hdf5(self.OUTPUT_PATH.format(
                            "model", self.FEATURE_TYPE, self.USE_DROPOUT,
                            self.num_of_middle_layer, text_count, float(valid_loss)), model)
                        serializers.save_hdf5(self.OUTPUT_PATH.format(
                            "opt", self.FEATURE_TYPE, self.USE_DROPOUT,
                            self.num_of_middle_layer, text_count, float(valid_loss)), opt)
                    except Exception as e:
                        raise e
                    loss_list.append(valid_loss)
            waited_count = 0
        else:
            print("waiting")
            time.sleep(10)
            print(str(text_count) + " " + str(q.qsize()))
            waited_count += 1

    print("end")
    p.terminate()
    p_valid.terminate()
    p_valid1.terminate()
    interval = int(time.time() - start_time)
    print("elapsed time: {}sec, last pseudo_epoch: {}".format(
        interval, str(verb_data_count // self.EPOCH_LIMIT)))
train_iter = iterators.SerialIterator(dataset_train, batch_size=args.batchsize, shuffle=trs)
if args.numval > 0:
    val_iter = iterators.SerialIterator(dataset_val, batch_size=len(dataset_val),
                                        repeat=False, shuffle=False)

# -- Set optimizers
optimizer1 = use_optimizer(lr=args.learning_rate)
optimizer1.setup(loss.phi)
optimizer2 = use_optimizer(lr=args.learning_rate)
optimizer2.setup(loss.net)
optimizer1.add_hook(optimizer_module.Lasso(args.beta))
optimizer2.add_hook(optimizer_module.WeightDecay(args.gamma))

# -- Set a trigger
if args.log_in_iteration:
    trigger = (1, 'iteration')
else:
    trigger = (1, 'epoch')

# -- Set a trainer
if args.fixed_embedder:
    optimizer_dict = {'net': optimizer2}
else:
    optimizer_dict = {'phi': optimizer1, 'net': optimizer2}
updater = lkis.Updater(train_iter, optimizer_dict, device=args.gpu,
                                      shuffle=True)
validation_iter = iterators.SerialIterator(validation, batch_size=bold_val.shape[0],
                                           repeat=False, shuffle=False)
linearmodel = RegressorZ(LinearRegression(bold_vox_dim, args.ndim_z),
                         pretrained_gan=dcgan, featnet=alexnet)

# Set up optimizer
optim = optimizers.Adam()
optim.setup(linearmodel)
if args.do_weightdecay:
    optim.add_hook(optimizer.WeightDecay(args.l2_lambda))

updater = training.StandardUpdater(train_iter, optim, device=args.gpu_device)

# Set up trainer and extensions
trainer = training.Trainer(updater, (args.nepochs, 'epoch'), out=args.outdir)
trainer.extend(extensions.Evaluator(validation_iter, linearmodel, device=args.gpu_device))
trainer.extend(extensions.LogReport(log_name='linearmodel_train.log'))
trainer.extend(extensions.PrintReport(
def __init__(self, model):
    self.model = model
    self.optimizer = optimizers.Adam(lr)
    self.optimizer.setup(model)
    self.optimizer.add_hook(optimizer.WeightDecay(weight_decay))
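# Note on the constructor above: chainer.optimizers.Adam takes alpha as its
# first positional argument, so Adam(lr) silently uses lr as alpha, and both
# lr and weight_decay must exist at module level. An explicit, self-contained
# sketch under those assumptions (values and the Agent class are illustrative):
from chainer import optimizer, optimizers

lr = 1e-3            # assumed module-level learning rate
weight_decay = 1e-4  # assumed module-level decay rate


class Agent(object):  # hypothetical host class for the __init__ above
    def __init__(self, model):
        self.model = model
        self.optimizer = optimizers.Adam(alpha=lr)
        self.optimizer.setup(model)
        self.optimizer.add_hook(optimizer.WeightDecay(weight_decay))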
def train(args):
    vocab = Vocabulary.from_conll(args.train, args.vocab)
    train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)]
    dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)]

    parser = Parser(args.vocab, args.embed, args.hidden)
    if args.gpu >= 0:
        parser.to_gpu()

    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    for epoch in range(args.epoch):
        random.shuffle(train_dataset)

        parser.zerograds()
        loss = XP.fzeros(())

        for i, data in enumerate(train_dataset):
            trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1))
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data), 0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    trace(' %3d: root' % j)
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    trace('%c %3d -> %3d (%3d)' % ('*' if parent == parent_est else ' ',
                                                   j, parent_est, parent))
                    loss += functions.softmax_cross_entropy(p_scores, XP.iarray([parent]))

            root_est = root_scores.data.argmax()
            trace('ROOT: %3d (%3d)' % (root_est, root))
            loss += functions.softmax_cross_entropy(root_scores, XP.iarray([root]))

            if (i + 1) % 200 == 0:
                loss.backward()
                opt.update()
                parser.zerograds()
                loss = XP.fzeros(())

        loss.backward()
        opt.update()
        trace('epoch %3d: trained. ' % (epoch + 1))

        parent_num = 0
        parent_match = 0
        root_num = 0
        root_match = 0

        for i, data in enumerate(dev_dataset):
            trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1), rollback=True)
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data), 0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    parent_num += 1
                    parent_match += 1 if parent_est == parent else 0

            root_est = root_scores.data.argmax()
            root_num += 1
            root_match += 1 if root_est == root else 0

        result_str = (
            'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)' %
            (epoch + 1,
             parent_match / parent_num, parent_match, parent_num,
             root_match / root_num, root_match, root_num))
        trace(result_str)

        with open(args.model + '.log', 'a') as fp:
            print(result_str, file=fp)

        trace('epoch %3d: saving models ...' % (epoch + 1))
        prefix = args.model + '.%03d' % (epoch + 1)
        vocab.save(prefix + '.vocab')
        parser.save_spec(prefix + '.parent_spec')
        serializers.save_hdf5(prefix + '.parent_weights', parser)

    trace('finished.')
print('mkdir ' + out_dir)
assert os.path.isdir(out_dir)

# setup network model, optimizer, and constant values to control training
z_vec_dim = config.Z_VECTOR_DIM
batch_size = config.BATCH_SIZE
update_max = config.UPDATE_MAX
update_save_params = config.UPDATE_SAVE_PARAMS
kernel_dim = getattr(config, 'KERNEL_DIM', 1)
kernel_eps = getattr(config, 'KERNEL_EPS', 1)

model_dis = config.Discriminator()
optimizer_dis = config.OPTIMIZER_DIS
optimizer_dis.setup(model_dis)
decay_d = getattr(config, 'DECAY_RATE_DIS', 1e-7)
optimizer_dis.add_hook(optimizer.WeightDecay(decay_d))
model_opt_set_dis = ModelOptimizerSet(model_dis, optimizer_dis)

model_gen = config.Generator()
optimizer_gen = config.OPTIMIZER_GEN
optimizer_gen.setup(model_gen)
decay_g = getattr(config, 'DECAY_RATE_GEN', 1e-7)
optimizer_gen.add_hook(optimizer.WeightDecay(decay_g))
model_opt_set_gen = ModelOptimizerSet(model_gen, optimizer_gen)

# setup batch generator
with open(args.dataset, 'r') as f:
    input_files = [line.strip() for line in f.readlines()]
batch_generator = ImageBatchGenerator(input_files, batch_size,
                                      config.HEIGHT, config.WIDTH, channel=config.CHANNEL,
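# The block above pulls everything from a config module, using getattr
# fallbacks for optional constants. A minimal sketch of a compatible config
# with illustrative values only; the real config also defines the
# Discriminator and Generator classes plus HEIGHT, WIDTH, and CHANNEL:
from chainer import optimizers

Z_VECTOR_DIM = 100
BATCH_SIZE = 64
UPDATE_MAX = 100000
UPDATE_SAVE_PARAMS = 1000
OPTIMIZER_DIS = optimizers.Adam(alpha=2e-4, beta1=0.5)
OPTIMIZER_GEN = optimizers.Adam(alpha=2e-4, beta1=0.5)
# DECAY_RATE_DIS / DECAY_RATE_GEN and KERNEL_DIM / KERNEL_EPS are optional;
# the getattr defaults in the training script apply when they are absent.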
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set),
                                  add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)], len(semiterminal_set),
                                        add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semiterminal_vocab) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.char_embed, args.queue,
        args.stack, args.srstate, len(phrase_set), len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    batch_set = list(zip(word_lists, op_lists))

    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)

        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            loss = XP.fzeros(())

            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward_train(word_list, op_list)
                n += 1

            loss.backward()
            opt.update()

        trace('saving model ...')
        # '%03d' (the original's '%03.d' set precision 0 and dropped zero-padding)
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

        opt.lr *= 0.92

    trace('finished.')
    optimizer_generator.setup(G.Vanilla())
else:
    # Use residual blocks
    print('Generator: Residual (N={})'.format(g_res))
    optimizer_generator.setup(G.Residual(n=g_res, out_shape=train[0].shape))

if d_res < 0:
    # No residual blocks
    print('Discriminator: Standard (Minibatch Discrimination={})'.format(mbd))
    optimizer_discriminator.setup(D.Vanilla(use_mbd=mbd))
else:
    # Use residual blocks
    print('Discriminator: Residual (N={}, Minibatch Discrimination={})'.format(d_res, mbd))
    optimizer_discriminator.setup(D.Residual(n=d_res, use_mbd=mbd))

optimizer_generator.add_hook(optimizer.WeightDecay(g_weight_decay))
optimizer_discriminator.add_hook(optimizer.WeightDecay(d_weight_decay))

updater = GenerativeAdversarialUpdater(
    iterator=train_iter,
    noise_iterator=z_iter,
    optimizer_generator=optimizer_generator,
    optimizer_discriminator=optimizer_discriminator,
    device=gpu)

trainer = training.Trainer(updater, stop_trigger=(epochs, 'epoch'), out=out)

# Logging losses to result/logs/loss
trainer.extend(