Example #1
def train():
    model = AttentionModel(params_config, human_vocab_size,
                           machine_vocab_size).model

    op = Adam(lr=params_config['learning_rate'],
              decay=params_config['decay'],
              clipnorm=params_config['clipnorm'])

    # if a saved checkpoint exists, load it instead of retraining
    if os.path.exists('./Model/model.h5'):
        print('loading model...')

        model.load_weights('./Model/model.h5')

        model.compile(optimizer=op,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    else:
        model.compile(optimizer=op,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # the model returns one softmax output per decoder timestep, so split the
        # one-hot targets along the time axis into a list of per-timestep arrays
        outputs_train = list(Yoh_train.swapaxes(0, 1))

        model.fit(Xoh_train,
                  outputs_train,
                  epochs=params_config['epochs'],
                  batch_size=params_config['batch_size'],
                  validation_split=0.1)

        if not os.path.exists('Model'):
            os.mkdir('Model')

        model.save_weights('./Model/model.h5')
    return model
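The function above depends on module-level data (params_config, Xoh_train, Yoh_train) and the project's AttentionModel, so it is not runnable in isolation. The only non-obvious step is the swapaxes call: a Keras model with one softmax output per decoder timestep expects its targets as a list of per-timestep arrays rather than a single 3-D tensor. A small, self-contained NumPy sketch of that reshaping (the sizes are illustrative, not taken from the example):

import numpy as np

m, Ty, vocab = 4, 10, 11                # illustrative sizes only
Yoh = np.zeros((m, Ty, vocab))          # one-hot targets, shape (m, Ty, vocab)

# swap the batch and time axes, then split along the new first axis:
# the result is a list of Ty arrays of shape (m, vocab), matching a model
# that returns one output tensor per timestep
outputs = list(Yoh.swapaxes(0, 1))
print(len(outputs), outputs[0].shape)   # 10 (4, 11)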
Example #2
def train(cfg, log_path = None):
	
	model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads, 
						cfg.tanh_clipping, 'sampling')
	baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir, cfg.n_rollout_samples, 
								cfg.embed_dim, cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
	optimizer = tf.keras.optimizers.Adam(learning_rate = cfg.lr)
	ave_loss = tf.keras.metrics.Mean()
	ave_L = tf.keras.metrics.Mean()
	
	for epoch in tqdm(range(cfg.epochs), desc = 'epoch'):
		t1 = time()
		dataset = generate_data(cfg.n_samples, cfg.n_customer)
		bs = baseline.eval_all(dataset)
		bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None # bs: (cfg.batch_steps, cfg.batch) or None
		
		for t, inputs in enumerate(dataset.batch(cfg.batch)):
			with tf.GradientTape() as tape:
				L, logp = model(inputs)
				b = bs[t] if bs is not None else baseline.eval(inputs, L)
				b = tf.stop_gradient(b)
				loss = tf.reduce_mean((L - b) * logp)
				L_mean = tf.reduce_mean(L)
			grads = tape.gradient(loss, model.trainable_weights)  # model.trainable_weights == theta (the policy parameters)
			grads, _ = tf.clip_by_global_norm(grads, 1.0)
			optimizer.apply_gradients(zip(grads, model.trainable_weights))  # gradient step (cf. optimizer.step() in PyTorch)

			ave_loss.update_state(loss)
			ave_L.update_state(L_mean)
			if t % max(int(cfg.batch_steps * 0.1), 1) == 0:  # log roughly every 10% of the batch steps
				print('epoch%d, %d/%dsamples: loss %1.2f, average L %1.2f, average b %1.2f\n'%(
						epoch, t*cfg.batch, cfg.n_samples, ave_loss.result().numpy(), ave_L.result().numpy(), tf.reduce_mean(b)))

		baseline.epoch_callback(model, epoch)
		model.decode_type = 'sampling'
		model.save_weights('%s%s_epoch%s.h5'%(cfg.weight_dir, cfg.task, epoch), save_format = 'h5')
		
		if cfg.islogger:
			if log_path is None:
				log_path = '%s%s_%s.csv'%(cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
				with open(log_path, 'w') as f:
					f.write('time,epoch,loss,average length\n')
			with open(log_path, 'a') as f:
				t2 = time()
				f.write('%dmin%dsec,%d,%1.2f,%1.2f\n'%((t2-t1)//60, (t2-t1)%60, epoch, ave_loss.result().numpy(), ave_L.result().numpy()))

		ave_loss.reset_states()
		ave_L.reset_states()
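The training objective in Example #2 is the REINFORCE estimator with a baseline: the sampled tour cost L is compared against a baseline value b (precomputed by baseline.eval_all or evaluated on the fly), the advantage (L - b) weights the log-probability of the sampled solution, and gradients are clipped by global norm. A stripped-down sketch of that update with dummy tensors in place of the model and the rollout baseline (nothing below comes from AttentionModel or RolloutBaseline):

import tensorflow as tf

# toy stand-ins: logp acts as the trainable quantity instead of model weights
L = tf.random.uniform((8,), 5.0, 10.0)                    # sampled tour costs
b = tf.random.uniform((8,), 5.0, 10.0)                    # baseline values
logp = tf.Variable(tf.random.uniform((8,), -3.0, -0.1))   # log-probabilities of the samples

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
with tf.GradientTape() as tape:
    advantage = tf.stop_gradient(L - b)      # no gradient flows through the baseline
    loss = tf.reduce_mean(advantage * logp)  # REINFORCE surrogate loss
grads = tape.gradient(loss, [logp])
grads, _ = tf.clip_by_global_norm(grads, 1.0)  # same global-norm clipping as the example
optimizer.apply_gradients(zip(grads, [logp]))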
Example #3
def train(cfg, log_path = None):

	def allocate_memory():
		# enable GPU memory growth so TensorFlow does not grab all GPU memory at once
		# https://qiita.com/studio_haneya/items/4dfaf2fb2ac44818e7e0
		physical_devices = tf.config.experimental.list_physical_devices('GPU')
		if len(physical_devices) > 0:
			for k in range(len(physical_devices)):
				tf.config.experimental.set_memory_growth(physical_devices[k], True)
				print('memory growth:', tf.config.experimental.get_memory_growth(physical_devices[k]))
		else:
			print("Not enough GPU hardware devices available")

	def rein_loss(model, inputs, bs, t):
		L, ll = model(inputs, decode_type = 'sampling', training = True)
		b = bs[t] if bs is not None else baseline.eval(inputs, L)
		b = tf.stop_gradient(b)
		return tf.reduce_mean((L - b) * ll), tf.reduce_mean(L)

	def grad_func(model, inputs, bs, t):
		with tf.GradientTape() as tape:
			loss, L_mean = rein_loss(model, inputs, bs, t)
		return loss, L_mean, tape.gradient(loss, model.trainable_variables)  # model.trainable_variables == theta (the policy parameters)

	allocate_memory()
	model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads, cfg.tanh_clipping)
	baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir, cfg.n_rollout_samples, 
							cfg.embed_dim, cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
	optimizer = tf.keras.optimizers.Adam(learning_rate = cfg.lr)
	ave_loss = tf.keras.metrics.Mean()
	ave_L = tf.keras.metrics.Mean()
	
	t1 = time()
	for epoch in range(cfg.epochs):
		dataset = generate_data(cfg.n_samples, cfg.n_customer)
		
		bs = baseline.eval_all(dataset)
		bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None# bs: (cfg.batch_steps, cfg.batch) or None
		
		for t, inputs in enumerate(dataset.batch(cfg.batch)):
			
			loss, L_mean, grads = grad_func(model, inputs, bs, t)

			grads, _ = tf.clip_by_global_norm(grads, 1.0)
			optimizer.apply_gradients(zip(grads, model.trainable_variables))  # gradient step (cf. optimizer.step() in PyTorch)
			
			ave_loss.update_state(loss)
			ave_L.update_state(L_mean)
			
			if t%(cfg.batch_verbose) == 0:
				t2 = time()
				print('Epoch %d (batch = %d): Loss: %1.3f L: %1.3f, %dmin%dsec'%(
					epoch, t, ave_loss.result().numpy(), ave_L.result().numpy(), (t2-t1)//60, (t2-t1)%60))
				if cfg.islogger:
					if log_path is None:
						log_path = '%s%s_%s.csv'%(cfg.log_dir, cfg.task, cfg.dump_date)#cfg.log_dir = ./Csv/
						with open(log_path, 'w') as f:
							f.write('time,epoch,batch,loss,cost\n')
					with open(log_path, 'a') as f:
						f.write('%dmin%dsec,%d,%d,%1.3f,%1.3f\n'%(
							(t2-t1)//60, (t2-t1)%60, epoch, t, ave_loss.result().numpy(), ave_L.result().numpy()))
				t1 = time()

		baseline.epoch_callback(model, epoch)
		model.save_weights('%s%s_epoch%s.h5'%(cfg.weight_dir, cfg.task, epoch), save_format = 'h5')#cfg.weight_dir = ./Weights/

		ave_loss.reset_states()
		ave_L.reset_states()
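Example #3 is the same REINFORCE loop, but the loss (rein_loss) and the gradient computation (grad_func) are factored into helpers so the GradientTape only wraps the forward pass, and GPU memory growth is enabled up front. The same tape/clip/apply pattern with a tiny stand-in Keras model, outside the routing setup (AttentionModel and cfg are not used here):

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])  # stand-in for AttentionModel
model.build((None, 4))
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

def loss_fn(model, x, y):
    pred = model(x, training=True)
    return tf.reduce_mean(tf.square(pred - y))

def grad_fn(model, x, y):
    # compute the loss under the tape, then return its gradient w.r.t. the weights
    with tf.GradientTape() as tape:
        loss = loss_fn(model, x, y)
    return loss, tape.gradient(loss, model.trainable_variables)

x = tf.random.normal((32, 4))
y = tf.random.normal((32, 1))
loss, grads = grad_fn(model, x, y)
grads, _ = tf.clip_by_global_norm(grads, 1.0)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
print(float(loss))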