# Keras supervised training on one-hot encoded sequence pairs.
# Assumes `os`, `Adam`, `AttentionModel`, `params_config`, `human_vocab_size`,
# `machine_vocab_size`, `Xoh_train` and `Yoh_train` are defined/imported at module level.
def train():
    model = AttentionModel(params_config, human_vocab_size, machine_vocab_size).model
    op = Adam(lr=params_config['learning_rate'],
              decay=params_config['decay'],
              clipnorm=params_config['clipnorm'])
    if os.path.exists('./Model/model.h5'):
        print('loading model...')
        model.load_weights('./Model/model.h5')
    model.compile(optimizer=op, loss='categorical_crossentropy', metrics=['accuracy'])
    # one target array per decoder time step (see the sketch after this function)
    outputs_train = list(Yoh_train.swapaxes(0, 1))
    model.fit(Xoh_train, outputs_train,
              epochs=params_config['epochs'],
              batch_size=params_config['batch_size'],
              validation_split=0.1)
    if not os.path.exists('Model'):
        os.mkdir('Model')
    model.save_weights('./Model/model.h5')
    return model
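
# The list(Yoh_train.swapaxes(0, 1)) step above is easy to misread: it suggests the
# decoder exposes one softmax output per target time step, so Keras fit() wants a list
# of Ty target arrays of shape (m, vocab) rather than one (m, Ty, vocab) tensor.
# A minimal sketch of that reshaping; the shapes (m = 1000, Ty = 10, vocab = 11) are
# made-up for illustration, not the project's real sizes.

import numpy as np

Yoh_demo = np.zeros((1000, 10, 11))           # (m, Ty, vocab) one-hot targets
outputs_demo = list(Yoh_demo.swapaxes(0, 1))  # Ty arrays, each of shape (m, vocab)
assert len(outputs_demo) == 10
assert outputs_demo[0].shape == (1000, 11)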

# TF2 REINFORCE training loop on generated routing instances, using a greedy-rollout baseline.
def train(cfg, log_path = None):
    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads,
                           cfg.tanh_clipping, 'sampling')
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir, cfg.n_rollout_samples,
                               cfg.embed_dim, cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
    optimizer = tf.keras.optimizers.Adam(learning_rate = cfg.lr)
    ave_loss = tf.keras.metrics.Mean()
    ave_L = tf.keras.metrics.Mean()

    for epoch in tqdm(range(cfg.epochs), desc = 'epoch'):
        t1 = time()
        dataset = generate_data(cfg.n_samples, cfg.n_customer)
        bs = baseline.eval_all(dataset)
        bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None  # bs: (cfg.batch_steps, cfg.batch) or None

        for t, inputs in enumerate(dataset.batch(cfg.batch)):
            with tf.GradientTape() as tape:
                L, logp = model(inputs)
                b = bs[t] if bs is not None else baseline.eval(inputs, L)
                b = tf.stop_gradient(b)  # no gradient flows through the baseline
                loss = tf.reduce_mean((L - b) * logp)  # REINFORCE with baseline
                L_mean = tf.reduce_mean(L)
            grads = tape.gradient(loss, model.trainable_weights)  # model.trainable_weights == theta
            grads, _ = tf.clip_by_global_norm(grads, 1.0)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))  # optimizer step

            ave_loss.update_state(loss)
            ave_L.update_state(L_mean)

            # integer print interval; the original float modulo (t % (cfg.batch_steps*0.1))
            # only fires at t = 0 unless batch_steps*0.1 happens to be a whole number
            if t % max(1, int(cfg.batch_steps * 0.1)) == 0:
                print('epoch%d, %d/%d samples: loss %1.2f, average L %1.2f, average b %1.2f\n' % (
                    epoch, t * cfg.batch, cfg.n_samples,
                    ave_loss.result().numpy(), ave_L.result().numpy(), tf.reduce_mean(b)))

        baseline.epoch_callback(model, epoch)
        model.decode_type = 'sampling'  # restore sampling after the baseline's greedy evaluation
        model.save_weights('%s%s_epoch%s.h5' % (cfg.weight_dir, cfg.task, epoch), save_format = 'h5')

        if cfg.islogger:
            if log_path is None:
                log_path = '%s%s_%s.csv' % (cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
                with open(log_path, 'w') as f:
                    f.write('time,epoch,loss,average length\n')
            with open(log_path, 'a') as f:
                t2 = time()
                f.write('%dmin%dsec,%d,%1.2f,%1.2f\n' % (
                    (t2 - t1) // 60, (t2 - t1) % 60, epoch,
                    ave_loss.result().numpy(), ave_L.result().numpy()))

        ave_loss.reset_states()
        ave_L.reset_states()
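
# The loss above, tf.reduce_mean((L - b) * logp) with b wrapped in tf.stop_gradient,
# is the REINFORCE estimator with a baseline: the advantage L - b acts as a constant
# weight on the log-probability of the sampled tour, and the rollout baseline only
# reduces variance. A self-contained sketch of that gradient step; the tensors below
# are made-up stand-ins for the model's outputs, not values from this project.

import tensorflow as tf

L_demo = tf.constant([10.2, 9.7, 11.4])      # sampled tour lengths
b_demo = tf.constant([9.9, 9.9, 10.8])       # greedy-rollout baseline lengths
logp_demo = tf.Variable([-3.1, -2.8, -3.5])  # log-prob of each sampled tour

with tf.GradientTape() as tape:
    advantage = tf.stop_gradient(L_demo - b_demo)    # baseline carries no gradient
    loss_demo = tf.reduce_mean(advantage * logp_demo)
grad_demo = tape.gradient(loss_demo, [logp_demo])
print(grad_demo)  # each entry is advantage / batch size: descent lowers logp where L > b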

# Refactored TF2 training loop: loss and gradients are pulled out into rein_loss()/grad_func(),
# GPU memory growth is enabled, and logging happens every cfg.batch_verbose batches.
def train(cfg, log_path = None):

    def allocate_memory():
        # enable memory growth so TF does not grab all GPU memory up front
        # https://qiita.com/studio_haneya/items/4dfaf2fb2ac44818e7e0
        physical_devices = tf.config.experimental.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            for k in range(len(physical_devices)):
                tf.config.experimental.set_memory_growth(physical_devices[k], True)
                print('memory growth:', tf.config.experimental.get_memory_growth(physical_devices[k]))
        else:
            print("Not enough GPU hardware devices available")

    def rein_loss(model, inputs, bs, t):
        # REINFORCE loss with a greedy-rollout baseline; ll is the log-likelihood of the sampled tour
        L, ll = model(inputs, decode_type = 'sampling', training = True)
        b = bs[t] if bs is not None else baseline.eval(inputs, L)
        b = tf.stop_gradient(b)
        return tf.reduce_mean((L - b) * ll), tf.reduce_mean(L)

    def grad_func(model, inputs, bs, t):
        with tf.GradientTape() as tape:
            loss, L_mean = rein_loss(model, inputs, bs, t)
        return loss, L_mean, tape.gradient(loss, model.trainable_variables)  # model.trainable_variables == theta

    allocate_memory()
    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads, cfg.tanh_clipping)
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir, cfg.n_rollout_samples,
                               cfg.embed_dim, cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
    optimizer = tf.keras.optimizers.Adam(learning_rate = cfg.lr)
    ave_loss = tf.keras.metrics.Mean()
    ave_L = tf.keras.metrics.Mean()

    t1 = time()
    for epoch in range(cfg.epochs):
        dataset = generate_data(cfg.n_samples, cfg.n_customer)
        bs = baseline.eval_all(dataset)
        bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None  # bs: (cfg.batch_steps, cfg.batch) or None

        for t, inputs in enumerate(dataset.batch(cfg.batch)):
            loss, L_mean, grads = grad_func(model, inputs, bs, t)
            grads, _ = tf.clip_by_global_norm(grads, 1.0)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))  # optimizer step
            ave_loss.update_state(loss)
            ave_L.update_state(L_mean)

            if t % cfg.batch_verbose == 0:
                t2 = time()
                print('Epoch %d (batch = %d): Loss: %1.3f L: %1.3f, %dmin%dsec' % (
                    epoch, t, ave_loss.result().numpy(), ave_L.result().numpy(),
                    (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if log_path is None:
                        log_path = '%s%s_%s.csv' % (cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
                        with open(log_path, 'w') as f:
                            f.write('time,epoch,batch,loss,cost\n')
                    with open(log_path, 'a') as f:
                        f.write('%dmin%dsec,%d,%d,%1.3f,%1.3f\n' % (
                            (t2 - t1) // 60, (t2 - t1) % 60, epoch, t,
                            ave_loss.result().numpy(), ave_L.result().numpy()))
                t1 = time()

        baseline.epoch_callback(model, epoch)
        model.save_weights('%s%s_epoch%s.h5' % (cfg.weight_dir, cfg.task, epoch), save_format = 'h5')  # cfg.weight_dir = ./Weights/

        ave_loss.reset_states()
        ave_L.reset_states()
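
# Usage sketch. The real project presumably builds cfg from argparse; every field
# value below (dimensions, paths, sample counts, task name, date format) is an
# assumption for illustration only, not the project's defaults.

if __name__ == '__main__':
    from types import SimpleNamespace
    from datetime import datetime

    cfg = SimpleNamespace(
        embed_dim = 128, n_encode_layers = 3, n_heads = 8, tanh_clipping = 10.,
        task = 'VRP20', weight_dir = './Weights/', log_dir = './Csv/',
        n_rollout_samples = 10000, n_customer = 20, warmup_beta = 0.8, wp_epochs = 1,
        lr = 1e-4, epochs = 20, n_samples = 128000, batch = 512,
        batch_verbose = 10, islogger = True,
        dump_date = datetime.now().strftime('%m%d_%H_%M'))

    train(cfg)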