# Imports this fragment relies on (defined at module level in the full script);
# AutoNeural and AutoNeural_Triple are project-local model classes.
import os
import logging
from time import time

import numpy as np
import torch
from sklearn.metrics import mean_squared_error

# ... continuation of an earlier `args.mode` branch that trains a
# factorization machine with tffm ...
    from tffm import TFFMRegressor
    import tensorflow as tf

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    model = TFFMRegressor(
        order=dim,
        rank=args.embedding_dim,
        # tffm expects the TF1-style optimizer API
        optimizer=tf.train.AdagradOptimizer(learning_rate=args.lr),
        n_epochs=args.train_epochs,
        # batch_size=1076946,
        batch_size=4096,
        init_std=0.001,
        reg=args.weight_decay,
        input_type='sparse',
        log_dir=os.path.join(args.save, save_name),
    )
    model.fit(train_queue[0], train_queue[1], show_progress=True)
    inferences = model.predict(test_queue[0])
    mse = mean_squared_error(test_queue[1], inferences)
    rmse = np.sqrt(mse)
    logging.info('rmse: %.4f[%.4f]' % (rmse, time() - start))

elif args.mode == 'autoneural':
    start = time()
    # Pairwise (user, item) or triple (p, q, r) variant of the model.
    if dim == 2:
        model = AutoNeural(num_users, num_items, args.embedding_dim,
                           args.weight_decay).cuda()
    elif dim == 3:
        model = AutoNeural_Triple(num_ps, num_qs, num_rs, args.embedding_dim,
                                  args.weight_decay).cuda()
    embedding_oprimizer = torch.optim.Adagrad(model.embedding_parameters(),
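# Hedged sketch (not from the original script): one common way to build the
# sparse (X, y) pairs that `train_queue` / `test_queue` hold above when
# `input_type='sparse'` -- each rating row becomes a one-hot user block
# concatenated with a one-hot item block.  The names `ratings`, `train_ratings`
# and `test_ratings` are illustrative assumptions.
import numpy as np
import scipy.sparse as sp

def to_fm_matrix(ratings, num_users, num_items):
    """ratings: iterable of (user_id, item_id, score) triples."""
    rows, cols, vals, targets = [], [], [], []
    for r, (u, i, score) in enumerate(ratings):
        rows += [r, r]
        cols += [u, num_users + i]        # user block, then item block
        vals += [1.0, 1.0]
        targets.append(score)
    X = sp.csr_matrix((vals, (rows, cols)),
                      shape=(len(targets), num_users + num_items),
                      dtype=np.float32)
    y = np.asarray(targets, dtype=np.float32)
    return X, y

# e.g. train_queue = to_fm_matrix(train_ratings, num_users, num_items)
#      test_queue  = to_fm_matrix(test_ratings, num_users, num_items)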
# Episode bookkeeping: log the episode return and the running statistics.
episode_rewards_sum = sum(epoche_rewards)
max_reward = max(episode_rewards_sum, max_reward)
if episode_rewards_sum > 0:
    suc_count += 1

print("-----------------------")
print("Episode: ", epoch)
print("Reward: ", episode_rewards_sum)
print("Max reward during train: ", max_reward)
print("-----------------------")

# Post-process the raw per-step rewards (e.g. discounting/normalization),
# store the episode in the replay buffer, and run a training step.
epoche_rewards = model.calc_reward(epoche_rewards)
replBuffer.append(epoche_observations, epoche_actions, epoche_rewards)
model.fit(epoche_observations, epoche_actions, epoche_rewards, replBuffer)

# Reset the per-episode buffers.
epoche_observations = []
epoche_actions = []
epoche_rewards = []

# Periodically checkpoint the policy.
training_version = load_version + (epochs_count - current_epoch) // save_period
save_path = "res/{}/{}/LunarLander-v2.ckpt".format(
    train_model_name, training_version)
model.save_model(save_path)
break

# Save new observation
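# Hedged sketch (assumption, not the script's own code): `model.calc_reward`
# above is most likely a discounted-return computation of this shape, which is
# the standard REINFORCE preprocessing step before the policy-gradient update.
import numpy as np

def discounted_normalized_returns(rewards, gamma=0.99):
    """G_t = r_t + gamma * G_{t+1}, then standardized to zero mean / unit std."""
    returns = np.zeros(len(rewards), dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    returns -= returns.mean()
    std = returns.std()
    if std > 0:
        returns /= std
    return returns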
# Early-termination checks: end the episode if the return has fallen below
# min_reward or the wall-clock budget for this episode is exhausted.
episode_rewards_sum = sum(epoche_rewards)
if episode_rewards_sum < min_reward:
    done = True
if time.perf_counter() - time_begin > time_limit:  # time.clock() is gone in Python 3.8+
    done = True

if done:
    episode_rewards_sum = sum(epoche_rewards)
    max_reward = max(episode_rewards_sum, max_reward)
    print("-----------------------")
    print("Episode: ", epoch)
    print("Reward: ", episode_rewards_sum)
    print("Max reward during train: ", max_reward)
    print("-----------------------")

    # Train on the completed episode, then clear the per-episode buffers.
    model.fit(episode_actions=epoche_actions,
              episode_rewards=epoche_rewards,
              episode_observations=epoche_observations)
    epoche_observations = []
    epoche_actions = []
    epoche_rewards = []

    model.save_model(save_path)
    break

# Save new observation
state = state_
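# Hedged sketch (assumption): the surrounding episode loop that both reward
# fragments above sit inside, written against the classic 4-tuple gym API.
# `env` and `model.choose_action` are illustrative stand-ins -- neither appears
# in the original fragments.
import gym

env = gym.make("LunarLander-v2")
for epoch in range(epochs_count):
    state = env.reset()
    epoche_observations, epoche_actions, epoche_rewards = [], [], []
    while True:
        action = model.choose_action(state)          # sample an action from the policy
        state_, reward, done, _ = env.step(action)   # advance the simulator
        epoche_observations.append(state)
        epoche_actions.append(action)
        epoche_rewards.append(reward)
        # ... termination checks, fitting and checkpointing as shown above ...
        state = state_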