Example #1
        # TF1-style factorization machine regressor from the tffm package;
        # pin the visible GPU before the graph is built.
        # (os, np, logging, time() and mean_squared_error come from the enclosing script, not shown here.)
        from tffm import TFFMRegressor
        import tensorflow as tf
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
        model = TFFMRegressor(
            order=dim,                       # interaction order of the FM
            rank=args.embedding_dim,         # latent factor dimension
            optimizer=tf.train.AdagradOptimizer(learning_rate=args.lr),
            n_epochs=args.train_epochs,
            # batch_size=1076946,
            batch_size=4096,
            init_std=0.001,
            reg=args.weight_decay,           # L2 regularization strength
            input_type='sparse',             # expects scipy CSR matrices
            log_dir=os.path.join(args.save, save_name),
        )
        # Fit on the sparse training data, then report test RMSE and elapsed time.
        model.fit(train_queue[0], train_queue[1], show_progress=True)
        inferences = model.predict(test_queue[0])
        mse = mean_squared_error(test_queue[1], inferences)
        rmse = np.sqrt(mse)
        logging.info('rmse: %.4f[%.4f]' % (rmse, time() - start))

    elif args.mode == 'autoneural':
        start = time()
        # Neural baseline: pairwise model for 2-way data, triple model for 3-way data.
        if dim == 2:
            model = AutoNeural(num_users, num_items, args.embedding_dim,
                               args.weight_decay).cuda()
        elif dim == 3:
            model = AutoNeural_Triple(num_ps, num_qs, num_rs,
                                      args.embedding_dim,
                                      args.weight_decay).cuda()
        embedding_optimizer = torch.optim.Adagrad(model.embedding_parameters(),
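
The fragment above omits the surrounding imports and data setup. Below is a minimal, self-contained sketch of the same TFFMRegressor usage, assuming TensorFlow 1.x (tffm builds on the tf.train optimizers) and scipy CSR matrices for the sparse input; the toy X_train/y_train arrays are placeholders for the real train/test queues.

    import numpy as np
    import scipy.sparse as sp
    import tensorflow as tf
    from sklearn.metrics import mean_squared_error
    from tffm import TFFMRegressor

    # Toy sparse features/targets standing in for the real train/test queues.
    rng = np.random.RandomState(0)
    X_train = sp.csr_matrix(rng.binomial(1, 0.1, size=(1000, 50)).astype(np.float32))
    y_train = rng.rand(1000).astype(np.float32)
    X_test = sp.csr_matrix(rng.binomial(1, 0.1, size=(200, 50)).astype(np.float32))
    y_test = rng.rand(200).astype(np.float32)

    model = TFFMRegressor(
        order=2,                    # interaction order
        rank=16,                    # latent embedding dimension
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.05),
        n_epochs=10,
        batch_size=4096,
        init_std=0.001,
        reg=1e-5,
        input_type='sparse',        # expects scipy CSR matrices
    )
    model.fit(X_train, y_train, show_progress=True)
    rmse = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
    print('rmse: %.4f' % rmse)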
Example #2
                # Total return for this episode and the best return seen so far.
                episode_rewards_sum = sum(epoche_rewards)
                max_reward = max(episode_rewards_sum, max_reward)

                # Count episodes with a positive return as successes.
                if episode_rewards_sum > 0:
                    suc_count += 1

                print("-----------------------")
                print("Episode: ", epoch)
                print("Reward: ", episode_rewards_sum)
                print("Max reward during train: ", max_reward)
                print("-----------------------")
                # Post-process the raw per-step rewards via the model's helper,
                # archive the episode in the replay buffer, and update the policy.
                epoche_rewards = model.calc_reward(epoche_rewards)
                replBuffer.append(epoche_observations, epoche_actions,
                                  epoche_rewards)

                model.fit(epoche_observations, epoche_actions, epoche_rewards,
                          replBuffer)

                # Reset the per-episode buffers.
                epoche_observations = []
                epoche_actions = []
                epoche_rewards = []

                # Derive the checkpoint version index and save the model.
                training_version = load_version + (
                    epochs_count - current_epoch) // save_period

                save_path = "res/{}/{}/LunarLander-v2.ckpt".format(
                    train_model_name, training_version)

                model.save_model(save_path)
                break

            # Save new observation
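
The replBuffer object used above is not shown in this fragment. A minimal sketch of an episode-level replay buffer with the append() interface the snippet relies on might look as follows; the class name, capacity handling and sample() method are assumptions, not the original implementation.

    import random

    class ReplayBuffer:
        """Hypothetical stand-in for replBuffer: stores whole episodes."""

        def __init__(self, capacity=100):
            self.capacity = capacity
            self.episodes = []

        def append(self, observations, actions, rewards):
            # Keep one full episode; evict the oldest once capacity is reached.
            self.episodes.append((list(observations), list(actions), list(rewards)))
            if len(self.episodes) > self.capacity:
                self.episodes.pop(0)

        def sample(self, n=1):
            # Hand back up to n stored episodes for re-use during training.
            return random.sample(self.episodes, min(n, len(self.episodes)))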
Example #3
            # End the episode early if the return drops below the threshold
            # or the time budget is exhausted.
            episode_rewards_sum = sum(epoche_rewards)
            if episode_rewards_sum < min_reward:
                done = True

            # time.clock() was removed in Python 3.8; perf_counter() is the usual
            # replacement (time_begin is assumed to be taken with the same clock).
            if time.perf_counter() - time_begin > time_limit:
                done = True

            if done:
                episode_rewards_sum = sum(epoche_rewards)
                max_reward = max(episode_rewards_sum, max_reward)

                print("-----------------------")
                print("Episode: ", epoch)
                print("Reward: ", episode_rewards_sum)
                print("Max reward during train: ", max_reward)
                print("-----------------------")

                model.fit(episode_actions=epoche_actions,
                          episode_rewards=epoche_rewards,
                          episode_observations=epoche_observations)
                epoche_observations = []
                epoche_actions = []
                epoche_rewards = []

                model.save_model(save_path)
                break

            # Save new observation
            state = state_
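
Both fragments above show only the end-of-episode handling; the outer interaction loop is cut off. A rough sketch of the surrounding loop, assuming the classic gym API (pre-0.26 reset/step signatures) and placeholder values where the real hyperparameters and policy model are not shown, could look like this:

    import time
    import gym

    env = gym.make('LunarLander-v2')
    epochs_count = 1000            # placeholder for the real hyperparameter
    max_reward = float('-inf')

    for epoch in range(epochs_count):
        state = env.reset()
        epoche_observations, epoche_actions, epoche_rewards = [], [], []
        time_begin = time.perf_counter()

        while True:
            # Stand-in for the policy sampling (the real code queries the model).
            action = env.action_space.sample()
            state_, reward, done, _ = env.step(action)

            epoche_observations.append(state)
            epoche_actions.append(action)
            epoche_rewards.append(reward)

            # ... end-of-episode handling from the snippets above goes here ...
            if done:
                break

            # Save new observation
            state = state_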