Example #1
    def __init__(self):
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.memory = deque(maxlen=2000)

        self.gamma = 0.95
        self.epsilon = 1
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.01
        self.batch_number = 64

        self.train_start = 1000
        self.jump_prob = 0.01
        self.model = NeuralNetwork(input_shape=(self.state_space, ),
                                   output_shape=self.action_space)
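The constructor above only wires the agent together; the epsilon and jump_prob fields suggest an epsilon-greedy action picker. A minimal sketch of such a method (hypothetical, not part of this example; it assumes numpy is imported as np and that the model exposes a Keras-style predict) could look like:

    def act(self, state):
        # Explore: with probability epsilon take a random action, flapping
        # (action 1) only with probability jump_prob, since flapping on
        # most frames kills the bird almost immediately.
        if np.random.random() <= self.epsilon:
            return 1 if np.random.random() < self.jump_prob else 0
        # Exploit: pick the action with the highest predicted Q-value.
        return np.argmax(self.model.predict(state))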
Example #2
    def __init__(self):
        # Env variables
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n

        # Hyperparameters
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate; a value below 1 weights immediate rewards more heavily than future rewards
        self.epsilon = 1
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.01
        self.batch_number = 64

        self.train_start = 1000
        self.jump_prob = 0.01
        self.model = NeuralNetwork(input_shape=(self.state_space, ),
                                   output_shape=self.action_space)
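Note how epsilon_decay and epsilon_min define a multiplicative exploration schedule. The decay code itself is not shown in the excerpt, but applied once per training step it would amount to:

        # Decay exploration multiplicatively, never dropping below the floor.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

With a per-step decay of 0.9999, epsilon falls from 1 to the 0.01 floor after roughly ln(0.01)/ln(0.9999) ≈ 46,000 steps.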
Example #3
    def __init__(self, params_file):
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.params_file = params_file
        self.params = self.read_params()
        self.memory = deque(maxlen=2000)

        self.episodes = self.params['training']['num_episodes']
        self.gamma = self.params['training']['discount_rate']
        self.epsilon = self.params['training']['init_exploration_rate']
        self.epsilon_decay = self.params['training']['exploration_decay_rate']
        self.epsilon_min = self.params['training']['min_exploration_rate']
        self.batch_number = 64  # candidate batch sizes: 16, 32, 64, 128

        self.train_start = 1000
        self.jump_prob = 0.01
        self.model = NeuralNet(input_shape=(self.state_space, ),
                               output_shape=self.action_space)
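read_params is not shown in the excerpt. Given the nested params['training'][...] keys above, a plausible implementation, assuming the parameters live in a YAML file (the file format is an assumption), would be:

    def read_params(self):
        import yaml  # PyYAML; using YAML here is an assumption
        # Expected to yield a nested dict such as:
        # {'training': {'num_episodes': 1000, 'discount_rate': 0.95, ...}}
        with open(self.params_file) as f:
            return yaml.safe_load(f)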
Example #4
    def __init__(self):
        self.env = flappy_bird_gym.make('FlappyBird-v0')
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.memory = deque(maxlen=2000)

        # Hyperparameters

        # discount rate: gives more weight to immediate rewards
        self.gamma = 0.95
        # initial probability of taking a random action
        self.epsilon = 1
        self.epsilon_decay = 0.9999
        # floor for epsilon: the minimum probability of a random action after decay
        self.epsilon_min = 0.01
        # number of data points fed to the neural network per training batch
        self.batch_number = 64

        self.train_start = 1000
        self.jump_prob = 0.01

        self.model = NeuralNetwork(input_shape=(self.state_space, ),
                                   output_shape=self.action_space)
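None of the constructors show how memory, train_start, batch_number and gamma actually interact. A standard DQN experience-replay step written against those names (a sketch, not code from the examples; it assumes random and numpy as np are imported and that the model has Keras-style predict/fit) looks like:

    def replay(self):
        # Do not train until the buffer holds at least train_start transitions.
        if len(self.memory) < self.train_start:
            return
        batch = random.sample(self.memory, self.batch_number)
        for state, action, reward, next_state, done in batch:
            # Bellman target: immediate reward plus the discounted value
            # of the best action available in the next state.
            target = reward
            if not done:
                target += self.gamma * np.max(self.model.predict(next_state)[0])
            q_values = self.model.predict(state)
            q_values[0][action] = target
            self.model.fit(state, q_values, epochs=1, verbose=0)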
Example #5
                            action='store_true',
                            help='use the RGB version')
        parser.add_argument('-sd',
                            dest='subdir',
                            metavar='SUBDIR',
                            default='',
                            type=str,
                            help='subdir for weights')
        args = parser.parse_args()
        return args

    args = parse_args()
    birds = BirdsPopulation(rgb=args.rgb)
    env_str = f"FlappyBird{'-rgb' if args.rgb else ''}-v0"
    envs = {
        bird.id: flappy_bird_gym.make(env_str)
        for bird in birds.population
    }
    main_env = flappy_bird_gym.make(env_str)
    if args.initial_gen > 0 or args.load:
        birds.load_population(generation=args.initial_gen, subdir=args.subdir)
    if args.train:
        evolution(args.initial_gen)
    elif not args.plot:
        bird = birds.select_fittest(1)
        print(
            f'Running single run using BIRD-{bird.id} from generation #{birds.generation}'
        )

        run_with_render(bird, recording=True)
    else:
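The excerpt is cut off here, and run_with_render is not defined in it. A plausible shape for it, assuming each bird exposes an act(obs) method (the bird API is not shown, so this is purely illustrative, and the recording hook is omitted):

def run_with_render(bird, recording=False):
    obs = main_env.reset()
    done = False
    score = 0
    while not done:
        main_env.render()
        action = bird.act(obs)  # act() is an assumed method, not shown above
        obs, reward, done, info = main_env.step(action)
        score += reward
    print(f'BIRD-{bird.id} finished with score {score}')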
Example #6
# trainer.restore("/home/edoardo/ray_results/tune_PPO_lunar_hover/PPO_LunarHover_7ba4e_00000_0_2021-04-02_19-01-43/checkpoint_990/checkpoint-990")
trainer.restore(
    "/home/edoardo/ray_results/tune_PPO_flappy_bird/PPO_my_env_95b43_00000_0_2021-04-03_14-36-24/checkpoint_2280/checkpoint-2280"
)

policy = trainer.get_policy()
# sequential_nn = convert_ray_simple_policy_to_sequential(policy).cpu()
sequential_nn = convert_ray_policy_to_sequential(policy).cpu()
# l0 = torch.nn.Linear(4, 2, bias=False)
# l0.weight = torch.nn.Parameter(torch.tensor([[0, 0, 1, 0], [0, 0, 0, 1]], dtype=torch.float32))
# layers = [l0]
# for l in sequential_nn:
#     layers.append(l)
# nn = torch.nn.Sequential(*layers)
nn = sequential_nn
env = flappy_bird_gym.make("FlappyBird-v0")
# env.render()
plot_index = 1
position_list = []
n_trials = 10
cumulative_reward = 0
clock = pygame.time.Clock()
for i in range(n_trials):
    state = env.reset()
    # env.state[2] = 0.01
    # env.state[2] = 0.045
    # env.state[3] = -0.51
    # state = np.array(env.state)
    state_np = np.array(state)
    print(state_np)
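The trial loop is truncated right after printing the initial state. A typical continuation that rolls the restored policy out greedily (a sketch; it assumes torch is already imported and that the converted network maps a float state tensor to per-action scores) would be:

    done = False
    while not done:
        with torch.no_grad():
            # Greedy action from the converted sequential policy network.
            scores = nn(torch.from_numpy(state_np).float().unsqueeze(0))
        action = int(torch.argmax(scores, dim=1).item())
        state, reward, done, info = env.step(action)
        state_np = np.array(state)
        position_list.append(state_np[plot_index])
        cumulative_reward += reward
        env.render()
        clock.tick(30)  # cap the frame rate so the run is watchable
print(f'Average reward over {n_trials} trials: {cumulative_reward / n_trials:.2f}')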
Example #7
def env_creator(env_config):
    # Imported inside the function so that each Ray worker process
    # can build the environment in its own interpreter.
    import flappy_bird_gym
    env = flappy_bird_gym.make("FlappyBird-v0")
    return env
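On its own the creator does nothing; Ray has to be told about it. Registering it under the name "my_env" (matching the checkpoint path in Example #6) uses ray.tune.registry.register_env:

from ray.tune.registry import register_env

register_env("my_env", env_creator)
# From here on, trainer configs can reference the environment by name,
# e.g. config = {"env": "my_env", ...}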