示例#1
0
def load(size, seed=0):
    """Build the deep sea environment used by this experiment.

    Args:
        size: Forwarded to ``deep_sea.DeepSea`` as ``size``.
        seed: Forwarded to ``deep_sea.DeepSea`` as ``seed``; defaults to 0.

    Returns:
        A ``DeepSea`` instance created with ``deterministic=False`` whose
        ``bsuite_num_episodes`` attribute is set to ``sweep.NUM_EPISODES``.
    """
    settings = {
        'size': size,
        'deterministic': False,
        'seed': seed,
    }
    environment = deep_sea.DeepSea(**settings)
    # Attach the episode budget defined by the sweep to the environment.
    environment.bsuite_num_episodes = sweep.NUM_EPISODES
    return environment
示例#2
0
 def make_object_under_test(self):
     """Return a fresh ``DeepSea(5, deterministic=False)`` instance."""
     environment = deep_sea.DeepSea(5, deterministic=False)
     return environment
示例#3
0
 def make_object_under_test(self):
     """Return a fresh ``DeepSea(10)`` instance with default settings."""
     environment = deep_sea.DeepSea(10)
     return environment
from numpy import random
import sys
import bsuite
from bsuite.logging.csv_logging import wrap_environment
from bsuite.experiments.deep_sea import deep_sea
from wrap2 import derived
from DeepseaEncoder import DeepSea_encoder

# Initializing params
n_size = 20
n_actions = 2
discount = 0.95
total_steps = 1e5 / 2
LAST = 2
SAVE_PATH_RAND = 'results/deepsea/TS20-LONG'

# The first command-line argument serves both as the environment seed and as
# the suffix of the log name.
logname = f'deep_sea/{sys.argv[1]}'
env = wrap_environment(
    deep_sea.DeepSea(n_size, seed=int(sys.argv[1])),
    logname,
    SAVE_PATH_RAND,
    overwrite=True,
)

# The agent is sized with n_size squared as its first argument.
# cross-validated params: L20:(2,5,40) ; L10:(4,10,20)
# NOTE(review): L20 / L10 presumably refer to n_size 20 / 10 -- confirm.
ql = derived(n_size ** 2, n_actions, discount, 2, 5, 40)
encoder = DeepSea_encoder(n_size)

#Interacting with environment
# Bookkeeping for the interaction loop below: number of trials to run, two
# counters starting at zero, and an initially empty list of rewards.
num_trials = 1
counter, n_episodes = 0, 0
episodic_reward = []
for i in range(num_trials):
    total_reward = 0
    prev_act = 0