def __init__(self, environment, configuration, name='', debug=False):
    """Initialise a tabular Q-learning agent for a discrete environment.

    Builds a zero-initialised Q-table sized (num_states, num_actions),
    then defers to the base class, which kicks off training.
    """
    n_actions = environment.action_space.n
    n_states = get_num_states(environment)
    # Q-table: one row per state, one column per action, all zeros.
    self.Q = np.zeros((n_states, n_actions))
    # The base-class initializer starts the training loop, so it must run last.
    super().__init__(environment=environment, debug=debug, name=name,
                     configuration=configuration)
def __init__(self, environment, memory_size, nb_variational_circuits,
             configuration, name='', debug=False):
    """Initialise a variational-quantum DQN agent.

    Sets up the replay memory, sizes the qubit register from the
    environment, draws random rotation angles for every variational
    layer, then defers to the base class (which starts training).
    """
    self.D = []  # replay memory
    self.memory_size = memory_size
    self.nb_variational_circuits = nb_variational_circuits
    self.debug = debug
    n_actions = environment.action_space.n
    n_states = get_num_states(environment)
    # Enough qubits to cover both the action count and log2 of the state count.
    self.nb_qbits = int(np.max([n_actions, np.log2(n_states)]))
    # Three rotation angles in [0, 2*pi) per qubit and per variational layer;
    # one np.random.random(3) draw per (qubit, layer) pair, then flattened.
    angles = [2 * np.pi * np.random.random(3)
              for _ in range(self.nb_qbits * self.nb_variational_circuits)]
    self.theta = np.array(angles).flatten()
    self.init_memory(environment)
    # The base-class initializer starts the training loop, so it must run last.
    super().__init__(environment=environment, debug=debug, name=name,
                     configuration=configuration)
def __init__(self, environment, configuration, name, debug=False):
    """Store the state shared by all agents.

    Keeps references to the environment and configuration, records the
    agent's name and debug flag, and caches the sizes of the discrete
    action and state spaces.
    """
    self.configuration = configuration
    self.name = name
    self.debug = debug
    self.environment = environment
    # Cache the (discrete) action- and state-space sizes for later use.
    self.num_actions = environment.action_space.n
    self.num_states = get_num_states(environment)
def __init__(self, environment, configuration, name='', debug=False):
    """Initialise a DQN agent with an online and a target network.

    Compiles the online model, clones it into a target network (and
    synchronises the weights), sets up the replay memory, then defers
    to the base class, which starts training.
    """
    self.D = []  # replay memory
    self.num_states = get_num_states(environment)
    self.model = configuration.model
    # NOTE(review): this unconditionally recompiles with MSE/'sgd' and will
    # override any loss/optimizer the caller compiled the model with — confirm
    # that is intended.
    self.model.compile(loss='mean_squared_error', optimizer='sgd')
    # BUG FIX: Keras clone_model copies the architecture but NOT the weights,
    # so the freshly cloned target network held randomly initialised weights.
    # Synchronise it with the online network so early TD targets are sane.
    self.target_model = clone_model(self.model)
    self.target_model.set_weights(self.model.get_weights())
    self.memory_size = configuration.memory_size
    self.train_counter = 0
    self.init_memory(environment)
    # The base-class initializer starts the training loop, so it must run last.
    super().__init__(environment=environment, debug=debug, name=name,
                     configuration=configuration)
def __init__(self, environment, configuration, name='', debug=False):
    """Initialise a DQN agent whose target network is cloned via the
    configuration's `clone_model` hook.

    NOTE: unlike sibling agents, the base initializer runs FIRST here,
    before the memory and networks are set up — do not reorder.
    """
    Agent.__init__(self, environment=environment, debug=debug, name=name,
                   configuration=configuration)
    self.D = []  # replay memory
    self.num_states = get_num_states(environment)
    self.model = configuration.model
    # Target network is produced by the clone hook supplied on the
    # configuration object (rather than an imported clone_model).
    self.target_model = configuration.clone_model(self.model)
    self.memory_size = configuration.memory_size
    self.train_counter = 0
    self.episode = 0
    self.init_memory(environment)
def __init__(self, environment, configuration, name='', verbosity_level=10,
             debug=False):
    """Initialise a variational-quantum DQN agent with a target parameter set.

    Sizes the qubit register from the environment, draws one random
    circuit parameter per (qubit, layer) pair, mirrors the parameters
    into a target copy, then defers to the base class (which starts
    training).

    NOTE(review): `verbosity_level` is accepted but never stored or used
    here — confirm whether it should be kept/forwarded.
    """
    self.D = []  # replay memory
    self.memory_size = configuration.memory_size
    self.nb_variational_circuits = configuration.nb_variational_circuits
    self.debug = debug
    self.train_counter = 0
    self.episode = 0
    num_actions = environment.action_space.n
    num_states = get_num_states(environment)
    self.nb_circuit_params = 1
    # Enough qubits to cover both the action count and log2 of the state count.
    self.nb_qbits = int(np.max([num_actions, np.log2(num_states)]))
    # One np.random.random(nb_circuit_params) draw per (qubit, layer) pair.
    self.theta = np.array(
        [np.random.random(self.nb_circuit_params)
         for _ in range(self.nb_qbits * self.nb_variational_circuits)]).flatten()
    # FIX: was copy.deepcopy(self.theta.copy()) — a redundant double copy;
    # a single ndarray.copy() already yields an independent array of scalars.
    self.target_theta = self.theta.copy()
    self.init_memory(environment)
    self.loss_mem = []
    # The base-class initializer starts the training loop, so it must run last.
    super().__init__(environment=environment, debug=debug, name=name,
                     configuration=configuration)
# target update target_replacement = 10 #alpha, gamma, epsilon training_params_Q = [.6, .8, .9] # Instantiate the trainer confQ = Configuration(nb_iterations=nb_iterations, training_params=training_params_Q, cooling_scheme=cooling_schemes, batch_size=1, average=int(batch_size/100), test_steps=1, verbosity_level=1e20) #alpha, gamma, epsilon training_params_DQN = [.6, .8, .9] # DQN opt .1, .8, .9 overfitting nach 590 # NN model for DQN num_states = get_num_states(env) model = Sequential() model.add(Dense(8, input_shape=(16, ), activation='tanh')) model.add(Dense(env.action_space.n, activation='linear')) model.compile(loss=Huber(), optimizer=SGD(training_params_DQN[0])) confDQN = Configuration(nb_iterations=nb_iterations, training_params=training_params_DQN[1:], cooling_scheme=cooling_schemes[1:], batch_size=batch_size, memory_size=memory_size, average=int(nb_iterations/10), model=model, target_replacement=target_replacement, test_steps=1, verbosity_level=10) confDQN.clone_model = clone_model confDQN.embedding = lambda state: to_categorical(state, num_classes=num_states).reshape(num_states) training_params_VQDQN = [.22, .8, .9] # DQN opt .1, .8, .9 overfitting nach 590 confVQD = Configuration(nb_iterations=nb_iterations, training_params=training_params_VQDQN, cooling_scheme=cooling_schemes,