예제 #1
0
    def __init__(
            self,
            env,  # environment emulator object (must expose action_space / observation_space)
            hypes,  # dict of hyperparameters
            scope):  # string used as the TF variable scope for this network
        """
        Create the model configuration: hidden-layer sizes, graph-input
        specs, experience databases and MCTS/normalization parameters.
        """
        self._scope = scope
        self._hidden_1 = hypes['nn']['hidden_1']
        # optionally tie both hidden-layer widths to hidden_1
        if hypes['same_hidden']:
            self._hidden_2 = hypes['nn']['hidden_1']
        else:
            self._hidden_2 = hypes['nn']['hidden_2']
        self._action_dim, self._action_discrete = check_space(env.action_space)
        if not self._action_discrete:
            raise ValueError('Continuous action space not implemented')
        self._state_dim, self._state_discrete = check_space(
            env.observation_space)
        # placeholder spec for states: float vector for continuous spaces,
        # a single int index (one-hot encoded later) for discrete spaces
        if not self._state_discrete:
            s_dict = {
                'shape': np.append(None, self._state_dim),
                'dtype': tf.float32
            }
        else:
            s_dict = {'shape': np.append(None, 1), 'dtype': tf.int32}
        # specs for all graph inputs: state, value target V, policy target pi
        self._input_dict = {
            'state': s_dict,
            'V': {
                'shape': [None, 1],
                'dtype': tf.float32
            },
            'pi': {
                'shape': [None, self._action_dim],
                'dtype': tf.float32
            }
        }

        # long-term replay buffer plus a small buffer for the latest episode
        self._experience = Database(max_size=hypes['db_size'],
                                    batch_size=hypes['nn']['batch_size'])
        self._experience_new = Database(max_size=hypes['max_ep_steps'],
                                        batch_size=hypes['nn']['batch_size'])
        self.avg_returnVar = 1.
        self._temp = hypes['tree_search']['temp']
        self._root_count_type = hypes['tree_search']['root_count_type']
        self._verbose = hypes['tree_search']['verbose_tree']
        self._n_epoch = self.get_n_epochs(hypes)
        # linearly decayed UCB exploration constants, one per decay step
        self._ucb_params = np.linspace(hypes['tree_search']['ucb_max'],
                                       hypes['tree_search']['ucb_min'],
                                       hypes['tree_search']['ucb_decay_steps'])
        self._ucb_decay = hypes['tree_search']['ucb_decay_steps']
        self._beta_POP = hypes['nn']['beta_POP']
        self.current_state = None
        # running first/second return moments (nu/eta) and std (sigma)
        # used for return scale-shift (POP) normalization
        self.nu = 0.
        self.eta = hypes['tree_search']['eta']
        self.sigma = 1.
예제 #2
0
 def _get_one(self, query, params=None):
     """Execute *query* and map the first row via to_object.

     Returns None when the query matches no rows.
     """
     cursor = Database.Instance().dict_cursor()
     cursor.execute(query, params)
     if cursor.rowcount <= 0:
         return None
     return self.to_object(cursor.fetchone())
예제 #3
0
	def get_time_range(self, filter = None):
		"""Return the earliest/latest measurement datetimes matching *filter*.

		Yields a dict with 'min' and 'max' keys, or None when no rows match.
		"""
		filter = self.filter_defaults(filter, 1)
		where_clause = self.__build_filter(filter, "WHERE")
		cursor = Database.Instance().dict_cursor()
		cursor.execute("SELECT MIN(m.datetime) AS min, MAX(m.datetime) AS max FROM Measurements m " + where_clause)
		if cursor.rowcount <= 0:
			return None
		return cursor.fetchone()
예제 #4
0
	def update(self):
		"""Persist the current measurement fields to its Measurements row.

		Returns True when a row was updated; False when required fields
		are missing or no row matched self.id.
		"""
		if self.id is None or self.sensor is None or self.location is None or self.value is None:
			return False

		cur = Database.Instance().dict_cursor()
		cur.execute(
			"UPDATE Measurements SET value = %s, quality = %s, sensor = %s, location = %s, datetime = %s WHERE id = %s",
			[self.value, self.quality, self.sensor, self.location, self.datetime, self.id])
		# rowcount > 0 iff the UPDATE matched the target row; return it
		# directly instead of the redundant if/else True/False.
		return cur.rowcount > 0
예제 #5
0
    def delete(self):
        """Delete this location's row; clears self.id on success.

        Returns True when a row was removed, False otherwise.
        """
        if self.id is None:
            return False

        cursor = Database.Instance().cursor()
        cursor.execute("DELETE FROM Locations WHERE id = %s", [self.id])
        if cursor.rowcount <= 0:
            return False
        # row is gone: drop the stale id so the object reads as unsaved
        self.id = None
        return True
예제 #6
0
    def read(self):
        """Load this notifier's row into the object via from_dict.

        Returns True on success, False when id is unset or no row exists.
        """
        if self.id is None:
            return False

        cursor = Database.Instance().dict_cursor()
        cursor.execute("SELECT * FROM Notifiers WHERE id = %s", [self.id])
        if cursor.rowcount <= 0:
            return False
        self.from_dict(cursor.fetchone())
        return True
예제 #7
0
    def update(self):
        """Persist name, geometry and height for this location.

        Returns True when a row was updated; False when required fields
        are missing or no row matched self.id.
        """
        if self.id is None or self.lon is None or self.lat is None or self.height is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "UPDATE Locations SET name = %s, geom = ST_GeomFromText(%s, 4326), height = %s WHERE id = %s",
            [self.name, self.get_point_wkt(), self.height, self.id])
        # collapse the redundant if/else: rowcount > 0 is the result itself
        return cur.rowcount > 0
예제 #8
0
    def read(self):
        """Load this location's row (with lon/lat extracted from geom).

        Returns True on success, False when id is unset or no row exists.
        """
        if self.id is None:
            return False

        cursor = Database.Instance().dict_cursor()
        cursor.execute(
            "SELECT *, ST_X(geom) AS lon, ST_Y(geom) AS lat FROM Locations WHERE id = %s",
            [self.id])
        if cursor.rowcount <= 0:
            return False
        self.from_dict(cursor.fetchone())
        return True
예제 #9
0
	def create(self):
		"""Insert this measurement; id and datetime are set from the DB
		(via RETURNING).

		Returns True on success, False when required fields are missing.
		"""
		if self.sensor is None or self.location is None or self.value is None:
			return False

		cur = Database.Instance().dict_cursor()
		cur.execute(
			"INSERT INTO Measurements (value, quality, sensor, location) VALUES (%s, %s, %s, %s) RETURNING datetime, id",
			[self.value, self.quality, self.sensor, self.location])
		data = cur.fetchone()
		self.id = data['id']
		self.datetime = data['datetime']
		# a serial id is positive iff the insert produced a row; return the
		# condition directly instead of the redundant if/else True/False.
		return self.id > 0
예제 #10
0
    def create(self):
        """Insert this location (name, WKT geometry, height); sets self.id
        from the DB via RETURNING.

        Returns True on success, False when required fields are missing.
        """
        if self.lon is None or self.lat is None or self.height is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "INSERT INTO Locations (name, geom, height) VALUES (%s, ST_GeomFromText(%s, 4326), %s) RETURNING id",
            [self.name, self.get_point_wkt(), self.height])
        data = cur.fetchone()
        self.id = data['id']
        # serial id is positive iff the insert succeeded
        return self.id > 0
예제 #11
0
    def create(self):
        """Insert this subscriber (settings serialized via _settings_dump);
        sets self.id from the DB via RETURNING.

        Returns True on success, False when required fields are missing.
        """
        if self.sensor is None or self.notifier is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "INSERT INTO Subscribers (settings, sensor, notifier) VALUES (%s, %s, %s) RETURNING id",
            [self._settings_dump(self.settings), self.sensor, self.notifier])
        data = cur.fetchone()
        self.id = data['id']
        # serial id is positive iff the insert succeeded
        return self.id > 0
예제 #12
0
    def update(self):
        """Persist settings, sensor and notifier for this subscriber.

        Returns True when a row was updated; False when required fields
        are missing or no row matched self.id.
        """
        if self.id is None or self.sensor is None or self.notifier is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "UPDATE Subscribers SET settings = %s, sensor = %s, notifier = %s WHERE id = %s",
            [
                self._settings_dump(self.settings), self.sensor, self.notifier,
                self.id
            ])
        # rowcount > 0 iff the UPDATE matched the target row
        return cur.rowcount > 0
예제 #13
0
    def update(self):
        """Persist all notifier fields to its Notifiers row.

        Requires a resolvable notifier implementation and an active flag.
        Returns True when a row was updated; False otherwise.
        """
        impl = self.get_notifier_impl()
        if self.id is None or impl is None or self.active is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "UPDATE Notifiers SET module = %s, class = %s, name = %s, description = %s, settings = %s, public = %s, active = %s WHERE id = %s",
            [
                self.module, self.class_name, self.name, self.description,
                self._settings_dump(self.settings), self.public, self.active,
                self.id
            ])
        # rowcount > 0 iff the UPDATE matched the target row
        return cur.rowcount > 0
예제 #14
0
    def create(self):
        """Insert this notifier; sets self.id from the DB via RETURNING.

        Requires a resolvable notifier implementation and an active flag.
        Returns True on success, False otherwise.
        """
        impl = self.get_notifier_impl()
        if impl is None or self.active is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "INSERT INTO Notifiers (module, class, name, description, settings, public, active) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id",
            [
                self.module, self.class_name, self.name, self.description,
                self._settings_dump(self.settings), self.public, self.active
            ])
        data = cur.fetchone()
        self.id = data['id']
        # serial id is positive iff the insert succeeded
        return self.id > 0
예제 #15
0
    def update(self):
        """Persist all sensor fields; type and unit are taken from the
        sensor implementation.

        Returns True when a row was updated; False otherwise.
        """
        impl = self.get_sensor_impl()
        if self.id is None or impl is None or self.active is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "UPDATE Sensors SET module = %s, class = %s, type = %s, description = %s, unit = %s, active = %s, settings = %s WHERE id = %s",
            [
                self.module, self.class_name,
                impl.get_type(), self.description,
                impl.get_unit(), self.active,
                self._settings_dump(self.settings), self.id
            ])
        # rowcount > 0 iff the UPDATE matched the target row
        return cur.rowcount > 0
예제 #16
0
    def create(self):
        """Insert this sensor; sets self.id from the DB via RETURNING.
        Type and unit are taken from the sensor implementation.

        Returns True on success, False otherwise.
        """
        impl = self.get_sensor_impl()
        if impl is None or self.active is None:
            return False

        cur = Database.Instance().dict_cursor()
        cur.execute(
            "INSERT INTO Sensors (module, class, type, description, unit, active, settings) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id",
            [
                self.module, self.class_name,
                impl.get_type(), self.description,
                impl.get_unit(), self.active,
                self._settings_dump(self.settings)
            ])
        data = cur.fetchone()
        self.id = data['id']
        # serial id is positive iff the insert succeeded
        return self.id > 0
예제 #17
0
class agent():
    """
    Two-head (policy + value) neural network agent for AlphaZero-style
    training: the network is trained on targets produced by MCTS and in
    turn supplies priors and value estimates to guide the tree search.
    """
    def __init__(
            self,
            env,  # environment emulator object (must expose action_space / observation_space)
            hypes,  # dict of hyperparameters
            scope):  # string used as the TF variable scope for this network
        """
        Create the model configuration: hidden-layer sizes, graph-input
        specs, experience databases and MCTS/normalization parameters.
        """
        self._scope = scope
        self._hidden_1 = hypes['nn']['hidden_1']
        # optionally tie both hidden-layer widths to hidden_1
        if hypes['same_hidden']:
            self._hidden_2 = hypes['nn']['hidden_1']
        else:
            self._hidden_2 = hypes['nn']['hidden_2']
        self._action_dim, self._action_discrete = check_space(env.action_space)
        if not self._action_discrete:
            raise ValueError('Continuous action space not implemented')
        self._state_dim, self._state_discrete = check_space(
            env.observation_space)
        # placeholder spec for states: float vector for continuous spaces,
        # a single int index (one-hot encoded later) for discrete spaces
        if not self._state_discrete:
            s_dict = {
                'shape': np.append(None, self._state_dim),
                'dtype': tf.float32
            }
        else:
            s_dict = {'shape': np.append(None, 1), 'dtype': tf.int32}
        # specs for all graph inputs: state, value target V, policy target pi
        self._input_dict = {
            'state': s_dict,
            'V': {
                'shape': [None, 1],
                'dtype': tf.float32
            },
            'pi': {
                'shape': [None, self._action_dim],
                'dtype': tf.float32
            }
        }

        # long-term replay buffer plus a small buffer for the latest episode
        self._experience = Database(max_size=hypes['db_size'],
                                    batch_size=hypes['nn']['batch_size'])
        self._experience_new = Database(max_size=hypes['max_ep_steps'],
                                        batch_size=hypes['nn']['batch_size'])
        self.avg_returnVar = 1.
        self._temp = hypes['tree_search']['temp']
        self._root_count_type = hypes['tree_search']['root_count_type']
        self._verbose = hypes['tree_search']['verbose_tree']
        self._n_epoch = self.get_n_epochs(hypes)
        # linearly decayed UCB exploration constants, one per decay step
        self._ucb_params = np.linspace(hypes['tree_search']['ucb_max'],
                                       hypes['tree_search']['ucb_min'],
                                       hypes['tree_search']['ucb_decay_steps'])
        self._ucb_decay = hypes['tree_search']['ucb_decay_steps']
        self._beta_POP = hypes['nn']['beta_POP']
        self.current_state = None
        # running first/second return moments (nu/eta) and std (sigma)
        # used for return scale-shift (POP) normalization
        self.nu = 0.
        self.eta = hypes['tree_search']['eta']
        self.sigma = 1.

    def build_model(self, inputs):  # dict of TF graph input placeholders
        """
        Build the shared two-layer ELU trunk plus the policy and value
        heads inside this agent's variable scope.
        """
        self._inputs = inputs
        with tf.variable_scope(self._scope):
            # feedforward
            x = inputs['state']
            if self._state_discrete:
                # one-hot encode integer state indices and drop the extra axis
                x = tf.squeeze(tf.one_hot(x, self._state_dim, axis=1), axis=2)
            x = slim.fully_connected(x,
                                     self._hidden_1,
                                     activation_fn=tf.nn.elu)
            x = slim.fully_connected(x,
                                     self._hidden_2,
                                     activation_fn=tf.nn.elu)
            # output
            self.f = x
            # policy head: raw logits, softmaxed into action probabilities
            self.log_pi_hat = slim.fully_connected(x,
                                                   self._action_dim,
                                                   activation_fn=None)
            self.pi_hat = tf.nn.softmax(self.log_pi_hat)
            # value head: scalar state-value estimate
            self.V_hat = slim.fully_connected(x,
                                              1,
                                              activation_fn=None,
                                              scope='V_hat')
            self.network_out = [self.pi_hat, self.V_hat]

    def predict_V(
            self,
            state,  # batched state matching the 'state' placeholder shape
            sess):  # TF session
        """
        Run inference on the given state and return the value-head output.
        """
        return sess.run(self.V_hat, feed_dict={self._inputs['state']: state})

    def predict_pi(
            self,
            state,  # batched state matching the 'state' placeholder shape
            sess):  # TF session
        """
        Run inference on the given state and return the policy-head output.
        """
        return sess.run(self.pi_hat, feed_dict={self._inputs['state']: state})

    def _loss(self):
        """
        Combined loss: 0.5 * MSE on the value target plus cross-entropy
        between the MCTS policy targets and the policy-head logits.
        """
        self.V_loss = 0.5 * tf.losses.mean_squared_error(
            labels=self._inputs['V'], predictions=self.V_hat)
        self.pi_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self._inputs['pi'], logits=self.log_pi_hat)
        self.loss = self.V_loss + tf.reduce_mean(self.pi_loss)
        return self.loss

    def update_scale_shift(self, beta):  # exponential decay parameter
        """
        Update the running return-normalization statistics from the
        returns stored at the current MCTS root.
        """
        returns = self.current_state.returns
        returns_2 = [x**2 for x in returns]
        # keep previous values so callers can compare/undo the shift
        self.sigma_old = deepcopy(self.sigma)
        self.nu_old = deepcopy(self.nu)
        # exponential moving averages of the first and second moments;
        # sigma is the implied standard deviation
        self.nu = (1 - beta) * self.nu + beta * np.mean(returns)
        self.eta = (1 - beta) * self.eta + beta * np.mean(returns_2)
        self.sigma = np.sqrt(self.eta - self.nu**2)

    def update_POP(
            self,
            sess,  # TF session (not used in this method)
            t):  # int, time step
        """
        Compute the bias-corrected EMA coefficient for step t and refresh
        the return-normalization statistics.
        """
        beta = self._beta_POP * (1 - (1 - self._beta_POP)**(t + 1))**(-1)
        self.update_scale_shift(beta)

    def select_action(
            self,
            hypes,  # dict of hyperparameters
            modules,  # dict of modules; must provide modules['tree_search'].MCTS
            env,  # environment emulator object
            sess,  # TF session
            root_index,  # current state used as the tree-search root
            env_reset,  # bool, True when the environment was just reset
            t,  # int, episode time step
            t_all,  # int, total training time steps
            ep,  # int, episode count
            outdir=""):
        """
        Run MCTS from the current root, store the resulting (state, V, pi)
        training targets, sample an action from the MCTS policy and advance
        the cached root to the chosen child.
        """
        # a reset invalidates the cached subtree from the previous step
        if env_reset == True: self.current_state = None
        self.get_ucb_param(t_all)
        self.current_state = modules['tree_search'].MCTS(
            self, root_index, self.current_state, env, sess,
            hypes['tree_search'], ep, t_all, t, outdir)
        self.update_POP(sess, t)
        # network priors at the root, kept for logging/inspection (pi_NN)
        priors = np.squeeze(
            self.predict_pi(self.current_state.index[None, ], sess))
        pi, V = self.current_state.return_results(self._root_count_type,
                                                  self._temp)
        # store targets in both the long-term and latest-episode buffers
        self._experience.store((self.current_state.index, V, pi))
        self._experience_new.store((self.current_state.index, V, pi))
        a = np.random.choice(len(pi), p=pi)
        # reuse the chosen child's subtree as the next step's root
        self.current_state = self.current_state.forward(a)
        self.pi_NN = priors
        self.pi_MCTS = pi
        return a, copy.copy(self.current_state)

    def clear_history(self):
        """
        Clear the previous episode's training-data buffer.
        """
        self._experience_new.clear()

    def get_ucb_param(self, t):  # int, time step
        """
        Select the decayed UCB exploration constant for step t, clamped
        to the final schedule value once the decay has finished.
        """
        self._ucb_param = self._ucb_params[min(t, self._ucb_decay - 1)]

    def get_n_epochs(self, hypes):  # dict of hyperparameters
        """
        Derive the number of training epochs from the ratio of configured
        MCTS iterations to the configured minimum.
        """
        mcts_min = hypes['tree_search']['n_mcts_min']
        n_mcts = float(hypes['tree_search']['n_mcts'])
        c_epoch = hypes['c_epoch']
        n_epoch = int(np.rint(((n_mcts / mcts_min - 1) * c_epoch + 1)))
        return n_epoch

    def train_network(
            self,
            graph,  # dict of TF handles: inputs, train_op, loss, grad norms
            tf_sess,  # dict holding a live TF session under 'sess'
            database='main'):
        """
        Train the network on stored experience.

        database='recent' trains one epoch on the latest episode only;
        any other value trains self._n_epoch epochs on the main buffer.
        Returns (mean loss, grad norms, clipped grad norms, grads summary).
        """
        if database == 'recent':
            db = self._experience_new
            self._experience_new.reshuffle()
            n_epoch = 1
        else:
            db = self._experience
            self._experience.reshuffle()
            n_epoch = self._n_epoch
        sess = tf_sess['sess']
        merge = tf.summary.merge_all()
        counter = 0
        losses, grads_norms, grads_clipped_norms = [], [], []
        for epoch in range(n_epoch):
            for sb, Vb, pib in db:
                counter += 1
                batch_size = len(sb)
                feed_dict = {
                    graph['inputs']['state']: sb,
                    graph['inputs']['V']: Vb,
                    graph['inputs']['pi']: pib
                }
                _, loss_value, grads_norm, grads_norm_clipped, grads_sum, summary = sess.run(
                    [
                        graph['train_op'], graph['loss'], graph['grads_norm'],
                        graph['grads_norm_clipped'], graph['grads_sum'], merge
                    ],
                    feed_dict=feed_dict)
                #tf_sess['writer'].add_summary(summary, counter)
                losses.append(loss_value / batch_size)
                grads_norms.append(grads_norm)
                # print(np.mean(grads_norm))
                grads_clipped_norms.append(grads_norm_clipped)
            mean_loss = np.mean(losses)
        # NOTE(review): mean_loss and grads_sum are unbound if no batches
        # were produced (empty database or n_epoch == 0) — verify callers
        # never hit that case.
        return mean_loss, grads_norms, grads_clipped_norms, grads_sum