Exemplo n.º 1
0
    def __init__(self,
                 sensor_models,
                 calibration_model,
                 lr=1e-4,
                 batch_size=20,
                 log_dir=None,
                 **kwargs):
        self.graph = T.core.Graph()
        self.log_dir = log_dir
        with self.graph.as_default():
            self.calibration_model = calibration_model
            self.board_ids = list(sensor_models.keys())
            self.board_map = {b: i for i, b in enumerate(self.board_ids)}
            self.sensor_map = sensor_models
            self.sensor_models = [
                sensor_models[board_id] for board_id in self.board_ids
            ]
            self.architecture = pickle.dumps(
                [sensor_models, calibration_model])
            self.batch_size = batch_size
            self.lr = lr

            self.learning_rate = T.placeholder(T.floatx(), [])
            self.sensors = T.placeholder(T.floatx(), [None, 3])
            self.env = T.placeholder(T.floatx(), [None, 3])
            self.board = T.placeholder(T.core.int32, [None])
            self.boards = T.transpose(
                T.pack([self.board,
                        T.range(T.shape(self.board)[0])]))
            self.rep = T.gather_nd(
                T.pack([
                    sensor_model(self.sensors)
                    for sensor_model in self.sensor_models
                ]), self.boards)
            self.rep_ = T.placeholder(T.floatx(),
                                      [None, self.rep.get_shape()[-1]])
            rep_env = T.concat([self.rep, self.env], -1)
            rep_env_ = T.concat([self.rep_, self.env], -1)
            self.y_ = self.calibration_model(rep_env)
            self.y_rep = self.calibration_model(rep_env_)
            self.y = T.placeholder(T.floatx(), [None, 2])
            self.loss = T.mean((self.y - self.y_)**2)
            self.mae = T.mean(T.abs(self.y - self.y_))
            T.core.summary.scalar('MSE', self.loss)
            T.core.summary.scalar('MAE', self.mae)
            self.summary = T.core.summary.merge_all()
            self.train_op = T.core.train.AdamOptimizer(
                self.learning_rate).minimize(self.loss)

        self.session = T.interactive_session(graph=self.graph)
Exemplo n.º 2
0
    def __init__(self, features, model=None, batch_size=20, lr=1e-4):
        super(NeuralNetwork, self).__init__(features)
        self.graph = T.core.Graph()
        with self.graph.as_default():
            self.architecture = pickle.dumps(model)
            self.model = model #Relu(6, 200) >> Relu(200) >> Relu(200) >> Relu(200) >> Linear(2)
            self.batch_size = batch_size
            self.lr = lr

            self.X = T.placeholder(T.floatx(), [None, 6])
            self.y = T.placeholder(T.floatx(), [None, 2])
            self.y_ = self.model(self.X)
            self.loss = T.mean((self.y - self.y_) ** 2)
            self.train_op = T.core.train.AdamOptimizer(self.lr).minimize(self.loss)

        self.session = T.interactive_session(graph=self.graph)
Exemplo n.º 3
0
        y[i, labels[i]] = 1

    split = int(0.9 * N)

    train_idx, test_idx = idx[:split], idx[split:]

    Xtrain, Xtest = X[train_idx], X[test_idx]
    ytrain, ytest = y[train_idx], y[test_idx]

    X_in = T.placeholder(T.floatx(), [None, 28, 28, 1])
    Y_in = T.placeholder(T.floatx(), [None, 10])

    conv_net = Conv((2, 2, 10)) >> Conv((2, 2, 20)) >> Flatten() >> Linear(10)
    logits = conv_net(X_in)
    predictions = T.argmax(logits, -1)
    loss = T.mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_in))

    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

    sess = T.interactive_session()

    def train(n_iter, batch_size=20):
        for i in range(n_iter):
            idx = np.random.permutation(Xtrain.shape[0])[:batch_size]
            result = sess.run([loss, train_op], { X_in : Xtrain[idx], Y_in : ytrain[idx] })
            print("Loss:", result[0])

        preds = sess.run(predictions, { X_in : Xtest }).astype(np.int32)
        print("Error: ", 1 - (preds == labels[test_idx]).sum() / float(N - split))
    train(1000)
Exemplo n.º 4
0
 def setUp(self):
     self.session = T.interactive_session()
Exemplo n.º 5
0
    def initialize(self):
        self.graph = T.core.Graph()
        with self.graph.as_default():
            prior_params = self.prior_params.copy()
            prior_type = prior_params.pop('prior_type')
            self.prior = PRIOR_MAP[prior_type](self.ds, self.da, self.horizon, **prior_params)

            cost_params = self.cost_params.copy()
            cost_type = cost_params.pop('cost_type')
            self.cost = COST_MAP[cost_type](self.ds, self.da, **cost_params)

            self.O = T.placeholder(T.floatx(), [None, None, self.do])
            self.U = T.placeholder(T.floatx(), [None, None, self.du])
            self.C = T.placeholder(T.floatx(), [None, None])
            self.S = T.placeholder(T.floatx(), [None, None, self.ds])
            self.A = T.placeholder(T.floatx(), [None, None, self.da])

            self.t = T.placeholder(T.int32, [])
            self.state, self.action = T.placeholder(T.floatx(), [None, self.ds]), T.placeholder(T.floatx(), [None, self.da])
            if self.prior.has_dynamics():
                self.next_state = self.prior.next_state(self.state, self.action, self.t)
                self.prior_dynamics = self.prior.get_dynamics()

            self.num_data = T.scalar()
            self.beta = T.placeholder(T.floatx(), [])
            self.learning_rate = T.placeholder(T.floatx(), [])
            self.model_learning_rate = T.placeholder(T.floatx(), [])

            self.S_potentials = util.map_network(self.state_encoder)(self.O)
            self.A_potentials = util.map_network(self.action_encoder)(self.U)

            if self.prior.is_dynamics_prior():
                self.data_strength = T.placeholder(T.floatx(), [])
                self.max_iter = T.placeholder(T.int32, [])
                posterior_dynamics, (encodings, actions) = \
                        self.prior.posterior_dynamics(self.S_potentials, self.A_potentials,
                                                      data_strength=self.data_strength,
                                                      max_iter=self.max_iter)
                self.posterior_dynamics_ = posterior_dynamics, (encodings.expected_value(), actions.expected_value())

            if self.prior.is_filtering_prior():
                self.prior_dynamics_stats = self.prior.sufficient_statistics()
                self.dynamics_stats = (
                    T.placeholder(T.floatx(), [None, self.ds, self.ds]),
                    T.placeholder(T.floatx(), [None, self.ds, self.ds + self.da]),
                    T.placeholder(T.floatx(), [None, self.ds + self.da, self.ds + self.da]),
                    T.placeholder(T.floatx(), [None]),
                )
                S_natparam = self.S_potentials.get_parameters('natural')
                num_steps = T.shape(S_natparam)[1]

                self.padded_S = stats.Gaussian(T.core.pad(
                    self.S_potentials.get_parameters('natural'),
                    [[0, 0], [0, self.horizon - num_steps], [0, 0], [0, 0]]
                ), 'natural')
                self.padded_A = stats.GaussianScaleDiag([
                    T.core.pad(self.A_potentials.get_parameters('regular')[0],
                            [[0, 0], [0, self.horizon - num_steps], [0, 0]]),
                    T.core.pad(self.A_potentials.get_parameters('regular')[1],
                            [[0, 0], [0, self.horizon - num_steps], [0, 0]])
                ], 'regular')
                self.q_S_padded, self.q_A_padded = self.prior.encode(
                    self.padded_S, self.padded_A,
                    dynamics_stats=self.dynamics_stats
                )
                self.q_S_filter = self.q_S_padded.filter(max_steps=num_steps)
                self.q_A_filter = self.q_A_padded.__class__(
                    self.q_A_padded.get_parameters('natural')[:, :num_steps]
                , 'natural')
                self.e_q_S_filter = self.q_S_filter.expected_value()
                self.e_q_A_filter = self.q_A_filter.expected_value()

            (self.q_S, self.q_A), self.prior_kl, self.kl_grads, self.info = self.prior.posterior_kl_grads(
                self.S_potentials, self.A_potentials, self.num_data
            )

            self.q_S_sample = self.q_S.sample()[0]
            self.q_A_sample = self.q_A.sample()[0]

            self.q_O = util.map_network(self.state_decoder)(self.q_S_sample)
            self.q_U = util.map_network(self.action_decoder)(self.q_A_sample)
            self.q_O_sample = self.q_O.sample()[0]
            self.q_U_sample = self.q_U.sample()[0]

            self.q_O_ = util.map_network(self.state_decoder)(self.S)
            self.q_U_ = util.map_network(self.action_decoder)(self.A)
            self.q_O__sample = self.q_O_.sample()[0]
            self.q_U__sample = self.q_U_.sample()[0]

            self.cost_likelihood = self.cost.log_likelihood(self.q_S_sample, self.C)
            if self.cost.is_cost_function():
                self.evaluated_cost = self.cost.evaluate(self.S)
            self.log_likelihood = T.sum(self.q_O.log_likelihood(self.O), axis=1)

            self.elbo = T.mean(self.log_likelihood + self.cost_likelihood - self.prior_kl)
            train_elbo = T.mean(self.log_likelihood + self.beta * (self.cost_likelihood - self.prior_kl))
            T.core.summary.scalar("encoder-stdev", T.mean(self.S_potentials.get_parameters('regular')[0]))
            T.core.summary.scalar("log-likelihood", T.mean(self.log_likelihood))
            T.core.summary.scalar("cost-likelihood", T.mean(self.cost_likelihood))
            T.core.summary.scalar("prior-kl", T.mean(self.prior_kl))
            T.core.summary.scalar("beta", self.beta)
            T.core.summary.scalar("elbo", self.elbo)
            T.core.summary.scalar("beta-elbo", train_elbo)
            for k, v in self.info.items():
                T.core.summary.scalar(k, T.mean(v))
            self.summary = T.core.summary.merge_all()
            neural_params = (
                self.state_encoder.get_parameters()
                + self.state_decoder.get_parameters()
                + self.action_encoder.get_parameters()
                + self.action_decoder.get_parameters()
            )
            cost_params = self.cost.get_parameters()
            if len(neural_params) > 0:
                optimizer = T.core.train.AdamOptimizer(self.learning_rate)
                gradients, variables = zip(*optimizer.compute_gradients(-train_elbo, var_list=neural_params))
                gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                self.neural_op = optimizer.apply_gradients(zip(gradients, variables))
            else:
                self.neural_op = T.core.no_op()
            if len(cost_params) > 0:
                self.cost_op = T.core.train.AdamOptimizer(self.learning_rate).minimize(-self.elbo, var_list=cost_params)
            else:
                self.cost_op = T.core.no_op()
            if len(self.kl_grads) > 0:
                if self.prior.is_dynamics_prior():
                    # opt = lambda x: T.core.train.MomentumOptimizer(x, 0.5)
                    opt = lambda x: T.core.train.GradientDescentOptimizer(x)
                else:
                    opt = T.core.train.AdamOptimizer
                self.dynamics_op = opt(self.model_learning_rate).apply_gradients([
                    (b, a) for a, b in self.kl_grads
                ])
            else:
                self.dynamics_op = T.core.no_op()
            self.train_op = T.core.group(self.neural_op, self.dynamics_op, self.cost_op)
        self.session = T.interactive_session(graph=self.graph, allow_soft_placement=True, log_device_placement=False)