Example #1
def sparse_autoencoders(X, Y):
    train_X, test_X, train_Y, test_Y = train_test_split(
        X, Y, test_size=0.2, stratify=Y, random_state=42)
    train_X = train_X.astype('float32') / 255.0
    test_X = test_X.astype('float32') / 255.0
    n_h = 64
    input_img = Input(shape=(784, ))
    code = Dense(n_h,
                 activation='relu',
                 activity_regularizer=regularizers.l1(10e-6))(input_img)
    output_img = Dense(784, activation='sigmoid')(code)
    autoencoder = Model(input_img, output_img)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    history = autoencoder.fit(train_X, train_X, epochs=10)
    encoded = Model(input_img, code)
    # reconstructed = autoencoder.predict(test_X)
    weights = autoencoder.get_weights()[0].T
    # hidden_layer_vis(weights)

    train_X = encoded.predict(train_X)
    test_X = encoded.predict(test_X)
    param = model(train_X, train_Y, 10, 0, [20])
    pred = predict(test_X, param)
    # # print(confusion_matrix(test_Y, pred))
    print('The accuracy of the neural network is',
          metrics.accuracy_score(test_Y, pred))
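# The snippet above assumes roughly the following imports (a guess; it may
# equally target standalone Keras rather than tf.keras, and `model`, `predict`
# and `hidden_layer_vis` are project-specific helpers that are not shown here):
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers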
Example #2
def build_fixed_layers_models(model: Model) -> List[Model]:
    """Return compiled clones of `model` with progressively more leading layers
    frozen (one clone per layer that has trainable weights), each initialized
    with the original model's weights."""
    models_list: List[Model] = []
    weights = model.get_weights()
    for i in range(1, len(model.layers) + 1):
        if not model.layers[i - 1].trainable_weights:
            continue
        frozen_model = tf.keras.models.clone_model(model)
        for j in range(i):
            frozen_model.layers[j].trainable = False

        frozen_model.compile(loss=tf.keras.losses.categorical_crossentropy,
                             optimizer=tf.keras.optimizers.SGD(),
                             metrics=['accuracy'])
        frozen_model.set_weights(weights)
        models_list.append(frozen_model)

    return models_list
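# A minimal usage sketch for build_fixed_layers_models (the model below is
# hypothetical; layer sizes are illustrative only):
import tensorflow as tf

inp = tf.keras.Input(shape=(16,))
hidden = tf.keras.layers.Dense(32, activation='relu')(inp)
out = tf.keras.layers.Dense(10, activation='softmax')(hidden)
base = tf.keras.Model(inp, out)

# One clone per layer with trainable weights; each clone starts from base's
# current weights but has its leading layers frozen.
frozen_variants = build_fixed_layers_models(base)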
Example #3
    def save(
        self,
        model: Model,
        include_optimizer: bool = False,
        update: bool = False,
        meta: Optional[dict] = None,
    ):
        """
        Saves a Tensorflow model as a TileDB array.
        :param model: Tensorflow model.
        :param include_optimizer: Boolean. Whether to save the optimizer or not.
        :param update: Boolean. Whether we should update any existing TileDB array model at the target location.
        :param meta: Dict. Extra metadata to save in a TileDB array.
        """
        if not isinstance(model, (Functional, Sequential)):
            raise NotImplementedError(
                "No support for Subclassed models at the moment. Your "
                "model should be either Sequential or Functional.")

        # Serialize model weights and optimizer (if needed)
        model_weights = pickle.dumps(model.get_weights(), protocol=4)

        # Serialize model optimizer
        optimizer_weights = self._serialize_optimizer_weights(
            model=model, include_optimizer=include_optimizer)

        # Create TileDB model array
        if not update:
            self._create_array()

        self._write_array(
            model=model,
            include_optimizer=include_optimizer,
            serialized_weights=model_weights,
            serialized_optimizer_weights=optimizer_weights,
            meta=meta,
        )
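    # Round-trip sketch (not part of the original API): the pickled weights
    # written above can be restored with pickle alone, independently of the
    # TileDB storage layer:
    #     model.set_weights(pickle.loads(model_weights))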
Example #4
def layer_test(layer_cls,
               kwargs={},
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               fixed_batch_size=False,
               supports_masking=False):
    # generate input data

    if input_data is None:

        if not input_shape:
            raise AssertionError()

        if not input_dtype:

            input_dtype = K.floatx()

        input_data_shape = list(input_shape)

        for i, e in enumerate(input_data_shape):

            if e is None:

                input_data_shape[i] = np.random.randint(1, 4)
        input_mask = []
        if all(isinstance(e, tuple) for e in input_data_shape):
            input_data = []

            for e in input_data_shape:
                input_data.append(
                    (10 * np.random.random(e)).astype(input_dtype))
                if supports_masking:
                    a = np.full(e[:2], False)
                    a[:, :e[1] // 2] = True
                    input_mask.append(a)

        else:

            input_data = (10 * np.random.random(input_data_shape))

            input_data = input_data.astype(input_dtype)
            if supports_masking:
                a = np.full(input_data_shape[:2], False)
                a[:, :input_data_shape[1] // 2] = True
                input_mask.append(a)

    else:

        if input_shape is None:

            input_shape = input_data.shape

        if input_dtype is None:

            input_dtype = input_data.dtype

    if expected_output_dtype is None:

        expected_output_dtype = input_dtype

    # instantiation

    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level

    weights = layer.get_weights()

    layer.set_weights(weights)

    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if isinstance(input_shape, list):
        if fixed_batch_size:

            x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [
                    Input(batch_shape=e[0:2], dtype=bool) for e in input_shape
                ]

        else:

            x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [Input(shape=(e[1], ), dtype=bool) for e in input_shape]

    else:
        if fixed_batch_size:

            x = Input(batch_shape=input_shape, dtype=input_dtype)
            if supports_masking:
                mask = Input(batch_shape=input_shape[0:2], dtype=bool)

        else:

            x = Input(shape=input_shape[1:], dtype=input_dtype)
            if supports_masking:
                mask = Input(shape=(input_shape[1], ), dtype=bool)

    if supports_masking:

        y = layer(Masking()(x), mask=mask)
    else:
        y = layer(x)

    if K.dtype(y) != expected_output_dtype:
        raise AssertionError()

    # check with the functional API
    if supports_masking:
        model = Model([x, mask], y)

        actual_output = model.predict([input_data, input_mask[0]])
    else:
        model = Model(x, y)

        actual_output = model.predict(input_data)

    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):

        if expected_dim is not None:

            if expected_dim != actual_dim:
                raise AssertionError("expected_shape", expected_output_shape,
                                     "actual_shape", actual_output_shape)

    if expected_output is not None:

        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level

    model_config = model.get_config()

    recovered_model = model.__class__.from_config(model_config)

    if model.weights:

        weights = model.get_weights()

        recovered_model.set_weights(weights)

        _output = recovered_model.predict(input_data)

        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a

    # different behavior at training and testing time).

    if has_arg(layer.call, 'training'):

        model.compile('rmsprop', 'mse')

        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config

    layer_config = layer.get_config()

    layer_config['batch_input_shape'] = input_shape

    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function

    return actual_output
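# A hypothetical smoke test for layer_test above, assuming a plain Keras install
# (swap the import to tensorflow.keras if that is what this snippet targets):
if __name__ == '__main__':
    from keras.layers import Dense
    output = layer_test(Dense, kwargs={'units': 3}, input_shape=(2, 4))
    print('layer_test returned an output of shape', output.shape)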
Example #5
class JointEmbeddingModel:
	def __init__(self, config):
		self.data_dir = config.data_dir
		self.model_name = config.model_name
		self.meth_name_len = config.methname_len  # the max length of method name
		self.apiseq_len = config.apiseq_len
		self.tokens_len = config.tokens_len
		self.desc_len = config.desc_len

		self.vocab_size = config.n_words  # the size of vocab
		self.embed_dims = config.embed_dims
		self.lstm_dims = config.lstm_dims
		self.hidden_dims = config.hidden_dims

		self.margin = 0.05

		self.init_embed_weights_meth_name = config.init_embed_weights_methodname
		self.init_embed_weights_tokens = config.init_embed_weights_tokens
		self.init_embed_weights_desc = config.init_embed_weights_desc

		self.meth_name = Input(shape=(self.meth_name_len,), dtype='int32', name='meth_name')
		self.apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
		self.tokens = Input(shape=(self.tokens_len,), dtype='int32', name='tokens2')
		self.desc_good = Input(shape=(self.desc_len,), dtype='int32', name='desc_good')
		self.desc_bad = Input(shape=(self.desc_len,), dtype='int32', name='desc_bad')

		if not os.path.exists(self.data_dir + 'model/' + self.model_name):
			os.makedirs(self.data_dir + 'model/' + self.model_name)

	def build(self):

		self.transformer_meth = transformer.EncoderModel(vocab_size=self.vocab_size, model_dim=self.hidden_dims,
		                                                 embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
		                                                 droput_rate=0.2, n_heads=2, max_len=self.meth_name_len,
		                                                 name='methT')

		self.transformer_apiseq = transformer.EncoderModel(vocab_size=self.vocab_size, model_dim=self.hidden_dims,
		                                                   embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
		                                                   droput_rate=0.2, n_heads=4, max_len=self.apiseq_len,
		                                                   name='apiseqT')

		self.transformer_desc = transformer.EncoderModel(vocab_size=self.vocab_size, model_dim=self.hidden_dims,
		                                                 embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
		                                                 droput_rate=0.2, n_heads=4, max_len=self.desc_len, name='descT')

		# self.transformer_ast = EncoderModel(vocab_size=self.vocab_size, model_dim=self.hidden_dims, embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2, n_heads=4, max_len=128)
		self.transformer_tokens = transformer.EncoderModel(vocab_size=self.vocab_size, model_dim=self.hidden_dims,
		                                                   embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
		                                                   droput_rate=0.2, n_heads=8, max_len=self.tokens_len,
		                                                   name='tokensT')
		# create path to store model Info

		# 1 -- CodeNN
		meth_name = Input(shape=(self.meth_name_len,), dtype='int32', name='meth_name')
		apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
		tokens3 = Input(shape=(self.tokens_len,), dtype='int32', name='tokens3')

		# method name
		# embedding layer

		meth_name_out = self.transformer_meth(meth_name)
		# max pooling
		maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]),
		                 name='maxpooling_methodname')
		method_name_pool = maxpool(meth_name_out)
		activation = Activation('tanh', name='active_method_name')
		method_name_repr = activation(method_name_pool)

		# apiseq
		# embedding layer

		apiseq_out = self.transformer_apiseq(apiseq)
		# max pooling
		maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]),
		                 name='maxpooling_apiseq')
		apiseq_pool = maxpool(apiseq_out)
		activation = Activation('tanh', name='active_apiseq')
		apiseq_repr = activation(apiseq_pool)

		# tokens
		# embedding layer

		tokens_out = self.transformer_tokens(tokens3)
		# max pooling
		maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]),
		                 name='maxpooling_tokens')
		tokens_pool = maxpool(tokens_out)
		activation = Activation('tanh', name='active_tokens')
		tokens_repr = activation(tokens_pool)

		# fusion method_name, apiseq, tokens
		merge_method_name_api = Concatenate(name='merge_methname_api')([method_name_repr, apiseq_repr])
		merge_code_repr = Concatenate(name='merge_code_repr')([merge_method_name_api, tokens_repr])

		code_repr = Dense(self.hidden_dims, activation='tanh', name='dense_coderepr')(merge_code_repr)

		self.code_repr_model = Model(inputs=[meth_name, apiseq, tokens3], outputs=[code_repr], name='code_repr_model')
		self.code_repr_model.summary()

		self.output = Model(inputs=self.code_repr_model.input, outputs=self.code_repr_model.get_layer('tokensT').output)
		self.output.summary()

		#  2 -- description
		desc = Input(shape=(self.desc_len,), dtype='int32', name='desc')

		# desc
		# embedding layer
		desc_out = self.transformer_desc(desc)

		# max pooling

		maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]),
		                 name='maxpooling_desc')
		desc_pool = maxpool(desc_out)
		activation = Activation('tanh', name='active_desc')
		desc_repr = activation(desc_pool)

		self.desc_repr_model = Model(inputs=[desc], outputs=[desc_repr], name='desc_repr_model')
		self.desc_repr_model.summary()

		#  3 -- cosine similarity
		code_repr = self.code_repr_model([meth_name, apiseq, tokens3])

		desc_repr = self.desc_repr_model([desc])

		cos_sim = Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr])

		sim_model = Model(inputs=[meth_name, apiseq, tokens3, desc], outputs=[cos_sim], name='sim_model')
		self.sim_model = sim_model

		self.sim_model.summary()

		#  4 -- build training model
		good_sim = sim_model([self.meth_name, self.apiseq, self.tokens, self.desc_good])
		bad_sim = sim_model([self.meth_name, self.apiseq, self.tokens, self.desc_bad])
		loss = Lambda(lambda x: k.maximum(1e-6, self.margin - (x[0] - x[1])), output_shape=lambda x: x[0], name='loss')(
			[good_sim, bad_sim])

		self.training_model = Model(inputs=[self.meth_name, self.apiseq, self.tokens, self.desc_good, self.desc_bad],
		                            outputs=[loss], name='training_model')

		self.training_model.summary()

	def compile(self, optimizer, **kwargs):
		# NOTE: the optimizer argument is ignored; a hard-coded SGD is used instead.
		optimizer = keras.optimizers.SGD(lr=0.0001, momentum=0.9, nesterov=True)
		# optimizer = keras.optimizers.Adam(lr=0.0001)
		# print(self.code_repr_model.layers, self.desc_repr_model.layers, self.training_model.layers, self.sim_model.layers)
		self.code_repr_model.compile(loss='cosine_proximity', optimizer=optimizer, **kwargs)
		self.desc_repr_model.compile(loss='cosine_proximity', optimizer=optimizer, **kwargs)
		# The training model's output already is the ranking loss, so this "loss"
		# just passes y_pred through (y_true is added and subtracted to keep Keras happy).
		self.training_model.compile(loss=lambda y_true, y_pred: y_pred + y_true - y_true, optimizer=optimizer, **kwargs)
		self.sim_model.compile(loss='binary_crossentropy', optimizer=optimizer, **kwargs)

	def fit(self, x, **kwargs):
		# Dummy zero targets: the training model's output is the loss itself.
		y = np.zeros(shape=x[0].shape[:1], dtype=np.float32)
		return self.training_model.fit(x, y, **kwargs)

	def getOutput(self, x):
		# functor = k.function([self.code_repr_model.layers[0].input, k.learning_phase()], [self.code_repr_model.layers[0].output])
		# print(functor(x)[0])
		print(self.output.predict(x))

	def repr_code(self, x, **kwargs):
		return self.code_repr_model.predict(x, **kwargs)

	def repr_desc(self, x, **kwargs):
		return self.desc_repr_model.predict(x, **kwargs)

	def predict(self, x, **kwargs):
		return self.sim_model.predict(x, **kwargs)

	def save(self, code_model_file, desc_model_file, **kwargs):
		file = h5py.File(code_model_file, 'w')
		weight_code = self.code_repr_model.get_weights()
		for i in range(len(weight_code)):
			file.create_dataset('weight_code'+str(i), data=weight_code[i])
		file.close()

		file = h5py.File(desc_model_file, 'w')
		weight_desc = self.desc_repr_model.get_weights()
		for i in range(len(weight_desc)):
			file.create_dataset('weight_desc'+str(i), data=weight_desc[i])
		file.close()
		# self.code_repr_model.save_weights(code_model_file, **kwargs)
		# self.desc_repr_model.save_weights(desc_model_file, **kwargs)

	def load(self, code_model_file, desc_model_file, **kwargs):
		# self.code_repr_model.load_weights(code_model_file, **kwargs)
		# self.desc_repr_model.load_weights(desc_model_file, **kwargs)
		file = h5py.File(code_model_file, 'r')
		weight_code = []
		for i in range(len(file.keys())):
			weight_code.append(file['weight_code'+str(i)][:])
		self.code_repr_model.set_weights(weight_code)
		file.close()

		file = h5py.File(desc_model_file, 'r')
		weight_desc = []
		for i in range(len(file.keys())):
			weight_desc.append(file['weight_desc'+str(i)][:])
		self.desc_repr_model.set_weights(weight_desc)
		file.close()
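# Typical call sequence for JointEmbeddingModel above (a sketch; `config` must
# carry the attributes read in __init__, and `x` is the list of five int32
# arrays matching meth_name, apiseq, tokens, desc_good and desc_bad):
#     jem = JointEmbeddingModel(config)
#     jem.build()
#     jem.compile(optimizer=None)  # note: compile() substitutes its own SGD optimizer
#     jem.fit(x, epochs=10, batch_size=128)
#     jem.save('code_model.h5', 'desc_model.h5')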
Example #6
def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,

               input_data=None, expected_output=None,

               expected_output_dtype=None, fixed_batch_size=False):
    # generate input data

    if input_data is None:

        if not input_shape:
            raise AssertionError()

        if not input_dtype:

            input_dtype = K.floatx()

        input_data_shape = list(input_shape)

        for i, e in enumerate(input_data_shape):

            if e is None:

                input_data_shape[i] = np.random.randint(1, 4)

        if all(isinstance(e, tuple) for e in input_data_shape):
            input_data = []
            for e in input_data_shape:
                input_data.append(
                    (10 * np.random.random(e)).astype(input_dtype))

        else:

            input_data = (10 * np.random.random(input_data_shape))

            input_data = input_data.astype(input_dtype)

    else:

        if input_shape is None:

            input_shape = input_data.shape

        if input_dtype is None:

            input_dtype = input_data.dtype

    if expected_output_dtype is None:

        expected_output_dtype = input_dtype

    # instantiation

    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level

    weights = layer.get_weights()

    layer.set_weights(weights)

    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if isinstance(input_shape, list):
        if fixed_batch_size:

            x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]

        else:

            x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
    else:
        if fixed_batch_size:

            x = Input(batch_shape=input_shape, dtype=input_dtype)

        else:

            x = Input(shape=input_shape[1:], dtype=input_dtype)

    y = layer(x)

    if K.dtype(y) != expected_output_dtype:
        raise AssertionError()

    # check with the functional API

    model = Model(x, y)

    actual_output = model.predict(input_data)

    actual_output_shape = actual_output.shape

    for expected_dim, actual_dim in zip(expected_output_shape,

                                        actual_output_shape):

        if expected_dim is not None:

            if expected_dim != actual_dim:
                raise AssertionError()

    if expected_output is not None:

        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level

    model_config = model.get_config()

    recovered_model = model.__class__.from_config(model_config)

    if model.weights:

        weights = model.get_weights()

        recovered_model.set_weights(weights)

        _output = recovered_model.predict(input_data)

        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a

    # different behavior at training and testing time).

    if has_arg(layer.call, 'training'):

        model.compile('rmsprop', 'mse')

        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config

    layer_config = layer.get_config()

    layer_config['batch_input_shape'] = input_shape

    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function

    return actual_output
Example #7
def _serialize_model_weights(model: Model) -> bytes:
    """
    Serialization of model weights
    """
    return pickle.dumps(model.get_weights(), protocol=4)
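# A matching deserialization helper is a one-liner with pickle (a sketch, not
# part of the original class; `pickle` is assumed to be imported as above):
def _deserialize_model_weights(payload: bytes) -> list:
    """Inverse of _serialize_model_weights: bytes -> list of weight arrays."""
    return pickle.loads(payload)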
Example #8
model.set_weights([
    layer for weight, bias in zip(deepcopy(initial_weights),
                                  deepcopy(initial_biases))
    for layer in (weight, bias.reshape(-1))
])

history = model.fit(
    np.array(deepcopy(inputs)),
    np.array(deepcopy(targets)),
    epochs=epochs,
    verbose=0,
)

keras_vals = {
    "weights": model.get_weights()[::2],
    "biases": model.get_weights()[1::2],
}


def test_learned_something() -> None:
    """Ensure that the weights and biases are changing."""
    for key in ["weights", "biases"]:
        assert len(keras_vals[key]) == len(tf_vals[key])
        for idx in range(len(initial_weights)):
            assert not np.allclose(keras_vals[key][idx], initial_weights[idx])
            assert not np.allclose(tf_vals[key][idx], initial_weights[idx])


def test_weights_and_biases() -> None:
    """Ensure tf weights and biases match keras."""
Example #9
input = Input(shape=(1, ), batch_size=6)

re = Reshape(target_shape=(1, 1))(input)

rnn_1 = LSTM(128, stateful=True, return_sequences=True)(re)
rnn_2 = LSTM(128, stateful=True, return_sequences=False)(rnn_1)

output_1 = Dense(1, activation='linear')(rnn_2)
output_2 = Dense(1, activation='sigmoid')(rnn_2)
output_3 = Dense(3, activation='softmax')(rnn_2)

model = Model(inputs=input, outputs=[output_1, output_2, output_3])
adam = Adam(lr=0.001)
model.compile(loss="mse", optimizer=adam)

raw_weights = model.get_weights()
new_weights = []

for raw in raw_weights:

    new_weights.append(np.random.uniform(-5, 5, raw.shape))

model.set_weights(new_weights)

one, two, three = model.predict(np.array([[1], [2], [3], [4], [5], [6]]))

one = one.tolist()
two = two.tolist()
three = three.tolist()

print('predict: [1, 2, 3, 4, 5, 6]')
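# Expected result of the call above (a sketch): with the batch size fixed at 6,
# the three heads return arrays of shape (6, 1), (6, 1) and (6, 3), so after
# tolist() something like the following holds:
#     assert np.shape(one) == (6, 1) and np.shape(three) == (6, 3)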
Example #10
class CriticNetwork(object):
    """
    Input to the network is the state and action, output is Q(s,a).
    The action must be obtained from the output of the Actor network.
    """
    def __init__(self, sess, root_net, inputs, state_dim, action_dim,
                 learning_rate, tau):
        self.sess = sess
        assert isinstance(state_dim, list), 'state_dim must be a list.'
        self.s_dim = state_dim
        assert isinstance(action_dim, list), 'action_dim must be a list.'
        self.a_dim = action_dim
        self.learning_rate = learning_rate
        self.tau = tau

        # Input
        self.inputs = inputs
        self.target_inputs = inputs

        # Create the critic network
        self.action, self.out = self.create_critic_network(root_net=root_net)
        self.critic_model = Model(inputs=[inputs, self.action],
                                  outputs=self.out)

        #self.network_params = tf.trainable_variables()[num_actor_vars:]

        # Target Network
        self.target_action, self.target_out = self.create_critic_network(
            root_net=None)
        self.target_critic_model = Model(
            inputs=[self.target_inputs, self.target_action],
            outputs=self.target_out)

        #self.target_network_params = tf.trainable_variables()[(len(self.network_params) + num_actor_vars):]

        # Op for periodically updating target network with online network
        # weights with regularization
        #self.update_target_network_params = \
        #    [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) \
        #                                          + tf.multiply(self.target_network_params[i], 1. - self.tau))
        #     for i in range(len(self.target_network_params))]

        # Network target (y_i)
        self.predicted_q_value = tf.placeholder(tf.float32, [None, 1])

        # Define loss and optimization Op
        #self.loss = tflearn.mean_square(self.predicted_q_value, self.out)
        self.loss = tf.reduce_mean(tf.square(self.predicted_q_value -
                                             self.out))
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.loss)

        # Get the gradient of the net w.r.t. the action.
        # For each action in the minibatch (i.e., for each x in xs),
        # this will sum up the gradients of each critic output in the minibatch
        # w.r.t. that action. Each output is independent of all
        # actions except for one.
        self.action_grads = tf.gradients(self.out, self.action)

    def create_critic_network(self, root_net):
        raise NotImplementedError(
            'Create critic should return (inputs, action, out)')

    def train(self, inputs, action, predicted_q_value):
        return self.sess.run(
            [self.out, self.optimize],
            feed_dict={
                self.inputs: inputs,
                self.action: action,
                self.predicted_q_value: predicted_q_value
            })

    def predict(self, inputs, action):
        return self.sess.run(self.out,
                             feed_dict={
                                 self.inputs: inputs,
                                 self.action: action
                             })

    def predict_target(self, inputs, action):
        return self.sess.run(self.target_out,
                             feed_dict={
                                 self.target_inputs: inputs,
                                 self.target_action: action
                             })

    def action_gradients(self, inputs, actions):
        return self.sess.run(self.action_grads,
                             feed_dict={
                                 self.inputs: inputs,
                                 self.action: actions
                             })

    def update_target_network(self):
        self.target_critic_model.set_weights(self.critic_model.get_weights())
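    # Note: update_target_network above copies the online weights verbatim (a
    # "hard" update). A soft / Polyak update with rate self.tau, as used by the
    # DDPG example that follows, would instead blend the two weight sets:
    #     blended = [self.tau * w + (1.0 - self.tau) * tw
    #                for w, tw in zip(self.critic_model.get_weights(),
    #                                 self.target_critic_model.get_weights())]
    #     self.target_critic_model.set_weights(blended)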
Example #11
File: ddpg.py  Project: ajmal017/rewave
class DDPG(object):
    def __init__(self,
                 env,
                 sess,
                 actor_noise,
                 obs_normalizer=None,
                 action_processor=None,
                 predictor_type="cnn",
                 use_batch_norm=False,
                 load_root_model=False,
                 config=DEFAULT_CONFIG):

        self.config = config
        assert self.config['max step'] > self.config[
            'batch size'], 'Max step must be bigger than batch size'

        self.episode = self.config["episode"]

        self.actor_learning_rate = self.config["actor learning rate"]
        self.critic_learning_rate = self.config["critic learning rate"]
        self.tau = self.config["tau"]
        self.gamma = self.config["gamma"]
        self.batch_size = self.config['batch size']

        self.action_processor = action_processor

        np.random.seed(self.config['seed'])
        if env:
            env.seed(self.config['seed'])

        self.sess = sess
        # if env is None, then DDPG just predicts
        self.env = env

        self.actor_noise = actor_noise
        # share state input
        has_complex_state = (
            isinstance(self.env.observation_space, gym.spaces.Dict)
            or isinstance(self.env.observation_space, gym.spaces.Tuple))
        if obs_normalizer and has_complex_state:
            state_input = Input(
                shape=self.env.observation_space.spaces[obs_normalizer].shape,
                name="state_input")
        else:
            state_input = Input(shape=self.env.observation_space.shape,
                                name="state_input")

        target_state_input = Input(
            shape=self.env.observation_space.spaces[obs_normalizer].shape,
            name="target_state_input")
        self.obs_normalizer = obs_normalizer

        # shape
        action_dim = env.action_space.shape[0]
        nb_assets = state_input.shape[1]
        window_length = state_input.shape[2]
        nb_features = state_input.shape[3]

        # paths
        self.model_save_path = get_model_path(window_length=window_length,
                                              predictor_type=predictor_type,
                                              use_batch_norm=use_batch_norm)
        self.summary_path = get_result_path(
            window_length=window_length,
            predictor_type=predictor_type,
            use_batch_norm=use_batch_norm) + "/" + datetime.now().strftime(
                "%Y-%m-%d-%H%M%S")
        self.root_model_save_path = get_root_model_path(
            window_length, predictor_type, use_batch_norm)

        # feature extraction
        self.predictor_type = predictor_type
        self.use_batch_norm = use_batch_norm
        root_net = RootNetwork(inputs=state_input,
                               predictor_type=self.predictor_type,
                               use_batch_norm=self.use_batch_norm).net
        self.root_model = Model(state_input, root_net)

        if load_root_model:
            try:
                self.root_model.load_weights(self.root_model_save_path)
                for layer in self.root_model.layers:
                    layer.trainable = False
            except:
                print("ERROR while loading root model ",
                      self.root_model_save_path)
        else:
            pass
        variable_summaries(root_net, "Root_Output")

        #array_variable_summaries(self.root_model.layers[1].weights, "Root_Input_1")
        #array_variable_summaries(self.root_model.layers[2].weights, "Root_Input_2")
        #array_variable_summaries(self.root_model.layers[-1].weights, "Root_Output_2")

        target_root_net = RootNetwork(inputs=target_state_input,
                                      predictor_type=predictor_type,
                                      use_batch_norm=use_batch_norm).net
        self.target_root_model = Model(target_state_input, target_root_net)

        if load_root_model:
            try:
                self.target_root_model.load_weights(self.root_model_save_path)
                for layer in self.target_root_model.layers:
                    layer.trainable = False
            except:
                print("ERROR while loading root model ",
                      self.root_model_save_path)
        else:
            pass

        self.target_root_model.set_weights(self.root_model.get_weights())

        # ===================================================================== #
        #                               Actor Model                             #
        # Chain rule: find the gradient of changing the actor network params in #
        # getting closest to the final value network predictions, i.e. de/dA    #
        # Calculate de/dA as = de/dC * dC/dA, where e is error, C critic, A act #
        # ===================================================================== #

        self.actor_state_input, self.actor_model = Actor(
            state_input=state_input, root_net=root_net,
            action_dim=action_dim).references()
        _, self.target_actor_model = Actor(state_input=target_state_input,
                                           root_net=target_root_net,
                                           action_dim=action_dim).references()

        # summary
        #array_variable_summaries(self.actor_model.layers[-1].weights, "Actor_Output")

        #actor_model_weights = self.actor_model.trainable_weights

        #self.actor_grads = K.gradients(self.actor_model.output,actor_model_weights)  # dC/dA (from actor)

        # grads = zip(self.actor_grads, actor_model_weights)

        action_grad = Input(shape=(action_dim, ))
        loss = K.mean(-action_grad * self.actor_model.outputs)

        for regularizer_loss in self.actor_model.losses:
            loss += regularizer_loss

        optimizer = Adam(lr=self.actor_learning_rate)

        updates_op = optimizer.get_updates(
            params=self.actor_model.trainable_weights,
            # constraints=self.model.constraints,
            loss=loss)

        self.optimize = K.function(
            inputs=[self.actor_state_input, action_grad,
                    K.learning_phase()],
            outputs=[loss],
            updates=updates_op)  # calling function for the loop
        """
        self.actor_grads = tf.gradients(self.actor_model.output,
                                        actor_model_weights, -self.actor_critic_grad)  # dC/dA (from actor)

        tf.summary.histogram("Actor_Critic_Grad", self.actor_critic_grad)

        
        grads = zip(self.actor_grads, actor_model_weights)

        self.optimize = tf.train.AdamOptimizer(self.actor_learning_rate).apply_gradients(grads)
        """
        # ===================================================================== #
        #                              Critic Model                             #
        # ===================================================================== #

        self.critic_state_input, self.critic_action_input, self.critic_model = Critic(
            state_input=state_input,
            root_net=root_net,
            action_dim=action_dim,
            lr=self.critic_learning_rate).references()
        array_variable_summaries(self.critic_model.layers[-1].weights,
                                 "Critic_Output")

        _, _, self.target_critic_model = Critic(
            state_input=target_state_input,
            root_net=target_root_net,
            action_dim=action_dim,
            lr=self.critic_learning_rate).references()
        """
        self.critic_grads = tf.gradients(self.critic_model.output,
                                         self.critic_action_input)  # where we calcaulte de/dC for feeding above

        """

        #self.actor_critic_grad = tf.placeholder(tf.float32,[None, self.env.action_space.shape[0]])  # where we will feed de/dC (from critic)
        # summary

        self.critic_grads = K.gradients(
            self.critic_model.outputs, self.critic_action_input
        )  # where we calculate de/dC for feeding above

        self.compute_critic_gradient = K.function(
            inputs=[
                self.critic_model.output, self.critic_action_input,
                self.critic_state_input
            ],
            outputs=self.critic_grads)  # calling function for the loop

        tf.summary.histogram("Critic_Grad", self.critic_grads)

        # Update target networks
        self.update_target()

        # summary
        #self.summary_ops, self.summary_vars = build_summaries(action_dim=action_dim)
        with tf.variable_scope("Global"):
            self.episode_reward = tf.Variable(0., name="episode_reward")
            tf.summary.scalar("Reward", self.episode_reward)
            self.episode_min_reward = tf.Variable(0.,
                                                  name="episode_min_reward")
            tf.summary.scalar("Min_Reward", self.episode_min_reward)
            self.episode_ave_max_q = tf.Variable(0., name="episode_ave_max_q")
            tf.summary.scalar("Qmax_Value", self.episode_ave_max_q)
            self.loss_critic = tf.Variable(0., name="loss_critic")
            tf.summary.scalar("Loss_critic", self.loss_critic)
            self.loss_actor = tf.Variable(0., name="loss_actor")
            tf.summary.scalar("Loss_actor", self.loss_actor)
            self.ep_base_action = tf.Variable(initial_value=self.env.sim.w0,
                                              name="ep_base_action")
            tf.summary.histogram("Action_base", self.ep_base_action)
            self.ep_action = tf.Variable(initial_value=self.env.sim.w0,
                                         name="ep_action")
            tf.summary.histogram("Action", self.ep_action)

        self.merged = tf.summary.merge_all()

        # Initialize for later gradient calculations
        # self.sess.run(tf.global_variables_initializer())

        # ========================================================================= #
        #                         Target Model Updating                             #
        # ========================================================================= #

    def _update_actor_target(self):
        weights = self.actor_model.get_weights()
        target_weights = self.target_actor_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i] * self.tau + target_weights[i] * (
                1 - self.tau)
        self.target_actor_model.set_weights(target_weights)

    def _update_critic_target(self):
        weights = self.critic_model.get_weights()
        target_weights = self.target_critic_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i] * self.tau + target_weights[i] * (
                1 - self.tau)
        self.target_critic_model.set_weights(target_weights)

    def update_target(self):
        self._update_actor_target()
        self._update_critic_target()

    # ========================================================================= #
    #                              Model Predictions                            #
    # ========================================================================= #

    def act(self, cur_state):
        # NOTE: self.epsilon and self.epsilon_decay are never set in __init__;
        # they must be assigned elsewhere before act() can be used.
        self.epsilon *= self.epsilon_decay
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        return self.actor_model.predict(cur_state).reshape(
            (self.env.action_space.shape[0], ))

    def initialize(self, load_weights=True, verbose=True):
        """ Load training history from path. To be add feature to just load weights, not training states

        """
        if load_weights:
            try:
                variables = tf.global_variables()
                param_dict = {}
                saver = tf.train.Saver()
                saver.restore(self.sess, self.model_save_path)
                for var in variables:
                    var_name = var.name[:-2]
                    if verbose:
                        print('Loading {} from checkpoint. Name: {}'.format(
                            var.name, var_name))
                    param_dict[var_name] = var
            except:
                traceback.print_exc()
                print('Build model from scratch')
                self.sess.run(tf.global_variables_initializer())
        else:
            print('Build model from scratch')
            self.sess.run(tf.global_variables_initializer())

    def _train_actor(self, samples):
        # NOTE: stale TF1-style code path; self.actor_critic_grad is never created
        # (its placeholder is commented out in __init__), and train() below uses
        # self.optimize / self.compute_critic_gradient instead of this method.
        for sample in samples:
            cur_state, action, reward, new_state, _ = sample
            predicted_action = self.actor_model.predict(cur_state)
            grads = self.sess.run(self.critic_grads,
                                  feed_dict={
                                      self.critic_state_input: cur_state,
                                      self.critic_action_input:
                                      predicted_action
                                  })[0]

            self.sess.run(self.optimize,
                          feed_dict={
                              self.actor_state_input: cur_state,
                              self.actor_critic_grad: grads
                          })

    def _train_critic(self, samples):
        for sample in samples:
            cur_state, action, reward, new_state, done = sample
            if not done:
                target_action = self.target_actor_model.predict(new_state)
                future_reward = self.target_critic_model.predict(
                    [new_state, target_action])[0][0]
                reward += self.gamma * future_reward
            history_critic = self.critic_model.fit([cur_state, action],
                                                   reward,
                                                   verbose=0,
                                                   batch_size=self.batch_size)
            # print("reward = ", reward, "/", self.critic_model.predict([cur_state, action]))
            # for layer in self.critic_model.layers:
            # print(layer, " weights = ", layer.get_weights())
            return history_critic

    def train(self, save_every_episode=1, verbose=True, debug=False):
        writer = tf.summary.FileWriter(self.summary_path, self.sess.graph)
        np.random.seed(self.config['seed'])
        num_episode = self.config['episode']
        gamma = self.config['gamma']
        self.buffer = ReplayBuffer(self.config['buffer size'])

        # @TODO : could monitor the Average Qmax and stop when no more change
        # for example change less than 1e-3 for 5 episodes
        delta_QMax = 1e-4
        nb_episodes_stable = 5

        stored_ep_ave_max_q = 0.0
        stored_episodes_stable = 0
        previous_i = 0

        # main training loop
        for i in range(num_episode):
            if verbose and debug:
                print("Episode: {} Replay Buffer  {}".format(
                    i, self.buffer.count))

            # receive initial state
            previous_observation = self.env.reset()
            if self.obs_normalizer:
                previous_observation = previous_observation[
                    self.obs_normalizer]

            ep_reward = 0.0
            episode_min_reward = 0.0
            ep_ave_max_q = 0.0

            # keep track of loss for episode
            loss_critic = 0.0
            loss_actor = 0.0

            self.actor_noise.reset()

            # keeps sampling until done
            for j in range(self.config['max step']):
                # select action according to the current policy and exploration noise
                base_action = self.actor_model.predict(
                    np.expand_dims(previous_observation,
                                   axis=0)).squeeze(axis=0)

                action = base_action + self.actor_noise()

                # normalize action
                action = np.clip(action, 0.0, 1.00)
                action /= action.sum()

                action_take = action
                if self.action_processor:
                    action_take = self.action_processor(action)

                # execute action and observe reward and new state
                observation, reward, done, _ = self.env.step(action_take)

                if self.obs_normalizer:
                    observation = observation[self.obs_normalizer]

                # make standard deviation close to one
                observation = observation * 20.0
                # add to buffer
                self.buffer.add(previous_observation, action, reward, done,
                                observation)

                if self.buffer.size() >= self.batch_size:
                    # ========================================================================= #
                    #                         sample a random mini-batch                        #
                    # ========================================================================= #
                    s_batch, a_batch, r_batch, t_batch, s2_batch = self.buffer.sample_batch(
                        self.batch_size)
                    # Calculate targets from the target critic and the target actor
                    target_q = self.target_critic_model.predict(
                        [s2_batch,
                         self.target_actor_model.predict(s2_batch)])

                    y_i = []
                    for k in range(self.batch_size):
                        if t_batch[k]:
                            y_i.append(r_batch[k])
                        else:
                            y_i.append(r_batch[k] + gamma * target_q[k][0])

                    # Update the critic given the targets by minimizing the loss (mse)
                    # loss_critic += self.critic_model.train_on_batch([s_batch, a_batch], np.reshape(y_i, (self.batch_size, 1)))[0]

                    stop_on_no_improvement = EarlyStopping(monitor='loss',
                                                           min_delta=0.001,
                                                           patience=3,
                                                           verbose=0,
                                                           mode='auto')
                    history_critic = self.critic_model.fit(
                        [s_batch, a_batch],
                        np.reshape(y_i, (self.batch_size, 1)),
                        #epochs=100,
                        #callbacks=[stop_on_no_improvement],
                        verbose=0)

                    loss_critic += history_critic.history['loss'][-1]

                    # keep track of the prediction for reporting
                    predicted_q_value = self.critic_model.predict(
                        [s_batch, a_batch])

                    ep_ave_max_q += np.amax(predicted_q_value)

                    # Update the actor policy using the sampled gradient
                    a_outs = self.actor_model.predict(s_batch)

                    # gradient Q value  for actions (critic)
                    critic_grads = self.compute_critic_gradient(
                        [predicted_q_value, a_batch, s_batch])[0]

                    # use this gradient to update the policy (actor)
                    loss_actor = self.optimize([s_batch, critic_grads, 1])[0]
                    """
                    grads = self.sess.run(self.critic_grads, feed_dict={
                        self.critic_state_input: s_batch,
                        self.critic_action_input: a_outs
                    })[0]

                    self.sess.run(self.optimize, feed_dict={
                        self.actor_state_input: s_batch,
                        self.actor_critic_grad: grads
                    })
                    """

                    # Update target networks
                    self.update_target()

                ep_reward += reward
                episode_min_reward = min(reward, episode_min_reward)
                previous_observation = observation

                if done or j == self.config['max step'] - 1:
                    loss_critic = loss_critic / (float(j) if j != 0 else 1.0)
                    loss_actor = loss_actor / (float(j) if j != 0 else 1.0)
                    ep_reward = ep_reward / (float(j) if j != 0 else 1.0)
                    ep_ave_max_q = ep_ave_max_q / (float(j) if j != 0 else 1.0)
                    # do summary preparation
                    merged = self.sess.run(
                        self.merged,
                        feed_dict={
                            self.actor_state_input: s_batch,
                            self.critic_state_input: s_batch,
                            self.critic_action_input: a_batch,
                            self.episode_reward: ep_reward,
                            self.loss_critic: loss_critic,
                            self.loss_actor: loss_actor,
                            self.episode_min_reward: episode_min_reward,
                            self.ep_action: action,
                            self.ep_base_action: base_action,
                            self.episode_ave_max_q: ep_ave_max_q
                        })

                    writer.add_summary(merged, i)
                    writer.flush()

                    print(
                        'Episode: {:d}, Critic Loss:{} Actor Loss:{} Average Reward: {:.2f}, Average Qmax: {:.4f}'
                        .format(i, loss_critic, loss_actor, ep_reward,
                                ep_ave_max_q))
                    print('--- top indice {}, top 3 base actions {}'.format(
                        np.where(base_action == base_action.max())[0][0],
                        sorted(base_action)[-3:]))
                    print('+++ top indice {}, top 3  actions {}'.format(
                        np.where(action == action.max())[0][0],
                        sorted(action)[-3:]))

                    #print('Action: norm {}, values {}'.format(action.sum(), action))
                    #print('---Base Action: norm {}, values {}'.format(base_action.sum(), base_action))
                    break

            # check if we must add a termination based on no more evolution
            if abs(ep_ave_max_q - stored_ep_ave_max_q) < delta_QMax:
                # steps must be consecutive
                if i - previous_i == 1:
                    stored_episodes_stable += 1
                else:
                    stored_episodes_stable = 0
                previous_i = i

            stored_ep_ave_max_q = ep_ave_max_q
            if stored_episodes_stable > nb_episodes_stable:
                print("Early break in episode ", i)
                break

        self.save_model(verbose=True)
        print('Finish.')

    def predict(self, observation):
        """ predict the next action using actor model, only used in deploy.
            Can be used in multiple environments.

        Args:
            observation: (batch_size, num_stocks + 1, window_length)

        Returns: action array with shape (batch_size, num_stocks + 1)

        """

        if self.obs_normalizer:
            observation = self.obs_normalizer(observation)
        action = self.actor_model.predict(observation)
        if self.action_processor:
            action = self.action_processor(action)
        return action

    def predict_single(self, observation):
        """ Predict the action of a single observation

        Args:
            observation: (num_stocks + 1, window_length)

        Returns: a single action array with shape (num_stocks + 1,)

        """
        if self.obs_normalizer and isinstance(observation, dict):
            observation = observation[self.obs_normalizer]
        action = self.actor_model.predict(np.expand_dims(
            observation, axis=0)).squeeze(axis=0)
        if self.action_processor:
            action = self.action_processor(action)
        return action

    def save_model(self, verbose=False):
        if not os.path.exists(self.model_save_path):
            os.makedirs(self.model_save_path, exist_ok=True)

        # make sure we save all parameters
        for layer in self.root_model.layers:
            layer.trainable = True
        for layer in self.target_root_model.layers:
            layer.trainable = True

        saver = tf.train.Saver()
        model_path = saver.save(self.sess, self.model_save_path)
        print("Model saved in %s" % model_path)
Example #12
class Autoencoder:
    def __init__(self,
                 n_end,
                 data,
                 activation=LeakyReLU(0.1),
                 optimizer='adam',
                 lr=0.001,
                 l2=0.0,
                 l1=0.00000,
                 is_GT=True,
                 plot_every_n=10):
        self.n_end = n_end
        self.hsi = data
        self.activation = activation
        self.lr = lr
        self.l2 = l2
        self.l1 = l1
        self.optimizer = optimizer
        # self.optimizer = optimizers.Adam(lr=self.lr)
        self.model = None
        self.use_bias = False
        self.abundance_layer = None
        self.initializer = initializers.glorot_normal()
        self.sum_to_one = True
        self.is_GT = is_GT
        self.plot_every_n = plot_every_n
        self.plotS = True
        self.weights = None
        self.is_deep = False

    def create_model(self, loss):
        use_bias = False
        # Input layer
        input_ = Input(shape=(self.hsi.n_bands, ))
        # Encoder
        if self.is_deep:
            encoded = Dense(self.n_end * 9,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            kernel_initializer=None,
                            activation=self.activation)(input_)
            # encoded = BatchNormalization()(encoded)
            encoded = Dense(self.n_end * 6,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            kernel_initializer=None,
                            activation=self.activation)(encoded)
            encoded = Dense(self.n_end * 3,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            kernel_initializer=None,
                            activation=self.activation)(encoded)
            encoded = Dense(self.n_end,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            activity_regularizer=None,
                            activation=self.activation)(encoded)
        else:
            encoded = Dense(self.n_end,
                            use_bias=use_bias,
                            activation=self.activation,
                            activity_regularizer=None,
                            kernel_regularizer=None)(input_)
        # Utility Layers

        # Batch Normalization
        encoded = BatchNormalization()(encoded)
        # Soft Thresholding
        encoded = utils.SparseReLU(alpha_initializer='zero',
                                   alpha_constraint=non_neg(),
                                   activity_regularizer=None)(encoded)
        # Sum To One (ASC)
        encoded = utils.SumToOne(axis=0,
                                 name='abundances',
                                 activity_regularizer=None)(encoded)

        # NOTE: as written, this GaussianDropout output is not used; the decoder
        # below is applied to `encoded` directly.
        decoded = GaussianDropout(0.0045)(encoded)

        # Decoder
        decoded = Dense(self.hsi.n_bands,
                        activation='linear',
                        name='endmembers',
                        use_bias=use_bias,
                        kernel_constraint=non_neg(),
                        kernel_regularizer=None,
                        kernel_initializer=self.initializer)(encoded)
        self.model = Model(inputs=input_, outputs=decoded, name='Autoencoder')
        # Compile Model

        self.model.compile(self.optimizer, loss, metrics=[utils.SAD])

    # Fit Model
    def fit(self, epochs, batch_size):

        progress = TQDMCallback(leave_outer=True, leave_inner=True)
        setattr(progress, 'on_train_batch_begin', lambda x, y: None)
        setattr(progress, 'on_train_batch_end', lambda x, y: None)
        plotWhileTraining = PlotWhileTraining(self.plot_every_n, self.hsi.size,
                                              self.n_end, self.hsi,
                                              self.hsi.GT, self.is_GT, True)
        hist = self.model.fit(self.hsi.data,
                              self.hsi.data,
                              epochs=epochs,
                              batch_size=batch_size,
                              verbose=0,
                              callbacks=[progress, plotWhileTraining],
                              shuffle=True)
        return hist

    # Shuffle or reset weights
    def shuffle_weights(self, weights):
        if weights is None:
            weights = self.model.get_weights()
        weights = [
            np.random.permutation(w.flat).reshape(w.shape) for w in weights
        ]
        # Faster, but less random: only permutes along the first dimension
        # weights = [np.random.permutation(w) for w in weights]
        self.model.set_weights(weights)

    def get_endmembers(self):
        return self.model.layers[-1].get_weights()[0]

    def get_abundances(self):
        intermediate_layer_model = Model(
            inputs=self.model.input,
            outputs=self.model.get_layer('abundances').output)
        abundances = intermediate_layer_model.predict(self.hsi.orig_data)
        return abundances

    def save_results(self, out_dir, fname):
        if out_dir is not None:
            out_path = out_dir / fname
        else:
            out_path = fname
        endmembers = self.get_endmembers()
        abundances = self.get_abundances()
        sio.savemat(out_path, {'M': endmembers, 'A': abundances})
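# A hypothetical driver for the Autoencoder class above (the `hsi_data` object
# must expose n_bands, data, orig_data, size and GT as the class assumes):
#     ae = Autoencoder(n_end=4, data=hsi_data)
#     ae.create_model(loss='mse')
#     ae.fit(epochs=50, batch_size=256)
#     endmembers = ae.get_endmembers()
#     abundances = ae.get_abundances()
#     ae.save_results(out_dir=None, fname='results.mat')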
Example #13
0
File: actor.py Project: ajmal017/rewave
class ActorNetwork(object):
    """
    Input to the network is the state, output is the action
    under a deterministic policy.
    The output layer activation is a tanh to keep the action
    between -action_bound and action_bound
    """
    def __init__(self, sess, root_net, inputs, state_dim, action_dim,
                 action_bound, learning_rate, tau, batch_size):
        """

        Args:
            sess: a tensorflow session
            state_dim: a list specifies shape
            action_dim: a list specified action shape
            action_bound: whether to normalize action in the end
            learning_rate: learning rate
            tau: target network update parameter
            batch_size: use for normalization
        """
        self.sess = sess
        assert isinstance(state_dim, list), 'state_dim must be a list.'
        self.s_dim = state_dim
        assert isinstance(action_dim, list), 'action_dim must be a list.'
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size

        # Input
        self.inputs = inputs
        self.target_inputs = inputs

        # Actor Network
        self.out, self.scaled_out = self.create_actor_network(
            root_net=root_net)
        self.actor_model = Model(inputs=inputs, outputs=self.out)

        self.network_params = tf.trainable_variables()

        # Target Network
        self.target_out, self.target_scaled_out = self.create_actor_network(
            root_net=None)
        self.target_actor_model = Model(inputs=self.target_inputs,
                                        outputs=self.target_out)

        #self.target_network_params = tf.trainable_variables()[len(self.network_params):]

        # Op for periodically updating target network with online network
        # weights
        #self.update_target_network_params = \
        #    [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) +
        #                                          tf.multiply(self.target_network_params[i], 1. - self.tau))
        #     for i in range(len(self.target_network_params))]

        # This gradient will be provided by the critic network
        self.action_gradient = tf.placeholder(tf.float32, [None] + self.a_dim)

        # Combine the gradients here
        self.unnormalized_actor_gradients = tf.gradients(
            self.scaled_out, self.network_params, -self.action_gradient)

        self.actor_gradients = list(
            map(lambda x: tf.div(x, self.batch_size),
                self.unnormalized_actor_gradients))

        # Optimization Op
        self.optimize = tf.train.AdamOptimizer(self.learning_rate). \
            apply_gradients(zip(self.actor_gradients, self.network_params))

        #self.num_trainable_vars = len(self.network_params) + len(self.target_network_params)

    def create_actor_network(self, root_net):
        raise NotImplementedError(
            'create_actor_network should return (out, scaled_out)')

    def train(self, inputs, a_gradient):
        self.sess.run(self.optimize,
                      feed_dict={
                          self.inputs: inputs,
                          self.action_gradient: a_gradient
                      })

    def predict(self, inputs):
        return self.sess.run(self.scaled_out, feed_dict={self.inputs: inputs})

    def predict_target(self, inputs):
        return self.sess.run(self.target_scaled_out,
                             feed_dict={self.target_inputs: inputs})

    def update_target_network(self):
        # self.sess.run(self.update_target_network_params)
        self.target_actor_model.set_weights(self.actor_model.get_weights())
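
`create_actor_network` is left abstract above; a subclass must build the policy head and return both the raw output and the output scaled into `[-action_bound, action_bound]`, which is what the tanh activation mentioned in the docstring is for. A minimal sketch of such a subclass, assuming a flat state vector and hypothetical hidden-layer sizes (the real rewave project defines its own architecture), might look like this:

from tensorflow.keras.layers import Dense, Lambda


class SimpleActorNetwork(ActorNetwork):
    """Hypothetical concrete actor: two hidden layers, tanh output scaled
    by the action bound. For illustration only."""

    def create_actor_network(self, root_net):
        # root_net can carry shared feature layers; fall back to the raw inputs.
        net = root_net if root_net is not None else self.inputs
        net = Dense(64, activation='relu')(net)
        net = Dense(64, activation='relu')(net)
        # tanh bounds the raw action to [-1, 1] ...
        out = Dense(self.a_dim[0], activation='tanh')(net)
        # ... and scaling maps it into [-action_bound, action_bound].
        scaled_out = Lambda(lambda x: x * self.action_bound)(out)
        return out, scaled_out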
Example #14
0
def layer_test(layer_cls,
               kwargs={},
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               fixed_batch_size=False):
    """Test routine for a layer with a single input tensor

    and single output tensor.

    """

    # generate input data

    if input_data is None:

        assert input_shape

        if not input_dtype:

            input_dtype = K.floatx()

        input_data_shape = list(input_shape)

        for i, e in enumerate(input_data_shape):

            if e is None:

                input_data_shape[i] = np.random.randint(1, 4)

        input_data = (10 * np.random.random(input_data_shape))

        input_data = input_data.astype(input_dtype)

    else:

        if input_shape is None:

            input_shape = input_data.shape

        if input_dtype is None:

            input_dtype = input_data.dtype

    if expected_output_dtype is None:

        expected_output_dtype = input_dtype

    # instantiation

    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level

    weights = layer.get_weights()

    layer.set_weights(weights)

    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except:
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API

    if fixed_batch_size:

        x = Input(batch_shape=input_shape, dtype=input_dtype)

    else:

        x = Input(shape=input_shape[1:], dtype=input_dtype)

    y = layer(x)

    assert K.dtype(y) == expected_output_dtype

    # check with the functional API

    model = Model(x, y)

    actual_output = model.predict(input_data)

    actual_output_shape = actual_output.shape

    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):

        if expected_dim is not None:

            assert expected_dim == actual_dim

    if expected_output is not None:

        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level

    model_config = model.get_config()

    recovered_model = model.__class__.from_config(model_config)

    if model.weights:

        weights = model.get_weights()

        recovered_model.set_weights(weights)

        _output = recovered_model.predict(input_data)

        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a

    # different behavior at training and testing time).

    if has_arg(layer.call, 'training'):

        model.compile('rmsprop', 'mse')

        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config

    layer_config = layer.get_config()

    layer_config['batch_input_shape'] = input_shape

    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function

    return actual_output
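
A usage sketch (not part of the original snippet): `layer_test` can be pointed at any layer class with a single input and a single output. The call below assumes the stock Keras `Dense` layer and an arbitrary small input shape:

from tensorflow.keras.layers import Dense

# Hypothetical invocation: exercises weight get/set, the functional API,
# config/weight round-tripping and training mode for a Dense layer.
out = layer_test(Dense,
                 kwargs={'units': 3, 'activation': 'relu'},
                 input_shape=(None, 5))
print(out.shape)  # (batch_size, 3), with a small random batch size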
Example #15
0
class DeepQ(object):
    """Constructs the desired deep q learning network"""
    def __init__(self, action_size, observation_size, lr=LEARNING_RATE):
        self.action_size = action_size
        self.observation_size = observation_size
        self.lr = lr
        self.model = None
        self.target_model = None
        self.qvalue_evolution = []
        self.construct_q_network()

    def construct_q_network(self):
        """ Construct both the actual Q-network and the target network with three hidden layers and ReLu activation
        functions in between. The network uses an Adam optimizer with MSE loss."""

        input_layer = Input(shape=(self.observation_size * NUM_FRAMES, ))
        layer1 = Dense(self.observation_size * NUM_FRAMES)(input_layer)
        layer1 = Activation('relu')(layer1)
        layer3 = Dense(self.observation_size)(layer1)
        layer3 = Activation('relu')(layer3)
        layer4 = Dense(2 * self.action_size)(layer3)
        layer4 = Activation('relu')(layer4)
        output = Dense(self.action_size)(layer4)

        self.model = Model(inputs=[input_layer], outputs=[output])
        self.model.compile(loss='mse', optimizer=Adam(lr=self.lr))
        self.target_model = Model(inputs=[input_layer], outputs=[output])
        self.target_model.compile(loss='mse', optimizer=Adam(lr=self.lr))
        self.target_model.set_weights(self.model.get_weights())

    def predict_movement(self, data, epsilon):
        """ Predict the next action from the network. Epsilon is the probability of making a random move.
        Returns the optimal action and the predicted reward for that action. """

        rand_val = np.random.random()
        q_actions = self.model.predict(data.reshape(
            1, self.observation_size * NUM_FRAMES),
                                       batch_size=1)

        if rand_val < epsilon:
            opt_policy = np.random.randint(0, self.action_size)
        else:
            opt_policy = np.argmax(np.abs(q_actions[0]))

        self.qvalue_evolution.append(q_actions[0][opt_policy])

        return opt_policy, q_actions[0][opt_policy]

    def predict_rewards(self, data):
        """ Like predict_movement, only without a probability of a random move and returns only the predicted
        q-values."""

        q_actions = self.model.predict(np.array(data).reshape(
            1, self.observation_size * NUM_FRAMES),
                                       batch_size=1)
        return q_actions[0]

    def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch):
        """ Trains the network on a batch of input.
        The parameters are batches of states, actions, rewards, done booleans and next states. """

        batch_size = s_batch.shape[0]

        # Train according to the Bellman Equation
        targets = self.model.predict(s_batch, batch_size=batch_size)
        fut_action = self.target_model.predict(s2_batch, batch_size=batch_size)

        targets[:, a_batch.flatten()] = r_batch
        targets[d_batch, a_batch[d_batch]] += DISCOUNT_RATE * np.max(
            fut_action[d_batch], axis=-1)

        targets_ts = tf.convert_to_tensor(targets, dtype=tf.float32)
        loss = self.model.train_on_batch(s_batch, targets_ts)

        return loss

    def train_imitation(self, s_batch, t_batch):
        """ Trains network on generated data: Imitation Learning. """
        loss = self.model.train_on_batch(s_batch, t_batch)
        return loss

    def save_network(self, path):
        if not os.path.exists(path):
            os.mkdir(path)
        self.model.save(os.path.join(path, 'network.h5'))
        print("Successfully saved network.")

    def load_network(self, path):
        self.model = load_model(os.path.join(path, 'network.h5'))
        print("Successfully loaded network.")

    def target_train(self):
        """ The target network is updated each step by 'merging' a small part of the actual network into it. """
        model_weights = self.model.get_weights()
        target_model_weights = self.target_model.get_weights()
        for i in range(len(model_weights)):
            target_model_weights[i] = TAU * model_weights[i] + (
                1 - TAU) * target_model_weights[i]
        self.target_model.set_weights(target_model_weights)
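
For illustration only, a hedged usage sketch of `DeepQ`, assuming the module-level constants it references (`NUM_FRAMES`, `LEARNING_RATE`, `DISCOUNT_RATE`, `TAU`) are defined elsewhere in the file:

import numpy as np

# Hypothetical sizes; real values come from the environment.
agent = DeepQ(action_size=4, observation_size=10)

# One stacked observation of shape (observation_size * NUM_FRAMES,).
state = np.random.rand(10 * NUM_FRAMES).astype(np.float32)

# Epsilon-greedy: with probability epsilon a random action is chosen,
# otherwise the action with the largest predicted |Q| value.
action, q_value = agent.predict_movement(state, epsilon=0.1)

# Soft target update: target <- TAU * online + (1 - TAU) * target.
agent.target_train()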