def __init__(self, env, lr, n_hidden_layers, n_hidden_units):
        """Build a shared-trunk network with a policy head and a value head.

        Args:
            env: Environment object; only ``env.action_space`` and
                ``env.observation_space`` are read, both through
                ``check_space``.
            lr: Learning rate handed to the Adam optimizer.
            n_hidden_layers: Number of additional hidden ``Linear``/``ReLU``
                pairs stacked after the input layer (the trunk therefore
                holds ``1 + n_hidden_layers`` linear layers).
            n_hidden_units: Width of every hidden layer.

        Raises:
            ValueError: If the action space is not discrete, or if
                ``state_dim`` does not have exactly one entry.
        """
        super().__init__()
        # Check the Gym environment
        self.action_dim, self.action_discrete = check_space(env.action_space)
        self.state_dim, self.state_discrete = check_space(
            env.observation_space)

        if not self.action_discrete:
            raise ValueError('Continuous action space not implemented')

        if len(self.state_dim) != 1:
            raise ValueError(
                f"`len(state_dim)` is {len(self.state_dim)} but should be one")

        # Build every hidden layer as its own module. Multiplying a list of
        # modules (`[...] * n`) would repeat the *same* nn.Linear instance,
        # which silently ties the weights of all hidden layers together.
        trunk = [nn.Linear(self.state_dim[0], n_hidden_units), nn.ReLU()]
        for _ in range(n_hidden_layers):
            trunk.append(nn.Linear(n_hidden_units, n_hidden_units))
            trunk.append(nn.ReLU())
        self.base_nn = nn.Sequential(*trunk)

        # Two heads on top of the shared trunk: action logits and a scalar
        # state value.
        self.pi_hat = nn.Linear(n_hidden_units, self.action_dim)
        self.v_hat = nn.Linear(n_hidden_units, 1)

        self.v_loss = nn.MSELoss()

        # Created after all sub-modules so self.parameters() covers them.
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

        self.softmax = torch.nn.Softmax(dim=-1)
        self.log_softmax = torch.nn.LogSoftmax(dim=-1)
    def __init__(self, Env, lr, n_hidden_layers, n_hidden_units):
        """Build the TensorFlow graph of a joint policy/value network.

        Args:
            Env: Environment object; only ``Env.action_space`` and
                ``Env.observation_space`` are read, both through
                ``check_space``.
            lr: Initial learning rate. It is stored in the non-trainable
                variable ``self.lr``, which drives the optimizer.
            n_hidden_layers: Number of fully connected ELU layers.
            n_hidden_units: Width of every hidden layer.

        Raises:
            ValueError: If the action space is not discrete.
        """
        # Check the Gym environment
        self.action_dim, self.action_discrete = check_space(Env.action_space)
        self.state_dim, self.state_discrete = check_space(Env.observation_space)
        if not self.action_discrete:
            raise ValueError('Continuous action space not implemented')

        # Placeholders
        if not self.state_discrete:
            self.x = x = tf.placeholder("float32", shape=np.append(None, self.state_dim), name='x')  # state
        else:
            # Discrete states arrive as a column of indices and are one-hot
            # encoded before entering the network.
            self.x = x = tf.placeholder("int32", shape=np.append(None, 1))  # state
            x = tf.squeeze(tf.one_hot(x, self.state_dim, axis=1), axis=2)

        # Feedforward: Can be modified to any representation function, e.g. convolutions, residual networks, etc.
        for _ in range(n_hidden_layers):
            x = slim.fully_connected(x, n_hidden_units, activation_fn=tf.nn.elu)

        # Output
        log_pi_hat = slim.fully_connected(x, self.action_dim, activation_fn=None)
        self.pi_hat = tf.nn.softmax(log_pi_hat)  # policy head
        self.V_hat = slim.fully_connected(x, 1, activation_fn=None)  # value head

        # Loss: value regression plus policy cross-entropy against the
        # target distribution fed through `self.pi`.
        self.V = tf.placeholder("float32", shape=[None, 1], name='V')
        self.pi = tf.placeholder("float32", shape=[None, self.action_dim], name='pi')
        self.V_loss = tf.losses.mean_squared_error(labels=self.V, predictions=self.V_hat)
        self.pi_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.pi, logits=log_pi_hat)
        self.loss = self.V_loss + tf.reduce_mean(self.pi_loss)

        # Feed the variable (not the Python constant) to the optimizer so
        # that assigning a new value to `self.lr` actually changes the step
        # size used by `train_op`.
        self.lr = tf.Variable(lr, name="learning_rate", trainable=False)
        optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr)
        self.train_op = optimizer.minimize(self.loss)
Exemplo n.º 3
0
    def __init__(self, Env, lr, n_hidden_layers, n_hidden_units,
                 joint_networks=False):
        """Record the environment layout and hyper-parameters.

        No network is constructed here: the model attributes are only
        initialised to ``None``.

        Args:
            Env: Environment object; only ``Env.action_space`` and
                ``Env.observation_space`` are read, both through
                ``check_space``.
            lr: Learning rate, stored as ``self.lr``.
            n_hidden_layers: Stored as ``self.n_hidden_layers``.
            n_hidden_units: Stored as ``self.n_hidden_units``.
            joint_networks: Stored as ``self.joint_model``.

        Raises:
            ValueError: If the action space is not discrete.
        """
        # Inspect the Gym spaces first; a continuous action space is
        # rejected before anything else is stored.
        self.action_dim, self.action_discrete = check_space(Env.action_space)
        self.state_dim, self.state_discrete = check_space(Env.observation_space)
        if not self.action_discrete:
            raise ValueError('Continuous action space not implemented')

        # Hyper-parameters kept for when the model is built.
        self.lr = lr
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_units = n_hidden_units
        self.joint_model = joint_networks

        # Model slots, all empty until a model is built.
        self.model = self.value_model = self.policy_model = None

        # A discrete state is a single index; otherwise use the first
        # entry of the state dimension.
        self.input_shape = (
            (1, ) if self.state_discrete else (1, self.state_dim[0]))
Exemplo n.º 4
0
    def __init__(self, env, lr, n_hidden_layers, n_hidden_units):
        """Build and compile a Keras model with policy and value outputs.

        Args:
            env: Environment object; only ``env.action_space`` and
                ``env.observation_space`` are read, both through
                ``check_space``.
            lr: Learning rate for the Adam optimizer.
            n_hidden_layers: Number of Dense + ReLU pairs in the trunk.
            n_hidden_units: Width of every hidden Dense layer.

        Raises:
            ValueError: If the action space is not discrete.
        """
        # Inspect the Gym spaces.
        self.action_dim, self.action_discrete = check_space(env.action_space)
        self.state_dim, self.state_discrete = check_space(
            env.observation_space)
        if not self.action_discrete:
            raise ValueError('Continuous action space not implemented')

        # NOTE(review): only a float32 input of shape `state_dim` is built;
        # `self.state_discrete` is not consulted, so a one-hot path for
        # discrete states is not implemented here.
        net_input = keras.Input(shape=self.state_dim, dtype='float32')

        # Shared trunk. Can be swapped for any representation function,
        # e.g. convolutions or residual networks.
        hidden = net_input
        for _ in range(n_hidden_layers):
            dense_out = layers.Dense(n_hidden_units)(hidden)
            hidden = layers.Activation('relu')(dense_out)

        # Heads: softmax policy over actions and a scalar value. The layer
        # names double as the keys of the loss dictionary below.
        self.pi_hat = layers.Dense(self.action_dim,
                                   activation='softmax',
                                   name='policy')(hidden)
        self.v_hat = layers.Dense(1, name='value')(hidden)

        self.model = keras.Model(inputs=net_input,
                                 outputs=[self.pi_hat, self.v_hat])

        head_losses = {
            'value': 'mse',
            'policy': 'categorical_crossentropy'
        }
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=lr),
                           loss=head_losses,
                           metrics=['acc'])
Exemplo n.º 5
0
def signup():
    """Validate the submitted sign-up form and render the outcome.

    Reads ``username``, ``password``, ``verifypw`` and ``email`` from
    ``request.form``. Every failed check appends its message to the error
    string of the corresponding field, so one field can report several
    problems at once. The email is optional and only validated when it is
    non-empty.

    The ``check_empty`` / ``check_length`` / ``check_space`` helpers are
    treated as returning a truthy value when the field violates the rule.

    Returns:
        The rendered ``signup-form.html`` (password fields blanked, other
        fields and all error strings echoed back) if any check failed,
        otherwise the rendered ``welcome.html``.
    """
    username = request.form['username']
    password = request.form['password']
    verifypw = request.form['verifypw']
    email = request.form['email']

    # The messages below are fixed literals without HTML metacharacters, so
    # they need no manual escaping (the previous `cgi.escape` calls were
    # no-ops, and `cgi.escape` no longer exists on Python 3.8+).
    username_error = ""
    password_error = ""
    verifypw_error = ""
    email_error = ""

    # Username: present, 3-20 characters, no spaces.
    if check_empty(username):
        username_error += "Please enter a username. "
    if check_length(username):
        username_error += "Your username must be 3-20 characters. "
    if check_space(username):
        username_error += "Your username may not contain spaces. "

    # Password: same three rules as the username.
    if check_empty(password):
        password_error += "Please enter a password. "
    if check_length(password):
        password_error += "Your password must be 3-20 characters. "
    if check_space(password):
        password_error += "Your password may not contain spaces. "

    # Verification field: only presence and equality are checked, because a
    # value equal to a valid password is itself valid.
    if check_empty(verifypw):
        verifypw_error += "Please verify your password. "
    if password != verifypw:
        verifypw_error += "Your passwords do not match! "

    # Email is optional; validate it only when something was entered.
    if email != "":
        # Spaces are detected with check_space (not check_length), so the
        # message matches the rule that was actually broken.
        if check_space(email):
            email_error += "Your email may not contain spaces. "
        if check_length(email):
            email_error += "Your email must be 3-20 characters. "
        if "@" not in email:
            email_error += "Your email must contain an @ symbol. "
        if "." not in email:
            email_error += "Your email must contain an . symbol. "

    if username_error or password_error or verifypw_error or email_error:
        # Re-render the form; never echo the passwords back to the client.
        return render_template('signup-form.html',
                               title="Sign Up",
                               username=username,
                               password="",
                               verifypw="",
                               email=email,
                               username_error=username_error,
                               password_error=password_error,
                               verifypw_error=verifypw_error,
                               email_error=email_error)

    return render_template('welcome.html',
                           title="Welcome!",
                           username=username)