def __init__(self, env, lr, n_hidden_layers, n_hidden_units):
    """Build a shared-trunk network with a policy head and a value head.

    Args:
        env: Gym environment; only ``env.action_space`` and
            ``env.observation_space`` are inspected (via ``check_space``).
        lr: Learning rate handed to the Adam optimizer.
        n_hidden_layers: Number of ``Linear``/``ReLU`` pairs stacked after
            the input layer.
        n_hidden_units: Width of the input layer output and of every
            hidden layer.

    Raises:
        ValueError: If the action space is not discrete, or if
            ``len(self.state_dim)`` is not one.
    """
    super().__init__()

    # Check the Gym environment
    self.action_dim, self.action_discrete = check_space(env.action_space)
    self.state_dim, self.state_discrete = check_space(
        env.observation_space)
    if not self.action_discrete:
        raise ValueError('Continuous action space not implemented')
    if len(self.state_dim) != 1:
        raise ValueError(
            f"`len(state_dim)` is {len(self.state_dim)} but should be one")

    # Shared trunk. Each hidden layer must be a *new* nn.Linear: repeating a
    # list of modules with ``*`` would reuse the same instances and silently
    # tie the weights of all hidden layers together.
    trunk = [nn.Linear(self.state_dim[0], n_hidden_units), nn.ReLU()]
    for _ in range(n_hidden_layers):
        trunk.append(nn.Linear(n_hidden_units, n_hidden_units))
        trunk.append(nn.ReLU())
    self.base_nn = nn.Sequential(*trunk)

    # Output heads on top of the trunk
    self.pi_hat = nn.Linear(n_hidden_units, self.action_dim)  # policy head
    self.v_hat = nn.Linear(n_hidden_units, 1)  # value head

    self.v_loss = nn.MSELoss()
    # Created after all sub-modules so self.parameters() covers them all.
    self.optimizer = optim.Adam(self.parameters(), lr=lr)

    self.softmax = torch.nn.Softmax(dim=-1)
    self.log_softmax = torch.nn.LogSoftmax(dim=-1)
def __init__(self, Env, lr, n_hidden_layers, n_hidden_units):
    """Build the TensorFlow graph for a joint policy/value network.

    Args:
        Env: Gym environment; only ``Env.action_space`` and
            ``Env.observation_space`` are inspected (via ``check_space``).
        lr: Initial learning rate, stored in the non-trainable variable
            ``self.lr`` that drives the optimizer.
        n_hidden_layers: Number of fully connected ELU layers.
        n_hidden_units: Width of each hidden layer.

    Raises:
        ValueError: If the action space is not discrete.
    """
    # Check the Gym environment
    self.action_dim, self.action_discrete = check_space(Env.action_space)
    self.state_dim, self.state_discrete = check_space(Env.observation_space)
    if not self.action_discrete:
        raise ValueError('Continuous action space not implemented')

    # Placeholders
    if not self.state_discrete:
        self.x = x = tf.placeholder("float32",
                                    shape=np.append(None, self.state_dim),
                                    name='x')  # state
    else:
        self.x = x = tf.placeholder("int32", shape=np.append(None, 1))  # state
        # One-hot encode the integer state, then drop the trailing axis
        # left over from the (batch, 1) placeholder shape.
        x = tf.squeeze(tf.one_hot(x, self.state_dim, axis=1), axis=2)

    # Feedforward: Can be modified to any representation function,
    # e.g. convolutions, residual networks, etc.
    for _ in range(n_hidden_layers):
        x = slim.fully_connected(x, n_hidden_units, activation_fn=tf.nn.elu)

    # Output
    log_pi_hat = slim.fully_connected(x, self.action_dim, activation_fn=None)
    self.pi_hat = tf.nn.softmax(log_pi_hat)  # policy head
    self.V_hat = slim.fully_connected(x, 1, activation_fn=None)  # value head

    # Loss: value MSE plus the batch mean of the policy cross-entropy
    self.V = tf.placeholder("float32", shape=[None, 1], name='V')
    self.pi = tf.placeholder("float32", shape=[None, self.action_dim],
                             name='pi')
    self.V_loss = tf.losses.mean_squared_error(labels=self.V,
                                               predictions=self.V_hat)
    self.pi_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=self.pi, logits=log_pi_hat)
    self.loss = self.V_loss + tf.reduce_mean(self.pi_loss)

    self.lr = tf.Variable(lr, name="learning_rate", trainable=False)
    # Feed the variable (not the raw Python number) to the optimizer so that
    # assigning a new value to self.lr actually changes the step size.
    optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr)
    self.train_op = optimizer.minimize(self.loss)
def __init__(self, Env, lr, n_hidden_layers, n_hidden_units,
             joint_networks=False):
    """Record environment spaces and hyper-parameters; build no network yet.

    Args:
        Env: Gym environment; only ``Env.action_space`` and
            ``Env.observation_space`` are inspected (via ``check_space``).
        lr: Learning rate, stored as ``self.lr``.
        n_hidden_layers: Stored as ``self.n_hidden_layers``.
        n_hidden_units: Stored as ``self.n_hidden_units``.
        joint_networks: Stored as ``self.joint_model``.

    Raises:
        ValueError: If the action space is not discrete.
    """
    # Inspect the Gym environment spaces
    action_dim, action_discrete = check_space(Env.action_space)
    self.action_dim = action_dim
    self.action_discrete = action_discrete

    state_dim, state_discrete = check_space(Env.observation_space)
    self.state_dim = state_dim
    self.state_discrete = state_discrete

    if not self.action_discrete:
        raise ValueError('Continuous action space not implemented')

    # Hyper-parameters kept for later model construction
    self.lr = lr
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_units = n_hidden_units
    self.joint_model = joint_networks

    # Model slots start empty
    self.model = None
    self.value_model = None
    self.policy_model = None

    # Discrete states use a single-element shape; otherwise the shape is
    # (1, first state dimension).
    self.input_shape = (
        (1, ) if self.state_discrete else (1, self.state_dim[0]))
def __init__(self, env, lr, n_hidden_layers, n_hidden_units):
    """Build and compile a Keras model with a policy head and a value head.

    Args:
        env: Gym environment; only ``env.action_space`` and
            ``env.observation_space`` are inspected (via ``check_space``).
        lr: Learning rate for the Adam optimizer.
        n_hidden_layers: Number of Dense + ReLU activation pairs.
        n_hidden_units: Width of each hidden Dense layer.

    Raises:
        ValueError: If the action space is not discrete.
    """
    # Inspect the Gym environment spaces
    self.action_dim, self.action_discrete = check_space(env.action_space)
    self.state_dim, self.state_discrete = check_space(
        env.observation_space)
    if not self.action_discrete:
        raise ValueError('Continuous action space not implemented')

    # NOTE: the input is always declared as float32 with shape
    # ``self.state_dim``; ``self.state_discrete`` is recorded but not used
    # here, so a one-hot path for discrete observations is not implemented.
    state_input = keras.Input(shape=self.state_dim, dtype='float32')

    # Hidden stack: Dense followed by a separate ReLU activation layer
    hidden = state_input
    for _ in range(n_hidden_layers):
        dense_out = layers.Dense(n_hidden_units)(hidden)
        hidden = layers.Activation('relu')(dense_out)

    # Output heads share the hidden stack
    self.pi_hat = layers.Dense(self.action_dim,
                               activation='softmax',
                               name='policy')(hidden)
    self.v_hat = layers.Dense(1, name='value')(hidden)

    self.model = keras.Model(inputs=state_input,
                             outputs=[self.pi_hat, self.v_hat])

    # Loss keys match the ``name`` of each output layer
    adam = tf.optimizers.Adam(learning_rate=lr)
    head_losses = {
        'value': 'mse',
        'policy': 'categorical_crossentropy'
    }
    self.model.compile(optimizer=adam, loss=head_losses, metrics=['acc'])
def signup():
    """Validate the submitted sign-up form and render the resulting page.

    Reads ``username``, ``password``, ``verifypw`` and ``email`` from
    ``request.form``, accumulates one error string per field, and renders
    ``signup-form.html`` again (with both password fields blanked) when any
    error was found, or ``welcome.html`` otherwise.

    Returns:
        The result of ``render_template`` for the chosen page.
    """
    # Pull in the data for the four form fields
    username = request.form['username']
    password = request.form['password']
    verifypw = request.form['verifypw']
    email = request.form['email']

    # The error messages are fixed literals without HTML metacharacters, so
    # the former cgi.escape() wrapping was a no-op; it is dropped because
    # cgi.escape no longer exists on Python 3.8+.
    username_error = _required_field_errors(username, "username")
    password_error = _required_field_errors(password, "password")

    # verifypw only needs to be present and equal to password: if password
    # passes its own checks, a matching verifypw passes them too.
    verifypw_error = ""
    if check_empty(verifypw):
        verifypw_error += "Please verify your password. "
    if password != verifypw:
        verifypw_error += "Your passwords do not match! "

    # NOTE(review): indentation was lost in the reviewed source; all email
    # checks are assumed to apply only when an email was supplied - confirm.
    email_error = ""
    if email != "":
        # Use check_space here: the previous code called check_length, so
        # the "no spaces" message was tied to the length check instead.
        if check_space(email):
            email_error += "Your email may not contain spaces. "
        if check_length(email):
            email_error += "Your email must be 3-20 characters. "
        if "@" not in email:
            email_error += "Your email must contain an @ symbol. "
        if "." not in email:
            email_error += "Your email must contain an . symbol. "

    if username_error or password_error or verifypw_error or email_error:
        # Re-render the form; never echo the passwords back to the page.
        return render_template('signup-form.html',
                               title="Sign Up",
                               username=username,
                               password="",
                               verifypw="",
                               email=email,
                               username_error=username_error,
                               password_error=password_error,
                               verifypw_error=verifypw_error,
                               email_error=email_error)

    return render_template('welcome.html',
                           title="Welcome!",
                           username=username)


def _required_field_errors(value, noun):
    """Return the concatenated presence/length/space errors for one field.

    ``noun`` is the field name as shown to the user ("username" or
    "password"). A truthy result from ``check_empty``, ``check_length`` or
    ``check_space`` is treated as a failed check.
    """
    errors = ""
    if check_empty(value):
        errors += f"Please enter a {noun}. "
    if check_length(value):
        errors += f"Your {noun} must be 3-20 characters. "
    if check_space(value):
        errors += f"Your {noun} may not contain spaces. "
    return errors