step_plot = [] num_iterations = 20000 # @param {type:"integer"} initial_collect_steps = 100 # @param {type:"integer"} collect_steps_per_iteration = 1 # @param {type:"integer"} replay_buffer_max_length = 100000 # @param {type:"integer"} batch_size = 64 # @param {type:"integer"} learning_rate = 1e-3 # @param {type:"number"} log_interval = 200 # @param {type:"integer"} num_eval_episodes = 10 # @param {type:"integer"} eval_interval = 1000 # @param {type:"integer"} env = Environment.CnfSolverEnv() train_py_env = Environment.CnfSolverEnv() eval_py_env = Environment.CnfSolverEnv() train_env = tf_py_environment.TFPyEnvironment(train_py_env) eval_env = tf_py_environment.TFPyEnvironment(eval_py_env) fc_layer_params = (100, 50) action_tensor_spec = tensor_spec.from_spec(env.action_spec()) num_actions = action_tensor_spec.maximum - action_tensor_spec.minimum + 1 # Define a helper function to create Dense layers configured with the right # activation and kernel initializer. def dense_layer(num_units):
import random
import time

import numpy as np

import Environment as env

# Drive a CnfSolverEnv with uniformly random actions, pausing after every
# step.

# How many random steps to take, and how long to pause after each one.
NUM_RANDOM_STEPS = 100
STEP_DELAY_SECONDS = 0.5

cnfEnv = env.CnfSolverEnv()

# Actions offered to step(): the integers 0 through 10, plus 99.
action_array = [*range(11), 99]

cnfEnv.reset()
for _ in range(NUM_RANDOM_STEPS):
    action = random.choice(action_array)
    cnfEnv.step(action)
    time.sleep(STEP_DELAY_SECONDS)