def main():
    """Run the CartPole demo end-to-end.

    Sets up logging, the main AtomSpace, the wrapped environment and a
    FixedCartPoleAgent, then repeatedly runs the agent's control cycle
    (rendering between cycles) until the cycle reports completion, and
    finally logs the accumulated reward.
    """
    # Init loggers
    log.set_level("fine")
    log.set_sync(False)
    agent_log.set_level("fine")
    agent_log.set_sync(False)
    ure_logger().set_level("fine")
    ure_logger().set_sync(False)

    # Set main atomspace
    atomspace = AtomSpace()
    set_default_atomspace(atomspace)

    # Wrap environment
    # NOTE(review): `env` is not defined in this chunk — presumably a
    # module-level gym environment created elsewhere in the file; confirm.
    wrapped_env = CartPoleWrapper(env, atomspace)

    # Instantiate CartPoleAgent, and tune parameters
    cpa = FixedCartPoleAgent(wrapped_env, atomspace)
    # Near-zero delta; exact semantics defined by the agent class — TODO confirm.
    cpa.delta = 1.0e-16

    # Run control loop: control_cycle() returns truthy when the run is over.
    while not cpa.control_cycle():
        wrapped_env.render()
        time.sleep(0.1)
        log.info("cycle_count = {}".format(cpa.cycle_count))
    log_msg(agent_log, f"The final reward is {cpa.accumulated_reward}.")
def __init__(self, env: CartPoleWrapper, atomspace: AtomSpace, log_level="debug"):
    """Construct the agent around a wrapped CartPole environment.

    Builds the action space from the environment's action names, defines
    the positive/negative reward goals, delegates to the parent ctor and
    then disables a few mining/deduction parameters.
    """
    set_default_atomspace(atomspace)

    # Allowed actions: one ExecutionLink per action name exposed by the
    # environment.  TODO take care of action parameters.
    allowed_actions = {ExecutionLink(SchemaNode(name)) for name in env.action_names}

    # Goals: a reward of 1 is the positive goal, a reward of 0 the negative one.
    positive_goal = EvaluationLink(PredicateNode("Reward"), NumberNode("1"))
    negative_goal = EvaluationLink(PredicateNode("Reward"), NumberNode("0"))

    super().__init__(
        env,
        atomspace,
        allowed_actions,
        positive_goal,
        negative_goal,
        log_level=log_level,
    )

    # Overwrite some OpencogAgent parameters
    self.monoaction_general_succeedent_mining = False
    self.polyaction_mining = False
    self.temporal_deduction = False
def __init__(
    self,
    env: Env,
    atomspace: AtomSpace,
    action_names: "list[str] | None" = None,
):
    """Wrap a gym environment and register it with the given AtomSpace.

    Args:
        env: the gym environment to wrap.
        atomspace: the AtomSpace installed as the default one.
        action_names: optional list of action names exposed to agents;
            defaults to an empty list.
    """
    super().__init__()
    self.atomspace = atomspace
    set_default_atomspace(self.atomspace)
    self.env = env
    self.action_space = env.action_space
    self.observation_space = env.observation_space
    # Fix for the mutable-default-argument pitfall: the original
    # `action_names: list[str] = []` default was a single list object
    # shared by every instance constructed without the argument, so a
    # mutation through one wrapper leaked into all later ones.  Using a
    # None sentinel keeps the call-site behavior identical while giving
    # each instance its own fresh list.
    self.action_names = action_names if action_names is not None else []
def __init__(self, env: CartPoleWrapper, atomspace: AtomSpace):
    """Build a CartPole agent on top of the wrapped environment.

    Derives the action space from the environment's action names, sets up
    the reward goals and delegates the rest to the parent constructor.
    """
    set_default_atomspace(atomspace)

    # One ExecutionLink per allowed action name.
    # TODO take care of action parameters.
    allowed = {ExecutionLink(SchemaNode(n)) for n in env.action_names}

    # Positive goal: reward 1.  Negative goal: reward 0.
    goal_pos = EvaluationLink(PredicateNode("Reward"), NumberNode("1"))
    goal_neg = EvaluationLink(PredicateNode("Reward"), NumberNode("0"))

    super().__init__(env, atomspace, allowed, goal_pos, goal_neg)
def test_cartpole():
    """Smoke-test the fixed CartPole agent: run its control loop to completion."""
    env = gym.make("CartPole-v1")

    # Main atomspace shared by the wrapper and the agent.
    atomspace = AtomSpace()
    set_default_atomspace(atomspace)

    # Wrap the gym environment for OpenCog consumption.
    wrapped_env = CartPoleWrapper(env, atomspace)

    # Instantiate the agent with a tiny delta, as in the demo script.
    agent = FixedCartPoleAgent(wrapped_env, atomspace)
    agent.delta = 1.0e-16

    # Step the agent until control_cycle() signals completion, pausing
    # briefly between cycles.
    done = agent.control_cycle()
    while not done:
        time.sleep(0.1)
        done = agent.control_cycle()
def __init__(self, env, atomspace):
    """Construct the agent and tune its pattern-mining parameters.

    Builds the action space from the environment's action names, defines
    the positive/negative reward goals, delegates to the parent ctor and
    then overrides several OpencogAgent mining/deduction parameters.
    """
    set_default_atomspace(atomspace)

    # Create Action Space. The set of allowed actions an agent can take.
    # TODO take care of action parameters.
    action_space = {ExecutionLink(SchemaNode(a)) for a in env.action_names}

    # Create Goal
    pgoal = EvaluationLink(PredicateNode("Reward"), NumberNode("1"))
    ngoal = EvaluationLink(PredicateNode("Reward"), NumberNode("0"))

    # Call super ctor.  Use super() — consistent with the other agent
    # __init__s in this file — rather than naming OpencogAgent explicitly,
    # so cooperative multiple inheritance (MRO) keeps working.
    super().__init__(env, atomspace, action_space, pgoal, ngoal)

    # Overwrite some OpencogAgent parameters
    self.polyaction_mining = False
    self.monoaction_general_succeedent_mining = True
    self.temporal_deduction = True
    self.cogscm_minimum_strength = 0.9
    self.cogscm_maximum_shannon_entropy = 1
    self.cogscm_maximum_differential_entropy = 0
    self.cogscm_maximum_variables = 0
def eat(self, i):
    """Step the environment to consume hotbar slot *i*.

    Stops movement, taps (press then release) the ``hotbar.i`` key, then
    presses ``use``.  Presumably this eats the selected item — confirm
    against the Malmo mission definition.
    """
    self.env.step(mk_action("move", 0))
    self.env.step(mk_action("hotbar.{}".format(i), 1))
    self.env.step(mk_action("hotbar.{}".format(i), 0))
    self.env.step(mk_action("use", 1))

def wake(self):
    """Release ``use`` and tap hotbar slot 9.

    NOTE(review): named "wake" but the visible code only sends key
    events; verify the intended effect against the mission XML.
    """
    self.env.step(mk_action("use", 0))
    self.env.step(mk_action("hotbar.9", 1))
    self.env.step(mk_action("hotbar.9", 0))

if __name__ == "__main__":
    # Main atomspace registered as the default one.
    atomspace = AtomSpace()
    set_default_atomspace(atomspace)

    # Wrap environment
    wrapped_env = MalmoWrapper(missionXML=mission_xml, validate=True)

    # Create Goal: reward 1 is the positive goal, reward 0 the negative one.
    pgoal = EvaluationLink(PredicateNode("Reward"), NumberNode("1"))
    ngoal = EvaluationLink(PredicateNode("Reward"), NumberNode("0"))

    # Create Action Space. The set of allowed actions an agent can take.
    # TODO take care of action parameters.
    # NOTE(review): this set literal continues past the end of the visible
    # chunk; the remaining elements (and closing brace) are not shown here.
    action_space = {
        ExecutionLink(SchemaNode("tpz"), NumberNode("2.5")),
        ExecutionLink(SchemaNode("tpz"), NumberNode("-1.5")),
        ExecutionLink(SchemaNode("attack"), NumberNode("0")),
        ExecutionLink(SchemaNode("attack"), NumberNode("1")),
        ExecutionLink(SchemaNode("move"), NumberNode("0")),