Example #1
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent coin flip actions, i.e. a prediction of the coin landing on
# heads or tails.
coin_flip_action_enum = util.enum('aTails', 'aHeads')

# Define an enumeration to represent coin flip observations, e.g. the coin landed on heads or tails.
coin_flip_observation_enum = util.enum('oTails', 'oHeads')

# Define an enumeration to represent coin flip rewards, e.g. win or lose, for correctly predicting
# the coin flip.
coin_flip_reward_enum = util.enum('rLose', 'rWin')

# Define some shorthand notation for ease of reference.
aHeads = coin_flip_action_enum.aHeads
aTails = coin_flip_action_enum.aTails

oHeads = coin_flip_observation_enum.oHeads
oTails = coin_flip_observation_enum.oTails
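# --- Added illustration, not part of the original snippet ---
# A minimal sketch of one interaction step using the enumerations above,
# assuming a fair coin and that util.enum members compare as plain integers.
def sample_coin_flip_step(action):
    observation = oHeads if random.random() < 0.5 else oTails
    predicted_correctly = ((action == aHeads and observation == oHeads) or
                           (action == aTails and observation == oTails))
    reward = coin_flip_reward_enum.rWin if predicted_correctly else coin_flip_reward_enum.rLose
    return observation, reward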
Example #2
import math
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import util

# An enumeration type used to specify the type of Monte Carlo search node.
# Chance nodes represent sets of possible observations (one child per observation).
# Decision nodes represent sets of possible actions (one child per action).
# Decision and chance nodes alternate.
nodetype_enum = util.enum('chance', 'decision')

# Defines some shortcuts for ease of reference.
chance_node = nodetype_enum.chance
decision_node = nodetype_enum.decision


class MonteCarloSearchNode:
    """ A class to represent a node in the Monte Carlo search tree.

        The nodes in the search tree represent simulated actions and percepts
        between an agent following an upper confidence bounds (UCB) policy and a generative
        model of the environment represented by a context tree.

        The purpose of the tree is to determine the expected reward of the
        available actions through sampling. Sampling proceeds several time steps
Example #3
# -*- coding: utf-8 -*-
"""
Defines a base class for AIXI-approximate agents.
"""

from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import random

from pyaixi import util

# Define an enumeration to represent what type of environment update has been performed.
update_enum = util.enum('action_update', 'percept_update')

# Define some shortcuts for ease of reference.
action_update = update_enum.action_update
percept_update = update_enum.percept_update


class Agent:
    """ This base class represents the minimum class elements for a AIXI-style agent.

        The following attributes and methods must be available in all agents, in order
        for the main interaction loop to get responses, give environmental feedback,
        manage learning, and monitor progress:

         - `age`
         - `average_reward()`
Example #4
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as listening, or opening doors.
tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight')

# Define an enumeration to represent environment observations: either not
# hearing the tiger, or hearing it from behind either door.
tiger_observation_enum = util.enum('oNull', 'oLeft', 'oRight')

# Define an enumeration to represent rewards as a result of actions: being eaten by the tiger,
# getting information from listening, or finding the gold.
# NOTE: since the enumeration values need to be positive, these values are defined relative to
#       100.
#       So -100 points is 0, -1 points is 99, and 10 points is 110.
tiger_reward_enum = util.enum(rEaten = 0, rListen = 99, rGold = 110)
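# An added sketch (not in the original source) of undoing the offset-100
# encoding described in the NOTE above, assuming util.enum values act as
# plain integers.
def decode_tiger_reward(encoded_reward):
    return encoded_reward - 100

assert decode_tiger_reward(tiger_reward_enum.rEaten) == -100   # eaten by the tiger
assert decode_tiger_reward(tiger_reward_enum.rListen) == -1    # cost of listening
assert decode_tiger_reward(tiger_reward_enum.rGold) == 10      # found the gold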

# Define some shorthand notation for ease of reference.
aListen = tiger_action_enum.aListen
aLeft   = tiger_action_enum.aLeft
Example #5
from __future__ import print_function
from __future__ import unicode_literals

import os
import random
import sys
# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util


# 4 actions, encoded in 2 bits.
tiger_action_enum = util.enum('stand','listen','open_left_door','open_right_door')

# 3 observations, encoded in 2 bits.
# 'left': the tiger is behind the left door.
# 'right': the tiger is behind the right door.
# 'void': the agent tried to listen while standing, and thus heard nothing.
tiger_observation_enum = util.enum('left','right','void')

# Reward ranges from 0 to 130, encoded in 8 bits.
# 0 (-100) is the reward for the worst action: opening the door with the tiger behind it.
# 90 (-10) is the reward for an 'invalid' action, e.g. standing while already standing.
# 99 (-1) is the reward for a 'valid' action, e.g. standing up while sitting.
# 130 (30) is the reward for the best action: opening the door with the gold behind it.
tiger_reward_enum = util.enum(penalty = 90, eaten = 0, gold = 130, normal = 99)

# 2 states, encoded in 1 bit.
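# An added sanity check (not part of the original file) that the bit widths
# stated above suffice for the value ranges involved.
import math
assert math.ceil(math.log2(4)) == 2        # 4 actions fit in 2 bits
assert math.ceil(math.log2(3)) == 2        # 3 observations fit in 2 bits
assert math.ceil(math.log2(130 + 1)) == 8  # rewards 0..130 fit in 8 bits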
Example #6
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent environment observations: either a square
# is empty, filled with the agent's piece, or the environment's piece.
tictactoe_observation_enum = util.enum('oEmpty', 'oAgent', 'oEnv')

# Define an enumeration to represent rewards as a result of actions: invalid actions,
# losses, null, draws, and wins.
# NOTE: since the enumeration values need to be positive, these values are defined relative to 3.
#       So -3 points is 0, -2 points is 1, and 2 points is 5.
tictactoe_reward_enum = util.enum(rInvalid = 0, rLoss = 1, rNull = 3, rDraw = 4, rWin = 5)
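# An added decoding sketch (not in the original source): per the NOTE above,
# the signed reward is recovered by subtracting the offset of 3, assuming
# util.enum values act as plain integers.
assert tictactoe_reward_enum.rInvalid - 3 == -3
assert tictactoe_reward_enum.rLoss - 3 == -2
assert tictactoe_reward_enum.rWin - 3 == 2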

# Define some shorthand notation for ease of reference.
oEmpty  = tictactoe_observation_enum.oEmpty
oAgent  = tictactoe_observation_enum.oAgent
oEnv    = tictactoe_observation_enum.oEnv

rInvalid = tictactoe_reward_enum.rInvalid
rLoss    = tictactoe_reward_enum.rLoss
rNull    = tictactoe_reward_enum.rNull
Example #7
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent rock-paper-scissors actions, i.e. the
# agent performing a rock, paper, or scissors move.
rock_paper_scissors_action_enum = util.enum('aRock', 'aPaper', 'aScissors')

# Define an enumeration to represent rock-paper-scissors observations, i.e. the
# opponent performing a rock, paper, or scissors move.
rock_paper_scissors_observation_enum = util.enum('oRock', 'oPaper', 'oScissors')

# Define an enumeration to represent losing, drawing, or winning.
rock_paper_scissors_reward_enum = util.enum('rLose', 'rDraw', 'rWin')

# Define some shorthand notation for ease of reference.
aRock     = rock_paper_scissors_action_enum.aRock
aPaper    = rock_paper_scissors_action_enum.aPaper
aScissors = rock_paper_scissors_action_enum.aScissors

oRock     = rock_paper_scissors_observation_enum.oRock
oPaper    = rock_paper_scissors_observation_enum.oPaper
Example #8
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as betting or passing.
kuhn_poker_action_enum = util.enum("aBet", "aPass")

# Define an enumeration to represent environment observations, such as card values
# and the opponent's bet status.
# The final observation is of the form `agent-card + opponent-bet-status`.
kuhn_poker_observation_enum = util.enum(oJack=0, oQueen=1, oKing=2, oBet=0, oPass=4)
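# An added sketch (not part of the original snippet) of composing the final
# observation as `agent-card + opponent-bet-status`, assuming util.enum values
# act as plain integers: e.g. holding a queen against a passing opponent
# yields 1 + 4 = 5.
def compose_kuhn_observation(card, bet_status):
    return card + bet_status

assert compose_kuhn_observation(kuhn_poker_observation_enum.oQueen,
                                kuhn_poker_observation_enum.oPass) == 5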

# Define an enumeration to represent rewards as a result of actions: betting and losing,
# betting and winning, passing and losing, passing and winning.
kuhn_poker_reward_enum = util.enum(rBetLoss=0, rPassLoss=1, rPassWin=3, rBetWin=4)

# Define some shorthand notation for ease of reference.
aBet = kuhn_poker_action_enum.aBet
aPass = kuhn_poker_action_enum.aPass

oJack = kuhn_poker_observation_enum.oJack
Example #9
from __future__ import print_function
from __future__ import unicode_literals

from pyaixi import environment, util

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# define actions
kuhn_action_enum = util.enum('aPass', 'aBet')

# define observations
kuhn_observation_enum = util.enum('oPJ', 'oPQ', 'oPK', 'oBJ', 'oBQ', 'oBK')

# define cards
kuhn_card_enum = util.enum('J', 'Q', 'K')

# define rewards: there are four rewards, and no draw
kuhn_reward_enum = util.enum(rLose2=0, rLose1=1, rWin1=3, rWin2=4)

aPass = kuhn_action_enum.aPass
aBet = kuhn_action_enum.aBet

oPJ = kuhn_observation_enum.oPJ
oPQ = kuhn_observation_enum.oPQ
Example #10
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as listening, just standing, or opening doors.
extended_tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight', 'aStand')

# Define an enumeration to represent environment observations: either not
# hearing the tiger, or hearing it from behind either door.
extended_tiger_observation_enum = util.enum('oNull', 'oLeft', 'oRight')

# Define an enumeration to represent rewards as a result of actions: being eaten by the tiger,
# performing an invalid action (listening while sitting), getting information from listening,
# just standing there, or finding the gold.
# NOTE: since the enumeration values need to be positive, these values are defined relative to
#       100.
#       So -100 points is 0, -1 points is 99, and 30 points is 130.
extended_tiger_reward_enum = util.enum(rInvalid=0,
                                       rTiger=0,
                                       rStand=99,
                                       rListen=100,
Example #11
from __future__ import print_function
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent the agent's actions: a choice between moving left or right.
oned_maze_action_enum = util.enum('aLeft', 'aRight')

# As the agent's observations are uninformative, the observation is defined as goal or non-goal instead of location.
oned_maze_observation_enum = util.enum('oGoal', 'oNonGoal')

# Define an enumeration to represent the agent's rewards: 1 for reaching the goal point, 0 otherwise.
oned_maze_reward_enum = util.enum('rOne', 'rZero')

# Define the locations of the cells in the 1D maze.
locs = [0, 1, 2, 3]

# Define some shorthand notation for ease of reference.
aLeft = oned_maze_action_enum.aLeft
aRight = oned_maze_action_enum.aRight

oGoal = oned_maze_observation_enum.oGoal
Example #12
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as going left, up, right, or down.
maze_action_enum = util.enum("aLeft", "aUp", "aRight", "aDown")

# Define an enumeration to represent environment observations: either a cell
# is empty, or has a wall in various (bit) positions.
maze_observation_enum = util.enum(oNull=0, oLeftWall=1, oUpWall=2, oRightWall=4, oDownWall=8)

# Define an enumeration to represent observation encoding constants.
maze_observation_encoding_enum = util.enum("cUninformative", "cWalls", "cCoordinates")

# Define some shorthand notation for ease of reference.
aLeft = maze_action_enum.aLeft
aUp = maze_action_enum.aUp
aRight = maze_action_enum.aRight
aDown = maze_action_enum.aDown

oNull = maze_observation_enum.oNull
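# An added sketch (not in the original file): the wall observations above are
# bit flags, so a composite observation can be queried with bitwise operations,
# assuming util.enum values act as plain integers.
observation = (maze_observation_enum.oLeftWall
               | maze_observation_enum.oUpWall)
assert observation & maze_observation_enum.oLeftWall        # wall to the left
assert not (observation & maze_observation_enum.oDownWall)  # no wall below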
Example #13
# -*- coding: utf-8 -*-
"""
Defines a base class for AIXI-approximate agents.
"""

from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import random

from pyaixi import util

# Define an enumeration to represent what type of environment update has been performed.
update_enum = util.enum('action_update', 'percept_update')

# Define some shortcuts for ease of reference.
action_update = update_enum.action_update
percept_update = update_enum.percept_update

class Agent:
    """ This base class represents the minimum class elements for a AIXI-style agent.

        The following attributes and methods must be available in all agents, in order
        for the main interaction loop to get responses, give environmental feedback,
        manage learning, and monitor progress:

         - `age`
         - `average_reward()`
         - `generate_random_action()`
Example #14
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent rock-paper-scissors actions, i.e. the
# agent performing a rock, paper, or scissors move.
rock_paper_scissors_action_enum = util.enum('aRock', 'aPaper', 'aScissors')

# Define an enumeration to represent rock-paper-scissors observations, i.e. the
# opponent performing a rock, paper, or scissors move.
rock_paper_scissors_observation_enum = util.enum('oRock', 'oPaper',
                                                 'oScissors')

# Define an enumeration to represent losing, drawing, or winning.
rock_paper_scissors_reward_enum = util.enum('rLose', 'rDraw', 'rWin')

# Define some shorthand notation for ease of reference.
aRock = rock_paper_scissors_action_enum.aRock
aPaper = rock_paper_scissors_action_enum.aPaper
aScissors = rock_paper_scissors_action_enum.aScissors

oRock = rock_paper_scissors_observation_enum.oRock
Example #15
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from pyaixi import environment, util
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Defines an enumeration to represent agent actions: bet on the current observation, or pass.
kp_action_enum = util.enum('agent_pass', 'agent_bet')

# Defines an enumeration to represent agent observations.
# The agent observes both its own card and the opponent's action.
kp_card_observation_enum = util.enum(j=0, q=1, k=2)
kp_opponent_observation_enum = util.enum(op_bet=0, op_pass=3)
# Observation codes:
# 0: agent has J, Opponent bet,
# 1: agent has Q, Opponent bet,
# 2: agent has K, Opponent bet,
# 3: agent has J, Opponent passed,
# 4: agent has Q, Opponent passed,
# 5: agent has K, Opponent passed.

# Reward is (final chip count) - (initial chip count).
kp_bet_reward_enum = util.enum(fourChips=4,
Example #16
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from pyaixi import environment, util

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight')
tiger_observation_enum = util.enum('oLeft', 'oRight', 'oTiger', 'oGold',
                                   'oNone')

# Rewards normalised: tiger from -100 to 0, gold from 10 to 110, listen from -1 to 99.
tiger_reward_enum = util.enum(rTiger=0, rGold=110, rListen=99)

aListen = tiger_action_enum.aListen
aLeft = tiger_action_enum.aLeft
aRight = tiger_action_enum.aRight

oLeft = tiger_observation_enum.oLeft
oRight = tiger_observation_enum.oRight
oTiger = tiger_observation_enum.oTiger
oGold = tiger_observation_enum.oGold
oNone = tiger_observation_enum.oNone
Example #17
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as listening, just standing, or opening doors.
extended_tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight', 'aStand')

# Define an enumeration to represent environment observations: either not
# hearing the tiger, or hearing it from behind either door.
extended_tiger_observation_enum = util.enum('oNull', 'oLeft', 'oRight')

# Define an enumeration to represent rewards as a result of actions: being eaten by the tiger,
# performing an invalid action (listening while sitting), getting information from listening,
# just standing there, or finding the gold.
# NOTE: since the enumeration values need to be positive, these values are defined relative to
#       100.
#       So -100 points is 0, -1 points is 99, and 30 points is 130.
extended_tiger_reward_enum = util.enum(rInvalid = 0, rTiger = 0, rStand = 99, rListen = 100, rGold = 130)

# Define some shorthand notation for ease of reference.
aListen = extended_tiger_action_enum.aListen
Example #18
import os
import random
import sys
import numpy as np

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)
from pyaixi import environment, util

# Specify the map of Pacman, and the probability of generating the pellets.
default_probability = 0.5
layout_txt = "pacMan.txt"

# Define the enumerations to represent the observations and actions.
pacman_action_enum = util.enum('top', 'down', 'left', 'right')
pacman_wall_observations_enum = util.enum(wNull=0,
                                          wTopWall=1,
                                          wDownWall=2,
                                          wLeftWall=4,
                                          wRightWall=8)
pacman_ghost_observation_enum = util.enum(gNull=0,
                                          gTopWall=16,
                                          gDownWall=32,
                                          gLeftWall=64,
                                          gRightWall=128)

pacman_smell_observation_enum = util.enum(mD_n=0,
                                          mD_2=256,
                                          mD_3=512,
                                          mD_4=1024)
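# An added illustration (not part of the original file): the wall, ghost, and
# smell enumerations occupy disjoint bit ranges, so a single integer can carry
# all three observation components, assuming util.enum values act as ints.
composite_observation = (pacman_wall_observations_enum.wTopWall
                         | pacman_ghost_observation_enum.gLeftWall
                         | pacman_smell_observation_enum.mD_2)
assert composite_observation == 1 + 64 + 256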
Example #19
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent environment observations: either a square
# is empty, filled with the agent's piece, or the environment's piece.
tictactoe_observation_enum = util.enum('oEmpty', 'oAgent', 'oEnv')

# Define an enumeration to represent rewards as a result of actions: invalid actions,
# losses, null, draws, and wins.
# NOTE: since the enumeration values need to be positive, these values are defined relative to 3.
#       So -3 points is 0, -2 points is 1, and 2 points is 5.
tictactoe_reward_enum = util.enum(rInvalid=0,
                                  rLoss=1,
                                  rNull=3,
                                  rDraw=4,
                                  rWin=5)

# Define some shorthand notation for ease of reference.
oEmpty = tictactoe_observation_enum.oEmpty
oAgent = tictactoe_observation_enum.oAgent
oEnv = tictactoe_observation_enum.oEnv
Example #20
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent coin flip actions, i.e. a prediction of the coin landing on
# heads or tails.
coin_flip_action_enum = util.enum('aTails', 'aHeads')

# Define an enumeration to represent coin flip observations, e.g. the coin landed on heads or tails.
coin_flip_observation_enum = util.enum('oTails', 'oHeads')

# Define an enumeration to represent coin flip rewards, e.g. win or lose, for correctly predicting
# the coin flip.
coin_flip_reward_enum = util.enum('rLose', 'rWin')

# Define some shorthand notation for ease of reference.
aHeads = coin_flip_action_enum.aHeads
aTails = coin_flip_action_enum.aTails

oHeads = coin_flip_observation_enum.oHeads
oTails = coin_flip_observation_enum.oTails
Example #21
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as betting or passing.
kuhn_poker_action_enum = util.enum('aBet', 'aPass')

# Define an enumeration to represent environment observations, such as card values
# and the opponent's bet status.
# The final observation is of the form `agent-card + opponent-bet-status`.
kuhn_poker_observation_enum = util.enum(oJack=0,
                                        oQueen=1,
                                        oKing=2,
                                        oBet=0,
                                        oPass=4)

# Define an enumeration to represent rewards as a result of actions: betting and losing,
# betting and winning, passing and losing, passing and winning.
kuhn_poker_reward_enum = util.enum(rBetLoss=0,
                                   rPassLoss=1,
                                   rPassWin=3,
Example #22
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

RPS_action_enum = util.enum('aRock', 'aPaper', 'aScissor')
RPS_observation_enum = util.enum('oRock', 'oPaper', 'oScissor')
RPS_reward_enum = util.enum('rLose', 'rDraw', 'rWin')

aRock = RPS_action_enum.aRock
aPaper = RPS_action_enum.aPaper
aScissor = RPS_action_enum.aScissor

oRock = RPS_observation_enum.oRock
oPaper = RPS_observation_enum.oPaper
oScissor = RPS_observation_enum.oScissor

# Translate the rewards to -1, 0, 1.
'''
RPS_reward_enum.rLose=-1
RPS_reward_enum.rDraw=0
Example #23
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as going left, up, right, or down.
maze_action_enum = util.enum('aLeft', 'aUp', 'aRight', 'aDown')

# Define an enumeration to represent environment observations: either a cell
# is empty, or has a wall in various (bit) positions.
maze_observation_enum = util.enum(oNull=0,
                                  oLeftWall=1,
                                  oUpWall=2,
                                  oRightWall=4,
                                  oDownWall=8)

# Define an enumeration to represent observation encoding constants.
maze_observation_encoding_enum = util.enum('cUninformative', 'cWalls',
                                           'cCoordinates')

# Define some shorthand notation for ease of reference.
aLeft = maze_action_enum.aLeft
Example #24
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Defines an enumeration to represent agent actions: moving up, down, left, or right in the maze.
cheese_maze_action_enum = util.enum('up', 'down', 'left', 'right')

# Defines an enumeration to represent agent observations: which aliased location the mouse is at.
cheese_maze_observation_enum = util.enum(five=5,
                                         seven=7,
                                         eight=8,
                                         nine=9,
                                         ten=10,
                                         twelve=12)

# Defines an enumeration to represent agent rewards: taking an invalid step, a valid step, or the highest-rewarded step (finding the cheese).
cheese_maze_reward_enum = util.enum(wall=0, move=9, cheese=20)

# Defines some shorthand notation for ease of reference.
up = cheese_maze_action_enum.up
down = cheese_maze_action_enum.down
Example #25
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import util

# An enumeration type used to specify the type of Monte Carlo search node.
# Chance nodes represent a set of possible observations
# (one child per observation), while decision nodes
# represent a set of possible actions (one child per action).
# Decision and chance nodes alternate.
nodetype_enum = util.enum('chance', 'decision')

# Define some shortcuts for ease of reference.
chance_node = nodetype_enum.chance
decision_node = nodetype_enum.decision

class MonteCarloSearchNode:
    """ A class to represent a node in the Monte Carlo search tree.
        The nodes in the search tree represent simulated actions and percepts
        between an agent following an upper confidence bounds (UCB) policy and a generative
        model of the environment represented by a context tree.

        The purpose of the tree is to determine the expected reward of the
        available actions through sampling. Sampling proceeds several time steps
        into the future according to the size of the agent's horizon.
        (`MC_AIXI_CTW_Agent.horizon`)
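# An added standalone sketch (not part of the original class) of the UCB rule
# the docstring refers to: pick the child that maximises the mean reward plus
# an exploration bonus. The names and the exploration constant here are
# illustrative assumptions, not the pyaixi implementation.
import math

def ucb_score(child_mean_reward, child_visits, parent_visits,
              exploration=math.sqrt(2)):
    if child_visits == 0:
        return float('inf')  # try unvisited children first
    bonus = exploration * math.sqrt(math.log(parent_visits) / child_visits)
    return child_mean_reward + bonus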