def reset(self):
    self.switched = False
    EpisodicTask.reset(self)
    if self.opponent.color == EuphoriaGame.BLACK:
        # first move by opponent
        self.opponent.game = self.env
        EpisodicTask.performAction(self, (EuphoriaGame.BLACK, self.opponent.getAction()))
def __init__(self, environment):
    ''' Constructor '''
    EpisodicTask.__init__(self, environment)
    self.prev_time = 0
    self.current_time = 0
    self.reward = 0
def __init__(self, env, episodeLength):
    EpisodicTask.__init__(self, env)
    #self.inDim = 1
    #self.outDim = 1
    self.counter = 0
    self.history = []
    self.total = []
    self.episodeLength = episodeLength
def performAction(self, action): """ Perform action on the underlying environment, i.e specify new asset allocation. Args: action (np.array): new allocation """ # Cache new asset allocation for computing rewards self.newAllocation = action # Perform action EpisodicTask.performAction(self, action)
def __init__(self, size, opponent=None, **args):
    EpisodicTask.__init__(self, GoGame(size))
    self.setArgs(**args)
    if opponent is None:
        opponent = RandomGoPlayer(self.env)
    elif isclass(opponent):
        # assume the agent can be initialized without arguments then.
        opponent = opponent(self.env)
    else:
        opponent.game = self.env
    if not self.opponentStart:
        opponent.colour = GoGame.WHITE
    self.opponent = opponent
    self.reset()
def __init__(self, size, opponent=None, **args):
    EpisodicTask.__init__(self, PenteGame((size, size)))
    self.setArgs(**args)
    if opponent is None:
        opponent = RandomGomokuPlayer(self.env)
    elif isclass(opponent):
        # assume the agent can be initialized without arguments then.
        opponent = opponent(self.env)
    if not self.opponentStart:
        opponent.color = PenteGame.WHITE
    self.opponent = opponent
    self.minmoves = 9
    self.maxmoves = self.env.size[0] * self.env.size[1]
    self.reset()
def __init__(self, size, opponent=None, **args):
    EpisodicTask.__init__(self, PenteGame((size, size)))
    self.setArgs(**args)
    if opponent is None:
        opponent = RandomGomokuPlayer(self.env)
    elif isclass(opponent):
        # assume the agent can be initialized without arguments then.
        opponent = opponent(self.env)
    if not self.opponentStart:
        opponent.color = PenteGame.WHITE
    self.opponent = opponent
    self.minmoves = 9
    self.maxmoves = self.env.size[0] * self.env.size[1]
    self.reset()
def __init__(self, size, opponent=None, **args):
    EpisodicTask.__init__(self, CaptureGame(size))
    self.setArgs(**args)
    if opponent is None:
        opponent = RandomCapturePlayer(self.env)
    elif isclass(opponent):
        # assume the agent can be initialized without arguments then.
        opponent = opponent(self.env)
    else:
        opponent.game = self.env
    if not self.opponentStart:
        opponent.color = CaptureGame.WHITE
    self.opponent = opponent
    self.maxmoves = self.env.size * self.env.size
    self.minmoves = 3
    self.reset()
def performAction(self, action):
    # agent.game = self.env
    if self.opponentStart:
        EpisodicTask.performAction(self, (EuphoriaGame.WHITE, action))
    else:
        EpisodicTask.performAction(self, (EuphoriaGame.BLACK, action))
    if not self.isFinished():
        self.opponent.game = self.env
        if self.opponentStart:
            EpisodicTask.performAction(self, (EuphoriaGame.BLACK, self.opponent.getAction()))
        else:
            EpisodicTask.performAction(self, (EuphoriaGame.WHITE, self.opponent.getAction()))
def reset(self):
    self.current_time = self.prev_time = 0.0
    if const.USE_PERIODS:
        self.current_time = self.prev_time = random.uniform(0, const.PERIODS)
    self.current_time = const.MID_DAY
    #print "ST", self.current_time
    self.start_time = self.current_time
    self.counter = 0
    # choose a random node that is not the destination
    node = grid.node_number(const.DESTINATION)
    while node == grid.node_number(const.DESTINATION):
        node = random.randint(0, const.NODES - 1)
    # See what happens
    if const.SAME_START:
        node = 0
    # while(node == grid.node_number(const.DESTINATION)):
    #     node = random.randint(0, const.NODES - 1)
    self.start_node = node
    self.env.reset_grid(self.current_time, node)
    EpisodicTask.reset(self)
def getObservation(self): """ An augmented observation of the underlying environment state that also includes the current portfolio weights, right before realloacation. Returns: state (np.array): the augmented state (size (P+1) * (I+1)) """ # Observe past asset returns from the environment pastReturns = EpisodicTask.getObservation(self) # Return augmented state return np.concatenate((pastReturns, self.currentAllocation))
def reset(self):
    self.current_time = self.prev_time = 0.0
    if const.USE_PERIODS:
        self.current_time = self.prev_time = random.uniform(0, const.PERIODS)
    self.current_time = const.MID_DAY
    #print "ST", self.current_time
    self.start_time = self.current_time
    self.counter = 0
    # choose a random node that is not the destination
    node = grid.node_number(const.DESTINATION)
    while node == grid.node_number(const.DESTINATION):
        node = random.randint(0, const.NODES - 1)
    # See what happens
    if const.SAME_START:
        node = 0
    # while(node == grid.node_number(const.DESTINATION)):
    #     node = random.randint(0, const.NODES - 1)
    self.start_node = node
    self.env.reset_grid(self.current_time, node)
    EpisodicTask.reset(self)
def __init__(self, environment, deltaP, deltaF, deltaS, discount, backtest=False):
    """ Standard constructor for the asset allocation task.

    Args:
        environment (Environment): market environment object
        deltaP (double): proportional transaction costs rate
        deltaF (double): fixed transaction cost rate
        deltaS (double): short selling borrowing cost rate
        discount (double): discount factor
        backtest (bool): flag for training mode or test mode
    """
    # Initialize episodic task
    EpisodicTask.__init__(self, environment)

    # Transaction costs
    self.deltaP = deltaP
    self.deltaF = deltaF
    self.deltaS = deltaS

    # Discount factor
    self.discount = discount

    # Backtesting
    self.backtest = backtest

    # Report stores allocations and portfolio log-returns for backtesting
    self.report = pd.DataFrame(columns=list(self.env.data.columns) + ['ptfLogReturn'])

    # Initialize allocation
    self.initializeAllocation()
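# A minimal usage sketch for the constructor above (not part of the original
# code): the market environment object and the parameter values shown here are
# illustrative assumptions, not values taken from the source.
#
# env = MarketEnvironment('returns.csv')      # hypothetical market environment
# task = AssetAllocationTask(env,
#                            deltaP=0.0005,   # proportional transaction costs
#                            deltaF=0.0,      # fixed transaction costs
#                            deltaS=0.0001,   # short-selling borrowing costs
#                            discount=0.99,
#                            backtest=False)  # training mode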
def f(self, x): """ If a module is given, wrap it into a ModuleDecidingAgent before evaluating it. Also, if applicable, average the result over multiple games. """ if isinstance(x, Module): agent = ModuleDecidingPlayer(x, self.env, greedySelection = True) elif isinstance(x, EuphoriaRandomPlayer): agent = x else: raise NotImplementedError('Missing implementation for '+x.__class__.__name__+' evaluation') res = 0 agent.game = self.env self.opponent.game = self.env for dummy in range(self.averageOverGames): agent.color = -self.opponent.color res += EpisodicTask.f(self, agent) return res / float(self.averageOverGames)
def f(self, x): """ If a module is given, wrap it into a ModuleDecidingAgent before evaluating it. Also, if applicable, average the result over multiple games. """ if isinstance(x, Module): agent = ModuleDecidingPlayer(x, self.env, greedySelection = True) elif isinstance(x, CapturePlayer): agent = x else: raise NotImplementedError('Missing implementation for '+x.__class__.__name__+' evaluation') res = 0 agent.game = self.env self.opponent.game = self.env for _ in range(self.averageOverGames): agent.color = -self.opponent.color x = EpisodicTask.f(self, agent) res += x return res / float(self.averageOverGames)
def performAction(self, action):
    EpisodicTask.performAction(self, action)
    if not self.isFinished():
        EpisodicTask.performAction(self, self.opponent.getAction())
def reset(self):
    EpisodicTask.reset(self)
    self.env.reset()
    self._ended = False
def reset(self):
    self.switched = False
    EpisodicTask.reset(self)
    if self.opponent.color == CaptureGame.BLACK:
        # first move by opponent
        EpisodicTask.performAction(self, self.opponent.getAction())
def reset(self):
    EpisodicTask.reset(self)
    self.counter = 0
def reset(self):
    # i suppose this is the proper way to do it?
    EpisodicTask.reset(self)
    self.env.reset()