示例#1
0
    def run( self ):
        """
        Drive one game from set-up to the final display call.

        Ghost powers, the agents' observed variables and Pacman's powers are
        initialised, the display is opened, and then events are taken from
        the state one at a time until ``self.gameOver`` is set.  An agent-move
        event asks that agent for an action and counts a move; any other
        event is handed to ``self.runEvent()``.

        The method returns early, without calling ``self.display.finish()``,
        when informing the agents of the game start, running an agent's
        action, or informing the learning agents reports failure (a falsy
        return value).
        """
        self.initializeGhostPowers()
        self.initializeAgentObservedVars()
        self.initializePacmanPowers()
        self.display.initialize(self.state.data)
        self.numMoves = 0

        startedOk = self.informAgentsOfGameStart()
        if not startedOk:
            return

        # Queried once up front; inside the loop the acting agent is taken
        # from each event instead.
        agentIndex = self.state.getNextAgentIndex()
        numAgents = len( self.agents )

        while not self.gameOver:
            nextEvent = self.state.getNextEvent()
            if not nextEvent.isAgentMove():
                self.runEvent()
            else:
                agentIndex = nextEvent.getAgentIndex()
                actionOk = self.getAndRunAgentAction(agentIndex)
                if not actionOk:
                    return
                self.numMoves += 1

            # Redraw, then let the rules decide on game-specific outcomes
            # (winning, losing, etc.).
            self.display.update(self.state.data)
            self.rules.process(self.state, self)

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        learnersOk = self.informLearningAgents()
        if not learnersOk:
            return
        self.display.finish()
示例#2
0
    def run(self):
        """
        Main control loop for game play.

        Agents are asked for an action in turn (``agent.get_action``) until
        ``self.gameOver`` is set.  An illegal action, or an action computed
        while exceeding the node-expansion budget, is replaced by the action
        executed on the previous turn; if that one is illegal as well,
        ``Directions.STOP`` is played instead.

        Returns:
            tuple: ``(totalScore, totalComputationTime, totalExpandedNodes)``.
            The score is read from the final state, the time is the summed
            wall-clock time spent inside ``get_action``, and the node count is
            the sum of ``GameState.countExpanded`` read after each call.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        agentIndex = self.startingIndex
        numAgents = len(self.agents)
        previous_action = Directions.STOP
        # The starting agent's move timeout is used as the per-move
        # node-expansion budget; a value of 0 disables the budget check.
        expout = int(self.rules.getMoveTimeout(agentIndex))
        totalComputationTime = 0
        totalExpandedNodes = 0
        if expout > 0:
            pacmodule.pacman.GameState.setMaximumExpanded(expout)

        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            # Generate an observation of the state
            observation = self.state.deepCopy()

            # Solicit an action.  Output stays muted until the action has been
            # validated; the finally clause guarantees it is restored even if
            # the agent raises.
            self.mute(agentIndex)
            try:
                pacmodule.pacman.GameState.resetNodeExpansionCounter()
                t = time.time()
                # TODO : node expansion control through getSuccessors
                action = agent.get_action(observation)
                totalComputationTime += (time.time() - t)

                expanded = pacmodule.pacman.GameState.countExpanded
                totalExpandedNodes += expanded
                violated = expout != 0 and expanded > expout

                if not self.state.isLegalAction(agentIndex, action):
                    print("Illegal move !")
                    action = previous_action
                elif violated:
                    print("Node expansion budget violated !")
                    action = previous_action

                # The fallback itself may be illegal in the current state.
                if not self.state.isLegalAction(agentIndex, action):
                    action = Directions.STOP
            finally:
                self.unmute()

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            previous_action = action
            self.state = self.state.generateSuccessor(agentIndex, action)

            # Change the display
            self.display.update(self.state.data)

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            # Track progress: one move per completed round, i.e. once the
            # last agent has acted.  (agentIndex is always < numAgents, so the
            # former test against numAgents + 1 could never succeed.)
            if agentIndex == numAgents - 1:
                self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        totalScore = self.state.getScore()

        self.display.finish()
        return totalScore, totalComputationTime, totalExpandedNodes
示例#3
0
    def run(self):
        """
        Main control loop for game play.

        Agent 0 is played locally: it is shown an observation restricted to
        what lies within a Manhattan distance of 12 of Pacman (the state of
        agent 1 is removed and capsules are filtered beyond that range), it
        is asked for an action, and the packed action is sent through
        ``self.socket``.  The action of every other agent is decoded from
        ``self.saveRead(4)`` (presumably the peer's packed action -- confirm
        against ``saveRead``).

        The method returns early when an agent failed to load, or when an
        agent's ``final`` hook raises while ``self.catchExceptions`` is set.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        # inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                sys.stderr.write("Agent %d failed to load\n" % i)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return
            if "registerInitialState" in dir(agent):
                self.mute(i)
                agent.registerInitialState(self.state.deepCopy())
                self.unmute()

        agentIndex = self.startingIndex
        numAgents = len(self.agents)

        while not self.gameOver:
            if agentIndex == 0:
                # Fetch the local agent
                agent = self.agents[agentIndex]
                # Generate an observation of the state
                if 'observationFunction' in dir(agent):
                    self.mute(agentIndex)
                    observation = agent.observationFunction(
                        self.state.deepCopy())
                    self.unmute()
                else:
                    observation = self.state.deepCopy()

                # 'Fix' observation: hide what is further than 12 steps away
                ppos = observation.getPacmanPosition()
                rpos = observation.getGhostState(1).getPosition()
                if manhattanDistance(ppos, rpos) > 12:
                    observation.removeAgentState(1)
                observation.data.capsules = [
                    c for c in observation.data.capsules
                    if manhattanDistance(ppos, c) <= 12
                ]

                # Solicit an action
                self.mute(agentIndex)
                start = time.time()
                action = agent.getAction(observation)
                if time.time() - start > 3.1:
                    print('Step time exceeds maximum time!')
                self.unmute()
                self.socket.send(self.packAction(action))
            else:
                # Remote agent: nothing is observed locally, so no copy of the
                # state is needed.
                observation = None
                action = self.unpackAction(self.saveRead(4))

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            self.state = self.state.generateSuccessor(agentIndex, action)
            if observation is not None:
                # The agent may still hold this observation; keep its
                # "agent moved" marker in step with the real state.
                observation.data._agentMoved = self.state.data._agentMoved

            # Change the display
            self.display.update(self.state.data)

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            # Track progress: one move per completed round.  agentIndex is
            # always < numAgents, so the former test against numAgents + 1
            # could never succeed.
            if agentIndex == numAgents - 1:
                self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    self.unmute()
                except Exception:
                    if not self.catchExceptions:
                        raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
示例#4
0
class Game:
  """
  The Game manages the control flow, soliciting actions from agents.

  ``self.state`` is not set by the constructor; it must be assigned before
  ``run`` is called, because ``run`` reads and replaces it.
  """

  def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False ):
    """
    Store the collaborators and reset the per-game bookkeeping.

    agents        -- sequence of agent objects, indexed by agent index
    display       -- object that receives initialize/update/finish calls
    rules         -- rules object (progress, time limits, crash handling)
    startingIndex -- index of the agent that moves first
    muteAgents    -- when true, stdout/stderr are swallowed while agent code
                     runs (see mute/unmute)
    """
    self.agentCrashed = False
    self.agents = agents
    self.display = display
    self.rules = rules
    self.startingIndex = startingIndex
    self.gameOver = False
    self.muteAgents = muteAgents
    self.catchExceptions = _BOINC_ENABLED
    self.moveHistory = []
    # Timing bookkeeping that run() reads and updates when it guards agent
    # calls (self.catchExceptions).
    self.totalAgentTimes = [0 for agent in agents]
    self.totalAgentTimeWarnings = [0 for agent in agents]
    self.agentTimeout = False

  def getProgress(self):
    """Return 1.0 once the game is over, else the progress the rules report."""
    if self.gameOver:
      return 1.0
    else:
      return self.rules.getProgress(self)

  def _agentCrash( self, agentIndex, quiet=False ):
    """
    Helper method for handling agent crashes.

    Marks the game as over and crashed and notifies the rules.  Unless
    ``quiet`` is true, the traceback of the exception currently being
    handled (if any) is printed first.
    """
    if not quiet and sys.exc_info()[0] is not None:
      import traceback
      traceback.print_exc()
    self.gameOver = True
    self.agentCrashed = True
    self.rules.agentCrash(agentIndex)

  # Streams saved by mute(); None while output is not muted.
  OLD_STDOUT = None
  OLD_STDERR = None

  def mute(self):
    """
    Redirect stdout/stderr into throw-away buffers (only if muteAgents).

    Calling mute() while already muted is a no-op, so the saved real streams
    are never overwritten by a buffer.
    """
    if not self.muteAgents: return
    if self.OLD_STDOUT is not None: return
    try:
      from cStringIO import StringIO
    except ImportError:
      from io import StringIO
    self.OLD_STDOUT = sys.stdout
    self.OLD_STDERR = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()

  def unmute(self):
    """
    Restore the streams saved by mute() (only if muteAgents).

    Calling unmute() while not muted is a no-op; in particular a repeated
    call never closes the real stdout/stderr.
    """
    if not self.muteAgents: return
    if self.OLD_STDOUT is None: return
    sys.stdout.close()
    sys.stderr.close()
    # Revert stdout/stderr to originals
    sys.stdout = self.OLD_STDOUT
    sys.stderr = self.OLD_STDERR
    self.OLD_STDOUT = None
    self.OLD_STDERR = None

  def run( self ):
    """
    Main control loop for game play.

    Each agent is first given the initial state (``registerInitialState``,
    when it has one); agents are then asked for an action in turn until
    ``self.gameOver`` is set, and finally every agent's ``final`` hook is
    called before the display is finished.

    When ``self.catchExceptions`` is set, agent calls are wrapped in
    ``TimeoutFunction`` and checked against the limits supplied by the
    rules; a timeout, too many slow-move warnings, exhausting the total
    time, or any exception raised by the agent ends the game through
    ``_agentCrash`` and returns without finishing the display.
    """
    self.display.initialize(self.state.data)
    self.numMoves = 0

    # inform learning agents of the game start
    for i, agent in enumerate(self.agents):
      if not agent:
        # this is a null agent, meaning it failed to load
        # the other team wins
        self._agentCrash(i, quiet=True)
        return
      if "registerInitialState" not in dir(agent):
        continue
      self.mute()
      try:
        if self.catchExceptions:
          try:
            timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
            try:
              start_time = time.time()
              timed_func(self.state.deepCopy())
              time_taken = time.time() - start_time
              self.totalAgentTimes[i] += time_taken
            except TimeoutFunctionException:
              print("Agent %d ran out of time on startup!" % i)
              self.unmute()
              self.agentTimeout = True
              self._agentCrash(i, quiet=True)
              return
          except Exception:
            self.unmute()
            self._agentCrash(i, quiet=True)
            return
        else:
          agent.registerInitialState(self.state.deepCopy())
        ## TODO: could this exceed the total time
      finally:
        # Safety net: unmute() does nothing if output was already restored.
        self.unmute()

    agentIndex = self.startingIndex
    numAgents = len( self.agents )

    while not self.gameOver:
      # Fetch the next agent
      agent = self.agents[agentIndex]
      move_time = 0
      skip_action = False
      # Generate an observation of the state
      if 'observationFunction' in dir( agent ):
        self.mute()
        try:
          if self.catchExceptions:
            try:
              timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
              try:
                start_time = time.time()
                observation = timed_func(self.state.deepCopy())
              except TimeoutFunctionException:
                # Handled below as a timed-out move.
                skip_action = True
              move_time += time.time() - start_time
            except Exception:
              self.unmute()
              self._agentCrash(agentIndex, quiet=True)
              return
          else:
            observation = agent.observationFunction(self.state.deepCopy())
        finally:
          self.unmute()
      else:
        observation = self.state.deepCopy()

      # Solicit an action
      action = None
      self.mute()
      try:
        if self.catchExceptions:
          try:
            timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
            try:
              start_time = time.time()
              if skip_action:
                raise TimeoutFunctionException()
              action = timed_func( observation )
            except TimeoutFunctionException:
              print("Agent %d timed out on a single move!" % agentIndex)
              self.agentTimeout = True
              self.unmute()
              self._agentCrash(agentIndex, quiet=True)
              return

            move_time += time.time() - start_time

            if move_time > self.rules.getMoveWarningTime(agentIndex):
              self.totalAgentTimeWarnings[agentIndex] += 1
              print("Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]))
              if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
                print("Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]))
                self.agentTimeout = True
                self.unmute()
                self._agentCrash(agentIndex, quiet=True)
                # The game is over; do not go on to execute the action.
                return

            self.totalAgentTimes[agentIndex] += move_time
            #print("Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex]))
            if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
              print("Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex]))
              self.agentTimeout = True
              self.unmute()
              self._agentCrash(agentIndex, quiet=True)
              return
          except Exception:
            self.unmute()
            self._agentCrash(agentIndex)
            return
        else:
          action = agent.getAction(observation)
      finally:
        self.unmute()

      # Execute the action
      self.moveHistory.append( (agentIndex, action) )
      if self.catchExceptions:
        try:
          self.state = self.state.generateSuccessor( agentIndex, action )
        except Exception:
          self._agentCrash(agentIndex)
          return
      else:
        self.state = self.state.generateSuccessor( agentIndex, action )

      # Change the display
      self.display.update( self.state.data )

      # Allow for game specific conditions (winning, losing, etc.)
      self.rules.process(self.state, self)
      # Track progress: one move per completed round.  agentIndex is always
      # < numAgents, so the former test against numAgents + 1 never succeeded.
      if agentIndex == numAgents - 1: self.numMoves += 1
      # Next agent
      agentIndex = ( agentIndex + 1 ) % numAgents

      if _BOINC_ENABLED:
        boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agentIndex, agent in enumerate(self.agents):
      if "final" in dir( agent ) :
        try:
          self.mute()
          agent.final( self.state )
          self.unmute()
        except Exception as data:
          self.unmute()
          if not self.catchExceptions: raise
          print("Exception %s" % (data,))
          self._agentCrash(agentIndex)
          return
    self.display.finish()
示例#6
0
    def run(self):
        """
        Main control loop for game play, with per-turn data recording.

        After the agents have been given the initial state, the game is
        played while rows are appended to ``all_data_pacman.arff``; the ARFF
        header is written first when that file does not exist yet.  The file
        is closed however the game ends.  ``self.display.finish()`` is only
        called when the game ran to completion; an agent crash or timeout
        returns without it.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0
        # inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return

            if "registerInitialState" in dir(agent):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.registerInitialState,
                            int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i,
                                  file=sys.stderr)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                ## TODO: could this exceed the total time
                self.unmute()

        # Imported at call time, as before; only the names actually used.
        from bustersAgents import BasicAgentAA
        from os import path

        # Append to the results file; write the ARFF header only when the
        # file is being created.
        results_exist = path.exists("all_data_pacman.arff")
        if results_exist:
            print('Results file exists')
        with open("all_data_pacman.arff", "a") as f:
            if not results_exist:
                f.write(
                    "@relation Example1 \n@attribute PosX numeric \n@attribute PosY numeric \n@attribute isNorthLegal {True, False} \n@attribute isEastLegal {True, False} \n@attribute isSouthLegal {True, False} \n@attribute isWestLegal {True, False} \n@attribute Direction {Stop, North, South, East, West} \n@attribute Ghost1X numeric \n@attribute Ghost1Y numeric \n@attribute Ghost1Distance numeric \n@attribute Ghost2X numeric \n@attribute Ghost2Y numeric \n@attribute Ghost2Distance numeric \n@attribute Ghost3X numeric \n@attribute Ghost3Y numeric \n@attribute Ghost3Distance numeric \n@attribute Ghost4X numeric \n@attribute Ghost4Y numeric \n@attribute Ghost4Distance numeric  \n@attribute NumberOfDots numeric \n@attribute NearestDotDistance numeric \n@attribute CurrentScore numeric \n@attribute FutureScore numeric \n@attribute FutureDirection {Stop, North, South, East, West} \n@data \n"
                )
                print('Created new results file')
            finished = self._runRecordedGame(f, BasicAgentAA)

        if finished:
            self.display.finish()

    def _runRecordedGame(self, f, recorder):
        """
        Play the game to its end while writing data rows to ``f``.

        On every turn of the starting agent the previous row is completed
        with the observed score and the direction of agent 0, and a new row
        is started from ``recorder.printLineData(self, observation)`` (values
        comma-separated, ``None`` written as ``?``).  After the agents'
        ``final`` hooks have run, the last row is completed with ``0,Stop``.

        Returns True when the game ran to completion and False when it was
        cut short by an agent crash or timeout.
        """
        agentIndex = self.startingIndex
        numAgents = len(self.agents)
        step = 0

        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            move_time = 0
            skip_action = False

            # Generate an observation of the state
            if 'observationFunction' in dir(agent):
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.observationFunction,
                            int(self.rules.getMoveTimeout(agentIndex)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deepCopy())
                        except TimeoutFunctionException:
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception:
                        self._agentCrash(agentIndex, quiet=False)
                        self.unmute()
                        return False
                else:
                    observation = agent.observationFunction(
                        self.state.deepCopy())
                self.unmute()
            else:
                observation = self.state.deepCopy()

            # Record data on the starting agent's turn.  When the observation
            # timed out there is nothing to record; the timeout is handled
            # just below.
            if agentIndex == self.startingIndex and not skip_action:
                if step >= 1:
                    f.write(str(observation.getScore()) + ',')
                    f.write(
                        str(observation.data.agentStates[0].getDirection()) +
                        '\n')
                g = recorder.printLineData(self, observation)
                if g:
                    for value in g:
                        if value is None:
                            value = '?'
                        f.write(str(value) + ',')

            # Solicit an action
            action = None
            step += 1
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.getAction,
                        int(self.rules.getMoveTimeout(agentIndex)) -
                        int(move_time))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()
                        action = timed_func(observation)
                    except TimeoutFunctionException:
                        print("Agent %d timed out on a single move!" %
                              agentIndex,
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return False

                    move_time += time.time() - start_time

                    if move_time > self.rules.getMoveWarningTime(agentIndex):
                        self.totalAgentTimeWarnings[agentIndex] += 1
                        print(
                            "Agent %d took too long to make a move! This is warning %d"
                            % (agentIndex,
                               self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        if self.totalAgentTimeWarnings[
                                agentIndex] > self.rules.getMaxTimeWarnings(
                                    agentIndex):
                            print(
                                "Agent %d exceeded the maximum number of warnings: %d"
                                % (agentIndex,
                                   self.totalAgentTimeWarnings[agentIndex]),
                                file=sys.stderr)
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return False

                    self.totalAgentTimes[agentIndex] += move_time
                    if self.totalAgentTimes[
                            agentIndex] > self.rules.getMaxTotalTime(
                                agentIndex):
                        print("Agent %d ran out of time! (time: %1.2f)" %
                              (agentIndex, self.totalAgentTimes[agentIndex]),
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return False
                    self.unmute()
                except Exception:
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return False
            else:
                action = agent.getAction(observation)

            self.unmute()
            # Execute the action
            self.moveHistory.append((agentIndex, action))
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor(
                        agentIndex, action)
                except Exception:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return False
            else:
                self.state = self.state.generateSuccessor(agentIndex, action)

            # Change the display
            self.display.update(self.state.data)

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            # Track progress: one move per completed round.  agentIndex is
            # always < numAgents, so the former test against numAgents + 1
            # could never succeed.
            if agentIndex == numAgents - 1:
                self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    self.unmute()
                except Exception:
                    if not self.catchExceptions:
                        raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return False

        # Complete the last data row.
        f.write("0" + ',' + 'Stop' + '\n')
        return True
class Game:
    """
    The Game manages the control flow, soliciting actions from agents.

    ``run`` reads ``self.state``, which ``__init__`` does not set; it has to
    be assigned on the instance before ``run`` is called.
    """
    def __init__(self,
                 agents,
                 display,
                 rules,
                 startingIndex=0,
                 muteAgents=False,
                 catchExceptions=False):
        """
        Store the collaborators and reset all per-game bookkeeping.

        agents          -- sequence of agent objects; a falsy entry is treated
                           by ``run`` as an agent that failed to load.
        display         -- object whose ``initialize`` is called by ``run``.
        rules           -- object queried for progress, time limits and
                           notified of crashes (``agentCrash``).
        startingIndex   -- index of the agent that is asked to act first.
        muteAgents      -- when true, ``mute`` redirects stdout/stderr into a
                           per-agent buffer.
        catchExceptions -- when true, ``run`` wraps agent calls in
                           ``TimeoutFunction`` and turns exceptions into
                           agent crashes instead of letting them propagate.
        """
        self.agentCrashed = False
        self.agents = agents
        self.display = display
        self.rules = rules
        self.startingIndex = startingIndex
        self.gameOver = False
        self.muteAgents = muteAgents
        self.catchExceptions = catchExceptions
        self.moveHistory = []
        self.totalAgentTimes = [0 for agent in agents]
        self.totalAgentTimeWarnings = [0 for agent in agents]
        self.agentTimeout = False
        # cStringIO only exists on Python 2; use io.StringIO when it is
        # missing so the class can be constructed on either interpreter.
        try:
            from cStringIO import StringIO
        except ImportError:
            from io import StringIO
        # One capture buffer per agent, used as stdout/stderr while muted.
        self.agentOutput = [StringIO() for agent in agents]

    def getProgress(self):
        """Return 1.0 once the game is over, else ``rules.getProgress(self)``."""
        if self.gameOver:
            return 1.0
        else:
            return self.rules.getProgress(self)

    def _agentCrash(self, agentIndex, quiet=False):
        "Helper method for handling agent crashes"
        # Unless quiet, print the traceback of the exception being handled.
        if not quiet: traceback.print_exc()
        self.gameOver = True
        self.agentCrashed = True
        self.rules.agentCrash(self, agentIndex)

    # NOTE(review): mute/unmute declare these names ``global``, so the saved
    # streams actually live in module-level variables; these class attributes
    # are never read or written by the methods below.
    OLD_STDOUT = None
    OLD_STDERR = None

    def mute(self, agentIndex):
        """
        Redirect stdout and stderr into the buffer of agent ``agentIndex``.

        Does nothing unless ``muteAgents`` is set. The current streams are
        saved so that ``unmute`` can restore them.
        """
        if not self.muteAgents: return
        global OLD_STDOUT, OLD_STDERR
        OLD_STDOUT = sys.stdout
        OLD_STDERR = sys.stderr
        sys.stdout = self.agentOutput[agentIndex]
        sys.stderr = self.agentOutput[agentIndex]

    def unmute(self):
        """
        Restore the streams saved by the most recent ``mute`` call.

        Does nothing unless ``muteAgents`` is set.
        """
        if not self.muteAgents: return
        global OLD_STDOUT, OLD_STDERR
        # Revert stdout/stderr to originals
        sys.stdout = OLD_STDOUT
        sys.stderr = OLD_STDERR

    def run(self):
        """
        Main control loop for game play.

        Initializes the display, lets every agent that defines
        ``registerInitialState`` see a copy of the start state, and then
        repeatedly asks agents for actions. Each pass of the outer loop asks
        four agents in turn and records their ``(agentIndex, action)`` pairs
        in ``moveHistory`` and in a local ``oneStepActionsList``.

        Returns None. The method returns early when an agent failed to load,
        timed out, or raised while ``catchExceptions`` is set; those cases
        are reported through ``_agentCrash``.

        NOTE(review): in the code visible here the collected actions are
        never applied (successor generation, display update and
        ``rules.process`` are disabled), so ``gameOver`` is only set through
        ``_agentCrash``. Confirm whether the consuming code is missing.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        ###self.display.initialize(self.state.makeObservation(1).data)
        # inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                sys.stderr.write("Agent %d failed to load\n" % i)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return

            if ("registerInitialState" in dir(agent)):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.registerInitialState,
                            int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            sys.stderr.write(
                                "Agent %d ran out of time on startup!\n" % i)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                ## TODO: could this exceed the total time
                self.unmute()

        agentIndex = self.startingIndex
        numAgents = len(self.agents)

        while not self.gameOver:
            # Actions gathered during this pass of the outer loop.
            oneStepActionsList = []
            for _ in range(4):
                # Fetch the next agent
                agent = self.agents[agentIndex]
                move_time = 0
                skip_action = False
                # Generate an observation of the state
                if 'observationFunction' in dir(agent):
                    self.mute(agentIndex)
                    if self.catchExceptions:
                        try:
                            timed_func = TimeoutFunction(
                                agent.observationFunction,
                                int(self.rules.getMoveTimeout(agentIndex)))
                            try:
                                start_time = time.time()
                                observation = timed_func(self.state.deepCopy())
                            except TimeoutFunctionException:
                                # No observation was produced; the action
                                # step below reports this as a move timeout.
                                skip_action = True
                            move_time += time.time() - start_time
                            self.unmute()
                        except Exception:
                            self._agentCrash(agentIndex, quiet=False)
                            self.unmute()
                            return
                    else:
                        observation = agent.observationFunction(
                            self.state.deepCopy())
                    self.unmute()
                else:
                    observation = self.state.deepCopy()

                # Solicit an action
                action = None
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        # Time already spent observing counts against the
                        # move timeout.
                        timed_func = TimeoutFunction(
                            agent.getAction,
                            int(self.rules.getMoveTimeout(agentIndex)) -
                            int(move_time))
                        try:
                            start_time = time.time()
                            if skip_action:
                                raise TimeoutFunctionException()
                            action = timed_func(observation)
                        except TimeoutFunctionException:
                            sys.stderr.write(
                                "Agent %d timed out on a single move!\n" %
                                agentIndex)
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return

                        move_time += time.time() - start_time

                        if move_time > self.rules.getMoveWarningTime(
                                agentIndex):
                            self.totalAgentTimeWarnings[agentIndex] += 1
                            sys.stderr.write(
                                "Agent %d took too long to make a move! This is warning %d\n"
                                % (agentIndex,
                                   self.totalAgentTimeWarnings[agentIndex]))
                            if self.totalAgentTimeWarnings[
                                    agentIndex] > self.rules.getMaxTimeWarnings(
                                        agentIndex):
                                sys.stderr.write(
                                    "Agent %d exceeded the maximum number of warnings: %d\n"
                                    % (agentIndex,
                                       self.totalAgentTimeWarnings[agentIndex]))
                                self.agentTimeout = True
                                self._agentCrash(agentIndex, quiet=True)
                                self.unmute()
                                return

                        self.totalAgentTimes[agentIndex] += move_time
                        #print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                        if self.totalAgentTimes[
                                agentIndex] > self.rules.getMaxTotalTime(
                                    agentIndex):
                            sys.stderr.write(
                                "Agent %d ran out of time! (time: %1.2f)\n" %
                                (agentIndex, self.totalAgentTimes[agentIndex]))
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return
                        self.unmute()
                    except Exception:
                        self._agentCrash(agentIndex)
                        self.unmute()
                        return
                else:
                    action = agent.getAction(observation)
                self.unmute()

                # Record the action (it is not executed here)
                self.moveHistory.append((agentIndex, action))
                oneStepActionsList.append((agentIndex, action))
                # Disabled in this variant:
                # if self.catchExceptions:
                #     try:
                #         self.state = self.state.generateSuccessor( agentIndex, action )
                #     except Exception,data:
                #         self.mute(agentIndex)
                #         self._agentCrash(agentIndex)
                #         self.unmute()
                #         return
                # else:
                #     self.state = self.state.generateSuccessor( agentIndex, action )

                # Change the display
                # self.display.update( self.state.data )
                ###idx = agentIndex - agentIndex % 2 + 1
                ###self.display.update( self.state.makeObservation(idx).data )

                # Allow for game specific conditions (winning, losing, etc.)
                #self.rules.process(self.state, self)
                # Track progress
                # NOTE(review): agentIndex is always < numAgents (see the
                # modulo below), so this condition is never true and
                # numMoves stays 0. Left unchanged pending confirmation of
                # the intended counting rule.
                if agentIndex == numAgents + 1: self.numMoves += 1
                # Next agent
                agentIndex = (agentIndex + 1) % numAgents
                if _BOINC_ENABLED:
                    boinc.set_fraction_done(self.getProgress())
示例#8
0
文件: game.py 项目: zachnyman/pacumen
    def run(self):
        """
        Main control loop for game play.

        Steps, in order:
          1. Initialize the display and reset ``num_moves``.
          2. Give every agent that defines ``register_initial_state`` a copy
             of the start state.
          3. Until ``game_over`` is set: build an observation for the current
             agent, ask it for an action, apply the action with
             ``state.generate_successor``, update the display and let
             ``rules.process`` inspect the new state.
          4. Call ``final`` on every agent that defines it, then finish the
             display.

        When ``catch_exceptions`` is set, agent calls are wrapped in
        ``TimeoutFunction`` and any timeout or exception is reported through
        ``_agent_crash`` followed by an early return; otherwise exceptions
        propagate to the caller. Returns None.
        """
        self.display.initialize(self.state.data)
        self.num_moves = 0

        # self.display.initialize(self.state.makeObservation(1).data)

        # Inform learning agents of the game start.
        for i, agent in enumerate(self.agents):
            if not agent:
                # A falsy entry is reported as an agent that failed to load.
                self.mute(i)
                # Newline-terminated like every other message in this method.
                sys.stderr.write("Agent %d failed to load\n" % i)
                self.unmute()
                self._agent_crash(i, quiet=True)
                return

            if "register_initial_state" in dir(agent):
                self.mute(i)
                if self.catch_exceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.register_initial_state,
                            int(self.rules.get_max_startup_time(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deep_copy())
                            time_taken = time.time() - start_time
                            self.total_agent_times[i] += time_taken
                        except TimeoutFunctionException:
                            sys.stderr.write(
                                "Agent %d ran out of time on startup!\n" % i)
                            self.unmute()
                            self.agent_timeout = True
                            self._agent_crash(i, quiet=True)
                            return
                    except Exception:
                        self._agent_crash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.register_initial_state(self.state.deep_copy())

                self.unmute()

        agent_index = self.starting_index
        num_agents = len(self.agents)

        while not self.game_over:
            # Fetch the next agent.
            agent = self.agents[agent_index]
            move_time = 0
            skip_action = False

            # Generate an observation of the state.
            if 'observation_function' in dir(agent):
                self.mute(agent_index)
                if self.catch_exceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.observation_function,
                            int(self.rules.get_move_timeout(agent_index)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deep_copy())
                        except TimeoutFunctionException:
                            # No observation was produced; the action step
                            # below reports this as a move timeout.
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception:
                        self._agent_crash(agent_index, quiet=False)
                        self.unmute()
                        return
                else:
                    observation = agent.observation_function(
                        self.state.deep_copy())
                self.unmute()
            else:
                observation = self.state.deep_copy()

            # Solicit an action.
            action = None
            self.mute(agent_index)

            if self.catch_exceptions:
                try:
                    # Time already spent observing counts against the move
                    # timeout.
                    timed_func = TimeoutFunction(
                        agent.get_action,
                        int(self.rules.get_move_timeout(agent_index)) -
                        int(move_time))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()

                        action = timed_func(observation)
                    except TimeoutFunctionException:
                        sys.stderr.write(
                            "Agent %d timed out on a single move!\n" %
                            agent_index)
                        self.agent_timeout = True
                        self._agent_crash(agent_index, quiet=True)
                        self.unmute()
                        return

                    move_time += time.time() - start_time

                    if move_time > self.rules.get_move_warning_time(
                            agent_index):
                        self.total_agent_time_warnings[agent_index] += 1
                        sys.stderr.write(
                            "Agent %d took too long to make a move! This is warning %d\n"
                            % (agent_index,
                               self.total_agent_time_warnings[agent_index]))

                        if self.total_agent_time_warnings[
                                agent_index] > self.rules.get_max_time_warnings(
                                    agent_index):
                            sys.stderr.write(
                                "Agent %d exceeded the maximum number of warnings: %d\n"
                                %
                                (agent_index,
                                 self.total_agent_time_warnings[agent_index]))
                            self.agent_timeout = True
                            self._agent_crash(agent_index, quiet=True)
                            self.unmute()
                            return

                    self.total_agent_times[agent_index] += move_time

                    if self.total_agent_times[
                            agent_index] > self.rules.get_max_total_time(
                                agent_index):
                        sys.stderr.write(
                            "Agent %d ran out of time! (time: %1.2f)\n" %
                            (agent_index, self.total_agent_times[agent_index]))
                        self.agent_timeout = True
                        self._agent_crash(agent_index, quiet=True)
                        self.unmute()
                        return
                    self.unmute()
                except Exception:
                    self._agent_crash(agent_index)
                    self.unmute()
                    return
            else:
                action = agent.get_action(observation)

            self.unmute()

            # Execute the action.
            self.move_history.append((agent_index, action))

            if self.catch_exceptions:
                try:
                    self.state = self.state.generate_successor(
                        agent_index, action)
                except Exception:
                    # Mute first so the traceback printed by _agent_crash
                    # goes wherever this agent's output is being sent.
                    self.mute(agent_index)
                    self._agent_crash(agent_index)
                    self.unmute()
                    return
            else:
                self.state = self.state.generate_successor(agent_index, action)

            # Change the display.
            self.display.update(self.state.data)

            # idx = agentIndex - agentIndex % 2 + 1
            # self.display.update( self.state.makeObservation(idx).data )

            # Allow for game specific conditions (winning, losing, etc).
            self.rules.process(self.state, self)

            # Track progress.
            # NOTE(review): agent_index is always < num_agents (see the modulo
            # below), so this condition is never true and num_moves stays 0.
            # Left unchanged pending confirmation of the intended rule.
            if agent_index == num_agents + 1:
                self.num_moves += 1

            # Next agent.
            agent_index = (agent_index + 1) % num_agents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.get_progress())

        # Inform a learning agent of the game result.
        for agent_index, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agent_index)
                    agent.final(self.state)
                    self.unmute()
                except Exception:
                    if not self.catch_exceptions:
                        raise
                    self._agent_crash(agent_index)
                    self.unmute()
                    return

        self.display.finish()
    def run(self):
        """
        Main control loop for game play.

        Each round sends a copy of the current state to every agent through
        a worker pool (``choose_action`` receives the agent index, the state
        wrapper and the agent's ``ip_address``), waits up to 1 per result
        (the ``timeout`` argument of the pool iterator's ``next``), and
        applies the returned actions one after another. A missing or
        undecodable answer is replaced by a random legal action.

        After the game ends, agents defining ``final`` are notified, a
        message box announces the result based on ``state.data.score`` and
        the display is finished. Returns None; returns early if an agent
        failed to load or a crash is caught.

        Changes from the previous revision of this method:
          * the "failed to load" message is written with
            ``sys.stderr.write`` (``print >> sys.stderr`` raises TypeError
            when ``print`` is a function);
          * ``rules.process`` / ``numMoves += 1`` ran twice per round because
            the block was duplicated; they now run once;
          * the worker pool is terminated when the loop is left, including
            on the early crash return.
        """
        self.inform_agent_about_start()
        self.display.initialize(self.state.data)
        self.numMoves = 0

        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                sys.stderr.write("Agent %d failed to load\n" % i)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return
        agentIndex = self.startingIndex
        numAgents = len(self.agents)

        pool = Pool(processes=4)
        try:
            while not self.gameOver:
                # Solicit an action from every agent at once.
                action = None
                # NOTE(review): this mute has no matching unmute inside the
                # loop, so with muteAgents set the saved original streams are
                # overwritten on the second round. Left unchanged; confirm
                # where the unmute belongs.
                self.mute(agentIndex)
                gameStateDTO = PublicGameState(self.state.deepCopy())
                myFutureResults = pool.imap_unordered(
                    choose_action,
                    [(x, gameStateDTO, self.agents[x].ip_address)
                     for x in range(numAgents)])
                myResults = []
                for agentIndex in range(numAgents):
                    try:
                        myResults.append(myFutureResults.next(1))
                    except Exception:
                        # NOTE(review): results arrive unordered, so the
                        # agent blamed here is the loop position, not
                        # necessarily the agent whose answer is missing.
                        print("Agent %d reached timeout" % agentIndex)
                        myResults.append(
                            (agentIndex,
                             json.dumps(
                                 random.choice(
                                     self.state.getLegalActions(agentIndex)))))
                for myResult in myResults:
                    agentIndex = myResult[0]
                    try:
                        action = json.loads(myResult[1])
                    except Exception:
                        print("Have to choose a random action for agent %d" %
                              agentIndex)
                        action = random.choice(
                            self.state.getLegalActions(agentIndex))
                    # Execute the action
                    self.moveHistory.append((agentIndex, action))
                    if self.catchExceptions:
                        try:
                            self.state = self.state.generateSuccessor(
                                agentIndex, action)
                        except Exception:
                            self.mute(agentIndex)
                            self._agentCrash(agentIndex)
                            self.unmute()
                            return
                    else:
                        self.state = self.state.generateSuccessor(
                            agentIndex, action)
                    # Change the display
                    self.display.update(self.state.data)

                # Allow for game specific conditions (winning, losing, etc.)
                self.rules.process(self.state, self)
                # Track progress
                self.numMoves += 1

                if _BOINC_ENABLED:
                    boinc.set_fraction_done(self.getProgress())
        finally:
            # Stop the worker processes; nothing uses the pool after the loop.
            pool.terminate()

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    self.unmute()
                except Exception:
                    if not self.catchExceptions: raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        # Announce the outcome: positive score is reported as a Red win,
        # negative as a Blue win, zero as no winner.
        score = self.state.data.score
        if score == 0:
            messagebox.showinfo("No Winner", "There is no Winner")
        elif score > 0:
            messagebox.showinfo("Red Wins",
                                "Team Red wins with score: " + str(score))
        elif score < 0:
            messagebox.showinfo(
                "Blue Wins", "Team Blue wins with score: " + str(score * -1))
        self.display.finish()
示例#10
0
# game.py
示例#11
0
    def run(self):
        """
        Main control loop for game play.

        If ``createPolicy`` is set and the display defines ``initPolicy``,
        no game is played: the method returns ``(policy, self)`` where
        ``policy`` is the result of ``display.initPolicy(self, 0)``.

        Otherwise agents defining ``registerInitialState`` receive a copy of
        the start state, and agents are then asked for actions in turn until
        ``gameOver`` is set. An agent whose ``getAction`` returns None is
        asked again (with a 0.1 s pause after every call) until it returns
        an action. Finally every agent defining ``final`` is notified.
        Returns None in this case, also when an agent failed to load.

        Exceptions raised by agents are not caught. An exception from
        ``agent.final`` propagates after stdout/stderr have been restored.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        # NOTE(review): PacmanGraphics is not used below; the import is kept
        # in case loading graphicsDisplay here is relied upon.
        from graphicsDisplay import PacmanGraphics
        if self.createPolicy and 'initPolicy' in dir(self.display):
            policy = self.display.initPolicy(self, 0)  # assume 0 is player
            return policy, self

        ###self.display.initialize(self.state.makeObservation(1).data)
        # inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                sys.stderr.write("Agent %d failed to load\n" % i)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return
            if ("registerInitialState" in dir(agent)):
                agent.registerInitialState(self.state.deepCopy())

        agentIndex = self.startingIndex
        numAgents = len(self.agents)

        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            # Generate an observation of the state
            if 'observationFunction' in dir(agent):
                observation = agent.observationFunction(self.state.deepCopy())
            else:
                observation = self.state.deepCopy()

            # Solicit an action; keep polling while the agent has none yet.
            action = None
            while action is None:
                action = agent.getAction(observation)
                time.sleep(0.1)

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            self.state = self.state.generateSuccessor(agentIndex, action)

            # Change the display
            self.display.update(self.state.data)
            ###idx = agentIndex - agentIndex % 2 + 1
            ###self.display.update( self.state.makeObservation(idx).data )

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            # Track progress
            # NOTE(review): agentIndex is always < numAgents (see the modulo
            # below), so this condition is never true and numMoves stays 0.
            # Left unchanged pending confirmation of the intended rule.
            if agentIndex == numAgents + 1: self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                self.mute(agentIndex)
                try:
                    agent.final(self.state)
                finally:
                    # Restore the streams even when final() raises, so the
                    # propagating error is not hidden in the agent's buffer.
                    self.unmute()
示例#12
0
    def run( self ):
        """
        Main control loop for game play.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        ###self.display.initialize(self.state.makeObservation(1).data)
        # inform learning agents of the game start
        for i in range(len(self.agents)):
            agent = self.agents[i]
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return

            if ("registerInitialState" in dir(agent)):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i, file=sys.stderr)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception as data:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                ## TODO: could this exceed the total time
                self.unmute()
            # print("registered...") #whoami


        agentIndex = self.startingIndex
        numAgents = len( self.agents )
        maihoonnaa=1 #whoami
        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            move_time = 0
            skip_action = False

            # Generate an observation of the state
            if 'observationFunction' in dir( agent ):
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deepCopy())
                        except TimeoutFunctionException:
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception as data:
                        self._agentCrash(agentIndex, quiet=False)
                        self.unmute()
                        return
                else:
                    observation = agent.observationFunction(self.state.deepCopy())
                self.unmute()
            else:
                observation = self.state.deepCopy()

            # Solicit an action
            action = None
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    # timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
                    timed_func = TimeoutFunction(agent.getAction, int(self.state.data.score))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()
                        action = timed_func( observation )
                    except TimeoutFunctionException:
                        print("Agent %d timed out on a single move!" % agentIndex, file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                    move_time += time.time() - start_time

                    # if move_time > self.rules.getMoveWarningTime(agentIndex):
                    #     self.totalAgentTimeWarnings[agentIndex] += 1
                    #     print("Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]), file=sys.stderr)
                    #     if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
                    #         print("Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]), file=sys.stderr)
                    #         self.agentTimeout = True
                    #         self._agentCrash(agentIndex, quiet=True)
                    #         self.unmute()
                    #         return

                    self.totalAgentTimes[agentIndex] += move_time
                    #print("Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex]))
                    # if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
                    #     print("Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex]), file=sys.stderr)
                    #     self.agentTimeout = True
                    #     self._agentCrash(agentIndex, quiet=True)
                    #     self.unmute()
                    #     return
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                # try:
                # timed_func = TimeoutFunction(agent.getAction, int(math.ceil(self.state.data.score / (SCALING_FACTOR+1))))
                # try:
                #     start_time = time.time()
                #     action = timed_func(observation)
                # except TimeoutFunctionException:
                #     print('You have run out of compute time! You exceeded {:.3f}s of compute'.format(self.state.data.score / (SCALING_FACTOR+1)))
                #     self.state.data.score = 0
                #     self.state.data._lose = True
                #     self.rules.process(self.state, self)
                #     continue
                # # except:
                # #     print('Your agent crashed!')
                # #     self.state.data.score = 0
                # #     self.state.data._lose = True
                # #     self.rules.process(self.state, self)
                # #     continue
                # move_time = time.time() - start_time
                action = agent.getAction(self.state.deepCopy())
                assert(action in self.state.getLegalActions(agentIndex)), str(self.state) + " "+ str(self.state.getLegalActions(agentIndex)) +" "+str(action) +" " + str(agentIndex)
                #whoami

            self.unmute()

            self.state.data.score += 0 #whoami max(0,-1 * SCALING_FACTOR)
            # if self.state.data.score <= 0:
            #     self.state.data.score = 0
            #     self.state.data._lose = True
            #     self.rules.process(self.state, self)
            #     continue #whoami
            # if self.state.data.deathCount >= 2:
            #     self.state.data._lose = True
            #     self.rules.process(self.state, self)
            #     continue
            #     #whoami

            # Execute the action
            self.moveHistory.append( (agentIndex, action) )
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor( agentIndex, action )
                except Exception as data:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                self.state = self.state.generateSuccessor( agentIndex, action )

            # Change the display
            self.display.update( self.state.data )
            ###idx = agentIndex - agentIndex % 2 + 1
            ###self.display.update( self.state.makeObservation(idx).data )

            # Allow for game specific conditions (winning, losing, etc.)
            # print("death counter now ",self.state.data.deathCount, agentIndex)#whoami
            self.rules.process(self.state, self)

            # print("step ",maihoonnaa,"...",agentIndex) #whoami
            maihoonnaa+=1#whoami

            # Track progress
            if agentIndex == numAgents + 1: self.numMoves += 1
            # Next agent
            agentIndex = ( agentIndex + 1 ) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())


        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir( agent ) :
                try:
                    self.mute(agentIndex)
                    agent.final( self.state )
                    self.unmute()
                except Exception as data:
                    if not self.catchExceptions: raise data
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            # print("terminal...") #whoami

        self.display.finish()
示例#13
0
    def run(self):
        """
        Main control loop for game play, logging one CSV row per turn.

        Initializes the display, gives every agent that defines
        ``registerInitialState`` a copy of the starting state, then cycles
        through the agents (starting at ``self.startingIndex``) asking each for
        an action and applying it to ``self.state`` until ``self.gameOver`` is
        true. After every turn the value returned by
        ``BasicAgentAA.printLineData`` is appended as a row to ``Results.csv``
        (a path relative to the current working directory). Finally every
        agent that defines ``final`` is handed the last state and the display
        is finished.

        With ``self.catchExceptions`` set, agent calls are run through
        ``TimeoutFunction`` with limits taken from ``self.rules``; a timeout,
        too many slow-move warnings, an exhausted total time budget or any
        exception is reported via ``self._agentCrash`` and the method returns
        early without reaching ``self.display.finish()``. Without it, agent
        code is called directly and its exceptions propagate to the caller.
        The results file is closed on every exit path.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        # Inform learning agents of the game start.
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return

            if "registerInitialState" in dir(agent):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.registerInitialState,
                            int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            # Startup time counts against the agent's total.
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i,
                                  file=sys.stderr)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                ## TODO: could this exceed the total time
                self.unmute()

        # Function-scope imports, as in the original code.
        from bustersAgents import BasicAgentAA
        import os.path

        # Header columns written once when the results file is first created.
        headerList = [
            'PositionX', 'PositionY', 'is North legal', 'is East legal',
            'is South legal', 'is West legal', 'is Stop legal', 'Direction',
            'Position', 'Ghost1 X', 'Ghost1 Y', 'Ghost2 X', 'Ghost2 Y',
            'Ghost3 X', 'Ghost3 Y', 'Ghost4 X', 'Ghost4 Y'
        ]
        results_path = "Results.csv"
        is_new_file = not os.path.exists(results_path)
        if not is_new_file:
            print('Results file exists')

        # Append mode creates the file when it is missing. newline='' is what
        # the csv module requires so that row terminators are not translated.
        # The with-block guarantees the file is closed on every early return.
        with open(results_path, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            if is_new_file:
                writer.writerow(headerList)
                print('Created new results file')
            else:
                # Blank separator row between the previous run and this one.
                writer.writerow(['', '', '', '', '', '', '', ''])

            agentIndex = self.startingIndex
            numAgents = len(self.agents)

            while not self.gameOver:
                # Fetch the next agent
                agent = self.agents[agentIndex]
                move_time = 0
                skip_action = False

                # Generate an observation of the state
                if 'observationFunction' in dir(agent):
                    self.mute(agentIndex)
                    if self.catchExceptions:
                        try:
                            timed_func = TimeoutFunction(
                                agent.observationFunction,
                                int(self.rules.getMoveTimeout(agentIndex)))
                            try:
                                start_time = time.time()
                                observation = timed_func(
                                    self.state.deepCopy())
                            except TimeoutFunctionException:
                                # Handled below as a move timeout.
                                skip_action = True
                            move_time += time.time() - start_time
                            self.unmute()
                        except Exception:
                            self._agentCrash(agentIndex, quiet=False)
                            self.unmute()
                            return
                    else:
                        observation = agent.observationFunction(
                            self.state.deepCopy())
                    self.unmute()
                else:
                    observation = self.state.deepCopy()

                # Solicit an action
                action = None
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        # The time spent observing is deducted from the
                        # per-move timeout.
                        timed_func = TimeoutFunction(
                            agent.getAction,
                            int(self.rules.getMoveTimeout(agentIndex)) -
                            int(move_time))
                        try:
                            start_time = time.time()
                            if skip_action:
                                raise TimeoutFunctionException()
                            action = timed_func(observation)
                        except TimeoutFunctionException:
                            print("Agent %d timed out on a single move!" %
                                  agentIndex,
                                  file=sys.stderr)
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return

                        move_time += time.time() - start_time

                        if move_time > self.rules.getMoveWarningTime(
                                agentIndex):
                            self.totalAgentTimeWarnings[agentIndex] += 1
                            print(
                                "Agent %d took too long to make a move! This is warning %d"
                                % (agentIndex,
                                   self.totalAgentTimeWarnings[agentIndex]),
                                file=sys.stderr)
                            if self.totalAgentTimeWarnings[
                                    agentIndex] > self.rules.getMaxTimeWarnings(
                                        agentIndex):
                                print(
                                    "Agent %d exceeded the maximum number of warnings: %d"
                                    % (agentIndex,
                                       self.totalAgentTimeWarnings[agentIndex]),
                                    file=sys.stderr)
                                self.agentTimeout = True
                                self._agentCrash(agentIndex, quiet=True)
                                self.unmute()
                                return

                        self.totalAgentTimes[agentIndex] += move_time
                        if self.totalAgentTimes[
                                agentIndex] > self.rules.getMaxTotalTime(
                                    agentIndex):
                            print("Agent %d ran out of time! (time: %1.2f)" %
                                  (agentIndex,
                                   self.totalAgentTimes[agentIndex]),
                                  file=sys.stderr)
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return
                        self.unmute()
                    except Exception:
                        self._agentCrash(agentIndex)
                        self.unmute()
                        return
                else:
                    action = agent.getAction(observation)
                self.unmute()

                # Execute the action
                self.moveHistory.append((agentIndex, action))
                if self.catchExceptions:
                    try:
                        self.state = self.state.generateSuccessor(
                            agentIndex, action)
                    except Exception:
                        self.mute(agentIndex)
                        self._agentCrash(agentIndex)
                        self.unmute()
                        return
                else:
                    self.state = self.state.generateSuccessor(
                        agentIndex, action)

                # Change the display
                self.display.update(self.state.data)

                # Allow for game specific conditions (winning, losing, etc.)
                self.rules.process(self.state, self)

                # Track progress: count one move each time the last agent in
                # turn order has acted. agentIndex always stays below
                # numAgents, so a comparison against numAgents + 1 could
                # never succeed.
                if agentIndex == numAgents - 1:
                    self.numMoves += 1
                # Next agent
                agentIndex = (agentIndex + 1) % numAgents

                if _BOINC_ENABLED:
                    boinc.set_fraction_done(self.getProgress())

                # Log this turn. printLineData is called on the class with
                # this game object as its first argument.
                # NOTE(review): presumably printLineData does not depend on
                # agent instance state - confirm against bustersAgents.
                row = BasicAgentAA.printLineData(self, observation)
                writer.writerow(row)

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    self.unmute()
                except Exception:
                    if not self.catchExceptions: raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        self.display.finish()
示例#14
0
文件: game.py 项目: boris081/CS188-1
    def run(self):
        """
        Main control loop for game play, with broadcasts between teammates.

        Initializes the display, gives every agent that defines
        ``registerInitialState`` a copy of the starting state, then cycles
        through the agents (starting at ``self.startingIndex``) asking each for
        an action and applying it to ``self.state`` until ``self.gameOver`` is
        true. Finally every agent that defines ``final`` is handed the last
        state and the display is finished.

        Broadcasts: when there are at most two agents, agent 0's
        ``toInitialBroadcast`` (read after its ``registerInitialState``) is
        stored on the next agent as ``receivedInitialBroadcast``. After every
        move, the acting agent's ``toBroadcast`` is stored as
        ``receivedBroadcast`` on the first other member of
        ``agent.getTeam(observation)``, if any.

        With ``self.catchExceptions`` set, agent calls are run through
        ``TimeoutFunction`` with limits taken from ``self.rules``; a timeout,
        too many slow-move warnings, an exhausted total time budget or any
        exception is reported via ``self._agentCrash`` and the method returns
        early without reaching ``self.display.finish()``. Without it, agent
        code is called directly and its exceptions propagate to the caller.

        Raises:
            AssertionError: if a broadcast is neither ``None`` nor made up
                solely of members of ``LEGAL_DIRECTIONS``.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        # Simple way to determine whether we are in phase 3
        # as there will be a ghost
        phase3 = len(self.agents) > 2
        initialBroadcast = None

        # Inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # If null agent, it failed to load. Terminate the game
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return

            # Hack to get broadcast across to index 1 agent
            # during registerInitialState for phase 2
            if not phase3 and initialBroadcast is not None:
                agent.receivedInitialBroadcast = initialBroadcast
                initialBroadcast = None

            if "registerInitialState" in dir(agent):
                self.mute(i)
                # Timed register initial state
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.registerInitialState,
                            int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            # Startup time counts against the agent's total.
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i,
                                  file=sys.stderr)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())

                # Part 2 of hack to get agent 0 to broadcast path
                # to agent 1 during registerInitialState for phase 2
                if not phase3 and i == 0:
                    initialBroadcast = agent.toInitialBroadcast
                    assert initialBroadcast is None or all(
                        a in LEGAL_DIRECTIONS for a in initialBroadcast)

                self.unmute()

        agentIndex = self.startingIndex
        numAgents = len(self.agents)

        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            move_time = 0
            skip_action = False

            # Generate an observation of the state
            if 'observationFunction' in dir(agent):
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.observationFunction,
                            int(self.rules.getMoveTimeout(agentIndex)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deepCopy())
                        except TimeoutFunctionException:
                            # Handled below as a move timeout.
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception:
                        self._agentCrash(agentIndex, quiet=False)
                        self.unmute()
                        return
                else:
                    observation = agent.observationFunction(
                        self.state.deepCopy())
                self.unmute()
            else:
                observation = self.state.deepCopy()

            # Solicit an action
            action = None
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    # The time spent observing is deducted from the per-move
                    # timeout.
                    timed_func = TimeoutFunction(
                        agent.getAction,
                        int(self.rules.getMoveTimeout(agentIndex)) -
                        int(move_time))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()
                        action = timed_func(observation)
                    except TimeoutFunctionException:
                        print("Agent %d timed out on a single move!" %
                              agentIndex,
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                    move_time += time.time() - start_time

                    if move_time > self.rules.getMoveWarningTime(agentIndex):
                        self.totalAgentTimeWarnings[agentIndex] += 1
                        print(
                            "Agent %d took too long to make a move! This is warning %d"
                            % (agentIndex,
                               self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        if self.totalAgentTimeWarnings[
                                agentIndex] > self.rules.getMaxTimeWarnings(
                                    agentIndex):
                            print(
                                "Agent %d exceeded the maximum number of warnings: %d"
                                % (agentIndex,
                                   self.totalAgentTimeWarnings[agentIndex]),
                                file=sys.stderr)
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return

                    self.totalAgentTimes[agentIndex] += move_time
                    if self.totalAgentTimes[
                            agentIndex] > self.rules.getMaxTotalTime(
                                agentIndex):
                        print("Agent %d ran out of time! (time: %1.2f)" %
                              (agentIndex, self.totalAgentTimes[agentIndex]),
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                    self.unmute()
                except Exception:
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                action = agent.getAction(observation)

            # Get other pacman on the agent's team
            teammateList = [
                self.agents[i] for i in agent.getTeam(observation)
                if i != agent.index
            ]

            # Only broadcast/receive broadcast if has teammates, i.e. is not a ghost
            if teammateList:
                otherPacman = teammateList[0]
                broadcast = agent.toBroadcast
                assert broadcast is None or all(
                    a in LEGAL_DIRECTIONS for a in broadcast)
                otherPacman.receivedBroadcast = broadcast

            self.unmute()

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor(
                        agentIndex, action)
                except Exception:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                self.state = self.state.generateSuccessor(agentIndex, action)

            # Change the display
            self.display.update(self.state.data)

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)

            # Track progress: count one move each time the last agent in turn
            # order has acted. agentIndex always stays below numAgents, so a
            # comparison against numAgents + 1 could never succeed.
            if agentIndex == numAgents - 1:
                self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # Inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    self.unmute()
                except Exception:
                    if not self.catchExceptions: raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        self.display.finish()
示例#15
0
    def run(self):
        """
        Main control loop for game play.

        Initializes the display, gives every agent that defines
        ``registerInitialState`` a copy of the starting state, then cycles
        through the agents (starting at ``self.startingIndex``) asking each for
        an action and applying it to ``self.state`` until ``self.gameOver`` is
        true. Finally every agent that defines ``final`` is handed the last
        state and the display is finished.

        With ``self.catchExceptions`` set, agent calls are run through
        ``TimeoutFunction`` with limits taken from ``self.rules``; a timeout,
        too many slow-move warnings, an exhausted total time budget or any
        exception is reported via ``self._agentCrash`` and the method returns
        early without reaching ``self.display.finish()``. Without it, agent
        code is called directly and its exceptions propagate to the caller.
        """
        self.display.initialize(self.state.data)
        self.numMoves = 0

        # inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return
            if "registerInitialState" in dir(agent):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.registerInitialState,
                            int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            # Startup time counts against the agent's total.
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i,
                                  file=sys.stderr)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                # TODO: could this exceed the total time
                self.unmute()

        agentIndex = self.startingIndex
        numAgents = len(self.agents)

        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            move_time = 0
            skip_action = False
            # Generate an observation of the state
            if 'observationFunction' in dir(agent):
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.observationFunction,
                            int(self.rules.getMoveTimeout(agentIndex)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deepCopy())
                        except TimeoutFunctionException:
                            # Handled below as a move timeout.
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception:
                        self._agentCrash(agentIndex, quiet=False)
                        self.unmute()
                        return
                else:
                    observation = agent.observationFunction(
                        self.state.deepCopy())
                self.unmute()
            else:
                observation = self.state.deepCopy()

            # Solicit an action
            action = None
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    # The time spent observing is deducted from the per-move
                    # timeout.
                    timed_func = TimeoutFunction(
                        agent.getAction,
                        int(self.rules.getMoveTimeout(agentIndex)) -
                        int(move_time))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()
                        action = timed_func(observation)
                    except TimeoutFunctionException:
                        print("Agent %d timed out on a single move!" %
                              agentIndex,
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                    move_time += time.time() - start_time

                    if move_time > self.rules.getMoveWarningTime(agentIndex):
                        self.totalAgentTimeWarnings[agentIndex] += 1
                        print(
                            "Agent %d took too long to make a move! This is warning %d"
                            % (agentIndex,
                               self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        if self.totalAgentTimeWarnings[
                                agentIndex] > self.rules.getMaxTimeWarnings(
                                    agentIndex):
                            print(
                                "Agent %d exceeded the maximum number of warnings: %d"
                                % (agentIndex,
                                   self.totalAgentTimeWarnings[agentIndex]),
                                file=sys.stderr)
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return

                    self.totalAgentTimes[agentIndex] += move_time
                    if self.totalAgentTimes[
                            agentIndex] > self.rules.getMaxTotalTime(
                                agentIndex):
                        print("Agent %d ran out of time! (time: %1.2f)" %
                              (agentIndex, self.totalAgentTimes[agentIndex]),
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                    self.unmute()
                except Exception:
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                action = agent.getAction(observation)
            self.unmute()

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor(
                        agentIndex, action)
                except Exception:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                self.state = self.state.generateSuccessor(agentIndex, action)

            # Change the display
            self.display.update(self.state.data)

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)

            # Track progress: count one move each time the last agent in turn
            # order has acted. agentIndex always stays below numAgents, so a
            # comparison against numAgents + 1 could never succeed.
            if agentIndex == numAgents - 1:
                self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    self.unmute()
                except Exception:
                    if not self.catchExceptions:
                        raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        self.display.finish()
示例#16
0
    def run(self, EPISODES, callbacks=None, log_dir=""):
        """
        Main control loop for game play.

        EPISODES is stored on the first agent as ``last_episode``.
        ``callbacks`` and ``log_dir`` are forwarded to
        ``agent.policy_second.update_policy`` once the game is over, for every
        agent that defines ``final`` and whose ``prueba`` flag is false.

        Returns the tuple ``(lastReward, epsilon, phi)`` read from the first
        agent, or None when the game is aborted early because an agent failed
        to load, timed out or crashed.
        """
        # Build a fresh list per call instead of sharing one mutable default
        # object between every game that omits ``callbacks``.
        if callbacks is None:
            callbacks = []

        self.agents[0].last_episode = EPISODES
        self.display.initialize(self.state.data)
        self.numMoves = 0

        # inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                # sys.stderr.write works on both Python 2 and 3, unlike the
                # Python-2-only ``print >>sys.stderr`` statement.
                sys.stderr.write("Agent %d failed to load\n" % i)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return
            if "registerInitialState" in dir(agent):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            sys.stderr.write("Agent %d ran out of time on startup!\n" % i)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception:
                        # (there used to be a `data` variable bound here)
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                ## TODO: could this exceed the total time
                self.unmute()

        agentIndex = self.startingIndex
        numAgents = len(self.agents)
        # TODO: left here because this is where play actually starts

        self.agents[0].n = 0
        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]

            # Wall-clock seconds this agent has used on the current move.
            # It must be reset every turn: the timed code below accumulates
            # into it and subtracts it from the getAction timeout.
            move_time = 0
            skip_action = False
            # Generate an observation of the state
            if 'observationFunction' in dir(agent):
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deepCopy())
                        except TimeoutFunctionException:
                            # No observation was produced; the action step
                            # below turns this into a move timeout.
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception:
                        self._agentCrash(agentIndex, quiet=False)
                        self.unmute()
                        return
                else:
                    observation = agent.observationFunction(self.state.deepCopy())
                self.unmute()
            else:
                observation = self.state.deepCopy()

            # Solicit an action
            action = None
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()
                        action = timed_func(observation)
                    except TimeoutFunctionException:
                        sys.stderr.write("Agent %d timed out on a single move!\n" % agentIndex)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                    move_time += time.time() - start_time

                    if move_time > self.rules.getMoveWarningTime(agentIndex):
                        self.totalAgentTimeWarnings[agentIndex] += 1
                        sys.stderr.write("Agent %d took too long to make a move! This is warning %d\n" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]))
                        if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
                            sys.stderr.write("Agent %d exceeded the maximum number of warnings: %d\n" % (agentIndex, self.totalAgentTimeWarnings[agentIndex]))
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return

                    self.totalAgentTimes[agentIndex] += move_time
                    if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
                        sys.stderr.write("Agent %d ran out of time! (time: %1.2f)\n" % (agentIndex, self.totalAgentTimes[agentIndex]))
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                    self.unmute()
                except Exception:
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                action = agent.getAction(observation)
            self.unmute()

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor(agentIndex, action)
                except Exception:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                self.state = self.state.generateSuccessor(agentIndex, action)

            # Change the display
            self.display.update(self.state.data)

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            # Track progress
            # NOTE(review): agentIndex is always < numAgents here, so this
            # condition never holds and numMoves stays at 0. Kept as-is so
            # anything reading numMoves behaves as before; confirm the intent
            # (probably ``numAgents - 1``) before changing it.
            if agentIndex == numAgents + 1:
                self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        # TODO: this is the point reached once a game has finished
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    if not agent.prueba:
                        agent.policy_second.update_policy(agent, callbacks, log_dir=log_dir)
                    self.unmute()
                except Exception:
                    if not self.catchExceptions:
                        raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        self.display.finish()
        return self.agents[0].lastReward, self.agents[0].epsilon, self.agents[0].phi
示例#17
0
文件: game.py 项目: shinchen03/Pacman
                self.state = self.state.generateSuccessor(agentIndex, action)

            # Change the display
            self.display.update(self.state.data)
            ###idx = agentIndex - agentIndex % 2 + 1
            ###self.display.update( self.state.makeObservation(idx).data )

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)
            # Track progress
            if agentIndex == numAgents + 1: self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)
                    agent.final(self.state)
                    self.unmute()
                except Exception, data:
                    if not self.catchExceptions: raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
        self.display.finish()
示例#18
0
    def run(self, replay, minibatch_size):  #whoami
        """
        Main control loop for game play with replay-memory updates.

        Each pass of the main loop asks every agent for an action on the
        current state, applies the actions in agent order, stores the
        resulting (state, pacman actions, pacman rewards, next state)
        transition as a key of ``replay`` (also appending it to
        ``actions.txt``), and then feeds a random minibatch of stored
        transitions to every agent's ``observationFunction``.

        replay: dict-like replay memory keyed by transition tuples; it is
            mutated in place.
        minibatch_size: upper bound on the number of transitions sampled for
            each update.

        Returns ``replay``, or None when the game is aborted because an agent
        failed to load, timed out on startup or crashed.
        """

        def sample_batch():
            # Draw up to ``minibatch_size`` distinct transitions. The keys are
            # materialised once instead of once per sampled element.
            transitions = list(replay.keys())
            picks = np.random.choice(
                len(transitions), min(minibatch_size, len(transitions)),
                False)
            return [transitions[p] for p in picks]

        self.display.initialize(self.state.data)
        self.numMoves = 0

        # inform learning agents of the game start
        for i, agent in enumerate(self.agents):
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return

            if "registerInitialState" in dir(agent):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.registerInitialState,
                            int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i,
                                  file=sys.stderr)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(self.state.deepCopy())
                ## TODO: could this exceed the total time
                self.unmute()

        # Baseline for the first reward delta.
        # NOTE(review): hard-coded 100 -- presumably the initial game score;
        # confirm against the game state's starting score.
        old_score = 100

        while not self.gameOver:
            act_vect = []  # actions chosen by the pacman agents only
            action_list = []  # actions chosen by every agent, by index
            reward_vect = []  # one reward entry per pacman agent

            # Every agent picks its action on the same (pre-move) state.
            for i in range(len(self.agents)):
                action = self.agents[i].getAction(self.state.deepCopy())
                assert (action in self.state.getLegalActions(i)), str(
                    self.state) + " " + str(self.state.getLegalActions(
                        i)) + " " + str(action) + " " + str(i)
                action_list.append(action)
                if self.state.data.agentStates[i].isPacman:
                    act_vect.append(action)

            old_state = self.state.deepCopy()

            # Apply the chosen actions one agent at a time.
            for i in range(len(self.agents)):
                if self.state.data._win or self.state.data._lose:
                    # Game ended mid-round: every pacman reward (already
                    # recorded or still missing) becomes the terminal reward.
                    terminal_reward = 500 if self.state.data._win else -500
                    missing = (self.state.data.numPacmanAgents -
                               len(reward_vect))
                    reward_vect = [terminal_reward] * len(reward_vect)
                    reward_vect.extend([terminal_reward] * missing)
                    break

                try:
                    self.moveHistory.append((i, action_list[i]))
                    self.state = self.state.generateSuccessor(
                        i, action_list[i])  #agentIndex, action
                except Exception as e:
                    # Only an exception whose message is exactly
                    # "Illegal ghost action " is tolerated; anything else is
                    # reported and re-raised.
                    if (str(e) ==
                            'Can\'t generate a successor of a terminal state.'
                        ):
                        print("this shouldnt happen")
                        raise Exception(str(e))
                    elif (str(e) != "Illegal ghost action "):
                        print("this too shouldnt happen")
                        raise Exception(str(e))

                self.display.update(self.state.data)
                self.rules.process(self.state, self)

                if self.state.data.agentStates[i].isPacman:
                    reward_vect.append(self.state.data.score - old_score)
                else:
                    # Share the score change caused by a ghost move among the
                    # pacmen it is currently colliding with.
                    r = self.state.data.score - old_score
                    ghostState = self.state.data.agentStates[i]
                    ghostPosition = ghostState.configuration.getPosition()
                    for j in range(self.state.data.numPacmanAgents):
                        pacmanPosition = self.state.getPacmanPosition(j)
                        if (manhattanDistance(ghostPosition, pacmanPosition) <=
                                COLLISION_TOLERANCE):
                            reward_vect[j] += (r /
                                               self.state.data.numPacmanAgents)

                old_score = self.state.data.score

            assert (len(reward_vect) == self.state.data.numPacmanAgents
                    ), "reward vector flawed" + str(reward_vect)

            # Log the transition; the context manager closes the file even if
            # a write fails.
            with open("actions.txt", "a") as f:
                f.write("replay ENTRY:\n")
                f.write(str(old_state.deepCopy()) + "\n")
                f.write(
                    str(tuple(copy.deepcopy(act_vect))) + " " +
                    str(tuple(copy.deepcopy(reward_vect))) + "\n")
                f.write(str(self.state.deepCopy()) + "\n")
            replay[(old_state.deepCopy(), tuple(copy.deepcopy(act_vect)),
                    tuple(copy.deepcopy(reward_vect)),
                    self.state.deepCopy())] = 1

            # Replay update: each agent sees its own random minibatch.
            for i in range(len(self.agents)):
                agent = self.agents[i]
                samples = sample_batch()

                if 'observationFunction' in dir(agent):
                    for k, (s, a, r, s_n) in enumerate(samples):
                        a = list(a)
                        r = list(r)
                        self.mute(i)
                        if self.catchExceptions:
                            try:
                                timed_func = TimeoutFunction(
                                    agent.observationFunction,
                                    int(self.rules.getMoveTimeout(i)))
                                try:
                                    timed_func(
                                        s_n.deepCopy(), s.deepCopy(),
                                        copy.deepcopy(r), copy.deepcopy(a), k)
                                except TimeoutFunctionException:
                                    # Best effort: a replay update that times
                                    # out is skipped, as before.
                                    pass
                                self.unmute()
                            except Exception:
                                self._agentCrash(i, quiet=False)
                                self.unmute()
                                return
                        else:
                            agent.observationFunction(
                                s_n.deepCopy(), s.deepCopy(), copy.deepcopy(r),
                                copy.deepcopy(a), k)
                        self.unmute()

                if self.state.data.agentStates[i].isPacman:
                    agent.update_the_params(len(samples))

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # inform a learning agent of the game result
        for agentIndex, agent in enumerate(self.agents):
            if "final" in dir(agent):
                try:
                    self.mute(agentIndex)

                    samples = sample_batch()
                    for k, (s, a, r, s_n) in enumerate(samples):
                        agent.observationFunction(
                            s_n.deepCopy(), s.deepCopy(),
                            copy.deepcopy(list(r)), copy.deepcopy(list(a)), k)

                    if self.state.data.agentStates[agentIndex].isPacman:
                        # s_n is the next-state of the last sampled transition.
                        agent.final(s_n.deepCopy(), len(samples))

                    self.unmute()
                except Exception:
                    if not self.catchExceptions:
                        raise
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return

        self.display.finish()

        return replay
示例#19
0
        self.state = self.state.generateSuccessor( agentIndex, action )

      # Change the display
      self.display.update( self.state.data )
      ###idx = agentIndex - agentIndex % 2 + 1
      ###self.display.update( self.state.makeObservation(idx).data )

      # Allow for game specific conditions (winning, losing, etc.)
      self.rules.process(self.state, self)
      # Track progress
      if agentIndex == numAgents + 1: self.numMoves += 1
      # Next agent
      agentIndex = ( agentIndex + 1 ) % numAgents

      if _BOINC_ENABLED:
        boinc.set_fraction_done(self.getProgress())

    # inform a learning agent of the game result
    for agent in self.agents:
      if "final" in dir( agent ) :
        try:
          self.mute(agent.index)
          agent.final( self.state )
          self.unmute()
        except Exception,data:
          if not self.catchExceptions: raise
          self._agentCrash(agent.index)
          self.unmute()
          return
    self.display.finish()
示例#20
0
    def run(self, total_pacmen, pacman_types_corresponding_indexes, graphics,
            pacmen, stillTraining, is_training, numGames, evalGraphics,
            currentRound, numTraining):
        """
        Main control loop for game play.
        """
        self.display.initialize(self.state.data, total_pacmen,
                                pacman_types_corresponding_indexes)
        self.numMoves = 0

        # self.display.initialize(self.state.makeObservation(1).data)
        # inform learning agents of the game start
        for i in range(len(self.agents)):
            agent = self.agents[i]
            if not agent:
                self.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.unmute()
                self._agentCrash(i, quiet=True)
                return
            if ("registerInitialState" in dir(agent)):
                self.mute(i)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.registerInitialState,
                            int(self.rules.getMaxStartupTime(i)))
                        try:
                            start_time = time.time()
                            timed_func(self.state.deepCopy())
                            time_taken = time.time() - start_time
                            self.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i,
                                  file=sys.stderr)
                            self.unmute()
                            self.agentTimeout = True
                            self._agentCrash(i, quiet=True)
                            return
                    except Exception as data:
                        self._agentCrash(i, quiet=False)
                        self.unmute()
                        return
                else:
                    agent.registerInitialState(
                        self.state.deepCopy(), i,
                        type(agent).__name__,
                        pacman_types_corresponding_indexes, graphics,
                        is_training, evalGraphics, numGames)
                # TODO: could this exceed the total time
                self.unmute()

        agentIndex = self.startingIndex
        numAgents = len(self.agents)

        if graphics or evalGraphics:
            self.display.updateEpochInfo(currentRound, numTraining)

        while not self.gameOver:
            # Fetch the next agent
            agent = self.agents[agentIndex]
            # if this is a dead pacman, skip loading it
            if agent.isPacman == True:
                if agent.isDead == True:
                    # Next agent
                    agentIndex = (agentIndex + 1) % numAgents
                    continue
                agent.scoreChange = 0
            move_time = 0
            skip_action = False
            # Generate an observation of the state
            if 'observationFunction' in dir(agent):
                self.mute(agentIndex)
                if self.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(
                            agent.observationFunction,
                            int(self.rules.getMoveTimeout(agentIndex)))
                        try:
                            start_time = time.time()
                            observation = timed_func(self.state.deepCopy())
                        except TimeoutFunctionException:
                            skip_action = True
                        move_time += time.time() - start_time
                        self.unmute()
                    except Exception as data:
                        self._agentCrash(agentIndex, quiet=False)
                        self.unmute()
                        return
                else:
                    observation = agent.observationFunction(
                        self.state.deepCopy(), total_pacmen, agentIndex,
                        stillTraining)
                self.unmute()
            else:
                observation = self.state.deepCopy()

            # Solicit an action
            action = None
            self.mute(agentIndex)
            if self.catchExceptions:
                try:
                    timed_func = TimeoutFunction(
                        agent.getAction,
                        int(self.rules.getMoveTimeout(agentIndex)) -
                        int(move_time))
                    try:
                        start_time = time.time()
                        if skip_action:
                            raise TimeoutFunctionException()
                        action = timed_func(observation)
                    except TimeoutFunctionException:
                        print("Agent %d timed out on a single move!" %
                              agentIndex,
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return

                    move_time += time.time() - start_time

                    if move_time > self.rules.getMoveWarningTime(agentIndex):
                        self.totalAgentTimeWarnings[agentIndex] += 1
                        print(
                            "Agent %d took too long to make a move! This is warning %d"
                            % (agentIndex,
                               self.totalAgentTimeWarnings[agentIndex]),
                            file=sys.stderr)
                        if self.totalAgentTimeWarnings[
                                agentIndex] > self.rules.getMaxTimeWarnings(
                                    agentIndex):
                            print(
                                "Agent %d exceeded the maximum number of warnings: %d"
                                % (agentIndex,
                                   self.totalAgentTimeWarnings[agentIndex]),
                                file=sys.stderr)
                            self.agentTimeout = True
                            self._agentCrash(agentIndex, quiet=True)
                            self.unmute()
                            return

                    self.totalAgentTimes[agentIndex] += move_time
                    # print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
                    if self.totalAgentTimes[
                            agentIndex] > self.rules.getMaxTotalTime(
                                agentIndex):
                        print("Agent %d ran out of time! (time: %1.2f)" %
                              (agentIndex, self.totalAgentTimes[agentIndex]),
                              file=sys.stderr)
                        self.agentTimeout = True
                        self._agentCrash(agentIndex, quiet=True)
                        self.unmute()
                        return
                    self.unmute()
                except Exception as data:
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                action = agent.getAction(observation, total_pacmen, agentIndex)
            self.unmute()

            # Execute the action
            self.moveHistory.append((agentIndex, action))
            if self.catchExceptions:
                try:
                    self.state = self.state.generateSuccessor(
                        agentIndex, action)
                except Exception as data:
                    self.mute(agentIndex)
                    self._agentCrash(agentIndex)
                    self.unmute()
                    return
            else:
                self.state = self.state.generateSuccessor(
                    agentIndex, action, total_pacmen, pacmen, agent)

            # check if there's dead pacman in the current round
            deadPacman = None
            deadPacmanIndex = None
            if self.state.data.deadPacmanIndex != None:
                # inform this learning agent of the game result
                deadPacmanIndex = self.state.data.deadPacmanIndex
                deadPacman = pacmen[deadPacmanIndex]
                if "final" in dir(deadPacman):
                    try:
                        self.mute(deadPacmanIndex)
                        forceFinish = False
                        deadPacman.final(self.state, total_pacmen,
                                         deadPacmanIndex, stillTraining,
                                         forceFinish)
                        # update its death "reward"
                        pacmenScoreChanges[
                            deadPacmanIndex] = deadPacman.scoreChange
                        if 'observationFunction' in dir(deadPacman):
                            observation = deadPacman.observationFunction(
                                self.state.deepCopy(), total_pacmen,
                                deadPacmanIndex, stillTraining)
                        self.unmute()
                    except Exception as data:
                        if not self.catchExceptions:
                            raise
                        self._agentCrash(deadPacmanIndex)
                        self.unmute()
                        return

            # Allow for game specific conditions (winning, losing, etc.)
            self.rules.process(self.state, self)

            # Change the display
            # now also remove the dead pacman from the screen
            self.display.update(self.state.data, total_pacmen, agent,
                                agentIndex, deadPacman, deadPacmanIndex,
                                currentRound, numTraining)
            ###idx = agentIndex - agentIndex % 2 + 1
            ###self.display.update( self.state.makeObservation(idx).data )

            # update pacman scoreChange
            if self.state.data.deadPacmanIndex == None:
                if agent.isPacman == True or self.state.data.collidedPacman != None:
                    if agentIndex < total_pacmen:
                        # current agent is pacman
                        pacmenScoreChanges[agentIndex] = agent.scoreChange
                    else:
                        collidedPacmanIndex = self.state.data.collidedPacman
                        pacmenScoreChanges[collidedPacmanIndex] = self.agents[
                            collidedPacmanIndex].scoreChange
                        self.state.data.collidedPacman = None
            self.state.deadPacmanIndex = None

            # Track progress
            if agentIndex == numAgents + 1:
                self.numMoves += 1
            # Next agent
            agentIndex = (agentIndex + 1) % numAgents

            if _BOINC_ENABLED:
                boinc.set_fraction_done(self.getProgress())

        # Notify only the ghosts here, now that the game ends iff all pacmen die
        for agentIndex, agent in enumerate(self.agents):
            if agent.isPacman == False:
                if "final" in dir(agent):
                    try:
                        self.mute(agentIndex)
                        agent.final(self.state, total_pacmen, agentIndex)
                        self.unmute()
                    except Exception as data:
                        if not self.catchExceptions:
                            raise
                        self._agentCrash(agentIndex)
                        self.unmute()
                        return
            else:
                continue

        # If the end of the training episodes has already been reached, finish
        # training for those Pacmen who didn't get a chance to be trained
        # (this usually happens if -x is too small).
        if currentRound == numTraining:
            for pacman in pacmen:
                if pacman.hasFinishedTraining == False:
                    forceFinish = True
                    pacman.final(self.state, total_pacmen, pacman.index,
                                 stillTraining, forceFinish)

        self.display.finish()