Example #1
This constructor opens a log file, parses the game configuration, connects to the BlockWorld simulator at the IP and port given on the command line, and sets up the discrete action space, message protocol, and argmax test policy.
    def __init__(self):

        # Initialize logger
        logger.Log.open("./log.txt")

        # Parse the game configuration
        self.config = config.Config.parse(
            "../BlockWorldSimulator/Assets/config.txt")

        # Connect to simulator
        if len(sys.argv) < 2:
            logger.Log.info("IP not given. Using localhost i.e. 0.0.0.0")
            self.unity_ip = "0.0.0.0"
        else:
            self.unity_ip = sys.argv[1]

        if len(sys.argv) < 3:
            logger.Log.info("PORT not given. Using 11000")
            self.PORT = 11000
        else:
            self.PORT = int(sys.argv[2])

        # Size of image
        image_dim = self.config.screen_size
        self.connection = rc.ReliableConnect(self.unity_ip, self.PORT,
                                             image_dim)
        self.connection.connect()

        # Dataset specific parameters
        self.num_block = 20
        self.num_direction = 4
        use_stop = True
        if use_stop:
            self.num_actions = self.num_block * self.num_direction + 1  # 1 for stopping
        else:
            self.num_actions = self.num_block * self.num_direction
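        # With num_block = 20 and num_direction = 4 this yields 80 (block, direction)
        # move actions, plus one extra STOP action when use_stop is True (81 total).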

        # Create toolkit of message protocol between simulator and agent
        self.message_protocol_kit = mpu.MessageProtocolUtil(
            self.num_direction, self.num_actions, use_stop)

        # Test policy
        self.test_policy = gp.GenericPolicy.get_argmax_action

        # MDP details
        self.gamma = 1.0
        self.config.log_flag()
        logger.Log.info("Created Agent.")
Example #2
This variant additionally takes the training algorithm, configuration, and hyperparameter constants as arguments, and builds the corresponding model and learning algorithm (supervised MLE, REINFORCE, contextual bandit, policy gradient with advantage, or simple Q-learning).
    def __init__(self, train_alg, config, constants):

        # Initialize logger
        logger.Log.open("./log_" + str(datetime.now()) + ".txt")

        self.config = config

        # Connect to simulator
        if len(sys.argv) < 2:
            logger.Log.info("IP not given. Using localhost i.e. 0.0.0.0")
            self.unity_ip = "0.0.0.0"
        else:
            self.unity_ip = sys.argv[1]

        if len(sys.argv) < 3:
            logger.Log.info("PORT not given. Using 11000")
            self.PORT = 11000
        else:
            self.PORT = int(sys.argv[2])

        # Size of image
        image_dim = self.config.screen_size
        self.connection = rc.ReliableConnect(self.unity_ip, self.PORT,
                                             image_dim)
        self.connection.connect()

        # Dataset specific parameters
        self.num_block = 20
        self.num_direction = 4
        use_stop = True
        if use_stop:
            self.num_actions = self.num_block * self.num_direction + 1  # 1 for stopping
        else:
            self.num_actions = self.num_block * self.num_direction

        # Create toolkit of message protocol between simulator and agent
        self.message_protocol_kit = mpu.MessageProtocolUtil(
            self.num_direction, self.num_actions, use_stop)

        # Test policy
        self.test_policy = gp.GenericPolicy.get_argmax_action

        # MDP details
        self.gamma = 1.0

        # Training algorithm behaviour
        self.train_alg = train_alg

        # Define model and learning algorithm
        if self.train_alg == SUPERVISEDMLE:
            self.model = PolicyNetwork(image_dim, self.num_actions, constants)
            self.learning_alg = MaximumLikelihoodEstimation(self, self.model)
        elif self.train_alg == REINFORCE:
            self.model = PolicyNetwork(image_dim, self.num_actions, constants)
            self.learning_alg = PolicyGradient(self,
                                               self.model,
                                               total_reward=True)
        elif self.train_alg == CONTEXTUALBANDIT:
            self.model = PolicyNetwork(image_dim, self.num_actions, constants)
            self.learning_alg = PolicyGradient(self,
                                               self.model,
                                               total_reward=False)
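        # The total_reward flag presumably toggles between using the full-episode
        # return (REINFORCE) and the immediate per-step reward (contextual bandit).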
        elif self.train_alg == PGADVANTAGE:
            self.model = PolicyNetwork(image_dim, self.num_actions, constants)
            self.state_value_model = StateValueFunctionModel(
                250, image_dim, 200, 24, 32)
            self.learning_alg = PolicyGradientWithAdvantage(
                self, self.model, self.state_value_model, total_reward=True)
        elif self.train_alg == SIMPLEQLEARNING:
            self.model = ActionValueFunctionNetwork(250, image_dim, 200, 24,
                                                    32)
            self.target_q_network = ActionValueFunctionNetwork(
                250, image_dim, 200, 24, 32, scope_name="Target_Q_Network")
            self.learning_alg = QLearning(self, self.model,
                                          self.target_q_network)
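        # The separate Target_Q_Network is the usual DQN-style stabilization trick:
        # a lagged copy of the online Q-network, presumably synced periodically.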
        else:
            raise AssertionError("Training algorithm " + str(self.train_alg) +
                                 " not found or implemented.")

        self.sess = None
        self.train_writer = None
        self.config.log_flag()
        logger.Log.info("Training Algorithm: " + str(self.train_alg) +
                        ", Gamma: " + str(self.gamma))
        logger.Log.info("Created Agent.")