示例#1
0
    def __init__(self):
        """Assemble the "Single Expert" group around one ExpertFlockNode.

        The group's data and reward inputs are routed to the expert's `sp` data
        input and `tp` reward input, and the expert's predicted reconstructed
        input is exposed as the group's output of the same name.
        """
        super().__init__("Single Expert")

        params = ExpertParams()
        params.flock_size = 1
        params.n_cluster_centers = 24
        params.produce_actions = True
        params.compute_reconstruction = True

        # All temporal settings live on the nested `temporal` params object.
        temporal = params.temporal
        temporal.seq_length = 9
        temporal.seq_lookahead = 7
        temporal.n_frequent_seqs = 700
        temporal.max_encountered_seqs = 1000
        temporal.exploration_probability = 0.01
        temporal.batch_size = 200
        temporal.own_rewards_weight = 20
        temporal.compute_backward_pass = True

        expert = ExpertFlockNode(params)
        self.add_node(expert)

        # Group inputs -> expert.
        Connector.connect(self.inputs.data.output,
                          expert.inputs.sp.data_input)
        Connector.connect(self.inputs.reward.output,
                          expert.inputs.tp.reward_input)

        # Expert -> group output.
        Connector.connect(expert.outputs.sp.predicted_reconstructed_input,
                          self.outputs.predicted_reconstructed_input.input)
示例#2
0
    def __init__(self, c_n_ccs, c_buffer_size, c_seq_length, c_seq_lookahead, p_seq_length, p_seq_lookahead, p_n_ccs,
                 flock_size):
        """Assemble the "Two Experts" group from two chained ExpertFlockNodes.

        The first expert receives the group's data input; its `tp` projection
        output feeds the second expert, whose output context is fed back to the
        first expert's context input over a backward connection. Both experts
        receive the group's reward, and the first expert's predicted
        reconstructed input is the group output.

        Args:
            c_n_ccs: `n_cluster_centers` of the first expert.
            c_buffer_size: `spatial.buffer_size` of the first expert.
            c_seq_length: `temporal.seq_length` of the first expert.
            c_seq_lookahead: `temporal.seq_lookahead` of the first expert.
            p_seq_length: `temporal.seq_length` of the second expert.
            p_seq_lookahead: `temporal.seq_lookahead` of the second expert.
            p_n_ccs: `n_cluster_centers` of the second expert.
            flock_size: `flock_size` set on the first expert's params, from
                which the second expert's params are cloned.
        """
        super().__init__("Two Experts")

        first_params = ExpertParams()
        first_params.flock_size = flock_size
        first_params.n_cluster_centers = c_n_ccs
        first_params.produce_actions = True

        first_temporal = first_params.temporal
        first_temporal.seq_length = c_seq_length
        first_temporal.seq_lookahead = c_seq_lookahead
        first_temporal.n_frequent_seqs = 700
        first_temporal.max_encountered_seqs = 1000
        first_temporal.exploration_probability = 0.05
        first_temporal.batch_size = 200
        first_temporal.compute_backward_pass = True
        first_temporal.frustration_threshold = 2

        # Clone here, before the settings that apply to the first expert only.
        second_params = first_params.clone()

        first_params.spatial.buffer_size = c_buffer_size
        first_params.compute_reconstruction = True

        # Overrides for the second expert.
        second_params.temporal.seq_length = p_seq_length
        second_params.temporal.seq_lookahead = p_seq_lookahead
        second_params.n_cluster_centers = p_n_ccs
        second_params.produce_actions = False
        second_params.temporal.frustration_threshold = 10

        first_expert = ExpertFlockNode(first_params)
        second_expert = ExpertFlockNode(second_params)

        for expert in (first_expert, second_expert):
            self.add_node(expert)

        # Group inputs -> experts.
        Connector.connect(self.inputs.data.output,
                          first_expert.inputs.sp.data_input)
        Connector.connect(self.inputs.reward.output,
                          first_expert.inputs.tp.reward_input)
        Connector.connect(self.inputs.reward.output,
                          second_expert.inputs.tp.reward_input)

        # Experts <-> each other (the context link is a backward connection).
        Connector.connect(first_expert.outputs.tp.projection_outputs,
                          second_expert.inputs.sp.data_input)
        Connector.connect(second_expert.outputs.output_context,
                          first_expert.inputs.tp.context_input,
                          is_backward=True)

        # First expert -> group output.
        Connector.connect(first_expert.outputs.sp.predicted_reconstructed_input,
                          self.outputs.predicted_reconstructed_input.input)
示例#3
0
    def __init__(self):
        """Wire a GridWorld ('MapE') environment to a single ExpertFlockNode.

        Data flow built here:
          * observation: grid world `egocentric_image_action` -> random noise
            (amplitude 0.0001) -> unsqueeze(dim=0) -> expert `sp` data input;
          * reward: grid world `reward` -> expert `tp` reward input;
          * context: a ConstantNode of shape (1, 1, 3, 48), with the region
            `[:, :, 1:, 24:]` overwritten by NaN, -> expert `tp` context input;
          * action: first four values of the last row of the expert's
            predicted reconstructed input (plus SMALL_CONSTANT) -> one-hot ->
            action monitor -> grid world `agent_action` (backward connection).

        The base class is initialised with device='cuda'.
        """
        super().__init__(device='cuda')
        actions_descriptor = GridWorldActionDescriptor()
        node_action_monitor = ActionMonitorNode(actions_descriptor)

        params = GridWorldParams(map_name='MapE')
        noise_params = RandomNoiseParams(amplitude=0.0001)
        node_grid_world = GridWorldNode(params)
        expert_params = ExpertParams()
        unsqueeze_node = UnsqueezeNode(dim=0)
        noise_node = RandomNoiseNode(noise_params)
        constant_node = ConstantNode(shape=(1, 1, 3, 48))
        one_hot_node = ToOneHotNode()

        def context(inputs, outputs):
            # Copy first and mask the copy: writing NaN into inputs[0] would
            # modify the upstream node's output tensor in place.
            # NOTE(review): how the expert interprets NaN context entries is
            # not visible here - confirm against the expert implementation.
            outputs[0].copy_(inputs[0])
            outputs[0][:, :, 1:, 24:] = float('nan')

        def f(inputs, outputs):
            # Take the first four values of the last row of the first element
            # (presumably the action part of the reconstruction - confirm) and
            # offset them by SMALL_CONSTANT.
            probs = inputs[0]
            outputs[0].copy_(probs[0, -1, :4] + SMALL_CONSTANT)

        action_parser = LambdaNode(func=f, n_inputs=1, output_shapes=[(4,)])
        context_assembler = LambdaNode(func=context, n_inputs=1, output_shapes=[(1, 1, 3, 48)])

        expert_params.flock_size = 1
        expert_params.n_cluster_centers = 24
        expert_params.produce_actions = True
        expert_params.temporal.seq_length = 9
        expert_params.temporal.seq_lookahead = 7
        expert_params.temporal.n_frequent_seqs = 700
        expert_params.temporal.max_encountered_seqs = 1000
        expert_params.temporal.exploration_probability = 0.01
        expert_params.temporal.batch_size = 200
        expert_params.temporal.own_rewards_weight = 20
        # Matches the last dimension of the constant context tensor above.
        expert_params.temporal.incoming_context_size = 48

        expert_params.compute_reconstruction = True

        expert_node = ExpertFlockNode(expert_params)

        self.add_node(node_grid_world)
        self.add_node(node_action_monitor)
        self.add_node(expert_node)
        self.add_node(unsqueeze_node)
        self.add_node(action_parser)
        self.add_node(noise_node)
        self.add_node(constant_node)
        self.add_node(context_assembler)
        self.add_node(one_hot_node)

        # Observation and reward path into the expert.
        Connector.connect(node_grid_world.outputs.egocentric_image_action, noise_node.inputs.input)
        Connector.connect(noise_node.outputs.output, unsqueeze_node.inputs.input)
        Connector.connect(unsqueeze_node.outputs.output, expert_node.inputs.sp.data_input)
        Connector.connect(node_grid_world.outputs.reward, expert_node.inputs.tp.reward_input)

        # Constant, partly NaN-masked context into the expert.
        Connector.connect(constant_node.outputs.output, context_assembler.inputs[0])
        Connector.connect(context_assembler.outputs[0], expert_node.inputs.tp.context_input)

        # Action path back to the environment; the last link closes the loop,
        # hence the backward connection.
        Connector.connect(expert_node.outputs.sp.predicted_reconstructed_input, action_parser.inputs[0])
        Connector.connect(action_parser.outputs[0], one_hot_node.inputs.input)
        Connector.connect(one_hot_node.outputs.output, node_action_monitor.inputs.action_in)
        Connector.connect(node_action_monitor.outputs.action_out, node_grid_world.inputs.agent_action, is_backward=True)
    def __init__(self):
        """Wire a GridWorld ('MapTwoRoom') environment to one ConvExpertFlockNode.

        The grid world is created with `ResetStrategy.ANYWHERE`. Its
        `egocentric_image_action` output passes through random noise
        (amplitude 0.0001) and an unsqueeze(dim=0) before reaching the expert's
        `sp` data input; its reward goes to the expert's `tp` reward input.
        The first four values of the last row of the expert's predicted
        reconstructed input (plus SMALL_CONSTANT) are one-hot encoded, passed
        through the action monitor and fed back to the grid world over a
        backward connection.

        The base class is initialised with device='cuda'.
        """
        super().__init__(device='cuda')

        action_monitor = ActionMonitorNode(GridWorldActionDescriptor())
        world = GridWorldNode(GridWorldParams(map_name='MapTwoRoom',
                                              reset_strategy=ResetStrategy.ANYWHERE))
        unsqueeze = UnsqueezeNode(dim=0)
        noise = RandomNoiseNode(RandomNoiseParams(amplitude=0.0001))
        to_one_hot = ToOneHotNode()

        def f(inputs, outputs):
            # First four values of the last row of the first element, offset
            # by SMALL_CONSTANT.
            outputs[0].copy_(inputs[0][0, -1, :4] + SMALL_CONSTANT)

        action_parser = LambdaNode(func=f, n_inputs=1, output_shapes=[(4,)])

        cfg = ExpertParams()
        cfg.flock_size = 1
        cfg.n_cluster_centers = 64
        cfg.produce_actions = True
        cfg.compute_reconstruction = True

        temporal = cfg.temporal
        temporal.seq_length = 17
        temporal.seq_lookahead = 13
        temporal.n_frequent_seqs = 700
        temporal.max_encountered_seqs = 1000
        temporal.exploration_probability = 0.05
        temporal.batch_size = 200
        temporal.buffer_size = 1000
        temporal.own_rewards_weight = 20
        temporal.frustration_threshold = 2
        temporal.compute_backward_pass = True

        # Alternative kept from the original: ExpertFlockNode(cfg).
        expert = ConvExpertFlockNode(cfg)

        # Registration order is kept as in the original wiring.
        for node in (world, action_monitor, expert, unsqueeze,
                     action_parser, noise, to_one_hot):
            self.add_node(node)

        # Observation and reward path into the expert.
        Connector.connect(world.outputs.egocentric_image_action, noise.inputs.input)
        Connector.connect(noise.outputs.output, unsqueeze.inputs.input)
        Connector.connect(unsqueeze.outputs.output, expert.inputs.sp.data_input)
        Connector.connect(world.outputs.reward, expert.inputs.tp.reward_input)

        # Action path back to the environment.
        Connector.connect(expert.outputs.sp.predicted_reconstructed_input, action_parser.inputs[0])
        Connector.connect(action_parser.outputs[0], to_one_hot.inputs.input)
        Connector.connect(to_one_hot.outputs.output, action_monitor.inputs.action_in)
        Connector.connect(action_monitor.outputs.action_out, world.inputs.agent_action, is_backward=True)
示例#5
0
    def __init__(self):
        """Wire a GridWorld ('MapE') environment to two chained ExpertFlockNodes.

        The grid world's `output_image_action` passes through random noise
        (amplitude 0.0001) and an unsqueeze(dim=0) into the first expert. The
        first expert's `tp` projection output feeds the second expert, whose
        output context returns to the first expert's context input over a
        backward connection. Both experts receive the grid world's reward.
        The first four values of the last row of the first expert's predicted
        reconstructed input (plus SMALL_CONSTANT) are one-hot encoded, passed
        through the action monitor and fed back to the grid world over a
        backward connection.

        The base class is initialised with device='cuda'.
        """
        super().__init__(device='cuda')

        action_monitor = ActionMonitorNode(GridWorldActionDescriptor())
        world = GridWorldNode(GridWorldParams(map_name='MapE'))
        unsqueeze = UnsqueezeNode(dim=0)
        noise = RandomNoiseNode(RandomNoiseParams(amplitude=0.0001))
        to_one_hot = ToOneHotNode()

        def f(inputs, outputs):
            # First four values of the last row of the first element, offset
            # by SMALL_CONSTANT.
            outputs[0].copy_(inputs[0][0, -1, :4] + SMALL_CONSTANT)

        action_parser = LambdaNode(func=f, n_inputs=1, output_shapes=[(4,)])

        first_params = ExpertParams()
        first_params.flock_size = 1
        first_params.n_cluster_centers = 24
        first_params.produce_actions = True
        first_params.compute_reconstruction = True

        # `own_rewards_weight` and `incoming_context_size` are not set here.
        first_temporal = first_params.temporal
        first_temporal.seq_length = 4
        first_temporal.seq_lookahead = 2
        first_temporal.n_frequent_seqs = 700
        first_temporal.max_encountered_seqs = 1000
        first_temporal.exploration_probability = 0.05
        first_temporal.batch_size = 200
        first_temporal.frustration_threshold = 2

        # The second expert starts from a clone of the fully configured first
        # params and then overrides a handful of values.
        second_params = first_params.clone()
        second_params.n_cluster_centers = 8
        second_params.produce_actions = False
        second_params.temporal.seq_length = 5
        second_params.temporal.seq_lookahead = 4
        second_params.temporal.frustration_threshold = 10

        first_expert = ExpertFlockNode(first_params)
        second_expert = ExpertFlockNode(second_params)

        # Registration order is kept as in the original wiring.
        for node in (world, action_monitor, first_expert, second_expert,
                     unsqueeze, action_parser, noise, to_one_hot):
            self.add_node(node)

        # Observation path into the first expert.
        Connector.connect(world.outputs.output_image_action, noise.inputs.input)
        Connector.connect(noise.outputs.output, unsqueeze.inputs.input)
        Connector.connect(unsqueeze.outputs.output, first_expert.inputs.sp.data_input)

        # Experts <-> each other (the context link is a backward connection).
        Connector.connect(first_expert.outputs.tp.projection_outputs, second_expert.inputs.sp.data_input)
        Connector.connect(second_expert.outputs.output_context, first_expert.inputs.tp.context_input,
                          is_backward=True)

        Connector.connect(first_expert.outputs.sp.predicted_reconstructed_input, action_parser.inputs[0])

        # Reward goes to both experts.
        Connector.connect(world.outputs.reward, first_expert.inputs.tp.reward_input)
        Connector.connect(world.outputs.reward, second_expert.inputs.tp.reward_input)

        # Action path back to the environment.
        Connector.connect(action_parser.outputs[0], to_one_hot.inputs.input)
        Connector.connect(to_one_hot.outputs.output, action_monitor.inputs.action_in)
        Connector.connect(action_monitor.outputs.action_out, world.inputs.agent_action, is_backward=True)