def test_reinforcement_fixed_targets():
    """Reinforcement learning with fixed targets driving a DDM-based selection mechanism."""
    in_layer = TransferMechanism(
        size=2,
        name='Input Layer',
    )
    ddm = pnl.DDM(
        input_format=pnl.ARRAY,
        function=pnl.DriftDiffusionAnalytical(),
        output_states=[pnl.SELECTED_INPUT_ARRAY],
        name='DDM',
    )
    proc = Process(
        pathway=[in_layer, ddm],
        learning=LearningProjection(
            learning_function=Reinforcement(learning_rate=0.05)),
    )
    stimuli = {in_layer: [[1, 1], [1, 1]]}
    sys = System(
        processes=[proc],
        # learning_rate=0.05,
    )
    fixed_targets = [[10.], [10.]]

    # logged_mechanisms = [in_layer, ddm]
    # for mech in sys.learning_mechanisms:
    #     logged_mechanisms.append(mech)
    #
    # for mech in logged_mechanisms:
    #     mech.log.set_log_conditions(items=[pnl.VALUE])

    sys.run(inputs=stimuli, targets=fixed_targets)

    # DDM's value after the run is deterministic for these fixed inputs/targets.
    assert np.allclose(
        ddm.value,
        [[1.], [2.30401336], [0.97340301], [0.02659699], [2.30401336],
         [2.08614798], [1.85006765], [2.30401336], [2.08614798], [1.85006765]])
def _instantiate_learning_mechanism(self, learning_function, learning_rate, learned_projection, context=None):
    """Instantiate a KohonenLearningMechanism for `learned_projection` and wire it in.

    Creates the LearningMechanism, a MappingProjection from the learned
    projection's sender to the LearningMechanism's ACTIVATION_INPUT, a
    MappingProjection from this Mechanism's INPUT_PATTERN OutputPort to its
    ACTIVATION_OUTPUT, and a LearningProjection from its LEARNING_SIGNAL to
    the learned projection's matrix ParameterPort.

    Returns the new KohonenLearningMechanism.
    """
    # FIX: the original body ignored the learned_projection argument and read
    # self.learned_projection everywhere; use the argument so the method
    # honors its signature (existing callers pass self.learned_projection,
    # so behavior is unchanged for them).
    learning_mechanism = KohonenLearningMechanism(
        default_variable=[learned_projection.sender.value,
                          learned_projection.receiver.value],
        matrix=self.matrix,
        function=learning_function,
        learning_rate=learning_rate,
        # learning_signals=[self.matrix],
        name="{} for {}".format(LearningMechanism.className, self.name))

    # KDM 10/22/18: should below be aux_components?
    # FIX: 10/31/19 [JDC]: YES!

    # Instantiate Projection from learned_projection's sender to LearningMechanism
    MappingProjection(
        sender=learned_projection.sender,
        receiver=learning_mechanism.input_ports[ACTIVATION_INPUT],
        matrix=IDENTITY_MATRIX,
        name="Error Projection for {}".format(learning_mechanism.name))

    # Instantiate Projection from the Mechanism's INPUT_PATTERN OutputPort
    # (which has the value of the learned_projection's receiver; i.e., the Mechanism's input)
    # to the LearningMechanism's ACTIVATION_OUTPUT InputPort.
    # NOTE(review): this projection is given the same name as the one above;
    # presumably relies on automatic name deduplication — confirm intended.
    MappingProjection(
        sender=self.output_ports[INPUT_PATTERN],
        receiver=learning_mechanism.input_ports[ACTIVATION_OUTPUT],
        matrix=IDENTITY_MATRIX,
        name="Error Projection for {}".format(learning_mechanism.name))

    # Instantiate Projection from LearningMechanism to learned_projection
    LearningProjection(
        sender=learning_mechanism.output_ports[LEARNING_SIGNAL],
        receiver=self.matrix,
        name="{} for {}".format(LearningProjection.className, learned_projection.name))

    return learning_mechanism
# Princeton University licenses this file to You under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. You may obtain a copy of the License at: # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. # ********************************************** MappingProjection **************************************************** """ Contents -------- * `MappingProjection_Overview` * `MappingProjection_Creation` - `MappingProjection_Matrix_Specification` - `MappingProjection_Learning_Specification` - `MappingProjection_Deferred_Initialization` * `MappingProjection_Structure` - `MappingProjection_Sender` - `MappingProjection_Receiver` * `MappingProjection_Execution` - `MappingProjection_Learning` * `MappingProjection_Class_Reference` .. _MappingProjection_Overview: Overview -------- A MappingProjection transmits the `value <OutputPort.value>` of an `OutputPort` of one `ProcessingMechanism
def test_reinforcement():
    """RL test: SoftMax(PROB) action selection trained by Reinforcement over 10 trials."""
    stim_layer = TransferMechanism(
        default_variable=[0, 0, 0],
        name='Input Layer',
    )
    selector = TransferMechanism(
        default_variable=[0, 0, 0],
        function=SoftMax(output=PROB, gain=1.0),
        name='Action Selection',
    )
    proc = Process(
        default_variable=[0, 0, 0],
        size=3,
        pathway=[stim_layer, selector],
        learning=LearningProjection(
            learning_function=Reinforcement(learning_rate=0.05)),
        target=0,
    )
    # print ('reward prediction weights: \n', selector.input_states[0].path_afferents[0].matrix)
    # print ('targetMechanism weights: \n', selector.output_states.sendsToProjections[0].matrix)
    payoffs = [10, 10, 10]

    # Must initialize reward (won't be used, but needed for declaration of lambda function)
    selector.output_state.value = [0, 0, 1]

    # Get reward value for selected action
    reward = lambda: [payoffs[int(np.nonzero(selector.output_state.value)[0])]]

    def announce_trial(system):
        print("\n\n**** TRIAL: ", system.scheduler_processing.clock.simple_time)

    def report_weights():
        # Closure over `sys`, which is assigned below before run() calls this.
        print(
            'Reward prediction weights: \n',
            selector.input_states[0].path_afferents[0].get_mod_matrix(sys))
        print('\nAction selected: {}; predicted reward: {}'.format(
            np.nonzero(selector.output_state.value)[0][0],
            selector.output_state.value[np.nonzero(
                selector.output_state.value)[0][0]],
        ))

    stim_dict = {stim_layer: [[1, 1, 1]]}
    sys = System(
        processes=[proc],
        # learning_rate=0.05,
        targets=[0],
    )
    results = sys.run(
        num_trials=10,
        inputs=stim_dict,
        targets=reward,
        call_before_trial=functools.partial(announce_trial, sys),
        call_after_trial=report_weights,
    )

    # Flatten sys.results into a flat list of scalars.
    results_list = []
    for trial_result in sys.results:
        for item in trial_result:
            item = item.tolist()
            try:
                iter(item)
            except TypeError:
                item = [item]
            results_list.extend(item)

    objective_mech = sys.mechanisms[2]
    learning_mech = sys.mechanisms[3]
    prediction_weights = selector.input_states[0].path_afferents[0]

    expected_output = [
        (stim_layer.get_output_values(sys), [np.array([1., 1., 1.])]),
        (selector.get_output_values(sys), [np.array([0., 3.71496434, 0.])]),
        (pytest.helpers.expand_np_ndarray(objective_mech.get_output_values(sys)),
         pytest.helpers.expand_np_ndarray(
             [np.array([6.28503566484375]), np.array(39.50167330835792)])),
        (pytest.helpers.expand_np_ndarray(learning_mech.get_output_values(sys)),
         pytest.helpers.expand_np_ndarray([[
             np.array([0., 0.31425178324218755, 0.]),
             np.array([0., 0.31425178324218755, 0.])
         ]])),
        (prediction_weights.get_mod_matrix(sys),
         np.array([
             [1., 0., 0.],
             [0., 4.02921612, 0.],
             [0., 0., 1.8775],
         ])),
        (results, [
            [np.array([0., 1., 0.])],
            [np.array([0., 1.45, 0.])],
            [np.array([0., 0., 1.])],
            [np.array([0., 1.8775, 0.])],
            [np.array([0., 2.283625, 0.])],
            [np.array([0., 2.66944375, 0.])],
            [np.array([0., 0., 1.45])],
            [np.array([0., 3.03597156, 0.])],
            [np.array([0., 3.38417298, 0.])],
            [np.array([0., 3.71496434, 0.])],
        ]),
    ]
    for idx, (actual, expected) in enumerate(expected_output):
        np.testing.assert_allclose(
            actual, expected,
            err_msg='Failed on expected_output[{0}]'.format(idx))