Exemplo n.º 1
0
def reinforcementLearning():
    """Train a MarkovAgent on the module-level data and return its policy.

    Builds the agent from ``observations + trap_states`` (both looked up as
    globals), calls ``learn()`` on it, prints the resulting policy and
    returns it.

    Side effect: ``out.txt`` in the current working directory is created or
    truncated, but nothing is written to it.

    Returns:
        The ``policy`` attribute of the agent after ``learn()`` has run.
    """
    # Function-scope import: ``learn`` is only needed once this is called.
    from learn import MarkovAgent

    mark = MarkovAgent(observations + trap_states)
    mark.learn()
    policy = mark.policy

    # NOTE(review): the file is opened for writing and closed again without
    # any write, which leaves it empty. Presumably the policy was meant to be
    # dumped here -- confirm before adding a write.
    with open('out.txt', 'w'):
        pass

    # The single-argument call form prints the same text on Python 2 and 3;
    # the bare ``print policy`` statement is a SyntaxError on Python 3.
    print(policy)
    return policy
Exemplo n.º 2
0
            'state_': 'bottom'
        }],
        'reward':
        0
    },
    {
        'state_transitions': [
            {
                'state': 'top',
                'action': 'sink',
                'state_': 'top'
            },
            {
                'state': 'top',
                'action': 'climb',
                'state_': 'top'
            },
        ],
        'reward':
        1
    },
]

# Add the trap-state entries to observations so the agent below is built from
# both sets of entries.
observations += trap_states

# Build the agent from the combined entries; learn() is called before the
# policy is read.
mark = MarkovAgent(observations)
mark.learn()

# mark correctly learns that the optimal strategy is to always go up
print(mark.policy)
Exemplo n.º 3
0
      { 'state': 'high', 'action': 'climb', 'state_': 'top' },
    ],
    'reward': 0
  }
]

# Self-looping ("trap") entries: for each listed state, both the 'sink' and
# the 'climb' action lead back to that same state. The 'bottom' entry carries
# reward 0 and the 'top' entry carries reward 1.
trap_states = [
    {
        'state_transitions': [
            {'state': name, 'action': move, 'state_': name}
            for move in ('sink', 'climb')
        ],
        'reward': payoff,
    }
    for name, payoff in (('bottom', 0), ('top', 1))
]

# Add the trap-state entries to observations so the agent below is built from
# both sets of entries.
observations += trap_states

# Build the agent from the combined entries; learn() is called before the
# policy is read.
mark = MarkovAgent(observations)
mark.learn()

# mark correctly learns that the optimal strategy is to always go up
print(mark.policy)
            {
                'state':
                'Knows that user knows concept,Knows concept,Knows user culture,',
                'action':
                'conceptIdentif',
                'state_':
                'Knows that user knows concept,Knows concept,Knows user culture,'
            },
        ],
        'reward':
        1
    },
]

# Dump observations and trap_states, each as one bracketed, comma-separated
# line. The parenthesised single-argument form prints identical text on
# Python 2 and Python 3; the bare print statement is a SyntaxError on Python 3.
print('[%s]' % ', '.join(map(str, observations)))
print('[%s]' % ', '.join(map(str, trap_states)))

#def reinforcementLearning():
from learn import MarkovAgent
# Build the agent from the concatenation of both collections, call learn(),
# then show the resulting policy.
mark = MarkovAgent(observations + trap_states)
mark.learn()
print(mark.policy)

#  return;

#reinforcementLearning();

#f = open('out.txt', 'w')
#print >> f, mark.policy  # or f.write('...\n')
#f.close()
Exemplo n.º 5
0
import mapping
from learn import MarkovAgent

# Build the agent from mapping.transitionRewardMap and call learn() before
# reading the policy.
agent = MarkovAgent(mapping.transitionRewardMap)
agent.learn()

policy = agent.policy

print(policy)
# Show only the truthy policy values. filter(None, ...) keeps exactly the
# items that are truthy, so no identity lambda is needed.
print(list(filter(None, policy.values())))
'''
policy
{
    '(False, True, True, False)': False,
    '(True, False, False, True)': False,
    '(False, True, True, True)': False,
    '(True, False, True, False)': False,
    '(False, True, False, True)': False,
    '(True, True, True, False)': False,
    '(False, False, True, True)': False,
    '(True, True, True, True)': False,
    '(False, False, False, True)': False,
    '(False, False, False, False)': False,
    '(False, False, True, False)': False,
    '(False, True, False, False)': False,
    '(True, True, False, False)': False,
    '(True, True, False, True)': False,
    '(True, False, True, True)': False,
    '(True, False, False, False)': False
}
'''