def reinforcementLearning():
    """Train a MarkovAgent on the recorded transitions and return its policy.

    The agent is built from ``observations + trap_states`` (module-level
    names defined outside this function), ``learn()`` is run on it, and the
    resulting ``policy`` attribute is printed and returned.

    Side effect: ``out.txt`` in the current working directory is created or
    truncated and left empty. Nothing is written to it; the side effect is
    preserved for backward compatibility with the previous implementation.

    Returns:
        The ``policy`` attribute of the trained agent.
    """
    # Imported locally, as in the original, so the dependency on `learn`
    # is only needed when this function is actually called.
    from learn import MarkovAgent

    mark = MarkovAgent(observations + trap_states)
    mark.learn()
    policy = mark.policy

    # The context manager guarantees the handle is closed even if this
    # section later grows code that can raise. To persist the policy,
    # write it inside this block.
    with open('out.txt', 'w'):
        pass

    # Call form prints the same text as the old `print policy` statement on
    # Python 2 and is also valid syntax on Python 3.
    print(policy)
    return policy
'state_': 'bottom' }], 'reward': 0 }, { 'state_transitions': [ { 'state': 'top', 'action': 'sink', 'state_': 'top' }, { 'state': 'top', 'action': 'climb', 'state_': 'top' }, ], 'reward': 1 }, ] observations += trap_states mark = MarkovAgent(observations) mark.learn() # mark correctly learns that the optimal strategy is to always go up print(mark.policy)
{ 'state': 'high', 'action': 'climb', 'state_': 'top' }, ], 'reward': 0 } ] trap_states = [ { 'state_transitions': [ { 'state': 'bottom', 'action': 'sink', 'state_': 'bottom' }, { 'state': 'bottom', 'action': 'climb', 'state_': 'bottom' } ], 'reward': 0 }, { 'state_transitions': [ { 'state': 'top', 'action': 'sink', 'state_': 'top' }, { 'state': 'top', 'action': 'climb', 'state_': 'top' }, ], 'reward': 1 }, ] observations += trap_states mark = MarkovAgent(observations) mark.learn() # mark correctly learns that the optimal strategy is to always go up print(mark.policy)
{ 'state': 'Knows that user knows concept,Knows concept,Knows user culture,', 'action': 'conceptIdentif', 'state_': 'Knows that user knows concept,Knows concept,Knows user culture,' }, ], 'reward': 1 }, ] print '[%s]' % ', '.join(map(str, observations)) print '[%s]' % ', '.join(map(str, trap_states)) #def reinforcementLearning(): from learn import MarkovAgent mark = MarkovAgent(observations + trap_states) mark.learn() print(mark.policy) # return; #reinforcementLearning(); #f = open('out.txt', 'w') #print >> f, mark.policy # or f.write('...\n') #f.close()
import mapping
from learn import MarkovAgent

# Build the agent from the transition/reward map exposed by `mapping`
# and run its learning step.
agent = MarkovAgent(mapping.transitionRewardMap)
agent.learn()
policy = agent.policy

# Print the whole policy first, then only the truthy policy values.
print(policy)
print(list(filter(None, policy.values())))

# Output recorded from an earlier run: every state maps to False there,
# so the filtered list printed above was empty for that run.
'''
policy
{
    '(False, True, True, False)': False,
    '(True, False, False, True)': False,
    '(False, True, True, True)': False,
    '(True, False, True, False)': False,
    '(False, True, False, True)': False,
    '(True, True, True, False)': False,
    '(False, False, True, True)': False,
    '(True, True, True, True)': False,
    '(False, False, False, True)': False,
    '(False, False, False, False)': False,
    '(False, False, True, False)': False,
    '(False, True, False, False)': False,
    '(True, True, False, False)': False,
    '(True, True, False, True)': False,
    '(True, False, True, True)': False,
    '(True, False, False, False)': False
}
'''