def setup_method(self, _):
    """Per-test fixture: build a fresh CaseBase over the recorded joint/action data."""
    from mlpy.mdp.stateaction import State, Action

    # Clear class-level metadata so state from earlier tests cannot leak in.
    State.nfeatures = None
    Action.description = None
    Action.nfeatures = None

    # Only "state" is indexed (5-nearest-neighbour retrieval);
    # "act" uses cosine similarity and "delta_state" is never queried.
    state_spec = {
        "type": "float",
        "value": "data.state",
        "is_index": True,
        "retrieval_method": "knn",
        "retrieval_method_params": 5,
    }
    action_spec = {
        "type": "float",
        "value": "data.action",
        "is_index": False,
        "retrieval_method": "cosine",
    }
    delta_spec = {
        "type": "float",
        "value": "data.next_state - data.state",
        "is_index": False,
    }
    case_template = {
        "state": state_spec,
        "act": action_spec,
        "delta_state": delta_spec,
    }

    from mlpy.knowledgerep.cbr.engine import CaseBase
    self.cb = CaseBase(case_template, retention_method_params={'max_error': 1e-5})

    from mlpy.auxiliary.io import load_from_file
    self.data = load_from_file(os.path.join(os.getcwd(), 'tests', 'data/jointsAndActionsData.pkl'))
def main(args):
    """Load observation data from ``args.infile``.

    Exits the process with a readable message if the file cannot be read
    (IOError) or does not contain the expected "state" entry (KeyError).
    """
    try:
        data = load_from_file(args.infile)
        obs = data["state"]  # noqa: F841 -- consumed further on in the script
    except IOError as e:
        # Fix: `except Exc, e` is Python-2-only syntax; `as` works on 2.6+ and 3.x.
        # sys.exit(e) prints the exception message and exits with status 1,
        # exactly like the former sys.exit(sys.exc_info()[1]).
        sys.exit(e)
    except KeyError as e:
        sys.exit("Key not found: {0}".format(e))
def main(args):
    """Load a slice of policies from the converted policy file.

    ``args.policy_num`` is either a single index ("3") or a slice
    expression ("1:4", "1:10:2"); a bare index is widened to the
    one-element slice "n:n+1".

    Exits with a readable message on IOError or a missing "act" key.
    """
    try:
        filename = convert_to_policy(args.policies)
        # noinspection PyUnusedLocal
        data = load_from_file(filename)

        # A bare index "n" becomes the one-element slice "n:n+1".
        if ":" not in args.policy_num:
            args.policy_num = args.policy_num + ":" + str(int(args.policy_num) + 1)

        # Fix: build the slice explicitly instead of eval()-ing user input,
        # which would execute arbitrary expressions from the command line.
        # Empty fields keep slice defaults (e.g. "2:" -> slice(2, None)).
        bounds = [int(p) if p else None for p in args.policy_num.split(":")]
        policies = data['act'][slice(*bounds)]
    except IOError as e:
        # Fix: `except Exc, e` is Python-2-only syntax; `as` works on 2.6+ and 3.x.
        sys.exit(e)
    except KeyError as e:
        sys.exit("Key not found: {0}".format(e))
def main(args):
    """Load train/test data from ``args.infile`` and derive basic dimensions.

    Exits the process with a readable message if the file cannot be read
    (IOError) or lacks the "train"/"test" entries (KeyError).
    """
    try:
        data = load_from_file(args.infile)
        train = data["train"]
        test = data["test"]

        obs_avg = calc_stats(test)
        nobs = 20  # train[0].shape[0]
        # d: feature dimensionality, n: number of samples per trial.
        d, n = train[0][0].shape
    except IOError as e:
        # Fix: `except Exc, e` is Python-2-only syntax; `as` works on 2.6+ and 3.x.
        sys.exit(e)
    except KeyError as e:
        sys.exit("Key not found: {0}".format(e))
def setup_method(self, _):
    """Per-test fixture: fit a CASML model on all but the held-out 10th sample."""
    from mlpy.mdp.stateaction import State, Action

    # Clear class-level metadata so state from earlier tests cannot leak in.
    State.nfeatures = None
    Action.description = None
    Action.nfeatures = None

    # Only "state" is indexed (5-nearest-neighbour retrieval);
    # "act" uses cosine similarity and "delta_state" is never queried.
    state_spec = {
        "type": "float",
        "value": "data.state",
        "is_index": True,
        "retrieval_method": "knn",
        "retrieval_method_params": 5,
    }
    action_spec = {
        "type": "float",
        "value": "data.action",
        "is_index": False,
        "retrieval_method": "cosine",
    }
    delta_spec = {
        "type": "float",
        "value": "data.next_state - data.state",
        "is_index": False,
    }
    case_template = {
        "state": state_spec,
        "act": action_spec,
        "delta_state": delta_spec,
    }

    from mlpy.mdp.continuous.casml import CASML
    self.model = CASML(case_template, tau=1e-5, sigma=1e-5, ncomponents=2)

    from mlpy.auxiliary.io import load_from_file
    data = load_from_file(os.path.join(os.getcwd(), 'tests', 'data/jointsAndActionsData.pkl'))

    # Hold out the 10th experience so tests can probe an unseen state/action.
    self.unseen_state = data["states"][0][:, 10]
    self.unseen_action = data["actions"][0][:, 10]

    self.model.fit(np.delete(data["states"][0], 10, 1),
                   np.delete(data["actions"][0], 10, 1))
# Load training/test observations; any file or schema problem aborts the run
# with a readable message.
try:
    data = load_from_file(args.infile)
    train = data["train"]
    test = data["test"]

    obs_avg = calc_stats(test)
    nobs = 20  # train[0].shape[0]
    # d: feature dimensionality, n: number of samples per trial.
    d, n = train[0][0].shape
except IOError as e:
    # Fix: `except Exc, e` is Python-2-only syntax; `as` works on 2.6+ and 3.x.
    sys.exit(e)
except KeyError as e:
    sys.exit("Key not found: {0}".format(e))

# Load the action sequence for the requested policy.
try:
    data = load_from_file(args.policy)
    actions = data["act"][args.policy_num]
except IOError as e:
    sys.exit(e)
except KeyError as e:
    sys.exit("Key not found: {0}".format(e))

ntrials = 20

# Figure/axis handles are populated lazily during plotting.
fig1 = None
fig2 = None
ax1 = None
ax2 = None
ax3 = None
ax4 = None
def test_agentmodule_creation(self): from mlpy.agents.modules import AgentModuleFactory # create follow policy module with pytest.raises(TypeError): AgentModuleFactory().create('followpolicymodule') from mlpy.auxiliary.io import load_from_file data = load_from_file( os.path.join(os.getcwd(), 'tests', 'data/policies.pkl')) with pytest.raises(AttributeError): AgentModuleFactory().create('followpolicymodule', data) AgentModuleFactory().create('followpolicymodule', data['act'][0:2]) # create learning module with pytest.raises(TypeError): AgentModuleFactory().create('learningmodule') from mlpy.mdp.stateaction import Action Action.set_description({ 'out': { 'value': [-0.004] }, 'in': { 'value': [0.004] }, 'kick': { 'value': [-1.0] } }) # create `qlearner` learning module AgentModuleFactory().create('learningmodule', 'qlearner', max_steps=10) AgentModuleFactory().create('learningmodule', 'qlearner', lambda s, a: 1.0, max_steps=10) with pytest.raises(ValueError): AgentModuleFactory().create('learningmodule', 'qlearner', 1.0, max_steps=10) # create `rldtlearner` learner module from mlpy.mdp.discrete import DiscreteModel from mlpy.planners.discrete import ValueIteration planner = ValueIteration(DiscreteModel(['out', 'in', 'kick'])) AgentModuleFactory().create('learningmodule', 'rldtlearner', None, planner, max_steps=10) AgentModuleFactory().create('learningmodule', 'rldtlearner', planner=planner, max_steps=10) with pytest.raises(TypeError): AgentModuleFactory().create('learningmodule', 'rldtlearner', max_step=10)
explorer = None if args.explorer_type in ["egreedyexplorer", "softmaxexplorer"]: explorer = ExplorerFactory.create(args.explorer_type, args.explorer_params, args.decay) if args.learner == "apprenticeshiplearner": learner = None if args.progress: try: learner = ApprenticeshipLearner.load(args.savefile) except IOError: pass if not learner: try: data = load_from_file(args.infile) obs = data["state"] actions = data["act"] labels = data["label"] except IOError: sys.exit(sys.exc_info()[1]) except KeyError, e: sys.exit("Key not found: {0}".format(e)) # Train the model with empirical data for i, (s, a, l) in enumerate(zip(obs, actions, labels)): model.fit(s, a, l[0]) # model.print_transitions() learner = ApprenticeshipLearner( np.asarray(demo),