# Shared rlpy imports for the InfCartPoleBalance experiment setups below.
from rlpy.Domains import InfCartPoleBalance
from rlpy.Agents import Greedy_GQ, Q_Learning, SARSA
from rlpy.Representations import (IndependentDiscretization, KernelizediFDD,
                                  RBF, Tabular, iFDD)
from rlpy.Policies import eGreedy
from rlpy.Experiments import Experiment
# Gaussian kernel used by KernelizediFDD; the exact module path may differ
# between rlpy versions.
from rlpy.Representations.kernels import gaussian_kernel


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.03613232738,
                    lambda_=0.,
                    boyan_N0=12335.665,
                    initial_learn_rate=0.037282,
                    discretization=6.):
    """SARSA with an iFDD representation on InfCartPoleBalance."""
    opt = {}
    opt["path"] = path  # was missing; the other setups all record the output path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain, discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                         lambda_=lambda_,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    resolution=25.,
                    num_rbfs=206.,
                    lambda_=0.75):
    """Q-Learning with a randomized RBF representation on InfCartPoleBalance."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=116.7025,
                    initial_learn_rate=0.01402,
                    discretization=6.):
    """SARSA with an IndependentDiscretization representation."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    discretization=20.,
                    lambda_=0.75):
    """Q-Learning with a Tabular representation (episodes capped at 1000 steps)."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance(episodeCap=1000)
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def makeComponents(self):
    # Build domain, representation, policy, and agent from the current GUI
    # selections via the corresponding factories.
    domain = InfCartPoleBalance()
    representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=str(self.lstRepresentation.currentItem().text()),
        domain=domain)
    policy = PolicyFactory.get(
        config=self.policyConfig,
        name=str(self.lstPolicy.currentItem().text()),
        representation=representation)
    agent = AgentFactory.get(
        config=self.agentConfig,
        name=str(self.lstAgent.currentItem().text()),
        representation=representation,
        policy=policy)
    return domain, agent
def initConfig(self):
    # Default hyper-parameter configurations for agents, policies, and
    # representations on InfCartPoleBalance.
    domain = InfCartPoleBalance()
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / 45.016
    self.agentConfig['QLearning'] = {'lambda': 0.6596, 'gamma': 0.9,
                                     'alpha': 0.993,
                                     'alpha_decay_mode': 'boyan',
                                     'boyan_N0': 235}
    self.agentConfig['Sarsa'] = {'lambda': 0.6596, 'gamma': 0.9,
                                 'alpha': 0.993,
                                 'alpha_decay_mode': 'boyan',
                                 'boyan_N0': 235}
    self.policyConfig['eGreedy'] = {'epsilon': 0.1}
    self.representationConfig['Tabular'] = {'discretization': 6}
    self.representationConfig['IncrementalTabular'] = {'discretization': 6}
    self.representationConfig['RBF'] = {'num_rbfs': 206,
                                        'resolution_max': 25,
                                        'resolution_min': 25}
    self.representationConfig['iFDD'] = {'discretization': 6,
                                         'discover_threshold': 0.037282}
    self.representationConfig['KernelizediFDD'] = {
        'sparsify': 1,
        'kernel': gaussian_kernel,
        'kernel_args': [kernel_width],
        'active_threshold': 0.01,
        'discover_threshold': 0.01356,
        'max_active_base_feat': 10,
        'max_base_feat_sim': 0.5,
        'kernel_resolution': 45.016}
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.01356,
                    boyan_N0=235.,
                    lambda_=0.6596,
                    initial_learn_rate=.993,
                    kernel_resolution=45.016):
    """Q-Learning with a kernelized iFDD representation."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    # Width of the Gaussian kernels, scaled to the state-space extent.
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / kernel_resolution
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    # Alternative agent:
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.01256,
                    lambda_=0.81,
                    boyan_N0=9811.2337,
                    initial_learn_rate=.15,
                    discretization=22.):
    """Q-Learning with an iFDD representation (iFDD+ discovery)."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain, discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=True)
    policy = eGreedy(representation, epsilon=0.1)
    # Alternative agent:
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.013461679,
                    lambda_=0.,
                    boyan_N0=484.78006,
                    initial_learn_rate=0.5651405,
                    discretization=23.):
    """Greedy-GQ with an iFDD representation on InfCartPoleBalance."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain, discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1. - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy, representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             BetaCoef=1e-6,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
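# Usage sketch (not part of the original setups): rlpy experiment scripts
# typically build the experiment with make_experiment and then run, plot,
# and save it. The exact keyword arguments of Experiment.run may vary
# between rlpy versions; this mirrors the pattern used in the rlpy examples.
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # do not render individual steps
                   visualize_learning=False,   # do not render value function while learning
                   visualize_performance=0)    # number of evaluation episodes to render
    experiment.plot()
    experiment.save()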