def makeComponents(self):
    """Build the domain and agent currently selected in the GUI.

    Reads the block count, noise level and reward constants from the
    spin boxes, then instantiates the representation, policy and agent
    named in the corresponding list widgets via the project factories.

    :return: ``(domain, agent)`` ready to be handed to an Experiment.
    """
    self.block_number = int(self.spinBoxBlocksNumber.value())
    noise_level = float(self.spNoise.value())
    # Tower height equals the block count, so the goal is one full tower.
    world = BlocksWorld(
        blocks=self.block_number,
        towerSize=self.block_number,
        noise=noise_level,
    )
    # Reward constants are overridden with the GUI-configured values.
    world.GOAL_REWARD = float(self.spGoalReward.value())
    world.STEP_REWARD = float(self.spStepReward.value())

    representation_name = str(self.lstRepresentation.currentItem().text())
    representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=representation_name,
        domain=world,
    )
    policy_name = str(self.lstPolicy.currentItem().text())
    policy = PolicyFactory.get(
        config=self.policyConfig,
        name=policy_name,
        representation=representation,
    )
    agent_name = str(self.lstAgent.currentItem().text())
    agent = AgentFactory.get(
        config=self.agentConfig,
        name=agent_name,
        representation=representation,
        policy=policy,
    )
    return world, agent
def make_experiment(
        exp_id=1,
        path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03104970,
        lambda_=0.,
        boyan_N0=1220.247254,
        initial_learn_rate=0.27986823):
    """
    Build a BlocksWorld experiment: Q-Learning over an iFDD representation
    seeded with independent discretizations.

    :param exp_id: id / seed of the experiment run.
    :param path: output directory template for the results.
    :param discover_threshold: relevance threshold for iFDD feature discovery.
    :param lambda_: eligibility-trace decay.
    :param boyan_N0: N0 constant of the Boyan learn-rate decay schedule.
    :param initial_learn_rate: initial step size.
    :return: a configured ``Experiment`` (not yet run).
    """
    opt = {}
    opt["exp_id"] = exp_id
    # Fix: `path` was accepted but never stored, so every run silently fell
    # back to the Experiment default output directory; forward it like the
    # sibling make_experiment variants in this file do.
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    sparsify = 1
    ifddeps = 1e-7
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          # iFDDPlus slightly below 1 keeps the plus-variant
                          # discovery criterion numerically stable.
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation,
        discount_factor=domain.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.012695,
                    lambda_=0.2,
                    boyan_N0=80.798,
                    initial_learn_rate=0.402807):
    """
    Assemble a BlocksWorld experiment: Greedy-GQ over a lazy iFDDK
    representation grown from independent discretizations.

    :param exp_id: id / seed of the experiment run.
    :param path: output directory template for the results.
    :param discover_threshold: relevance threshold for feature discovery.
    :param lambda_: eligibility-trace decay (shared by iFDDK and the agent).
    :param boyan_N0: N0 constant of the Boyan learn-rate decay schedule.
    :param initial_learn_rate: initial step size.
    :return: a configured ``Experiment`` (not yet run).
    """
    sparsify = 1
    domain = BlocksWorld(blocks=6, noise=0.3)
    base_features = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           base_features,
                           sparsify=sparsify,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)
    policy = eGreedy(representation, epsilon=0.1)
    agent = Greedy_GQ(policy,
                      representation,
                      discount_factor=domain.discount_factor,
                      lambda_=lambda_,
                      initial_learn_rate=initial_learn_rate,
                      learn_rate_decay_mode="boyan",
                      boyan_N0=boyan_N0)
    opt = {
        "exp_id": exp_id,
        "path": path,
        "max_steps": 100000,
        "num_policy_checks": 20,
        "checks_per_policy": 1,
        "domain": domain,
        "agent": agent,
    }
    return Experiment(**opt)
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=10.09,
                    initial_learn_rate=.47):
    """
    Assemble a BlocksWorld experiment: Greedy-GQ over a plain
    IndependentDiscretization (tabular per-variable) representation.

    :param exp_id: id / seed of the experiment run.
    :param path: output directory template for the results.
    :param lambda_: eligibility-trace decay.
    :param boyan_N0: N0 constant of the Boyan learn-rate decay schedule.
    :param initial_learn_rate: initial step size.
    :return: a configured ``Experiment`` (not yet run).
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5
    # Removed the dead locals `sparsify` and `ifddeps`: this variant uses a
    # plain IndependentDiscretization rather than iFDD, so neither was read.
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def makeComponents(self):
    """Instantiate domain and agent from the current GUI selections.

    The block count, noise and reward constants come from the spin boxes;
    representation, policy and agent are created through the project
    factories using the names highlighted in the list widgets.

    :return: ``(domain, agent)`` for the experiment to run.
    """
    self.block_number = int(self.spinBoxBlocksNumber.value())
    # towerSize == blocks: the target configuration is a single tower.
    domain = BlocksWorld(blocks=self.block_number,
                         towerSize=self.block_number,
                         noise=float(self.spNoise.value()))
    # Override the domain's reward constants with the GUI values.
    domain.GOAL_REWARD = float(self.spGoalReward.value())
    domain.STEP_REWARD = float(self.spStepReward.value())

    representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=str(self.lstRepresentation.currentItem().text()),
        domain=domain)
    policy = PolicyFactory.get(
        config=self.policyConfig,
        name=str(self.lstPolicy.currentItem().text()),
        representation=representation)
    agent = AgentFactory.get(
        config=self.agentConfig,
        name=str(self.lstAgent.currentItem().text()),
        representation=representation,
        policy=policy)
    return domain, agent
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=14.44946,
                    initial_learn_rate=0.240155681):
    """
    Assemble a BlocksWorld experiment: Greedy-GQ over a hand-designed
    tile coding whose tilings join selected subsets of the six variables.

    :param exp_id: id / seed of the experiment run.
    :param path: output directory template for the results.
    :param lambda_: eligibility-trace decay.
    :param boyan_N0: N0 constant of the Boyan learn-rate decay schedule.
    :param initial_learn_rate: initial step size.
    :return: a configured ``Experiment`` (not yet run).
    """
    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 100000,
        "num_policy_checks": 20,
        "checks_per_policy": 1,
    }
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    # Each row selects the variables one tiling conjoins (20 tilings total).
    conjunctions = np.matrix("""1 1 1 0 0 0; 0 1 1 1 0 0; 0 0 1 1 1 0; 0 0 0 1 1 1; 0 0 1 0 1 1; 0 0 1 1 0 1; 1 0 1 1 0 0; 1 0 1 0 1 0; 1 0 0 1 1 0; 1 0 0 0 1 1; 1 0 1 0 0 1; 1 0 0 1 0 1; 1 1 0 1 0 0; 1 1 0 0 1 0; 1 1 0 0 0 1; 0 1 0 1 1 0; 0 1 0 0 1 1; 0 1 0 1 0 1; 0 1 1 0 1 0; 0 1 1 0 0 1""")
    representation = TileCoding(domain,
                                memory=2000,
                                num_tilings=[1] * conjunctions.shape[0],
                                resolution_matrix=conjunctions * 6,
                                safety="none")
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    return Experiment(**opt)