def get_reinforce_experiment(case, minor=1):
    gen = case.generators

    profile = array([1.0, 1.0])
    maxSteps = len(profile)
    initialSigma = 0.0
    sigmaOffset = -5.0
    # Earlier settings: decay = 0.95, learningRate = 0.0005

    if minor == 1:
        decay = 0.998
        learningRate = 0.01  # (0.1-0.001, down to 1e-7 for RNNs, default: 0.1)
    elif minor == 2:
        decay = 0.999
        learningRate = 0.01
    elif minor == 3:
        decay = 0.998
        learningRate = 0.05
    elif minor == 4:
        decay = 0.998
        learningRate = 0.005
    else:
        raise ValueError("invalid minor version: %s" % minor)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        learner = Reinforce()
        learner.learningRate = learningRate
        # learner.gd.rprop = False
        # Only relevant for backprop (BP):
        # learner.gd.alpha = 0.0001
        # learner.gd.alphadecay = 0.9
        # learner.gd.momentum = 0.9
        # Only relevant for Rprop (RP):
        # learner.gd.deltamin = 0.0001

        task, agent = get_continuous_task_agent(
            [g], market, nOffer, markupMax, withholdMax, maxSteps, learner)

        learner.explorer = ManualNormalExplorer(agent.module.outdim,
                                                initialSigma, decay,
                                                sigmaOffset)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Passive agent.
    task, agent = get_neg_one_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
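# Why sigmaOffset is negative: a minimal sketch (an assumption, not pyreto's
# code) of the exploration schedule these parameters suggest. The explorer's
# sigma parameter is assumed here to decay exponentially from initialSigma
# towards sigmaOffset, so exploration noise shrinks as training proceeds;
# the parameter is typically squashed to a positive standard deviation
# internally, which is why values below zero are meaningful.
def assumed_sigma_schedule(initialSigma, decay, sigmaOffset, nEpisodes):
    """Yield the assumed exploration parameter for each episode."""
    sigma = initialSigma
    for _ in range(nEpisodes):
        yield sigma
        # Decay towards the offset after each episode.
        sigma = decay * (sigma - sigmaOffset) + sigmaOffset

# E.g. initialSigma=0.0, decay=0.998, sigmaOffset=-5.0 drifts slowly from
# 0.0 towards -5.0, i.e. from broad exploration towards near-greedy offers.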
def get_reinforce_experiment(case):
    locAdj = "ac"
    initialSigma = 0.0
    sigmaOffset = -5.0
    decay = 0.995
    learningRate = 0.005

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = \
        pyreto.continuous.MarketExperiment([], [], market, branchOutages=None)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = Reinforce()
        learner.learningRate = learningRate

        task, agent = get_continuous_task_agent(
            g, market, nOffer, markupMax, withholdMax, maxSteps, learner)

        learner.explorer = ManualNormalExplorer(agent.module.outdim,
                                                initialSigma, decay,
                                                sigmaOffset)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Have an agent bid at marginal cost (0.0) for the sync cond.
    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001  # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_neg_one_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
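# How one of these factory functions might be driven. This loop is a sketch
# under stated assumptions only: that MarketExperiment exposes a
# PyBrain-style doEpisodes() and that its agents follow the LearningAgent
# learn()/reset() interface. run_experiment itself is hypothetical and is
# not part of pyreto.
def run_experiment(experiment, nBatches=100, batch=10):
    for _ in range(nBatches):
        experiment.doEpisodes(batch)  # assumed PyBrain-style API
        for agent in experiment.agents:
            agent.learn()             # assumed LearningAgent interface
            agent.reset()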
from pybrain.tools.shortcuts import buildNetwork
from pybrain.tools.example_tools import ExTools
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.directsearch.reinforce import Reinforce
from pybrain.rl.experiments import EpisodicExperiment

batch = 50                    # number of samples per learning step
prnts = 4                     # number of learning steps after which results are printed
epis = 4000 / batch / prnts   # number of rollouts
numbExp = 10                  # number of experiments
et = ExTools(batch, prnts, kind="learner")  # tool for printing and plotting

for runs in range(numbExp):
    print 'run: ', runs
    # Create the environment.
    env = CartPoleEnvironment()
    # Create the task.
    task = BalanceTask(env, 200, desiredValue=None)
    # Create the controller network.
    net = buildNetwork(4, 1, bias=False)
    # Create the agent with controller and learner (and its options).
    agent = LearningAgent(net, Reinforce())
    et.agent = agent
    # Create the experiment.
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment.
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        state, action, reward = agent.learner.dataset.getSequence(
            agent.learner.dataset.getNumSequences() - 1)
        et.printResults(reward.sum(), runs, updates)
    et.addExps()
et.showExps()
print 'done'
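# What the Reinforce learner above is estimating, in miniature: the
# likelihood-ratio (REINFORCE) gradient, grad log pi(a|s) * R, summed over
# an episode. This is a self-contained illustration for a hypothetical
# linear-Gaussian policy with fixed sigma, not PyBrain's implementation.
import numpy as np

def reinforce_gradient(states, actions, rewards, theta, sigma=1.0):
    """One REINFORCE gradient estimate from a single episode.

    Policy: a ~ Normal(theta . s, sigma^2). Returns the estimated
    gradient of the expected episode return with respect to theta.
    """
    R = np.sum(rewards)  # undiscounted episode return
    grad = np.zeros_like(theta, dtype=float)
    for s, a in zip(states, actions):
        mu = np.dot(theta, s)
        # grad of log N(a; mu, sigma^2) w.r.t. theta is ((a - mu)/sigma^2) * s
        grad += ((a - mu) / sigma ** 2) * np.asarray(s, dtype=float)
    return grad * R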
case.generators[1].p_cost = (0.0, 6.5, 200.0)  # polynomial cost coefficients
case.generators[2].p_cost = (0.0, 2.0, 200.0)

case.generators[0].c_shutdown = 100.0

# TODO: Unit de-commitment.
#case.generators[0].p_min = 0.0
#case.generators[1].p_min = 0.0
#case.generators[2].p_min = 0.0

case.generators[0].p_max = 100.0
case.generators[1].p_max = 70.0
case.generators[2].p_max = 70.0

# Roth-Erev learner (see the propensity-update sketch below).
vre = VariantRothErev(experimentation=0.55, recency=0.3)
vre.explorer = BoltzmannExplorer()  # defaults; e.g. tau=100, decay=0.95

learners = [vre, Q(), Reinforce()]

profile = [0.9, 0.6]
m = (20, 75)    # markups
nb = 1          # no. of offers
ns = 3          # no. of states
mx = 60.0       # max markup
weeks = 2       # number of weeks to simulate
days = 2        # days per week
outdir = "/tmp/case6ww1"
dc = True       # use DC OPF
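# Sketch of the modified Roth-Erev propensity update that VariantRothErev
# is modelled on (Nicolaisen, Petrov & Tesfatsion, 2001); illustrative, not
# pyreto's implementation. 'experimentation' spreads part of the reward to
# non-chosen actions; 'recency' forgets old propensities. Action selection
# (e.g. via BoltzmannExplorer, as above) is then driven by the propensities.
def roth_erev_update(q, chosen, reward, experimentation, recency):
    """Return updated propensities q for a discrete action set."""
    n = len(q)
    updated = []
    for j, qj in enumerate(q):
        if j == chosen:
            e = reward * (1.0 - experimentation)
        else:
            e = qj * experimentation / (n - 1)
        updated.append((1.0 - recency) * qj + e)
    return updated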