Example No. 1
import pickle
from copy import deepcopy

import numpy as np
import matplotlib.pyplot as plt

# Project-local modules (module paths assumed from the file names mentioned in the comments below).
from environment import Environment
from learningAgent import LearningAgent
from functionApproximation import FunctionApproximation


def main():
	isTrainingOn = True 
	gamma = 0.99 
	eta = 0.9 
	day_chunk = 10
	total_years = 2000
	episode_number = 0
	E_cap = 6.0
	P_cap = 3.0
	E_init = 0.3*E_cap
	epsilon = 1.0
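	# Discrete charge/discharge actions spanning [-P_cap, P_cap] in steps of 0.5.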
	actions = np.arange(- P_cap, P_cap + 0.01, 0.5).tolist()
	#actions.sort()
	total_number_hours = 24
	look_ahead = 1

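	# batch / miniBatchSize / bufferLength configure the experience-replay update
	# (the replay-based update further down is currently commented out).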
	batch = []
	
	miniBatchSize = 400
	bufferLength = 500
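	# Per-step logs (survival time, grid power, rewards, actions) used for diagnostics and plots.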
	lasting_list = []
	grid_list = []
	reward_list = []
	action_list = []
	
	#Creation of objects
	environment = Environment(gamma, eta, day_chunk, total_years)
	environment.setCurrentState(episode_number, E_init)
	learningAgent = LearningAgent(environment.currentState, actions, E_cap, P_cap, epsilon)
	funtionApproximator = FunctionApproximation('sgd', actions)

	#starting main episode loop
	total_iterations = total_years * day_chunk #day_chunk*total_years

	while(episode_number < total_iterations) :
		lasting = 1
		action_index = 0  # last chosen action index for this day
		#print (episode_number)
		for time in range(total_number_hours) :
			
			'''
			Changes in this loop (Siddharth):
			1. Added an exploration function in learningAgent.py
			2. Corrected the isValid condition
			3. lasting_list: number of time steps the agent survives before failure (P_grid < 0)
			4. Edited nextStep in environment (currentState update)
			'''

			K = look_ahead
			
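			# Epsilon-greedy selection: explore with probability epsilon, otherwise take the
			# greedy look-ahead action from the current Q-function approximation.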
			currentStateBackup = deepcopy(learningAgent.currentState)
			if np.random.random() <= learningAgent.epsilon:
				K = 0
				action_sequence, rewardCumulative = learningAgent.exploration(episode_number, time, environment, K)
			else:
				action_sequence, rewardCumulative = learningAgent.getAction(episode_number, learningAgent.currentState, funtionApproximator, environment, look_ahead, gamma, time)
				if action_sequence[0] is None:
					break

			if action_sequence[0] is None:
				# Exploration returned no feasible action: penalise the backed-up state under
				# the last chosen action index and end the day.
				qvalue = -10
				funtionApproximator.update_qfunction(currentStateBackup, action_index, qvalue)
				break

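			# Step the environment one hour with the chosen actions; nextStep returns the successor
			# state, the Q-value target for this transition, and a flag that ends the episode.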
			nextState, qvalue, isValid = environment.nextStep(episode_number, time, [learningAgent.actions[action_index] for action_index in action_sequence], K, funtionApproximator, learningAgent)
			#print 'done'
			#print currentStateBackup, actions[action_sequence[0]], nextState

			action_index = action_sequence[0] 
			action_taken = learningAgent.actions[action_sequence[0]]
			action_list.append(action_taken)
			grid_list.append(environment.getP_grid(currentStateBackup, action_taken))
			#Experience Tuple
			###
			####currentStateBackup.append(action_taken) #indexed the actions to change experience tuple
			####batch.append((currentStateBackup, qvalue))
			'''
			currentStateBackup.append(qvalue)
			currentStateBackup.append(action_index)
			batch.append(currentStateBackup)

			if(len(batch) >= bufferLength) :
				miniBatch = random.sample(batch, miniBatchSize)
				funtionApproximator.update_qfunction(miniBatch, learningAgent)
				batch = []
			'''
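			# Online SGD update of the Q-function approximator towards qvalue for (state, action index).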
			funtionApproximator.update_qfunction(currentStateBackup, action_index, qvalue)

			if (isValid) :
				episode_number += 1
				temp = environment.setCurrentState(episode_number, E_init)
				learningAgent.currentState = temp
				break

			learningAgent.currentState = nextState
			lasting += 1
			lasting_list.append(lasting)

		# Linear epsilon decay over the whole training run.
		if learningAgent.epsilon >= 0.0:
			learningAgent.epsilon -= 1.0 / total_iterations
		
		if(episode_number%100 == 0) :
			print ("done with episode number = " + str(episode_number))
			print ("lasted days = ", len([1 for x in lasting_list if x >= 24]))
		
		episode_number += 1
		reward_list.append(rewardCumulative)
		
	plt.plot(action_list)
	plt.xlabel('Training step')
	plt.ylabel('Action value')
	plt.show()
	plt.plot(grid_list)
	#plt.plot(reward_list)
	plt.xlabel('Training step')
	plt.ylabel('Grid value')
	plt.show()
	print(learningAgent.epsilon)

	# Persist the trained Q-function models (pickle requires binary mode).
	with open('model_store_k1_summer', 'wb') as fp:
		pickle.dump(funtionApproximator.models, fp)
Example No. 2
import pickle
from copy import deepcopy

import numpy as np
import matplotlib.pyplot as plt

# Project-local modules (module paths assumed, as in Example No. 1).
from environment import Environment
from learningAgent import LearningAgent
from functionApproximation import FunctionApproximation


def main():
	gamma = 0.89 
	eta = 0.9 
	day_chunk = 2
	total_years = 1
	episode_number = 0
	E_cap = 6.0
	P_cap = 3.0
	E_init = 0.3*E_cap
	epsilon = 0.5
	actions = np.arange(-P_cap, P_cap + 0.01, 0.5).tolist()
	actions.sort()
	total_number_hours = 24
	look_ahead = 1

	savings = list()

	#Creation of objects
	# Load the Q-function models trained in Example No. 1 (binary mode for pickle).
	with open('model_store_k1_summer', 'rb') as fp:
		models = pickle.load(fp)
	#starting main episode loop
	total_iterations = 1  # total_years * day_chunk
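	# Run 50 evaluation roll-outs with the loaded models and record the bill savings of each.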
	for i in range(50):
		grid_list = []
		reward_list = list()
		action_list = list()
		energy_list = list()
		price_list = list()
		load_action_list = list()
		episode_number = 0
		environment = Environment(gamma, eta, day_chunk, total_years)
		environment.setCurrentState(episode_number, E_init)
		learningAgent = LearningAgent(environment.currentState, actions, E_cap, P_cap, epsilon)
		funtionApproximator = FunctionApproximation('sgd', actions)
		funtionApproximator.models = models
		agent_bill, base_bill = 0.0, 0.0

		
		while(episode_number < total_iterations) :
			grid_list = list()
			cost_list = list()
			load_list = list()
			netload_list = list()
			solar_list = list()
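			# 'add' flags roll-outs where the policy returned no feasible action; those are excluded from the savings.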
			add = False
			for time in range(total_number_hours) :
				
				energy_list.append(learningAgent.currentState[1])

				action_sequence, rewardCumulative = learningAgent.getAction(episode_number, environment.currentState, funtionApproximator, environment, look_ahead, gamma, time)
				if action_sequence[0] is None:
					add = True
					break
				print("action sequence:", action_sequence)
				copy_current = deepcopy(environment.currentState)
				nextState, qvalue, isValid = environment.nextStep(episode_number, time, [learningAgent.actions[action_index] for action_index in action_sequence], look_ahead, funtionApproximator, learningAgent)
				action_taken = learningAgent.actions[action_sequence[0]]
				print(action_taken)

				#print('Episode', episode_number, 'current', learningAgent.currentState, 'action', action_taken,'next', nextState)
				grid_list.append(environment.getP_grid(copy_current, action_taken))
				action_list.append(action_taken)
				netload_list.append(learningAgent.currentState[0])
				load_list.append(environment.getLoad(episode_number, time))
				solar_list.append(environment.getSolar(episode_number, time))
				price_list.append(learningAgent.currentState[2]*100-4)
				cost_list.append(learningAgent.currentState[2])

				learningAgent.currentState = nextState

			episode_number += 1
			reward_list.append(rewardCumulative)
			# Bill under the agent's battery control vs. the baseline of serving the net load from the grid.
			agent_bill = sum([a*b for a, b in zip(cost_list, grid_list)])
			base_bill = sum([max(0, a*b) for a, b in zip(cost_list, netload_list)])
			if not add :
				savings.append(base_bill - agent_bill)
	
	print(savings, len(savings))
	print('Mean Savings:', np.mean(savings), '+/-', np.std(savings))
	#sns.distplot(savings)
	plt.plot(savings)
	plt.show()
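	# Overlay of the last roll-out's grid power, battery actions, load, scaled price and solar profile.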
	plt.plot(grid_list, label='Grid', c='r')
	plt.plot(action_list, label='Charge/Discharge', c='c')
	plt.plot(load_list, label='Load')
	plt.plot(price_list, label='Price (Scaled up)')
	plt.plot(solar_list, label='Solar Power', c='y')
	plt.legend(loc=1, mode="expand", borderaxespad=0.)
	plt.xticks(np.arange(1, len(grid_list)+1, 1.0))
	plt.yticks(np.arange(-3,11, 0.5))
	plt.xlabel('Hours')
	plt.ylabel('Power / Price')
	plt.show()