Exemplo n.º 1
0
def get_dataset(examples):
    """Build the training set for the kinematic-model correction.

    For each consecutive pair of states in each example, the prediction
    ``staticGroupSimple2(state, action, 0.129)`` is compared with the state
    that was actually recorded next.

    Args:
        examples: re-iterable collection (it is traversed twice) of objects
            exposing ``states`` and ``actions``; both are sliced as 2-D
            arrays (``[:-1, :]``).

    Returns:
        Tuple ``(X, diff)``:
            X: ``[state, action]`` rows for every step but the last of each
                example, concatenated over all examples.
            diff: ``prediction - recorded next state`` for the same steps.
                Column 0 holds ``sin(difference / 2)``, with entries whose
                magnitude exceeds 0.2 set to 0.
    """
    predicted = []
    observed = []
    for example in examples:
        for n, step in enumerate(example.states[:-1]):
            predicted.append(
                staticGroupSimple2(step, example.actions[n], 0.129))
            observed.append(example.states[n + 1])

    diff = np.array(predicted) - np.array(observed)
    # NOTE(review): column 0 looks like an angle; sin(d / 2) bounds the error
    # to [-1, 1] -- confirm against the state layout.
    diff[:, 0] = np.sin(diff[:, 0] / 2)

    # Simple outlier filter: drop first-column errors larger than 0.2.
    # Boolean-mask assignment replaces the former per-row Python loop.
    diff[np.absolute(diff[:, 0]) > 0.2, 0] = 0

    X = np.concatenate(
        [
            np.hstack((example.states[:-1, :], example.actions[:-1, :]))
            for example in examples
        ],
        axis=0)
    return X, diff
Exemplo n.º 2
0
    def buildTransitionFunction(self, bin_samples, learn=True):
        """Estimate the discrete transition model and store it on ``self``.

        For every (state, action) pair, ``bin_samples`` quantities are drawn
        from the state's bins, pushed through ``staticGroupSimple2`` minus the
        correction returned by ``predict_next``, and the resulting next state
        is counted in ``transition_f[action, state, next_state]``. The counts
        are optionally combined with ``learn_tran()`` and then normalised
        over the next-state axis.

        Results are stored in ``self.transition_f`` (dense array) and in
        ``self.transition`` (a ``Transition`` with ``tot_states``,
        ``tot_actions`` and the sparse ``backward`` / ``forward`` tables).

        Args:
            bin_samples: number of samples per (state, action) pair; must be
                at least 1. With exactly 1, ``binsToQuantity`` is called with
                ``sample=False``, otherwise with ``sample=True``.
            learn: when true, ``2 * learn_tran()`` is added to the counts
                before normalisation.

        Raises:
            ValueError: if ``bin_samples`` is smaller than 1. The original
                ``assert`` lived inside the sampling loop and could never
                fire, so a zero count silently produced 0/0 rows.
        """
        if bin_samples < 1:
            raise ValueError(
                "bin_samples must be at least 1, got %r" % (bin_samples,))

        def transition_smart():
            """Index the non-zero entries of ``transition_f`` in dict tables."""
            self.transition = Transition()
            tot_states = self.transition.tot_states = transition_f.shape[1]
            tot_actions = self.transition.tot_actions = transition_f.shape[0]
            table_size = tot_states * tot_actions
            self.transition.backward = [{} for _ in xrange(table_size)]
            self.transition.forward = [{} for _ in xrange(table_size)]
            # Each row of argwhere is (action, state, next_state).
            for a, s, s_next in np.argwhere(transition_f):
                prob = transition_f[a, s, s_next]
                # backward: slot (action, state), keyed by next state.
                self.transition.backward[a + s * tot_actions][str(
                    s_next)] = prob
                # forward: slot (action, next state), keyed by state.
                self.transition.forward[a + s_next * tot_actions][str(
                    s)] = prob
            # Debug dump kept from the original; the parenthesised form
            # prints the same under Python 2 and also parses under Python 3.
            print(self.transition.backward)

        tot_actions = self.disc.tot_actions
        tot_states = self.disc.tot_states
        transition_f = np.zeros([tot_actions, tot_states, tot_states])
        estimators = learn_correction(10, 10)
        # Sampling inside a bin is only requested for more than one sample.
        samp = bin_samples != 1
        for i in xrange(tot_states):
            for j in xrange(tot_actions):
                bins = self.disc.stateToBins(i)
                for _ in xrange(bin_samples):
                    quantity = self.disc.binsToQuantity(bins, sample=samp)
                    action = self.disc.indexToAction(j)
                    correction = predict_next(quantity, action, estimators)
                    # NOTE(review): presumably inverts a sin(x / 2) encoding
                    # of the first target component -- confirm.
                    correction[0] = np.arcsin(correction[0]) * 2
                    next_quantity = staticGroupSimple2(quantity,
                                                       action) - correction
                    next_state = self.disc.quantityToState(next_quantity)
                    transition_f[j, i, next_state] += 1
        if learn:
            # Learned transitions enter with weight 2 relative to the counts.
            transition_f = np.add(transition_f, 2 * learn_tran())
        # Normalise each (action, state) row over the next-state axis.
        row_sums = np.sum(transition_f, axis=2)
        transition_f = transition_f / row_sums[:, :, np.newaxis]
        self.transition_f = transition_f
        transition_smart()
Exemplo n.º 3
0
	def buildTransitionFunction(self, bin_samples, learn=True):
		"""Estimate the discrete transition model and store it on ``self``.

		For every (state, action) pair, ``bin_samples`` quantities are drawn
		from the state's bins, pushed through ``staticGroupSimple2`` minus the
		correction returned by ``predict_next``, and the resulting next state
		is counted in ``transition_f[action, state, next_state]``. The counts
		are optionally combined with ``learn_tran()`` and then normalised
		over the next-state axis.

		Results are stored in ``self.transition_f`` (dense array) and in
		``self.transition`` (a ``Transition`` with ``tot_states``,
		``tot_actions`` and the sparse ``backward`` / ``forward`` tables).

		Args:
			bin_samples: number of samples per (state, action) pair; must be
				at least 1. With exactly 1, ``binsToQuantity`` is called with
				``sample=False``, otherwise with ``sample=True``.
			learn: when true, ``2 * learn_tran()`` is added to the counts
				before normalisation.

		Raises:
			ValueError: if ``bin_samples`` is smaller than 1. The original
				``assert`` lived inside the sampling loop and could never
				fire, so a zero count silently produced 0/0 rows.
		"""
		if bin_samples < 1:
			raise ValueError("bin_samples must be at least 1, got %r" % (bin_samples,))

		def transition_smart():
			"""Index the non-zero entries of ``transition_f`` in dict tables."""
			self.transition = Transition()
			tot_states = self.transition.tot_states = transition_f.shape[1]
			tot_actions = self.transition.tot_actions = transition_f.shape[0]
			table_size = tot_states * tot_actions
			self.transition.backward = [{} for _ in xrange(table_size)]
			self.transition.forward = [{} for _ in xrange(table_size)]
			# Each row of argwhere is (action, state, next_state).
			for a, s, s_next in np.argwhere(transition_f):
				prob = transition_f[a, s, s_next]
				# backward: slot (action, state), keyed by next state.
				self.transition.backward[a + s * tot_actions][str(s_next)] = prob
				# forward: slot (action, next state), keyed by state.
				self.transition.forward[a + s_next * tot_actions][str(s)] = prob
			# Debug dump kept from the original; the parenthesised form
			# prints the same under Python 2 and also parses under Python 3.
			print(self.transition.backward)

		tot_actions = self.disc.tot_actions
		tot_states = self.disc.tot_states
		transition_f = np.zeros([tot_actions, tot_states, tot_states])
		estimators = learn_correction(10, 10)
		# Sampling inside a bin is only requested for more than one sample.
		samp = bin_samples != 1
		for i in xrange(tot_states):
			for j in xrange(tot_actions):
				bins = self.disc.stateToBins(i)
				for _ in xrange(bin_samples):
					quantity = self.disc.binsToQuantity(bins, sample=samp)
					action = self.disc.indexToAction(j)
					correction = predict_next(quantity, action, estimators)
					# NOTE(review): presumably inverts a sin(x / 2) encoding
					# of the first target component -- confirm.
					correction[0] = np.arcsin(correction[0]) * 2
					next_quantity = staticGroupSimple2(quantity, action) - correction
					next_state = self.disc.quantityToState(next_quantity)
					transition_f[j, i, next_state] += 1
		if learn:
			# Learned transitions enter with weight 2 relative to the counts.
			transition_f = np.add(transition_f, 2 * learn_tran())
		# Normalise each (action, state) row over the next-state axis.
		row_sums = np.sum(transition_f, axis=2)
		transition_f = transition_f / row_sums[:, :, np.newaxis]
		self.transition_f = transition_f
		transition_smart()
Exemplo n.º 4
0
def get_dataset(examples):
	"""Build the training set for the kinematic-model correction.

	For each consecutive pair of states in each example, the prediction
	``staticGroupSimple2(state, action, 0.129)`` is compared with the state
	that was actually recorded next.

	Args:
		examples: re-iterable collection (it is traversed twice) of objects
			exposing ``states`` and ``actions``; both are sliced as 2-D
			arrays (``[:-1, :]``).

	Returns:
		Tuple ``(X, diff)``:
			X: ``[state, action]`` rows for every step but the last of each
				example, concatenated over all examples.
			diff: ``prediction - recorded next state`` for the same steps.
				Column 0 holds ``sin(difference / 2)``, with entries whose
				magnitude exceeds 0.2 set to 0.
	"""
	next_kin = []
	next_data = []
	for example in examples:
		for n, step in enumerate(example.states[:-1]):
			next_kin.append(staticGroupSimple2(step, example.actions[n], 0.129))
			next_data.append(example.states[n + 1])

	diff = np.array(next_kin) - np.array(next_data)
	# NOTE(review): column 0 looks like an angle; sin(d / 2) bounds the error
	# to [-1, 1] -- confirm against the state layout.
	diff[:, 0] = np.sin(diff[:, 0] / 2)

	# Simple outlier filter, vectorised: zero first-column errors above 0.2
	# with one boolean-mask assignment instead of a per-row Python loop.
	too_large = np.absolute(diff[:, 0]) > 0.2
	diff[too_large, 0] = 0

	per_example = [
		np.hstack((example.states[:-1, :], example.actions[:-1, :]))
		for example in examples
	]
	X = np.concatenate(per_example, axis=0)
	return X, diff