def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the transition tensor by sampling and index it sparsely.

    For every (state, action) pair, ``bin_samples`` quantities are obtained
    from the state's bins via ``self.disc.binsToQuantity``, advanced with
    ``staticGroupSimple`` and mapped back to a discrete state; each outcome
    adds one count to ``transition_f[action, state, next_state]``.  The
    counts are optionally augmented with ``learn_tran()``, normalised along
    the next-state axis and stored in ``self.transition_f``.  Finally
    ``self.transition`` is rebuilt as a dict holding ``tot_states``,
    ``tot_actions`` and the sparse ``backward`` / ``forward`` tables.

    Args:
        bin_samples: Number of samples per (state, action) pair; must be at
            least 1.  With exactly 1, ``binsToQuantity`` is called with
            ``sample=False``; otherwise with ``sample=True``.
        learn: When truthy, the array returned by ``learn_tran()`` is added
            to the sampled counts before normalising.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1.
    """
    # Validate before doing any work: with no samples the count rows can sum
    # to zero and the normalisation below would divide by zero.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart():
        # Sparse view of the normalised tensor: one dict per flat index,
        # holding only the non-zero entries.
        tot_actions = transition_f.shape[0]
        tot_states = transition_f.shape[1]
        table_size = tot_states * tot_actions
        self.transition = {}
        self.transition["tot_states"] = tot_states
        self.transition["tot_actions"] = tot_actions
        self.transition["backward"] = [{} for _ in range(table_size)]
        self.transition["forward"] = [{} for _ in range(table_size)]
        backward = self.transition["backward"]
        forward = self.transition["forward"]
        for action_idx, state_idx, next_idx in np.argwhere(transition_f):
            prob = transition_f[action_idx, state_idx, next_idx]
            # "backward": flat index from (action, source state), keyed by
            # the next state.
            backward[action_idx + state_idx * tot_actions][str(next_idx)] = prob
            # "forward": flat index from (action, next state), keyed by the
            # source state.
            forward[action_idx + next_idx * tot_actions][str(state_idx)] = prob

    transition_f = np.zeros([
        self.disc.tot_actions, self.disc.tot_states, self.disc.tot_states
    ])

    # A single sample per pair is requested with sample=False.
    samp = bin_samples != 1

    for state in range(self.disc.tot_states):
        for action_index in range(self.disc.tot_actions):
            bins = self.disc.stateToBins(state)
            for _ in range(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(action_index)
                next_quantity = staticGroupSimple(quantity, action)
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[action_index, state, next_state] += 1

    if learn:
        transition_f = np.add(transition_f, learn_tran())

    # Normalise every (action, state) row over the next-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f
    transition_smart()
def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the corrected transition tensor and index it sparsely.

    For every (state, action) pair, ``bin_samples`` quantities are obtained
    from the state's bins via ``self.disc.binsToQuantity``.  Each one is
    advanced with ``staticGroupSimple2`` and then reduced by the correction
    that ``predict_next`` returns for the estimators built by
    ``learn_correction(10, 10)``; the first correction component is replaced
    by ``2 * arcsin`` of itself before it is subtracted.  The resulting
    discrete next state adds one count to
    ``transition_f[action, state, next_state]``.  The counts are optionally
    augmented with twice ``learn_tran()``, normalised along the next-state
    axis and stored in ``self.transition_f``.  Finally ``self.transition``
    is rebuilt as a ``Transition`` object carrying ``tot_states``,
    ``tot_actions`` and the sparse ``backward`` / ``forward`` tables.

    Args:
        bin_samples: Number of samples per (state, action) pair; must be at
            least 1.  With exactly 1, ``binsToQuantity`` is called with
            ``sample=False``; otherwise with ``sample=True``.
        learn: When truthy, ``2 * learn_tran()`` is added to the sampled
            counts before normalising.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1.
    """
    # Validate before doing any work: with no samples the count rows can sum
    # to zero and the normalisation below would divide by zero.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart():
        # Sparse view of the normalised tensor: one dict per flat index,
        # holding only the non-zero entries.
        tot_actions = transition_f.shape[0]
        tot_states = transition_f.shape[1]
        table_size = tot_states * tot_actions
        self.transition = Transition()
        self.transition.tot_states = tot_states
        self.transition.tot_actions = tot_actions
        self.transition.backward = [{} for _ in range(table_size)]
        self.transition.forward = [{} for _ in range(table_size)]
        backward = self.transition.backward
        forward = self.transition.forward
        for action_idx, state_idx, next_idx in np.argwhere(transition_f):
            prob = transition_f[action_idx, state_idx, next_idx]
            # "backward": flat index from (action, source state), keyed by
            # the next state.
            backward[action_idx + state_idx * tot_actions][str(next_idx)] = prob
            # "forward": flat index from (action, next state), keyed by the
            # source state.
            forward[action_idx + next_idx * tot_actions][str(state_idx)] = prob
        # Debug dump of the finished table; the parenthesised single-argument
        # form prints the same text under Python 2 and Python 3.
        print(self.transition.backward)

    transition_f = np.zeros([
        self.disc.tot_actions, self.disc.tot_states, self.disc.tot_states
    ])
    estimators = learn_correction(10, 10)

    # A single sample per pair is requested with sample=False.
    samp = bin_samples != 1

    for state in range(self.disc.tot_states):
        for action_index in range(self.disc.tot_actions):
            bins = self.disc.stateToBins(state)
            for _ in range(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(action_index)
                correction = predict_next(quantity, action, estimators)
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = staticGroupSimple2(quantity, action) - correction
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[action_index, state, next_state] += 1

    if learn:
        transition_f = np.add(transition_f, 2 * learn_tran())

    # Normalise every (action, state) row over the next-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f
    transition_smart()
def buildTransitionFunction(self, bin_samples, learn=True):
    """Build ``self.transition_f`` and ``self.transition`` from samples.

    Sampling: for each state and each action index, ``bin_samples``
    quantities are taken from ``self.disc.binsToQuantity`` for that state's
    bins.  Every quantity is stepped with ``staticGroupSimple2`` and the
    correction from ``predict_next`` (using the estimators returned by
    ``learn_correction(10, 10)``) is subtracted, after its first component
    has been replaced by ``2 * arcsin`` of itself.  The discrete next state
    increments ``transition_f[action, state, next_state]``.

    Post-processing: when ``learn`` is truthy ``2 * learn_tran()`` is added
    to the counts; the tensor is then normalised over its last axis and
    stored in ``self.transition_f``.  ``self.transition`` becomes a new
    ``Transition`` with ``tot_states``, ``tot_actions`` and the sparse
    ``backward`` / ``forward`` lists of dicts.

    Args:
        bin_samples: Samples per (state, action) pair, at least 1.  A value
            of exactly 1 calls ``binsToQuantity`` with ``sample=False``; any
            larger value uses ``sample=True``.
        learn: Whether to add ``2 * learn_tran()`` before normalising.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1.
    """
    # Reject an empty sampling run early: rows without any count can sum to
    # zero, and dividing by that sum below would produce invalid entries.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart():
        # Store only the non-zero entries of the normalised tensor, one dict
        # per flat (action, state) index.
        tot_actions = transition_f.shape[0]
        tot_states = transition_f.shape[1]
        table_size = tot_states * tot_actions
        self.transition = Transition()
        self.transition.tot_states = tot_states
        self.transition.tot_actions = tot_actions
        self.transition.backward = [{} for _ in range(table_size)]
        self.transition.forward = [{} for _ in range(table_size)]
        backward = self.transition.backward
        forward = self.transition.forward
        for action_idx, state_idx, next_idx in np.argwhere(transition_f):
            prob = transition_f[action_idx, state_idx, next_idx]
            # Indexed by (action, source state); keyed by next state.
            backward[action_idx + state_idx * tot_actions][str(next_idx)] = prob
            # Indexed by (action, next state); keyed by source state.
            forward[action_idx + next_idx * tot_actions][str(state_idx)] = prob
        # Debug dump of the finished table; this call form behaves the same
        # under Python 2 and Python 3 for a single argument.
        print(self.transition.backward)

    transition_f = np.zeros([
        self.disc.tot_actions, self.disc.tot_states, self.disc.tot_states
    ])
    estimators = learn_correction(10, 10)

    # Only draw random samples when more than one sample is requested.
    samp = bin_samples != 1

    for state in range(self.disc.tot_states):
        for action_index in range(self.disc.tot_actions):
            bins = self.disc.stateToBins(state)
            for _ in range(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(action_index)
                correction = predict_next(quantity, action, estimators)
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = staticGroupSimple2(quantity, action) - correction
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[action_index, state, next_state] += 1

    if learn:
        transition_f = np.add(transition_f, 2 * learn_tran())

    # Turn counts into per-row distributions over the next state.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f
    transition_smart()
def buildTransitionFunction(self, bin_samples, learn=True):
    """Build ``self.transition_f`` and ``self.transition`` from samples.

    Sampling: for each state and each action index, ``bin_samples``
    quantities are taken from ``self.disc.binsToQuantity`` for that state's
    bins, stepped with ``staticGroupSimple`` and converted back to a
    discrete state, which increments
    ``transition_f[action, state, next_state]``.

    Post-processing: when ``learn`` is truthy the array from
    ``learn_tran()`` is added to the counts; the tensor is then normalised
    over its last axis and stored in ``self.transition_f``.
    ``self.transition`` becomes a dict with the keys ``tot_states``,
    ``tot_actions``, ``backward`` and ``forward``; the last two are lists
    of dicts holding only the non-zero entries.

    Args:
        bin_samples: Samples per (state, action) pair, at least 1.  A value
            of exactly 1 calls ``binsToQuantity`` with ``sample=False``; any
            larger value uses ``sample=True``.
        learn: Whether to add ``learn_tran()`` before normalising.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1.
    """
    # Reject an empty sampling run early: rows without any count can sum to
    # zero, and dividing by that sum below would produce invalid entries.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart():
        # Store only the non-zero entries of the normalised tensor, one dict
        # per flat (action, state) index.
        tot_actions = transition_f.shape[0]
        tot_states = transition_f.shape[1]
        table_size = tot_states * tot_actions
        self.transition = {}
        self.transition["tot_states"] = tot_states
        self.transition["tot_actions"] = tot_actions
        self.transition["backward"] = [{} for _ in range(table_size)]
        self.transition["forward"] = [{} for _ in range(table_size)]
        backward = self.transition["backward"]
        forward = self.transition["forward"]
        for action_idx, state_idx, next_idx in np.argwhere(transition_f):
            prob = transition_f[action_idx, state_idx, next_idx]
            # Indexed by (action, source state); keyed by next state.
            backward[action_idx + state_idx * tot_actions][str(next_idx)] = prob
            # Indexed by (action, next state); keyed by source state.
            forward[action_idx + next_idx * tot_actions][str(state_idx)] = prob

    transition_f = np.zeros([
        self.disc.tot_actions, self.disc.tot_states, self.disc.tot_states
    ])

    # Only draw random samples when more than one sample is requested.
    samp = bin_samples != 1

    for state in range(self.disc.tot_states):
        for action_index in range(self.disc.tot_actions):
            bins = self.disc.stateToBins(state)
            for _ in range(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(action_index)
                next_quantity = staticGroupSimple(quantity, action)
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[action_index, state, next_state] += 1

    if learn:
        transition_f = np.add(transition_f, learn_tran())

    # Turn counts into per-row distributions over the next state.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f
    transition_smart()