#sim((run, Prob, P))
		print "-> RUN "+str(run)
		# count history
		#ch = np.zeros(MAXITER+1, dtype=INTTYPE)
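		# ch is assumed to be allocated before the run loop (it is indexed as ch[run,0] below)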
		# controller initial state
		ci = L.names[random.choice(L.S)]
		# environment initial state (non observable)
		ei = M.names[random.choice(M.S)]
		# data tracking		
		ch[run,0] = func.counter(ci,ei,DIM,COUNTER)
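		# initial belief support: every product state whose controller component matches ci, weighted uniformly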
		init = { s for s in P.S if P.L.names[s[0]] == ci }
		distr = { s:1./len(init) for s in init }
		# initialize belief state
		b = belief.belief(P,P.Z,init=distr)
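		# b.d maps product states (controller index, environment index) to probabilities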
		# first observation
		o = func.weighted_choice(
			P.Z[((L.inv_names[ci]),(M.inv_names[ei]))])
		# belief update
		b.update(obs=o)
		if PRINT:
			print 'b(s):',b.d[((L.inv_names[ci]),(M.inv_names[ei]))] \
				if ((L.inv_names[ci]),(M.inv_names[ei])) in b.d else 0
			rob.print_grid(ci,ei,DIM,pause=PAUSE)

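		# closed-loop simulation: at every step the scheduler picks an action from the current belief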
		for it in range(MAXITER):
			# scheduled choice
			a = None
			if SCHEDULER == 0:
				a = sched.beliefScheduler(b, P, Prob, sched.avgMinVal, verbose=PRINT)
			elif SCHEDULER == 1:
				a = sched.beliefScheduler(b, P, Prob, sched.maxMinVal, verbose=PRINT)
			elif SCHEDULER == 2:
				# explicit argmax over actions (see the inlined version in the run loop below)
				pass
	# probability values computed with PRISM
	Prob = imp.importProb(PRISMFILENAME,NPAR)

	for run in range(RUNS):
		#sim((run, Prob, P))
		print "-> RUN "+str(run)
		# controller initial state
		ci = L.names[random.choice(L.S)]
		# environment initial state (not observable)
		ei = 'tl' #M.names[random.choice([M.inv_names['tl'],M.inv_names['tr']])]

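		# initial belief: the controller state ci is known, the environment is in 'tl' or 'tr' with equal probability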
		init = { s for s in P.S if P.L.names[s[0]] == ci }
		distr = {(L.inv_names[ci],M.inv_names['tl']):0.5, (L.inv_names[ci],M.inv_names['tr']):0.5}
		# initialize belief state
		b = belief.belief(P,P.Z,init=distr)
		# first observation
		o = func.weighted_choice(
			P.Z[((L.inv_names[ci]),(M.inv_names[ei]))])
		print 'init: ', b.d
		# belief update
		b.update(obs=o)
		if PRINT:
			print 'b(s):',b.d[((L.inv_names[ci]),(M.inv_names[ei]))] \
				if ((L.inv_names[ci]),(M.inv_names[ei])) in b.d else 0

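		# closed-loop simulation: choose an action, move, observe, and update the belief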
		for it in range(MAXITER):
			print 'belief: ', b.d
			# scheduled choice
			a = None
			if SCHEDULER == 0:
				a = sched.beliefScheduler(b, P, Prob, sched.avgMinVal, verbose=PRINT)
			elif SCHEDULER == 1:
				a = sched.beliefScheduler(b, P, Prob, sched.maxMinVal, verbose=PRINT)
			elif SCHEDULER == 2:
				# evaluate every action explicitly and keep the best-valued ones
				maximum = float('-inf')
				act_max = set()
				for a in L.A:
					ba = b.returnUpdate(act=a)
					val = sched.avgMinScheduler(ba,Prob,P)
					#val = rob.opt(ba,Prob,P)
					if round(val,10) == round(maximum,10):
						act_max.add(a)
						print 'EQ act:',a,' - val:',val
					elif val > maximum:
						maximum = val
						act_max = {a}
						print 'OK act:',a,' - val:',val
					else:
						print 'NO act:',a,' - val:',val
				# break ties between equally valued actions uniformly at random
				a = random.choice(list(act_max))
			# apply action
			ci = rob.step(ci,a,DIM)
			ei = (rob.step(ei[0],random.choice(L.A),DIM),
				rob.step(ei[1],random.choice(L.A),DIM))
			# extract observation
			o = func.weighted_choice(P.Z[(L.inv_names[ci],M.inv_names[ei])])
			# belief update
			b.update(act=a,obs=o)
			print 'act:',a
			print 'obs:',o
			print 'b(s):',b.d[(L.inv_names[ci],M.inv_names[ei])] \
				if (L.inv_names[ci],M.inv_names[ei]) in b.d else 0
			rob.print_grid(ci,ei,DIM,pause=PAUSE)