def __init__(self, pomdp, prior):
		"""
		Generation of the extended-state MDP whose states are the
		(state, observation) pairs of the POMDP.
		Note: the belief-space __init__ defined below replaces this
		one, since Python only keeps the last definition of a method.
			:param pomdp: partially observable model
			:param prior: initial belief distribution
		"""
		self.pomdp = pomdp
		# initial belief
		self.prior = prior
		# extended states: (state, observation) pairs
		S = set(it.product(pomdp.states.values(),pomdp.observations.values()))
		# transition function
		T = dict()
		for (s1,o1),a in it.product(S,pomdp.actions.values()):
			for (s2,o2) in S:
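				# extended-state transition probability:
				# Pr((s2,o2) | (s1,o1), a) = T(s1,a,s2) * O(s2,o2);
				# zero-probability pairs are left out of T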
				s1_id, a_id = pomdp.inv_states[s1], pomdp.inv_actions[a]
				s2_id, o2_id = pomdp.inv_states[s2], pomdp.inv_observations[o2]
				if s2_id in pomdp.transitionFunction[(s1_id,a_id)] and o2_id in pomdp.observationFunction[s2_id]:
					if ((s1,o1),a) not in T:
						T[((s1,o1),a)] = dict()
					T[((s1,o1),a)][(s2,o2)] = pomdp.transitionFunction[(s1_id,a_id)][s2_id] * pomdp.observationFunction[s2_id][o2_id]
		print 'S:',S,'\nA:',pomdp.actions.values(),'\nT:',T

		MarkovDecisionProcess.__init__(self,S,pomdp.actions.values(),T)
	def __init__(self, pomdp, horizon, prior):
		""" 
		Generation of the belief space MDP starting from a prior distribution
		and limited to a fixed horizon H, level 0 is the root, level H is
		included in the generation as a special level without outgoing 
		transitions.
			:param pomdp: partially observable model
			:param horizon: generation horizon
			:parm prior: initial belief distribution
		"""
		self.pomdp = pomdp
		# initial belief (root of the generation)
		self.root = prior
		# fringe
		self.fringe = set()
		# belief states
		B = set()
		# belief transition function
		T = dict()
		# belief-state space generation using a BFS
		Q = Queue()
		nxtQ = Queue()
		Q.put(prior)
		level = 0
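		# Q holds the beliefs of the current level, nxtQ collects the
		# beliefs discovered for the next level; level tracks the depth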
		while not Q.empty() and level < horizon:
			current = Q.get()
			B.add(current)
			for act in pomdp.actions:
				for obs in pomdp.observations:
					nxt = self.beliefUpdate(pomdp,current,act,obs)
					# find-the-copy function: returns the first element of
					# lst that equals el (according to el.equals), or None
					find = lambda lst, el : \
						reduce(lambda a,b: a if a is not None else b, \
						map(lambda x : x if el.equals(x) else None, lst))
					# search for the same belief state
					same = find(list(B) + list(Q.queue) + list(nxtQ.queue),nxt)
					# enqueue the state only if it has not been visited before
					if same is None:
						nxtQ.put(nxt)
					else:
						# keep the existing reference so the probability mass
						# is attached to the already-visited belief
						nxt = same
					# add the probability mass to the transition function
					# compute Pr(o|b,a)
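					# Pr(o|b,a) = sum_s b(s) * sum_s' T(s,a,s') * O(s',o);
					# the inner sum only needs the support of the updated belief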
					pr = 0.0
					for i1,p1 in current.distr.iteritems():
						sub_pr = 0.0
						for i2,p2 in nxt.distr.iteritems():
							if i2 in pomdp.transitionFunction[(i1,act)] and \
								obs in pomdp.observationFunction[i2]:
								sub_pr += pomdp.transitionFunction[(i1,act)][i2] \
									* pomdp.observationFunction[i2][obs]
						pr += p1 * sub_pr
					# do not add zero-probability transitions
					if pr > 0.0:
						if (current, pomdp.actions[act]) not in T:
							# if (b,a) entry does not exist, create it
							T[(current,pomdp.actions[act])] = {}
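						# different observations can collapse to the same
						# updated belief, so mass for (b,a,b') is accumulated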
						if nxt not in T[(current,pomdp.actions[act])]:
							# if (b,a)(b') entry does not exist
							# create it as the probability
							T[(current,pomdp.actions[act])][nxt] = pr
						else:
							# if (b,a)(b') entry already exists 
							# increment the probability
							T[(current,pomdp.actions[act])][nxt] += pr
			# if Q is empty replace it with nxtQ of the next level
			if Q.empty():
				Q = nxtQ
				nxtQ = Queue()
				level += 1

		# add the last level from Q: these beliefs form the fringe,
		# without outgoing transitions
		for b in Q.queue:
			B.add(b)
			self.fringe.add(b)

		MarkovDecisionProcess.__init__(self,B,pomdp.actions.values(),T)