def thompson_sample(mxmdl: omdict, prior_a: float = 1, prior_b: float = 1) -> tuple[Atom, float]:
    """Pick an action by Thompson sampling over the mixture model.

    The procedure runs in two passes over the actions of ``mxmdl``:

    1. For each action, one weighted cognitive schematic is drawn from
       the action's candidates with ``weighted_sampling``.
    2. For each drawn schematic, its truth value is fetched with
       ``get_cogscm_tv`` and handed to ``tv_rv`` together with
       ``prior_a`` and ``prior_b`` to obtain a first order probability
       variate.

    The two passes are kept separate so that all schematics are drawn
    before any variate is sampled.

    Args:
        mxmdl: Mixture model; ``mxmdl.listitems()`` must yield
            ``(action, weighted cogscms)`` pairs.
        prior_a: Forwarded unchanged to ``tv_rv``.
        prior_b: Forwarded unchanged to ``tv_rv``.

    Returns:
        The ``(action, probability)`` pair with the highest variate.
        When several pairs share the highest variate, the first one
        in iteration order wins (TODO: take care of ties).

    Raises:
        ValueError: If the mixture model holds no action, because
            ``max`` is then called on an empty list.
    """
    agent_log.fine(
        "thompson_sample(mxmdl={}, prior_a={}, prior_b={})".format(
            mxmdl_to_str(mxmdl), prior_a, prior_b
        )
    )

    # Pass 1: draw one weighted cogscm per action, according to its weight.
    drawn = []
    for action, candidates in mxmdl.listitems():
        drawn.append((action, weighted_sampling(candidates)))
    agent_log.fine("act_w8d_cogscms:\n{}".format(act_w8d_cogscms_to_str(drawn)))

    # Pass 2: turn each drawn cogscm into a first order probability variate.
    variates = []
    for action, picked in drawn:
        # Element 1 of the drawn entry is the cogscm itself.
        tv = get_cogscm_tv(picked[1])
        variates.append((action, tv_rv(tv, prior_a, prior_b)))
    agent_log.fine("act_pblts:\n{}".format(act_pblts_to_str(variates)))

    # Highest variate wins; max() keeps the first maximal pair on ties.
    return max(variates, key=lambda pair: pair[1])
def mxmdl_to_str(mxmdl: omdict, indent: str = "") -> str:
    """Render a mixture model of cogscms as a human-readable string.

    Each action contributes a header line (a newline, ``indent``, the
    text from ``action_to_str`` and another newline) followed by the
    rendering of its weighted cogscms from ``w8d_cogscms_to_str``,
    which is given a deeper indent.

    Args:
        mxmdl: Mixture model; ``mxmdl.listitems()`` must yield
            ``(action, weighted cogscms)`` pairs.
        indent: Prefix placed before each action header.

    Returns:
        The concatenated rendering, or an empty string when the model
        has no items.
    """
    nested_indent = indent + " "
    pieces = []
    for action, w8d_cogscms in mxmdl.listitems():
        pieces.append("\n" + indent + str(action_to_str(action)) + "\n")
        pieces.append(w8d_cogscms_to_str(w8d_cogscms, nested_indent))
    return "".join(pieces)