def _make_rvs(self):
    """Build the random variables for the single T=1 return constraint.

    Creates bounded sample sets for returns under the candidate policy
    ('R1') and under the reference policy ('R_ref1'), their expected
    values, and the constraint objective

        g(theta) := (E[R_ref|T=1] - E[R|T=1]) - e

    where ``e`` is ``self.epsilons[0]``. The resulting variables are stored
    on ``self._scheck_rvs``, ``self._ccheck_rvs`` and ``self._eval_rvs``
    and registered with a freshly created ``rvs.VariableManager``.

    NOTE(review): ``mode`` and ``scaling`` are not bound inside this
    method; presumably they are module-level names -- confirm.
    """
    low, high = self._get_return_ranges()
    bounds = dict(lower=low, upper=high)

    # Returns (and their expectation) under the candidate policy.
    cand_returns = rvs.BoundedRealSampleSet(name='R1', **bounds)
    cand_mean = cand_returns.expected_value('E[R|T=1]', mode=mode)

    # Returns (and their expectation) under the reference policy.
    ref_returns = rvs.BoundedRealSampleSet(name='R_ref1', **bounds)
    ref_mean = ref_returns.expected_value('E[R_ref|T=1]', mode=mode)

    # Tolerance constant taken from the first epsilon.
    tolerance = rvs.constant(self.epsilons[0], name='e')

    # BQF is the reference-minus-candidate gap; the constraint objective
    # subtracts the tolerance, once unscaled and once with `scaling`.
    gap = rvs.sum(ref_mean, -cand_mean, name='BQF')
    objective = rvs.sum(gap, -tolerance, name='CO')
    scaled_objective = rvs.sum(gap, -tolerance, name='SCO', scaling=scaling)

    # Expose the variables to the rest of the object.
    self._scheck_rvs = [objective]
    self._ccheck_rvs = [scaled_objective]
    self._eval_rvs = dict(bqf_0_mean=gap, co_0_mean=objective)

    # Register sample sets and derived variables with a new manager.
    manager = rvs.VariableManager(self._preprocessor)
    self._vm = manager
    manager.add_sample_set(cand_returns, ref_returns)
    manager.add(cand_mean, ref_mean, gap, objective, scaled_objective)
def _make_rvs(self):
    """Build the random variables for the T=0 and T=1 return constraints.

    For each group a bounded sample set of candidate-policy returns and
    its expected value are created. The reference returns are constants
    read from ``self._ref_return_T0`` / ``self._ref_return_T1``. The two
    constraint objectives are

        g0(theta) := (Avg[R_ref|T=0] - E[R|T=0]) - eM
        g1(theta) := (Avg[R_ref|T=1] - E[R|T=1]) - eF

    with ``eM = self.epsilons[0]`` and ``eF = self.epsilons[1]``. The
    variables are stored on ``self._scheck_rvs``, ``self._ccheck_rvs`` and
    ``self._eval_rvs`` and registered with a new ``rvs.VariableManager``.

    NOTE(review): ``mode`` and ``scaling`` are not bound inside this
    method; presumably they are module-level names -- confirm.
    """
    low, high = self._get_return_ranges()
    bounds = dict(lower=low, upper=high)

    # Candidate-policy return sample sets and their expectations, per group.
    returns_t0 = rvs.BoundedRealSampleSet(name='R0', **bounds)
    returns_t1 = rvs.BoundedRealSampleSet(name='R1', **bounds)
    mean_t0 = returns_t0.expected_value('E[R|T=0]', mode=mode)
    mean_t1 = returns_t1.expected_value('E[R|T=1]', mode=mode)

    # Constants: per-group tolerances and reference returns.
    tol_t0 = rvs.constant(self.epsilons[0], name='eM')
    tol_t1 = rvs.constant(self.epsilons[1], name='eF')
    ref_t0 = rvs.constant(self._ref_return_T0, name='Avg[R_ref|T=0]')
    ref_t1 = rvs.constant(self._ref_return_T1, name='Avg[R_ref|T=1]')

    # BQFs: reference return minus expected candidate return, per group.
    gap_t0 = rvs.sum(ref_t0, -mean_t0, name='BQF0')
    gap_t1 = rvs.sum(ref_t1, -mean_t1, name='BQF1')

    # Constraint objectives (unscaled and scaled) for each group.
    objective_t0 = rvs.sum(gap_t0, -tol_t0, name='CO0')
    scaled_t0 = rvs.sum(gap_t0, -tol_t0, name='SCO0', scaling=scaling)
    objective_t1 = rvs.sum(gap_t1, -tol_t1, name='CO1')
    scaled_t1 = rvs.sum(gap_t1, -tol_t1, name='SCO1', scaling=scaling)

    # Expose the variables to the rest of the object.
    self._scheck_rvs = [objective_t0, objective_t1]
    self._ccheck_rvs = [scaled_t0, scaled_t1]
    self._eval_rvs = dict(
        bqf_0_mean=gap_t0,
        co_0_mean=objective_t0,
        bqf_1_mean=gap_t1,
        co_1_mean=objective_t1,
    )

    # Register sample sets and derived variables with a new manager.
    manager = rvs.VariableManager(self._preprocessor)
    self._vm = manager
    manager.add_sample_set(returns_t0, returns_t1)
    manager.add(
        mean_t0, mean_t1,
        gap_t0, objective_t0, scaled_t0,
        gap_t1, objective_t1, scaled_t1,
    )
def __init__(self, epsilons, deltas, model_type, minimum_return,
             iw_type_corrections=None):
    """Store the configuration and build the constraint random variables.

    Args:
        epsilons: Sequence of constraint tolerances; stored as
            ``self.epsilons``.
        deltas: Stored unchanged as ``self.deltas``.
        model_type: Stored unchanged as ``self.model_type``.
        minimum_return: Stored unchanged as ``self.minimum_return``.
        iw_type_corrections: Optional mapping of per-type correction
            values. Keys that are not supplied default to ``1.0``.
            ``None`` (the default) means no overrides.
    """
    self.epsilons = epsilons
    self.deltas = deltas
    self.model_type = model_type
    self.model_variables = {}
    self.minimum_return = minimum_return

    # Empty placeholders; _make_rvs() below replaces them.
    self._vm = rvs.VariableManager(self._preprocessor)
    self._scheck_rvs = []
    self._ccheck_rvs = []
    self._eval_rvs = {}

    # Copy the caller's corrections into a private defaultdict so lookups
    # for unknown types yield 1.0 and the caller's mapping is never shared.
    # A None sentinel replaces the former mutable `{}` default argument.
    corrections = defaultdict(lambda: 1.0)
    if iw_type_corrections is not None:
        corrections.update(iw_type_corrections)
    self._iw_type_corrections = corrections

    self._make_rvs()
def _make_rvs(self):
    """Build the random variables for the ratio-based constraint.

    Two sample sets bounded to [0, 1] ('Pos0', 'Pos1') are created along
    with their expected values 'Pr(Pos|T=0)' and 'Pr(Pos|T=1)'. The BQF is
    ``rvs.maxrec(-(Pr(Pos|T=0) / Pr(Pos|T=1)))`` and the constraint
    objective is ``BQF - pct`` with ``pct = self.epsilons[0]``. The
    variables are stored on ``self._scheck_rvs``, ``self._ccheck_rvs`` and
    ``self._eval_rvs`` and registered with a new ``rvs.VariableManager``.

    NOTE(review): the previous comments described these sample sets as
    "false-positive classifications", while the variable names read
    Pr(Pos|T=.) -- confirm which quantity is intended.
    NOTE(review): ``mode`` and ``scaling`` are not bound inside this
    method; presumably they are module-level names -- confirm.
    """
    unit_interval = dict(lower=0, upper=1)

    # Per-group sample sets, each bounded to [0, 1].
    samples_t0 = rvs.BoundedRealSampleSet(name='Pos0', **unit_interval)
    samples_t1 = rvs.BoundedRealSampleSet(name='Pos1', **unit_interval)

    # Conditional rates as expected values of the sample sets.
    rate_t0 = samples_t0.expected_value('Pr(Pos|T=0)', mode=mode)
    rate_t1 = samples_t1.expected_value('Pr(Pos|T=1)', mode=mode)

    # Threshold constant taken from the first epsilon.
    threshold = rvs.constant(self.epsilons[0], name='pct')

    # BQF from the negated ratio of the two rates, then the constraint
    # objective (unscaled and scaled).
    negated_ratio = -(rate_t0 / rate_t1)
    bqf = rvs.maxrec(negated_ratio, name='BQF')
    objective = rvs.sum(bqf, -threshold, name='CO')
    scaled_objective = rvs.sum(bqf, -threshold, name='SCO', scaling=scaling)

    # Expose the variables to the rest of the object.
    self._scheck_rvs = [objective]
    self._ccheck_rvs = [scaled_objective]
    self._eval_rvs = dict(bqf_0_mean=bqf, co_0_mean=objective)

    # Register sample sets and derived variables with a new manager.
    manager = rvs.VariableManager(self._preprocessor)
    self._vm = manager
    manager.add_sample_set(samples_t0, samples_t1)
    manager.add(rate_t0, rate_t1, bqf, objective, scaled_objective)