def init(self):
    """Assemble the task specification for the continuing Penaltykick task.

    The specification uses a discount factor of 0.99 and a reward range
    of (-20, 20), and is marked as continuing. Observations and actions
    are both declared as unbounded doubles ('NEGINF' to 'POSINF'); the
    repeat count passed for each is 3 when ``self._feature_rep`` equals
    ``'larm'`` and 15 otherwise. The extra field is ``self._ts_extra``
    followed by the copyright notice.

    Returns:
        Whatever ``TaskSpec.to_taskspec()`` produces for the assembled
        specification (presumably the task spec string -- confirm
        against the ``TaskSpec`` implementation).
    """
    unbounded = ('NEGINF', 'POSINF')

    spec = TaskSpec(discount_factor=0.99, reward_range=(-20, 20))
    spec.set_continuing()

    # Observations and actions share the same repeat count, which depends
    # only on the configured feature representation.
    if self._feature_rep == 'larm':
        n_repeat = 3
    else:
        n_repeat = 15
    spec.add_double_obs(unbounded, repeat=n_repeat)
    spec.add_double_act(unbounded, repeat=n_repeat)

    extra = self._ts_extra + "COPYRIGHT Penaltykick (Python) implemented by Astrid Jackson"
    spec.set_extra(extra)
    return spec.to_taskspec()
def init(self):
    """Assemble the task specification for the episodic Penaltykick task.

    Registers the descriptions of the three named actions ('out', 'in'
    and 'kick') on ``MDPAction``, then builds a task specification with
    a discount factor of 0.99 and a reward range of (-20, 20), marked as
    episodic, with a character-count observation of 0 and a single
    double-valued action.

    The extra field is ``self._ts_extra`` followed by an ``ACTIONDESCR``
    entry holding the string form of ``MDPAction.description`` and the
    copyright notice. It is composed in a local variable, so
    ``self._ts_extra`` is left untouched and calling ``init`` more than
    once does not append the ``ACTIONDESCR``/``COPYRIGHT`` text again.

    Returns:
        Whatever ``TaskSpec.to_taskspec()`` produces for the assembled
        specification (presumably the task spec string -- confirm
        against the ``TaskSpec`` implementation).
    """
    MDPAction.set_description({
        'out': {'value': [-0.004]},
        'in': {'value': [0.004]},
        'kick': {'value': [-1.0]},
    })

    ts = TaskSpec(discount_factor=0.99, reward_range=(-20, 20))
    ts.set_episodic()
    ts.set_charcount_obs(0)
    # The action range runs from the smallest described value (-1.0, 'kick')
    # to the largest one (0.004, 'in').
    ts.add_double_act((-1.0, 0.004))

    # Build the extra string locally instead of using ``+=`` on the
    # attribute: mutating self._ts_extra would make every further call
    # to init() emit the action description and copyright once more.
    extra = (
        self._ts_extra
        + "ACTIONDESCR %s " % str(MDPAction.description)
        + "COPYRIGHT Penaltykick (Python) implemented by Astrid Jackson"
    )
    ts.set_extra(extra)
    return ts.to_taskspec()