def step(self, action, reward_fun):
    '''
    Run the simulator for one sample interval and score the outcome.

    action is a namedtuple with keys: basal, bolus. It is passed unchanged
    to self.mini_step on every iteration.

    reward_fun is called with the keyword arguments bg_hist, cgm_hist,
    insulin_hist and risk_hist (the complete histories, including the
    sample appended by this call); its return value is the step reward.

    Returns a Step holding the new CGM observation, the reward, the done
    flag, the sample time, the patient name, the averaged CHO (as `meal`)
    and the patient state.
    '''
    CHO = 0.0
    insulin = 0.0
    BG = 0.0
    CGM = 0.0

    # Compute moving average as the sample measurements: mini_step runs
    # int(sample_time) times and every result is weighted by 1/sample_time.
    for _ in range(int(self.sample_time)):
        tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
        CHO += tmp_CHO / self.sample_time
        insulin += tmp_insulin / self.sample_time
        BG += tmp_BG / self.sample_time
        CGM += tmp_CGM / self.sample_time

    # Compute risk index from the averaged BG only (single-sample horizon)
    LBGI, HBGI, risk = risk_index([BG], 1)

    # Record current action
    self.CHO_hist.append(CHO)
    self.insulin_hist.append(insulin)

    # Record next observation
    self.time_hist.append(self.time)
    self.BG_hist.append(BG)
    self.CGM_hist.append(CGM)
    self.risk_hist.append(risk)
    self.LBGI_hist.append(LBGI)
    self.HBGI_hist.append(HBGI)

    # Compute reward, and decide whether game is over
    reward = reward_fun(
        bg_hist=self.BG_hist,
        cgm_hist=self.CGM_hist,
        insulin_hist=self.insulin_hist,
        risk_hist=self.risk_hist,
    )
    # The episode ends as soon as the averaged BG leaves the [40, 350] band.
    done = BG < 40 or BG > 350
    obs = Observation(CGM=CGM)

    return Step(
        observation=obs,
        reward=reward,
        done=done,
        sample_time=self.sample_time,
        patient_name=self.patient.name,
        meal=CHO,
        patient_state=self.patient.state,
    )
def _reset(self):
    '''
    Re-initialise the sample time, the viewer and every per-step history.

    The sample time comes from self.perm_sample_time when it is set and
    from the sensor otherwise. Each history list is restarted with a
    single entry derived from the patient's current observation, so all
    histories have the same length after a reset.
    '''
    if self.perm_sample_time is None:
        self.sample_time = self.sensor.sample_time
    else:
        self.sample_time = self.perm_sample_time
    self.viewer = None

    BG = self.patient.observation.Gsub
    LBGI, HBGI, risk = risk_index([BG], 1)
    magni_risk = magni_risk_index([BG])
    CGM = self.sensor.measure(self.patient)

    self.time_hist = [self.scenario.start_time]
    self.BG_hist = [BG]
    self.CGM_hist = [CGM]
    self.risk_hist = [risk]
    self.LBGI_hist = [LBGI]
    self.HBGI_hist = [HBGI]
    # step() appends to magni_risk_hist alongside the other histories, so
    # it has to be restarted here too; otherwise it is missing or keeps
    # entries from the previous episode and drifts out of step.
    self.magni_risk_hist = [magni_risk]
    # No action has been applied yet at the initial time point.
    self.CHO_hist = [0]
    self.insulin_hist = [0]
def step(self, action, reward_fun, cho):
    '''
    Run the simulator for one sample interval and score the outcome.

    action is a namedtuple with keys: basal, bolus.

    reward_fun is called with the keyword arguments bg_hist, cgm_hist,
    insulin_hist and risk_hist (the complete histories, including the
    sample appended by this call); its return value is the step reward.

    cho, when not None, replaces the meal supplied by the scenario. In the
    model branch it is used directly; otherwise it is forwarded to
    self.mini_step.

    Two execution paths exist:
      * self.model is not None: the averaged CHO/insulin are appended to
        self.state, normalised with self.norm_params, pushed through the
        model and de-normalised to obtain the next state.
      * otherwise: self.mini_step is run int(sample_time) times and its
        outputs are averaged.

    Returns a Step holding the new CGM observation, the reward, the done
    flag, the sample time, the patient name, the averaged CHO (as `meal`)
    and the patient state.
    '''
    CHO = 0.0
    insulin = 0.0
    BG = 0.0
    CGM = 0.0

    if self.model is not None:
        # Calculate CHO/insulin
        for _ in range(int(self.sample_time)):
            # The scenario is queried every iteration even when cho
            # overrides the meal, in case get_action keeps internal state.
            patient_action = self.scenario.get_action(self.time)
            tmp_basal = self.pump.basal(action.basal)
            tmp_bolus = self.pump.bolus(action.bolus)
            tmp_insulin = tmp_basal + tmp_bolus
            tmp_CHO = cho if cho is not None else patient_action.meal
            CHO += tmp_CHO / self.sample_time
            insulin += tmp_insulin / self.sample_time
            self.patient._t += 1  # copying mini-step of 1 minute

        # Make state
        state = np.concatenate([self.state, [CHO, insulin]])
        mu = self.norm_params['mu']
        std = self.norm_params['std']
        norm_state = ((state - mu) / std).reshape(1, -1)
        tensor_state = torch.from_numpy(norm_state).float().to(self.model_device)

        # feed through model
        with torch.no_grad():
            next_state_tensor = self.model(tensor_state)
        if self.model_device != 'cpu':
            next_state_tensor = next_state_tensor.cpu()
        next_state_norm = next_state_tensor.numpy().reshape(-1)
        # Only the first 13 normalisation entries are applied to the model
        # output -- presumably the patient-state part without the appended
        # CHO/insulin inputs; confirm against how norm_params is built.
        next_state = (next_state_norm * std[:13]) + mu[:13]
        self.state = next_state

        # calculate BG and CGM
        # NOTE(review): index 12 is assumed to be the glucose compartment
        # that Vg converts to a concentration -- confirm with patient model.
        BG = self.state[12] / self.patient._params.Vg
        # getting observation correct for CGM measurement
        self.patient._state[12] = self.state[12]
        CGM = self.sensor.measure(self.patient)
    else:
        # Compute moving average as the sample measurements
        for _ in range(int(self.sample_time)):
            tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action, cho)
            CHO += tmp_CHO / self.sample_time
            insulin += tmp_insulin / self.sample_time
            BG += tmp_BG / self.sample_time
            CGM += tmp_CGM / self.sample_time

    # Compute risk index from the new BG only (single-sample horizon)
    LBGI, HBGI, risk = risk_index([BG], 1)
    magni_risk = magni_risk_index([BG])

    # Record current action
    self.CHO_hist.append(CHO)
    self.insulin_hist.append(insulin)

    # Record next observation
    self.time_hist.append(self.time)
    self.BG_hist.append(BG)
    self.CGM_hist.append(CGM)
    self.risk_hist.append(risk)
    self.LBGI_hist.append(LBGI)
    self.HBGI_hist.append(HBGI)
    self.magni_risk_hist.append(magni_risk)

    # Compute reward, and decide whether game is over
    reward = reward_fun(
        bg_hist=self.BG_hist,
        cgm_hist=self.CGM_hist,
        insulin_hist=self.insulin_hist,
        risk_hist=self.risk_hist,
    )
    # The episode ends as soon as BG leaves the [40, 350] band.
    done = BG < 40 or BG > 350
    obs = Observation(CGM=CGM)

    return Step(
        observation=obs,
        reward=reward,
        done=done,
        sample_time=self.sample_time,
        patient_name=self.patient.name,
        meal=CHO,
        patient_state=self.patient.state,
    )
def risk_bg(bg_hist, **kwargs):
    '''
    Score the most recent blood-glucose sample by its negated risk.

    Only the last entry of bg_hist is used; any extra keyword arguments
    are accepted and ignored. Returns the negation of the final element
    of risk_index([latest], 1).
    '''
    latest_bg = bg_hist[-1]
    indices = risk_index([latest_bg], 1)
    return -indices[-1]
def risk_diff_bg(bg_hist, **kwargs):
    '''
    Score the change in risk between the last two blood-glucose samples.

    Returns the previous sample's risk minus the latest sample's risk, so
    the value is positive when risk went down. With fewer than two samples
    in bg_hist the result is 0. Extra keyword arguments are accepted and
    ignored.
    '''
    if len(bg_hist) >= 2:
        latest_bg, earlier_bg = bg_hist[-1], bg_hist[-2]
        _lbgi, _hbgi, latest_risk = risk_index([latest_bg], 1)
        _lbgi, _hbgi, earlier_risk = risk_index([earlier_bg], 1)
        return earlier_risk - latest_risk
    # Not enough history to form a difference.
    return 0