def predictive_model(data: pd.DataFrame,
                     interesting_rows,
                     day_zero_n_patients: int = 20,
                     days_in_future: int = 30,
                     aggregated: bool = False):
    """Plot the selected rows as curves and overlay step/exponential fits.

    For every selected row, the entries from column 4 onwards that are
    strictly greater than ``day_zero_n_patients`` are kept and plotted
    against their position (0, 1, 2, ...).  Unless column 1 of the row is
    ``"China"`` or ``"US"``, an lmfit ``StepModel`` and ``ExponentialModel``
    are fitted to the curve and evaluated over ``days_in_future`` positions.

    Args:
        data: Table whose columns 0 and 1 build the legend label and whose
            columns 4+ hold the numeric series.
        interesting_rows: Selector applied as ``data[interesting_rows]``.
        day_zero_n_patients: Threshold; only larger entries are kept.
        days_in_future: Number of positions (``0 .. days_in_future - 1``)
            over which the fitted models are drawn.
        aggregated: If true, plot the kept entries unchanged; otherwise plot
            their consecutive differences, prefixed with a 0.

    Returns:
        None.  The curves are drawn on a new matplotlib figure.
    """
    data = data[interesting_rows].iloc[:, :]
    from lmfit.models import StepModel, ExponentialModel

    # Fetch the underlying array once instead of on every row access.
    table = data.values

    plt.figure(figsize=(10, 5))
    for c in range(len(data.index)):
        kept = table[c, 4:][data.iloc[c, 4:] > day_zero_n_patients]
        if len(kept) == 0:
            # Nothing above the threshold.  Checked before differencing,
            # because the leading 0 added below would otherwise turn an
            # empty row into a one-point curve that is plotted and fitted.
            continue

        if aggregated:
            values = kept
        else:
            values = np.concatenate(([0], np.diff(kept)))

        y = np.asarray(values, dtype='float64')
        x = np.arange(y.shape[0], dtype='float64')

        label = "{}-{}".format(table[c, 0], table[c, 1])
        plt.plot(x, y, label=label)

        # These two rows are shown but never fitted.
        if table[c, 1] in ["China", "US"]:
            continue

        try:
            model_step = StepModel()
            model_exp = ExponentialModel()
            params_step = model_step.guess(y, x=x)
            params_exp = model_exp.guess(y, x=x)
            result_step = model_step.fit(y, params_step, x=x)
            result_exp = model_exp.fit(y, params_exp, x=x)
        except Exception:
            # Best effort: a curve that cannot be fitted is still plotted,
            # it just gets no prediction overlay.
            continue

        x_pred = np.arange(days_in_future)
        # Both overlays share the same legend text; they differ only in
        # line style (':' for the step fit, '.' for the exponential fit).
        plt.plot(x_pred,
                 model_step.eval(result_step.params, x=x_pred),
                 ':',
                 label='fit-{}'.format(label))
        plt.plot(x_pred,
                 model_exp.eval(result_exp.params, x=x_pred),
                 '.',
                 label='fit-{}'.format(label))

    plt.legend(prop={"size": 7})
    plt.yscale('log')
    plt.xticks(rotation=45)
    plt.grid(which='both')

    # NOTE(review): the timestamp is computed but not used by any statement
    # of this function; kept in case it feeds code that is not shown here.
    now = datetime.now()
    dt_string = now.strftime("%d%m%Y-%H%M%S")
class gene_set():
    """Fit one gene's values and estimate where the fit crosses 0.5.

    Construction runs the whole pipeline: fit an exponential + Voigt
    compound model (``get_model``), bracket the 0.5 crossing between two
    sampling points (``def_peaks``), optionally refit a plain exponential
    after a second peak (``model_resetting``), refine the crossing
    (``half_life``) and append one result row to a text file (``saving``).
    """

    def __init__(self, gene_id, cluster):
        """Look up ``gene_id`` in the module-level ``reference`` and run the pipeline.

        Raises:
            IndexError: if no row of ``reference`` starts with ``gene_id``.
        """
        self.cluster = cluster
        self.gene_id = gene_id
        # First matching row, without its leading identifier (TPM).
        self.norm_vals = [
            float(y) for y in [x[1:] for x in reference if x[0] == gene_id][0]
        ]
        self.get_model()
        self.def_peaks()
        self.model_resetting()
        self.half_life()
        self.saving()

    def get_model(self):
        """Fit the exponential + Voigt compound model to the six values.

        Returns:
            ``(comp_out, comp_chisq, out, chisq, usedmod, model_flag)``.
        """
        self.x = np.array([0, 1, 2, 6, 12, 24])
        self.y = np.array(self.norm_vals)

        # Compound model: exponential background plus a Voigt peak.
        self.background = ExponentialModel(prefix='b_')
        self.pars = self.background.guess(self.y, x=self.x)
        self.peak = VoigtModel(prefix='p_')
        self.pars += self.peak.guess(self.y, x=self.x)
        self.comp_mod = self.peak + self.background
        self.init = self.comp_mod.eval(self.pars, x=self.x)

        # NOTE(review): the guessed ``self.pars`` are not handed to fit(),
        # so the fit starts from the model defaults - confirm this is wanted.
        # 'propagate' rather than 'omit': per the original author's note this
        # keeps the zero values.
        self.comp_out = self.comp_mod.fit(
            self.y, x=self.x, fit_kws={'nan_policy': 'propagate'})
        self.comp_list = self.comp_out.fit_report().split('\n')

        # Read chi-square from the result object instead of slicing the last
        # five characters of the seventh report line: the report layout may
        # differ between lmfit versions and the slice drops leading digits
        # of longer numbers.  Rounded to the three decimals the result file
        # records.
        self.comp_chisq = round(float(self.comp_out.chisqr), 3)

        self.out = self.comp_out
        self.chisq = self.comp_chisq
        self.usedmod = self.comp_mod
        self.model_flag = "composite (exponential+Voigt)"
        return (self.comp_out, self.comp_chisq, self.out, self.chisq,
                self.usedmod, self.model_flag)

    def def_peaks(self):
        """Bracket the 0.5 crossing of the compound fit between two x points.

        Sets ``self.min`` / ``self.max`` to the bracket and ``self.peak_flag``
        to the kind of crossing that was found.  When no bracket can be
        determined, ``min`` and ``max`` stay 0 (``half_life`` then reports
        "No predictable half-life").

        Returns:
            ``(min, max, peak_flag, bestfit)``.
        """
        fit = self.comp_out.best_fit
        self.bestfit = fit
        distance = np.abs(fit - 0.5)
        self.idx = np.argmin(distance)
        self.mysort = np.argsort(distance)

        # Defaults, so every path below leaves the instance in a defined
        # state (previously some paths never assigned min/max and the
        # return statement raised AttributeError).
        self.min = 0
        self.max = 0
        self.peak_flag = None

        idx = self.idx
        second = self.mysort[1]
        last = len(fit) - 1

        def fit_after(i):
            # Fitted value one step after i; NaN past the final point so
            # that every comparison against it is False.
            return fit[i + 1] if i < last else np.nan

        def bracket(lo, hi, flag):
            self.min = self.x[lo]
            self.max = self.x[hi]
            self.peak_flag = flag

        if all(i > 0.5 for i in fit):
            # Never reaches 0.5 and no extrapolation is attempted
            # (not enough data points).
            self.peak_flag = "No predictable half-life"
        elif fit[idx] == 0.5:
            # A sampling point hits 0.5 exactly.  Store its x coordinate
            # (not its index) so it is comparable with the refined values.
            self.half_life_y = fit[idx]
            self.half_life_x = self.x[idx]
            self.peak_flag = "Exact compound half-life"
        elif fit[0] > 0.5 and fit[1] < 0.5:
            bracket(0, 1, "Compound")
        elif idx == 5 and fit[idx - 1] < 0.5:
            # Only the last value crosses.
            bracket(idx - 1, idx, "Compound")
        elif np.abs(idx - second) == 1:
            # The two closest points are neighbours.
            if fit[idx] < 0.5:
                bracket(idx - 1, idx, "Compound")
            elif fit[idx] > 0.5 and idx < last:
                bracket(idx, idx + 1, "Compound")
        elif np.abs(idx - second) > 1:
            # The two closest points are apart: the curve has a bump.
            if fit[idx] < 0.5:
                bracket(idx - 1, idx, "Compound")
            elif fit[idx] > 0.5 and fit[second] < 0.5:
                if fit_after(idx) < 0.5:
                    bracket(idx, idx + 1, "Compound")
                else:
                    bracket(second - 1, second, "Resetting")
            elif fit[idx] > 0.5 and fit[second] > 0.5:
                if fit_after(idx) < 0.5:
                    bracket(idx, idx + 1, "Compound")
                elif fit_after(idx) > 0.5 and fit_after(second) < 0.5:
                    bracket(second - 1, second, "Resetting")

        return self.min, self.max, self.peak_flag, self.bestfit

    def model_resetting(self):
        """Refit a plain exponential from the second-highest fitted point.

        Only acts when ``def_peaks`` flagged "Resetting"; otherwise the
        compound-fit results are returned untouched.  On a reset, ``x`` and
        ``y`` are replaced by the shifted / re-normalised tail, the bracket
        is recomputed on the new fit and ``bestfit`` is padded back to six
        entries for output.

        Returns:
            ``(min, max, peak_flag, bestfit, x, out, chisq, usedmod,
            model_flag)``.
        """
        if self.peak_flag == "Resetting":
            # Mostly for plotting; the half-life needs a new zero.
            self.scnd_peak = np.sort(self.bestfit)[-2]
            self.scnd_idx = np.argsort(self.bestfit)[-2]
            self.newzero = self.x[self.scnd_idx]

            # Cut the time scale at the new origin and shift it to 0.
            self.x2 = self.x[self.scnd_idx:] - self.newzero
            # Cut the values the same way and re-normalise them.
            self.y2 = self.y[self.scnd_idx:] / self.y[self.scnd_idx]

            self.exp_mod = ExponentialModel(prefix='e_')
            self.pars = self.exp_mod.guess(self.y2, x=self.x2)
            self.init = self.exp_mod.eval(self.pars, x=self.x2)
            self.exp_out = self.exp_mod.fit(self.y2, x=self.x2, missing='drop')
            self.exp_list = self.exp_out.fit_report().split('\n')
            # See get_model: taken from the result object, not the report.
            self.exp_chisq = round(float(self.exp_out.chisqr), 3)
            self.out = self.exp_out
            self.chisq = self.exp_chisq
            self.usedmod = self.exp_mod

            self.bestfit = self.exp_out.best_fit
            distance = np.abs(self.bestfit - 0.5)
            self.idx = np.argmin(distance)
            self.mysort = np.argsort(distance)
            self.peak_flag = None

            closest = self.bestfit[self.idx]
            if closest < 0.5:
                self.min = self.x2[self.idx - 1]
                self.max = self.x2[self.idx]
                self.peak_flag = "Resetted exponential"
            elif closest > 0.5:
                if self.idx + 1 < len(self.x2):
                    self.min = self.x2[self.idx]
                    self.max = self.x2[self.idx + 1]
                    self.peak_flag = "Resetted exponential"
                else:
                    # Closest point is the last one and still above 0.5:
                    # there is no later point to bracket with.
                    self.min = 0
                    self.max = 0

            # For printing: pad with the last fitted value up to six entries.
            if len(self.bestfit) < 6:
                padding = [self.bestfit[-1]] * (6 - len(self.bestfit))
                self.bestfit = np.append(self.bestfit, padding)

            self.x = self.x2
            self.y = self.y2
            self.model_flag = "exponential"

        return (self.min, self.max, self.peak_flag, self.bestfit, self.x,
                self.out, self.chisq, self.usedmod, self.model_flag)

    def half_life(self):
        """Refine the 0.5 crossing inside the ``[min, max)`` bracket.

        The selected model (``self.out``) is evaluated on a grid with step
        0.001, then 1e-5, then 1e-7, until a value in ``[0.50, 0.51]`` is
        found; the grid point whose value is closest to 0.5 is kept.

        Returns:
            ``(half_life_y, half_life_x, peak_flag)``; both numbers are 0
            when no crossing is available.
        """
        self.new_x = np.array([0])
        self.hl_eval = np.array([0])
        self.hl_array = np.array([0])
        self.hl_coord = np.array([0])
        self.step = None

        if self.peak_flag == "Exact compound half-life":
            # def_peaks already stored the exact crossing; keep it.
            return self.half_life_y, self.half_life_x, self.peak_flag

        self.half_life_y = 0
        self.half_life_x = 0
        if self.max == 0:
            self.peak_flag = "No predictable half-life"
            return self.half_life_y, self.half_life_x, self.peak_flag

        self.step = 0.1
        self.max_allowed = 3
        self.attempt = 0
        while self.half_life_y == 0 and self.attempt < self.max_allowed:
            self.attempt += 1
            # Normally 0.001 is enough, but where the slope is very steep
            # a finer grid is needed to catch the half-life.
            self.step = self.step / 100
            self.ranging = np.arange(self.min, self.max, self.step)
            # Plain iteration also copes with an empty grid, on which
            # np.nditer raises.
            for j in self.ranging:
                self.new_x = np.array([j])
                self.hl_eval = self.out.eval(self.out.params, x=self.new_x)
                if 0.50 <= self.hl_eval <= 0.51:
                    self.hl_array = np.append(self.hl_array, self.hl_eval)
                    self.hl_coord = np.append(self.hl_coord, self.new_x)
            self.half_life_id = np.argmin(np.abs(self.hl_array - 0.5))
            self.half_life_y = self.hl_array[self.half_life_id]
            self.half_life_x = self.hl_coord[self.half_life_id]

        if self.half_life_y == 0:
            self.peak_flag = "Above permitted interpolation iterations"
        return self.half_life_y, self.half_life_x, self.peak_flag

    def saving(self):
        """Append one tab-separated result row to the output text file."""
        with open('model_fit_c5_average_filtering_compound.txt', 'a') as f:
            f.write(
                "%s\t%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%s\n"
                % (self.gene_id, self.cluster, self.model_flag, self.chisq,
                   self.half_life_y, self.half_life_x,
                   self.norm_vals[0], self.norm_vals[1], self.norm_vals[2],
                   self.norm_vals[3], self.norm_vals[4], self.norm_vals[5],
                   self.bestfit[0], self.bestfit[1], self.bestfit[2],
                   self.bestfit[3], self.bestfit[4], self.bestfit[5],
                   self.peak_flag))
class gene_set():
    """Fit three candidate models to one gene and record the best one.

    The candidates are exponential + Gaussian, exponential + Voigt and a
    plain exponential.  Only ``get_model`` is defined and run here; the
    half-life / printing / saving steps are not part of this class.
    """

    def __init__(self, gene_id, cluster):
        """Look up ``gene_id`` in the module-level ``reference`` and fit.

        Raises:
            IndexError: if no row of ``reference`` starts with ``gene_id``.
        """
        self.cluster = cluster
        self.gene_id = gene_id
        # First matching row, without its leading identifier (TPM).
        self.norm_vals = [
            float(y) for y in [x[1:] for x in reference if x[0] == gene_id][0]
        ]
        self.get_model()

    def get_model(self):
        """Fit all three models and select the one with the lowest chi-square.

        ``self.model_flag`` names the selected model and ``self.out`` holds
        its fit result.  When chi-square values tie, the flag names both
        tied models.

        Returns:
            ``(comp_out1, comp_chisq1, comp_out2, comp_chisq2, exp_out,
            exp_chisq, model_flag)``.
        """
        self.x = np.array([0, 1, 2, 6, 12, 24])
        self.y = np.array(self.norm_vals)
        self.model_flag = None
        self.model_list = []

        # Chi-square is read from each result object rather than sliced out
        # of the seventh line of the text report, whose layout may differ
        # between lmfit versions.  It is rounded to three decimals so the
        # equality-based tie handling below stays meaningful.

        # First model: exponential background plus Gaussian peak.
        self.background1 = ExponentialModel(prefix='e1_')
        self.pars1 = self.background1.guess(self.y, x=self.x)
        self.peak1 = GaussianModel(prefix='p1_')
        self.pars1 += self.peak1.guess(self.y, x=self.x)
        self.comp_mod1 = self.peak1 + self.background1
        self.init1 = self.comp_mod1.eval(self.pars1, x=self.x)
        self.comp_out1 = self.comp_mod1.fit(
            self.y, x=self.x, fit_kws={'nan_policy': 'omit'})
        self.comp_list1 = self.comp_out1.fit_report().split('\n')
        self.comp_chisq1 = round(float(self.comp_out1.chisqr), 3)

        # Second model: exponential background plus Voigt peak.
        self.background2 = ExponentialModel(prefix='e2_')
        self.pars2 = self.background2.guess(self.y, x=self.x)
        self.peak2 = VoigtModel(prefix='p2_')
        self.pars2 += self.peak2.guess(self.y, x=self.x)
        self.comp_mod2 = self.peak2 + self.background2
        self.init2 = self.comp_mod2.eval(self.pars2, x=self.x)
        self.comp_out2 = self.comp_mod2.fit(
            self.y, x=self.x, fit_kws={'nan_policy': 'omit'})
        self.comp_list2 = self.comp_out2.fit_report().split('\n')
        self.comp_chisq2 = round(float(self.comp_out2.chisqr), 3)

        # Plain exponential, for reference.
        self.exp_mod = ExponentialModel(prefix='onlye_')
        self.pars = self.exp_mod.guess(self.y, x=self.x)
        self.init = self.exp_mod.eval(self.pars, x=self.x)
        self.exp_out = self.exp_mod.fit(self.y, x=self.x, missing='drop')
        self.exp_list = self.exp_out.fit_report().split('\n')
        self.exp_chisq = round(float(self.exp_out.chisqr), 3)

        self.model_list = [self.comp_chisq1, self.comp_chisq2, self.exp_chisq]

        # NOTE(review): the first count is compared with 5 but the second is
        # only tested for being non-zero - confirm whether "== 5" is missing.
        gaussian_inf = np.count_nonzero(np.isinf(self.comp_out1.best_fit))
        voigt_inf = np.count_nonzero(np.isinf(self.comp_out2.best_fit))
        if gaussian_inf == 5 and voigt_inf:
            # Both compound fits blew up: fall back to the exponential.
            # (This used to assign a local variable, leaving the flag None.)
            self.model_flag = "exponential"
            self.out = self.exp_out
        else:
            best = min(self.model_list)
            if best == self.comp_chisq1:
                self.model_flag = "Gaussian compound"
                self.out = self.comp_out1
            elif best == self.comp_chisq2:
                self.model_flag = "Voigt compound"
                self.out = self.comp_out2
            elif best == self.exp_chisq:
                self.model_flag = "exponential"
                self.out = self.exp_out

            # Ties: later checks deliberately override earlier ones.
            if len(self.model_list) != len(set(self.model_list)):
                if best == self.comp_chisq1 and self.comp_chisq1 == self.comp_chisq2:
                    self.model_flag = "Both compounds"
                    self.out = self.comp_out2
                if best == self.comp_chisq2 and self.comp_chisq2 == self.exp_chisq:
                    self.model_flag = "Voigt compound and exponential"
                    self.out = self.comp_out2
                if best == self.exp_chisq and self.exp_chisq == self.comp_chisq1:
                    self.model_flag = "Gaussian compound and exponential"
                    self.out = self.comp_out1

        return (self.comp_out1, self.comp_chisq1, self.comp_out2,
                self.comp_chisq2, self.exp_out, self.exp_chisq,
                self.model_flag)
class gene_set():
    """Fit a plain exponential to one gene and locate its 0.5 crossing.

    Construction fits the model (``get_model``), searches the crossing
    (``half_life``), prints the result row (``printing``) and appends it to
    a text file (``saving``).
    """

    def __init__(self, gene_id, cluster):
        """Look up ``gene_id`` in the module-level ``reference`` and run the pipeline.

        Raises:
            IndexError: if no row of ``reference`` starts with ``gene_id``.
        """
        self.cluster = cluster
        self.gene_id = gene_id
        # First matching row, without its leading identifier (TPM).
        self.norm_vals = [
            float(y) for y in [x[1:] for x in reference if x[0] == gene_id][0]
        ]
        self.get_model()
        self.half_life()
        self.printing()
        self.saving()

    def get_model(self):
        """Fit the exponential model to the six values.

        Returns:
            ``(exp_out, exp_chisq)``.
        """
        self.x = np.array([0, 1, 2, 6, 12, 24])
        self.y = np.array(self.norm_vals)

        self.exp_mod = ExponentialModel(prefix='onlye_')
        self.pars = self.exp_mod.guess(self.y, x=self.x)
        self.init = self.exp_mod.eval(self.pars, x=self.x)
        self.exp_out = self.exp_mod.fit(self.y, x=self.x, missing='drop')
        self.exp_list = self.exp_out.fit_report().split('\n')
        # Read chi-square from the result object rather than slicing the
        # seventh line of the text report, whose layout may differ between
        # lmfit versions.  Rounded to the three decimals the file records.
        self.exp_chisq = round(float(self.exp_out.chisqr), 3)
        return self.exp_out, self.exp_chisq

    def half_life(self):
        """Bracket the 0.5 crossing of the fit and refine it on a 0.001 grid.

        The bracket is chosen from the fitted values around the point
        closest to 0.5.  Inside it the model is evaluated every 0.001 and
        the grid point whose value lies in ``[0.50, 0.51]`` and is closest
        to 0.5 is kept.  When no bracket can be determined, both results
        are 0.

        Returns:
            ``(half_life_y, half_life_x)``.
        """
        self.new_x = np.array([0])
        self.hl_eval = np.array([0])
        self.hl_array = np.array([0])
        self.hl_coord = np.array([0])

        full_fit = self.exp_out.best_fit
        self.bestfit = full_fit
        self.idx = np.argmin(np.abs(self.bestfit - 0.5))
        if self.idx == 5 and self.bestfit[self.idx - 1] < 0.5:
            # The last point is closest although the one before is already
            # below 0.5: ignore the last point and search again.
            self.bestfit = full_fit[:-1]
            self.idx = np.argmin(np.abs(self.bestfit - 0.5))

        fit = self.bestfit
        idx = self.idx
        size = len(fit)
        here = fit[idx]

        def at(i):
            # Fitted value at i; NaN past the end so that every comparison
            # against it is False instead of raising IndexError.
            return fit[i] if i < size else np.nan

        # Defaults, so an unmatched shape means "no bracket" rather than an
        # AttributeError further down.
        self.min = 0
        self.max = 0

        if here == 0.5:
            # A sampling point hits 0.5 exactly: report its x coordinate and
            # skip the grid search, which would otherwise overwrite it.
            self.half_life_y = here
            self.half_life_x = self.x[idx]
            self.bestfit = full_fit
            return self.half_life_y, self.half_life_x

        if 0.5 > here and fit[idx - 1] > 0.5:
            self.max = self.x[idx]
            self.min = self.x[idx - 1]
        elif 0.5 < here and here == full_fit[-1]:
            # Still above 0.5 at the final point.  Compared against the
            # untruncated fit, because the shortened array has no index 5.
            self.min = 0
            self.max = 0
        elif 0.5 < here and at(idx + 1) < 0.5:
            self.min = self.x[idx]
            self.max = self.x[idx + 1]
        elif 0.5 < here and at(idx + 1) > 0.5 and at(idx + 2) < 0.5:
            self.min = self.x[idx + 1]
            self.max = self.x[idx + 2]
        elif 0.5 > here and at(idx + 1) < 0.5 and fit[idx - 2] > 0.5:
            self.min = self.x[idx - 2]
            self.max = self.x[idx]

        self.ranging = np.arange(self.min, self.max, 0.001)
        if self.max > 0:
            # Plain iteration also copes with an empty grid, on which
            # np.nditer raises.
            for j in self.ranging:
                self.new_x = np.array([j])
                self.hl_eval = self.exp_out.eval(self.exp_out.params,
                                                 x=self.new_x)
                if 0.50 <= self.hl_eval <= 0.51:
                    self.hl_array = np.append(self.hl_array, self.hl_eval)
                    self.hl_coord = np.append(self.hl_coord, self.new_x)
            self.half_life_id = np.argmin(np.abs(self.hl_array - 0.5))
            self.half_life_y = self.hl_array[self.half_life_id]
            self.half_life_x = self.hl_coord[self.half_life_id]
        else:
            self.half_life_y = 0
            self.half_life_x = 0

        # Output always reports the complete six-point fit.
        self.bestfit = full_fit
        return self.half_life_y, self.half_life_x

    def printing(self):
        """Print the result row to standard output."""
        print(self.gene_id, self.cluster, "exponential", self.exp_chisq,
              self.half_life_y, self.half_life_x,
              self.norm_vals[0], self.norm_vals[1], self.norm_vals[2],
              self.norm_vals[3], self.norm_vals[4], self.norm_vals[5],
              self.bestfit[0], self.bestfit[1], self.bestfit[2],
              self.bestfit[3], self.bestfit[4], self.bestfit[5])

    def saving(self):
        """Append one tab-separated result row to the output text file."""
        with open('model_fit_c5_average_filtering_newexp.txt', 'a') as f:
            f.write(
                "%s\t%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n"
                % (self.gene_id, self.cluster, "exponential", self.exp_chisq,
                   self.half_life_y, self.half_life_x,
                   self.norm_vals[0], self.norm_vals[1], self.norm_vals[2],
                   self.norm_vals[3], self.norm_vals[4], self.norm_vals[5],
                   self.bestfit[0], self.bestfit[1], self.bestfit[2],
                   self.bestfit[3], self.bestfit[4], self.bestfit[5]))
# `model`, `x` and `y` must already exist when these statements run.
model.set_param_hint('decay', value=10)

# First Gaussian component.
model += GaussianModel(prefix='g1_')
model.set_param_hint('g1_center', value=105, min=75, max=125)
model.set_param_hint('g1_sigma', value=15, min=3)
model.set_param_hint('g1_amplitude', value=2000, min=10)

# Second Gaussian component; its width is tied to the first one through
# the extra parameter `g2_delta_sigma` (g2_sigma = g2_delta_sigma * g1_sigma).
model += GaussianModel(prefix='g2_')
model.set_param_hint('g2_center', value=155, min=125, max=175)
model.set_param_hint('g2_delta_sigma', value=1.5, min=0.8)
model.set_param_hint('g2_sigma', expr='g2_delta_sigma*g1_sigma')
model.set_param_hint('g2_amplitude', value=2000, min=10)

pars = model.make_params()
init = model.eval(pars, x=x)
out = model.fit(y, pars, x=x)

print(out.fit_report(min_correl=0.5))

# A stray, incomplete expression (`model.plo`) stood here; it only raised
# AttributeError and has been removed.

# Left panel: data, initial evaluation and best fit.
fig, axes = plt.subplots(1, 2, figsize=(12.8, 4.8))
axes[0].plot(x, y, 'b')
axes[0].plot(x, init, 'k--', label='initial fit')
axes[0].plot(x, out.best_fit, 'r-', label='best fit')
axes[0].legend(loc='best')

# Right panel: data and the individual model components.
comps = out.eval_components(x=x)
axes[1].plot(x, y, 'b')
axes[1].plot(x, comps['g1_'], 'g--', label='Gaussian component 1')