Пример #1
0
def predictive_model(data: pd.DataFrame,
                     interesting_rows,
                     day_zero_n_patients: int = 20,
                     days_in_future: int = 30,
                     aggregated: bool = False):
    """Plot the selected series and overlay step/exponential extrapolations.

    For every row of the selection, the values from column index 4 onward
    that exceed ``day_zero_n_patients`` are plotted against their position
    (0, 1, 2, ...).  A ``StepModel`` and an ``ExponentialModel`` from lmfit
    are then fitted to that curve and evaluated on ``range(days_in_future)``.
    Everything is drawn on a new 10x5 matplotlib figure with a log y-axis.

    Args:
        data: Table whose columns 0 and 1 are used to build the legend label
            and whose columns 4+ hold the numeric series.
        interesting_rows: Selector applied as ``data[interesting_rows]``
            (presumably a boolean row mask -- confirm against the caller).
        day_zero_n_patients: Only values strictly above this threshold are
            kept.
        days_in_future: Number of x positions, starting at 0, on which the
            fitted models are evaluated.
        aggregated: If true, the kept values are plotted as they are;
            otherwise their successive differences (prefixed with 0) are
            plotted.

    Returns:
        None.  The function only draws on the current matplotlib figure.
    """
    import warnings

    from lmfit.models import StepModel, ExponentialModel

    data = data[interesting_rows].iloc[:, :]
    # ``DataFrame.values`` builds a new array on each access, so take it once.
    table = data.values
    x_pred = np.arange(days_in_future)

    plt.figure(figsize=(10, 5))
    for c, row in enumerate(table):
        above_threshold = np.asarray(data.iloc[c, 4:] > day_zero_n_patients,
                                     dtype=bool)
        values = row[4:][above_threshold]
        if not aggregated:
            # Differences between consecutive kept values, with a leading 0
            # so the series keeps the same length.
            values = np.concatenate(([0], np.diff(values)))

        x = np.arange(values.shape[0], dtype='float64')
        y = np.asarray(values, dtype='float64')

        if x.size == 0:
            continue

        label = "{}-{}".format(row[0], row[1])
        plt.plot(x, y, label=label)
        # These two entries are plotted but never fitted.
        if row[1] in ("China", "US"):
            continue

        try:
            model_step = StepModel()
            model_exp = ExponentialModel()
            params_step = model_step.guess(y, x=x)
            params_exp = model_exp.guess(y, x=x)

            result_step = model_step.fit(y, params_step, x=x)
            result_exp = model_exp.fit(y, params_exp, x=x)
        except Exception as error:
            # Fitting stays best-effort, but a skipped series is reported
            # instead of vanishing silently from the figure.
            warnings.warn("Skipping fit for {}: {}".format(label, error))
            continue

        plt.plot(x_pred,
                 model_step.eval(result_step.params, x=x_pred),
                 ':',
                 label='fit-{}'.format(label))
        plt.plot(x_pred,
                 model_exp.eval(result_exp.params, x=x_pred),
                 '.',
                 label='fit-{}'.format(label))

    plt.legend(prop={"size": 7})
    plt.yscale('log')
    plt.xticks(rotation=45)
    plt.grid(which='both')
class gene_set():
    """Fit a composite decay model to one gene and locate its 0.5 crossing.

    Creating an instance runs the whole pipeline as a side effect:
    ``get_model`` -> ``def_peaks`` -> ``model_resetting`` -> ``half_life``
    -> ``saving`` (which appends one line to a text file).

    The class relies on names defined at module level: ``reference`` (an
    iterable of rows whose first field is a gene id), ``np``,
    ``ExponentialModel`` and ``VoigtModel``.
    """

    def __init__(self, gene_id, cluster):
        """Load the values for ``gene_id`` from ``reference`` and run the fit.

        Args:
            gene_id: Identifier compared against the first field of each
                ``reference`` row; the first matching row is used.
            cluster: Stored and written to the output file unchanged.

        Raises:
            IndexError: If no row of ``reference`` matches ``gene_id``.
        """
        self.cluster = cluster

        self.gene_id = gene_id
        self.norm_vals = [
            float(y) for y in [x[1:] for x in reference if x[0] == gene_id][0]
        ]  # TPM.

        self.get_model()
        self.def_peaks()
        self.model_resetting()
        self.half_life()
        self.saving()

    def _set_bracket(self, low_index, high_index, flag):
        """Store ``x[low_index]``/``x[high_index]`` as the search bracket."""
        self.min = self.x[low_index]
        self.max = self.x[high_index]
        self.peak_flag = flag

    def get_model(self):
        """Fit an exponential + Voigt composite model to the six values.

        Returns:
            Tuple ``(comp_out, comp_chisq, out, chisq, usedmod, model_flag)``.
        """
        self.x = np.array([0, 1, 2, 6, 12, 24])
        self.y = np.array(self.norm_vals)

        # Compound model with Voigt curve.
        self.background = ExponentialModel(prefix='b_')
        self.pars = self.background.guess(self.y, x=self.x)
        self.peak = VoigtModel(prefix='p_')
        self.pars += self.peak.guess(self.y, x=self.x)
        self.comp_mod = self.peak + self.background
        self.init = self.comp_mod.eval(self.pars, x=self.x)
        # NOTE(review): the guessed parameters in self.pars are only used for
        # self.init above; fit() is called without them -- confirm intent.
        self.comp_out = self.comp_mod.fit(
            self.y, x=self.x,
            fit_kws={'nan_policy': 'propagate'
                     })  # instead of 'omit', it keeps the zero values.
        self.comp_list = self.comp_out.fit_report().split('\n')
        # Take chi-square from the result object; slicing it out of the
        # report text depends on the report layout and truncates the number.
        self.comp_chisq = self.comp_out.chisqr

        self.out = self.comp_out
        self.chisq = self.comp_chisq
        self.usedmod = self.comp_mod
        self.model_flag = "composite (exponential+Voigt)"

        return self.comp_out, self.comp_chisq, self.out, self.chisq, self.usedmod, self.model_flag

    def def_peaks(self):
        """Choose the pair of time points between which the fit crosses 0.5.

        Sets ``self.min``/``self.max`` (the bracket, in units of ``self.x``)
        and ``self.peak_flag``.  The bracket stays ``0, 0`` when the fit
        never drops to 0.5, when a fitted point equals 0.5 exactly, or when
        none of the cases below applies.

        Returns:
            Tuple ``(min, max, peak_flag, bestfit)``.
        """
        self.bestfit = self.comp_out.best_fit
        self.idx = np.argmin(np.abs(self.bestfit - 0.5))
        self.mysort = np.argsort(np.abs(self.bestfit - 0.5))
        self.peak_flag = None
        # Default bracket, so every path leaves min/max defined for the
        # return statement below and for half_life().
        self.min = 0
        self.max = 0

        closest = self.bestfit[self.idx]  # fitted value nearest to 0.5
        runner_up = self.mysort[1]  # index of the second-nearest value
        gap = np.abs(self.idx - runner_up)

        # NOTE(review): the ``idx + 1`` / ``runner_up + 1`` lookups below can
        # run past the last point when idx is 5; not handled here.
        if all(i > 0.5 for i in self.bestfit):
            # The fit never reaches 0.5 and no extrapolation is attempted
            # (not enough data points).
            self.peak_flag = "No predictable half-life"

        elif closest == 0.5:
            # One time point hits 0.5 exactly.
            # NOTE(review): half_life_x is the index, not self.x[idx].
            self.half_life_y = closest
            self.half_life_x = self.idx
            self.peak_flag = "Exact compound half-life"

        elif self.bestfit[0] > 0.5 and self.bestfit[1] < 0.5:
            self._set_bracket(0, 1, "Compound")

        elif self.idx == 5 and self.bestfit[self.idx - 1] < 0.5:
            # The last value is the closest one.
            self._set_bracket(self.idx - 1, self.idx, "Compound")

        elif gap == 1:
            if closest < 0.5:
                self._set_bracket(self.idx - 1, self.idx, "Compound")
            elif closest > 0.5:
                self._set_bracket(self.idx, self.idx + 1, "Compound")

        elif gap > 1:
            # The two nearest points are not adjacent: there is a bump.
            if closest < 0.5:
                self._set_bracket(self.idx - 1, self.idx, "Compound")

            elif closest > 0.5 and self.bestfit[runner_up] < 0.5:
                if self.bestfit[self.idx + 1] < 0.5:
                    self._set_bracket(self.idx, self.idx + 1, "Compound")
                else:
                    # Resetting: bracket around the second-nearest point.
                    self._set_bracket(runner_up - 1, runner_up, "Resetting")

            elif closest > 0.5 and self.bestfit[runner_up] > 0.5:
                if self.bestfit[self.idx + 1] < 0.5:
                    self._set_bracket(self.idx, self.idx + 1, "Compound")
                elif self.bestfit[self.idx + 1] > 0.5 and self.bestfit[
                        runner_up + 1] < 0.5:
                    # Resetting: bracket around the second-nearest point.
                    self._set_bracket(runner_up - 1, runner_up, "Resetting")

        return self.min, self.max, self.peak_flag, self.bestfit

    def model_resetting(self):
        """Refit with a plain exponential when ``def_peaks`` flagged a reset.

        Only acts when ``self.peak_flag == "Resetting"``.  The series is cut
        at the position of the second-highest fitted value, shifted so that
        position becomes time 0, divided by the measured value there, and
        refitted with an ``ExponentialModel``.  ``self.x``, ``self.y``,
        ``self.out``, ``self.bestfit`` and the bracket are replaced by the
        results of that refit.

        Returns:
            Tuple ``(min, max, peak_flag, bestfit, x, out, chisq, usedmod,
            model_flag)``.
        """
        if self.peak_flag == "Resetting":
            # Mostly for plotting; the half-life needs a new zero.
            self.scnd_peak = np.sort(self.bestfit)[-2]
            self.scnd_idx = np.argsort(self.bestfit)[-2]
            self.newzero = self.x[self.scnd_idx]

            # Cut the time scale and reset it to start at 0.
            self.x2 = np.array(
                [i - self.newzero for i in self.x[self.scnd_idx:]])

            # Cut the values and re-normalise them to the new first point.
            self.y2 = np.array(
                [i / self.y[self.scnd_idx] for i in self.y[self.scnd_idx:]])

            self.exp_mod = ExponentialModel(prefix='e_')
            self.pars = self.exp_mod.guess(self.y2, x=self.x2)
            self.init = self.exp_mod.eval(self.pars, x=self.x2)

            self.exp_out = self.exp_mod.fit(self.y2, x=self.x2, missing='drop')
            self.exp_list = self.exp_out.fit_report().split('\n')
            self.exp_chisq = self.exp_out.chisqr

            self.out = self.exp_out
            self.chisq = self.exp_chisq
            self.usedmod = self.exp_mod

            self.bestfit = self.exp_out.best_fit
            self.idx = np.argmin(np.abs(self.bestfit - 0.5))
            self.mysort = np.argsort(np.abs(self.bestfit - 0.5))
            self.peak_flag = None

            if self.bestfit[self.idx] < 0.5:
                self.min = self.x2[self.idx - 1]
                self.max = self.x2[self.idx]
                self.peak_flag = "Resetted exponential"

            elif self.bestfit[self.idx] > 0.5:
                self.min = self.x2[self.idx]
                self.max = self.x2[self.idx + 1]
                self.peak_flag = "Resetted exponential"

            # saving() writes six fitted values, so pad the shortened fit by
            # repeating its last value.
            if len(self.bestfit) < 6:
                padding = [self.bestfit[-1]] * (6 - len(self.bestfit))
                self.bestfit = np.append(self.bestfit, padding)

            self.x = self.x2
            self.y = self.y2
            self.model_flag = "exponential"

        return self.min, self.max, self.peak_flag, self.bestfit, self.x, self.out, self.chisq, self.usedmod, self.model_flag

    def half_life(self):
        """Scan the bracket for the x where the fitted model is nearest 0.5.

        The model is evaluated on a grid over ``[min, max)``; grid points
        whose value lies in ``[0.50, 0.51]`` are collected and the one
        closest to 0.5 is kept.  The grid step starts at 0.001 and is divided
        by 100 on each retry, for at most ``self.max_allowed`` attempts.

        Returns:
            Tuple ``(half_life_y, half_life_x, peak_flag)``; both numbers are
            0 when no crossing was found.
        """
        self.new_x = np.array([0])
        self.hl_eval = np.array([0])
        self.hl_array = np.array([0])
        self.hl_coord = np.array([0])
        self.step = None

        if self.peak_flag == "Exact compound half-life":
            # def_peaks() already stored the crossing; keep it.
            return self.half_life_y, self.half_life_x, self.peak_flag

        if self.max == 0:
            self.half_life_y = 0
            self.half_life_x = 0
            self.peak_flag = "No predictable half-life"

        else:
            self.half_life_y = 0
            self.half_life_x = 0
            self.step = 0.1
            self.max_allowed = 3
            self.attempt = 0

            while self.half_life_y == 0 and self.attempt < self.max_allowed:
                self.attempt += 1
                self.step = self.step / 100

                # The first pass uses 0.001; finer steps are tried when the
                # slope is too steep for that grid to land in the window.
                self.ranging = np.arange(self.min, self.max, self.step)
                for j in np.nditer(self.ranging):

                    self.new_x = np.array([j])
                    self.hl_eval = self.out.eval(self.out.params, x=self.new_x)

                    if self.hl_eval >= 0.50 and self.hl_eval <= 0.51:

                        self.hl_array = np.append(self.hl_array, self.hl_eval)
                        self.hl_coord = np.append(self.hl_coord, self.new_x)

                # hl_array starts as [0], so with no hits index 0 wins and
                # half_life_y stays 0, which triggers another attempt.
                self.half_life_id = np.argmin(np.abs(self.hl_array - 0.5))
                self.half_life_y = self.hl_array[self.half_life_id]
                self.half_life_x = self.hl_coord[self.half_life_id]

            if self.half_life_y == 0:
                self.peak_flag = "Above permitted interpolation iterations"

        return self.half_life_y, self.half_life_x, self.peak_flag

    def saving(self):
        """Append one tab-separated result line to the output text file."""
        with open('model_fit_c5_average_filtering_compound.txt', 'a') as f:

            f.write(
                "%s\t%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%s\n"
                % (self.gene_id, self.cluster, self.model_flag, self.chisq,
                   self.half_life_y, self.half_life_x, *self.norm_vals[:6],
                   *self.bestfit[:6], self.peak_flag))
class gene_set():
    """Fit three candidate models to one gene and keep the best-scoring one.

    The candidates are exponential + Gaussian, exponential + Voigt, and a
    plain exponential; they are compared by chi-square.  The class relies on
    names defined at module level: ``reference`` (an iterable of rows whose
    first field is a gene id), ``np``, ``ExponentialModel``,
    ``GaussianModel`` and ``VoigtModel``.
    """

    def __init__(self, gene_id, cluster):
        """Load the values for ``gene_id`` from ``reference`` and fit them.

        Args:
            gene_id: Identifier compared against the first field of each
                ``reference`` row; the first matching row is used.
            cluster: Stored unchanged on the instance.

        Raises:
            IndexError: If no row of ``reference`` matches ``gene_id``.
        """
        self.cluster = cluster

        self.gene_id = gene_id
        self.norm_vals = [float(y) for y in [x[1:] for x in reference if x[0] == gene_id][0]]  # TPM.

        self.get_model()

    def get_model(self):
        """Fit the three models and record the winner.

        Sets ``self.out`` to the selected fit result and ``self.model_flag``
        to a label naming it.  When two models tie for the lowest
        chi-square, the label names both.

        Returns:
            Tuple ``(comp_out1, comp_chisq1, comp_out2, comp_chisq2, exp_out,
            exp_chisq, model_flag)``.
        """
        self.x = np.array([0, 1, 2, 6, 12, 24])
        self.y = np.array(self.norm_vals)
        self.model_flag = None
        self.model_list = []

        # NOTE(review): in all three fits the guessed parameters are only
        # used for the ``init`` curves; fit() is called without them.

        # First model with Gaussian curve.
        self.background1 = ExponentialModel(prefix='e1_')
        self.pars1 = self.background1.guess(self.y, x=self.x)
        self.peak1 = GaussianModel(prefix='p1_')
        self.pars1 += self.peak1.guess(self.y, x=self.x)
        self.comp_mod1 = self.peak1 + self.background1
        self.init1 = self.comp_mod1.eval(self.pars1, x=self.x)
        self.comp_out1 = self.comp_mod1.fit(self.y, x=self.x, fit_kws={'nan_policy': 'omit'})
        self.comp_list1 = self.comp_out1.fit_report().split('\n')
        # Chi-square comes from the result object; slicing it out of the
        # report text depends on the report layout and truncates the number.
        self.comp_chisq1 = self.comp_out1.chisqr

        # Second model with Voigt curve.
        self.background2 = ExponentialModel(prefix='e2_')
        self.pars2 = self.background2.guess(self.y, x=self.x)
        self.peak2 = VoigtModel(prefix='p2_')
        self.pars2 += self.peak2.guess(self.y, x=self.x)
        self.comp_mod2 = self.peak2 + self.background2
        self.init2 = self.comp_mod2.eval(self.pars2, x=self.x)
        self.comp_out2 = self.comp_mod2.fit(self.y, x=self.x, fit_kws={'nan_policy': 'omit'})
        self.comp_list2 = self.comp_out2.fit_report().split('\n')
        self.comp_chisq2 = self.comp_out2.chisqr

        # Exponential model for reference.
        self.exp_mod = ExponentialModel(prefix='onlye_')
        self.pars = self.exp_mod.guess(self.y, x=self.x)
        self.init = self.exp_mod.eval(self.pars, x=self.x)

        self.exp_out = self.exp_mod.fit(self.y, x=self.x, missing='drop')
        self.exp_list = self.exp_out.fit_report().split('\n')
        self.exp_chisq = self.exp_out.chisqr

        self.model_list = [self.comp_chisq1, self.comp_chisq2, self.exp_chisq]

        # NOTE(review): the first count is compared with 5 while the second
        # is only tested for being non-zero -- confirm this is intended.
        if np.count_nonzero(np.isinf(self.comp_out1.best_fit)) == 5 and np.count_nonzero(np.isinf(self.comp_out2.best_fit)):
            # Both composite fits produced infinities: use the exponential.
            self.model_flag = "exponential"
            self.out = self.exp_out

        else:
            best = min(self.model_list)

            # Lowest chi-square wins; on a tie the earlier model is chosen
            # here and the label is refined below.
            if best == self.comp_chisq1:
                self.model_flag = "Gaussian compound"
                self.out = self.comp_out1

            elif best == self.comp_chisq2:
                self.model_flag = "Voigt compound"
                self.out = self.comp_out2

            elif best == self.exp_chisq:
                self.model_flag = "exponential"
                self.out = self.exp_out

            # Tie handling.  These can only fire when two scores are equal,
            # and a later match overrides an earlier one.
            if best == self.comp_chisq1 and self.comp_chisq1 == self.comp_chisq2:
                self.model_flag = "Both compounds"
                self.out = self.comp_out2

            if best == self.comp_chisq2 and self.comp_chisq2 == self.exp_chisq:
                self.model_flag = "Voigt compound and exponential"
                self.out = self.comp_out2

            if best == self.exp_chisq and self.exp_chisq == self.comp_chisq1:
                self.model_flag = "Gaussian compound and exponential"
                self.out = self.comp_out1

        return self.comp_out1, self.comp_chisq1, self.comp_out2, self.comp_chisq2, self.exp_out, self.exp_chisq, self.model_flag
class gene_set():
    """Fit a plain exponential to one gene and locate its 0.5 crossing.

    Creating an instance runs ``get_model`` -> ``half_life`` -> ``printing``
    -> ``saving`` as a side effect; the last two print the result and append
    it to a text file.  The class relies on names defined at module level:
    ``reference`` (an iterable of rows whose first field is a gene id),
    ``np`` and ``ExponentialModel``.
    """

    def __init__(self, gene_id, cluster):
        """Load the values for ``gene_id`` from ``reference`` and run the fit.

        Args:
            gene_id: Identifier compared against the first field of each
                ``reference`` row; the first matching row is used.
            cluster: Stored and written to the output unchanged.

        Raises:
            IndexError: If no row of ``reference`` matches ``gene_id``.
        """
        self.cluster = cluster

        self.gene_id = gene_id
        self.norm_vals = [
            float(y) for y in [x[1:] for x in reference if x[0] == gene_id][0]
        ]  # TPM.

        self.get_model()
        self.half_life()
        self.printing()
        self.saving()

    def get_model(self):
        """Fit an ``ExponentialModel`` to the six values.

        Returns:
            Tuple ``(exp_out, exp_chisq)``.
        """
        self.x = np.array([0, 1, 2, 6, 12, 24])
        self.y = np.array(self.norm_vals)

        # Exponential model for reference.
        self.exp_mod = ExponentialModel(prefix='onlye_')
        self.pars = self.exp_mod.guess(self.y, x=self.x)
        self.init = self.exp_mod.eval(self.pars, x=self.x)

        # NOTE(review): the guessed parameters in self.pars are only used for
        # self.init above; fit() is called without them -- confirm intent.
        self.exp_out = self.exp_mod.fit(self.y, x=self.x, missing='drop')
        self.exp_list = self.exp_out.fit_report().split('\n')
        # Chi-square comes from the result object; slicing it out of the
        # report text depends on the report layout and truncates the number.
        self.exp_chisq = self.exp_out.chisqr

        return self.exp_out, self.exp_chisq

    def half_life(self):
        """Find the x at which the fitted curve is closest to 0.5.

        A bracket ``[min, max)`` is chosen from the fitted values at the six
        time points, then scanned in steps of 0.001; grid points whose
        fitted value lies in ``[0.50, 0.51]`` are collected and the one
        closest to 0.5 is kept.

        Returns:
            Tuple ``(half_life_y, half_life_x)``; both are 0 when no bracket
            was found or nothing in the bracket fell inside the window.
        """
        self.new_x = np.array([0])
        self.hl_eval = np.array([0])
        self.hl_array = np.array([0])
        self.hl_coord = np.array([0])
        self.bestfit = self.exp_out.best_fit
        self.idx = np.argmin(np.abs(self.bestfit - 0.5))
        # Default bracket, so the exact-hit path and the case where no
        # branch below matches do not leave min/max undefined.
        self.min = 0
        self.max = 0
        exact_hit = False

        if self.idx == 5 and self.bestfit[self.idx - 1] < 0.5:
            # The last point is the closest although the one before it is
            # already below 0.5: drop it and search again.
            self.bestfit = self.exp_out.best_fit[:-1]
            self.idx = np.argmin(np.abs(self.bestfit - 0.5))

        # NOTE(review): the ``idx + 1``, ``idx + 2`` and ``[5]`` lookups
        # below can run past the end of bestfit (in particular after the
        # truncation above), and ``idx - 1`` / ``idx - 2`` wrap around for
        # small idx; not handled here.
        if self.bestfit[self.idx] == 0.5:
            # NOTE(review): half_life_x is the index, not self.x[idx].
            self.half_life_y = self.bestfit[self.idx]
            self.half_life_x = self.idx
            exact_hit = True

        elif 0.5 > self.bestfit[self.idx] and self.bestfit[self.idx - 1] > 0.5:
            self.max = self.x[self.idx]
            self.min = self.x[self.idx - 1]

        elif 0.5 < self.bestfit[self.idx] and self.bestfit[
                self.idx] == self.bestfit[5]:
            # The closest value equals the last one and is still above 0.5.
            self.min = 0
            self.max = 0

        elif 0.5 < self.bestfit[self.idx] and self.bestfit[self.idx + 1] < 0.5:
            self.min = self.x[self.idx]
            self.max = self.x[self.idx + 1]

        elif 0.5 < self.bestfit[self.idx] and self.bestfit[
                self.idx + 1] > 0.5 and self.bestfit[self.idx + 2] < 0.5:
            self.min = self.x[self.idx + 1]
            self.max = self.x[self.idx + 2]

        elif 0.5 > self.bestfit[self.idx] and self.bestfit[
                self.idx + 1] < 0.5 and self.bestfit[self.idx - 2] > 0.5:
            self.min = self.x[self.idx - 2]
            self.max = self.x[self.idx]

        self.ranging = np.arange(self.min, self.max, 0.001)

        if not exact_hit:
            if self.max > 0:
                for j in np.nditer(self.ranging):

                    self.new_x = np.array([j])
                    self.hl_eval = self.exp_out.eval(self.exp_out.params,
                                                     x=self.new_x)

                    if self.hl_eval >= 0.50 and self.hl_eval <= 0.51:

                        self.hl_array = np.append(self.hl_array, self.hl_eval)
                        self.hl_coord = np.append(self.hl_coord, self.new_x)

                # hl_array starts as [0]; with no hits index 0 is selected
                # and both results come out as 0.
                self.half_life_id = np.argmin(np.abs(self.hl_array - 0.5))
                self.half_life_y = self.hl_array[self.half_life_id]
                self.half_life_x = self.hl_coord[self.half_life_id]

            else:
                self.half_life_y = 0
                self.half_life_x = 0

        # Restore the full six-point fit for printing() and saving().
        self.bestfit = self.exp_out.best_fit
        return self.half_life_y, self.half_life_x

    def printing(self):
        """Print the result fields on one line."""
        print(self.gene_id, self.cluster, "exponential", self.exp_chisq,
              self.half_life_y, self.half_life_x, *self.norm_vals[:6],
              *self.bestfit[:6])

    def saving(self):
        """Append one tab-separated result line to the output text file."""
        with open('model_fit_c5_average_filtering_newexp.txt', 'a') as f:

            f.write(
                "%s\t%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n"
                % (self.gene_id, self.cluster, "exponential", self.exp_chisq,
                   self.half_life_y, self.half_life_x, *self.norm_vals[:6],
                   *self.bestfit[:6]))
Пример #5
0
# Continuation of a fitting script: ``model``, ``x`` and ``y`` are created
# earlier, outside the part shown here.
model.set_param_hint('decay', value=10)

# First Gaussian component, centre constrained to [75, 125].
model += GaussianModel(prefix='g1_')
model.set_param_hint('g1_center', value=105, min=75, max=125)
model.set_param_hint('g1_sigma', value=15, min=3)
model.set_param_hint('g1_amplitude', value=2000, min=10)

# Second Gaussian component, centre constrained to [125, 175].  Its width is
# not free: it is tied to the first component's width through the extra
# parameter ``g2_delta_sigma``.
model += GaussianModel(prefix='g2_')
model.set_param_hint('g2_center', value=155, min=125, max=175)
model.set_param_hint('g2_delta_sigma', value=1.5, min=0.8)
model.set_param_hint('g2_sigma', expr='g2_delta_sigma*g1_sigma')
model.set_param_hint('g2_amplitude', value=2000, min=10)

pars = model.make_params()

# Curve at the starting parameters, kept to compare with the fitted curve.
init = model.eval(pars, x=x)
out = model.fit(y, pars, x=x)

print(out.fit_report(min_correl=0.5))

# Left panel: data, initial curve and best fit.
fig, axes = plt.subplots(1, 2, figsize=(12.8, 4.8))
axes[0].plot(x, y, 'b')
axes[0].plot(x, init, 'k--', label='initial fit')
axes[0].plot(x, out.best_fit, 'r-', label='best fit')
axes[0].legend(loc='best')

# Right panel: data with the individual model components.
comps = out.eval_components(x=x)
axes[1].plot(x, y, 'b')
axes[1].plot(x, comps['g1_'], 'g--', label='Gaussian component 1')