Example #1
import numpy as np
from scipy.stats import gmean, gstd

def log_stats(self, vals):
    # Shift by eps so zero values survive the log-based statistics.
    eps = 1e-16
    nreps = len(vals)
    vals = np.asarray(vals) + eps
    g_mean = gmean(vals) - eps
    g_std = gstd(vals)
    # 95% CI for the geometric mean: gmean * gstd**(+/-1.96/sqrt(n))
    return (g_mean,
            g_mean * (g_std ** (-1.96 / np.sqrt(nreps))),
            g_mean * (g_std ** (1.96 / np.sqrt(nreps))))
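A minimal usage sketch of the same computation with the method detached from its class (the replicate values are made up): the interval tightens as the replicate count grows.

import numpy as np
from scipy.stats import gmean, gstd

vals = [1.2, 0.9, 1.5, 1.1, 1.3]  # hypothetical replicate values
eps = 1e-16
nreps = len(vals)
shifted = np.asarray(vals) + eps
g_mean = gmean(shifted) - eps
half = gstd(shifted) ** (1.96 / np.sqrt(nreps))
print(g_mean, g_mean / half, g_mean * half)  # point estimate, lower, upper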
Example #2
from scipy import stats
from scipy.stats import sem

def cochrans(input_list):
    # confidence = 0.05
    error = sem(input_list)  # standard error of the mean, used as the margin e
    std = stats.gstd(input_list)  # geometric std (computed but unused here)
    # z = stats.zscore(input_list)
    z = 1.96  # critical value for 95% confidence
    # Cochran's formula with maximum variability p = 0.5, so p*(1-p) = 0.25
    n = ((z ** 2) * .25) / (error ** 2)
    return int(n)
Example #3
from scipy import stats
from scipy.stats import sem

def sample_size(input_list):
    # confidence = 0.05
    error = sem(input_list)  # standard error of the mean
    std = stats.gstd(input_list)  # geometric standard deviation
    # z = stats.zscore(input_list)
    z = 1.96  # critical value for 95% confidence
    n = ((z * std) / error) ** 2
    return int(n)
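A quick side-by-side sketch on made-up data, assuming both functions above are in scope: cochrans plugs the worst-case proportion 0.25 into the z-formula, while sample_size scales the geometric std against the standard error of the mean.

data = [10.2, 11.5, 9.8, 12.1, 10.7, 11.0]  # hypothetical measurements
print(cochrans(data))     # proportion-based estimate
print(sample_size(data))  # geometric-std-based estimate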
Example #4
import numpy as np
from scipy import stats

def trace_length(log):
    trace_lengths = []
    n_events = 0
    for trace in log:
        n_events += len(trace)
        trace_lengths.append(len(trace))

    trace_len_min = np.min(trace_lengths)
    trace_len_max = np.max(trace_lengths)
    trace_len_mean = np.mean(trace_lengths)
    trace_len_median = np.median(trace_lengths)
    trace_len_mode = stats.mode(trace_lengths)[0][0]  # ModeResult indexing (scipy < 1.11)
    trace_len_std = np.std(trace_lengths)
    trace_len_variance = np.var(trace_lengths)
    trace_len_q1 = np.percentile(trace_lengths, 25)
    trace_len_q3 = np.percentile(trace_lengths, 75)
    trace_len_iqr = stats.iqr(trace_lengths)
    trace_len_geometric_mean = stats.gmean(trace_lengths)
    trace_len_geometric_std = stats.gstd(trace_lengths)
    trace_len_harmonic_mean = stats.hmean(trace_lengths)
    trace_len_skewness = stats.skew(trace_lengths)
    trace_len_kurtosis = stats.kurtosis(trace_lengths)
    trace_len_coefficient_variation = stats.variation(trace_lengths)
    trace_len_entropy = stats.entropy(trace_lengths)
    trace_len_hist, _ = np.histogram(trace_lengths, density=True)
    trace_len_skewness_hist = stats.skew(trace_len_hist)
    trace_len_kurtosis_hist = stats.kurtosis(trace_len_hist)

    return [
        n_events,
        trace_len_min,
        trace_len_max,
        trace_len_mean,
        trace_len_median,
        trace_len_mode,
        trace_len_std,
        trace_len_variance,
        trace_len_q1,
        trace_len_q3,
        trace_len_iqr,
        trace_len_geometric_mean,
        trace_len_geometric_std,
        trace_len_harmonic_mean,
        trace_len_skewness,
        trace_len_kurtosis,
        trace_len_coefficient_variation,
        trace_len_entropy,
        *trace_len_hist,
        trace_len_skewness_hist,
        trace_len_kurtosis_hist,
    ]
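A toy invocation, assuming a log is any iterable of traces where len(trace) gives the trace length (with the imports above):

toy_log = [list('abc'), list('abcd'), list('ab'), list('abc')]  # hypothetical log
features = trace_length(toy_log)
print(len(features), features[:5])  # n_events followed by the summary stats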
Example #5
def comparacion_utilidadades(self):
    # "Utility comparison": find the best/worst alpha, then build 95% CIs.
    print('CALCULANDO ALFA ÓPTIMO Y PEOR...')  # computing optimal and worst alpha
    self.senal_actualizar_avance.emit(
        ('CALCULANDO ALFA ÓPTIMO Y PEOR...', 0))
    self.calcular_alfas()
    print('CALCULANDO INTERVALOS DE CONFIANZA...')  # computing confidence intervals
    self.senal_actualizar_avance.emit(
        ('CALCULANDO INTERVALOS DE CONFIANZA...',
         self.iteraciones_busqueda_alfas))
    iteracion = 0
    valores_buenos = []
    valores_malos = []
    while iteracion < self.iteraciones_validacion_alfas:
        bueno = self.calcular_utilidad(self.alfa_bueno)
        malo = self.calcular_utilidad(self.alfa_malo)
        valores_buenos.append(bueno)
        valores_malos.append(malo)
        iteracion += 1
        print(iteracion)
        self.senal_actualizar_avance.emit(
            ('CALCULANDO INTERVALOS DE CONFIANZA...',
             iteracion + self.iteraciones_busqueda_alfas))
    # Normal intervals with the geometric std as the scale parameter
    self.intervalo_bueno = st.norm.interval(self.confianza_intervalos,
                                            loc=np.mean(valores_buenos),
                                            scale=st.gstd(valores_buenos))
    self.intervalo_malo = st.norm.interval(self.confianza_intervalos,
                                           loc=np.mean(valores_malos),
                                           scale=st.gstd(valores_malos))
    self.senal_actualizar_avance.emit(
        ('CALCULANDO INTERVALOS DE CONFIANZA...',
         iteracion + self.iteraciones_busqueda_alfas))
    self.senal_terminar.emit({
        'utilidad_buena': self.intervalo_bueno,
        'utilidad_mala': self.intervalo_malo,
        'todas': self.intervalo_todas_las_ut
    })
Example #6
def get_cgm_stats(self, start_date, end_date):
    """
    Compute CGM stats between two dates.

    Args:
        start_date (dt.DateTime): start date
        end_date (dt.DateTime): end date

    Returns:
        (float, float): geometric mean and geometric std
    """
    cgm_values = []
    for time, cgm_event in self.glucose_timeline.items():
        if start_date <= time <= end_date:
            cgm_value = cgm_event.get_value()
            cgm_values.append(cgm_value)

    return gmean(cgm_values), gstd(cgm_values)
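The same geometric summary can be reproduced directly on a plain list (the readings below are made up):

from scipy.stats import gmean, gstd

cgm_values = [110, 95, 130, 142, 101]  # hypothetical mg/dL readings
print(gmean(cgm_values), gstd(cgm_values))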
Example #7
def combine_triplicates(plate_df_in, checks_include, master, use_master_curve):
    '''
    Flags outliers via the Grubbs test, then calculates the Cq means, Cq stds,
    and counts before and after removing outliers.

    Params
    plate_df_in:
        qPCR data in a pandas df; must be 1 plate with 1 target,
        in the format from QuantStudio3 with
        columns 'Target', 'Sample', 'Cq'
    checks_include:
        which way to check for outliers; options are
        ('grubbs_only', None)
    Returns
    plate_df: same data, with additional columns depending on checks_include
        Cq_mean (calculated mean of Cq after excluding outliers)
        Q_QuantStudio_std (calculated standard deviation based on QuantStudio
        output) for the intraassay coefficient of variation

    Note: Cq_raw preserves the raw values; Cq_fin is after substitution and
    outlier removal; Cq_subbed is after substitution only (so that it goes
    through the Grubbs test).
    '''

    if (checks_include not in ['grubbs_only', None]):
        raise ValueError('''invalid input, must be 'grubbs_only' or None''')

    if len(plate_df_in.Target.unique()) > 1:
        raise ValueError('''More than one target in this dataframe''')

    target = plate_df_in.Target.unique()
    plate_df = plate_df_in.copy()  # fixes pandas warning
    groupby_list = [
        'plate_id', 'Sample', 'sample_full', 'Sample_plate', 'Target', 'Task',
        'inhibition_testing', 'is_dilution', "dilution"
    ]

    # make copy of Cq column and later turn this to np.nan for outliers
    plate_df['Cq_raw'] = plate_df['Cq'].copy()
    plate_df["master_curve_bloq_qpcr_reps"] = False
    if ((use_master_curve) & (target[0] != "Xeno")):
        plate_df.loc[(np.isnan(plate_df.Cq)) | (plate_df.Cq > 40),
                     "master_curve_bloq_qpcr_reps"] = True
        plate_df.loc[(np.isnan(plate_df.Cq)) | (plate_df.Cq > 40),
                     "Cq"] = master.loc[master.Target == target[0],
                                        "LoD_Cq"].item()

    plate_df['Cq_subbed'] = plate_df['Cq'].copy()
    plate_df['Cq_fin'] = plate_df['Cq'].copy()

    # grubbs with scikit
    if checks_include in ['all', 'grubbs_only']:
        plate_df = get_pass_grubbs_test(plate_df, ['Sample'])
        plate_df.loc[plate_df.grubbs_test == False, 'Cq_fin'] = np.nan

    # summarize to get mean, std, counts with and without outliers removed
    plate_df_avg = plate_df.groupby(groupby_list).agg(
        raw_Cq_values=('Cq_raw', list),
        sub_Cq_values=('Cq_subbed', list),
        outlier_Cq_values=('Cq_fin', list),
        # grubbs_check=('grubbs_test', list),
        template_volume=('template_volume', 'max'),
        Q_init_mean=(
            'Quantity', 'mean'
        ),  #only needed to preserve quantity information for standards later
        Q_init_std=('Quantity', 'std'),
        Q_init_gstd=('Quantity', lambda x: np.nan
                     if ((len(x.dropna()) < 2) | all(np.isnan(x))) else
                     (sci.gstd(x.dropna(), axis=0))),
        # Q_QuantStudio_std = ('Quantity', 'std'),
        Cq_init_mean=('Cq_raw', 'mean'),
        Cq_init_std=('Cq_raw', 'std'),
        Cq_init_min=('Cq_raw', 'min'),
        replicate_init_count=('Cq', 'count'),
        Cq_mean=('Cq_fin', 'mean'),
        Cq_std=('Cq_fin', 'std'),
        replicate_count=('Cq_fin', 'count'),
        is_undetermined_count=('is_undetermined', 'sum'),
        is_bloq_count=('master_curve_bloq_qpcr_reps', 'sum'))
    # note: count in agg will exclude nan
    plate_df_avg = plate_df_avg.reset_index()

    return (plate_df, plate_df_avg)
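The Q_init_gstd aggregation above guards against groups where the geometric std is undefined (fewer than two non-NaN values). A standalone sketch of that pattern on a toy frame:

import numpy as np
import pandas as pd
from scipy import stats as sci

df = pd.DataFrame({'Sample': ['a', 'a', 'b'],
                   'Quantity': [4.0, 9.0, np.nan]})  # toy data
out = df.groupby('Sample').agg(
    Q_init_gstd=('Quantity', lambda x: np.nan
                 if ((len(x.dropna()) < 2) | all(np.isnan(x))) else
                 sci.gstd(x.dropna(), axis=0)))
print(out)  # group 'b' yields NaN instead of raising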
Example #8
def process_unknown(plate_df, std_curve_info, use_master_curve, master):
    '''
    Calculates quantity based on Cq_mean and the standard curve.
    Params
        plate_df: output from combine_triplicates(); df containing Cq_mean;
        must be a single plate with a single target
        std_curve_info: output from process_standard() as a list
    Returns
        unknown_df: the unknown subset of plate_df, with new columns:
        Quantity_mean
        q_diff
        Cq_of_lowest_sample_quantity: the Cq value of the lowest point used
        on the plate
        These columns represent the recalculated quantity using Cq_mean and
        the slope and intercept from the std curve.
        percent_CV: the coefficient of variation for qPCR technical triplicates
        intraassay_var: intraassay variation (arithmetic mean of the
        coefficient of variation for all triplicates on a plate)
    '''

    [
        num_points, Cq_of_lowest_std_quantity, lowest_std_quantity,
        Cq_of_lowest_std_quantity_gsd, slope, intercept, r2, efficiency
    ] = std_curve_info
    unknown_df = plate_df[plate_df.Task != 'Standard'].copy()
    unknown_df['Cq_of_lowest_sample_quantity'] = np.nan
    # geometric std minus 1 gives the coefficient of variation, using
    # QuantStudio quantities to capture all the variation in the plate
    unknown_df['percent_CV'] = (unknown_df['Q_init_gstd'] - 1) * 100
    if all(np.isnan(unknown_df['percent_CV'])):
        unknown_df['intraassay_var'] = np.nan  # avoid error
    else:
        unknown_df['intraassay_var'] = np.nanmean(unknown_df['percent_CV'])

    # Set the Cq of the lowest sample quantity for the different situations
    if len(unknown_df.Task) == 0:  #only standard curve plate
        unknown_df['Cq_of_lowest_sample_quantity'] = np.nan
    else:
        if all(np.isnan(
                unknown_df.Cq_mean)):  #plate with all undetermined samples
            unknown_df['Cq_of_lowest_sample_quantity'] = np.nan  #avoid error
        else:
            targs = unknown_df.Target.unique()  # other plates (most cases)
            for target in targs:
                unknown_df.loc[(unknown_df.Target == target),
                               'Cq_of_lowest_sample_quantity'] = np.nanmax(
                                   unknown_df.loc[(
                                       unknown_df.Target == target),
                                                  'Cq_mean'])  #because of xeno

    unknown_df['Quantity_mean'] = np.nan
    unknown_df['q_diff'] = np.nan

    if not use_master_curve:
        unknown_df['Quantity_mean'] = 10**(
            (unknown_df['Cq_mean'] - intercept) / slope)

        #initialize columns
        unknown_df['Quantity_std_combined_after'] = np.nan
        unknown_df['Quantity_mean_combined_after'] = np.nan
        for row in unknown_df.itertuples():
            ix = row.Index
            filtered_1 = [
                element for element in row.raw_Cq_values if ~np.isnan(element)
            ]  # drop initial NaNs
            filtered = [
                10**((element - intercept) / slope) for element in filtered_1
            ]
            if (len(filtered) > 1):
                filtered = [
                    element for element in filtered if ~np.isnan(element)
                ]  # NaNs introduced when slope and intercept are NaN
                if (len(filtered) > 1):
                    if (row.Target != "Xeno"):
                        unknown_df.loc[
                            ix, "Quantity_mean_combined_after"] = sci.gmean(
                                filtered)
                        if (all(x > 0 for x in filtered)):
                            unknown_df.loc[
                                ix, "Quantity_std_combined_after"] = sci.gstd(
                                    filtered)
    if use_master_curve:
        targs = unknown_df.Target.unique()
        for targ in targs:
            if targ != "Xeno":
                unknown_df["blod_master_curve"] = False
                m_b = master.loc[master.Target == targ, "b"].item()
                m_m = master.loc[master.Target == targ, "m"].item()
                lowest = master.loc[master.Target == targ,
                                    "lowest_quantity"].item()
                lod = master.loc[master.Target == targ, "LoD_quantity"].item()
                unknown_df.loc[unknown_df.Target == targ,
                               'Quantity_mean'] = 10**(
                                   (unknown_df.loc[unknown_df.Target == targ,
                                                   'Cq_mean'] - m_b) / m_m)
                unknown_df.loc[unknown_df.Quantity_mean < lowest,
                               "blod_master_curve"] = True
                unknown_df.loc[unknown_df.Quantity_mean < lowest,
                               'Quantity_mean'] = lod
    else:
        unknown_df["blod_master_curve"] = False

    # if Cq_mean is zero, don't calculate a quantity (turn to NaN)
    unknown_df.loc[unknown_df[unknown_df.Cq_mean == 0].index,
                   'Quantity_mean'] = np.nan
    unknown_df[
        'q_diff'] = unknown_df['Q_init_mean'] - unknown_df['Quantity_mean']

    return (unknown_df)
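A worked sketch of the percent_CV idea in isolation: for small spreads, (gstd - 1) * 100 approximates the ordinary coefficient of variation (the triplicate quantities below are made up):

import numpy as np
from scipy.stats import gstd

quantities = [980.0, 1050.0, 1010.0]  # hypothetical triplicate quantities
percent_cv = (gstd(quantities) - 1) * 100
print(round(percent_cv, 2))  # ~3.5
print(round(np.std(quantities, ddof=1) / np.mean(quantities) * 100, 2))  # ~3.5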
Example #9
    def calcular_alfas(self):
        '''
        alfa_0: INITIAL ALPHA
        var: VARIATION OF EACH ALPHA PER ITERATION
        iter_max: NUMBER OF ITERATIONS

        RETURNS

        incumbent_ut: BEST UTILITY
        incumbent_alfa: BEST ALPHA
        mala_ut: WORST UTILITY
        mal_alfa: WORST ALPHA
        nova_ut: LIST OF TUPLES (BEST UTILITY SO FAR, BEST ALPHA SO FAR)
        malas_utilidades: LIST OF TUPLES (WORST UTILITY SO FAR, WORST ALPHA SO FAR)
        maximos_por_iter: LIST OF THE BEST UTILITY PER ITERATION
        '''

        incumbent_ut = self.calcular_utilidad(self.alfa_0)

        incumbent_alfa = self.alfa_0
        nova_ut = [(incumbent_ut, incumbent_alfa)]
        mala_ut = incumbent_ut
        mejor_alfa = incumbent_alfa
        mal_alfa = incumbent_alfa
        malas_utilidades = [(mala_ut, mal_alfa)]
        todas_las_utilidades = [incumbent_ut]
        iteracion = 0
        maximo_igual = 0
        minimo_igual = 0
        pond = 1
        while iteracion < self.iteraciones_busqueda_alfas and maximo_igual < self.iteraciones_igual_res:
            seed()
            print(maximo_igual, self.iteraciones_igual_res, incumbent_ut)
            alfas = [self.nuevo_alpha(mejor_alfa, pond) for _ in range(5)]
            print(alfas)
            utilidades = []
            for alpha in alfas:
                ut = self.calcular_utilidad(alpha)
                utilidades.append(ut)
            mejor_utilidad = max(utilidades)
            mejor_alfa = alfas[utilidades.index(mejor_utilidad)]
            self.maximos.append(mejor_utilidad)
            peor_utilidad = min(utilidades)
            peor_alfa = alfas[utilidades.index(peor_utilidad)]
            todas_las_utilidades += utilidades
            if peor_utilidad < mala_ut:
                mala_ut = peor_utilidad
                mal_alfa = peor_alfa
                malas_utilidades.append((mala_ut, mal_alfa))
                minimo_igual = 0
            if mejor_utilidad > incumbent_ut:
                incumbent_ut = mejor_utilidad
                incumbent_alfa = mejor_alfa
                nova_ut.append((incumbent_ut, incumbent_alfa))
                maximo_igual = 0
            maximo_igual += 1
            self.iteracion_busqueda += 1
            print(self.iteracion_busqueda)
            # Widen the search step the longer the incumbent stays unchanged
            if maximo_igual >= self.iteraciones_igual_res * 0.75:
                pond = 4
            elif maximo_igual >= self.iteraciones_igual_res * 0.5:
                pond = 3
            elif maximo_igual >= self.iteraciones_igual_res * 0.25:
                pond = 2
            else:
                pond = 1
        self.senal_actualizar_avance.emit(
            ('CALCULANDO ALFA ÓPTIMO Y PEOR...', iteracion))
        self.ut_buena = incumbent_ut
        self.alfa_bueno = incumbent_alfa
        self.ut_mala = mala_ut
        self.alfa_malo = mal_alfa
        self.mejores_utilidades = nova_ut
        self.peores_utilidades = malas_utilidades
        self.intervalo_todas_las_ut = st.norm.interval(
            self.confianza_intervalos,
            loc=np.mean(todas_las_utilidades),
            scale=st.gstd(todas_las_utilidades))
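The closing interval computation in isolation (a sketch with made-up utilities): st.norm.interval returns the (lower, upper) bounds of a normal interval centred at loc, here with the geometric std as the scale.

import numpy as np
import scipy.stats as st

utilidades = [5.1, 4.8, 5.6, 5.0, 5.3]  # hypothetical utilities
print(st.norm.interval(0.95, loc=np.mean(utilidades), scale=st.gstd(utilidades)))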
Example #10
import numpy as np
from scipy.stats import gmean, gstd

def log_stats(self, vals):
    eps = 1e-16
    vals = np.asarray(vals) + eps
    g_mean = gmean(vals) - eps
    g_std = gstd(vals)
    # Without the 1/sqrt(n) factor this spans ~95% of individual values
    # rather than a confidence interval for the geometric mean.
    return g_mean, g_mean * (g_std**(-1.96)), g_mean * (g_std**(1.96))
Example #11
from scipy import stats
from scipy.stats import sem

def sample_size(input_list):
    error = sem(input_list)
    std = stats.gstd(input_list)
    # stats.zscore returns an array, so int(n) below would fail;
    # a fixed critical value (95% confidence) is presumably intended.
    z = 1.96
    n = ((z * std) / error)**2
    return int(n)
Example #12
# Snippet begins mid-script: `record` collects [part1, part2] timing pairs
# from CSV-like rows (input source and enclosing loop assumed here).
from math import exp, log
from scipy.stats import gmean, gstd, shapiro, tmean, tstd

record = []
for row in rows:  # `rows`: hypothetical iterable of CSV rows
    try:
        part1Time = int(row[2])
        part2Time = int(row[5])
        record.append([part1Time, part2Time])
    except Exception:
        continue

scores = []
logscores = []
for row in record:
    for i in row:
        scores.append(float(i))
        logscores.append(log(float(i)))

score_gmean = gmean(scores)  # avoid shadowing the scipy functions
score_gstd = gstd(scores)
logmean = tmean(logscores)
logstd = tstd(logscores)
logshapiro = shapiro(logscores)
# 95% prediction interval under a lognormal fit: exp(mean +/- 2*std of logs)
lower = exp(logmean - 2 * logstd)
upper = exp(logmean + 2 * logstd)

print("Score:\n"
      "  Geometric mean: {:.6g}\n"
      "  Geometric standard deviation: {:.6g}\n"
      "  Test of lognormality (Shapiro-Wilk): {:.6g} (p = {:.6g})\n"
      "  Prediction interval (95%): {:.6g} - {:.6g} (lognormal dist.)".format(
          round(score_gmean), score_gstd, logshapiro[0], logshapiro[1],
          round(lower), round(upper)))
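The two routes in this script agree by construction: gmean is exp of the mean of the logs, and gstd is exp of the sample standard deviation of the logs (both tstd and gstd default to ddof=1). A quick check on made-up scores:

import numpy as np
from scipy.stats import gmean, gstd, tmean, tstd

scores = [12.0, 15.0, 9.0, 20.0, 14.0]  # hypothetical times
logs = np.log(scores)
print(np.isclose(gmean(scores), np.exp(tmean(logs))))  # True
print(np.isclose(gstd(scores), np.exp(tstd(logs))))    # True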
Example #13
def xeno_inhibition_test(qpcr_data, qpcr_normd, x=1):
    '''
    Calculates the difference in Ct compared to the NTC for the xeno
    inhibition test and outputs a list of inhibited samples.

    Params
        x (optional): the dCt threshold defining inhibition
        qpcr_data -- main dataframe with qPCR technical triplicates averaged.
            Requires the columns:
                Target (includes xeno)
                plate_id
                Well
                Quantity_mean
                Sample
                Task
        qpcr_normd -- dataframe to receive the is_inhibited column
    Returns
        qpcr_normd with an is_inhibited column
        xeno_fin_all -- the difference in Ct values between the negative
            control (spiked with xeno) and the sample spiked with xeno,
            with a column for inhibited (Yes or No)
        ntc_std_control -- all of the negative control values for xeno
    '''

    #Find targets other than xeno for each well+plate combination
    p_w_targets = qpcr_data[qpcr_data.Target != 'Xeno'].copy()
    p_w_targets['p_id'] = p_w_targets.plate_id.astype('str').str.cat(
        p_w_targets.Well.astype('str'), sep="_")
    p_w_targets = p_w_targets.groupby('p_id')['Target'].apply(
        lambda targs: ','.join(targs)).reset_index()
    p_w_targets.columns = ['p_id', 'additional_target']

    #subset out xeno samples, merge with previous, use to calculate mean and std
    target = qpcr_data[(
        qpcr_data.Target == 'Xeno')].copy()  #includes NTC & stds
    target['p_id'] = qpcr_data.plate_id.astype('str').str.cat(qpcr_data.Well,
                                                              sep="_")
    target = target.merge(p_w_targets, how='left', on='p_id')
    if target.additional_target.astype('str').str.contains(',').any():
        print(target.additional_target.unique())
        raise ValueError(
            'Error: update function, more than 2 multiplexed targets or one of the two multiplexed targets is not xeno'
        )

    target_s = target.groupby([
        "Sample", "sample_full", 'additional_target', 'plate_id', 'Task'
    ]).agg(
        Ct_vet_mean=('Cq', lambda x: np.nan
                     if all(np.isnan(x)) else sci.gmean(x.dropna(), axis=0)),
        Quantity_std_crv=('Quantity', 'max'),  #just for standards
        Ct_vet_std=('Cq', lambda x: np.nan
                    if ((len(x.dropna()) < 2) | all(np.isnan(x))) else
                    (sci.gstd(x.dropna(), axis=0))),
        Ct_vet_count=('Cq', 'count')).reset_index()
    target = target_s[(target_s.Task != 'Standard')].copy()  #remove standards

    #subset and recombine to get NTC as a col
    ntc_col_c = target[target.Task == 'Negative Control'].copy()
    ntc_col = ntc_col_c[["plate_id", 'additional_target',
                         'Ct_vet_mean']].copy()
    ntc_col.columns = ["plate_id", 'additional_target', 'Ct_control_mean']

    ntc_col_c = ntc_col_c[[
        "plate_id", 'Task', 'Quantity_std_crv', 'additional_target',
        'Ct_vet_mean'
    ]].copy()
    ntc_col_c.columns = [
        "plate_id", 'Task', 'Quantity_std_crv', 'additional_target',
        'Ct_control_mean'
    ]

    std_col = target_s[target_s.Task == 'Standard'].copy()
    std_col = std_col[[
        "plate_id", 'Task', 'Quantity_std_crv', 'additional_target',
        'Ct_vet_mean'
    ]].copy()
    std_col.columns = [
        "plate_id", 'Task', 'Quantity_std_crv', 'additional_target',
        'Ct_control_mean'
    ]

    xeno_fin_all = target[target.Task == 'Unknown'].copy()
    xeno_fin_all = xeno_fin_all.merge(ntc_col, how='left')
    xeno_fin_all["dCt"] = (xeno_fin_all["Ct_vet_mean"] -
                           xeno_fin_all["Ct_control_mean"])
    xeno_fin_all["inhibited"] = 'No'
    xeno_fin_all.loc[(xeno_fin_all.dCt > x), "inhibited"] = "Yes"

    ntc_std_control = pd.concat([ntc_col_c, std_col])  # DataFrame.append was removed in pandas 2

    inhibited = xeno_fin_all[xeno_fin_all.dCt > x].Sample.unique()
    not_inhibited = xeno_fin_all[xeno_fin_all.dCt <= x].Sample.unique()

    qpcr_normd["is_inhibited"] = 'unknown'
    qpcr_normd.loc[qpcr_normd.Sample.isin(inhibited), "is_inhibited"] = True
    qpcr_normd.loc[qpcr_normd.Sample.isin(not_inhibited),
                   "is_inhibited"] = False

    return qpcr_normd, xeno_fin_all, ntc_std_control
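The core of the inhibition call, reduced to scalars (all values hypothetical): a sample is flagged when its xeno Ct exceeds the control's by more than x cycles.

ct_control_mean = 27.4  # hypothetical NTC xeno Ct
ct_vet_mean = 29.1      # hypothetical sample xeno Ct
x = 1                   # dCt threshold
dct = ct_vet_mean - ct_control_mean
print('Yes' if dct > x else 'No')  # inhibited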