Example #1
def calculate_fill_rates(machine, rate_df, ts_start, days_back):
    """
    Calculates and stores the fill rates in the database

    :param machine: The machine
    :param rate_df: Data frame with fill levels and interpolated pulse counts
    :param ts_start: Start time of the calculation
    :param days_back: the number of days that should be considered for fitting and removing old data

    :return:-
    """

    # Take the max: either the collector start time or now - days_back (+ partial day)
    ts_start = max(get_now() - dt.timedelta(days=(days_back + 1)), ts_start)

    for gp_cond in GP_CONDITIONS:
        rate_gp = rate_df[PULSECOUNT_INTERPOLATED][rate_df[PULSECOUNT_INTERPOLATED] > gp_cond[0]]. \
            apply(lambda x: fit_rate_pulsecount(x, rate_df, gp_cond))
        # Only keep 'days_back' days of data
        rate_gp = rate_gp[rate_gp.index > ts_start]
        sh.save_to_db(
            {
                "s{0}.VDR_BUCKET._FillRate_{1}Gp".format(
                    machine[SOURCE_NR], gp_cond[0]):
                rate_gp.astype(pd.np.float64)
            }, None)
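
The loop above filters the interpolated pulse counts by each GP condition's threshold and applies fit_rate_pulsecount to every remaining sample. A minimal, self-contained sketch of that filter-then-apply pattern, with hypothetical stand-ins for GP_CONDITIONS, PULSECOUNT_INTERPOLATED and fit_rate_pulsecount (none of these values come from the original code):

import numpy as np
import pandas as pd

# Hypothetical stand-ins: GP_CONDITIONS holds (pulse-count threshold, ...)
# tuples and fit_rate_pulsecount returns a rate estimate for one sample.
GP_CONDITIONS = [(10,), (50,)]

def fit_rate_pulsecount(pulse_count, rate_df, gp_cond):
    # Placeholder fit: scale the pulse count by an arbitrary factor.
    return pulse_count * 0.01

idx = pd.date_range("2024-01-01", periods=6, freq="h")
rate_df = pd.DataFrame(
    {"PulseCount_Interpolated": [5, 20, 40, 60, 80, 100]}, index=idx)

for gp_cond in GP_CONDITIONS:
    # Keep only the samples above the threshold, then fit a rate per sample,
    # mirroring the filter-then-apply step in calculate_fill_rates.
    pulses = rate_df["PulseCount_Interpolated"]
    rate_gp = pulses[pulses > gp_cond[0]].apply(
        lambda x: fit_rate_pulsecount(x, rate_df, gp_cond))
    print(gp_cond[0], rate_gp.astype(np.float64).to_dict())
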
Example #2
def calc_vdr_bucket_fill_level2(machine, ts_start, ts_stop, days_back):
    """
    Calculates vdr bucket fill level 2

    :param machine: machine dict
    :param ts_start: Start time of the calculation
    :param ts_stop:  Stop time of the calculation
    :param days_back: the number of days that should be considered for retrieving and removing old data

    :return: True if data was returned in one of the data frames, False if not
    """

    source_nr = "s{0}".format(machine[SOURCE_NR])

    # Take the max: either the collector start time or now - days_back (+ partial day)
    ts_start = max(get_now() - dt.timedelta(days=(days_back + 1)), ts_start)
    df1 = get_temperatures_df("VDR_HEATER.TCbot", "VDR_HEATER.TCtop",
                              source_nr, ts_start, ts_stop)
    df2 = get_temperatures_df("KPI.HTVB_Bot_Temperature_VALUE",
                              "KPI.HTVB_Top_Temperature_VALUE", source_nr,
                              ts_start, ts_stop)
    df = pd.DataFrame(pd.concat([df1, df2]))
    if df.empty or len(df.columns) < 2:
        TASK_LOGGER.warning("Missing fill level base signals")
        return False
    df = df.loc[(df[BOTTOM] > MIN_TEMP) & (df[BOTTOM] < MAX_TEMP)]
    sh.save_to_db(
        {
            source_nr + "." + FILL_LEVEL_2:
            100 - (0.662 * (df[BOTTOM] - df[TOP])).astype(pd.np.float64)
        },
        None,
        existing_data_option=NEW_DATA_ONLY)
    return True
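
The fill level is derived from the temperature difference between the bottom and top thermocouples: FillLevel2 = 100 - 0.662 * (TCbot - TCtop), so a larger gradient means a lower level. A small worked example with invented temperatures (the column names and numbers are illustrative only):

import pandas as pd

# Illustration of the fill-level formula used above:
# FillLevel2 = 100 - 0.662 * (bottom temperature - top temperature).
df = pd.DataFrame({
    "bottom": [452.0, 455.0, 430.0],
    "top":    [410.0, 400.0, 428.0],
})
fill_level_2 = 100 - 0.662 * (df["bottom"] - df["top"])
print(fill_level_2.round(1).tolist())  # [72.2, 63.6, 98.7]
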
Example #3
def process_signal(db_client, handler, machine,
                   signal, signal_type, job_name, days_back):
    """
    Function that reads data for a certain signal for a certain machine
    :param db_client: the database client that will be used
    :param handler: the handler
    :param machine: the machine
    :param signal: the signal to look for
    :param signal_type: the signal type
    :param job_name: the name of the job
    :param days_back: the number of days back that should be loaded, overriding the last-updated timestamp stored in Redis
    :return:-
    """

    full_signal = "m{0}.{1}.{2}".format(machine[MACHINE_NR], signal_type, signal[NAME])
    signal_id = "SIGNAL:" + ".".join([handler, full_signal])
    last_updated = get_last_updated(signal_id, days_back)
    while True:
        (rows, no_errors, time_spent) = db_client.get_all(
            PMA_QUERY, [signal[NAME], machine[MACHINE_NR],
                        last_updated.strftime(DATETIME_FORMAT), CHUNKSIZE])
        global total_no_errors
        total_no_errors += no_errors
        global total_time_spent
        total_time_spent += time_spent

        if len(rows) > 0:
            idx = [row[0] for row in rows]
            data = [row[1] for row in rows]
            idx = pd.to_datetime(idx).tz_localize(machine["timezone"], ambiguous="NaT")
            out = dict()
            out[signal[NAME]] = pd.Series(data=data, index=idx)
            save_to_db(
                data=out,
                prefix="s{0}.{1}".format(machine[SOURCE_NR], signal_type),
                job_name=job_name)
            last_updated = arrow.get(idx[-1])
            REDIS_CLIENT.set(signal_id, last_updated.timestamp)

        if len(rows) < CHUNKSIZE:
            break
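
process_signal pages through the source table in CHUNKSIZE batches, advancing a per-signal checkpoint in Redis after each batch so the next run resumes where the previous one stopped. A minimal sketch of that chunked, checkpointed loop; fetch_chunk and save_checkpoint are hypothetical stand-ins for the PMA query and the Redis write:

from datetime import datetime, timedelta

CHUNKSIZE = 3

# Fake source data: (timestamp, value) rows, newest last.
ROWS = [(datetime(2024, 1, 1) + timedelta(hours=i), float(i)) for i in range(7)]

def fetch_chunk(after, limit):
    # Stand-in for the database query: rows newer than the checkpoint.
    newer = [r for r in ROWS if r[0] > after]
    return newer[:limit]

def save_checkpoint(ts):
    # Stand-in for the Redis write.
    print("checkpoint:", ts)

last_updated = datetime(2023, 12, 31)
while True:
    rows = fetch_chunk(last_updated, CHUNKSIZE)
    if rows:
        # Process the chunk, then advance the checkpoint to the last
        # timestamp so a restart resumes where this run left off.
        last_updated = rows[-1][0]
        save_checkpoint(last_updated)
    if len(rows) < CHUNKSIZE:
        # Fewer rows than the chunk size means we have caught up.
        break
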
Example #4
def calc_vdr_bucket_medians(machine, ts_start, ts_stop, days_back):
    """
    Calculates the 24-hour median values of Collector._PulseCount and
    VDR_BUCKET._FillLevel2.
    Note that days_back is not taken into consideration, as the pulse counts
    and 24h median level calculations need to look further back in time

    :param machine: machine
    :param ts_start: Start time of the calculation
    :param ts_stop:  Stop time of the calculation
    :param days_back: For the save_to_db function ONLY, data is written from days_back days to ts_stop

    :return: -
    """

    source_nr = "s{0}".format(machine[SOURCE_NR])

    signal_fill_level_median_24h = ("{0}." +
                                    FILL_LEVEL_MEDIAN_24H).format(source_nr)
    signal_collector_pulsecount = "{0}.Collector._PulseCount".format(source_nr)
    signal_collector_pulsecount_median_24h = (
        "{0}." + PULSECOUNT_HTVB_LEVEL_MEDIAN_24H).format(source_nr)

    df_pc_median, df_fl_median = get_median_dfs(
        days_back=days_back,
        signal_collector_pulsecount=signal_collector_pulsecount,
        signal_collector_pulsecount_median_24h=
        signal_collector_pulsecount_median_24h,
        signal_fill_level_median_24h=signal_fill_level_median_24h,
        signal_fill_level2=("{0}." + FILL_LEVEL_2).format(source_nr),
        ts_start=ts_start,
        ts_stop=ts_stop)

    sh.save_to_db(
        {
            signal_collector_pulsecount_median_24h: df_pc_median,
            signal_fill_level_median_24h: df_fl_median
        }, None)
    return
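
get_median_dfs is not shown in these examples. Purely as an assumption about the kind of aggregation behind it, a 24-hour rolling median over a time-indexed Series can be computed with pandas like this:

import numpy as np
import pandas as pd

# Illustration only: a 24-hour, time-based rolling median. The index and
# values are made up; this is not the actual get_median_dfs implementation.
idx = pd.date_range("2024-01-01", periods=48, freq="h")
fill_level = pd.Series(50 + 10 * np.sin(np.arange(48) / 6.0), index=idx)

# Time-based window: each point gets the median of the preceding 24 hours.
fill_level_median_24h = fill_level.rolling("24h").median()
print(fill_level_median_24h.tail(3))
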
Example #5
def calculate_machine_channel_sensitivities(days_back, machine, dt_stop):
    """
    Calculates channel sensitivities and variations for all days for a certain machine
    :param days_back: the number of days back that should be crawled
    :param machine: the machine
    :param dt_stop: stop datetime, closest 6 hour moment
    :return: -
    """

    dt_start = dt_stop - pd.Timedelta(days=days_back)
    df_raw_signals = get_signals(machine, RAW_SIGNAL_PATTERN, dt_start, dt_stop)
    df_avg_signals = get_averages_signals(machine, dt_start, dt_stop)
    df_avg_all_signals = get_all_averages_signal(machine, dt_start, dt_stop)
    LOGGER.info("Calculate Channel Sensitivities: #signals (raw/avg/all_avg) " +
                str(len(df_raw_signals)) + "/" + str(len(df_avg_signals)) + "/" +
                str(len(df_avg_all_signals)))

    # Get Sensitivity signals, take EXTRA_DAYS_BACK_SENSITIVITY days extra,
    # as these are possibly needed to calculate the variations for the current day.
    # Store the result in a singleton.
    CachedSensitivityData().reset_sensitivities()
    CachedSensitivityData().add_extra_safety_sensitivities(
        get_signals(
            machine, SENSITIVITY_SIGNAL_PATTERN,
            dt_stop - pd.Timedelta(days=days_back + EXTRA_SAFETY_DAYS_SENSITIVITY), dt_stop))
    df_final_results = pd.DataFrame()
    for dt_start in pd.date_range(start=dt_start,
                                  end=dt_stop - pd.Timedelta(hours=24),
                                  freq="6H"):
        dt_stop = dt_start + pd.Timedelta(hours=24)
        df_final_results = process_time_window(
            df_final_results=df_final_results, df_avg_all_signals=df_avg_all_signals,
            df_avg_signals=df_avg_signals, df_raw_signals=df_raw_signals,
            dt_start=dt_start, dt_stop=dt_stop)

    sh.save_to_db(df_final_results, None)
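
The loop at the end iterates over overlapping 24-hour windows that start every 6 hours, with the last window ending at dt_stop. A standalone sketch of just that windowing, renaming the loop variables so they do not shadow dt_start and dt_stop (the timestamps are illustrative):

import pandas as pd

# Sliding 24-hour windows advancing in 6-hour steps; the final window
# ends exactly at dt_stop, matching the date_range bounds used above.
dt_stop = pd.Timestamp("2024-01-05 00:00")
dt_start = dt_stop - pd.Timedelta(days=2)

for window_start in pd.date_range(start=dt_start,
                                  end=dt_stop - pd.Timedelta(hours=24),
                                  freq="6h"):
    window_stop = window_start + pd.Timedelta(hours=24)
    print(window_start, "->", window_stop)
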