def calculate_fill_rates(machine, rate_df, ts_start, days_back):
    """
    Calculates and stores the fill rates in the database
    :param machine: The machine
    :param rate_df: Data frame with fill levels and interpolated pulse counts
    :param ts_start: Start time of the calculation
    :param days_back: the number of days that should be considered for fitting
        and removing old data
    :return: -
    """
    # Get the max: either the collector start time or now - days_back (+ partial day)
    ts_start = max(get_now() - dt.timedelta(days=(days_back + 1)), ts_start)

    for gp_cond in GP_CONDITIONS:
        # Keep only pulse counts above the condition threshold and fit a rate per sample
        rate_gp = rate_df[PULSECOUNT_INTERPOLATED][
            rate_df[PULSECOUNT_INTERPOLATED] > gp_cond[0]].apply(
                lambda x: fit_rate_pulsecount(x, rate_df, gp_cond))

        # Only keep 'days_back' days of data
        rate_gp = rate_gp[rate_gp.index > ts_start]

        sh.save_to_db(
            {
                "s{0}.VDR_BUCKET._FillRate_{1}Gp".format(
                    machine[SOURCE_NR], gp_cond[0]): rate_gp.astype(pd.np.float64)
            }, None)
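# Illustrative sketch (not part of the original pipeline): the threshold-filter,
# per-sample fit, and history-trim pattern used in calculate_fill_rates above,
# shown on hypothetical data. The stand-in lambda replaces fit_rate_pulsecount,
# whose real implementation lives elsewhere in this module.
def _example_threshold_fit_and_trim():
    counts = pd.Series(
        [5.0, 20.0, 35.0, 50.0],
        index=pd.date_range("2024-01-01", periods=4, freq="D"))
    threshold = 10                              # plays the role of gp_cond[0]
    ts_start = pd.Timestamp("2024-01-02")

    above = counts[counts > threshold]          # drop samples at/below the threshold
    rates = above.apply(lambda x: x * 0.1)      # stand-in for fit_rate_pulsecount
    return rates[rates.index > ts_start]        # keep only recent history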
def calc_vdr_bucket_fill_level2(machine, ts_start, ts_stop, days_back):
    """
    Calculates vdr bucket fill level 2
    :param machine: machine dict
    :param ts_start: Start time of the calculation
    :param ts_stop: Stop time of the calculation
    :param days_back: the number of days that should be considered for retrieving
        and removing old data
    :return: True if data was returned in one of the data frames, False if not
    """
    source_nr = "s{0}".format(machine[SOURCE_NR])

    # Get the max: either the collector start time or now - days_back (+ partial day)
    ts_start = max(get_now() - dt.timedelta(days=(days_back + 1)), ts_start)

    df1 = get_temperatures_df("VDR_HEATER.TCbot", "VDR_HEATER.TCtop", source_nr,
                              ts_start, ts_stop)
    df2 = get_temperatures_df("KPI.HTVB_Bot_Temperature_VALUE",
                              "KPI.HTVB_Top_Temperature_VALUE", source_nr,
                              ts_start, ts_stop)
    df = pd.DataFrame(pd.concat([df1, df2]))

    if df.empty or len(df.columns) < 2:
        TASK_LOGGER.warning("Missing fill level base signals")
        return False

    # Keep only samples where the bottom temperature is within the valid range
    df = df.loc[(df[BOTTOM] > MIN_TEMP) & (df[BOTTOM] < MAX_TEMP)]

    sh.save_to_db(
        {
            source_nr + "." + FILL_LEVEL_2:
                100 - (0.662 * (df[BOTTOM] - df[TOP])).astype(pd.np.float64)
        },
        None,
        existing_data_option=NEW_DATA_ONLY)
    return True
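# Illustrative sketch (not part of the original pipeline): the fill level 2 formula
# above maps the bottom/top temperature difference to a percentage,
# FillLevel2 = 100 - 0.662 * (T_bottom - T_top).
# With an assumed delta of 10 degrees this gives 100 - 0.662 * 10 = 93.38.
def _example_fill_level2_formula(bottom=45.0, top=35.0):
    return 100 - 0.662 * (bottom - top)   # -> 93.38 for the default (hypothetical) values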
def process_signal(db_client, handler, machine, signal, signal_type, job_name,
                   days_back):
    """
    Function that reads data for a certain signal for a certain machine
    :param db_client: the database client that will be used
    :param handler: the handler
    :param machine: the machine
    :param signal: the signal to look for
    :param signal_type: the signal type
    :param job_name: the name of the job
    :param days_back: the number of days back that should be loaded, as an
        overwrite for the value stored in Redis
    :return: -
    """
    full_signal = "m{0}.{1}.{2}".format(machine[MACHINE_NR], signal_type,
                                        signal[NAME])
    signal_id = "SIGNAL:" + ".".join([handler, full_signal])
    last_updated = get_last_updated(signal_id, days_back)

    while True:
        # Read the next chunk of rows that are newer than the last stored timestamp
        (rows, no_errors, time_spent) = db_client.get_all(
            PMA_QUERY,
            [signal[NAME], machine[MACHINE_NR],
             last_updated.strftime(DATETIME_FORMAT), CHUNKSIZE])

        global total_no_errors
        total_no_errors += no_errors
        global total_time_spent
        total_time_spent += time_spent

        if len(rows) > 0:
            idx = [row[0] for row in rows]
            data = [row[1] for row in rows]
            idx = pd.to_datetime(idx).tz_localize(machine["timezone"],
                                                  ambiguous="NaT")
            out = dict()
            out[signal[NAME]] = pd.Series(data=data, index=idx)
            save_to_db(
                data=out,
                prefix="s{0}.{1}".format(machine[SOURCE_NR], signal_type),
                job_name=job_name)

            # Advance the cursor to the newest timestamp and persist it in Redis
            last_updated = arrow.get(idx[-1])
            REDIS_CLIENT.set(signal_id, last_updated.timestamp)

        if len(rows) < CHUNKSIZE:
            break
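# Illustrative sketch (not part of the original pipeline): the cursor-based paging
# used in process_signal above. The hypothetical fetch_rows callable returns at most
# `chunksize` rows newer than the cursor, as (timestamp, value) tuples; the cursor
# advances to the last row's timestamp and the loop stops on a short (final) chunk.
def _example_chunked_read(fetch_rows, cursor, chunksize=1000):
    collected = []
    while True:
        rows = fetch_rows(cursor, chunksize)    # hypothetical: [(timestamp, value), ...]
        collected.extend(rows)
        if rows:
            cursor = rows[-1][0]                # advance cursor to the newest timestamp seen
        if len(rows) < chunksize:
            break                               # a short chunk means no more data
    return collected, cursor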
def calc_vdr_bucket_medians(machine, ts_start, ts_stop, days_back):
    """
    Calculates the 24 hour median values of Collector._PulseCount and
    VDR_BUCKET._FillLevel2
    Note that days_back is not taken into consideration, as we need to look back
    in time for the pulse counts and median 24h level calculations
    :param machine: machine
    :param ts_start: Start time of the calculation
    :param ts_stop: Stop time of the calculation
    :param days_back: For the save_to_db function ONLY, data is written from
        days_back days to ts_stop
    :return: -
    """
    source_nr = "s{0}".format(machine[SOURCE_NR])
    signal_fill_level_median_24h = ("{0}." + FILL_LEVEL_MEDIAN_24H).format(source_nr)
    signal_collector_pulsecount = "{0}.Collector._PulseCount".format(source_nr)
    signal_collector_pulsecount_median_24h = (
        "{0}." + PULSECOUNT_HTVB_LEVEL_MEDIAN_24H).format(source_nr)

    df_pc_median, df_fl_median = get_median_dfs(
        days_back=days_back,
        signal_collector_pulsecount=signal_collector_pulsecount,
        signal_collector_pulsecount_median_24h=signal_collector_pulsecount_median_24h,
        signal_fill_level_median_24h=signal_fill_level_median_24h,
        signal_fill_level2=("{0}." + FILL_LEVEL_2).format(source_nr),
        ts_start=ts_start,
        ts_stop=ts_stop)

    sh.save_to_db(
        {
            signal_collector_pulsecount_median_24h: df_pc_median,
            signal_fill_level_median_24h: df_fl_median
        }, None)
    return
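# Illustrative sketch (not part of the original pipeline): a rolling 24-hour median
# over a time-indexed series, assuming that is the kind of aggregation get_median_dfs
# performs for the pulse count and fill level signals (its actual implementation,
# defined elsewhere in this module, may differ).
def _example_rolling_24h_median(series):
    # `series` is assumed to be a pandas Series with a DatetimeIndex
    return series.rolling("24H").median()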
def calculate_machine_channel_sensitivities(days_back, machine, dt_stop):
    """
    Calculates channel sensitivities and variations for all days for a certain machine
    :param days_back: the number of days back that should be crawled
    :param machine: the machine
    :param dt_stop: stop datetime, closest 6 hour moment
    :return: -
    """
    dt_start = dt_stop - pd.Timedelta(days=days_back)

    df_raw_signals = get_signals(machine, RAW_SIGNAL_PATTERN, dt_start, dt_stop)
    df_avg_signals = get_averages_signals(machine, dt_start, dt_stop)
    df_avg_all_signals = get_all_averages_signal(machine, dt_start, dt_stop)
    LOGGER.info("Calculate Channel Sensitivities: #signals (raw/avg/all_avg) " +
                str(len(df_raw_signals)) + "/" + str(len(df_avg_signals)) + "/" +
                str(len(df_avg_all_signals)))

    # Get Sensitivity signals, take EXTRA_SAFETY_DAYS_SENSITIVITY days extra,
    # as these are possibly needed to calculate the variations for the current day.
    # Store the result in a singleton.
    CachedSensitivityData().reset_sensitivities()
    CachedSensitivityData().add_extra_safety_sensitivities(
        get_signals(
            machine, SENSITIVITY_SIGNAL_PATTERN,
            dt_stop - pd.Timedelta(days=days_back + EXTRA_SAFETY_DAYS_SENSITIVITY),
            dt_stop))

    df_final_results = pd.DataFrame()
    for dt_start in pd.date_range(start=dt_start,
                                  end=dt_stop - pd.Timedelta(hours=24),
                                  freq="6H"):
        dt_stop = dt_start + pd.Timedelta(hours=24)
        df_final_results = process_time_window(
            df_final_results=df_final_results,
            df_avg_all_signals=df_avg_all_signals,
            df_avg_signals=df_avg_signals,
            df_raw_signals=df_raw_signals,
            dt_start=dt_start,
            dt_stop=dt_stop)

    sh.save_to_db(df_final_results, None)
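# Illustrative sketch (not part of the original pipeline): how the loop in
# calculate_machine_channel_sensitivities above generates overlapping 24-hour
# windows that advance in 6-hour steps (start/stop values below are hypothetical).
def _example_sensitivity_windows():
    dt_stop = pd.Timestamp("2024-01-03 00:00")
    dt_start = dt_stop - pd.Timedelta(days=2)
    windows = [
        (window_start, window_start + pd.Timedelta(hours=24))
        for window_start in pd.date_range(
            start=dt_start, end=dt_stop - pd.Timedelta(hours=24), freq="6H")
    ]
    # First window: 2024-01-01 00:00 -> 2024-01-02 00:00, then shifted by 6h each step
    return windows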