def __init__(self, parent=None):
    """Dialog init - build week/month period selectors with sensible defaults.

    Parameters
    ----------
    parent : optional
        parent widget, passed through to base dialog
    """
    super().__init__(parent=parent)

    # both period types filter on the same db date column
    col_db_startdate, col_db_enddate = 'ShiftDate', 'ShiftDate'

    df_week = qr.df_period(freq='week')
    df_month = qr.df_period(freq='month')

    # default to the most recent week which started more than 6 days ago
    d = dt.now() + delta(days=-6)
    default_week = df_week[df_week.start_date < d].iloc[-1, :].name  # index name

    # default to the most recent month which started more than 30 days ago
    d = dt.now() + delta(days=-30)
    default_month = df_month[df_month.start_date < d].iloc[-1, :].name  # index name

    # NOTE set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())

    self.add_input(
        field=InputField(text='Week', default=default_week),
        items=df_week.index,
        checkbox=True,
        cb_enabled=False)
    self.add_input(
        field=InputField(text='Month', default=default_month),
        items=df_month.index,
        checkbox=True,
        cb_enabled=False)

    self.add_features(['start_date', 'end_date', 'unit'])
    self.insert_linesep(i=2)
def __init__(
        self,
        ftype: str,
        max_depth: int = 6,
        d_lower: dt = None):
    """
    Parameters
    ----------
    ftype : str
        file type to collect (dsc, fault | plm | tr3)
    max_depth : int, optional
        max depth to recurse, default 6
    d_lower : dt, optional
        date to filter file date created, default now - 180 days
    """
    # was: docstring claimed default depth 5 and default d_lower 2016-01-01,
    # neither matched the code - corrected above
    if ftype not in self.keys:
        raise ValueError(f'Incorrect ftype "{ftype}", must be in {self.keys}')

    if d_lower is None:
        d_lower = dt.now() + delta(days=-180)

    cfg = self.cfg.get(ftype)
    expr_exclude = self.make_re_exclude(lst=cfg.get('exclude'))
    expr_find = cfg.get('find')

    # NOTE set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())
def update_db(self):
    """Check maxdate in database, query fluidlife api, save new samples to database"""
    # back up one day from the newest processed sample so overlapping records aren't missed
    d_lower = db.max_date_db(table='OilSamples', field='process_date', join_minesite=False) + delta(days=-1)
    self.load_samples_fluidlife(d_lower=d_lower)
    return self.to_sql()
def set_lastperiod(self, days=7):
    """Filter table to records from the last `days` days.

    Parameters
    ----------
    days : int, optional
        number of days back from today, default 7

    Returns
    -------
    bool
        True if a date filter was applied, False when no date column is configured
    """
    # was: `hasattr(...) and not self.date_col is None` - same check, idiomatic form
    if getattr(self, 'date_col', None) is not None:
        vals = {self.date_col: dt.now().date() + delta(days=days * -1)}
        self.fltr.add(vals=vals, opr=op.ge)
        return True
    else:
        return False
def set_allopen(self, **kw):
    """Filter to recently processed records (process_date within last 6 days)."""
    a = self.a
    cutoff = dt.now() + delta(days=-6)
    self.add_fltr_args([
        dict(vals=dict(process_date=cutoff)),
        # dict(vals=dict(component_id=component))
    ])
def collect_plm_files(unit: str, d_lower: dt = None, lst: list = None):
    """Collect PLM files from p drive and save to desktop

    - Used for uploading to KA PLM report system
    - TODO this could be replaced by utl.FileProcessor now?

    Parameters
    ----------
    unit : str
        unit to collect files for
    d_lower : dt, optional
        only collect files created after this date, default now - 180 days
    lst : list, optional
        pre-collected list of file paths; if None, unit downloads folder is searched

    Returns
    -------
    list
        list of source files copied
    """
    start = time.time()
    p_search = efl.UnitFolder(unit=unit).p_dls

    if d_lower is None:
        d_lower = dt.now() + delta(days=-180)

    if lst is None:
        lst = utl.FolderSearch('plm', d_lower=d_lower).search(p_search)

    log.info(f'{f.deltasec(start)} | Found {len(lst)} files.')

    p_dst = cf.desktop / f'plm/{unit}'

    # was: loop variable reused the name "p" (the search folder) - renamed for clarity
    for p_src in lst:
        fl.copy_file(p_src=p_src, p_dst=p_dst / f'{fl.date_created(p_src):%Y-%m-%d}_{p_src.name}')

    log.info(f'{f.deltasec(start)} | {len(lst)} files copied to desktop.')
    return lst
def add_defaults(d: dt = None, em=None): """Add default rows for all dates till current date with 0 time""" # get df # groupby date # get date_range btwn now and start # merge grouped data # filter dates = 0 or Nan if em is None: em = ExcelModel() if not d is None: # init specific day if isinstance(d, str): d = dt.strptime(d, '%Y-%m-%d') day = d.strftime('%a') df = em.get_df(name='Default') \ .pipe(lambda df: df[df.day == day]) \ .drop(columns='day') \ .assign( duration=lambda x: x.duration.astype(float), date=d.date()) else: # init all blank days d = dt.now().date() # type: date n = 90 d_lower = d + delta(days=-n) rng = pd.date_range(d_lower, d) df_default = em.get_df(name='Default') # defaults per day # get sum duration from last n days per day df_sum = em.get_df('ActLog') \ .pipe(lambda df: df[df.date.dt.date >= d_lower]) \ .groupby('date', as_index=False)[['duration']].sum() \ .rename(columns=dict(duration='sum')) # merge default values for all days with 0 duration df = pd.DataFrame(dict(date=rng)) \ .assign(day=lambda x: x.date.dt.strftime('%a')) \ .merge(right=df_sum, on='date', how='left') \ .fillna(0) \ .merge(right=df_default, on='day', how='outer') \ .dropna() \ .pipe(lambda df: df[df['sum'] == 0]) \ .drop(columns=['day', 'sum']) \ .sort_values('date') \ .assign( duration=lambda x: x.duration.astype(float)) lst = list(df.to_dict(orient='index').values()) em.add_row(m=lst) em.close() n_dates = df.groupby('date').size().shape[0] print('\n') log.info(f'Dates initialized: {n_dates}')
def df_period(freq: str, n: int = 0, ytd: bool = False, n_years: int = 1) -> pd.DataFrame:
    """Return df of periods for specified freq

    Parameters
    ----------
    freq : str
        M or W
    n : int, optional
        filter last n periods, default 0 (keep all)
    ytd : bool, optional
        filter periods to start of year, default False
    n_years : int
        number of previous years

    Returns
    -------
    pd.DataFrame
        df of periods, indexed by formatted period name
    """
    freq = dict(month='M', week='W').get(freq, freq)  # convert from month/week

    d_upper = dt.now()
    d_lower = d_upper + delta(days=-365 * n_years)
    idx = pd.date_range(d_lower, d_upper, freq=freq).to_period()

    # fmt_week = f'%Y-%{week_letter}'
    fmt_week = '%G-%V'  # iso year / iso week number

    m = dict(
        W=dict(fmt_str=fmt_week),
        M=dict(fmt_str='%Y-%m')) \
        .get(freq)

    def _rename_week(df, do=False):
        # give weeks a friendly display name
        # NOTE(review): week_letter is assumed to be a module-level constant - not visible here, confirm
        if not do:
            return df
        return df \
            .assign(name=lambda x: x.period.dt.strftime(f'Week %{week_letter}'))

    def _filter_ytd(df, do=ytd):
        # keep only periods in the latest year present
        if not do:
            return df
        return df[df.period >= str(df.period.max().year)]

    df = pd.DataFrame(index=idx)

    # n=0 means iloc[0:], ie keep all periods
    return df \
        .assign(
            start_date=lambda x: pd.to_datetime(x.index.start_time.date),
            end_date=lambda x: pd.to_datetime(x.index.end_time.date),
            d_rng=lambda x: list(zip(x.start_date.dt.date, x.end_date.dt.date)),
            name=lambda x: x.index.to_timestamp(freq).strftime(m['fmt_str'])) \
        .rename_axis('period') \
        .reset_index(drop=False) \
        .set_index('name', drop=False) \
        .pipe(_filter_ytd, do=ytd) \
        .pipe(_rename_week, do=freq == 'W') \
        .rename(columns=dict(name='name_title')) \
        .iloc[-1 * n:]
def create_plm_report(self):
    """Trigger plm report from current unit selected in table.

    Prompts with a PLMReport dialog (pre-filled from the selected event when
    one exists), then either imports newer haul cycle files in a background
    worker before building the report, or builds it immediately when the db
    is already up to date.
    """
    from smseventlog.data.internal import plm
    view = self.active_table()

    try:
        e = view.e
        unit, d_upper = e.Unit, e.DateAdded
    except er.NoRowSelectedError:
        # don't set dialog w unit and date, just default
        unit, d_upper, e = None, None, None

    # Report dialog will always set final unit etc
    dlg = dlgs.PLMReport(unit=unit, d_upper=d_upper)
    ok = dlg.exec()
    if not ok:
        return  # user exited

    m = dlg.get_items(lower=True)  # unit, d_upper, d_lower

    # check if unit selected matches event selected
    # (was: `if not e is None: if not e.Unit == ...` - collapsed to one idiomatic check)
    if e is not None and e.Unit != m['unit']:
        e = None

    m['e'] = e

    # NOTE could make a func 'rename_dict_keys'
    m['d_upper'], m['d_lower'] = m['date upper'], m['date lower']

    # check max date in db
    maxdate = plm.max_date_plm(unit=m['unit'])

    if maxdate + delta(days=5) < m['d_upper']:
        # worker will call back and make report when finished
        if not fl.drive_exists(warn=False):
            msg = 'Can\'t connect to P Drive. Create report without updating records first?'
            if dlgs.msgbox(msg=msg, yesno=True):
                self.make_plm_report(**m)
            return

        Worker(func=plm.update_plm_single_unit, mw=self, unit=m['unit']) \
            .add_signals(
                signals=('result', dict(
                    func=self.handle_import_result,
                    kw=m))) \
            .start()

        # was: 'haul cylce' typo in user-facing statusbar message
        msg = f'Max date in db: {maxdate:%Y-%m-%d}. ' \
            + 'Importing haul cycle files from network drive, this may take a few minutes...'
        self.update_statusbar(msg=msg)
    else:
        # just make report now
        self.make_plm_report(**m)
def df_weeks():
    """Return df of the 52 weeks of the current year.

    Index is '{year}-{week_num}'; each row holds the week's start/end dates
    and a display name.

    Returns
    -------
    pd.DataFrame
        df with columns StartDate, EndDate, Name
    """
    # Week
    cols = ['StartDate', 'EndDate', 'Name']
    m = {}
    year = dt.now().year

    for wk in range(1, 53):
        # '%Y-W%W-%w' with day 1 = Monday; wk-1 keeps the original week-number offset
        # was: year hard-coded to 2020, so dates disagreed with the '{year}-{wk}' index label
        s = f'{year}-W{wk - 1}'
        d = dt.strptime(s + '-1', '%Y-W%W-%w').date()
        m[f'{year}-{wk}'] = (d, d + delta(days=6), f'Week {wk}')

    return pd.DataFrame.from_dict(m, columns=cols, orient='index')
def fix_dls_all_units(d_lower: dt = None) -> None:
    """Re-process dsc download files for every unit, fanned out in parallel."""
    if d_lower is None:
        d_lower = dt.now() + delta(days=-30)

    units = utl.all_units()

    # collect dsc files from all units in parallel
    result = Parallel(n_jobs=-1, verbose=11)(
        delayed(utl.process_files)(
            ftype='dsc',
            units=unit,
            d_lower=d_lower,
            parallel=False) for unit in units)
def df_months():
    """Return df of 24 monthly periods starting from the month one year back.

    Returns
    -------
    pd.DataFrame
        df with columns StartDate, EndDate, Name, indexed by 'YYYY-MM'
    """
    # Month
    cols = ['StartDate', 'EndDate', 'Name']

    # anchor to the first day of the month, one year back
    d_start = dt.now() + delta(days=-365)
    d_start = dt(d_start.year, d_start.month, 1)

    m = {}
    for i in range(24):
        d = d_start + relativedelta(months=i)
        name = f'{d:%Y-%m}'
        m[name] = (*first_last_month(d), name)

    return pd.DataFrame.from_dict(m, columns=cols, orient='index')
def __init__(self):
    """Initialize database wrapper state (no connection is opened here)."""
    # NOTE this local shadows the module attr name and becomes self.__name__ via set_self below
    __name__ = 'SMS Event Log Database'
    log.info('Initializing database')

    self.reset(False)

    # lazily loaded / cached dataframes
    df_unit = None
    df_fc = None
    df_component = None
    dfs = {}

    # map customer name -> network domain, plus the inverse lookup
    domain_map = dict(SMS='KOMATSU', Cummins='CED', Suncor='NETWORK')
    domain_map_inv = f.inverse(m=domain_map)

    # NOTE(review): presumably 61s > a 60s throttle window so the first connectivity check runs immediately - confirm
    last_internet_success = dt.now() + delta(seconds=-61)

    # set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())

    self.expected_exceptions = []
def __init__(self, unit: str, d_upper: dt = None, d_lower: dt = None, **kw):
    """Select PLM report data for single unit.

    Parameters
    ----------
    unit : str
    d_upper : dt, optional
        upper date bound, default now
    d_lower : dt, optional
        If None, default to first day of month ~6 months before d_upper.
        Not needed if just using max_date
    """
    super().__init__(select_tablename='viewPLM')
    # use_cached_df = True # hmmm dont actually need this
    a = self.select_table
    cols = [a.star]

    if d_lower is None:
        # always start at first day of month
        if d_upper is None:
            d_upper = dt.now()
        d_lower = first_last_month(d_upper + delta(days=-180))[0]

    if not d_upper is None:
        # push upper bound out one day since BETWEEN cuts off at midnight
        d_rng = (d_lower, d_upper + delta(days=1))  # between cuts off at T00:00:00

    q = Query.from_(a) \
        .orderby(a.datetime)

    # set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())
    self.set_default_args()  # NOTE not sure if need this or if report always does it
def __init__(self, days=14, use_user_settings=False, **kw):
    """Downloader init for recent PSN files saved to the p-drive.

    Parameters
    ----------
    days : int, optional
        how many days back to search, default 14
    use_user_settings : bool, optional
        passed through to the base downloader
    """
    download_dir = cf.p_drive / 'Regional/SMS West Mining/PSN/PSNs'
    super().__init__(use_user_settings=use_user_settings, download_dir=download_dir, **kw)

    # date format the remote search form expects
    startdate = (dt.now() + delta(days=days * -1)).strftime('%m/%d/%Y')

    # NOTE(review): login url appears redacted in source - confirm before use
    self.pages.update({
        'login': '******',
    })

    # set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())
def __init__(self, unit, d_rng=None, **kw):
    """Query SMR (service meter reading) history for a single unit.

    Parameters
    ----------
    unit : str
    d_rng : tuple, optional
        (d_lower, d_upper) date range, default last 60 days
    """
    super().__init__(**kw)
    a = T('UnitSMR')

    if d_rng is None:
        d_upper = dt.now()
        d_lower = d_upper + delta(days=-60)
        d_rng = (d_lower, d_upper)

    cols = ['Unit', 'DateSMR', 'SMR']

    q = Query.from_(a) \
        .where(a.Unit == unit) \
        .where(a.DateSMR.between(d_rng[0], d_rng[1]))

    # set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())
def __init__(self, recent_days=-120, da=None):
    """Query recent oil samples, ranked newest-first per unit/component/modifier.

    Parameters
    ----------
    recent_days : int, optional
        negative day offset from today used to filter sample_date, default -120
    da : optional
        date args passed to the base query
    """
    super().__init__(da=da)
    a, b = self.a, self.b

    # subquery for ordering with row_number
    # rn=1 within each (unit, component_id, modifier) partition is the newest sample
    c = Query.from_(a).select(
        a.star,
        (RowNumber()
            .over(a.unit, a.component_id, a.modifier)
            .orderby(a.sample_date, order=Order.desc)).as_('rn')) \
        .left_join(b).on_field('Unit') \
        .where(a.sample_date >= dt.now() + delta(days=recent_days)) \
        .as_('sq0')

    cols = [c.star]
    sq0 = c

    # set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())
def get_df_sum(self, n: int = 10):
    """Return df of summary durations for last n dates

    Parameters
    ----------
    n : int, optional
        number of days back from today, default 10

    Returns
    -------
    pd.DataFrame
        one row per calendar day with columns [date, day, sum]
    """
    # daily totals from the activity log, indexed by date
    df_sum = self.get_df(name='ActLog') \
        .groupby('date', as_index=False)[['date', 'day', 'sum']].first() \
        .set_index('date')

    # full date range so days with no activity still get a row
    d_today = dt.now().date()
    rng = pd.date_range(d_today + delta(days=-n), d_today)

    df = pd.DataFrame(index=rng) \
        .rename_axis('date') \
        .merge(right=df_sum, left_index=True, right_index=True, how='left') \
        .reset_index()

    # recompute weekday label and fill missing totals with 0
    return df.assign(
        day=lambda x: x.date.dt.strftime('%a'),
        sum=lambda x: x['sum'].fillna(0).astype(float))
def max_date_plm(unit: str) -> dt:
    """Get max date in PLM database for specific unit

    Parameters
    ----------
    unit : str

    Returns
    -------
    dt
        max date, falling back to ~2 years ago when the unit has no records
    """
    d_max = PLMUnit(unit=unit).max_date()
    return d_max if d_max is not None else dt.now() + delta(days=-731)
def import_history():
    """Backfill oil sample history from fluidlife one month at a time, then save to db."""
    from smseventlog import queries as qr

    oils = OilSamplesDownloader()
    rng = pd.date_range(dt(2020, 1, 1), dt(2021, 4, 1), freq='M')

    for d in rng:
        d_lower, d_upper = qr.first_last_month(d)
        d_upper = d_upper + delta(days=1)
        oils.load_samples_fluidlife(d_lower=d_lower, d_upper=d_upper, save_samples=True)

    # dump a csv snapshot to the desktop before writing to the db
    df = oils.df_samples()
    p = cf.desktop / 'fluidlife.csv'
    df.to_csv(p)

    print(f'rows downloaded from fluidlife: {df.shape}')
    oils.to_sql()
def build_url(self, **kw):
    """Build fluidlife history export url from filter kwargs.

    Parameters
    ----------
    kw :
        filter args - d_lower/d_upper (dt), unit, minesite, component.
        d_lower defaults to now - 14 days when not given.

    Returns
    -------
    str
        full api url with query params appended
    """
    login = self.login
    url = 'https://mylab2.fluidlife.com/mylab/api/history/jsonExport?'

    # was: `if not 'd_lower' in kw` - idiomatic form
    if 'd_lower' not in kw:
        kw['d_lower'] = dt.now() + delta(days=-14)

    # convert to suncor unit names
    if 'unit' in kw:
        customer = db.get_unit_val(unit=kw['unit'], field='Customer')
        if customer == 'Suncor':
            m = {'^F': 'F0', '^3': '03', '^2': '02'}
            for expr, repl in m.items():
                kw['unit'] = re.sub(expr, repl, kw['unit'])

    # convert easier kws to fluidlife kws
    m_conv = dict(
        d_lower='startDateTime',
        d_upper='endDateTime',
        minesite='customerName',
        component='componentType',
        unit='unitId')  # NOTE unitId doesn't actually work

    m = dict(username=login['username'], password=login['password'])
    kw.update(m)

    for k, v in kw.items():
        if isinstance(v, dt):
            v = self.format_date(v)

        # convert MineSite to Fluidlife customer_name
        if v in m_customer.keys():
            v = m_customer[v]

        if isinstance(v, list):
            v = v[0]

        if k in m_conv:
            k = m_conv[k]

        # only the first param sits directly after the '?'
        ampersand = '&' if url[-1] != '?' else ''
        url = f'{url}{ampersand}{k}={v}'

    return url
def __init__(self, unit: str, d_upper: dt, d_lower: dt = None, **kw):
    """Create PLM report for single unit

    Parameters
    ----------
    unit : str
    d_upper : dt
        report end date
    d_lower : dt, optional
        report start date, default first day of month ~6 months before d_upper
    """
    if d_lower is None:
        d_lower = qr.first_last_month(d_upper + delta(days=-180))[0]

    d_rng = (d_lower, d_upper)
    super().__init__(d_rng=d_rng, **kw)

    title = f'PLM Report - {unit} - {d_upper:%Y-%m-%d}'

    # set_self(vars()) binds all locals above as instance attributes
    f.set_self(vars())
    self.load_sections('PLMUnit')
def __init__(self, d: dt = None, d_rng: Tuple[dt] = None, minesite: str = None, mw=None, rep_type: str = 'pdf', **kw): # dict of {df_name: {func: func_definition, da: **da, df=None}} dfs, charts, sections, exec_summary, style_funcs = {}, {}, {}, {}, {} signatures = [] self.html_template = 'report_template.html' dfs_loaded = False p_rep = None if d is None: d = dt.now() + delta(days=-31) if d_rng is None: d_rng = qr.first_last_month(d=d) # make sure everything is date not datetime if isinstance(d_rng[0], dt): d_rng = (d_rng[0].date(), d_rng[1].date()) # don't use current ytd until first monthly report end of jan cur_year = dt.now().year d = dt(cur_year, 1, 1) d_end_jan = qr.first_last_month(d)[1].date() if d_rng[1] < d_end_jan: d_rng_ytd = (dt(cur_year - 1, 1, 1), dt(cur_year - 1, 12, 31)) else: d_rng_ytd = (dt(cur_year, 1, 1).date(), d_rng[1]) include_items = dict(title_page=False, truck_logo=False, exec_summary=False, table_contents=False, signature_block=False) env = Environment(loader=FileSystemLoader(str(p_reports))) f.set_self(vars())
def recent_weekday(d: str) -> dt:
    """Get most recent date falling on the given weekday.

    Parameters
    ----------
    d : str
        day of week (eg 'fri')

    Returns
    -------
    dt
        most recent date (today included) with that weekday

    Raises
    ------
    ValueError
        if d is not a valid weekday abbreviation
    """
    # was: built an 8-day pandas date_range, so today's weekday appeared twice
    # and .loc returned a 2-element Series instead of a single date
    target = d.title()
    today = dt.now().date()

    # walk back from today - the first match is the most recent occurrence
    for i in range(7):
        candidate = today + delta(days=-i)
        if candidate.strftime('%a') == target:
            return candidate

    raise ValueError(f'Invalid weekday: {d}')
def deltasec(start, end=None):
    """Return difference from time object formatted as seconds

    Parameters
    ----------
    start : time.time
        start time obj
    end : time.time, optional
        end time, by default None (uses current time)

    Returns
    -------
    str
        elapsed time formatted as 'H:MM:SS' (sub-second precision truncated)

    Examples
    --------
    >>> start = time.time() - 13
    >>> f.deltasec(start)
    '0:00:13'
    """
    # was: docstring example showed '00:00:13', but str(timedelta) yields '0:00:13'
    if end is None:
        end = time.time()

    # str(timedelta) -> 'H:MM:SS[.ffffff]'; drop the fractional part
    return str(delta(seconds=end - start)).split('.')[0]
def mins_secs(seconds: int) -> str:
    """Convert seconds to mins, secs string 03:14
    """
    # str(timedelta) -> 'H:MM:SS[.ffffff]'; drop fraction, keep only MM:SS
    full = str(delta(seconds=seconds)).split('.')[0]
    return ':'.join(full.split(':')[1:])
def set_default_filter(self, **kw):
    """Apply parent defaults, then restrict to records added in the last 30 days."""
    super().set_default_filter(**kw)
    d_cutoff = dt.now().date() + delta(days=-30)
    self.fltr.add(vals=dict(DateAdded=d_cutoff))
def parse_fault_time(tstr):
    """Convert a fault time string 'epoch_seconds|tz_offset_seconds' to a datetime."""
    parts = tstr.split('|')
    epoch_s = int(parts[0])
    tz_offset = int(parts[1])
    return dt.fromtimestamp(epoch_s) + delta(seconds=tz_offset)
def to_seconds(t):
    """Convert an 'HH:MM:SS' string to total seconds as an int."""
    parsed = time.strptime(t, '%H:%M:%S')
    total = delta(hours=parsed.tm_hour, minutes=parsed.tm_min, seconds=parsed.tm_sec)
    return int(total.total_seconds())
def import_dls(p: Path, mw=None) -> dict:
    """Upload downloads folder from local computer to p-drive

    p : Path
        filepath to process
    mw : gui.gui.MainWindow
        mw object to update statusbar with progress

    Returns
    -------
    dict
        dict of result times

    Import csvs to database: faults, plm
    Zip: dsc folder (ge files)

    Attempt to get unit from:
    - file name
    - dsc stats file
    - fault csv
    - plm csv
    - TODO check selected dir contains some correct files (eg not accidental selection)
    """
    start = time.time()
    now = lambda x: time.time() - x  # elapsed seconds since x

    # check if unit given in file name
    unit = utl.unit_from_str(s=p.name)
    d = f.date_from_str(s=p.name)
    d_lower = dt.now() + delta(days=-365 * 2)
    m_result = {k: dict(num=0, time=0) for k in ('ge_zip', 'fault', 'plm')}

    # list of dates created as backup if no dsc
    lst_dates = [fl.date_created(p) for p in p.iterdir()]

    # callback to update statusbar
    if mw is None:
        from smseventlog.gui._global import update_statusbar as us
    else:
        us = mw.update_statusbar

    # find dsc files to use for stat file first
    lst_dsc = utl.FolderSearch('dsc', d_lower=d_lower).search(p)

    if lst_dsc:
        lst_dates = []  # use dsc for date, clear backup dates

        # try to get unit from first dsc serial file first
        try:
            p_stat = stats_from_dsc(p=lst_dsc[0])
            if unit is None:
                print('p_stat', p_stat)
                unit = unit_from_stat(p_stat)
        except Exception as e:
            # print(e)
            # best-effort - fall through to fault/plm/ahs unit detection below
            log.warning('Failed to get unit from stats file.')

    # save files to import after unit check
    m_import = {}
    unit_func = dict(
        fault=flt.unit_from_fault,
        plm=plm.unit_from_haulcycle)

    # check unit from fault/plm
    for ftype in unit_func.keys():
        try:
            lst_csv = utl.FolderSearch(ftype, d_lower=d_lower).search(p)
            if lst_csv:
                m_import[ftype] = lst_csv

                # try to get unit if doesn't exist yet
                if unit is None:
                    unit = unit_func[ftype](p=lst_csv[0], raise_errors=False)
        except Exception as e:
            # print(e)
            us(msg=f'Failed to read {ftype} file(s).', warn=True, log_=True)

    # get dates from ge dsc
    for p_dsc in lst_dsc:
        lst_dates.append(date_from_dsc(p_dsc))

    # check for AHS files in first level of dls folder
    ahs_folders = utl.FolderSearch('ahs', max_depth=0).search(p)

    if not ahs_folders:
        suffix = 'DLS'
    else:
        suffix = 'FRDLS'
        if unit is None:
            unit = val_from_ahs_files(ahs_folders, 'unit')

        # get date from ahs files
        if d is None:
            lst_dates.append(val_from_ahs_files(ahs_folders, 'date'))

    # final check, fail if unit doesn't exist yet
    if unit is None:
        raise er.NoUnitError()

    # sort dates and set date if not given in folder name (earliest wins)
    if d is None and lst_dates:
        lst_dates = sorted(lst_dates, reverse=False)
        d = lst_dates[0]

    if d is None:
        raise er.NoDateError()

    name = f'{unit} - {d:%Y-%m-%d}'
    title = f'{name} - {suffix}'
    m_result['name'] = name

    from smseventlog.eventfolders import UnitFolder
    uf = UnitFolder(unit=unit)
    p_dst = uf.p_dls / f'{d.year}/{title}'

    # make sure we don't overwrite folder
    log.info(f'p_dst: {p_dst}')
    if p_dst.exists():
        raise er.FolderExistsError(p=p_dst)

    # import fault/plm
    for ftype, lst_csv in m_import.items():
        time_prev = time.time()
        # log.info(f'importing: {ftype}')
        try:
            rowsadded = utl.combine_import_csvs(lst_csv=lst_csv, ftype=ftype, unit=unit, n_jobs=-4)
            m_result[ftype] = dict(num=rowsadded or 0, time=now(time_prev))
        except Exception as e:
            # NOTE could maybe raise a custom exception here?
            us(msg=f'Failed to import {ftype} files.', warn=True, log_=True)

    # zip GE dsc files
    if lst_dsc:
        time_prev = time.time()
        for p_dsc in lst_dsc:
            # log.info(f'zipping: {p_dsc}')
            fl.zip_folder_threadsafe(p_src=p_dsc, p_dst=p_dst / p_dsc.name, delete=True)

        m_result['ge_zip'] = dict(num=len(lst_dsc), time=now(time_prev))

    # zip dnevent/sfevent folders in place
    if ahs_folders:
        time_prev = time.time()

        # copy 6 newest files > 3mb to PREVIEW dir
        make_ahs_data_preview(ahs_folders)

        for p_ahs in ahs_folders:
            # if any(item in p_ahs.name.lower() for item in ('dnevent', 'sfevent')):
            fl.zip_folder_threadsafe(p_src=p_ahs, p_dst=p_dst / p_ahs.name, delete=True)

        m_result['ahs_zip'] = dict(num=len(ahs_folders), time=now(time_prev))

    # upload all to p-drive
    us(f'Uploading files to: {p_dst}')
    fl.move_folder(p_src=p, p_dst=p_dst)

    m_result['time_total'] = now(start)
    return m_result