def import_stats(lst=None, d_lower=dt(2021, 1, 1)):
    """Use list of most recent dsc and combine into dataframe

    Parameters
    ----------
    lst : dict | list, optional
        dict of {unit: [dsc paths]} or a flat list of dsc paths;
        if None, loads recent dsc paths for all units
    d_lower : datetime, optional
        only used when lst is None, passed to get_recent_dsc_all

    Returns
    -------
    pd.DataFrame
        combined stats, or an empty DataFrame if nothing could be loaded
    """
    if lst is None:
        lst = get_recent_dsc_all(d_lower=d_lower)

    if isinstance(lst, dict):
        dfs = []
        for unit, lst_csv in tqdm(lst.items()):

            # try to find/load csv, or move to next if fail
            for p in lst_csv:
                try:
                    p_csv = stats_from_dsc(p)
                    df_single = get_stats(p=p_csv)
                    dfs.append(df_single)
                    break
                except Exception as e:
                    log.warning(f'Failed to load csv: {p}, \n{str(e)}')
    else:
        dfs = [get_stats(stats_from_dsc(p)) for p in lst]

    # FIX: pd.concat raises ValueError on an empty object list (eg every
    # csv failed to load) - return an empty frame instead of crashing
    if not dfs:
        return pd.DataFrame()

    return pd.concat(dfs)
def __init__(self, d_lower=None, components=None, minesite='FortHills', **kw):
    """Query component events at a minesite, filtered to a component list and start date."""
    super().__init__(**kw)
    a, b, c = self.a, self.b, self.c

    # fall back to defaults when caller passes nothing
    if d_lower is None:
        d_lower = dt(2020, 4, 1)

    if components is None:
        components = [
            'Spindle',
            'Front Suspension',
            'Rear Suspension',
            'Steering Cylinder']

    self.cols = [
        a.UID, a.Unit, a.Title, a.WorkOrder, c.Component, c.Modifier,
        a.DateAdded, a.SMR, a.ComponentSMR, a.Floc]

    # apply each filter condition in turn
    for cond in (
            a.DateAdded >= d_lower,
            c.Component.isin(components),
            a.MineSite == minesite):
        self.fltr.add(ct=cond)
def example():
    """Build an OilSamplesDownloader with a sample filter and load samples since 2020-06-01."""
    fltr = {
        'customer_name': 'fort hills',
        'component_id': 'spindle',
        'unitModel': '980'}

    oils = OilSamplesDownloader(fltr=fltr)
    oils.load_samples_fluidlife(d_lower=dt(2020, 6, 1))

    return oils
def import_history():
    """Download fluidlife oil samples month-by-month (2020-01 to 2021-04),
    dump to a csv on the desktop, then upload to the database."""
    from smseventlog import queries as qr

    oils = OilSamplesDownloader()

    # iterate month-end anchors; each pass downloads one full month
    for d in pd.date_range(dt(2020, 1, 1), dt(2021, 4, 1), freq='M'):
        d_lower, d_upper = qr.first_last_month(d)
        oils.load_samples_fluidlife(
            d_lower=d_lower,
            d_upper=d_upper + delta(days=1),  # make upper bound inclusive of the last day
            save_samples=True)

    df = oils.df_samples()
    df.to_csv(cf.desktop / 'fluidlife.csv')
    print(f'rows downloaded from fluidlife: {df.shape}')

    oils.to_sql()
def __init__(self, d: dt = None, d_rng: Tuple[dt] = None, minesite: str = None, mw=None, rep_type: str = 'pdf', **kw):
    """Set up report state: date range, YTD range, section/chart containers and jinja env.

    Parameters
    ----------
    d : dt, optional
        anchor date for the report period; defaults to ~1 month ago
    d_rng : Tuple[dt], optional
        (start, end) of report period; defaults to first/last day of d's month
    minesite : str, optional
    mw : optional
        main window reference (presumably the gui parent - TODO confirm)
    rep_type : str, optional
        output format, default 'pdf'
    """
    # dict of {df_name: {func: func_definition, da: **da, df=None}}
    dfs, charts, sections, exec_summary, style_funcs = {}, {}, {}, {}, {}
    signatures = []
    self.html_template = 'report_template.html'
    dfs_loaded = False  # flipped once dataframes are loaded
    p_rep = None  # output path, set when report is rendered

    if d is None:
        d = dt.now() + delta(days=-31)
    if d_rng is None:
        d_rng = qr.first_last_month(d=d)

    # make sure everything is date not datetime
    if isinstance(d_rng[0], dt):
        d_rng = (d_rng[0].date(), d_rng[1].date())

    # don't use current ytd until first monthly report end of jan
    cur_year = dt.now().year
    d = dt(cur_year, 1, 1)
    d_end_jan = qr.first_last_month(d)[1].date()
    if d_rng[1] < d_end_jan:
        # NOTE(review): this branch builds datetimes while the else branch uses
        # .date() - inconsistent types for d_rng_ytd, confirm downstream handles both
        d_rng_ytd = (dt(cur_year - 1, 1, 1), dt(cur_year - 1, 12, 31))
    else:
        d_rng_ytd = (dt(cur_year, 1, 1).date(), d_rng[1])

    # flags controlling which report sections get rendered
    include_items = dict(title_page=False, truck_logo=False, exec_summary=False, table_contents=False, signature_block=False)

    env = Environment(loader=FileSystemLoader(str(p_reports)))

    # set_self stores ALL locals above as instance attributes - do not rename locals
    f.set_self(vars())
def zip_recent_dls(units, d_lower=dt(2020, 1, 1)):
    """Get most recent dsc per unit and zip each one's parent folder (for attaching to TSI)."""
    if not isinstance(units, list):
        units = [units]

    dscs = []
    for unit in units:
        dscs += get_recent_dsc_single(unit=unit, d_lower=d_lower)

    # zip parent folder of each dsc, keep originals on disk
    return [fl.zip_folder_threadsafe(p_src=p.parent, delete=False) for p in dscs]
def __init__(
        self,
        ftype: str,
        d_lower: dt = dt(2020, 1, 1),
        max_depth: int = 4,
        search_folders: list = None):
    """Collect files of a given type across folders.

    Parameters
    ----------
    ftype : str
        file type to search for (eg 'dsc', 'fault')
    d_lower : dt, optional
        ignore files older than this date
    max_depth : int, optional
        max folder recursion depth
    search_folders : list, optional
        folder names to search, default ['downloads']
    """
    # FIX: default was a mutable list literal (shared across all calls);
    # copy caller's list so it is never aliased either
    search_folders = list(search_folders) if search_folders is not None else ['downloads']

    self.collected_files = []
    self.collected_files_dict = {}

    self.folder_search = FolderSearch(ftype=ftype, d_lower=d_lower, max_depth=max_depth)

    # set_self stores all locals as instance attributes - keep local names stable
    f.set_self(vars())
def df_months():
    """Return DataFrame of 24 monthly (StartDate, EndDate, Name) rows,
    starting from the first of the month ~1 year ago, indexed by 'YYYY-MM'."""
    # anchor at the first day of the month one year back
    anchor = dt.now() + delta(days=-365)
    anchor = dt(anchor.year, anchor.month, 1)

    data = {}
    for offset in range(24):
        d = anchor + relativedelta(months=offset)
        label = f'{d:%Y-%m}'
        data[label] = (*first_last_month(d), label)

    return pd.DataFrame.from_dict(
        data,
        columns=['StartDate', 'EndDate', 'Name'],
        orient='index')
def __init__(self, d: dt, minesite='FortHills', **kw):
    """Query unit SMR readings on the first day of d's month and of the next month.

    Parameters
    ----------
    d : dt
        any date inside the target month
    minesite : str, optional
        default 'FortHills'
    """
    super().__init__(**kw)
    a, b = pk.Tables('UnitID', 'UnitSMR')

    d_lower = dt(d.year, d.month, 1)
    # the two month-boundary dates to pull SMR for
    dates = (d_lower, d_lower + relativedelta(months=1))  # (2020-12-01, 2021-01-01)

    cols = [a.Unit, b.DateSMR, b.SMR]

    # left join keeps units even when they have no SMR row; ExcludeMA null
    # filters out units flagged for exclusion
    q = Query.from_(a).select(*cols) \
        .left_join(b).on_field('Unit') \
        .where((a.MineSite == minesite) & (b.DateSMR.isin(dates) & (a.ExcludeMA.isnull())))

    # set_self stores all locals above as instance attributes - do not rename locals
    f.set_self(vars())
def setModelData(self, editor, model, index):
    """Write the editor's date (or combined date+time) value back into the model."""
    editor_date = getattr(editor, self.date_type)()

    if not isinstance(self, TimeDelegate):
        # plain date delegate - convert straight to date
        d = f.convert_date(editor_date.toPyDate())
    else:
        # time delegate: combine the edited time with the row's 'Date Added' date
        index_dateadded = index.siblingAtColumn(model.get_col_idx('Date Added'))
        d_added = model.data(index=index_dateadded, role=TableDataModel.RawDataRole)

        if d_added is None:
            d_added = dt.now()

        t = QTime(editor_date).toPyTime()
        d = dt(d_added.year, d_added.month, d_added.day, t.hour, t.minute)

    model.setData(index, d)
def get_recent_dsc_single(
        unit: str,
        d_lower: dt = dt(2020, 1, 1),
        year: str = None,
        all_files: bool = False,
        ftype: str = 'dsc',
        max_depth: int = 3):
    """Return list of most recent dsc folder from each unit
    - OR most recent fault... could extend this for any filetype

    Parameters
    ----------
    unit : str
        unit to search
    d_lower : datetime, optional,
        limit search by date, default dt(2020,1,1)
    year : str, optional
        restrict to a year subfolder of downloads if it exists
    all_files : bool
        return all sorted files instead of just the most recent
    ftype : str, optional
        file type to search for, default 'dsc'
    max_depth : int, optional
        max folder recursion depth, default 3

    Returns
    -------
    list
        paths sorted newest-first; single most recent item unless all_files
    """
    lst = []
    uf = efl.UnitFolder(unit=unit)
    p_dls = uf.p_dls

    # FIX: idiomatic 'is not None' (was 'not year is None')
    if year is not None:
        p_year = p_dls / year
        if p_year.exists():
            p_dls = p_year

    lst_unit = utl.FolderSearch(ftype, d_lower=d_lower, max_depth=max_depth).search(p_dls)

    if lst_unit:
        # newest first, by date encoded in the dsc folder name
        lst_unit.sort(key=lambda p: date_from_dsc(p), reverse=True)

        if not all_files:
            lst.append(lst_unit[0])
        else:
            lst.extend(lst_unit)

    return lst
def process_df(self, df):
    """Pivot raw df for fc summary table

    Builds a per-FC completion summary (counts and % complete), pivots the raw
    rows so units become columns, then merges the summary back on.

    Parameters
    ----------
    df : pd.DataFrame
        raw fc rows; expected columns include 'FC Number', 'Unit', 'Complete',
        'Release Date', 'Expiry Date', 'Hrs' - TODO confirm full schema from caller

    Returns
    -------
    pd.DataFrame
        pivoted summary table (or the input unchanged when empty)
    """
    self.df_orig = df.copy()
    df_shape = df.shape  # saving to var for err troubleshooting
    if len(df) == 0:
        return df

    # create summary (calc complete %s)
    df2 = pd.DataFrame()
    gb = df.groupby('FC Number')

    # Total = rows per FC, Complete = rows flagged 'Y'
    df2['Total'] = gb['Complete'].count()
    df2['Complete'] = gb.apply(lambda x: x[x['Complete'] == 'Y']['Complete'].count())
    df2['Total Complete'] = df2.Complete.astype(str) + ' / ' + df2.Total.astype(str)
    df2['% Complete'] = df2.Complete / df2.Total
    df2 = df2.drop(columns=['Total', 'Complete']) \
        .rename_axis('FC Number') \
        .reset_index()

    # If ALL values in column are null (Eg ReleaseDate) need to fill with dummy var to pivot
    for col in ['Release Date', 'Expiry Date']:
        if df[col].isnull().all():
            df[col] = dt(1900, 1, 1).date()

    index = [c for c in df.columns if not c in ('Unit', 'Complete')]  # use all df columns except unit, complete

    df = df \
        .fillna(dict(Hrs=0)) \
        .pipe(f.multiIndex_pivot, index=index, columns='Unit', values='Complete') \
        .reset_index() \
        .merge(right=df2, how='left', on='FC Number')  # merge summary

    # reorder cols after merge: place summary cols right after column index 10
    cols = list(df)
    endcol = 10
    cols.insert(endcol + 1, cols.pop(cols.index('Total Complete')))
    cols.insert(endcol + 2, cols.pop(cols.index('% Complete')))
    df = df.loc[:, cols]

    # NOTE(review): pipe's return value is discarded here - this only has an
    # effect if sort_by_fctype mutates df in place; confirm, else should be
    # 'df = df.pipe(self.sort_by_fctype)'
    df.pipe(self.sort_by_fctype)

    return df
def process_files(
        ftype: str,
        units: list = None,
        search_folders: list = None,
        d_lower: dt = dt(2020, 1, 1),
        max_depth: int = 4,
        import_: bool = True,
        parallel: bool = True) -> Union[int, pd.DataFrame]:
    """
    Top level control function - pass in single unit or list of units
    1. Get list of files (plm, fault, dsc)
    2. Process - import plm/fault or 'fix' dsc eg downloads folder structure

    Parameters
    ----------
    ftype : str
        file type to process ('dsc', 'tr3', 'plm', 'fault')
    units : list, optional
        single unit or list; None means all units
    search_folders : list, optional
        unit subfolders to search, default ['downloads']
    d_lower : dt, optional
        ignore files older than this date
    max_depth : int, optional
        max folder recursion depth
    import_ : bool, optional
        for plm/fault: import csvs to db if True, else return combined df
    parallel : bool, optional
        currently unused; dsc processing is always parallelized

    Returns
    -------
    Union[int, pd.DataFrame]
        import rowcount or combined/empty DataFrame for plm/fault; None otherwise

    TODO - make this into a FileProcessor class
    """
    # FIX: default was a mutable list literal which got .append()'d below,
    # leaking 'vibe tests' into every subsequent call - copy per call instead
    search_folders = list(search_folders) if search_folders is not None else ['downloads']

    if ftype == 'tr3':
        search_folders.append('vibe tests')

    # assume ALL units
    # TODO: make this work for all minesites?
    units = f.as_list(units or all_units())
    search_folders = [item.lower() for item in search_folders]
    lst = []
    fl.drive_exists()

    for unit in units:
        p_unit = efl.UnitFolder(unit=unit).p_unit

        # start at downloads
        # could search more than just downloads folder (eg event too)
        lst_search = [x for x in p_unit.iterdir() if x.is_dir() and x.name.lower() in search_folders]

        for p_search in lst_search:
            lst.extend(FolderSearch(ftype, d_lower=d_lower, max_depth=max_depth).search(p_search))

        # process all dsc folders per unit as we find them
        if ftype == 'dsc':
            log.info(f'Processing dsc, unit: {unit} | dsc folders found: {len(lst)}')

            # group by "downloads/2021/F301 - 2021-01-01 - DLS" to avoid parallel collisions
            lst_grouped = [list(g) for _, g in itertools.groupby(
                lst, lambda p: fl.get_parent(p, 'downloads', offset=2).name)]

            def proc_dsc_batch(lst: List[Path]) -> None:
                """Process batch of dsc files that may be in the same top folder"""
                for p in lst:
                    dls.fix_dsc(p)

            Parallel(n_jobs=-1, verbose=11)(delayed(proc_dsc_batch)(lst=lst) for lst in lst_grouped)

            lst = []  # need to reset list, only for dsc, this is a bit sketch
        elif ftype == 'tr3':
            for p in lst:
                dls.move_tr3(p=p)
            lst = []

    # collect all csv files for all units first, then import together
    if ftype in ('plm', 'fault'):
        log.info(f'num files: {len(lst)}')
        if lst:
            df = combine_csv(lst_csv=lst, ftype=ftype, d_lower=d_lower)
            return import_csv_df(df=df, ftype=ftype) if import_ else df
        else:
            return pd.DataFrame()  # return blank dataframe
def example(cls):
    """Build an example instance covering FortHills for 2016-2020."""
    return cls(
        d_rng=(dt(2016, 1, 1), dt(2020, 12, 31)),
        minesite='FortHills')
def example(cls):
    """Build an example instance for unit F306 over a one-year window."""
    d_lower = dt(2019, 10, 18)
    d_upper = dt(2020, 10, 18)
    return cls(unit='F306', d_upper=d_upper, d_lower=d_lower)
def first_last_month(d):
    """Return (first, last) day of the month containing d.

    Parameters
    ----------
    d : datetime | date
        any date within the target month

    Returns
    -------
    tuple[datetime, datetime]
        (first day of month, last day of month), both at midnight
    """
    d_lower = dt(d.year, d.month, 1)

    # first of NEXT month (handles December -> January rollover) minus one day;
    # pure stdlib arithmetic - no need for the third-party relativedelta here
    d_next = dt(d.year + d.month // 12, d.month % 12 + 1, 1)
    d_upper = d_next + delta(days=-1)

    return (d_lower, d_upper)