def top(self, what: str = "flaps", count: int = 5, reverse: str = "False"):
    """Show top n entries based on specific field

    :param what: user-facing field name to rank on (see what_map below)
    :param count: number of entries to return
    :param reverse: "True" for ascending sort; any other string descending
    """
    if self.columns is None:
        return

    now = time.time()
    # Map the user-facing names to the underlying schema field names
    what_map = {
        "flaps": "numChanges",
        "v4PrefixRx": "v4PfxRx",
        "evpnPrefixRx": "evpnPfxRx",
        "v6PrefixRx": "v6PfxRx",
        "updatesTx": "updatesTx",
        "updatesRx": "updatesRx",
        "uptime": "estdTime",
    }

    df = self.sqobj.top(
        hostname=self.hostname,
        what=what_map[what],
        n=count,
        # FIX: the comparison already yields a bool; 'or False' was
        # redundant
        reverse=reverse == "True",
        columns=self.columns,
        namespace=self.namespace,
    )

    # FIX: also guard on the column's presence — a custom column list may
    # omit estdTime (matches the guard used by the BGP show handler)
    if not df.empty and 'estdTime' in df.columns:
        df['estdTime'] = humanize_timestamp(
            df.estdTime, self.cfg.get('analyzer', {}).get('timezone', None))

    self.ctxt.exec_time = "{:5.4f}s".format(time.time() - now)
    return self._gen_output(df, sort=False)
def show(self, state: str = "", vrf: str = '', peer: str = ''):
    """ Show bgp info """
    if self.columns is None:
        return

    # Get the default display field names
    start = time.time()

    # Default columns keep an empty sort-field list; any custom column
    # selection disables the context sort fields
    self.ctxt.sort_fields = [] if self.columns == ["default"] else None

    # Pull in 'state' as an additional field when a custom column list
    # omits it (not needed for 'default' or '*')
    addnl_fields = []
    if self.columns not in (['default'], ['*']) and \
            'state' not in self.columns:
        addnl_fields = ['state']

    df = self.sqobj.get(hostname=self.hostname,
                        columns=self.columns,
                        namespace=self.namespace,
                        state=state,
                        addnl_fields=addnl_fields,
                        vrf=vrf.split(),
                        peer=peer.split())

    if not df.empty and 'estdTime' in df.columns:
        tz = self.cfg.get('analyzer', {}).get('timezone', None)
        df['estdTime'] = humanize_timestamp(df.estdTime, tz)

    self.ctxt.exec_time = "{:5.4f}s".format(time.time() - start)
    return self._gen_output(df)
def get_file_timestamps(filelist: List[str]) -> pd.DataFrame:
    """Read the files and construct a dataframe of files and timestamp of
       record in them.

    :param filelist: list, of full path name files, typically from
                     pyarrow's dataset.files
    :returns: dataframe of filename with the time it represents, sorted
    :rtype: pandas.DataFrame
    """
    if not filelist:
        return pd.DataFrame(columns=['file', 'timestamp'])

    # We can't rely on the system istat time to find the times involved
    # So read the data for each block and check. We tried using threading
    # and it didn't dramatically alter the results. Given that we might've
    # too many threads running with the poller and everything, we skipped
    # doing it.
    fname_list = []
    fts_list = []
    for file in filelist:
        ts = pd.read_parquet(file, columns=['timestamp'])
        if not ts.empty:
            fname_list.append(file)
            fts_list.append(ts.timestamp.min())

    # Construct file dataframe as its simpler to deal with
    if fname_list:
        fdf = pd.DataFrame({'file': fname_list, 'timestamp': fts_list})
        fdf['timestamp'] = humanize_timestamp(fdf.timestamp, 'UTC')
        return fdf.sort_values(by=['timestamp'])

    # FIX: the fallback previously built pd.DataFrame(['file', 'timestamp']),
    # which puts the column NAMES in as a data column; return an empty frame
    # with the proper columns, same shape as the early-return above
    return pd.DataFrame(columns=['file', 'timestamp'])
def show(self, ifname: str = "", state: str = "", type: str = "",
         mtu: str = ""):
    """ Show interface info

    :param ifname: space-separated list of interface names to filter on
    :param state: interface state to filter on
    :param type: space-separated list of interface types to filter on
    :param mtu: space-separated list of MTU values to filter on
    """
    if self.columns is None:
        return

    # Get the default display field names
    now = time.time()
    if self.columns != ["default"]:
        self.ctxt.sort_fields = None
    else:
        self.ctxt.sort_fields = []

    df = self.sqobj.get(
        hostname=self.hostname,
        ifname=ifname.split(),
        columns=self.columns,
        namespace=self.namespace,
        state=state,
        mtu=mtu.split(),
        type=type.split(),
    )

    # FIX: also require a non-empty frame before humanizing, matching the
    # guard the BGP show handler uses for estdTime
    if 'statusChangeTimestamp' in df.columns and not df.empty:
        df['statusChangeTimestamp'] = humanize_timestamp(
            df.statusChangeTimestamp,
            self.cfg.get('analyzer', {}).get('timezone', None))

    self.ctxt.exec_time = "{:5.4f}s".format(time.time() - now)
    return self._gen_output(df)
def summarize(self, **kwargs): """Describe the data""" # Discard these kwargs.pop('columns', None) # 'ospfIf' is ignored self._init_summarize('ospfIf', **kwargs) if self.summary_df.empty: return self.summary_df self._summarize_on_add_field = [ ('deviceCnt', 'hostname', 'nunique'), ('peerCnt', 'hostname', 'count'), ] self._summarize_on_add_with_query = [ ('stubbyPeerCnt', 'areaStub', 'areaStub'), ('passivePeerCnt', 'adjState == "passive"', 'ifname'), ('unnumberedPeerCnt', 'isUnnumbered', 'isUnnumbered'), ('failedPeerCnt', 'adjState != "passive" and nbrCount == 0', 'ifname'), ] self._summarize_on_add_list_or_count = [ ('area', 'area'), ('vrf', 'vrf'), ('helloTime', 'helloTime'), ('deadTime', 'deadTime'), ('retxTime', 'retxTime'), ('networkType', 'networkType'), ] self.summary_df['lastChangeTime'] = np.where( self.summary_df.lastChangeTime.isnull(), 0, self.summary_df.lastChangeTime) self.summary_df['lastChangeTime'] = humanize_timestamp( self.summary_df.lastChangeTime, self.cfg.get('analyzer', {}) .get('timezone', None)) self.summary_df['lastChangeTime'] = ( self.summary_df['timestamp'] - self.summary_df['lastChangeTime']) self.summary_df['lastChangeTime'] = self.summary_df['lastChangeTime'] \ .apply(lambda x: x.round('s')) self._summarize_on_add_stat = [ ('adjChangesStat', '', 'numChanges'), ('upTimeStat', 'adjState == "full"', 'lastChangeTime'), ] self._gen_summarize_data() self._post_summarize() return self.ns_df.convert_dtypes()
def humanize_fields(self, df: pd.DataFrame, subset=None) -> pd.DataFrame:
    '''Humanize the timestamp and boot time fields'''
    # Only the session-established time needs conversion here
    if not df.empty and 'estdTime' in df.columns:
        tz = self.cfg.get('analyzer', {}).get('timezone', None)
        df['estdTime'] = humanize_timestamp(df.estdTime, tz)

    return df
def humanize_fields(self, df: pd.DataFrame, subset=None) -> pd.DataFrame:
    '''Humanize the timestamp and boot time fields'''
    if df.empty:
        return df

    cols = df.columns
    if 'lastChangeTime' in cols:
        tz = self.cfg.get('analyzer', {}).get('timezone', None)
        # NaN change times are treated as epoch 0 before conversion
        df['lastChangeTime'] = humanize_timestamp(
            df.lastChangeTime.fillna(0), tz)

    if 'adjState' in cols:
        # Passive entries get a '-' placeholder instead of a time
        df['lastChangeTime'] = np.where(df.adjState == "passive",
                                        "-", df.lastChangeTime)

    return df
def humanize_fields(self, df: pd.DataFrame, subset=None) -> pd.DataFrame:
    '''Humanize the timestamp and boot time fields'''
    if df.empty:
        return df

    # Convert the bootup timestamp into a time delta
    if 'bootupTimestamp' in df.columns:
        tz = self.cfg.get('analyzer', {}).get('timezone', None)
        # x1000 scales the value for humanize_timestamp — presumably
        # seconds to msec; confirm against humanize_timestamp's contract
        df['bootupTimestamp'] = humanize_timestamp(
            df['bootupTimestamp'] * 1000, tz)
        uptime = pd.to_timedelta(df['timestamp'] - df['bootupTimestamp'],
                                 unit='s')
        # Place uptime just before the trailing column
        df.insert(len(df.columns) - 1, 'uptime', uptime)

    return df
def get_valid_df(self, table, **kwargs) -> pd.DataFrame:
    """Read a table from the DB engine and return the normalized frame.

    Pops engine-level kwargs (columns/addnl_fields/view/active_only),
    augments the requested fields with key fields, augmented-field
    dependencies and 'active', converts the user-supplied start/end
    times to epoch msecs, reads via the DB engine, then drops the
    internally-added columns and humanizes the timestamp.
    """
    if not self.ctxt.engine:
        print("Specify an analysis engine using set engine command")
        return pd.DataFrame(columns=["namespace", "hostname"])

    sch = SchemaForTable(table, schema=self.schemas)
    phy_table = sch.get_phy_table_for_table()

    columns = kwargs.pop('columns', ['default'])
    addnl_fields = kwargs.pop('addnl_fields', [])
    view = kwargs.pop('view', self.iobj.view)
    active_only = kwargs.pop('active_only', True)

    fields = sch.get_display_fields(columns)
    key_fields = sch.key_fields()
    drop_cols = []

    # sqvers is internal; don't show it even for the '*' selection
    if columns == ['*']:
        drop_cols.append('sqvers')

    aug_fields = sch.get_augmented_fields()

    if 'timestamp' not in fields:
        fields.append('timestamp')

    # 'active' is needed for the active-only filtering below; drop it
    # again on return if the caller didn't ask for it
    if 'active' not in fields + addnl_fields:
        addnl_fields.append('active')
        drop_cols.append('active')

    # Order matters. Don't put this before the missing key fields insert
    for f in aug_fields:
        dep_fields = sch.get_parent_fields(f)
        addnl_fields += dep_fields

    # Key fields must be in the read even if not displayed
    for fld in key_fields:
        if fld not in fields + addnl_fields:
            addnl_fields.insert(0, fld)
            drop_cols.append(fld)

    for f in addnl_fields:
        if f not in fields:
            # timestamp is always the last field
            fields.insert(-1, f)

    # Natural-language time ('last night' normalized to 'yesterday'),
    # converted to epoch msecs
    if self.iobj.start_time:
        try:
            start_time = dateparser.parse(
                self.iobj.start_time.replace('last night', 'yesterday')) \
                .timestamp()*1000
        except Exception as e:
            print(f"ERROR: invalid time {self.iobj.start_time}: {e}")
            return pd.DataFrame()
    else:
        start_time = ''

    if self.iobj.start_time and not start_time:
        # Something went wrong with our parsing
        print(f"ERROR: unable to parse {self.iobj.start_time}")
        return pd.DataFrame()

    if self.iobj.end_time:
        try:
            end_time = dateparser.parse(
                self.iobj.end_time.replace('last night', 'yesterday')) \
                .timestamp()*1000
        except Exception as e:
            print(f"ERROR: invalid time {self.iobj.end_time}: {e}")
            return pd.DataFrame()
    else:
        end_time = ''

    if self.iobj.end_time and not end_time:
        # Something went wrong with our parsing
        print(f"ERROR: Unable to parse {self.iobj.end_time}")
        return pd.DataFrame()

    table_df = self._dbeng.read(
        phy_table,
        'pandas',
        start_time=start_time,
        end_time=end_time,
        columns=fields,
        view=view,
        key_fields=key_fields,
        **kwargs)

    if not table_df.empty:
        # Filter to active rows unless the caller wants everything
        if view == "all" or not active_only:
            table_df.drop(columns=drop_cols, inplace=True)
        else:
            table_df = table_df.query('active') \
                .drop(columns=drop_cols)
        # Re-check emptiness: the query above may have dropped all rows
        if 'timestamp' in table_df.columns and not table_df.empty:
            table_df['timestamp'] = humanize_timestamp(
                table_df.timestamp,
                self.cfg.get('analyzer', {}).get('timezone', None))

    return table_df
def _write_verify_transform(mod_df, table, dbeng, schema, config_file,
                            query_str_list, changed_fields):
    """Write and verify that the written data is present

    :param mod_df: pd.DataFrame, the modified dataframe to write
    :param table: str, the name of the table to write
    :param dbeng: SqParquetDB, pointer to DB class to write/read
    :param schema: SchemaForTable, Schema of data to be written
    :param config_file: str, Filename where suzieq config is stored
    :param query_str_list: List[str], query string if any to apply to data
                           for verification check
    :param changed_fields: set, list of changed fields to verify
    :returns: Nothing
    :rtype:
    """
    mod_df = mod_df.reset_index(drop=True)
    # Scale the timestamp for storage — presumably nanosec to msec
    # (factor 10^6); confirm against the DB write contract
    mod_df.timestamp = mod_df.timestamp.astype(np.int64)
    mod_df.timestamp = mod_df.timestamp // 1000000
    mod_df.sqvers = mod_df.sqvers.astype(str)

    dbeng.write(table, 'pandas', mod_df, False, schema.get_arrow_schema(),
                None)

    # Verify that what we wrote is what we got back
    mod_df.sqvers = mod_df.sqvers.astype(float)
    tblobj = get_sqobject(table)
    post_read_df = tblobj(config_file=config_file).get(columns=schema.fields)

    assert (not post_read_df.empty)

    # If the data was built up as a series of queries, we have to
    # apply the queries to verify that we have what we wrote
    dfconcat = None
    if query_str_list:
        for qstr in query_str_list:
            qdf = post_read_df.query(qstr).reset_index(drop=True)
            assert (not qdf.empty)
            if dfconcat is not None:
                dfconcat = pd.concat([dfconcat, qdf])
            else:
                dfconcat = qdf

    if dfconcat is not None:
        qdf = dfconcat.set_index(schema.key_fields()) \
            .sort_index()
    else:
        qdf = post_read_df.set_index(schema.key_fields()) \
            .sort_index()

    # Keep only the latest record per key to mirror what a read returns
    mod_df = mod_df.set_index(schema.key_fields()) \
        .query('~index.duplicated(keep="last")') \
        .sort_index()

    mod_df.timestamp = humanize_timestamp(mod_df.timestamp, 'GMT')

    # We can't call assert_df_equal directly and so we
    # compare this way. The catch is if we accidentally
    # change some of the unchanged fields
    assert (mod_df.shape == qdf.shape)
    assert (not [
        x for x in mod_df.columns.tolist()
        if x not in qdf.columns.tolist()
    ])
    assert ((mod_df.index == qdf.index).all())
    assert_df_equal(mod_df[changed_fields].reset_index(),
                    qdf[changed_fields].reset_index(), None)
def get_valid_df(self, table, **kwargs) -> pd.DataFrame:
    """Read a table from the DB engine and return the normalized frame.

    Pops engine-level kwargs (columns/addnl_fields/view/active_only/
    query_str), augments the requested fields with the key fields and
    'active', converts user-supplied start/end times to epoch msecs,
    reads via the DB engine, drops internally-added columns, humanizes
    the timestamp, and finally applies query_str if given.
    """
    if not self.ctxt.engine:
        print("Specify an analysis engine using set engine command")
        return pd.DataFrame(columns=["namespace", "hostname"])

    sch = SchemaForTable(table, schema=self.schemas)
    phy_table = sch.get_phy_table_for_table()

    columns = kwargs.pop('columns', ['default'])
    addnl_fields = kwargs.pop('addnl_fields', [])
    view = kwargs.pop('view', self.iobj.view)
    active_only = kwargs.pop('active_only', True)
    query_str = kwargs.pop('query_str', '')

    # The REST API provides the query_str enclosed in ". Strip that
    if query_str:
        if query_str.startswith('"') and query_str.endswith('"'):
            query_str = query_str[1:-1]

    fields = sch.get_display_fields(columns)
    key_fields = sch.key_fields()
    drop_cols = []

    # sqvers is internal; don't show it even for the '*' selection
    if columns == ['*']:
        drop_cols.append('sqvers')

    if 'timestamp' not in fields:
        fields.append('timestamp')

    # 'active' is needed for the active-only filtering below; drop it
    # again on return if the caller didn't ask for it
    if 'active' not in fields+addnl_fields:
        addnl_fields.append('active')
        drop_cols.append('active')

    # Key fields must be in the read even if not displayed
    for fld in key_fields:
        if fld not in fields+addnl_fields:
            addnl_fields.insert(0, fld)
            drop_cols.append(fld)

    for f in addnl_fields:
        if f not in fields:
            # timestamp is always the last field
            fields.insert(-1, f)

    # Natural-language time ('last night' normalized to 'yesterday'),
    # converted to epoch msecs
    if self.iobj.start_time:
        try:
            start_time = dateparser.parse(
                self.iobj.start_time.replace('last night', 'yesterday')) \
                .timestamp()*1000
        except Exception as e:
            print(f"ERROR: invalid time {self.iobj.start_time}: {e}")
            return pd.DataFrame()
    else:
        start_time = ''

    if self.iobj.start_time and not start_time:
        # Something went wrong with our parsing
        print(f"ERROR: unable to parse {self.iobj.start_time}")
        return pd.DataFrame()

    if self.iobj.end_time:
        try:
            end_time = dateparser.parse(
                self.iobj.end_time.replace('last night', 'yesterday')) \
                .timestamp()*1000
        except Exception as e:
            print(f"ERROR: invalid time {self.iobj.end_time}: {e}")
            return pd.DataFrame()
    else:
        end_time = ''

    if self.iobj.end_time and not end_time:
        # Something went wrong with our parsing
        print(f"ERROR: Unable to parse {self.iobj.end_time}")
        return pd.DataFrame()

    table_df = self._dbeng.read(
        phy_table,
        'pandas',
        start_time=start_time,
        end_time=end_time,
        columns=fields,
        view=view,
        key_fields=key_fields,
        **kwargs
    )

    if not table_df.empty:
        # Filter to active rows unless the caller wants everything
        if view == 'latest' and active_only:
            table_df = table_df.query('active') \
                .drop(columns=drop_cols)
        else:
            table_df.drop(columns=drop_cols, inplace=True)
        if 'timestamp' in table_df.columns:
            table_df['timestamp'] = humanize_timestamp(
                table_df.timestamp, self.cfg.get('analyzer', {})
                .get('timezone', None))

    # Apply the user-supplied filter last, on the humanized frame
    if query_str:
        return table_df.query(query_str)
    else:
        return table_df
def summarize(self, **kwargs) -> pd.DataFrame:
    """Summarize key information about BGP

    Builds per-namespace counts (devices, peers, ASNs, VRFs, failed/iBGP/
    eBGP/RR-client peers), the count of active AFI/SAFI combinations, and
    uptime/updates statistics over established sessions.
    """
    self._init_summarize(self.iobj._table, **kwargs)
    if self.summary_df.empty or ('error' in self.summary_df.columns):
        return self.summary_df

    # Combined AFI/SAFI label, counted per namespace before dedup
    self.summary_df['afiSafi'] = (
        self.summary_df['afi'] + ' ' + self.summary_df['safi'])

    afi_safi_count = self.summary_df.groupby(by=['namespace'])['afiSafi'] \
        .nunique()

    # Keep only the latest record per (namespace, hostname, vrf, peer)
    self.summary_df = self.summary_df \
        .set_index(['namespace', 'hostname', 'vrf', 'peer']) \
        .query('~index.duplicated(keep="last")') \
        .reset_index()
    self.ns = {i: {} for i in self.summary_df['namespace'].unique()}
    self.nsgrp = self.summary_df.groupby(by=["namespace"],
                                         observed=True)

    self._summarize_on_add_field = [
        ('deviceCnt', 'hostname', 'nunique'),
        ('totalPeerCnt', 'peer', 'count'),
        ('uniqueAsnCnt', 'asn', 'nunique'),
        ('uniqueVrfsCnt', 'vrf', 'nunique')]

    self._summarize_on_add_with_query = [
        ('failedPeerCnt', 'state == "NotEstd"', 'peer'),
        ('iBGPPeerCnt', 'asn == peerAsn', 'peer'),
        ('eBGPPeerCnt', 'asn != peerAsn', 'peer'),
        ('rrClientPeerCnt', 'rrclient == "True"', 'peer', 'count'),
    ]

    self._gen_summarize_data()

    # FIX: this was a set comprehension used purely for its side effect
    # (building and discarding a set of None); use a plain loop instead
    for i in self.ns.keys():
        self.ns[i].update({'activeAfiSafiCnt': afi_safi_count[i]})
    self.summary_row_order.append('activeAfiSafiCnt')

    # Convert the established time into "session uptime", rounded to
    # whole seconds
    self.summary_df['estdTime'] = humanize_timestamp(
        self.summary_df.estdTime,
        self.cfg.get('analyzer', {}).get('timezone', None))
    self.summary_df['estdTime'] = (self.summary_df['timestamp'] -
                                   self.summary_df['estdTime'])
    self.summary_df['estdTime'] = self.summary_df['estdTime'] \
        .apply(lambda x: x.round('s'))

    # Now come the BGP specific ones
    established = self.summary_df.query("state == 'Established'") \
        .groupby(by=['namespace'])

    uptime = established["estdTime"]
    rx_updates = established["updatesRx"]
    tx_updates = established["updatesTx"]
    self._add_stats_to_summary(uptime, 'upTimeStat')
    self._add_stats_to_summary(rx_updates, 'updatesRxStat')
    self._add_stats_to_summary(tx_updates, 'updatesTxStat')
    self.summary_row_order.extend(
        ['upTimeStat', 'updatesRxStat', 'updatesTxStat'])

    self._post_summarize()
    return self.ns_df.convert_dtypes()
def summarize(self, **kwargs) -> pd.DataFrame:
    """Summarize key information about BGP

    Builds per-namespace counts (devices, peers, ASNs, VRFs, failed
    peers), uptime/prefix/updates statistics over established sessions,
    and the list of enabled address families per namespace.
    """
    self._init_summarize(self.iobj._table, **kwargs)
    if self.summary_df.empty:
        return self.summary_df

    self._summarize_on_add_field = [
        ('deviceCnt', 'hostname', 'nunique'),
        ('totalPeerCnt', 'hostname', 'count'),
        ('uniqueAsnCnt', 'peerAsn', 'nunique'),
        ('uniqueVrfsCnt', 'vrf', 'nunique')
    ]

    self._summarize_on_add_with_query = [
        ('failedPeerCnt', 'state == "NotEstd"', 'peer')
    ]

    self._gen_summarize_data()

    # Convert the established time into "session uptime", rounded to
    # whole seconds
    self.summary_df['estdTime'] = humanize_timestamp(
        self.summary_df.estdTime,
        self.cfg.get('analyzer', {}).get('timezone', None))
    self.summary_df['estdTime'] = (
        self.summary_df['timestamp'] - self.summary_df['estdTime'])
    self.summary_df['estdTime'] = self.summary_df['estdTime'] \
        .apply(lambda x: x.round('s'))

    # Now come the BGP specific ones
    established = self.summary_df.query("state == 'Established'") \
        .groupby(by=['namespace'])

    uptime = established["estdTime"]
    v4_updates = established["v4PfxRx"]
    v6_updates = established["v6PfxRx"]
    evpn_updates = established["evpnPfxRx"]
    rx_updates = established["updatesRx"]
    tx_updates = established["updatesTx"]

    self._add_stats_to_summary(uptime, 'upTimeStat')
    self._add_stats_to_summary(v4_updates, 'v4PfxRxStat')
    self._add_stats_to_summary(v6_updates, 'v6PfxRxStat')
    self._add_stats_to_summary(evpn_updates, 'evpnPfxRxStat')
    self._add_stats_to_summary(rx_updates, 'updatesRxStat')
    self._add_stats_to_summary(tx_updates, 'updatesTxStat')

    self.summary_row_order.extend(['upTimeStat', 'v4PfxRxStat',
                                   'v6PfxRxStat', 'evpnPfxRxStat',
                                   'updatesRxStat', 'updatesTxStat'])

    # Per-namespace list of address families with at least one peer
    # enabled for them
    ipv4_enabled = self.summary_df.query("v4Enabled")["namespace"].unique()
    ipv6_enabled = self.summary_df.query("v6Enabled")["namespace"].unique()
    evpn_enabled = self.summary_df.query(
        "evpnEnabled")["namespace"].unique()

    for i in self.ns.keys():
        self.ns[i].update({'activeAfiSafiList': []})
        if i in ipv4_enabled:
            self.ns[i]['activeAfiSafiList'].append("ipv4")
        if i in ipv6_enabled:
            self.ns[i]['activeAfiSafiList'].append("ipv6")
        if i in evpn_enabled:
            self.ns[i]['activeAfiSafiList'].append('evpn')

    self.summary_row_order.append('activeAfiSafiList')
    self._post_summarize()
    return self.ns_df.convert_dtypes()