Example #1
0
    def top(self, what: str = "flaps", count: int = 5, reverse: str = "False"):
        """
        Show top n entries based on specific field

        :param what: user-facing field name to rank by (see what_map below)
        :param count: number of entries to return
        :param reverse: "True" to reverse the sort order
        """
        if self.columns is None:
            return

        now = time.time()

        # Map user-facing field names to the schema's column names
        what_map = {
            "flaps": "numChanges",
            "v4PrefixRx": "v4PfxRx",
            "evpnPrefixRx": "evpnPfxRx",
            "v6PrefixRx": "v6PfxRx",
            "updatesTx": "updatesTx",
            "updatesRx": "updatesRx",
            "uptime": "estdTime",
        }

        df = self.sqobj.top(
            hostname=self.hostname,
            what=what_map[what],
            n=count,
            # was `reverse == "True" or False`: the `or False` was redundant,
            # the comparison already yields a bool
            reverse=reverse == "True",
            columns=self.columns,
            namespace=self.namespace,
        )
        # Also guard on column presence: the user's column selection may not
        # include estdTime (matches the check done in show())
        if not df.empty and 'estdTime' in df.columns:
            df['estdTime'] = humanize_timestamp(
                df.estdTime,
                self.cfg.get('analyzer', {}).get('timezone', None))

        self.ctxt.exec_time = "{:5.4f}s".format(time.time() - now)
        return self._gen_output(df, sort=False)
Example #2
0
    def show(self, state: str = "", vrf: str = '', peer: str = ''):
        """
        Show bgp info

        :param state: filter sessions by BGP state
        :param vrf: space-separated list of VRFs to filter on
        :param peer: space-separated list of peers to filter on
        """
        if self.columns is None:
            return

        start = time.time()

        # Default column selection comes with a preset sort order; any
        # explicit selection disables sorting
        self.ctxt.sort_fields = [] if self.columns == ["default"] else None

        # 'state' is needed downstream even when the user's explicit column
        # selection leaves it out
        need_state = (self.columns not in (['default'], ['*'])
                      and 'state' not in self.columns)
        addnl_fields = ['state'] if need_state else []

        df = self.sqobj.get(hostname=self.hostname,
                            columns=self.columns,
                            namespace=self.namespace,
                            state=state,
                            addnl_fields=addnl_fields,
                            vrf=vrf.split(),
                            peer=peer.split())

        if not df.empty and 'estdTime' in df.columns:
            df['estdTime'] = humanize_timestamp(
                df.estdTime,
                self.cfg.get('analyzer', {}).get('timezone', None))

        self.ctxt.exec_time = "{:5.4f}s".format(time.time() - start)
        return self._gen_output(df)
Example #3
0
def get_file_timestamps(filelist: List[str]) -> pd.DataFrame:
    """Read the files and construct a dataframe of files and timestamp of
       record in them.

    :param filelist: list, of full path name files, typically from pyarrow's
                     dataset.files
    :returns: dataframe of filename with the time it represents, sorted
    :rtype: pandas.DataFrame

    """
    if not filelist:
        return pd.DataFrame(columns=['file', 'timestamp'])

    # We can't rely on the system istat time to find the times involved
    # So read the data for each block and check. We tried using threading
    # and it didn't dramatically alter the results. Given that we might've
    # too many threads running with the poller and everything, we skipped
    # doing it.
    fname_list = []
    fts_list = []
    for file in filelist:
        ts = pd.read_parquet(file, columns=['timestamp'])
        if not ts.empty:
            fname_list.append(file)
            fts_list.append(ts.timestamp.min())

    # Construct file dataframe as its simpler to deal with
    if fname_list:
        fdf = pd.DataFrame({'file': fname_list, 'timestamp': fts_list})
        fdf['timestamp'] = humanize_timestamp(fdf.timestamp, 'UTC')
        return fdf.sort_values(by=['timestamp'])

    # BUGFIX: was pd.DataFrame(['file', 'timestamp']), which builds a
    # one-column frame whose *rows* are those two strings. Return an empty
    # frame with the expected columns instead, matching the early return
    # at the top of this function.
    return pd.DataFrame(columns=['file', 'timestamp'])
Example #4
0
    def show(self, ifname: str = "", state: str = "", type: str = "",
             mtu: str = ""):
        """
        Show interface info

        :param ifname: space-separated list of interface names to filter on
        :param state: filter interfaces by state
        :param type: space-separated list of interface types to filter on
                     (parameter name shadows the builtin but is kept
                     unchanged for caller compatibility)
        :param mtu: space-separated list of MTUs to filter on
        """
        if self.columns is None:
            return

        # Get the default display field names
        now = time.time()
        if self.columns != ["default"]:
            self.ctxt.sort_fields = None
        else:
            self.ctxt.sort_fields = []

        df = self.sqobj.get(
            hostname=self.hostname,
            ifname=ifname.split(),
            columns=self.columns,
            namespace=self.namespace,
            state=state,
            mtu=mtu.split(),
            type=type.split(),
        )
        # Guard on empty as well as column presence: humanizing an empty
        # column is wasted work, and this matches the pattern used by the
        # other show() commands in this project
        if 'statusChangeTimestamp' in df.columns and not df.empty:
            df['statusChangeTimestamp'] = humanize_timestamp(
                df.statusChangeTimestamp,
                self.cfg.get('analyzer', {}).get('timezone', None))

        self.ctxt.exec_time = "{:5.4f}s".format(time.time() - now)
        return self._gen_output(df)
Example #5
0
File: ospf.py  Project: spunxx/suzieq
    def summarize(self, **kwargs):
        """Describe the data

        Builds per-namespace summary statistics for OSPF interfaces by
        configuring the shared summarize framework and then generating
        the summary dataframe.
        """

        # Discard these
        kwargs.pop('columns', None)

        # 'ospfIf' is ignored
        self._init_summarize('ospfIf', **kwargs)
        if self.summary_df.empty:
            return self.summary_df

        # Plain aggregations: (summary name, source column, agg function)
        self._summarize_on_add_field = [
            ('deviceCnt', 'hostname', 'nunique'),
            ('peerCnt', 'hostname', 'count'),
        ]

        # Filtered counts: (summary name, row filter, column to count)
        self._summarize_on_add_with_query = [
            ('stubbyPeerCnt', 'areaStub', 'areaStub'),
            ('passivePeerCnt', 'adjState == "passive"', 'ifname'),
            ('unnumberedPeerCnt', 'isUnnumbered', 'isUnnumbered'),
            ('failedPeerCnt', 'adjState != "passive" and nbrCount == 0',
             'ifname'),
        ]

        # Columns reported as a list of values (or a count of them)
        self._summarize_on_add_list_or_count = [
            ('area', 'area'),
            ('vrf', 'vrf'),
            ('helloTime', 'helloTime'),
            ('deadTime', 'deadTime'),
            ('retxTime', 'retxTime'),
            ('networkType', 'networkType'),
        ]

        # Zero out null change times so the humanize/subtract steps below
        # operate on valid values
        self.summary_df['lastChangeTime'] = np.where(
            self.summary_df.lastChangeTime.isnull(), 0,
            self.summary_df.lastChangeTime)

        # Convert the raw value to a timestamp in the configured timezone...
        self.summary_df['lastChangeTime'] = humanize_timestamp(
            self.summary_df.lastChangeTime, self.cfg.get('analyzer', {})
            .get('timezone', None))

        # ...then turn it into "time since last change", rounded to seconds
        self.summary_df['lastChangeTime'] = (
            self.summary_df['timestamp'] - self.summary_df['lastChangeTime'])
        self.summary_df['lastChangeTime'] = self.summary_df['lastChangeTime'] \
                                                .apply(lambda x: x.round('s'))

        # Distribution stats: (summary name, row filter, source column).
        # NOTE: must stay after the lastChangeTime conversion above, since
        # upTimeStat reads the converted column.
        self._summarize_on_add_stat = [
            ('adjChangesStat', '', 'numChanges'),
            ('upTimeStat', 'adjState == "full"', 'lastChangeTime'),
        ]

        self._gen_summarize_data()
        self._post_summarize()
        return self.ns_df.convert_dtypes()
Example #6
0
    def humanize_fields(self, df: pd.DataFrame, subset=None) -> pd.DataFrame:
        '''Humanize the timestamp and boot time fields'''
        if df.empty:
            return df

        # Only the session-established time needs conversion here
        if 'estdTime' in df.columns:
            tz = self.cfg.get('analyzer', {}).get('timezone', None)
            df['estdTime'] = humanize_timestamp(df.estdTime, tz)

        return df
Example #7
0
    def humanize_fields(self, df: pd.DataFrame, subset=None) -> pd.DataFrame:
        '''Humanize the timestamp and boot time fields'''
        if df.empty:
            return df

        if 'lastChangeTime' not in df.columns:
            return df

        # Null change times are zeroed before conversion
        tz = self.cfg.get('analyzer', {}).get('timezone', None)
        df['lastChangeTime'] = humanize_timestamp(
            df.lastChangeTime.fillna(0), tz)

        # A change time makes no sense for passive adjacencies; blank it
        if 'adjState' in df.columns:
            df['lastChangeTime'] = np.where(df.adjState == "passive", "-",
                                            df.lastChangeTime)

        return df
Example #8
0
    def humanize_fields(self, df: pd.DataFrame, subset=None) -> pd.DataFrame:
        '''Humanize the timestamp and boot time fields'''
        if df.empty:
            return df

        if 'bootupTimestamp' not in df.columns:
            return df

        # Convert the bootup timestamp into a time delta.
        # bootupTimestamp is scaled by 1000 before conversion — presumably
        # seconds -> msecs for humanize_timestamp; verify against its API
        tz = self.cfg.get('analyzer', {}).get('timezone', None)
        df['bootupTimestamp'] = humanize_timestamp(
            df['bootupTimestamp'] * 1000, tz)

        # Uptime = poll time minus boot time, inserted just before the
        # last column
        delta = pd.to_timedelta(df['timestamp'] - df['bootupTimestamp'],
                                unit='s')
        df.insert(len(df.columns) - 1, 'uptime', delta)

        return df
Example #9
0
    def get_valid_df(self, table, **kwargs) -> pd.DataFrame:
        """Read the valid rows of a table, honoring view and time window.

        :param table: str, name of the table to read
        :param kwargs: filters passed through to the DB read, plus the
                       special keys columns/addnl_fields/view/active_only
        :returns: dataframe of the requested rows with internal helper
                  columns dropped and the timestamp humanized
        :rtype: pd.DataFrame
        """
        if not self.ctxt.engine:
            print("Specify an analysis engine using set engine command")
            return pd.DataFrame(columns=["namespace", "hostname"])

        def _parse_time(timestr: str):
            """Parse a user-supplied time string into epoch msecs.

            Returns (epoch_msecs_or_'', ok). ok is False on failure, in
            which case an error has already been printed. (This replaces
            two verbatim copies of the same parse/validate logic for the
            start and end times.)
            """
            if not timestr:
                return '', True
            try:
                epoch = dateparser.parse(
                    timestr.replace('last night', 'yesterday')) \
                    .timestamp()*1000
            except Exception as e:
                print(f"ERROR: invalid time {timestr}: {e}")
                return '', False
            if not epoch:
                # Something went wrong with our parsing
                print(f"ERROR: Unable to parse {timestr}")
                return '', False
            return epoch, True

        sch = SchemaForTable(table, schema=self.schemas)
        phy_table = sch.get_phy_table_for_table()

        columns = kwargs.pop('columns', ['default'])
        addnl_fields = kwargs.pop('addnl_fields', [])
        view = kwargs.pop('view', self.iobj.view)
        active_only = kwargs.pop('active_only', True)

        fields = sch.get_display_fields(columns)
        key_fields = sch.key_fields()
        drop_cols = []

        # sqvers is internal; drop it even when the user asked for all cols
        if columns == ['*']:
            drop_cols.append('sqvers')

        aug_fields = sch.get_augmented_fields()

        if 'timestamp' not in fields:
            fields.append('timestamp')

        # 'active' is needed to filter records but isn't shown unless asked
        if 'active' not in fields + addnl_fields:
            addnl_fields.append('active')
            drop_cols.append('active')

        # Order matters. Don't put this before the missing key fields insert
        for f in aug_fields:
            dep_fields = sch.get_parent_fields(f)
            addnl_fields += dep_fields

        # Every key field must be read even if it isn't displayed
        for fld in key_fields:
            if fld not in fields + addnl_fields:
                addnl_fields.insert(0, fld)
                drop_cols.append(fld)

        for f in addnl_fields:
            if f not in fields:
                # timestamp is always the last field
                fields.insert(-1, f)

        start_time, ok = _parse_time(self.iobj.start_time)
        if not ok:
            return pd.DataFrame()
        end_time, ok = _parse_time(self.iobj.end_time)
        if not ok:
            return pd.DataFrame()

        table_df = self._dbeng.read(phy_table,
                                    'pandas',
                                    start_time=start_time,
                                    end_time=end_time,
                                    columns=fields,
                                    view=view,
                                    key_fields=key_fields,
                                    **kwargs)

        if not table_df.empty:
            if view == "all" or not active_only:
                table_df.drop(columns=drop_cols, inplace=True)
            else:
                # Keep only the currently-active records
                table_df = table_df.query('active') \
                                   .drop(columns=drop_cols)
            if 'timestamp' in table_df.columns and not table_df.empty:
                table_df['timestamp'] = humanize_timestamp(
                    table_df.timestamp,
                    self.cfg.get('analyzer', {}).get('timezone', None))

        return table_df
Example #10
0
def _write_verify_transform(mod_df, table, dbeng, schema, config_file,
                            query_str_list, changed_fields):
    """Write and verify that the written data is present

    :param mod_df: pd.DataFrame, the modified dataframe to write
    :param table: str, the name of the table to write
    :param dbeng: SqParquetDB, pointer to DB class to write/read
    :param schema: SchemaForTable, Schema of data to be written
    :param config_file: str, Filename where suzieq config is stored
    :param query_str_list: List[str], query string if any to apply to data for
                           verification check
    :param changed_fields: set, list of changed fields to verify
    :returns: Nothing
    :rtype:

    """
    mod_df = mod_df.reset_index(drop=True)
    # The DB layer wants an integer msec timestamp and a string sqvers
    mod_df.timestamp = mod_df.timestamp.astype(np.int64) // 1000000
    mod_df.sqvers = mod_df.sqvers.astype(str)
    dbeng.write(table, 'pandas', mod_df, False, schema.get_arrow_schema(),
                None)

    # Verify that what we wrote is what we got back
    mod_df.sqvers = mod_df.sqvers.astype(float)

    tblobj = get_sqobject(table)
    post_read_df = tblobj(config_file=config_file).get(columns=schema.fields)

    assert not post_read_df.empty

    # If the data was built up as a series of queries, we have to
    # apply the queries to verify that we have what we wrote
    frames = []
    for qstr in query_str_list or []:
        qdf = post_read_df.query(qstr).reset_index(drop=True)
        assert not qdf.empty
        frames.append(qdf)

    keys = schema.key_fields()
    if frames:
        qdf = pd.concat(frames).set_index(keys).sort_index()
    else:
        qdf = post_read_df.set_index(keys).sort_index()

    mod_df = mod_df.set_index(keys) \
                   .query('~index.duplicated(keep="last")') \
                   .sort_index()

    mod_df.timestamp = humanize_timestamp(mod_df.timestamp, 'GMT')

    # We can't call assert_df_equal directly and so we
    # compare this way. The catch is if we accidentally
    # change some of the unchanged fields
    assert mod_df.shape == qdf.shape

    assert not [c for c in mod_df.columns.tolist()
                if c not in qdf.columns.tolist()]

    assert (mod_df.index == qdf.index).all()

    assert_df_equal(mod_df[changed_fields].reset_index(),
                    qdf[changed_fields].reset_index(), None)
Example #11
0
    def get_valid_df(self, table, **kwargs) -> pd.DataFrame:
        """Read the valid rows of a table, honoring view, time and query_str.

        :param table: str, name of the table to read
        :param kwargs: filters passed through to the DB read, plus the
                       special keys columns/addnl_fields/view/active_only
                       and query_str
        :returns: dataframe of the requested rows, filtered by query_str
                  if one was supplied
        :rtype: pd.DataFrame
        """
        if not self.ctxt.engine:
            print("Specify an analysis engine using set engine command")
            return pd.DataFrame(columns=["namespace", "hostname"])

        def _parse_time(timestr: str):
            """Parse a user-supplied time string into epoch msecs.

            Returns (epoch_msecs_or_'', ok). ok is False on failure, in
            which case an error has already been printed. (This replaces
            two verbatim copies of the same parse/validate logic for the
            start and end times.)
            """
            if not timestr:
                return '', True
            try:
                epoch = dateparser.parse(
                    timestr.replace('last night', 'yesterday')) \
                    .timestamp()*1000
            except Exception as e:
                print(f"ERROR: invalid time {timestr}: {e}")
                return '', False
            if not epoch:
                # Something went wrong with our parsing
                print(f"ERROR: Unable to parse {timestr}")
                return '', False
            return epoch, True

        sch = SchemaForTable(table, schema=self.schemas)
        phy_table = sch.get_phy_table_for_table()

        columns = kwargs.pop('columns', ['default'])
        addnl_fields = kwargs.pop('addnl_fields', [])
        view = kwargs.pop('view', self.iobj.view)
        active_only = kwargs.pop('active_only', True)
        query_str = kwargs.pop('query_str', '')

        # The REST API provides the query_str enclosed in ". Strip that
        if query_str:
            if query_str.startswith('"') and query_str.endswith('"'):
                query_str = query_str[1:-1]

        fields = sch.get_display_fields(columns)
        key_fields = sch.key_fields()
        drop_cols = []

        # sqvers is internal; drop it even when the user asked for all cols
        if columns == ['*']:
            drop_cols.append('sqvers')

        if 'timestamp' not in fields:
            fields.append('timestamp')

        # 'active' is needed to filter records but isn't shown unless asked
        if 'active' not in fields+addnl_fields:
            addnl_fields.append('active')
            drop_cols.append('active')

        # Every key field must be read even if it isn't displayed
        for fld in key_fields:
            if fld not in fields+addnl_fields:
                addnl_fields.insert(0, fld)
                drop_cols.append(fld)

        for f in addnl_fields:
            if f not in fields:
                # timestamp is always the last field
                fields.insert(-1, f)

        start_time, ok = _parse_time(self.iobj.start_time)
        if not ok:
            return pd.DataFrame()
        end_time, ok = _parse_time(self.iobj.end_time)
        if not ok:
            return pd.DataFrame()

        table_df = self._dbeng.read(
            phy_table,
            'pandas',
            start_time=start_time,
            end_time=end_time,
            columns=fields,
            view=view,
            key_fields=key_fields,
            **kwargs
        )

        if not table_df.empty:
            if view == 'latest' and active_only:
                # Keep only the currently-active records
                table_df = table_df.query('active') \
                                   .drop(columns=drop_cols)
            else:
                table_df.drop(columns=drop_cols, inplace=True)
            if 'timestamp' in table_df.columns:
                table_df['timestamp'] = humanize_timestamp(
                    table_df.timestamp, self.cfg.get('analyzer', {})
                    .get('timezone', None))

        if query_str:
            return table_df.query(query_str)
        else:
            return table_df
Example #12
0
    def summarize(self, **kwargs) -> pd.DataFrame:
        """Summarize key information about BGP

        :returns: per-namespace summary stats as a dataframe
        :rtype: pd.DataFrame
        """

        self._init_summarize(self.iobj._table, **kwargs)
        if self.summary_df.empty or ('error' in self.summary_df.columns):
            return self.summary_df

        self.summary_df['afiSafi'] = (self.summary_df['afi'] + ' ' +
                                      self.summary_df['safi'])

        afi_safi_count = self.summary_df.groupby(by=['namespace'])['afiSafi'] \
                                        .nunique()

        # A peer appears once per afi/safi; dedup to count real sessions
        self.summary_df = self.summary_df \
                              .set_index(['namespace', 'hostname', 'vrf',
                                          'peer']) \
                              .query('~index.duplicated(keep="last")') \
                              .reset_index()
        self.ns = {i: {} for i in self.summary_df['namespace'].unique()}
        self.nsgrp = self.summary_df.groupby(by=["namespace"], observed=True)

        self._summarize_on_add_field = [('deviceCnt', 'hostname', 'nunique'),
                                        ('totalPeerCnt', 'peer', 'count'),
                                        ('uniqueAsnCnt', 'asn', 'nunique'),
                                        ('uniqueVrfsCnt', 'vrf', 'nunique')]

        self._summarize_on_add_with_query = [
            ('failedPeerCnt', 'state == "NotEstd"', 'peer'),
            ('iBGPPeerCnt', 'asn == peerAsn', 'peer'),
            ('eBGPPeerCnt', 'asn != peerAsn', 'peer'),
            ('rrClientPeerCnt', 'rrclient == "True"', 'peer', 'count'),
        ]

        self._gen_summarize_data()

        # FIX: this was a set comprehension used purely for its side
        # effect, building and discarding a set of Nones; a plain loop is
        # the idiomatic, intention-revealing form
        for i in self.ns:
            self.ns[i]['activeAfiSafiCnt'] = afi_safi_count[i]
        self.summary_row_order.append('activeAfiSafiCnt')

        # Turn estdTime into a session-uptime delta, rounded to seconds
        self.summary_df['estdTime'] = humanize_timestamp(
            self.summary_df.estdTime,
            self.cfg.get('analyzer', {}).get('timezone', None))

        self.summary_df['estdTime'] = (self.summary_df['timestamp'] -
                                       self.summary_df['estdTime'])
        self.summary_df['estdTime'] = self.summary_df['estdTime'] \
                                          .apply(lambda x: x.round('s'))
        # Now come the BGP specific ones
        established = self.summary_df.query("state == 'Established'") \
            .groupby(by=['namespace'])

        uptime = established["estdTime"]
        rx_updates = established["updatesRx"]
        tx_updates = established["updatesTx"]
        self._add_stats_to_summary(uptime, 'upTimeStat')
        self._add_stats_to_summary(rx_updates, 'updatesRxStat')
        self._add_stats_to_summary(tx_updates, 'updatesTxStat')

        self.summary_row_order.extend(
            ['upTimeStat', 'updatesRxStat', 'updatesTxStat'])

        self._post_summarize()
        return self.ns_df.convert_dtypes()
Example #13
0
File: bgp.py  Project: thilak07/suzieq
    def summarize(self, **kwargs) -> pd.DataFrame:
        """Summarize key information about BGP"""

        self._init_summarize(self.iobj._table, **kwargs)
        if self.summary_df.empty:
            return self.summary_df

        # Plain aggregations consumed by the summarize framework
        self._summarize_on_add_field = [
            ('deviceCnt', 'hostname', 'nunique'),
            ('totalPeerCnt', 'hostname', 'count'),
            ('uniqueAsnCnt', 'peerAsn', 'nunique'),
            ('uniqueVrfsCnt', 'vrf', 'nunique')
        ]

        # Filtered counts consumed by the summarize framework
        self._summarize_on_add_with_query = [
            ('failedPeerCnt', 'state == "NotEstd"', 'peer')
        ]

        self._gen_summarize_data()

        # Turn estdTime into a session-uptime delta, rounded to seconds
        tz = self.cfg.get('analyzer', {}).get('timezone', None)
        self.summary_df['estdTime'] = humanize_timestamp(
            self.summary_df.estdTime, tz)
        delta = self.summary_df['timestamp'] - self.summary_df['estdTime']
        self.summary_df['estdTime'] = delta.apply(lambda x: x.round('s'))

        # Now come the BGP specific ones
        established = self.summary_df.query("state == 'Established'") \
            .groupby(by=['namespace'])

        # Distribution stats over established sessions, in display order
        stat_cols = [('estdTime', 'upTimeStat'),
                     ('v4PfxRx', 'v4PfxRxStat'),
                     ('v6PfxRx', 'v6PfxRxStat'),
                     ('evpnPfxRx', 'evpnPfxRxStat'),
                     ('updatesRx', 'updatesRxStat'),
                     ('updatesTx', 'updatesTxStat')]
        for col, stat in stat_cols:
            self._add_stats_to_summary(established[col], stat)
        self.summary_row_order.extend([stat for _, stat in stat_cols])

        # Record the active address families per namespace
        enabled_ns = {
            'ipv4': self.summary_df.query("v4Enabled")["namespace"].unique(),
            'ipv6': self.summary_df.query("v6Enabled")["namespace"].unique(),
            'evpn': self.summary_df.query(
                "evpnEnabled")["namespace"].unique(),
        }

        for ns in self.ns:
            self.ns[ns]['activeAfiSafiList'] = [
                afi for afi in ('ipv4', 'ipv6', 'evpn')
                if ns in enabled_ns[afi]]

        self.summary_row_order.append('activeAfiSafiList')
        self._post_summarize()
        return self.ns_df.convert_dtypes()