Пример #1
0
    def train_model(self):
        """Compute and store the per-metric mean and stddev used later for z-scores.

        The training window ends `self.offset_secs` before now and spans
        `self.train_secs`. Data for `self.charts_in_scope` is fetched twice over that
        window (10 points each): once aggregated with 'average' and once with
        'stddev'. The column-wise mean of each result is stored as a one-column
        frame in `self.df_mean` (column "mean") and `self.df_std` (column "std").

        Note that `self.df_std` is therefore the mean of the stddev-aggregated
        points, not a stddev computed over the whole window.
        """
        window_end = int(datetime.now().timestamp()) - self.offset_secs
        window_start = window_end - self.train_secs

        # options shared by both pulls; only the aggregation method differs
        shared_opts = {'points': 10, 'col_sep': '.'}

        averaged = get_data(
            self.host, self.charts_in_scope, window_start, window_end, group='average', **shared_opts
        )
        # .mean() collapses the points into one value per column; to_frame() names that column 0
        self.df_mean = averaged.mean().to_frame().rename(columns={0: "mean"})

        deviations = get_data(
            self.host, self.charts_in_scope, window_start, window_end, group='stddev', **shared_opts
        )
        self.df_std = deviations.mean().to_frame().rename(columns={0: "std"})
Пример #2
0
 def get_data(self):
     """Fetch data for the configured hosts/charts and store a tidied copy in `self.df`.

     Calls the `get_data()` function in scope (not this method) with `self.hosts`
     and `self.charts` for the window `self.after`..`self.before`, passing no
     credentials. The result is then cleaned: repeated column labels are dropped
     (first occurrence kept), columns with no values at all are removed, and
     remaining gaps are filled forward and then backward.
     """
     request_kwargs = {
         'after': self.after,
         'before': self.before,
         'user': None,
         'pwd': None,
     }
     self.df = get_data(self.hosts, self.charts, **request_kwargs)

     # get_data() may hand back the same column label more than once; keep the first
     first_occurrence = ~self.df.columns.duplicated()
     self.df = self.df.loc[:, first_occurrence]

     # columns that are entirely N/A carry no information
     self.df = self.df.dropna(axis=1, how='all')

     # fill gaps from the previous value, then from the next one for leading gaps
     forward_filled = self.df.ffill()
     self.df = forward_filled.bfill()
Пример #3
0
    def train(self,
              models_to_train=None,
              train_data_after=0,
              train_data_before=0):
        """Fetch a window of training data and fit the requested models on it.

        The window is `train_data_after`..`train_data_before` when both are
        positive. Otherwise it ends `self.offset_n_secs` before now and spans
        `self.train_n_secs`.

        :param models_to_train <list>: models to fit, passed through to `try_fit`.
        :param train_data_after <int>: integer timestamp for start of train data.
        :param train_data_before <int>: integer timestamp for end of train data.
        """
        now = datetime.now().timestamp()

        # an explicit window is only honoured when both bounds are positive
        if not (train_data_after > 0 and train_data_before > 0):
            before = int(now) - self.offset_n_secs
            after = before - self.train_n_secs
        else:
            after, before = train_data_after, train_data_before

        # pull the training data, forward filling any gaps
        fetch_kwargs = {
            'host_charts_dict': self.host_charts_dict,
            'host_prefix': True,
            'host_sep': '::',
            'after': after,
            'before': before,
            'sort_cols': True,
            'numeric_only': True,
            'protocol': self.protocol,
            'float_size': 'float32',
            'user': self.username,
            'pwd': self.password,
        }
        df_train = get_data(**fetch_kwargs).ffill()

        # remember the raw column set before any custom model dimensions are added
        self.expected_cols = [*df_train.columns]
        if self.custom_models:
            df_train = self.add_custom_models_dims(df_train)

        # fit, then report how long the whole run took
        self.try_fit(df_train, models_to_train=models_to_train)
        summary = f'training complete in {round(time.time() - now, 2)} seconds (runs_counter={self.runs_counter}, model={self.model}, train_n_secs={self.train_n_secs}, models={len(self.fitted_at)}, n_fit_success={self.n_fit_success}, n_fit_fails={self.n_fit_fail}, after={after}, before={before}).'
        self.info(summary)
        self.last_train_at = self.runs_counter
Пример #4
0
def results():
    """Score every chart dimension for a highlighted window against a baseline window.

    Reads the request parameters via `parse_params(request)`, pulls data for all
    charts of `remote_host` covering `baseline_after`..`highlight_before`, splits
    it into a baseline array and a highlight array, scores it with `run_model`
    and turns the scores into a results frame with `results_to_df`.

    Returns:
        - `return_type == 'html'`: the rendered 'results.html' template with one
          entry per chart (title, window, data host and dimensions).
        - `return_type == 'json'`: the results frame as a JSON list of records.
        - anything else: a JSON error body with HTTP status 400.
    """
    time_start = time.time()

    # get params
    params = parse_params(request)
    app.logger.info(f'... params = {params}')
    highlight_before = params['highlight_before']
    highlight_after = params['highlight_after']
    baseline_before = params['baseline_before']
    baseline_after = params['baseline_after']
    return_type = params['return_type']
    remote_host = params['remote_host']
    local_host = params['local_host']
    model = params['model']
    score_thold = params['score_thold']

    # Fail fast on an unsupported return type. Previously this fell through to
    # `return None` after all the work was done, which is not a valid view return
    # value and surfaces as a server error instead of a client error.
    # NOTE(review): assumes this function is registered as a Flask view - confirm.
    if return_type not in ('html', 'json'):
        return jsonify(
            {'error': f"unsupported return_type '{return_type}', expected 'html' or 'json'"}
        ), 400

    # get charts to pull data for
    charts = get_chart_list(host=remote_host)

    # get data for the full span, baseline start to highlight end, in one pull
    df = get_data(remote_host, charts, after=baseline_after, before=highlight_before,
                  diff=True, ffill=True, numeric_only=True, nunique_thold=0.05)
    colnames = list(df.columns)
    # presumably `time_idx` is the name of the frame's index - verify against get_data()
    arr_baseline = df.query(f'{baseline_after} <= time_idx <= {baseline_before}').values
    arr_highlight = df.query(f'{highlight_after} <= time_idx <= {highlight_before}').values
    # distinct chart names that actually came back (column names look like 'chart|dimension')
    charts_with_data = {col.split('|')[0] for col in colnames}
    app.logger.info(f'... len(charts) = {len(charts_with_data)}')
    app.logger.info(f'... len(colnames) = {len(colnames)}')
    app.logger.info(f'... arr_baseline.shape = {arr_baseline.shape}')
    app.logger.info(f'... arr_highlight.shape = {arr_highlight.shape}')
    time_got_data = time.time()
    app.logger.info(f'... time start to data = {time_got_data - time_start}')

    # get scores
    results_dict = run_model(model, colnames, arr_baseline, arr_highlight)

    time_got_scores = time.time()
    app.logger.info(f'... time data to scores = {round(time_got_scores - time_got_data, 2)}')

    # get results to df
    df_results = results_to_df(results_dict, score_thold)

    time_done = time.time()
    app.logger.info(f'... time total = {round(time_done - time_start, 2)}')

    # build response
    if return_type == 'json':
        return jsonify(df_results.to_dict(orient='records'))

    # return_type == 'html'
    result_charts = df_results['chart'].values.tolist()
    # count results per chart-name prefix (text before the first '.'), most common first
    prefix_counts = Counter(c.split('.')[0] for c in result_charts).most_common()
    counts = ' | '.join(f"{prefix}:{count}" for prefix, count in prefix_counts)
    summary_text = f"number of charts = {df_results['chart'].nunique()}, number of dimensions = {len(df_results)}, {counts}"
    charts_to_render = []
    for chart in df_results['chart'].unique():
        df_results_chart = df_results[df_results['chart'] == chart]
        dimensions = ','.join(df_results_chart['dimension'].values.tolist())
        # take the first rank found among the chart's rows
        rank = df_results_chart['chart_rank'].unique().tolist()[0]
        score_avg = round(df_results_chart['score'].mean(), 2)
        score_min = round(df_results_chart['score'].min(), 2)
        score_max = round(df_results_chart['score'].max(), 2)
        charts_to_render.append(
            {
                "id": chart,
                "title": f"{rank} - {chart} - score_avg = {score_avg}, score_min = {score_min}, score_max = {score_max}",
                "after": baseline_after,
                "before": highlight_before,
                # swap the loopback address for local_host and collapse any doubled slash
                "data_host": "http://" + f"{remote_host.replace('127.0.0.1', local_host)}/".replace('//', '/'),
                "dimensions": dimensions
            }
        )
    # the template receives the highlight bounds multiplied by 1000
    # (presumably seconds -> milliseconds; verify against results.html)
    return render_template(
        'results.html', charts=charts_to_render, highlight_after=highlight_after*1000,
        highlight_before=highlight_before*1000, summary_text=summary_text
    )