import numpy as np

from donut import complete_timestamp, standardize_kpi


def get_data():
    # `raw_data` is assumed to be loaded elsewhere at module scope, with a
    # KPI column 'value1' and a point-wise anomaly label column 'label1'.
    values = []
    labels = []
    timestamp = []
    for i in range(len(raw_data['value1'])):
        values.append(raw_data['value1'][i])
        labels.append(raw_data['label1'][i])
        timestamp.append(i)
    values, labels, timestamp = (np.array(values), np.array(labels),
                                 np.array(timestamp))

    # Complete the timestamp, and obtain the missing point indicators.
    timestamp, missing, (values, labels) = \
        complete_timestamp(timestamp, (values, labels))

    # Split the training and testing data.
    test_portion = 0.2
    test_n = int(len(values) * test_portion)
    train_values, test_values = values[:-test_n], values[-test_n:]
    train_labels, test_labels = labels[:-test_n], labels[-test_n:]
    train_missing, test_missing = missing[:-test_n], missing[-test_n:]

    # Standardize the training and testing data, excluding labeled anomalies
    # and missing points from the mean/std estimation.
    train_values, mean, std = standardize_kpi(
        train_values, excludes=np.logical_or(train_labels, train_missing))
    test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)

    return (train_values, train_labels, train_missing, mean, std,
            test_values, test_labels, test_missing)
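# A minimal usage sketch for get_data(), assuming `raw_data` is a dict-like
# table such as a pandas DataFrame. The synthetic series below is purely
# illustrative and not part of the original script.
import numpy as np
import pandas as pd

raw_data = pd.DataFrame({
    'value1': np.sin(np.linspace(0, 50, 2000)),          # toy KPI signal
    'label1': np.zeros(2000, dtype=np.int32),            # no known anomalies
})

(train_values, train_labels, train_missing, mean, std,
 test_values, test_labels, test_missing) = get_data()
print(train_values.shape, test_values.shape)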
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras as K
from tfsnippet.modules import Sequential
from tqdm import trange

from donut import complete_timestamp, standardize_kpi
# `DonutModel` and `QuietDonutTrainer` are project-local (presumably thin
# wrappers around donut's `Donut` and `DonutTrainer`); their imports are kept
# implicit here.


def fit(self, X: pd.DataFrame):
    with self.device:
        # Reset all results from the last run to avoid reusing variables.
        self.means, self.stds, self.tf_sessions, self.models = [], [], [], []
        for col_idx in trange(len(X.columns)):
            col = X.columns[col_idx]
            tf_session = tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True))
            timestamps = X.index
            # Fill gaps by interpolation, then back-fill any leading NaNs.
            features = X.loc[:, col].interpolate().bfill().values
            labels = pd.Series(0, index=X.index)
            timestamps, _, (features, labels) = \
                complete_timestamp(timestamps, (features, labels))
            missing = np.isnan(X.loc[:, col].values)
            _, mean, std = standardize_kpi(
                features, excludes=np.logical_or(labels, missing))

            with tf.variable_scope('model') as model_vs:
                model = DonutModel(
                    h_for_p_x=Sequential([
                        K.layers.Dense(100,
                                       kernel_regularizer=K.regularizers.l2(0.001),
                                       activation=tf.nn.relu),
                        K.layers.Dense(100,
                                       kernel_regularizer=K.regularizers.l2(0.001),
                                       activation=tf.nn.relu),
                    ]),
                    h_for_q_z=Sequential([
                        K.layers.Dense(100,
                                       kernel_regularizer=K.regularizers.l2(0.001),
                                       activation=tf.nn.relu),
                        K.layers.Dense(100,
                                       kernel_regularizer=K.regularizers.l2(0.001),
                                       activation=tf.nn.relu),
                    ]),
                    x_dims=self.x_dims,
                    z_dims=5,
                )

            trainer = QuietDonutTrainer(model=model, model_vs=model_vs,
                                        max_epoch=self.max_epoch,
                                        batch_size=self.batch_size,
                                        valid_batch_size=self.batch_size,
                                        missing_data_injection_rate=0.0,
                                        lr_anneal_factor=1.0)
            with tf_session.as_default():
                trainer.fit(features, labels, missing, mean, std,
                            valid_portion=0.25)

            # Keep one fitted model (and its session/statistics) per column.
            self.means.append(mean)
            self.stds.append(std)
            self.tf_sessions.append(tf_session)
            self.models.append(model)
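# A hedged sketch of the scoring pass that would pair with fit() above: it
# reuses the per-column sessions, means and stds collected during training.
# `predict_scores` is not part of the original class, and donut's standard
# `DonutPredictor` is assumed here to accept the models stored by fit().
import pandas as pd

from donut import DonutPredictor, standardize_kpi


def predict_scores(self, X: pd.DataFrame):
    scores = []
    for col_idx, col in enumerate(X.columns):
        features = X.loc[:, col].interpolate().bfill().values
        # Standardize with the train-time statistics of this column.
        features, _, _ = standardize_kpi(features,
                                         mean=self.means[col_idx],
                                         std=self.stds[col_idx])
        with self.tf_sessions[col_idx].as_default():
            predictor = DonutPredictor(self.models[col_idx])
            scores.append(predictor.get_score(features))
    return scores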
import os

import numpy as np
import pandas as pd


def generate_score(number):
    # Read the raw data.
    data_dir_path = ('C:/Users/Administrator/Downloads/research/donut-master'
                     '/SMD/data_concat/data-' + number + '.csv')
    data = np.array(pd.read_csv(data_dir_path, header=None), dtype=np.float64)
    tag_dir_path = './SMD/test_label/machine-' + number + '.csv'
    tag = np.array(pd.read_csv(tag_dir_path, header=None), dtype=np.int32)
    # The first half of the concatenated series is unlabeled training data.
    labels = np.append(np.zeros(int(len(data) / 2)), tag)

    # Pick one column.
    values = data[:, 1]
    timestamp = np.arange(len(data)) + 1

    # If there is no label, simply use all zeros.
    # labels = np.zeros_like(values, dtype=np.int32)

    from donut import complete_timestamp, standardize_kpi

    # Complete the timestamp, and obtain the missing point indicators.
    timestamp, missing, (values, labels) = \
        complete_timestamp(timestamp, (values, labels))

    # Split the training and testing data.
    test_portion = 0.5
    test_n = int(len(values) * test_portion)
    train_values, test_values = values[:-test_n], values[-test_n:]
    train_labels, test_labels = labels[:-test_n], labels[-test_n:]
    train_missing, test_missing = missing[:-test_n], missing[-test_n:]

    # Standardize the training and testing data, excluding labeled anomalies
    # and missing points from the mean/std estimation.
    train_values, mean, std = standardize_kpi(
        train_values, excludes=np.logical_or(train_labels, train_missing))
    test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)

    import tensorflow as tf
    from donut import Donut
    from tensorflow import keras as K
    from tfsnippet.modules import Sequential

    # We build the entire model within the scope of `model_vs`; it should hold
    # exactly all the variables of `model`, including the variables created by
    # the Keras layers.
    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=120,
            z_dims=5,
        )

    from donut import DonutTrainer, DonutPredictor

    trainer = DonutTrainer(model=model, model_vs=model_vs)
    predictor = DonutPredictor(model)
    with tf.Session().as_default():
        # `missing` must be passed between the labels and `mean`; the original
        # call omitted it, shifting `mean`/`std` into the wrong arguments.
        trainer.fit(train_values, train_labels, train_missing, mean, std)
        test_score = predictor.get_score(test_values, test_missing)

    if not os.path.exists('./score'):
        os.makedirs('./score')
    np.save('./score/' + number + '.npy', test_score)
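# A minimal driver sketch for generate_score(). The identifiers follow SMD's
# 'machine-<group>-<index>' naming used in the paths above; the short list
# here is illustrative only.
if __name__ == '__main__':
    for number in ['1-1', '1-2', '1-3']:
        generate_score(number)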
from datetime import datetime

import numpy as np
import pandas as pd

from donut import complete_timestamp, standardize_kpi

args = parser.parse_args()

# Load the training data.
data = pd.read_csv(args.train_data, skiprows=[0, 1], header=None)
data = data.dropna()
values = data[2].values
# Column 3 carries labels, but this script ignores them and treats every
# point as unlabeled.
labels = np.zeros_like(values, dtype=np.int32)
date_str = data[0].values
date = [datetime.strptime(x, '%Y-%m-%d %H:%M:%S') for x in date_str]
date_delta = [x - date[0] for x in date]
# Integer minutes since the first sample; complete_timestamp expects integer
# timestamps on a regular interval.
timestamp = [x.days * 1440 + x.seconds // 60 for x in date_delta]

# Complete the timestamp, and obtain the missing point indicators.
timestamp, missing, (values, labels) = \
    complete_timestamp(timestamp, (values, labels))

# Split the training and testing data.
test_portion = 0.3
test_n = int(len(values) * test_portion)
train_values, test_values = values[:-test_n], values[-test_n:]
train_labels, test_labels = labels[:-test_n], labels[-test_n:]
train_missing, test_missing = missing[:-test_n], missing[-test_n:]

# Standardize the training and testing data.
train_values, mean, std = standardize_kpi(
    train_values, excludes=np.logical_or(train_labels, train_missing))
test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)
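# The construction of `parser` precedes this snippet and is elided; judging
# from the use of `args.train_data`, it presumably looks something like this
# sketch (flag name and help text inferred from usage, not confirmed by the
# source):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--train_data',
                    help='training CSV: column 0 holds "%Y-%m-%d %H:%M:%S" '
                         'datetimes, column 2 the KPI values')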
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow import keras as K
from tfsnippet.modules import Sequential

from donut import (Donut, DonutTrainer, DonutPredictor, complete_timestamp,
                   iterative_masked_reconstruct, standardize_kpi)
# `ah`, `ch`, `my_func_float` and `my_func_int` are project-local helpers,
# imported elsewhere in the surrounding project.


def vae_donut(ts_obj, window_size, mcmc_iteration, latent_dim,
              gaussian_window_size, step_size,
              plot_reconstruction=False, plot_anomaly_score=False):
    # The authors use window_size = 120 and mcmc_iteration = 10.
    # https://github.com/kratzert/finetune_alexnet_with_tensorflow/issues/8
    tf.reset_default_graph()

    start = time.time()

    # If there are missing time steps, we DO NOT fill them with NaNs, because
    # donut will replace them with 0s via complete_timestamp; see line 6 in
    # https://github.com/NetManAIOps/donut/blob/master/donut/preprocessing.py
    timestamp = ts_obj.dataframe["timestamp"].values
    values = ts_obj.dataframe["value"].values
    labels = np.zeros_like(ts_obj.dataframe["value"].values, dtype=np.int32)

    # Complete the timestamp, and obtain the missing point indicators
    # (missing points are replaced with 0s).
    # donut cannot handle this date format for some reason, so substitute a
    # minute-frequency range of the same length:
    if ts_obj.dateformat == "%Y-%m":
        rng = pd.date_range('2000-01-01', periods=len(values), freq='T')
        timestamp, missing, (values, labels) = complete_timestamp(
            rng, (values, labels))
    else:
        timestamp, missing, (values, labels) = complete_timestamp(
            timestamp, (values, labels))

    # Standardize the data, excluding labeled and missing points.
    values, mean, std = standardize_kpi(
        values, excludes=np.logical_or(labels, missing))

    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=window_size,
            z_dims=latent_dim,
        )

    trainer = DonutTrainer(model=model, model_vs=model_vs)
    predictor = DonutPredictor(model)

    with tf.Session().as_default():
        trainer.fit(values, labels, missing, mean, std)
        score = predictor.get_score(values, missing)
        # Stride the series into overlapping windows: if the time series is
        # [1, 2, 3, 4, ...] and the window size is 3, this gives us
        # [[1, 2, 3], [2, 3, 4], ...].
        ts_strided = ah.as_sliding_window(values, window_size)
        ts_strided = my_func_float(np.array(ts_strided, dtype=np.float32))
        missing_strided = ah.as_sliding_window(missing, window_size)
        missing_strided = my_func_int(
            np.array(missing_strided, dtype=np.int32))

        x = model.vae.reconstruct(
            iterative_masked_reconstruct(reconstruct=model.vae.reconstruct,
                                         x=ts_strided,
                                         mask=missing_strided,
                                         iter_count=mcmc_iteration,
                                         back_prop=False))

        # `x` is a :class:`tfsnippet.stochastic.StochasticTensor`, from which
        # you may derive many useful outputs, for example:
        # x.tensor.eval()                              # the `x` samples
        # x.log_prob(group_ndims=0).eval()             # element-wise log p(x|z)
        # x.distribution.log_prob(ts_strided).eval()   # reconstruction probability
        # x.distribution.mean.eval(), x.distribution.std.eval()  # p(x|z) stats
        tensor_reconstruction_probabilities = \
            x.distribution.log_prob(ts_strided).eval()

        # Because of the way striding works, take all scores from the first
        # window, then the last point/score of each remaining window.
        reconstruction_probabilities = list(
            tensor_reconstruction_probabilities[0])
        for i in range(1, len(tensor_reconstruction_probabilities)):
            slide = tensor_reconstruction_probabilities[i]
            reconstruction_probabilities.append(slide[-1])

    if ts_obj.miss:
        ref_date_range = ch.get_ref_date_range(ts_obj.dataframe,
                                               ts_obj.dateformat,
                                               ts_obj.timestep)
        gaps = ref_date_range[~ref_date_range.isin(
            ts_obj.dataframe["timestamp"])]
        filled_df = ch.fill_df(ts_obj.dataframe, ts_obj.timestep,
                               ref_date_range, "fill_nan")
        filled_df["reconstruction_probabilities"] = \
            reconstruction_probabilities
        # Remove NaNs so the scores line up with the observed points again.
        filled_df = filled_df.dropna()
        reconstruction_probabilities = list(
            filled_df["reconstruction_probabilities"].values)

    reconstruction_probabilities = [abs(item)
                                    for item in reconstruction_probabilities]

    anomaly_scores = ah.determine_anomaly_scores_error(
        reconstruction_probabilities,
        np.zeros_like(reconstruction_probabilities),
        ts_obj.get_length(), gaussian_window_size, step_size)

    end = time.time()

    if plot_reconstruction:
        plt.subplot(211)
        # See lines 98-100 of
        # https://github.com/NetManAIOps/donut/blob/master/donut/prediction.py
        plt.title("Negative of Reconstruction Probabilities")
        plt.plot(reconstruction_probabilities)
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(), color="black",
                    label="probationary line")
        plt.tight_layout()
        plt.show()

    if plot_anomaly_score:
        plt.subplot(211)
        plt.title("Anomaly Scores")
        plt.plot(anomaly_scores)
        plt.ylim([.998, 1])
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(), color="black",
                    label="probationary line")
        plt.tight_layout()
        plt.show()

    return {"Anomaly Scores": anomaly_scores,
            "Time": end - start,
            "Reconstruction Probabilities": reconstruction_probabilities}
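# A minimal invocation sketch for vae_donut(). window_size=120 and
# mcmc_iteration=10 are the authors' defaults quoted in the comments above;
# the remaining values and the `ts_obj` time-series wrapper are assumptions
# of this example, supplied by the surrounding project.
results = vae_donut(ts_obj, window_size=120, mcmc_iteration=10, latent_dim=5,
                    gaussian_window_size=128, step_size=64,
                    plot_anomaly_score=True)
print("runtime (s):", results["Time"])
print("num scores:", len(results["Anomaly Scores"]))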