Exemplo n.º 1
0
    def _calc_embeddings(
        self, model_traces: List[List[str]], real_traces: List[List[str]]
    ) -> Tuple[np.ndarray, np.ndarray, Any]:
        """Train activity embeddings on both logs and prepare distance inputs.

        An ``ActivityEmbeddingGenerator`` is built over the concatenation of
        the model traces and the real traces, configured from this
        instance's hyperparameters, and trained explicitly because
        ``auto_train`` is switched off.

        :param model_traces: The traces coming from the model.
        :param real_traces: The traces coming from the real log.
        :return: A triple ``(model_embedding, real_embedding, dist_matrix)``.
            The first two are the results of ``_calc_d`` applied to the
            per-log embeddings returned by the generator; the third is the
            result of ``calc_euclidean`` on the generator's context
            (presumably pairwise Euclidean distances between activity
            vectors -- confirm against ``calc_euclidean``).
        """
        # Both logs are embedded jointly so they share one vocabulary.
        combined_traces = model_traces + real_traces
        generator = ActivityEmbeddingGenerator(
            combined_traces,
            act2vec_windows_size=self.window_size,
            num_ns=self.num_negative,
            auto_train=False,
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            embedding_size=self.embedding_size,
        )

        # Training is deferred at construction time, so trigger it here.
        generator.start_training()

        embedding_result = generator.get_activity_embedding(
            model_traces, real_traces, norm=True
        )
        model_raw, real_raw, context = embedding_result

        distances = calc_euclidean(context)
        # The distance matrix has one row per vocabulary entry.
        vocab_size = len(distances)

        model_d = _calc_d(model_raw, vocab_size)
        real_d = _calc_d(real_raw, vocab_size)
        return model_d, real_d, distances
def test_wmd():
    """Check ``calc_wmd`` on a small two-activity example.

    The two context rows are 5 apart (sqrt(4**2 + 3**2)), and the expected
    WMD between the two single-trace logs is 2.5.
    """
    # One trace per log, written as {activity index: count}.
    model_counts = [{0: 1, 1: 1}]
    real_counts = [{1: 1}]
    context = np.array([[1, 4], [5, 1]])

    distance_matrix = calc_euclidean(context)

    # One vocabulary entry per context row.
    vocab_len = len(context)
    model_d = _calc_d(model_counts, vocab_len)
    real_d = _calc_d(real_counts, vocab_len)

    wmd = calc_wmd(model_d[0], real_d[0], distance_matrix)
    assert wmd == pytest.approx(2.5)
def main():
    """Print the WMD and ICT for a small hand-built example."""
    # Example input: each trace is a dict mapping an activity index to the
    # number of times that activity occurs in the trace.
    model_counts = [{0: 3, 1: 1, 2: 2}]
    real_counts = [{0: 2}]
    # One 2-d context vector per activity index.
    context = np.array([[0.4, 0.3], [0.2, 0.6], [0.5, 0.9]])

    distance_matrix = calc_euclidean(context)

    # Convert the count dicts with the vocabulary size (one entry per row).
    vocab_len = len(context)
    model_d = _calc_d(model_counts, vocab_len)
    real_d = _calc_d(real_counts, vocab_len)

    # Compare the first (and only) trace of each log.
    model_trace = model_d[0]
    real_trace = real_d[0]
    print("WMD: ", calc_wmd(model_trace, real_trace, distance_matrix))
    print("ICT: ", calc_ict(model_trace, real_trace, distance_matrix))