Exemplo n.º 1
0
def create_log(G, conn_comp, timestamps, max_comp_len=50, include_loops=False):
    """
    Builds an event log containing one trace per (small enough) component

    Parameters
    -------------
    G
        Graph whose nodes are strings that are split on "=": the part before
        the first "=" becomes the event name, the part after it the value
    conn_comp
        Indexable collection of components (each a collection of nodes of G)
    timestamps
        Mapping from the value part of a node to the timestamp of the event
    max_comp_len
        Components with more nodes than this threshold are skipped
    include_loops
        If True, a node having a self-loop in the component is emitted twice

    Returns
    -------------
    log
        Event log, as returned by sorting.sort_timestamp_log
    """
    log = EventLog()

    for comp_index, component in enumerate(conn_comp):
        if len(component) > max_comp_len:
            continue

        trace = Trace()
        trace.attributes["concept:name"] = str(comp_index)

        component_graph = G.subgraph(component)

        # the ordering is computed on a directed copy stripped of self-loops,
        # while self-loops are still looked up on the original subgraph
        loop_free = networkx.DiGraph(component_graph)
        loop_free.remove_edges_from(
            [edge for edge in list(loop_free.edges) if edge[0] == edge[1]])
        ordered_nodes = list(networkx.topological_sort(loop_free))

        for node in ordered_nodes:
            has_selfloop = int((node, node) in component_graph.edges)

            def new_event():
                # a fresh Event is built on every call
                parts = node.split("=")
                value = parts[1]
                return Event({
                    'time:timestamp': timestamps[value],
                    'concept:name': parts[0],
                    'value': value,
                    'typevalue': node,
                    'selfloop': has_selfloop
                })

            repetitions = 2 if (include_loops and has_selfloop) else 1
            for _ in range(repetitions):
                trace.append(new_event())

        log.append(trace)

    return sorting.sort_timestamp_log(log, "time:timestamp")
Exemplo n.º 2
0
def apply(log, list_activities, sample_size, parameters=None):
    """
    Finds the performance spectrum provided a log
    and a list of activities

    Parameters
    -------------
    log
        Log
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    sample_size
        Size of the sample
    parameters
        Parameters of the algorithm, including the activity key and the timestamp key.
        The dictionary passed by the caller is never modified.

    Returns
    -------------
    points
        Points of the performance spectrum: one list of timestamps (seconds)
        for every occurrence of the activities as consecutive events of a
        trace, ordered by the first timestamp and reduced through
        points_subset.pick_chosen_points_list when more than sample_size
    """
    # work on a copy: the attribute key written below is only meant for the
    # filtering step and must not leak into the dictionary of the caller
    parameters = {} if parameters is None else dict(parameters)

    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)

    log = sorting.sort_timestamp_log(log, timestamp_key=timestamp_key)
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key
    log = basic_filter.filter_log_events_attr(log,
                                              list_activities,
                                              parameters=parameters)

    # normalised to a list so that the window comparison below also works
    # when the activities are provided as a tuple
    target = list(list_activities)
    window = len(target)

    points = []

    for trace in log:
        for i in range(len(trace) - window + 1):
            events = trace[i:i + window]

            if [event[activity_key] for event in events] == target:
                points.append(
                    [event[timestamp_key].timestamp() for event in events])

    points = sorted(points, key=lambda x: x[0])

    if len(points) > sample_size:
        points = points_subset.pick_chosen_points_list(sample_size, points)

    return points
Exemplo n.º 3
0
def insert_time_from_previous(log, parameters=None):
    """
    Annotates every event with the time passed since the previous event
    of the same trace, both as plain seconds and as business-hours seconds

    Parameters
    -------------
    log
        Event log (anything that is not exactly an EventLog goes through
        log_converter.apply first)
    parameters
        Parameters of the algorithm:
            - constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: timestamp attribute
              (default: xes.DEFAULT_TIMESTAMP_KEY)
            - "worktiming": forwarded to BusinessHours (default: [7, 17])
            - "weekends": forwarded to BusinessHours (default: [6, 7])

    Returns
    -------------
    enriched_log
        Sorted log whose events carry "@@passed_time_from_previous" and
        "@@approx_bh_passed_time_from_previous" (both 0 on the first event
        of each trace)
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    if type(log) is not EventLog:
        log = log_converter.apply(log)

    log = sorting.sort_timestamp_log(log, timestamp_key)

    for trace in log:
        if not trace:
            continue

        # the first event has no predecessor
        first_event = trace[0]
        first_event["@@passed_time_from_previous"] = 0
        first_event["@@approx_bh_passed_time_from_previous"] = 0

        for pos in range(1, len(trace)):
            current = trace[pos]
            current_ts = current[timestamp_key]
            previous_ts = trace[pos - 1][timestamp_key]

            elapsed = current_ts - previous_ts
            current["@@passed_time_from_previous"] = elapsed.total_seconds()

            # BusinessHours receives both timestamps without timezone info
            business_hours = BusinessHours(previous_ts.replace(tzinfo=None),
                                           current_ts.replace(tzinfo=None),
                                           worktiming=worktiming,
                                           weekends=weekends)
            current["@@approx_bh_passed_time_from_previous"] = \
                business_hours.getseconds()

    return log
Exemplo n.º 4
0
    def build_from_path(self, path, parameters=None):
        """
        Builds the handler from the specified path to XES file

        The log is imported, its activity classifier is resolved, it is
        sorted, and the variants / counters / event index kept by the
        handler are (re)computed.

        Parameters
        -------------
        path
            Path to the log file
        parameters
            Parameters of the algorithm (currently not read by this method)
        """
        try:
            # try faster non standard importer
            self.log = xes_importer.apply(path, variant="nonstandard")
            if len(self.log) == 0:
                # the non standard importer returned an empty log:
                # retry with the classic one
                self.log = xes_importer.apply(path)
        except Exception:
            # revert to classic importer; only Exception is caught so that
            # KeyboardInterrupt / SystemExit are not swallowed by the fallback
            self.log = xes_importer.apply(path)
        self.log, classifier_key = insert_classifier.search_act_class_attr(
            self.log, force_activity_transition_insertion=True)

        # use the classifier attribute when one is reported, otherwise
        # keep the default XES name key
        self.activity_key = xes.DEFAULT_NAME_KEY
        if classifier_key is not None:
            self.activity_key = classifier_key

        # sorts the traces and the events in the log
        self.log = sorting.sort_timestamp_log(self.log)

        self.build_variants()
        self.calculate_variants_number()
        self.calculate_cases_number()
        self.calculate_events_number()
        # inserts the event and the case index attributes
        self.insert_event_index()
Exemplo n.º 5
0
def apply(
    log: EventLog,
    list_activities: List[str],
    sample_size: int,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> List[Dict[str, Any]]:
    """
    Finds the disconnected performance spectrum provided a log
    and a list of activities

    Parameters
    -------------
    log
        Log
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    sample_size
        Size of the sample
    parameters
        Parameters of the algorithm,  including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY
            - Parameters.SORT_LOG_REQUIRED (default: True)
        The dictionary passed by the caller is never modified.

    Returns
    -------------
    points
        Points of the performance spectrum: a list of dictionaries, each with
        a 'points' entry (list of (activity, timestamp) tuples of one matched
        segment) and a 'case_id' entry, ordered by the earliest timestamp of
        the segment and reduced through points_subset.pick_chosen_points_list
        when more than sample_size
    """
    # work on a copy: the attribute key written below is only meant for the
    # filtering step and must not leak into the dictionary of the caller
    parameters = {} if parameters is None else dict(parameters)

    sort_log_required = exec_utils.get_param_value(
        Parameters.SORT_LOG_REQUIRED, parameters, True)

    # every contiguous sub-sequence (length >= 2) of the requested activities
    all_acti_combs = set(
        tuple(list_activities[j:j + i])
        for i in range(2,
                       len(list_activities) + 1)
        for j in range(0,
                       len(list_activities) - i + 1))
    # every pair of adjacent requested activities
    two_acti_combs = set((list_activities[i], list_activities[i + 1])
                         for i in range(len(list_activities) - 1))

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               xes.DEFAULT_TIMESTAMP_KEY)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             xes.DEFAULT_TRACEID_KEY)

    parameters[Parameters.ATTRIBUTE_KEY] = activity_key
    log = basic_filter.filter_log_events_attr(log,
                                              list_activities,
                                              parameters=parameters)
    if sort_log_required:
        log = sorting.sort_timestamp_log(log, timestamp_key=timestamp_key)

    points = []
    for trace in log:
        # index pairs of consecutive events forming a requested activity pair
        matches = [(i, i + 1) for i in range(len(trace) - 1)
                   if (trace[i][activity_key],
                       trace[i + 1][activity_key]) in two_acti_combs]

        # merge neighbouring matches that share an event as long as the
        # merged activities are still a sub-sequence of the requested ones
        i = 0
        while i < len(matches) - 1:
            merged = matches[i] + matches[i + 1][1:]
            # this has to be a tuple: a generator object would never be
            # found among the tuples stored in all_acti_combs
            merged_acts = tuple(trace[mi][activity_key] for mi in merged)
            if (matches[i][-1] == matches[i + 1][0]
                    and merged_acts in all_acti_combs):
                matches[i] = merged
                del matches[i + 1]
                # restart, since the merged match may now extend further
                i = 0
            else:
                i += 1

        if matches:
            case_id = trace.attributes[case_id_key]
            points += [{
                'points':
                [(trace[i][activity_key], trace[i][timestamp_key].timestamp())
                 for i in match],
                'case_id': case_id
            } for match in set(matches)]

    points = sorted(points,
                    key=lambda p: min(ts for _, ts in p['points']))

    if len(points) > sample_size:
        points = points_subset.pick_chosen_points_list(sample_size, points)

    return points
Exemplo n.º 6
0
def apply(
    log: EventLog,
    list_activities: List[str],
    sample_size: int,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> List[List[float]]:
    """
    Finds the performance spectrum provided a log
    and a list of activities

    Parameters
    -------------
    log
        Log
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    sample_size
        Size of the sample
    parameters
        Parameters of the algorithm,  including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.SORT_LOG_REQUIRED (default: True)
        The dictionary passed by the caller is never modified.

    Returns
    -------------
    points
        Points of the performance spectrum: one list of timestamps (seconds)
        for every occurrence of the activities as consecutive events of a
        trace, ordered by the first timestamp and reduced through
        points_subset.pick_chosen_points_list when more than sample_size
    """
    # work on a copy: the attribute key written below is only meant for the
    # filtering step and must not leak into the dictionary of the caller
    parameters = {} if parameters is None else dict(parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               xes.DEFAULT_TIMESTAMP_KEY)
    sort_log_required = exec_utils.get_param_value(
        Parameters.SORT_LOG_REQUIRED, parameters, True)

    parameters[Parameters.ATTRIBUTE_KEY] = activity_key
    log = basic_filter.filter_log_events_attr(log,
                                              list_activities,
                                              parameters=parameters)
    if sort_log_required:
        log = sorting.sort_timestamp_log(log, timestamp_key=timestamp_key)

    # normalised to a list so that the window comparison below also works
    # when the activities are provided as a tuple
    target = list(list_activities)
    window = len(target)

    points = []

    for trace in log:
        for i in range(len(trace) - window + 1):
            events = trace[i:i + window]

            if [event[activity_key] for event in events] == target:
                points.append(
                    [event[timestamp_key].timestamp() for event in events])

    points = sorted(points, key=lambda x: x[0])

    if len(points) > sample_size:
        points = points_subset.pick_chosen_points_list(sample_size, points)

    return points