def filter_time_range(log: Union[EventLog, pd.DataFrame], dt1: str, dt2: str, mode="events") -> Union[EventLog, pd.DataFrame]:
    """
    Restrict a log to a time interval

    Parameters
    ----------------
    log
        Log object (its exact type must be pd.DataFrame, EventLog or EventStream)
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering (events, traces_contained, traces_intersecting)
        events: any event that fits the time frame is retained
        traces_contained: any trace completely contained in the timeframe is retained
        traces_intersecting: any trace intersecting with the time-frame is retained.
        Any other value emits a warning and the original log is returned.

    Returns
    ----------------
    filtered_log
        Filtered log

    Raises
    ----------------
    Exception
        If the type of log is not one of the supported ones
    """
    supported_types = [pd.DataFrame, EventLog, EventStream]
    if type(log) not in supported_types:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    # Both backends expose the same three entry points, so only the module
    # that gets imported depends on the kind of log.
    if check_is_pandas_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    mode_to_function = (
        ("events", "apply_events"),
        ("traces_contained", "filter_traces_contained"),
        ("traces_intersecting", "filter_traces_intersecting"),
    )
    for known_mode, function_name in mode_to_function:
        if mode == known_mode:
            selected_filter = getattr(timestamp_filter, function_name)
            return selected_filter(
                log, dt1, dt2, parameters=get_properties(log))

    warnings.warn('mode provided: ' + mode +
                  ' is not recognized; original log returned!')
    return log
def apply(dataframe, filter, parameters=None):
    """
    Run the events timestamp filter on a dataframe from a filter specification

    Parameters
    ------------
    dataframe
        Dataframe where the filter should be applied
    filter
        Filter specification: filter[1][0] is used as the timestamp key,
        filter[1][1] holds two integer POSIX timestamps separated by @@@
    parameters
        Parameters of the algorithm (copied, the caller's object is not modified)

    Returns
    ------------
    filtered_df
        Whatever timestamp_filter.apply_events returns for the decoded interval
    """
    base_parameters = {} if parameters is None else parameters

    # Split the "<start>@@@<end>" string once; each bound is then rendered
    # as the string form of the corresponding naive UTC datetime.
    interval_bounds = filter[1][1].split("@@@")
    dt1 = str(datetime.utcfromtimestamp(int(interval_bounds[0])))
    dt2 = str(datetime.utcfromtimestamp(int(interval_bounds[1])))

    new_parameters = copy(base_parameters)
    new_parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY] = filter[1][0]

    return timestamp_filter.apply_events(
        dataframe, dt1, dt2, parameters=new_parameters)
def filter_time_range(log, dt1, dt2, mode="events"):
    """
    Restrict a log to a time interval

    Parameters
    ----------------
    log
        Log object
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    mode
        Modality of filtering (events, traces_contained, traces_intersecting)
        events: any event that fits the time frame is retained
        traces_contained: any trace completely contained in the timeframe is retained
        traces_intersecting: any trace intersecting with the time-frame is retained.
        Any other value emits a warning and the original log is returned.

    Returns
    ----------------
    filtered_log
        Filtered log
    """
    # The dataframe and the log backends offer the same three functions;
    # choose the module first, then resolve the mode a single time.
    if check_is_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    mode_to_function = (
        ("events", "apply_events"),
        ("traces_contained", "filter_traces_contained"),
        ("traces_intersecting", "filter_traces_intersecting"),
    )
    for known_mode, function_name in mode_to_function:
        if mode == known_mode:
            return getattr(timestamp_filter, function_name)(log, dt1, dt2)

    warnings.warn('mode provided: ' + mode +
                  ' is not recognized; original log returned!')
    return log
def test_filtering_timeframe(self):
    # Smoke test: runs the three timeframe filters on the receipt CSV log
    # and only checks that none of them raises.

    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"

    csv_path = os.path.join(INPUT_DATA_DIR, "receipt.csv")
    df = csv_import_adapter.import_dataframe_from_path(csv_path, sep=',')

    interval_start = "2011-03-09 00:00:00"
    interval_end = "2012-01-18 23:59:59"

    events_df = timestamp_filter.apply_events(df, interval_start, interval_end)
    intersecting_df = timestamp_filter.filter_traces_intersecting(df, interval_start, interval_end)
    contained_df = timestamp_filter.filter_traces_contained(df, interval_start, interval_end)

    # The results are not inspected; drop the references explicitly.
    del events_df, intersecting_df, contained_df
def test_filtering_timeframe(self):
    # Smoke test: runs the three timeframe filters on the receipt CSV log
    # and only checks that none of them raises.

    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"

    csv_path = os.path.join(INPUT_DATA_DIR, "receipt.csv")
    raw_df = pd.read_csv(csv_path)
    df = dataframe_utils.convert_timestamp_columns_in_df(raw_df)

    interval_start = "2011-03-09 00:00:00"
    interval_end = "2012-01-18 23:59:59"

    events_df = timestamp_filter.apply_events(df, interval_start, interval_end)
    intersecting_df = timestamp_filter.filter_traces_intersecting(df, interval_start, interval_end)
    contained_df = timestamp_filter.filter_traces_contained(df, interval_start, interval_end)

    # The results are not inspected; drop the references explicitly.
    del events_df, intersecting_df, contained_df
def filter_timestamp(log, dt1, dt2, how="events"):
    """
    Filter a log on a time interval

    Parameters
    ----------------
    log
        Log object
    dt1
        Left extreme of the interval
    dt2
        Right extreme of the interval
    how
        Modality of filtering (events, traces_contained, traces_intersecting).
        Any other value emits a warning and the original log is returned
        (previously the function silently returned None in that case).

    Returns
    ----------------
    filtered_log
        Filtered log, or the unchanged input log when `how` is not recognized
    """
    # Local import keeps the block self-contained.
    import warnings

    # The dataframe and the log backends offer the same three functions;
    # choose the module first, then resolve the modality a single time.
    if check_is_dataframe(log):
        from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    else:
        from pm4py.algo.filtering.log.timestamp import timestamp_filter

    how_to_function = (
        ("events", "apply_events"),
        ("traces_contained", "filter_traces_contained"),
        ("traces_intersecting", "filter_traces_intersecting"),
    )
    for known_how, function_name in how_to_function:
        if how == known_how:
            return getattr(timestamp_filter, function_name)(log, dt1, dt2)

    # Falling through used to return None implicitly, which only surfaced
    # later as an unrelated error in the caller. Warn and hand back the
    # original log instead, as filter_time_range does.
    warnings.warn(
        f"how provided: {how!r} is not recognized; original log returned!")
    return log
def apply(dataframe, filter, parameters=None):
    """
    Run the events timestamp filter on a dataframe from a filter specification

    Parameters
    ------------
    dataframe
        Dataframe where the filter should be applied
    filter
        Filter specification: filter[1] holds two integer POSIX timestamps
        separated by @@@
    parameters
        Parameters of the algorithm

    Returns
    ------------
    filtered_df
        Whatever timestamp_filter.apply_events returns for the decoded interval
    """
    effective_parameters = {} if parameters is None else parameters

    # Split the "<start>@@@<end>" string once; each bound is then rendered
    # as the string form of the corresponding naive UTC datetime.
    interval_bounds = filter[1].split("@@@")
    dt1 = str(datetime.utcfromtimestamp(int(interval_bounds[0])))
    dt2 = str(datetime.utcfromtimestamp(int(interval_bounds[1])))

    return timestamp_filter.apply_events(
        dataframe, dt1, dt2, parameters=effective_parameters)