def gradient(window_size, states_path, states_list):
    # Runs the system flow with the 'gradient' temporal abstraction method
    # applied to every property found in the click context.
    #
    # window_size: value stored as the window-size parameter of every property
    # states_path: path of a states CSV file; when falsy, states_list is used
    # states_list: String, whitespace-separated numeric cutoffs; the same
    #              bins are generated for every property
    ctx = click.get_current_context()
    properties = ctx.meta['properties']

    if states_path:
        states = ctx.meta['files_manager'].read_csv(states_path)
    else:
        # split() with no argument ignores repeated, leading and trailing
        # whitespace; split(' ') would yield empty tokens that float() rejects.
        cutoffs = [float(token) for token in states_list.split()]
        bins = [-inf] + cutoffs + [inf]
        bins_lows_per_property = bins[:-1]
        bins_highs_per_property = bins[1:]
        nb_bins_per_property = len(bins_lows_per_property)

        # The same bin edges are repeated once per property.
        bins_lows = np.tile(bins_lows_per_property, len(properties))
        bins_highs = np.tile(bins_highs_per_property, len(properties))

        states = DataframesGenerator.generate_states(
            # state ids run from 1 to the total number of bins
            np.arange(1, len(bins_lows) + 1),
            # each property id is repeated once per bin
            np.repeat(properties, nb_bins_per_property),
            # bin ids restart from 0 for every property
            np.tile(np.arange(nb_bins_per_property), len(properties)),
            bins_lows,
            bins_highs,
            np.full(len(bins_lows), np.nan))

    params_df = DataframesGenerator.generate_temporal_abstraction_params(
        properties,
        ['gradient'] * len(properties),
        np.full(len(properties), nan),
        np.full(len(properties), window_size))

    system_flow(ctx.meta['files_manager'],
                ctx.meta['preprocessing_params_df'],
                params_df,
                states,
                kfold=ctx.meta['kfold'])
예제 #2
0
 def discretize_property(self, prop_df):
     """
     Discretizes a single property using the states held by this instance
     :param prop_df: Dataframe, a time-point series of a single property
     :return: Tuple, (Dataframe, Dataframe) - the states whose TemporalPropertyID matches
         the property of prop_df, and the result of KnowledgeBased.knowledge_based.
         Both are empty dataframes when prop_df is empty
     """
     if not prop_df.empty:
         # the property id is taken from the first row of the series
         prop_id = prop_df[DatasetColumns.TemporalPropertyID].values[0]
         is_prop_state = self.__states[StatesColumns.TemporalPropertyID] == prop_id
         prop_states = self.__states[is_prop_state]
         return prop_states, KnowledgeBased.knowledge_based(self.__states, prop_df)

     return (DataframesGenerator.generate_empty_states(),
             DataframesGenerator.generate_empty_symbolic_time_series())
    def create_symbolic_time_series(states, time_point_series):
        """
        Builds a symbolic time series out of a states dataframe and a time point series
        :param states: Dataframe, a states dataframe
        :param time_point_series: Dataframe, a time point series
        :return:
            Dataframe, a copy of the time point series in which the TemporalPropertyValue
            column is replaced by the StateID matched through (TemporalPropertyID, BinID).
            An empty symbolic time series is returned when either input is empty
        """
        if states.empty or time_point_series.empty:
            return DataframesGenerator.generate_empty_symbolic_time_series()

        series_df = time_point_series.copy()
        cutpoints_by_prop = TemporalAbstraction.extract_cutpoints_from_states(states)

        # Assign a bin id to every row, one property at a time
        for prop_id, cutoffs in cutpoints_by_prop.items():
            prop_rows = (series_df.TemporalPropertyID == prop_id)
            prop_values = series_df[prop_rows].TemporalPropertyValue
            series_df.loc[prop_rows, StatesColumns.BinID] = \
                TemporalAbstraction.discretize_to_bins(cutoffs, prop_values)

        join_keys = [StatesColumns.TemporalPropertyID, StatesColumns.BinID]
        state_lookup = states[[StatesColumns.StateID,
                               StatesColumns.TemporalPropertyID,
                               StatesColumns.BinID]].set_index(join_keys)

        without_values = series_df.drop(DatasetColumns.TemporalPropertyValue, axis='columns')

        # Left join: rows without a matching (property, bin) pair are kept
        with_state_ids = without_values.join(state_lookup, how='left', on=join_keys)

        return with_state_ids.drop(StatesColumns.BinID, axis='columns')
예제 #4
0
    def create_time_intervals(self, symbolic_time_series):
        """
        Turns the discretized dataset into time-intervals ordered by start-time, end-time, state-id
        :param symbolic_time_series: Dataframe
        :return: Dataframe, symbolic time intervals with the columns
            EntityID, TemporalPropertyID, StateID, Start, End (in that order)
        """
        if symbolic_time_series.empty:
            return DataframesGenerator.generate_empty_symbolic_time_intervals()

        group_keys = [TimeIntervalsColumns.TemporalPropertyID, TimeIntervalsColumns.EntityID]
        sort_keys = [TimeIntervalsColumns.Start, TimeIntervalsColumns.End, TimeIntervalsColumns.StateID]
        output_columns = [
            TimeIntervalsColumns.EntityID,
            TimeIntervalsColumns.TemporalPropertyID,
            TimeIntervalsColumns.StateID,
            TimeIntervalsColumns.Start,
            TimeIntervalsColumns.End
        ]

        # Timestamps are connected separately for each (property, entity) group
        per_group = symbolic_time_series.groupby(by=group_keys)
        intervals = per_group.apply(lambda group: self.__connect_timestamps(group))

        intervals = intervals.sort_values(by=sort_keys)

        # Move the group keys out of the index, then discard the leftover
        # inner index level ('level_2') produced by reset_index
        intervals = intervals.reset_index()
        intervals = intervals.drop('level_2', axis=1)

        return intervals[output_columns]
예제 #5
0
    def discretize_property(self, prop_df):
        """
        Discretizes a single property with the gradient method
        :param prop_df: Dataframe, a time-point series of a single property
        :return: Tuple, (Dataframe, Dataframe) - the states whose TemporalPropertyID matches
            the property of prop_df, and the result of Gradient.gradient.
            When prop_df is empty, all the states of this instance are returned
            together with an empty symbolic time series
        """
        if prop_df.empty:
            empty_series = DataframesGenerator.generate_empty_symbolic_time_series()
            return self.__states, empty_series

        # the property id is taken from the first row of the series
        prop_id = prop_df[DatasetColumns.TemporalPropertyID].values[0]
        is_prop_state = self.__states[StatesColumns.TemporalPropertyID] == prop_id
        prop_states = self.__states[is_prop_state]

        return prop_states, Gradient.gradient(prop_df, self.__window_size, self.__states)
    def temporal_abstraction_per_property(self,
                                          dataset,
                                          entity_class_relations=None,
                                          states=None):
        """
        Runs the abstraction configured for every row of the temporal abstraction
        params on the matching property of the dataset, and combines the results
        :param dataset: Dataframe, the dataset, filtered by TemporalPropertyID for each params row
        :param entity_class_relations: forwarded as-is to the abstraction factory
        :param states: forwarded as-is to the abstraction factory
        :return: Tuple, (Dataframe, Dataframe) - the concatenated states and the concatenated
            symbolic time series, with StateID re-indexed so that ids of different
            properties do not collide. Both are empty dataframes when dataset is empty
        """
        if dataset.empty:
            return (DataframesGenerator.generate_empty_states(),
                    DataframesGenerator.generate_empty_symbolic_time_series())

        # One (states, symbolic time series) pair per params row
        per_property = [
            AbstractionPerProperty.__generate_abstraction_by_name(
                params, entity_class_relations,
                states).discretize_property(dataset[dataset[
                    DatasetColumns.TemporalPropertyID] == params[
                        TemporalAbstractionParamsColumns.TemporalPropertyID]])
            for _, params in self.__temporal_abstraction_params_df.iterrows()
        ]

        reindexed_states = []
        reindexed_series = []
        # Number of states accumulated over the properties handled so far
        states_ctr = 0
        for prop_states, prop_series in per_property:
            # The shift moves the property's lowest StateID to states_ctr + 1.
            # It must be derived from the states frame and applied to both
            # frames: a series that does not contain the lowest state would
            # otherwise be shifted differently and no longer match its states.
            # The series' own minimum is used only when there are no states.
            reference = prop_series if prop_states.empty else prop_states
            shift = states_ctr - (reference[StatesColumns.StateID].min() - 1)

            reindexed_states.append(prop_states.assign(
                **{StatesColumns.StateID: prop_states[StatesColumns.StateID] + shift}))
            reindexed_series.append(prop_series.assign(
                **{StatesColumns.StateID: prop_series[StatesColumns.StateID] + shift}))

            states_ctr += prop_states.shape[0]

        # Concatenating the data-frames one after the other
        combined_symbolic_time_series = pd.concat(reindexed_series,
                                                  ignore_index=True)
        combined_states = pd.concat(reindexed_states, ignore_index=True)
        return combined_states, combined_symbolic_time_series
def per_dataset(paa_window_size, std_coef, max_gap):
    # Stores in the click context the sorted unique property ids of the
    # dataset ('properties') and a preprocessing params dataframe in which
    # every property gets the same paa_window_size, std_coef and max_gap
    # ('preprocessing_params_df').
    ctx = click.get_current_context()

    property_ids = ctx.meta['dataset'][DatasetColumns.TemporalPropertyID].unique()
    properties = np.sort(property_ids)
    ctx.meta['properties'] = properties

    nb_properties = len(properties)
    ctx.meta['preprocessing_params_df'] = DataframesGenerator.generate_preprocessing_params(
        properties,
        np.full(nb_properties, paa_window_size),
        np.full(nb_properties, std_coef),
        np.full(nb_properties, max_gap))
def discretization(method, nb_bins):
    # Runs the system flow with the given discretization method and number
    # of bins applied to every property found in the click context.
    # The second numeric parameter of each property is left as NaN.
    ctx = click.get_current_context()
    properties = ctx.meta['properties']
    nb_properties = len(properties)

    methods = [method] * nb_properties
    bins_per_property = np.full(nb_properties, nb_bins)
    unused_param = np.full(nb_properties, nan)

    params_df = DataframesGenerator.generate_temporal_abstraction_params(
        properties, methods, bins_per_property, unused_param)

    system_flow(ctx.meta['files_manager'],
                ctx.meta['preprocessing_params_df'],
                params_df,
                kfold=ctx.meta['kfold'])
    def intervalization_per_property(self, symbolic_time_series):
        """
        Creates time intervals for each property listed in the preprocessing params,
        using the MaxGap of that property, and concatenates the results
        :param symbolic_time_series: Dataframe, a symbolic time series
        :return: Dataframe, the concatenated symbolic time intervals (index ignored),
            or empty symbolic time intervals when the input is empty
        """
        if symbolic_time_series.empty:
            return DataframesGenerator.generate_empty_symbolic_time_intervals()

        intervalized_props = []
        for _, params in self.__preprocessing_params_df.iterrows():
            adapter = TimeIntervalsAdapter(params[PreprocessingParamsColumns.MaxGap])
            # keep only the rows that belong to the current property
            is_current_prop = symbolic_time_series[SymbolicTimeSeriesColumns.TemporalPropertyID] == \
                params[PreprocessingParamsColumns.TemporalPropertyID]
            intervalized_props.append(
                adapter.create_time_intervals(symbolic_time_series[is_current_prop]))

        return pd.concat(intervalized_props, ignore_index=True)
    def discretize_property(self, prop_df):
        """
        A template method: the only step that differs between discretization algorithms
        is _generate_cutpoints, which is invoked here to obtain the cutpoints.
        :param prop_df: Dataframe, A time-point series of a single property
        :return: Tuple, (Dataframe, Dataframe) - the states, and the symbolic-point-series
            created from prop_df. Both are empty dataframes when prop_df is empty
        """
        if prop_df.empty:
            return (DataframesGenerator.generate_empty_states(),
                    DataframesGenerator.generate_empty_symbolic_time_series())

        generated = self._generate_cutpoints(prop_df)

        # A pair whose first element is a list is treated as (cutpoints, scores);
        # the original comment names td4c and persist as the methods returning it.
        # Anything else is a plain collection of cutpoints, all scored NaN
        has_scores = len(generated) == 2 and type(generated[0]) is list
        if has_scores:
            cutpoints, scores = generated
        else:
            cutpoints = generated
            scores = np.full(len(generated), nan)

        # the property id is taken from the first row of the series
        prop_id = prop_df[DatasetColumns.TemporalPropertyID].values[0]
        states = Discretization.create_prop_states(prop_id, cutpoints, scores)

        return states, TemporalAbstraction.create_symbolic_time_series(states, prop_df)
def knowledge_based(states_path):
    # Runs the system flow with the 'knowledge-based' temporal abstraction
    # method for every property found in the click context, using the states
    # read from the CSV file at states_path.
    # Both numeric parameters of each property are left as NaN.
    ctx = click.get_current_context()
    properties = ctx.meta['properties']
    nb_properties = len(properties)

    states = ctx.meta['files_manager'].read_csv(states_path)

    methods = ['knowledge-based'] * nb_properties
    params_df = DataframesGenerator.generate_temporal_abstraction_params(
        properties,
        methods,
        np.full(nb_properties, nan),
        np.full(nb_properties, nan),
    )

    system_flow(ctx.meta['files_manager'],
                ctx.meta['preprocessing_params_df'],
                params_df,
                states,
                kfold=ctx.meta['kfold'])
    def create_prop_states(prop_id, cutpoints, scores=None):
        """
        Creates a states dataframe for a single property, containing the cutpoints and scores given as parameter
        :param prop_id: int, the property id
        :param cutpoints: a list (or any other iterable, e.g. a tuple or a numpy array) of cutpoints
        :param scores: List, a list of scores, one for each cut-point. Defaults to NaN for every cut-point
        :return: Dataframe, a states dataframe with len(cutpoints) + 1 states: StateIDs start at 1,
            BinIDs start at 0, the lowest bin starts at -inf, the highest bin ends at inf,
            and the score of the first state is NaN
        """
        # Normalize to a real list: '[-inf] + cutpoints' raises for a tuple and
        # silently broadcasts (instead of concatenating) for a numpy array
        cutpoints = list(cutpoints)

        nb_states = len(cutpoints) + 1
        bin_low = [-inf] + cutpoints
        bin_high = cutpoints + [inf]

        if scores is None:
            scores = np.full(nb_states - 1, np.nan)

        states = DataframesGenerator.generate_states(
            np.arange(1, nb_states + 1),
            np.full(nb_states, prop_id),
            np.arange(nb_states),
            bin_low,
            bin_high,
            # there is one score per cut-point, so the first state gets NaN
            np.append([nan], scores)
        )
        return states