Пример #1
0
 def normalize_col(
     df: pd.DataFrame,
     timestamp_format: Optional[str],
     offset: int,
     time_shift: Optional[timedelta],
 ) -> pd.DataFrame:
     """Run ``normalize_dttm_col`` on a copy of ``df`` and return that copy.

     The frame is copied before the helper is called, and the helper's own
     return value is ignored: the result handed back is the copy that was
     passed to ``normalize_dttm_col``.

     :param df: frame to copy and normalize
     :param timestamp_format: forwarded unchanged to ``normalize_dttm_col``
     :param offset: forwarded unchanged to ``normalize_dttm_col``
     :param time_shift: forwarded unchanged to ``normalize_dttm_col``
     :returns: the copy that was passed through ``normalize_dttm_col``
     """
     working_copy = df.copy()
     normalize_dttm_col(working_copy, timestamp_format, offset, time_shift)
     return working_copy
Пример #2
0
    def test_normalize_dttm_col(self):
        """Check ``normalize_dttm_col`` for datetime and numeric epoch inputs."""
        ts = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0)
        df = pd.DataFrame([{"__timestamp": ts, "a": 1}])

        # Datetime (non-numeric) input: the value is expected to come back
        # unchanged whichever format is passed.
        for fmt in (None, "epoch_ms", "epoch_s"):
            normalized = normalize_dttm_col(df, fmt, 0, None)
            assert normalized[DTTM_ALIAS][0] == ts

        # An offset of 1 is expected to move 19:00 to 20:00.
        with_offset = normalize_dttm_col(df, None, 1, None)
        one_hour_later = pd.Timestamp(2021, 2, 15, 20, 0, 0, 0)
        assert with_offset[DTTM_ALIAS][0] == one_hour_later

        # Offset and time shift together: 19:00 is expected to become 20:30.
        with_offset_and_shift = normalize_dttm_col(
            df, None, 1, timedelta(minutes=30)
        )
        shifted_expected = pd.Timestamp(2021, 2, 15, 20, 30, 0, 0)
        assert with_offset_and_shift[DTTM_ALIAS][0] == shifted_expected

        # Numeric input expressed in seconds since the epoch.
        df = pd.DataFrame([{"__timestamp": ts.timestamp(), "a": 1}])
        from_seconds = normalize_dttm_col(df, "epoch_s", 0, None)
        assert from_seconds[DTTM_ALIAS][0] == ts

        # Numeric input expressed in milliseconds since the epoch.
        df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}])
        from_millis = normalize_dttm_col(df, "epoch_ms", 0, None)
        assert from_millis[DTTM_ALIAS][0] == ts
Пример #3
0
    def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
        """Run the query and return its outcome together with the dataframe.

        The returned dict holds the ``query``, ``status`` and ``error_message``
        of the datasource result, plus ``df``. A non-empty dataframe is passed
        through ``normalize_dttm_col``, then through ``df_metrics_to_num`` when
        ``enforce_numerical_metrics`` is set, has +/-inf replaced by NaN, and is
        finally handed to ``query_object.exec_post_processing``. An empty
        dataframe is returned as received.
        """
        # All queries are assumed to share one datasource, which holds for the
        # current setup; multiple datasources may be supported in the long term.

        # Only "table" datasources are asked for the granularity column, whose
        # python_date_format (if the column exists) drives timestamp parsing.
        granularity_col = (
            self.datasource.get_column(query_object.granularity)
            if self.datasource.type == "table"
            else None
        )
        timestamp_format = (
            granularity_col.python_date_format if granularity_col else None
        )

        # Backends may differ, but they all expose the same query interface.
        result = self.datasource.query(query_object.to_dict())
        df = result.df

        if not df.empty:
            # Convert the database timestamps into a pandas-supported datetime
            # format. Without a python_date_format the pattern is treated as
            # the default ISO date format; unix formats use their own parsing
            # logic.
            normalize_dttm_col(
                df=df,
                timestamp_format=timestamp_format,
                offset=self.datasource.offset,
                time_shift=query_object.time_shift,
            )

            if self.enforce_numerical_metrics:
                self.df_metrics_to_num(df, query_object)

            df.replace([np.inf, -np.inf], np.nan, inplace=True)
            df = query_object.exec_post_processing(df)

        return {
            "query": result.query,
            "status": result.status,
            "error_message": result.error_message,
            "df": df,
        }
Пример #4
0
    def normalize_df(self, df: pd.DataFrame, query_object: QueryObject) -> pd.DataFrame:
        """Normalize ``df`` in place and return the same frame.

        The frame is passed through ``normalize_dttm_col``, then through
        ``df_metrics_to_num`` when ``enforce_numerical_metrics`` is set, and
        finally has +/-inf values replaced by NaN.

        :param df: frame to normalize; it is modified rather than copied
        :param query_object: supplies the granularity column name and the
            time shift
        :returns: the ``df`` object that was passed in
        """
        # Only "table" datasources are asked for the granularity column; when
        # that column exists its python_date_format is used for the timestamps.
        granularity_col = (
            self.datasource.get_column(query_object.granularity)
            if self.datasource.type == "table"
            else None
        )
        timestamp_format = (
            granularity_col.python_date_format if granularity_col else None
        )

        normalize_dttm_col(
            df=df,
            timestamp_format=timestamp_format,
            offset=self.datasource.offset,
            time_shift=query_object.time_shift,
        )

        if self.enforce_numerical_metrics:
            self.df_metrics_to_num(df, query_object)

        # Infinite values are swapped for NaN on the frame itself.
        df.replace([np.inf, -np.inf], np.nan, inplace=True)

        return df