def test_transform_dow_fmt_list(self):
    """Check day_of_week with an explicit "%Y-%m-%d" format on a pandas frame.

    The three input dates fall on a Saturday, a Tuesday and a Thursday;
    the transformed column is expected to hold 5, 1 and 3 respectively.
    """
    dates = ["2016-05-28", "2016-06-28", "2016-07-28"]
    frame = pd.DataFrame({"date_column": dates})

    dow_expr = day_of_week(it.date_column, "%Y-%m-%d")
    fitted = Map(columns=[dow_expr]).fit(frame)
    result = fitted.transform(frame)

    # Compare row by row, in order, against the expected day numbers.
    for row, expected in enumerate((5, 1, 3)):
        self.assertEqual(result["date_column"][row], expected)
def test_transform_dow_fmt_list(self):
    """Check day_of_week with an explicit "y-M-d" format on a Spark frame.

    The three input dates fall on a Saturday, a Tuesday and a Thursday;
    the transformed column is expected to hold 7, 3 and 5 respectively.
    """
    df = pd.DataFrame(
        {"date_column": ["2016-05-28", "2016-06-28", "2016-07-28"]}
    )
    sdf = self.sqlCtx.createDataFrame(df)
    trainable = Map(columns=[day_of_week(it.date_column, "y-M-d")])
    trained = trainable.fit(sdf)
    transformed_df = trained.transform(sdf)

    # Materialize the result once; calling collect() per assertion would
    # re-run the Spark job for every single check.
    rows = transformed_df.collect()
    self.assertEqual(rows[0]["date_column"], 7)
    self.assertEqual(rows[1]["date_column"], 3)
    self.assertEqual(rows[2]["date_column"], 5)
def test_transform_dow_list(self):
    """Check day_of_week without an explicit format on a Spark frame.

    The three input dates fall on a Saturday, a Tuesday and a Thursday;
    the transformed column is expected to hold 7, 3 and 5 respectively.
    """
    df = pd.DataFrame(
        {"date_column": ["2016-05-28", "2016-06-28", "2016-07-28"]}
    )
    sdf = self.sqlCtx.createDataFrame(df)
    trainable = Map(columns=[day_of_week(it.date_column)])
    trained = trainable.fit(sdf)
    transformed_df = trained.transform(sdf)

    # Materialize the result once; calling collect() per assertion would
    # re-run the Spark job for every single check.
    rows = transformed_df.collect()
    # Note that spark dayofweek outputs are different from pandas
    self.assertEqual(rows[0]["date_column"], 7)
    self.assertEqual(rows[1]["date_column"], 3)
    self.assertEqual(rows[2]["date_column"], 5)