示例#1
0
 def test_test_table_primary_keys(self):
     """Check both keys of the "test" table.

     ``id`` must be unique and free of NaNs, and the combination
     (date, store_nbr, item_nbr) must identify every row with no NaNs in
     any of its columns.
     """
     frame = FactoryLoader().load("test")

     # Surrogate key.
     id_column = frame.id
     self.assertTrue(id_column.is_unique)
     self.assertFalse(id_column.hasnans)

     # Composite key: de-duplicating on it must not drop any row.
     composite_key = ["date", "store_nbr", "item_nbr"]
     distinct_key_rows = frame[composite_key].drop_duplicates()
     self.assertEqual(distinct_key_rows.shape[0], frame.shape[0])
     for key_column in composite_key:
         self.assertFalse(getattr(frame, key_column).hasnans)
示例#2
0
def main(sample=False):
    """Cache the master dataset and its data cube in a single HDF5 file.

    The table loaded through ``FactoryLoader().load("master", ...)`` is
    written under the key ``"table"`` of ``data/cache/master_data.hdf``. The
    value returned by ``get_data_cube_from_df`` for that table is then stored
    in the same file as the ``"data_cube"`` dataset.

    Args:
        sample: Forwarded unchanged to ``FactoryLoader().load``.

    Raises:
        FileNotFoundError: If the directory that should hold the cache file
            does not exist.
    """
    filename = "data/cache/master_data.hdf"
    cache_dir = os.path.dirname(filename)
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with ``-O``, which would silently skip this validation.
    if not os.path.exists(cache_dir):
        raise FileNotFoundError(
            f"Cache directory does not exist: {cache_dir!r}")

    df_master = FactoryLoader().load("master", sample=sample)
    # Pass the key by keyword; positional use is deprecated in recent pandas.
    df_master.to_hdf(filename, key="table")

    data_cube = get_data_cube_from_df(df=df_master)
    with h5py.File(filename, "a") as h:
        # The file is opened in append mode, so a previous run may have left
        # a "data_cube" dataset behind; create_dataset does not overwrite an
        # existing name, so remove the stale one first.
        if "data_cube" in h:
            del h["data_cube"]
        h.create_dataset("data_cube", data=data_cube)
示例#3
0
if __name__ == "__main__":
    # Script entry point: read the run configuration, then build the
    # time-dependent and the static inputs.
    config = get_custom_project_config()
    # Unpack the individual run settings from the project config.
    alias = config["alias"]
    random_seed = config["random_seed"]
    sample = config["sample"]
    cuda = config["cuda"]
    batch_size = config["batch_size"]
    forecast_horizon = config["forecast_horizon"]
    learning_rate = config["learning_rate"]
    # Hand the full configuration to the project's log helper and to the
    # wandb run config.
    log_config(config)
    wandb.config.update(config)

    # Load data dependent on time
    logger.info("Generating time-dependent dataset...")
    df_master = FactoryLoader().load("master", sample=sample)
    logger.info(
        f"Time dataset generated successfully! Shape: {df_master.shape}")
    logger.info("Converting time-dependent dataset to data cube...")
    # NOTE: `df_master` is rebound here to the return value of
    # get_records_cube_from_df; it is read through `.dtype.names` below, so
    # it is presumably a NumPy structured/record array -- TODO confirm.
    df_master = get_records_cube_from_df(df=df_master)
    # Count of distinct values for every field of the cube that is listed in
    # `categorical_feats` (defined outside this fragment).
    cat_cardinalities_time = {
        col: len(np.unique(df_master[col]))
        for col in df_master.dtype.names if col in categorical_feats
    }
    logger.info(f"Data cube successfully generated! Shape: {df_master.shape}")

    # Load static data
    logger.info("Generating static dataset...")
    df_master_static = FactoryLoader().load("master_timeless", sample=sample)
    # Rebind the static table to the result of its `to_records()` call
    # (presumably pandas `DataFrame.to_records` -- verify against the loader).
    df_master_static = df_master_static.to_records()
    cat_cardinalities_timeless = {
示例#4
0
 def test_prototype_name(self):
     """Loading the "__prototype" reference must raise ``ValueError``."""
     bad_reference = "__prototype"
     # The loader is built inside the context so that a ValueError from
     # either construction or loading satisfies the assertion.
     with self.assertRaises(ValueError):
         FactoryLoader().load(bad_reference)
示例#5
0
 def test_incorrect_name(self):
     """Loading the "holidays" reference must raise ``ValueError``."""
     bad_reference = "holidays"
     # The loader is built inside the context so that a ValueError from
     # either construction or loading satisfies the assertion.
     with self.assertRaises(ValueError):
         FactoryLoader().load(bad_reference)
示例#6
0
 def test_transactions_table_primary_keys(self):
     """(date, store_nbr) must identify every "transactions" row, without NaNs."""
     frame = FactoryLoader().load("transactions")

     # De-duplicating on the composite key must not drop any row.
     composite_key = ["date", "store_nbr"]
     distinct_key_rows = frame[composite_key].drop_duplicates()
     self.assertEqual(distinct_key_rows.shape[0], frame.shape[0])
     for key_column in composite_key:
         self.assertFalse(getattr(frame, key_column).hasnans)
示例#7
0
 def test_stores_table_primary_keys(self):
     """``store_nbr`` must be unique and free of NaNs in the "stores" table."""
     key_column = FactoryLoader().load("stores").store_nbr
     self.assertTrue(key_column.is_unique)
     self.assertFalse(key_column.hasnans)
示例#8
0
 def test_oil_table_primary_keys(self):
     """``date`` must be unique and free of NaNs in the "oil" table."""
     key_column = FactoryLoader().load("oil").date
     self.assertTrue(key_column.is_unique)
     self.assertFalse(key_column.hasnans)
示例#9
0
 def test_items_table_shape(self):
     """The "items" table is expected to have 4100 rows and 3 columns."""
     expected_shape = (4100, 3)
     loaded = FactoryLoader().load("items")
     self.assertEqual(expected_shape, loaded.shape)
示例#10
0
 def test_holidays_events_table_primary_keys(self):
     """``date`` must be unique and free of NaNs in the "holidays_events" table."""
     key_column = FactoryLoader().load("holidays_events").date
     self.assertTrue(key_column.is_unique)
     self.assertFalse(key_column.hasnans)
示例#11
0
 def test_transactions_table_shape(self):
     """The "transactions" table is expected to have 83488 rows and 3 columns."""
     expected_shape = (83488, 3)
     loaded = FactoryLoader().load("transactions")
     self.assertEqual(expected_shape, loaded.shape)
示例#12
0
 def test_train_table_shape(self):
     """The "train" table is expected to have 125497040 rows and 6 columns."""
     expected_shape = (125497040, 6)
     loaded = FactoryLoader().load("train")
     self.assertEqual(expected_shape, loaded.shape)
示例#13
0
 def test_test_table_shape(self):
     """The "test" table is expected to have 3370464 rows and 5 columns."""
     expected_shape = (3370464, 5)
     loaded = FactoryLoader().load("test")
     self.assertEqual(expected_shape, loaded.shape)
示例#14
0
 def test_stores_table_shape(self):
     """The "stores" table is expected to have 54 rows and 5 columns."""
     expected_shape = (54, 5)
     loaded = FactoryLoader().load("stores")
     self.assertEqual(expected_shape, loaded.shape)
示例#15
0
 def test_oil_table_shape(self):
     """The "oil" table is expected to have 1175 rows and 2 columns."""
     expected_shape = (1175, 2)
     loaded = FactoryLoader().load("oil")
     self.assertEqual(expected_shape, loaded.shape)
示例#16
0
 def test_holidays_events_table_shape(self):
     """The "holidays_events" table is expected to have 312 rows and 6 columns."""
     expected_shape = (312, 6)
     loaded = FactoryLoader().load("holidays_events")
     self.assertEqual(expected_shape, loaded.shape)