def engineer_features(input_dir, output_dir):
    """Engineer features for the data in ``input_dir`` and save the results.

    The entities and targets are loaded with ``api.load_data``, transformed
    with the pipeline and encoder exposed by the result of
    ``api.engineer_features()``, and handed to ``save_features`` and
    ``save_targets`` together with ``output_dir``.

    Args:
        input_dir: passed to ``api.load_data`` as ``input_dir``.
        output_dir: passed to ``save_features`` and ``save_targets``.
    """
    entities, targets = api.load_data(input_dir=input_dir)

    # Called with no arguments: the frames loaded above are not passed in,
    # they are only run through the resulting pipeline/encoder below.
    fe_result = api.engineer_features()
    pipeline = fe_result.pipeline
    encoder = fe_result.encoder

    features = pipeline.transform(entities)
    encoded_targets = encoder.transform(targets)

    save_features(features, output_dir)
    save_targets(encoded_targets, output_dir)
def main(input_dir, output_dir):
    """Engineer features for the data in ``input_dir`` and save the results.

    Enables logging, loads the entities and targets with ``load_data``,
    transforms them with the ``mapper_X`` / ``encoder_y`` objects exposed by
    the result of ``build()``, and hands the transformed data to
    ``save_features`` and ``save_targets`` together with ``output_dir``.

    Args:
        input_dir: passed to ``load_data`` as ``input_dir``.
        output_dir: passed to ``save_features`` and ``save_targets``.
    """
    # Imported at call time rather than at module import time.
    import ballet.util.log as ballet_log

    # Enable both this module's logger and ballet's own logger at 'INFO'.
    ballet_log.enable(logger=logger, level='INFO', echo=False)
    ballet_log.enable(logger=ballet_log.logger, level='INFO', echo=False)

    entities, targets = load_data(input_dir=input_dir)

    built = build()
    feature_mapper, target_encoder = built.mapper_X, built.encoder_y

    features = feature_mapper.transform(entities)
    encoded_targets = target_encoder.transform(targets)

    save_features(features, output_dir)
    save_targets(encoded_targets, output_dir)
def engineer_features(input_dir, output_dir, train_dir=None):
    """Engineer features for the data in ``input_dir`` and save the results.

    Args:
        input_dir: passed to ``api.load_data`` to load the entities and
            targets that will be transformed.
        output_dir: passed to ``save_features`` and ``save_targets``.
        train_dir: optional. When not ``None``, data is loaded from it with
            ``api.load_data`` and passed to ``api.engineer_features`` to
            obtain the pipeline and encoder. When ``None`` (the default),
            ``api.engineer_features()`` is called with no arguments.
            Defaulting to ``None`` keeps two-argument calls working.
    """
    # obtain the pipeline and encoder
    if train_dir is None:
        # NOTE(review): the original comment says this yields a pipeline
        # trained on the development set -- confirm against the project API.
        result = api.engineer_features()
    else:
        X_df_tr, y_df_tr = api.load_data(input_dir=train_dir)
        result = api.engineer_features(X_df_tr, y_df_tr)
    pipeline, encoder = result.pipeline, result.encoder

    # load input data
    X_df, y_df = api.load_data(input_dir=input_dir)

    # transform entities and targets
    X_ft = pipeline.transform(X_df)
    y_ft = encoder.transform(y_df)

    # save to output dir
    save_features(X_ft, output_dir)
    save_targets(y_ft, output_dir)