def cli(recommendations, set_size, journey_id):
    """Explain a single journey and print intervention recommendations.

    Loads the best model and its test set, shows the journey's demographic
    features, the model's base employment probability with all intervention
    features zeroed out, and the top recommended intervention sets.

    Args:
        recommendations: Number of recommendations to display.
        set_size: Size of each intervention set to evaluate.
        journey_id: Index label of the journey in the test set; when falsy,
            a few example IDs are suggested instead.
    """
    model_path, _, test_path = postgres.get_best_model_paths()
    df_test = s3.read_parquet(test_path)
    model = s3.read_pickle(model_path)

    # Guard clauses: bail out early with example IDs when the journey id
    # is missing or unknown.
    if not journey_id:
        click.echo("No Journey id specified. Try for example {}".format(
            df_test.sample(5).index.tolist()))
        return
    elif journey_id not in df_test.index.tolist():
        click.echo("Journey ID {} not found. Try for example {}".format(
            journey_id, df_test.sample(5).index.tolist()))
        return

    # Copy so that zeroing intervention columns below cannot mutate df_test.
    observation = df_test.loc[journey_id, :].copy()
    observation = observation.drop(["ttj_sub_12", "ttj"])

    # Feature-name prefixes encode the column type: "i_" = intervention,
    # "d_" = demographic. startswith is the clear form of the prefix check.
    interv_cols = [col for col in observation.index if col.startswith("i_")]
    dem_cols = [col for col in observation.index if col.startswith("d_")]

    click.echo("Journey {} --- Demographics".format(journey_id))
    click.echo("---------------")
    output = observation[dem_cols]
    # Only show the one-hot demographic flags that are set.
    click.echo(output[output == 1])
    # NOTE: Un-normalize age here.
    # Get maximum age from journey data instead — 78 is presumably the
    # max age used during normalization; TODO confirm against the pipeline.
    click.echo("Age: {}".format(round(output["d_age"] * 78)))
    click.echo("---------------")
    click.echo("Use Model: {}".format(model.__class__.__name__))
    click.echo("---------------")

    # Zero all interventions to obtain the counterfactual base probability.
    observation[interv_cols] = 0
    base_probability = model.predict_proba(observation.to_numpy().reshape(
        1, -1))
    click.echo("Base employment probability {:.4f}".format(
        base_probability[0][1]))
    click.echo("---------------")
    # Fixed: the original called .format(journey_id) on a string with no
    # placeholder, silently discarding the argument.
    click.echo("Intervention Recommendations")
    click.echo("---------------")
    df_recs = get_top_recommendations(model, observation,
                                      set_size=set_size,
                                      n=recommendations)
    click.echo(df_recs)
def run(self):
    """Compute and persist the aggregate recommendation error for the model.

    Reads evaluation parameters from the project YAML config, loads the
    model and its train/test data, computes the aggregate recommendation
    error, and writes the result to the evaluation table in Postgres.
    """
    # Use a context manager so the config file handle is closed promptly
    # (the original leaked the handle returned by open()).
    with open("./conf/base/parameters.yml") as config_file:
        params = yaml.load(config_file,
                           Loader=yaml.FullLoader)["evaluation_params"]

    model = s3.read_pickle(self.input()[0].path)
    # Resolve the model's DB id and the data paths it was built from.
    model_id, test_path, train_path = get_model_info_by_path(
        self.input()[0].path)
    df_train = s3.read_parquet(train_path)
    df_test = s3.read_parquet(test_path)

    rec_error = get_aggregate_recommendation_error(
        df_train,
        df_test,
        model,
        params["set_size"],
        params["num_recs"],
        params["percent_sample"],
    )
    write_recommendation_eval(get_db_engine(), rec_error, model_id, params)

    # Mark the luigi task as done manually (no output target is produced).
    self.task_complete = True
def run(self):
    """Evaluate the trained model on the held-out test set and record it.

    Loads the test data and the pickled model, computes evaluation
    metrics, and writes the model metadata plus metrics to the database.
    """
    # Load the test set and separate features from the target column.
    df_test = s3.read_parquet(self.input()[0][1].path)
    X_test = df_test.drop(["ttj", "ttj_sub_12"], axis="columns")
    y_test = df_test.loc[:, "ttj_sub_12"]

    # Deserialize the trained model and score it on the test data.
    model = s3.read_pickle(self.input()[1].path)
    metrics = evaluate(model, X_test, y_test)

    model_info_to_db(
        engine=get_db_engine(),
        model=model,
        metrics=metrics,
        features=X_test.columns.tolist(),
        date=self.date,
        model_path=self.input()[1].path,
        train_data_path=self.input()[0][2].path,
        test_data_path=self.input()[0][3].path,
    )
    # NOTE: The task is marked complete manually; using the built-in
    # luigi.contrib.postgres.CopyToTable task would be the proper approach.
    self.task_complete = True