def get_customer_history(flag, n=1000):
    """Build fixed-length history sequences for one split and save them.

    The input frame is read from ``FileNames.<flag>_v2`` and the result is
    written with ``save_npy`` to ``FileNames.<flag>_customer_hist_nn_data``.

    Args:
        flag: Split name used to resolve the ``FileNames`` attributes.
            ``'val'`` reads the ``'tr'`` customer artifact and ``'test'``
            reads the ``'train'`` one; any other value is used as-is.
        n: Length every sequence is padded or truncated to.
    """
    in_file = getattr(FileNames, '{}_v2'.format(flag))
    out_file = getattr(FileNames, '{}_customer_hist_nn_data'.format(flag))

    # Evaluation splits borrow the history artifact of their training split.
    hist_flag = {'val': 'tr', 'test': 'train'}.get(flag, flag)
    hist_file = getattr(FileNames, 'cust_{}_artifact1'.format(hist_flag))

    df = load_pickle(in_file)
    hist = load_pickle(hist_file)

    aggregator = ListAggregation(
        date_col=FieldNames.campaign_start_date,
        user_col=FieldNames.customer_id,
        key_col=FieldNames.item_set,
        hist_artifact=hist,
    )
    sequences = aggregator.transform(df)

    # Zeros are added at the front and over-long sequences lose their
    # leading entries (padding='pre', truncating='pre').
    # NOTE(review): presumably this is the Keras ``pad_sequences`` - confirm.
    padded_rows = [
        pad_sequences(
            row,
            maxlen=n,
            padding='pre',
            truncating='pre',
            value=0,
            dtype='int32',
        )
        for row in sequences
    ]
    save_npy(out_file, np.concatenate(padded_rows))
def load_data(flag="val"):
    """Load the pickled feature frames for a split and pass them to ``make_x_y``.

    Args:
        flag: ``"val"`` loads the ``tr``/``val`` feature files; ``"test"``
            loads the ``train``/``test`` feature files.

    Returns:
        Whatever ``make_x_y(x_tr, x_val, flag=flag)`` returns.

    Raises:
        ValueError: If ``flag`` is neither ``"val"`` nor ``"test"``.
    """
    if flag == "val":
        x_tr = load_pickle(FileNames.tr_features_v1)
        x_val = load_pickle(FileNames.val_features_v1)
    elif flag == "test":
        x_tr = load_pickle(FileNames.train_features_v1)
        x_val = load_pickle(FileNames.test_features_v1)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "flag must be 'val' or 'test', got {!r}".format(flag)
        )
    return make_x_y(x_tr, x_val, flag=flag)
def map_campign_id(x_tr, x_val, flag="val"):
    """Copy the ``campaign_id`` column from the raw split files onto the features.

    The values are assigned positionally (via ``.values``), so each feature
    frame must have the same number of rows as the raw frame it is paired with.
    Both input frames are modified in place and also returned.

    Args:
        x_tr: Feature frame for the training side of the split.
        x_val: Feature frame for the evaluation side of the split.
        flag: ``"val"`` reads ``tr_v2``/``val_v2``; ``"test"`` reads
            ``train_v2``/``test_v2``.

    Returns:
        The tuple ``(x_tr, x_val)`` with a ``"campaign_id"`` column set.

    Raises:
        ValueError: If ``flag`` is neither ``"val"`` nor ``"test"``.
    """
    if flag == "val":
        tr = load_pickle(FileNames.tr_v2)
        val = load_pickle(FileNames.val_v2)
    elif flag == "test":
        tr = load_pickle(FileNames.train_v2)
        val = load_pickle(FileNames.test_v2)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "flag must be 'val' or 'test', got {!r}".format(flag)
        )
    x_tr["campaign_id"] = tr["campaign_id"].values
    x_val["campaign_id"] = val["campaign_id"].values
    return x_tr, x_val
def generate_features(flag):
    """Run the feature pipeline for one split and pickle the resulting frames.

    The pipeline is built from the training artifact, the four customer
    history artifacts and the concatenation of both raw frames. It is fitted
    on the training frame and then applied to the evaluation frame.

    Args:
        flag: ``"test"`` uses the ``train``/``test`` files and artifacts;
            ``"val"`` uses the ``tr``/``val`` files and artifacts.

    Raises:
        ValueError: If ``flag`` is neither ``"test"`` nor ``"val"``.
    """
    if flag == "test":
        tr_artifact_file = FileNames.train_artifact
        hist_artifact_files = [
            FileNames.cust_train_artifact1,
            FileNames.cust_train_artifact2,
            FileNames.cust_train_artifact3,
            FileNames.cust_train_artifact4,
        ]
        tr_file = FileNames.train_v2
        te_file = FileNames.test_v2
        tr_save_file = FileNames.train_features_v1
        te_save_file = FileNames.test_features_v1
    elif flag == "val":
        tr_artifact_file = FileNames.tr_artifact
        hist_artifact_files = [
            FileNames.cust_tr_artifact1,
            FileNames.cust_tr_artifact2,
            FileNames.cust_tr_artifact3,
            FileNames.cust_tr_artifact4,
        ]
        tr_file = FileNames.tr_v2
        te_file = FileNames.val_v2
        tr_save_file = FileNames.tr_features_v1
        te_save_file = FileNames.val_features_v1
    else:
        # Previously this only printed a message and then crashed on the
        # first unbound file variable; fail explicitly instead.
        raise ValueError(
            "flag must be 'test' or 'val', got {!r}".format(flag)
        )

    tr_artifact = load_pickle(tr_artifact_file)
    hist_artifacts = [
        load_pickle(hist_file) for hist_file in hist_artifact_files
    ]
    columns = get_feature_names(3)

    tr_data = load_pickle(tr_file)
    te_data = load_pickle(te_file)
    all_data = pd.concat([tr_data, te_data])

    pipeline = get_feature_pipeline(tr_artifact, hist_artifacts, all_data)
    x_tr = pipeline.fit_transform(tr_data)
    x_te = pipeline.transform(te_data)

    x_tr = pd.DataFrame(x_tr, columns=columns)
    x_te = pd.DataFrame(x_te, columns=columns)

    x_tr[FieldNames.target] = tr_data[FieldNames.target].values
    # Only the validation frame gets a target column.
    # NOTE(review): presumably the test frame carries no labels - confirm.
    if flag == "val":
        x_te[FieldNames.target] = te_data[FieldNames.target].values

    save_pickle(x_tr, tr_save_file)
    save_pickle(x_te, te_save_file)
def _save_customer_artifact(transactions, save_file, number):
    """Group ``transactions``, build the artifact, pickle it and report."""
    grouped = group_transactions(transactions)
    save_pickle(_get_transaction_artifact(grouped), save_file)
    print("Customer artifact {} done!".format(number))


def save_transaction_artifact(flag):
    """Save artifacts for customer transactions with different conditions.

    Four artifacts are written, each built from a different row subset:

    1. all transactions;
    2. rows with a non-zero coupon discount;
    3. rows where both the coupon discount and the other discount are
       non-zero;
    4. rows where the absolute coupon discount exceeds the absolute other
       discount.

    Args:
        flag: ``'test'`` reads ``transaction_test_v1`` and writes the
            ``cust_train_artifact*`` files; ``'val'`` reads
            ``transaction_val_v1`` and writes the ``cust_tr_artifact*`` files.

    Raises:
        ValueError: If ``flag`` is neither ``'test'`` nor ``'val'``.
    """
    if flag == 'test':
        inp_file = FileNames.transaction_test_v1
        save_file1 = FileNames.cust_train_artifact1
        save_file2 = FileNames.cust_train_artifact2
        save_file3 = FileNames.cust_train_artifact3
        save_file4 = FileNames.cust_train_artifact4
    elif flag == 'val':
        inp_file = FileNames.transaction_val_v1
        save_file1 = FileNames.cust_tr_artifact1
        save_file2 = FileNames.cust_tr_artifact2
        save_file3 = FileNames.cust_tr_artifact3
        save_file4 = FileNames.cust_tr_artifact4
    else:
        # Previously this only printed a message and then crashed on the
        # unbound ``inp_file``; fail explicitly instead.
        raise ValueError(
            "flag must be 'test' or 'val', got {!r}".format(flag)
        )

    transactions = load_pickle(inp_file)

    # Each subset lives only for the duration of the helper call, so
    # intermediates are released before the next subset is built.
    _save_customer_artifact(transactions, save_file1, 1)

    has_coupon = np.abs(transactions[FieldNames.coupon_discount]) > 0
    _save_customer_artifact(transactions.loc[has_coupon], save_file2, 2)

    has_other = np.abs(transactions[FieldNames.other_discount]) > 0
    _save_customer_artifact(
        transactions.loc[has_coupon & has_other], save_file3, 3)
    del has_coupon, has_other

    coupon_dominates = (
        np.abs(transactions[FieldNames.coupon_discount])
        > np.abs(transactions[FieldNames.other_discount])
    )
    _save_customer_artifact(transactions.loc[coupon_dominates], save_file4, 4)
def save_train_artifact(flag):
    """Create artifact using training data.

    Loads the training frame for the split, wraps it in a
    ``HistoricalArtifact`` keyed by customer and campaign start date, and
    pickles the result.

    Args:
        flag: ``'test'`` reads ``train_v2`` and writes ``train_artifact``;
            ``'val'`` reads ``tr_v2`` and writes ``tr_artifact``.

    Raises:
        ValueError: If ``flag`` is neither ``'test'`` nor ``'val'``.
    """
    if flag == 'test':
        inp_file = FileNames.train_v2
        save_file = FileNames.train_artifact
    elif flag == 'val':
        inp_file = FileNames.tr_v2
        save_file = FileNames.tr_artifact
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "flag must be 'test' or 'val', got {!r}".format(flag)
        )

    tr = load_pickle(inp_file)
    tr_artifact = HistoricalArtifact(
        tr,
        user_field=FieldNames.customer_id,
        date_field=FieldNames.campaign_start_date,
        key_fields=[
            FieldNames.campaign_id,
            FieldNames.coupon_id,
            FieldNames.target,
            FieldNames.item_category,
        ],
    )
    save_pickle(tr_artifact, save_file)
def get_save_coupon_vecs(flag, brty_map, cat_map):
    """Compute coupon vectors for one split and save them with ``save_npy``.

    Args:
        flag: Split name; the input is ``FileNames.<flag>_v2`` and the
            output is ``FileNames.<flag>_coupon_nn_data``.
        brty_map: Passed through to ``prepare_coupon_data`` unchanged.
        cat_map: Passed through to ``prepare_coupon_data`` unchanged.
    """
    source_path = getattr(FileNames, '{}_v2'.format(flag))
    target_path = getattr(FileNames, '{}_coupon_nn_data'.format(flag))

    frame = load_pickle(source_path)
    save_npy(target_path, prepare_coupon_data(frame, brty_map, cat_map))