def drop_multiple(feature_names, month):
    """Drop several named features from the stored matrix of one month.

    Reads ``col_names{month}.csv`` and ``X{month}.csv`` from
    ``paths.my_path`` as comma-separated string data, wraps them in a
    ``Matrix``, calls ``Matrix.drop`` once for every entry of
    *feature_names* (in iteration order) and then calls
    ``Matrix.check_point(month)``.

    Args:
        feature_names: iterable whose items are each passed to ``Matrix.drop``.
        month: value formatted into both file names and handed to
            ``check_point``.
    """
    header = np.genfromtxt(
        paths.my_path + 'col_names{0}.csv'.format(month),
        delimiter=',',
        dtype=str)
    # Indexing shape[1] means the header file must load as a 2-D array.
    col_number = header.shape[1]

    data = np.genfromtxt(
        paths.my_path + 'X{0}.csv'.format(month),
        delimiter=',',
        dtype=str)

    # Matrix(data, names, formats): argument roles inferred from how they are
    # built here (first header row, one "%s" per column) -- confirm against
    # the Matrix class.
    names = list(header[0, :])
    formats = ["%s"] * col_number
    X = Matrix(data, names, formats)

    for feature in feature_names:
        X.drop(feature)

    # NOTE(review): check_point presumably persists the matrix; verify.
    X.check_point(month)
def drop(feature_name, month, checkpoint=True):
    """Drop a single named feature from the stored matrix of one month.

    Reads ``col_names{month}.csv`` and ``X{month}.csv`` from
    ``paths.my_path`` as comma-separated string data, wraps them in a
    ``Matrix`` and calls ``Matrix.drop(feature_name)``. When *checkpoint*
    is truthy, ``Matrix.check_point(month)`` is called afterwards.

    Args:
        feature_name: value passed to ``Matrix.drop``.
        month: value formatted into both file names and handed to
            ``check_point``.
        checkpoint: when falsy, ``check_point`` is skipped. The function
            returns nothing, so in that case the modified ``Matrix`` is
            not handed back to the caller.
    """
    header = np.genfromtxt(
        paths.my_path + 'col_names{0}.csv'.format(month),
        delimiter=',',
        dtype=str)
    # Indexing shape[1] means the header file must load as a 2-D array.
    col_number = header.shape[1]

    data = np.genfromtxt(
        paths.my_path + 'X{0}.csv'.format(month),
        delimiter=',',
        dtype=str)

    # Matrix(data, names, formats): argument roles inferred from how they are
    # built here (first header row, one "%s" per column) -- confirm against
    # the Matrix class.
    names = list(header[0, :])
    formats = ["%s"] * col_number
    X = Matrix(data, names, formats)

    X.drop(feature_name)

    if not checkpoint:
        return
    # NOTE(review): check_point presumably persists the matrix; verify.
    X.check_point(month)
def append_feature_by_month(func, month, since, to):
    """Load one month/range matrix, run *func* on it and checkpoint it.

    Reads ``col_names_{month}_{since}{to}.csv`` and
    ``X_{month}_{since}{to}.csv`` from ``paths.my_path`` as comma-separated
    string data, wraps them in a ``Matrix``, calls ``func(X, month)`` and
    then ``X.check_point(month, since, to)``.

    Args:
        func: callable invoked as ``func(X, month)``; its return value is
            ignored, so it is presumably expected to modify ``X`` in place
            (confirm with the callers).
        month: value formatted into the file names and forwarded to *func*
            and ``check_point``.
        since: value formatted into the file names and forwarded to
            ``check_point``.
        to: value formatted into the file names (directly after *since*,
            with no separator) and forwarded to ``check_point``.
    """
    header = np.genfromtxt(
        paths.my_path + 'col_names_{0}_{1}{2}.csv'.format(month, since, to),
        delimiter=',',
        dtype=str)
    # Indexing shape[1] means the header file must load as a 2-D array.
    col_number = header.shape[1]

    data = np.genfromtxt(
        paths.my_path + 'X_{0}_{1}{2}.csv'.format(month, since, to),
        delimiter=',',
        dtype=str)

    # Matrix(data, names, formats): argument roles inferred from how they are
    # built here (first header row, one "%s" per column) -- confirm against
    # the Matrix class.
    names = list(header[0, :])
    formats = ["%s"] * col_number
    X = Matrix(data, names, formats)

    func(X, month)

    # NOTE(review): check_point presumably persists the matrix; verify.
    X.check_point(month, since, to)
def gen_by_month(offline_source, online_source, target_file, month, since=None):
    """Run the feature-generation steps for one month and checkpoint the result.

    Three comma-separated files are read from ``paths.ccf_path`` as string
    data and wrapped in ``Matrix`` objects: *offline_source*,
    *online_source* and ``offline_train_test_<month>.csv`` (the working
    matrix ``X``). The ``gen_*`` helpers are then called in a fixed order,
    each preceded by a progress line on stdout, and finally
    ``X.check_point(target_file)`` is called.

    Args:
        offline_source: file name (relative to ``paths.ccf_path``) of the
            offline data; also passed as-is to ``gen_user_get_shop_coupon``.
        online_source: file name (relative to ``paths.ccf_path``) of the
            online data; also passed as-is to ``gen_user_get_shop_coupon``.
        target_file: value handed to ``X.check_point``.
        month: selects the ``offline_train_test_<month>.csv`` input. When
            ``month < 7`` the input is declared with a trailing ``"date"``
            column and ``gen_label`` is run; otherwise both are skipped.
        since: accepted but not used anywhere in this function.
    """
    def read_csv(file_name):
        # All inputs are loaded the same way: comma-separated, as strings.
        return np.genfromtxt(paths.ccf_path + file_name, delimiter=',', dtype=str)

    # Matrix(data, names, formats): argument roles inferred from usage --
    # confirm against the Matrix class. Each Matrix gets its own fresh lists.
    offline = Matrix(
        read_csv(offline_source),
        ["uid", "mid", "cid", "dis_rate", "dist", "date_rec", "date"],
        ["%s"] * 7)
    online = Matrix(
        read_csv(online_source),
        ["uid", "mid", "act", "cid", "dis_rate", "date_rec", "date"],
        ["%s"] * 7)

    # Months below 7 are declared with an extra "date" column and later get
    # a label; other months are declared with six columns and no label step.
    with_date = month < 7
    x_columns = ["uid", "mid", "cid", "dis_rate", "dist", "date_rec"]
    if with_date:
        x_columns.append("date")
    X = Matrix(
        read_csv('offline_train_test_' + str(month) + '.csv'),
        x_columns,
        ["%s"] * len(x_columns))

    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement used by this file.
    print("generating month {0}".format(month))

    # The "marker" comments below reproduce bare numbers left in the
    # original (#11, #14, #35, #37) -- presumably running column counts;
    # unverified.
    print("gen_basic_features")
    # marker: 11
    gen_basic_features(X)

    print("gen_user_buy_with_coupon")
    gen_user_buy_with_coupon(offline, online, X)
    # marker: 14

    print("gen_user_shop_features")
    user_features_path = paths.my_path + "u_features_10_{0}.csv".format(month)
    item_features_path = paths.my_path + "i_features_10_{0}.csv".format(month)
    gen_user_shop_features(user_features_path, item_features_path, X)
    # marker: 35

    print("gen_user_buy_in_shop")
    gen_user_buy_in_shop(offline, online, X)
    # marker: 37

    print("gen_user_buy_coupon_in_shop")
    gen_user_buy_coupon_in_shop(offline, online, X)

    if with_date:
        print("gen_label")
        gen_label(X)

    print("gen_user_get_shop_coupon")
    # NOTE(review): this step receives the source file names, not the loaded
    # `offline`/`online` matrices used by the other steps -- confirm that is
    # what gen_user_get_shop_coupon expects.
    gen_user_get_shop_coupon(offline_source, online_source, X)

    print("gen_unused_shop_coupon")
    gen_unused_shop_coupon(X)

    print("gen_no_penalty_user_shop_features")
    gen_no_penalty_user_shop_features(X, month)

    # NOTE(review): check_point presumably persists X under target_file; verify.
    X.check_point(target_file)