def drop_multiple(feature_names, month):
    """Drop several features from the stored matrix of one month.

    Reads ``col_names<month>.csv`` and ``X<month>.csv`` from
    ``paths.my_path``, wraps them in a ``Matrix``, calls ``Matrix.drop`` once
    per entry of *feature_names* and then calls ``Matrix.check_point(month)``.

    Args:
        feature_names: iterable of names; each one is passed to
            ``Matrix.drop``.
        month: value interpolated into both file names and passed on to
            ``Matrix.check_point``.

    Note:
        ``Matrix.check_point`` presumably persists the modified matrix --
        confirm against the ``Matrix`` implementation.
    """
    names_table = np.genfromtxt(
        paths.my_path + 'col_names{0}.csv'.format(month),
        delimiter=',',
        dtype=str)
    # The names file must parse as a 2-D array: its second dimension is the
    # column count and only its first row is used as column labels.
    col_number = names_table.shape[1]

    values = np.genfromtxt(paths.my_path + 'X{0}.csv'.format(month),
                           delimiter=',',
                           dtype=str)
    labels = list(names_table[0, :])
    # One "%s" format string per column.
    formats = ["%s"] * col_number
    matrix = Matrix(values, labels, formats)

    for name in feature_names:
        matrix.drop(name)
    matrix.check_point(month)
def drop(feature_name, month, checkpoint=True):
    """Drop a single feature from the stored matrix of one month.

    Reads ``col_names<month>.csv`` and ``X<month>.csv`` from
    ``paths.my_path``, wraps them in a ``Matrix`` and calls
    ``Matrix.drop(feature_name)``.

    Args:
        feature_name: name passed to ``Matrix.drop``.
        month: value interpolated into both file names and, when
            *checkpoint* is true, passed to ``Matrix.check_point``.
        checkpoint: when true (the default) ``Matrix.check_point(month)`` is
            called after the drop. When false the matrix is loaded and
            modified but nothing further is done with it in this function.

    Note:
        ``Matrix.check_point`` presumably persists the modified matrix --
        confirm against the ``Matrix`` implementation.
    """
    names_table = np.genfromtxt(
        paths.my_path + 'col_names{0}.csv'.format(month),
        delimiter=',',
        dtype=str)
    # The names file must parse as a 2-D array: its second dimension is the
    # column count and only its first row is used as column labels.
    col_number = names_table.shape[1]

    values = np.genfromtxt(paths.my_path + 'X{0}.csv'.format(month),
                           delimiter=',',
                           dtype=str)
    labels = list(names_table[0, :])
    # One "%s" format string per column.
    formats = ["%s"] * col_number
    matrix = Matrix(values, labels, formats)

    matrix.drop(feature_name)
    if checkpoint:
        matrix.check_point(month)
# Example #3
0
def append_feature_by_month(func, month, since, to):
    """Apply a feature generator to a stored matrix and checkpoint it.

    Reads ``col_names_<month>_<since><to>.csv`` and
    ``X_<month>_<since><to>.csv`` from ``paths.my_path``, wraps them in a
    ``Matrix``, calls ``func(X, month)`` and then
    ``X.check_point(month, since, to)``.

    Args:
        func: callable invoked as ``func(matrix, month)``; its return value
            is ignored, so it is presumably expected to modify the matrix in
            place -- confirm against the callers.
        month: interpolated into the file names and forwarded to *func* and
            ``Matrix.check_point``.
        since: interpolated into the file names and forwarded to
            ``Matrix.check_point``.
        to: interpolated into the file names and forwarded to
            ``Matrix.check_point``.
    """
    # NOTE(review): "{1}{2}" puts `since` and `to` next to each other with no
    # separator; confirm this matches the names the files are written under.
    suffix = '_{0}_{1}{2}.csv'.format(month, since, to)

    names_table = np.genfromtxt(paths.my_path + 'col_names' + suffix,
                                delimiter=',',
                                dtype=str)
    # The names file must parse as a 2-D array: its second dimension is the
    # column count and only its first row is used as column labels.
    col_number = names_table.shape[1]

    values = np.genfromtxt(paths.my_path + 'X' + suffix,
                           delimiter=',',
                           dtype=str)
    labels = list(names_table[0, :])
    # One "%s" format string per column.
    formats = ["%s"] * col_number
    matrix = Matrix(values, labels, formats)

    func(matrix, month)
    matrix.check_point(month, since, to)
# Example #4
0
def gen_by_month(offline_source,
                 online_source,
                 target_file,
                 month,
                 since=None):
    """Generate the feature matrix for one month and checkpoint it.

    Loads the offline and online source CSVs and the month's
    ``offline_train_test_<month>.csv`` from ``paths.ccf_path``, runs the
    feature generators over the latter in a fixed order, and finally calls
    ``X.check_point(target_file)``.

    Args:
        offline_source: file name, relative to ``paths.ccf_path``, of the
            offline records (7 columns).
        online_source: file name, relative to ``paths.ccf_path``, of the
            online records (7 columns).
        target_file: value passed to ``Matrix.check_point`` at the end.
        month: selects the ``offline_train_test_<month>.csv`` input. For
            ``month < 7`` that file is read with a trailing ``date`` column
            and ``gen_label`` is run; otherwise it is read with 6 columns
            and no label is generated.
        since: accepted but not used anywhere in this function.
    """
    offline = Matrix(
        np.genfromtxt(paths.ccf_path + offline_source,
                      delimiter=',',
                      dtype=str),
        ["uid", "mid", "cid", "dis_rate", "dist", "date_rec", "date"],
        ["%s"] * 7)
    online = Matrix(
        np.genfromtxt(paths.ccf_path + online_source,
                      delimiter=',',
                      dtype=str),
        ["uid", "mid", "act", "cid", "dis_rate", "date_rec", "date"],
        ["%s"] * 7)

    # Months below 7 carry a "date" column and get a label; later months
    # are read with 6 columns and no label step.
    has_date = month < 7
    # Fresh list objects on purpose: nothing is shared with `offline`.
    x_columns = ["uid", "mid", "cid", "dis_rate", "dist", "date_rec"]
    if has_date:
        x_columns.append("date")
    X = Matrix(
        np.genfromtxt(paths.ccf_path + 'offline_train_test_' + str(month) +
                      '.csv',
                      delimiter=',',
                      dtype=str),
        x_columns,
        ["%s"] * len(x_columns))

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as a Python 3 function call.
    # The trailing numbers below are kept from the original annotations;
    # presumably the running column count after each step -- unverified.
    print("generating month {0}".format(month))
    print("gen_basic_features")  # 11
    gen_basic_features(X)

    print("gen_user_buy_with_coupon")
    gen_user_buy_with_coupon(offline, online, X)  # 14

    print("gen_user_shop_features")
    gen_user_shop_features(
        paths.my_path + "u_features_10_{0}.csv".format(month),
        paths.my_path + "i_features_10_{0}.csv".format(month), X)  # 35

    print("gen_user_buy_in_shop")
    gen_user_buy_in_shop(offline, online, X)  # 37

    print("gen_user_buy_coupon_in_shop")
    gen_user_buy_coupon_in_shop(offline, online, X)

    if has_date:
        print("gen_label")
        gen_label(X)

    print("gen_user_get_shop_coupon")
    # NOTE(review): unlike the generators above, this one receives the
    # source file names rather than the loaded `offline`/`online` matrices.
    # Left unchanged; confirm this is what gen_user_get_shop_coupon expects.
    gen_user_get_shop_coupon(offline_source, online_source, X)

    print("gen_unused_shop_coupon")
    gen_unused_shop_coupon(X)

    print("gen_no_penalty_user_shop_features")
    gen_no_penalty_user_shop_features(X, month)

    X.check_point(target_file)