示例#1
0
def attach_user_act(X, month):
    """Attach the aggregated user-activity counts to ``X`` as extra columns.

    Reads ``user_act_counts_month_1<month - 1>.csv`` and its
    ``_col_names.csv`` companion from ``paths.my_path``, indexes every row
    of that table by its ``"uid"`` cell, then looks up the ``"uid"`` of each
    row of ``X`` to collect all columns after the first one.  The collected
    values are handed to ``X.cat_col`` under the names
    ``<original name>_user_all_month``.

    Args:
        X: object exposing ``ndata``, ``get_cell(row, "uid")`` and
            ``cat_col(values, names, formats)``; the new columns are
            delivered through ``X.cat_col``.
        month: month number; the file suffix is built from the literal
            ``1`` followed by ``month - 1``.

    NOTE(review): presumably column 0 of the counts file is the uid and
    ``HashSet.get(key, default)`` yields ``default`` for unknown keys --
    confirm against the Matrix / HashSet implementations.
    """
    header = np.genfromtxt(
        paths.my_path +
        'user_act_counts_month_{0}{1}_col_names.csv'.format(1, month - 1),
        delimiter=',',
        dtype=str)
    # Number of value columns, i.e. everything except the leading column.
    n_feats = header.shape[1] - 1

    counts = np.genfromtxt(
        paths.my_path +
        'user_act_counts_month_{0}{1}.csv'.format(1, month - 1),
        delimiter=',',
        dtype=float)
    full_act = Matrix(counts, header[0, :], col_formats=["%s"] * n_feats)

    # Index the activity rows by uid.
    by_uid = HashSet()
    for row in xrange(full_act.ndata):
        if not row % 100000:
            print(row)  # progress indicator
        by_uid.set(full_act.get_cell(row, "uid"), full_act.matrix[row, 1:])

    # Gather one row of values per row of X; zeros are the lookup default.
    attached = np.zeros((X.ndata, n_feats))
    for row in xrange(X.ndata):
        if not row % 100000:
            print(row)  # progress indicator
        attached[row, :] = by_uid.get(X.get_cell(row, "uid"),
                                      np.zeros(n_feats))

    new_names = ['{0}_user_all_month'.format(name) for name in header[0, 1:]]
    X.cat_col(attached, new_names, ["%s"] * len(new_names))
示例#2
0
def attach_user_act_history(X, month):
    """Attach per-month user-activity counts of earlier months to ``X``.

    For every ``k`` from ``month - 1`` down to 1 the files
    ``user_act_counts_month_<k>.csv`` and
    ``user_act_counts_month_<k>_col_names.csv`` are read from
    ``paths.my_path``.  The rows are indexed by their ``"uid"`` cell and,
    for each row of ``X``, all columns from the third one onwards are looked
    up (zeros are passed as the lookup default) and handed to ``X.cat_col``
    under the names ``<name>_user_month_<month - k>``, so label 1 is the
    month directly before ``month``.

    Afterwards all-zero blocks are appended for the labels ``month`` .. 6,
    so the labels always run from 1 to 6 regardless of ``month``.  The
    column names of the padding blocks are taken from the last header that
    was read (``k == 1``).

    Args:
        X: object exposing ``ndata``, ``get_cell(row, "uid")`` and
            ``cat_col(values, names, formats)``; the new columns are
            delivered through ``X.cat_col``.
        month: month number of the data in ``X``; must be at least 2 because
            the column names come from the earlier months' header files.

    Raises:
        ValueError: if ``month`` is smaller than 2 (no earlier month exists,
            hence no header to derive column names from).

    NOTE(review): presumably the two skipped leading columns are the uid and
    a month/key column, and ``HashSet.get(key, default)`` yields ``default``
    for unknown keys -- confirm against the data files and HashSet.
    """
    if month < 2:
        # Previously this surfaced as an UnboundLocalError on
        # ``col_names_history`` in the padding loop below.
        raise ValueError(
            'attach_user_act_history needs month >= 2 (got {0!r}): there is '
            'no earlier month to read column names from'.format(month))

    max_lag = 6  # labels of the attached blocks always span 1..max_lag

    for k in xrange(month - 1, 0, -1):
        print(k)  # progress indicator: month currently being loaded
        col_names_history = np.genfromtxt(
            paths.my_path +
            'user_act_counts_month_{0}_col_names.csv'.format(k),
            delimiter=',',
            dtype=str)
        history = Matrix(
            np.genfromtxt(paths.my_path +
                          'user_act_counts_month_{0}.csv'.format(k),
                          delimiter=',',
                          dtype=float),
            col_names_history[0, :],
            ["%s"] * col_names_history.shape[1])
        # Value columns are everything after the first two columns.
        n_feats = col_names_history.shape[1] - 2

        # Index this month's rows by uid.
        history_hash = HashSet()
        for i in xrange(history.ndata):
            if i % 100000 == 0:
                print(i)  # progress indicator
            history_hash.set(history.get_cell(i, "uid"),
                             history.matrix[i, 2:])

        # The default is only ever copied into X_history, so one shared
        # zero vector is enough.
        no_history = np.zeros(n_feats)
        X_history = np.zeros((X.ndata, n_feats))
        for i in xrange(X.ndata):
            if i % 100000 == 0:
                print(i)  # progress indicator
            X_history[i, :] = history_hash.get(X.get_cell(i, "uid"),
                                               no_history)

        col_names = ['{0}_user_month_{1}'.format(name, month - k)
                     for name in col_names_history[0, 2:]]
        X.cat_col(X_history, col_names, ["%s"] * len(col_names))

    # Zero-pad the labels for which no data exists.  The labels continue
    # where the loop above stopped (month, month + 1, ..., max_lag); the old
    # ``month - k`` labelling repeated existing names and produced labels
    # <= 0.
    for lag in xrange(month, max_lag + 1):
        col_names = ['{0}_user_month_{1}'.format(name, lag)
                     for name in col_names_history[0, 2:]]
        X.cat_col(np.zeros((X.ndata, len(col_names))), col_names,
                  ["%s"] * len(col_names))