示例#1
0
def gen_analyzed_data():
    """
    Generate the data to be analyzed from the original pred data.

    Reads the tab-separated file ``FLAGS.pred_data + "/pred1"``, keeps only
    the columns named by ``get_analyzed_columns(feature_conf_dic)`` and
    writes them to ``FLAGS.analyzed_data`` as CSV (no index column), using
    the schema column names as the header row.

    Assumes the pred file has no header row of its own (the column names are
    taken from the schema instead) -- TODO confirm against the data producer.
    """
    # schema: dict id -> col_name, e.g. schema[1]='clk'
    schema = Config().read_schema()
    del schema[1]  # column id 1 is dropped before building the lookup
    # `.items()` works on both Python 2 and 3; `.iteritems()` is Python 2 only.
    col2id = {name: col_id for col_id, name in schema.items()}
    feature_conf_dic = CONF.read_feature_conf()

    # load data
    # `header` expects row number(s) that hold the column names, not column
    # ids.  Passing the schema ids made pandas consume data rows as a
    # multi-row header, so read the file as header-less and address columns
    # by position below.
    df = pd.read_table(FLAGS.pred_data + "/pred1", header=None)

    # reformat the table, only analyzed columns are left
    keep_columns_str = get_analyzed_columns(feature_conf_dic)
    keep_columns_int = sorted(col2id[name] for name in keep_columns_str)
    # dataframe starts from column 0; while our map start from 2
    positions = [col_id - 2 for col_id in keep_columns_int]
    analyzed_table = df.iloc[:, positions]

    # save to csv; header names follow the same sorted id order as the columns
    analyzed_table.to_csv(FLAGS.analyzed_data,
                          header=[schema[col_id] for col_id in keep_columns_int],
                          index=False)
    print("Analyzed data generation finished.")
示例#2
0
def gen_pred_csv():
    """
    Save the pred data as csv.

    Reads the tab-separated file ``FLAGS.pred_data + "/pred1"`` and writes it
    to ``../data/pred/pred1.csv`` (no index column), using the schema column
    names -- minus the entry with id 1 -- as the header row.
    """
    # schema: dict id -> col_name, e.g. schema[1]='clk'
    schema = Config().read_schema()
    del schema[1]

    # load data
    # NOTE(review): with the default header inference the first line of the
    # file is treated as column names and then replaced by the schema names
    # below.  If the pred file is header-less, the first record is lost --
    # confirm, and pass header=None here if so.
    df = pd.read_table(FLAGS.pred_data + "/pred1")

    # save to csv
    # `.values()` works on both Python 2 and 3 (`.iteritems()` is Python 2
    # only) and yields the names in the same order as iterating the items.
    df.to_csv("../data/pred/pred1.csv",
              header=list(schema.values()),
              index=False)
    print("Csv generation finished.")