Пример #1
0
def export_day_statistical_sequence(lst_data):
    """Build the per-day statistical sequence data, save it, and return it.

    Args:
        lst_data: Input forwarded unchanged to
            ``generate_day_statistical_sequence``.

    Returns:
        Whatever ``generate_day_statistical_sequence`` produced; the same
        object is also handed to ``save_model_data``.
    """
    from src.data.generate_data import (
        generate_day_statistical_sequence as build_sequence,
    )

    show_title("加工数据为 91 天的序列数据,每天为6个特征(最大值、最小值、平均值)96维数据")
    day_sequence = build_sequence(lst_data)

    from src.base.config import x_data_file_name, base_data_type

    # Destination is the shared data directory prefix plus the configured name.
    target_path = data_file_path + x_data_file_name
    save_model_data(day_sequence, target_path, base_data_type)
    return day_sequence
Пример #2
0
def export_train_balance(x_train, y_train):
    """Balance the training set and save the balanced features and labels.

    Args:
        x_train: Training features, forwarded to ``generate_balance_data``.
        y_train: Training labels, forwarded to ``generate_balance_data``.

    Returns:
        None. The balanced data is only passed to ``save_model_data``.
    """
    show_title(f"对类别 :{label_name}实施平衡{config.train_data_type}")
    balanced_x, balanced_y = generate_balance_data(x_train, y_train)

    from src.base.config import (
        train_balance_data_type,
        x_train_balance_file_name,
        y_train_balance_file_name,
    )

    # Features first, then labels; both are saved with the same data type tag.
    outputs = (
        (balanced_x, x_train_balance_file_name),
        (balanced_y, y_train_balance_file_name),
    )
    for payload, file_name in outputs:
        save_model_data(payload, data_file_path + file_name,
                        train_balance_data_type)
Пример #3
0
def export_day_list_data():
    """Load the original data, derive the per-day list data, save and return it.

    Returns:
        A ``(lst_data, y_data)`` tuple exactly as returned by
        ``generate_day_list_data``; both elements are also handed to
        ``save_model_data`` using names and data type read from ``config``.
    """
    from src.data.load_data import load_original_data

    show_title("加载原始数据")
    raw_x, raw_y = load_original_data()

    from src.data.generate_data import generate_day_list_data

    show_title("导出每个用户每天访问数据的不截断列表")
    day_lists, labels = generate_day_list_data(raw_x, raw_y)

    # Each destination is resolved right before its own save call.
    day_lists_path = data_file_path + config.lst_data_file_name
    save_model_data(day_lists, day_lists_path, config.base_data_type)

    labels_path = data_file_path + config.y_data_file_name
    save_model_data(labels, labels_path, config.base_data_type)

    return day_lists, labels
Пример #4
0
def export_train_test_data(x_data, y_data):
    """Split the data into train and test sets, save all four parts.

    Only the first ``config.user_id_max`` samples take part in the split.
    The split is stratified on the labels and seeded with ``config.seed``.

    Args:
        x_data: Sliceable collection of samples.
        y_data: Sliceable collection of labels aligned with ``x_data``.

    Returns:
        The ``(x_train, y_train)`` pair. The test pair is only passed to
        ``save_model_data`` and is not returned.
    """
    # Truncate features and labels with the same bound. Previously only
    # x_data was cut, so a y_data longer than config.user_id_max reached
    # train_test_split with a different sample count than x_data and the
    # split failed. When y_data already has at most user_id_max entries
    # the slice keeps every element, so previously working inputs are
    # unaffected.
    sample_limit = config.user_id_max
    x_data = x_data[0:sample_limit]
    y_data = y_data[0:sample_limit]

    show_title("拆分训练数据集和测试数据集")
    x_train, x_test, y_train, y_test = train_test_split(
        x_data, y_data, random_state=config.seed, stratify=y_data)

    from src.base.config import train_data_type
    from src.base.config import x_train_file_name, y_train_file_name
    save_model_data(x_train, data_file_path + x_train_file_name,
                    train_data_type)
    save_model_data(y_train, data_file_path + y_train_file_name,
                    train_data_type)

    # Test-set names are imported only after the train set has been saved,
    # matching the original ordering of side effects.
    from src.base.config import test_data_type
    from src.base.config import x_test_file_name, y_test_file_name
    save_model_data(x_test, data_file_path + x_test_file_name, test_data_type)
    save_model_data(y_test, data_file_path + y_test_file_name, test_data_type)
    return x_train, y_train
Пример #5
0
def export_val_data(x_train, y_train):
    """Split the training data into train/validation parts and save them.

    The split is stratified on ``y_train`` and seeded with ``config.seed``.

    Args:
        x_train: Training features to split.
        y_train: Training labels to split; also used for stratification.

    Returns:
        The ``(x_train_val, y_train_val)`` pair, i.e. the training side of
        the split. The validation side is only passed to ``save_model_data``.
    """
    show_title("拆分训练数据集和验证数据集")
    split_parts = train_test_split(
        x_train, y_train, random_state=config.seed, stratify=y_train)
    fit_x, holdout_x, fit_y, holdout_y = split_parts

    from src.base.config import (
        train_val_data_type,
        x_train_val_file_name,
        y_train_val_file_name,
    )
    fit_x_path = data_file_path + x_train_val_file_name
    save_model_data(fit_x, fit_x_path, train_val_data_type)
    fit_y_path = data_file_path + y_train_val_file_name
    save_model_data(fit_y, fit_y_path, train_val_data_type)

    # Validation names are imported only after the training part is saved.
    from src.base.config import (
        val_data_type,
        x_val_file_name,
        y_val_file_name,
    )
    holdout_x_path = data_file_path + x_val_file_name
    save_model_data(holdout_x, holdout_x_path, val_data_type)
    holdout_y_path = data_file_path + y_val_file_name
    save_model_data(holdout_y, holdout_y_path, val_data_type)

    return fit_x, fit_y
Пример #6
0
def export_w2v_data(lst_data):
    """Generate the Word2Vec training data from ``lst_data`` and save it.

    Args:
        lst_data: Input forwarded unchanged to ``generate_w2v_data``.

    Returns:
        None. The generated data is only passed to ``save_model_data``.
    """
    show_title("导出用于Word2Vec训练的数据")

    from src.data.generate_data import generate_w2v_data

    w2v_corpus = generate_w2v_data(lst_data)

    from src.base.config import data_w2v_path, w2v_file_name, w2v_data_type

    # This export uses its own Word2Vec path prefix rather than data_file_path.
    target_path = data_w2v_path + w2v_file_name
    save_model_data(w2v_corpus, target_path, w2v_data_type)