Exemplo n.º 1
0
Arquivo: cv.py Projeto: zle1992/CIKM
def load_data():
    """Load the training data and return model inputs and labels.

    Pipeline: ``read_cut_es`` -> ``data_2id`` on the ``q1_es_cut`` and
    ``q2_es_cut`` columns -> ``add_hum_feats`` with ``config.train_feats``
    -> ``get_X_Y_from_df`` with ``config.data_augment``.

    The intermediate results are printed for debugging, as is the length of
    the third element of the returned inputs.

    Returns:
        tuple: ``(x_train, y_train)`` exactly as produced by
        ``get_X_Y_from_df``.
    """
    print('load data')
    data = read_cut_es()  # cut words
    print(data)
    data = data_2id(data, ['q1_es_cut', 'q2_es_cut'])  # convert to ids
    print(data)
    # Generate the extra features and attach them. This used to be called
    # twice in a row, repeating the work on already-augmented data; one call
    # matches the other loaders.
    data = add_hum_feats(data, config.train_feats)

    x_train, y_train = get_X_Y_from_df(data, config.data_augment)
    print(len(x_train[2]))

    return x_train, y_train
Exemplo n.º 2
0
def load_data():
    """Build the feature-augmented data set from ``config.origin_csv``.

    Feeds the path through ``read_cut``, then ``data_2id``, then
    ``add_hum_feats`` with ``config.train_featdires``.

    Returns:
        The object returned by ``add_hum_feats``.
    """
    source_csv = config.origin_csv
    print('load data')
    segmented = read_cut(source_csv)  # cut words
    encoded = data_2id(segmented)  # convert to ids
    # Generate the extra features and attach them.
    return add_hum_feats(encoded, config.train_featdires)
Exemplo n.º 3
0
def load_data():
    """Load the test data and attach the configured test features.

    Calls ``read_cut_test``, runs ``data_2id`` over the ``q1_es_cut`` and
    ``q2_es_cut`` columns, then ``add_hum_feats`` with ``config.test_feats``.

    Returns:
        The object returned by ``add_hum_feats``.
    """
    print('load data')
    segmented = read_cut_test()  # cut words
    id_columns = ['q1_es_cut', 'q2_es_cut']
    encoded = data_2id(segmented, id_columns)  # convert to ids
    # Generate the extra features and attach them.
    return add_hum_feats(encoded, config.test_feats)
Exemplo n.º 4
0
def load_data():
    """Load training inputs and labels from ``config.origin_csv``.

    Pipeline: ``read_cut`` -> ``data_2id`` -> ``add_hum_feats`` with
    ``config.train_feats`` -> ``get_X_Y_from_df`` with
    ``config.data_augment``. The length of the third element of the inputs
    is printed before returning.

    Returns:
        tuple: ``(inputs, labels)`` as produced by ``get_X_Y_from_df``.
    """
    source_csv = config.origin_csv
    print('load data')
    segmented = read_cut(source_csv)  # cut words
    encoded = data_2id(segmented)  # convert to ids
    # Generate the extra features and attach them.
    featured = add_hum_feats(encoded, config.train_feats)

    inputs, labels = get_X_Y_from_df(featured, config.data_augment)
    print(len(inputs[2]))
    return inputs, labels
Exemplo n.º 5
0
def load_data():
    """Load the data, split it, and return train and dev inputs/labels.

    Pipeline: ``read_cut_es`` -> ``data_2id`` on the ``q1_es_cut`` and
    ``q2_es_cut`` columns -> ``add_hum_feats`` with ``config.train_feats``
    -> ``train_test`` split. The train part goes through
    ``get_X_Y_from_df`` with ``config.data_augment``; the dev part is passed
    with ``False`` instead. Intermediate results and the ``.shape`` of the
    first input of each part are printed.

    Returns:
        tuple: ``(x_train, y_train, x_dev, y_dev)``.
    """
    print('load data')
    segmented = read_cut_es()  # cut words
    print(segmented)
    encoded = data_2id(segmented, ['q1_es_cut', 'q2_es_cut'])  # convert to ids
    print(encoded)
    # Generate the extra features and attach them.
    featured = add_hum_feats(encoded, config.train_feats)
    train_part, dev_part = train_test(featured)

    train_inputs, train_labels = get_X_Y_from_df(train_part, config.data_augment)
    print(train_inputs)
    dev_inputs, dev_labels = get_X_Y_from_df(dev_part, False)

    print('train shape', train_inputs[0].shape)
    print('dev shape', dev_inputs[0].shape)
    return train_inputs, train_labels, dev_inputs, dev_labels
Exemplo n.º 6
0
def main(model_path):
    """Predict on the test data with a saved model and write the scores.

    The output file is ``submit/<name>.txt``, where ``<name>`` is the part
    of ``model_path`` after its last ``/``.

    Args:
        model_path: Path of the saved model passed to ``load_model``.
    """
    model_name = model_path.split('/')[-1]
    out_path = 'submit/{0}.txt'.format(model_name)

    print('load data')
    frame = read_cut_test()  # cut words
    frame = data_2id(frame, ['q1_es_cut', 'q2_es_cut'])  # convert to ids
    # Generate the extra features and attach them.
    frame = add_hum_feats(frame, config.test_feats)

    inputs, _ = get_X_Y_from_df(frame, False)
    # With no configured features, only the first two inputs are fed to
    # the model.
    if config.feats == []:
        inputs = inputs[:2]

    print('load model and predict')
    model = load_model(model_path, custom_objects={"softmax": softmax})
    predictions = model.predict(inputs, batch_size=config.batch_size)
    print(predictions)

    # Keep column 1 of the predictions and write it without index or header.
    frame['label'] = predictions[:, 1]
    frame['label'].to_csv(out_path, index=False, header=None)
Exemplo n.º 7
0
def load_data(in_path):
    """Load data from ``in_path`` and attach the configured test features.

    Args:
        in_path: Input location handed to ``cut_word``.

    Returns:
        The object returned by ``add_hum_feats`` after ``cut_word`` and
        ``data_2id`` have been applied.
    """
    print('load data')
    segmented = cut_word(in_path)
    encoded = data_2id(segmented)  # convert to ids
    # Generate the extra features and attach them.
    return add_hum_feats(encoded, config.test_featdires)