Example #1
# pack each column of the test-set embedding matrix into dict entries 'cD2V<i>'
for i in range(testdata.shape[1]):
    c_dict_test['cD2V' + str(i)] = testdata[:, i]

out_c_pd_train = pd.DataFrame(c_dict_train)
out_c_pd_test = pd.DataFrame(c_dict_test)

# load data with pandas

train_data = '../train_after_preproc.csv'
test_data = '../test_after_preproc.csv'

train = pd.read_csv(train_data).fillna("")
test = pd.read_csv(test_data).fillna("")

cftrain = fe.count_word_feature(train)
cftest = fe.count_word_feature(test)

query_label_stat = pd.read_csv('../query_label_stat.csv')

# A+: model-A outputs plus query-label statistics merged onto the count features
features_train = pd.merge(cftrain, out_a_pd_train)
features_test = pd.merge(cftest, out_a_pd_test)

features_train = pd.merge(features_train, query_label_stat)
features_test = pd.merge(features_test, query_label_stat)

features_train = features_train.drop(['d2dsim'], axis=1)
features_test = features_test.drop(['d2dsim'], axis=1)

y_train = features_train.median_relevance.values
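Example #1 cuts off right after the target vector is extracted. The step that typically follows, hinted at by the commented-out lines in Example #3, drops the identifier, query text and target columns so that only numeric features remain. A minimal self-contained sketch with a tiny synthetic stand-in for features_train (the column values are purely illustrative):

import pandas as pd

# toy stand-in for the merged feature table built above
features_train = pd.DataFrame({
    'query': ['tv stand', 'tv stand'],
    'id': [1, 2],
    'median_relevance': [4, 3],
    'cD2V0': [0.12, -0.30],
    'n_common_words': [2, 1],
})

y_train = features_train.median_relevance.values
# keep only numeric feature columns for the model
X_train = features_train.drop(['query', 'id', 'median_relevance'], axis=1).values
print(X_train.shape, y_train.shape)  # (2, 2) (2,)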
Example #2
c_dict_test = {'id': idx_test}
for i in range(traindata.shape[1]):
    c_dict_train['cD2V' + str(i)] = traindata[:, i]

for i in range(testdata.shape[1]):
    c_dict_test['cD2V' + str(i)] = testdata[:, i]

out_c_pd_train = pd.DataFrame(c_dict_train)
out_c_pd_test = pd.DataFrame(c_dict_test)
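# Aside (illustrative, not part of the original snippet): the two packing loops
# above are equivalent to building the DataFrame straight from the embedding
# matrix and then attaching the id column, e.g.
#
#   out_c_pd_test = pd.DataFrame(
#       testdata, columns=['cD2V%d' % i for i in range(testdata.shape[1])])
#   out_c_pd_test.insert(0, 'id', idx_test)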


######################### Ensemble A and B #########################
train = pd.read_csv(trainfap).fillna("")
test = pd.read_csv(testfap).fillna("")

cftrain = fe.count_word_feature(train)
cftest = fe.count_word_feature(test)

query_label_stat = pd.read_csv('../query_label_stat.csv')

# accumulate model-A outputs, model-B outputs and the query-label statistics
# onto the count features (the train/test frames are built in parallel)
features_train = pd.merge(cftrain, out_a_pd_train)
features_test = pd.merge(cftest, out_a_pd_test)

features_train = pd.merge(features_train, out_b_pd_train)
features_test = pd.merge(features_test, out_b_pd_test)

features_train = pd.merge(features_train, query_label_stat)
features_test = pd.merge(features_test, query_label_stat)
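With no `on=` argument, pd.merge joins on every column the two frames have in common, which is how the count features, the model outputs and the query statistics line up row by row. A minimal self-contained sketch of that behaviour (the tiny frames and the 'id' join key are illustrative assumptions, mirroring the id column the cD2V and prediction frames carry):

import pandas as pd

cftrain = pd.DataFrame({'id': [1, 2, 3], 'n_common_words': [2, 0, 1]})
out_a_pd_train = pd.DataFrame({'id': [1, 2, 3], 'ap': [4, 3, 4]})
out_b_pd_train = pd.DataFrame({'id': [1, 2, 3], 'bp': [4, 2, 4]})

features_train = pd.merge(cftrain, out_a_pd_train)          # joins on the shared 'id' column
features_train = pd.merge(features_train, out_b_pd_train)   # appends the 'bp' column
print(features_train.columns.tolist())  # ['id', 'n_common_words', 'ap', 'bp']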
Example #3
    # SVD -> standard scaling -> RBF-SVM steps of the sklearn Pipeline
    ('svd', TruncatedSVD(n_components=200, algorithm='randomized', n_iter=5, random_state=None, tol=0.0)),
    ('scl', StandardScaler(copy=True, with_mean=True, with_std=True)),
    ('svm', SVC(C=11, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, tol=0.001,
                cache_size=200, class_weight=None, verbose=False, max_iter=-1, random_state=None))])
    clf.fit(s_data, s_labels)
    t_labels = clf.predict(t_data)
    # model-B predictions keyed by example id
    B_pd = pd.DataFrame({'id': idx, 'bp': t_labels})


    # myl

    train = pd.read_csv('../train_after_preproc.csv').fillna("")
    test = pd.read_csv('../test_after_preproc.csv').fillna("")

    # cftrain_rv0 = fe.count_word_feature(train,1)
    # cftrain_rv1 = fe.count_word_feature(train,2)
    cftrain = fe.count_word_feature(train)
    cftest = fe.count_word_feature(test)

    query_label_stat = pd.read_csv('../query_label_stat.csv')

    # features_train_rv0 = pd.merge(cftrain_rv0,query_label_stat)
    # features_train_rv1 = pd.merge(cftrain_rv1,query_label_stat)
    features_train = pd.merge(cftrain,query_label_stat)
    features_test = pd.merge(cftest,query_label_stat)

    # y_rv0 = features_train_rv0.median_relevance.values
    # X_train_rv0 = features_train_rv0.drop(['query','id','median_relevance'],axis=1).values
    # y_rv1 = features_train_rv1.median_relevance.values
    # X_train_rv1 = features_train_rv1.drop(['query','id','median_relevance'],axis=1).values

    y = features_train.median_relevance.values
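Example #3 starts mid-expression: the ('svd', ...), ('scl', ...), ('svm', ...) tuples are the steps of an sklearn Pipeline whose opening line is cut off. A hedged reconstruction of that construction, with the parameter values taken from the snippet (gamma=0.0 was the old sklearn spelling of the automatic setting, written here as 'auto'):

from sklearn.pipeline import Pipeline
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

clf = Pipeline([
    ('svd', TruncatedSVD(n_components=200, algorithm='randomized', n_iter=5)),  # dense low-rank projection
    ('scl', StandardScaler()),                                                  # zero mean, unit variance
    ('svm', SVC(C=11, kernel='rbf', gamma='auto')),                             # RBF support vector classifier
])

# usage as in the snippet:
# clf.fit(s_data, s_labels)
# t_labels = clf.predict(t_data)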