index=None,
                encoding='utf8')
print('sgd特征已保存\n')

########################### pac(PassiveAggressiveClassifier) ################################
# Out-of-fold stacking with a PassiveAggressiveClassifier.
# For every fold a fresh classifier is fitted on the training indices; its
# class scores for the held-out rows are written into `stack_train`, and its
# scores for the full test set are accumulated into `stack_test`, which is
# averaged over the folds after the loop.
# NOTE(review): the labels and `n_folds` are passed to the StratifiedKFold
# constructor, which looks like the legacy `sklearn.cross_validation`
# signature -- confirm which StratifiedKFold this file imports.
print('PAC stacking')
score_va = 0
stack_train = np.zeros(shape=(len(train), number))
stack_test = np.zeros(shape=(len(test), number))

pac_folds = StratifiedKFold(score, n_folds=n_folds, random_state=1017)
for i, (tr, va) in enumerate(pac_folds):
    print('stack:%d/%d' % (i + 1, n_folds))

    pac = PassiveAggressiveClassifier(random_state=1017)
    pac.fit(train_feature[tr], score[tr])

    # Scores come from the estimator's private `_predict_proba_lr` helper.
    va_feature = train_feature[va]
    score_va = pac._predict_proba_lr(va_feature)
    score_te = pac._predict_proba_lr(test_feature)
    print(score_va)

    # Per-fold diagnostic: MSE between the true and the predicted labels.
    fold_error = mean_squared_error(score[va], pac.predict(va_feature))
    print('得分' + str(fold_error))

    stack_train[va] += score_va
    stack_test += score_te

# Turn the summed test scores into the mean over all folds.
stack_test /= n_folds
# Stack the train rows on top of the test rows, then store each score column
# (rounded to 6 decimals) under its own feature name and write the CSV.
stack = np.vstack((stack_train, stack_test))
df_stack = pd.DataFrame()
for i, stacked_column in enumerate(stack.T):
    column_name = 'tfidf_pac_classfiy_{}'.format(i)
    df_stack[column_name] = np.around(stacked_column, 6)

pac_feature_path = 'feature/tfidf_pac_2_error_single_classfiy.csv'
df_stack.to_csv(pac_feature_path, index=None, encoding='utf8')
print('pac特征已保存\n')
# In[100]:

# TF-IDF features with English stop words removed: the vectorizer is fitted on
# the training texts and then applied unchanged to the test texts.
tf = TfidfVectorizer(stop_words='english')
tf_train = tf.fit_transform(X_train)
tf_test = tf.transform(X_test)

# Debug snippet (disabled): look up the vocabulary index of a single token.
# for keys, value in tf.vocabulary_.items():
#     print(keys, value)
#     if keys == 'jessica':
#         tat = value

# In[101]:

# Fit the classifier on the TF-IDF training matrix, then express the second
# column of its score matrix as a percentage.
# NOTE(review): column 1 is presumably the positive class -- confirm against
# the label ordering in `linear_clf.classes_`.
linear_clf = PassiveAggressiveClassifier(max_iter=50).fit(tf_train, y_train)
class_proba = linear_clf._predict_proba_lr(tf_test)
test_proba = class_proba[:, 1] * 100

# In[102]:

# Title column of the test frame, kept both as a Series (`r`) and as a
# one-column frame (`k`).
r = df_test['title']
k = r.to_frame()
predicted_class = linear_clf.predict(tf_test)

# Result table: the test inputs followed by confidence, predicted class and
# title. The columns are appended in exactly this order.
res = pd.DataFrame(X_test)
res['confidence'] = test_proba
res['class'] = predicted_class
res['title'] = k

# In[103]:

nres = res.to_numpy()
for x in nres:
    if x[3] == title: