# Resolve the vectorizer vocabulary once: it does not depend on the category,
# so rebuilding the array on every iteration is wasted work.
# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer `get_feature_names_out` when the vectorizer offers it and fall back
# to the legacy method otherwise.
# NOTE(review): assumes `tfidf` is a fitted scikit-learn vectorizer - confirm.
_get_feature_names = (getattr(tfidf, 'get_feature_names_out', None)
                      or tfidf.get_feature_names)
# dtype=str keeps the array a plain unicode array regardless of which
# accessor supplied the names (list of str vs. object ndarray).
all_feature_names = np.array(_get_feature_names(), dtype=str)

# For each category (in sorted order), score every feature against a
# one-vs-rest target (`labels == category_id`) and split the ranked terms
# into unigrams and bigrams.
# NOTE(review): nothing in this loop consumes `unigrams` / `bigrams`, so only
# the values from the last category survive the loop - confirm that is
# intended.
for category, category_id in sorted(category_to_id.items()):
    features_chi2 = chi2(features, labels == category_id)
    # Element 0 of the chi2 result is used as the per-feature score;
    # np.argsort sorts ascending, so the highest-scoring terms come last.
    indices = np.argsort(features_chi2[0])
    feature_names = all_feature_names[indices]
    # A term with no space is a unigram; exactly one space makes a bigram.
    unigrams = [v for v in feature_names if len(v.split(' ')) == 1]
    bigrams = [v for v in feature_names if len(v.split(' ')) == 2]

# Build the three classifiers up front, then tag and train them in one pass.
# The fit order (random forest, naive Bayes, linear SVC) matches the order in
# which the models are declared.
rfmodel = RandomForestClassifier(
    n_estimators=600,
    max_depth=60,
    max_features='sqrt',
    random_state=42,
)
nbmodel = MultinomialNB()
svModel = LinearSVC()

# Each estimator gets a `_name` label before being fitted on the training
# split. NOTE(review): `_name` is presumably read elsewhere for reporting;
# the linear SVC keeps its existing 'SVC' label.
for _estimator, _label in (
    (rfmodel, 'RandomForest'),
    (nbmodel, 'MultinomialNB'),
    (svModel, 'SVC'),
):
    _estimator._name = _label
    _estimator.fit(features_train, labels_train)

# Report the random forest's performance on the test split.
print("Testing results are as follows- ")

# Predictions are kept in `rfPred` because both the accuracy and the
# confusion matrix below are computed from them.
rfPred = rfmodel.predict(features_test)

# The header and the score are printed on separate lines.
print("Random forest classifier accuracy is ")
rf_accuracy = accuracy_score(labels_test, rfPred)
print(rf_accuracy)

# Confusion matrix of true vs. predicted labels, consumed by the heatmap
# plotted right after this block.
conf_mat_rf = confusion_matrix(labels_test, rfPred)
ax = sns.heatmap(conf_mat_rf,
                 annot=True,
                 fmt='d',
                 xticklabels=category_id_df.category.values,