Пример #1
0
 def train_models(self, kernel_type):
     """Grid-search an SVC pipeline for the requested kernel and return it.

     Recognised ``kernel_type`` values are ``'linear_svc'``, ``'rbf_svc'``,
     ``'sigmoid_svc'`` and ``'poly_svc'``. The search object is stored on the
     matching ``self.<kernel>_model`` attribute, fitted on ``self.X`` /
     ``self.y``, its best parameters are printed, and it is returned.
     Any other ``kernel_type`` trains nothing and yields ``None``.
     """
     # (kernel_type, progress label, SVC kernel, attribute name, extra SVC kwargs)
     # Only the linear kernel is built without an explicit gamma.
     variants = (
         ('linear_svc', 'Linear', 'linear', 'linear_model', {}),
         ('rbf_svc', 'RBF', 'rbf', 'rbf_model', {'gamma': 1}),
         ('sigmoid_svc', 'Sigmoid', 'sigmoid', 'sigmoid_model', {'gamma': 1}),
         ('poly_svc', 'Polynomial', 'poly', 'poly_model', {'gamma': 1}),
     )
     for key, label, kernel, attr_name, extra in variants:
         if kernel_type == key:
             print('Training {} SVC...'.format(label))
             classifier = svm.SVC(kernel=kernel,
                                  decision_function_shape='ovr',
                                  C=1,
                                  max_iter=-1,
                                  random_state=42,
                                  **extra)
             search = get_gdsearch(get_pipeline(classifier, 'tfidf'), 'SVC')
             # Store the search on the instance before fitting.
             setattr(self, attr_name, search)
             search.fit(self.X, self.y)
             print('Best estimator params: ', search.best_params_, '\n')
             return search
     return None
Пример #2
0
 def train_model(self, skip_pipeline=False):
     """Fit an XGBoost classifier on ``self.X`` / ``self.y``.

     When ``skip_pipeline`` is truthy, a bare ``XGBClassifier`` with fixed
     hyper-parameters is fitted directly. Otherwise the classifier is wrapped
     with ``get_pipeline(..., 'tfidf')``, searched with
     ``get_gdsearch(..., 'XGBoost')``, and the selected parameters are
     printed. The resulting object is stored on ``self.model``; nothing is
     returned.
     """
     # Constructor arguments common to both variants.
     # NOTE(review): both random_state and seed are supplied — confirm which
     # one the installed xgboost version actually honours.
     shared = dict(random_state=42,
                   seed=2,
                   objective='multi:softmax',
                   eval_metric='merror',
                   use_label_encoder=False)

     if skip_pipeline:
         self.model = XGBClassifier(n_estimators=30,
                                    colsample_bytree=0.8,
                                    subsample=1.0,
                                    **shared)
         self.model.fit(self.X, self.y)
         return

     classifier = XGBClassifier(learning_rate=0.1,
                                n_jobs=-1,
                                colsample_bytree=1.0,
                                **shared)
     pipeline = get_pipeline(classifier, 'tfidf')
     # The unfitted search is assigned first, then replaced by fit()'s result.
     self.model = get_gdsearch(pipeline, 'XGBoost')
     self.model = self.model.fit(self.X, self.y)
     print('Best model params:', self.model.best_params_)
     print('All model params: ', self.model.get_params(True), '\n')
 def train_model(self, skip_vectorizer):
     """Grid-search the model prepared by ``self.get_model()``.

     A pipeline is built around ``self.model`` with
     ``get_pipeline(self.model, 'count', True)``; when ``skip_vectorizer`` is
     truthy it is replaced by ``get_pipeline(self.model)``. The pipeline is
     searched with ``get_gdsearch(..., 'Logistic', skip_vectorizer)``, fitted
     on ``self.X`` / ``self.y``, stored on ``self.model``, and the best
     parameters are printed.
     """
     self.get_model()
     # NOTE(review): this first pipeline is discarded when skip_vectorizer is
     # truthy; kept as-is in case get_pipeline has side effects — confirm.
     search_input = get_pipeline(self.model, 'count', True)
     if skip_vectorizer:
         search_input = get_pipeline(self.model)
     search = get_gdsearch(search_input, 'Logistic', skip_vectorizer)
     self.model = search.fit(self.X, self.y)
     print('Best estimator params:', self.model.best_params_)
Пример #4
0
 def train_model(self):
     """Grid-search a decision-tree pipeline and keep the fitted result.

     Builds a ``DecisionTreeClassifier`` (gini criterion, best splitter,
     balanced class weights, fixed random state), wraps it with
     ``get_pipeline``, searches with ``get_gdsearch(model_type='DTree')`` and
     stores the result of fitting on ``self.X`` / ``self.y`` in
     ``self.model``.
     """
     tree = DecisionTreeClassifier(random_state=42,
                                   criterion='gini',
                                   splitter='best',
                                   class_weight='balanced')
     search = get_gdsearch(get_pipeline(tree), model_type='DTree')
     self.model = search.fit(self.X, self.y)
 def train_models(self):
     """Grid-search a ``LinearSVC`` pipeline and report its parameters.

     The classifier is wrapped with ``get_pipeline(..., 'tfidf')``, searched
     with ``get_gdsearch(..., 'LSVM')`` and fitted on ``self.X`` /
     ``self.y``. The fitted object is stored on ``self.model``, and both its
     best parameters and its full parameter set are printed.
     """
     classifier = LinearSVC(C=1,
                            multi_class='ovr',
                            class_weight='balanced',
                            max_iter=1000000,
                            dual=True,
                            tol=1e-5)
     search = get_gdsearch(get_pipeline(classifier, 'tfidf'), 'LSVM')
     self.model = search.fit(self.X, self.y)
     print('Best estimator params: ', self.model.best_params_)
     print('All model params: ', self.model.get_params(True), '\n')
Пример #6
0
 def train_model(self, model_type='Gaussian'):
     """Train a naive Bayes model on ``self.X`` / ``self.y``.

     Args:
         model_type: ``'Gaussian'`` fits a ``GaussianNB`` directly on
             count-vectorized features. Any other value is forwarded to
             ``get_gdsearch`` as the search type for a ``MultinomialNB``
             pipeline.

     The fitted model is stored on ``self.model``. In the Gaussian case the
     vectorized test features are also kept on ``self.test_features``.
     Nothing is returned.
     """
     # Fixed: the original compared the builtin ``type`` with 'Gaussian',
     # which is always False, so this branch could never run.
     if model_type == 'Gaussian':
         vectorizer = Vectorizer(self.X, self.y)
         train_features, test_features = vectorizer.get_vectorized_features(
             type='count')
         self.test_features = test_features
         # presumably train_features is a sparse matrix that must be
         # densified for GaussianNB — TODO confirm against Vectorizer
         self.model = GaussianNB().fit(train_features.toarray(), self.y)
     else:
         # NOTE(review): True is passed as get_pipeline's second positional
         # argument here — confirm this matches get_pipeline's signature.
         self.model = get_gdsearch(get_pipeline(MultinomialNB(), True),
                                   model_type).fit(self.X, self.y)
         print('Best parameters selected', self.model.best_params_, '\n')
Пример #7
0
 def train_model(self):
     """Grid-search a random-forest pipeline and keep the fitted result.

     A ``RandomForestClassifier`` (fixed random state, ``n_jobs=-1``) is
     wrapped with ``get_pipeline``, searched with
     ``get_gdsearch(..., 'RandomForest')`` and fitted on ``self.X`` /
     ``self.y``; the outcome is stored on ``self.model``.
     """
     forest = RandomForestClassifier(random_state=42, n_jobs=-1)
     search = get_gdsearch(get_pipeline(forest), 'RandomForest')
     self.model = search.fit(self.X, self.y)