def train_models(self, kernel_type):
    """Train an SVC with the requested kernel via grid search on a tfidf pipeline.

    Args:
        kernel_type: one of 'linear_svc', 'rbf_svc', 'sigmoid_svc', 'poly_svc'.

    Returns:
        The fitted grid-search object. It is also stored on self under the
        matching attribute (linear_model / rbf_model / sigmoid_model /
        poly_model), exactly as the original four branches did.

    Raises:
        ValueError: if kernel_type is not one of the supported values.
            (The original fell off the end and silently returned None,
            which hid typos in kernel_type from callers.)
    """
    # kernel_type -> (printed label, SVC kernel name, attribute on self)
    configs = {
        'linear_svc': ('Linear', 'linear', 'linear_model'),
        'rbf_svc': ('RBF', 'rbf', 'rbf_model'),
        'sigmoid_svc': ('Sigmoid', 'sigmoid', 'sigmoid_model'),
        'poly_svc': ('Polynomial', 'poly', 'poly_model'),
    }
    if kernel_type not in configs:
        raise ValueError(f'Unknown kernel_type: {kernel_type!r}')
    label, kernel, attr = configs[kernel_type]
    print(f'Training {label} SVC...')
    svc_kwargs = dict(kernel=kernel, decision_function_shape='ovr', C=1,
                      max_iter=-1, random_state=42)
    if kernel != 'linear':
        # The original passed gamma=1 for every kernel except 'linear'
        # (gamma has no effect on the linear kernel anyway).
        svc_kwargs['gamma'] = 1
    pipeline = get_pipeline(svm.SVC(**svc_kwargs), 'tfidf')
    model = get_gdsearch(pipeline, 'SVC')
    model.fit(self.X, self.y)
    setattr(self, attr, model)
    print('Best estimator params: ', model.best_params_, '\n')
    return model
def train_model(self, skip_pipeline=False):
    """Fit an XGBoost classifier on self.X / self.y.

    When skip_pipeline is False (default), wraps the classifier in a tfidf
    pipeline, grid-searches it, and prints the chosen parameters. When True,
    fits a plain XGBClassifier with fixed hyperparameters directly.
    """
    if skip_pipeline:
        # Hand-picked hyperparameters, no vectorizer, no grid search.
        self.model = XGBClassifier(
            random_state=42, seed=2, objective='multi:softmax',
            eval_metric='merror', use_label_encoder=False,
            n_estimators=30, colsample_bytree=0.8, subsample=1.0)
        self.model.fit(self.X, self.y)
    else:
        base_clf = XGBClassifier(
            random_state=42, seed=2, objective='multi:softmax',
            eval_metric='merror', use_label_encoder=False,
            learning_rate=0.1, n_jobs=-1, colsample_bytree=1.0)
        search = get_gdsearch(get_pipeline(base_clf, 'tfidf'), 'XGBoost')
        self.model = search.fit(self.X, self.y)
        print('Best model params:', self.model.best_params_)
        print('All model params: ', self.model.get_params(True), '\n')
def train_model(self, skip_vectorizer):
    """Grid-search a logistic-regression model and store the fitted search.

    Args:
        skip_vectorizer: when truthy, build the pipeline without the count
            vectorizer; otherwise include it.

    Side effects: replaces self.model with the fitted grid-search object
    and prints the best parameters.
    """
    self.get_model()
    # FIX: the original always built the 'count' pipeline first and then
    # discarded it and built a second one when skip_vectorizer was set.
    # Build only the pipeline that will actually be used.
    if skip_vectorizer:
        pipeline = get_pipeline(self.model)
    else:
        pipeline = get_pipeline(self.model, 'count', True)
    self.model = get_gdsearch(pipeline, 'Logistic', skip_vectorizer).fit(self.X, self.y)
    print('Best estimator params:', self.model.best_params_)
def train_model(self):
    """Grid-search a balanced decision-tree classifier on self.X / self.y.

    Stores the fitted grid-search object on self.model.
    """
    tree = DecisionTreeClassifier(random_state=42, criterion='gini',
                                  splitter='best', class_weight='balanced')
    search = get_gdsearch(get_pipeline(tree), model_type='DTree')
    self.model = search.fit(self.X, self.y)
def train_models(self):
    """Grid-search a LinearSVC on a tfidf pipeline and report the chosen params.

    Stores the fitted grid-search object on self.model.
    """
    base_svc = LinearSVC(C=1, multi_class='ovr', class_weight='balanced',
                         max_iter=1000000, dual=True, tol=1e-5)
    search = get_gdsearch(get_pipeline(base_svc, 'tfidf'), 'LSVM')
    self.model = search.fit(self.X, self.y)
    print('Best estimator params: ', self.model.best_params_)
    print('All model params: ', self.model.get_params(True), '\n')
def train_model(self, model_type='Gaussian'):
    """Train a naive-Bayes model on self.X / self.y.

    Args:
        model_type: 'Gaussian' fits GaussianNB on count-vectorized features
            (and stores the held-out features on self.test_features);
            any other value grid-searches a MultinomialNB pipeline.
    """
    # BUG FIX: the original tested `type == 'Gaussian'` — comparing the
    # builtin `type` to a string is always False, so the Gaussian branch
    # was unreachable and every call fell through to MultinomialNB.
    if model_type == 'Gaussian':
        vectorizer = Vectorizer(self.X, self.y)
        train_features, test_features = vectorizer.get_vectorized_features(type='count')
        self.test_features = test_features
        # GaussianNB requires dense input; the vectorizer output is sparse.
        self.model = GaussianNB().fit(train_features.toarray(), self.y)
    else:
        self.model = get_gdsearch(get_pipeline(MultinomialNB(), True), model_type).fit(self.X, self.y)
        print('Best parameters selected', self.model.best_params_, '\n')
def train_model(self):
    """Grid-search a random-forest classifier and keep the fitted search object."""
    forest = RandomForestClassifier(random_state=42, n_jobs=-1)
    self.model = get_gdsearch(get_pipeline(forest), 'RandomForest').fit(self.X, self.y)