def grid_search_on_text_features(self, cross_validate=True, file_postfix=""): """ Performs a grid search using text features on the given dataset. Stores the parameters for the optimal classifier. """ self.grid_params = { 'vect__ngram_range': [(1,1),(1,2),(2,2),(1,3),(2,3),(3,3),(1,4)], 'vect__use_idf': (True,False), 'vect__smooth_idf': (True, False), 'vect__sublinear_tf': (True, False), 'vect__max_df': (0.5,), } self.vect = TfidfVectorizer() cross_validation = StratifiedKFold(self.train_targets, n_folds=10) #Build a Pipeline with TFidfVectorizer and classifier pipeline_classifier = Pipeline([ ('vect', self.vect), ('clf', self.classifier)] ) #Perform grid search print "Performing grid search with classifier of instance ",str(self.classifier.__class__.__name__) self.grid = GridSearchCV(pipeline_classifier, self.grid_params, cv=cross_validation, refit=True, n_jobs=-1,verbose=1) self.grid.fit([t.text for t in self.train_tweets], self.train_targets) self.best_estimator = self.grid.best_estimator_ self.best_parameters = self.grid.best_params_ self.best_score = self.grid.best_score_ print "Results for ",self.classifier.__class__.__name__ print "Best params: ", self.best_parameters print "Best score: ", self.best_score print "Storing estimator... " utils.store_model(self.classifier.__class__.__name__, self.best_parameters, self.best_score, file_postfix=file_postfix) return self.grid
def main(arg):
    """
    Build the model described by `arg`, then train it inside a TF session.

    `arg` is a mapping that must provide the keys 'dataset', 'save_dir',
    'load', 'model_file', 'conv_type', 'filter_shape', 'kernel_size' and
    'kernel_pool_size'; it is also passed on to training() as-is.

    When arg['load'] is truthy the variables are restored from the save
    path; otherwise they are freshly initialized.
    """
    # Run configuration
    dataset = arg['dataset']
    save_dir = arg['save_dir']
    load_model = arg['load']
    model_filename = arg['model_file']

    # Network topology settings
    conv_type = arg['conv_type']
    filter_shape = arg['filter_shape']
    kernel_size = arg['kernel_size']
    kernel_pool_size = arg['kernel_pool_size']

    # Third element of the loaded data is not used here.
    train, valid, _, num_class, image_shape = utils.load_normalize_data(dataset)

    # Without a model filename there is no save path.
    if model_filename is None:
        saveto = None
    else:
        saveto = os.path.join(save_dir, model_filename)

    model = Model(image_shape, filter_shape, num_class,
                  conv_type, kernel_size, kernel_pool_size)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        # The value returned by utils.store_model is handed to training().
        save = utils.store_model(saver, sess, saveto)

        if load_model:
            # Restore from the same path the model is saved to.
            print("Loading model...")
            utils.restore_model(saver, sess, saveto)
        else:
            sess.run(tf.global_variables_initializer())

        training(sess, model, arg, train, valid, save)
pgm_acled_soc, pgm_acled_sochist, pgm_acled_socnat, pgm_acled_wcm, pgm_canon_histonly, pgm_canon_nocm, pgm_canon_wcm, pgm_nsonly_wcm, pgm_osonly_wcm, pgm_pronly_wcm, pgm_sbonly_wcm, pgm_acled_cm, pgm_acled_meancm, pgm_acled_meancmhist, pgm_acled_protest, pgm_acled_meanprotest, pgm_acled_cm, pgm_acled_meancm, ] runtypes = ["eval", "fcast"] periods = ["calib", "test"] times = t.times_nested for model_root in models_root: for model_root_var in model_root: models_root_var_times = utils.demux_times(model_root_var, runtypes, periods, times) for model_root_var_times in models_root_var_times: utils.store_model(model_root_var_times, "./output/models/")
'Training mode selection. Choices: mnist, synthetic_timeseries, cell_timeseries. (default: mnist)' ) args = parser.parse_args() model_filepath = "model-{}.pth".format(args.train_mode) root_path = "results/{}".format(args.train_mode) try: os.makedirs(root_path) except: pass is_cuda = not args.no_cuda device = torch.device("cuda" if is_cuda else "cpu") model = VAE(dropout=args.dropout, input_dim=input_dims[args.train_mode]).to(device) try: model = load_model(model_filepath, model) logger.info("Loading model from {}".format(model_filepath)) except: logger.info("Creating VAE model from scratch") model = VAE(dropout=args.dropout, input_dim=input_dims[args.train_mode]).to(device) if args.train_mode == 'mnist': train_mnist(model, device, args.epochs, root_path) elif args.train_mode == "synthetic_timeseries": model.decoder.sigmoid = False # disable sigmoid from the final decoder layer train_synthetic_timeseries(model, device, args.epochs, root_path) elif args.train_mode == "cell_timeseries": model.decoder.sigmoid = False # disable sigmoid from the final decoder layer train_cell_timeseries(model, device, args.epochs, root_path) model.to(torch.device("cpu")) store_model(model_filepath, model)
def train_on_feature_set(self, cross_validate=True, use_tfidf=True): """ Performs training with the given model using the given feature set """ #Establish document text feature vectors print "Vectorizing" # self.tokenizer = CountVectorizer().build_tokenizer() self.vect = CountVectorizer(**self.vect_options) self.tfidf_transformer = TfidfTransformer(**self.tfidf_options) self.dict_transformer = TfidfTransformer(**self.tfidf_options) # train_counts_tf = tfidf_transformer.fit_transform(train_counts) count_vector = self.vect.fit_transform([t.text for t in self.train_tweets]) tfidf_count = self.tfidf_transformer.fit_transform(count_vector) if self.only_text_features: combined_vector = tfidf_count else: self.dict_vectorizer = DictVectorizer() dict_vector = self.dict_vectorizer.fit_transform(self.feature_set) f=codecs.open("feature_set.txt", "w", "utf8") for d in dict_vector: f.write(d.__str__()) f.close() tfidf_dict = self.dict_transformer.fit_transform(dict_vector) f=codecs.open("feature_set_tdidf.txt", "w", "utf8") for d in tfidf_dict: f.write(d.__str__()) f.close() combined_vector = sp.hstack([tfidf_count, tfidf_dict]) # combined_features = FeatureUnion() #Crossvalidation cross_validation = StratifiedKFold(self.train_targets, n_folds=10) #Build a Pipeline with TFidfVectorizer and classifier pipeline_classifier = Pipeline([ # ('vect', self.vect), # ('tfidf', self.tfidf_transformer), ('clf', self.classifier) ]) #Perform grid search print "Performing grid search with classifier of instance ",str(self.classifier.__class__.__name__) self.grid = GridSearchCV(pipeline_classifier, self.grid_params, cv=cross_validation, refit=True, n_jobs=-1,verbose=1) self.grid.fit(combined_vector, self.train_targets) self.best_estimator = self.grid.best_estimator_ self.best_parameters = self.grid.best_params_ self.best_score = self.grid.best_score_ print "Results for ",self.classifier.__class__.__name__ print "Best params: ", self.best_parameters print "Best score: ", self.best_score print 
"Storing estimator... " utils.store_model(self.classifier.__class__.__name__, self.best_parameters, self.best_score) return self.grid