def train(self, tagged_essays, max_epochs):
    """Run ``max_epochs`` training epochs over ``tagged_essays``.

    Every epoch collects fresh parse-action and causal-relation examples by
    calling ``self.generate_training_data`` on each sentence, logs the
    micro-averaged metrics of the relations predicted while doing so, and
    then re-trains the parse and crel models on the collected examples.

    :param tagged_essays: Essays exposing ``sentences`` and
        ``pred_tagged_sentences`` (indexed in parallel).
    :param max_epochs: Number of epochs to run.
    """
    gold_ys_by_sent = self.get_label_data(tagged_essays)

    for _ in range(max_epochs):
        self.epoch += 1
        self.log("Epoch: {epoch}".format(epoch=self.epoch))

        # TODO - provide option for different model types here?
        parse_examples = WeightedExamples(
            labels=PARSE_ACTIONS, positive_value=self.positive_val)
        crel_examples = WeightedExamples(
            labels=None, positive_value=self.positive_val)

        epoch_pred_ys = defaultdict(list)
        for essay in tagged_essays:
            for sentence_index, sentence in enumerate(essay.sentences):
                sentence_pred_tags = essay.pred_tagged_sentences[sentence_index]
                relations = self.generate_training_data(
                    sentence, sentence_pred_tags, parse_examples, crel_examples)
                # Keep the predicted relations so the epoch can be scored below
                self.add_cr_labels(relations, epoch_pred_ys)

        metrics_by_class = ResultsProcessor.compute_metrics(
            gold_ys_by_sent, epoch_pred_ys)
        epoch_micro = micro_rpfa(metrics_by_class.values())  # type: rpfa
        self.log("Training Metrics: {metrics}".format(metrics=epoch_micro))

        self.train_parse_models(parse_examples)
        self.train_crel_models(crel_examples)

        # Retain this epoch's datasets, keyed by epoch number
        self.training_datasets_parsing[self.epoch] = parse_examples
        self.training_datasets_crel[self.epoch] = crel_examples
def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
    '''Train a label-powerset averaged perceptron on the tagged essays.

    The distinct tag combinations returned by ``self.__get_tags_`` are
    counted over all words; those seen at least
    ``self.combo_freq_threshold`` times become ``self.classes``. A fresh
    ``AveragedPerceptron`` is then trained online for ``nr_iter`` passes,
    shuffling the essays after each pass, and its weights are averaged at
    the end.

    :param essay_feats: Iterable of essays. Each exposes ``sentences``; the
        words of a sentence expose ``features``, ``tags`` and ``word``.
    :param save_loc: Accepted for interface compatibility only; this method
        never reads it and nothing is saved.
    :param nr_iter: Number of training iterations.
    :param verbose: If true, log micro-averaged metrics after every pass.
    :return: ``None``.
    '''
    # Copy as we do an inplace shuffle below
    cp_essay_feats = list(essay_feats)

    # Frequency of each tag combination across the whole training set
    tag_freq = defaultdict(int)
    for essay in cp_essay_feats:
        for taggged_sentence in essay.sentences:
            for wd in taggged_sentence:
                tag_freq[self.__get_tags_(wd.tags)] += 1

    self.classes = {fs for fs, cnt in tag_freq.items()
                    if cnt >= self.combo_freq_threshold}
    self.model = AveragedPerceptron(self.classes)

    for iter_ in range(nr_iter):
        class2predictions = defaultdict(list)
        class2tags = defaultdict(list)

        for essay in cp_essay_feats:
            for taggged_sentence in essay.sentences:
                # Start of sentence: reset the history of predicted tags
                prev = list(self.START)

                for wd in taggged_sentence:
                    # Don't mutate the feat dictionary
                    shared_features = dict(wd.features.items())
                    # get all tagger predictions for previous 2 tags
                    self._add_secondary_tag_features(shared_features, prev)

                    tagger_feats = dict(shared_features.items())
                    # add more in depth features for this tag
                    actual = self.__get_tags_(wd.tags)
                    if self.use_tag_features:
                        self._add_tag_features(tagger_feats, wd.word, prev[-1], prev[-2])

                    # Online perceptron step: predict, then update on the truth
                    guess = self.model.predict(tagger_feats)
                    self.model.update(actual, guess, tagger_feats)
                    prev.append(guess)

                    # Record per-individual-tag binary outcomes for the metrics
                    for cls in self.individual_tags:
                        class2predictions[cls].append(1 if cls in guess else 0)
                        class2tags[cls].append(1 if cls in actual else 0)

        random.shuffle(cp_essay_feats)
        class2metrics = ResultsProcessor.compute_metrics(class2tags, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        if verbose:
            logging.info("Iter {0}: Micro Avg Metrics: {1}".format(iter_, str(micro_metrics)))

    self.model.average_weights()
    return None
def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
    '''Train one binary tagger per class on the tagged essays.

    For ``nr_iter`` passes, every word of every sentence is presented to
    each model in ``self.class2model``: the model predicts, is updated
    against the label from ``self.__get_yal_``, and its guess is appended
    to that class's tag history. Essays are shuffled after each pass and
    every model's weights are averaged once training is finished.

    :param essay_feats: Iterable of essays. Each exposes ``sentences``; the
        words of a sentence expose ``features`` and ``word``.
    :param save_loc: Accepted for interface compatibility only; this method
        never reads it and nothing is saved.
    :param nr_iter: Number of training iterations.
    :param verbose: If true, log the weighted-mean metrics after every pass.
    :return: ``None``.
    '''
    # Copy as we do an inplace shuffle below
    cp_essay_feats = list(essay_feats)

    for iter_ in range(nr_iter):
        class2predictions = defaultdict(list)
        class2tags = defaultdict(list)

        for essay in cp_essay_feats:
            for taggged_sentence in essay.sentences:
                # Start of sentence: each class gets its own fresh tag history
                class2prev = {cls: list(self.START) for cls in self.classes}

                for wd in taggged_sentence:
                    # Don't mutate the feat dictionary
                    shared_features = dict(wd.features.items())
                    # get all tagger predictions for previous 2 tags
                    for cls in self.classes:
                        self._add_secondary_tag_features(
                            shared_features, wd.word, cls, class2prev[cls])

                    # train each binary tagger
                    for cls in self.classes:
                        tagger_feats = dict(shared_features.items())
                        # add more in depth features for this tag
                        self._add_tag_features(tagger_feats, wd.word,
                                               class2prev[cls][-1],
                                               class2prev[cls][-2])

                        actual = self.__get_yal_(wd, cls)
                        model = self.class2model[cls]
                        guess = model.predict(tagger_feats)
                        model.update(actual, guess, tagger_feats)

                        class2prev[cls].append(guess)
                        class2predictions[cls].append(guess)
                        class2tags[cls].append(actual)

        random.shuffle(cp_essay_feats)
        class2metrics = ResultsProcessor.compute_metrics(
            class2tags, class2predictions)
        wtd_mean = weighted_mean_rpfa(class2metrics.values())
        if verbose:
            logging.info("Iter {0}: Wtd Mean: {1}".format(
                iter_, str(wtd_mean)))

    for cls in self.classes:
        self.class2model[cls].average_weights()
    return None
def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
    '''Train one binary tagger per class on the tagged essays.

    For ``nr_iter`` passes, every word of every sentence is presented to
    each model in ``self.class2model``: the model predicts, is updated
    against the label from ``self.__get_yal_``, and its guess is appended
    to that class's tag history. Essays are shuffled after each pass and
    every model's weights are averaged once training is finished.

    :param essay_feats: Iterable of essays. Each exposes ``sentences``; the
        words of a sentence expose ``features`` and ``word``.
    :param save_loc: Accepted for interface compatibility only; this method
        never reads it and nothing is saved.
    :param nr_iter: Number of training iterations.
    :param verbose: If true, log the weighted-mean metrics after every pass.
    :return: ``None``.
    '''
    # Copy as we do an inplace shuffle below
    cp_essay_feats = list(essay_feats)

    for iter_ in range(nr_iter):
        class2predictions = defaultdict(list)
        class2tags = defaultdict(list)

        for essay in cp_essay_feats:
            for taggged_sentence in essay.sentences:
                # Start of sentence: each class gets its own fresh tag history
                class2prev = {cls: list(self.START) for cls in self.classes}

                for wd in taggged_sentence:
                    # Don't mutate the feat dictionary
                    shared_features = dict(wd.features.items())
                    # get all tagger predictions for previous 2 tags
                    for cls in self.classes:
                        self._add_secondary_tag_features(shared_features, wd.word, cls, class2prev[cls])

                    # train each binary tagger
                    for cls in self.classes:
                        tagger_feats = dict(shared_features.items())
                        # add more in depth features for this tag
                        self._add_tag_features(tagger_feats, wd.word, class2prev[cls][-1], class2prev[cls][-2])

                        actual = self.__get_yal_(wd, cls)
                        model = self.class2model[cls]
                        guess = model.predict(tagger_feats)
                        model.update(actual, guess, tagger_feats)

                        class2prev[cls].append(guess)
                        class2predictions[cls].append(guess)
                        class2tags[cls].append(actual)

        random.shuffle(cp_essay_feats)
        class2metrics = ResultsProcessor.compute_metrics(class2tags, class2predictions)
        wtd_mean = weighted_mean_rpfa(class2metrics.values())
        if verbose:
            logging.info("Iter {0}: Wtd Mean: {1}".format(iter_, str(wtd_mean)))

    for cls in self.classes:
        self.class2model[cls].average_weights()
    return None
def get_wd_level_preds(essays, expected_tags):
    """Collect word-level predicted labels for every expected tag.

    For each predicted word in ``essay.pred_tagged_sentences`` (one entry
    per sentence in ``essay.sentences``) and each expected tag, the label
    computed by ``ResultsProcessor.__get_label_`` is appended to that tag's
    list.

    :param essays: Iterable of essays exposing ``sentences`` and
        ``pred_tagged_sentences`` (indexed in parallel).
    :param expected_tags: Iterable of tags to produce label lists for.
    :return: ``defaultdict(list)`` mapping each expected tag to its labels,
        in essay / sentence / word order.
    :raises TypeError: If a word's predicted tags are not a ``str``,
        ``set`` or ``list`` (subclasses are accepted).
    """
    expected_tags = set(expected_tags)
    ysbycode = defaultdict(list)
    # Private (name-mangled) static helper of ResultsProcessor
    get_label = None

    for essay in essays:
        for sentix, _ in enumerate(essay.sentences):
            for tags in essay.pred_tagged_sentences[sentix]:
                # A word's prediction may be a single tag or a collection
                if isinstance(tags, str):
                    ptag_set = {tags}
                elif isinstance(tags, (set, list)):
                    ptag_set = set(tags)
                else:
                    raise TypeError("Unrecognized tag type: {0}".format(
                        type(tags).__name__))

                for exp_tag in expected_tags:
                    if get_label is None:
                        # Looked up lazily, only when a label is needed
                        get_label = ResultsProcessor._ResultsProcessor__get_label_
                    ysbycode[exp_tag].append(get_label(exp_tag, ptag_set))
    return ysbycode
def train(self, tagged_essays, max_epochs):
    """Run up to ``max_epochs`` training epochs, decaying ``self.beta``.

    Each epoch gathers parse-action and causal-relation examples via
    ``self.generate_training_data``, prints the micro-averaged metrics of
    the relations predicted along the way, re-trains both model families,
    and then applies ``self.beta_decay_fn`` to ``self.beta``. Training
    stops early once an epoch has started with a negative beta and beta is
    still negative after decay.

    :param tagged_essays: Essays exposing ``sentences`` and
        ``pred_tagged_sentences`` (indexed in parallel).
    :param max_epochs: Upper bound on the number of epochs.
    """
    gold_ys_by_sent = self.get_label_data(tagged_essays)
    # NOTE(review): the flag is set on ``beta < 0`` although the final
    # message talks about ``beta <= 0`` - confirm which is intended.
    trained_with_beta0 = False

    for _ in range(max_epochs):
        if self.beta < 0:
            trained_with_beta0 = True

        self.epoch += 1
        print("Epoch: {epoch}".format(epoch=self.epoch))
        print("Beta: {beta}".format(beta=self.beta))

        # TODO - provide option for different model types here?
        parse_examples = WeightedExamples(
            labels=PARSE_ACTIONS, positive_value=self.positive_val)
        crel_examples = WeightedExamples(
            labels=None, positive_value=self.positive_val)

        epoch_pred_ys = defaultdict(list)
        for essay in tagged_essays:
            for sentence_index, sentence in enumerate(essay.sentences):
                sentence_pred_tags = essay.pred_tagged_sentences[sentence_index]
                relations = self.generate_training_data(
                    sentence, sentence_pred_tags, parse_examples, crel_examples)
                # Keep the predicted relations so the epoch can be scored below
                self.add_cr_labels(relations, epoch_pred_ys)

        metrics_by_class = ResultsProcessor.compute_metrics(
            gold_ys_by_sent, epoch_pred_ys)
        epoch_micro = micro_rpfa(metrics_by_class.values())  # type: rpfa
        print("Training Metrics: {metrics}".format(metrics=epoch_micro))

        # TODO, dictionary vectorize examples, train a weighted binary classifier for each separate parsing action
        self.train_parse_models(parse_examples)
        self.train_crel_models(crel_examples)

        # Retain this epoch's datasets, keyed by epoch number
        self.training_datasets_parsing[self.epoch] = parse_examples
        self.training_datasets_crel[self.epoch] = crel_examples

        # Decay beta
        self.beta = self.beta_decay_fn(self.beta)
        if trained_with_beta0 and self.beta < 0:
            print("beta decayed below 0 - beta:'{beta}', stopping".format(beta=self.beta))
            break
    # end [for each epoch]

    if not trained_with_beta0:
        print("Algorithm hit max epochs without training with beta <= 0 - final_beta:{beta}".format(beta=self.beta))
def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file, out_metrics_file, out_categories_file):
    """Score the trained word tagger and stacked sentence classifier on test essays.

    Runs the per-tag word-level classifiers, builds the sentence-level
    stacking features from them, runs the per-tag sentence classifiers, and
    writes predictions, confidences and a metrics summary to the given
    files. The metrics summary is also printed.

    Relies on many names that are not parameters (``wd_train_tags``,
    ``feature_transformer``, ``tag2word_classifier``, ``tag2sent_classifier``,
    ``wd_td_ys_bytag``, ``train_wd_predictions_by_code``, ``USE_SVM``, ...);
    they must already exist in the enclosing scope. Python 2 syntax
    (``print`` statements).

    :param test_essay_feats: Test essays with extracted features.
    :param out_predictions_file: Path the predicted classes are written to.
    :param out_predicted_margins_file: Path the predicted confidences are
        written to (decision function if ``USE_SVM``, else probability).
    :param out_metrics_file: Path the metrics summary is written to.
    :param out_categories_file: Passed through to ``write_categories``.
    """

    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)
    """ TEST Tagger """
    test_wd_predictions_by_code = test_classifier_per_code(test_x, tag2word_classifier, wd_test_tags)

    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats, test_x, wd_test_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK)

    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)

    # Confidence scores come from a different predict function per model family
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag)

    """ Write out the predicted classes """
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)

    """ Write out the accuracy metrics """
    # Word-level "train" metrics use values computed outside this function
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)

    # NOTE(review): train_sent_metrics is computed from the *test* labels and
    # predictions, so the "Train" and "Test" SENTENCE columns are identical.
    # Looks like a copy-paste slip - confirm and substitute the training data.
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)

    with open(out_metrics_file, "w+") as f_metrics_file:
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(train_wd_metrics, test_wd_metrics, "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics, "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)
        # Reads the predictions file written above
        write_categories(out_predictions_file, "CB", out_categories_file)
        print s
def evaluate_ranker(model, xs, essay2crels, ys_bytag, set_cr_tags):
    """Score a ranking model by the top-ranked parse of each essay.

    A clone of ``model`` is used (with averaged weights when the model
    supports averaging). For every essay in ``essay2crels`` the top-ranked
    parse among that essay's candidates is taken as the prediction; essays
    with no parser input contribute an empty prediction.

    :param model: Ranker exposing ``clone()`` and ``rank(feats)``.
    :param xs: Parser inputs exposing ``essay_name``, ``all_feats_array``
        and ``all_parses``.
    :param essay2crels: Mapping of essay name to its actual relations; only
        the keys are used here.
    :param ys_bytag: Actual labels by tag.
    :param set_cr_tags: Tags passed through to ``add_cr_labels``.
    :return: Result of ``get_micro_metrics`` over the mean metrics.
    """
    ranker = model.clone()
    if hasattr(model, "average_weights"):
        ranker.average_weights()

    # Later inputs win when two share an essay name
    inputs_by_essay = {inp.essay_name: inp for inp in xs}

    pred_ys_bytag = defaultdict(list)
    for ename, _ in essay2crels.items():
        if ename in inputs_by_essay:
            essay_input = inputs_by_essay[ename]
            ranking = ranker.rank(essay_input.all_feats_array)
            best_parse = essay_input.all_parses[ranking[0]]  # type: Tuple[str]
        else:
            # no predicted crels for this essay
            best_parse = set()
        add_cr_labels(set(best_parse), pred_ys_bytag, set_cr_tags)

    averaged = ResultsProcessor.compute_mean_metrics(ys_bytag, pred_ys_bytag)
    return get_micro_metrics(metrics_to_df(averaged))
# Module-level experiment setup: constants, data locations, test essays and
# the previously generated tagger predictions are all loaded at import time.
NGRAMS = 3
MIN_FEAT_FREQ = 5
BETA = 0.2
MAX_EPOCHS = 10

# All data paths are derived from the configured data directory
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"
# NOTE: These predictions are generated from the "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb" notebook
predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"

config = get_config(training_folder)
processor = ResultsProcessor(dbname="metrics_causal")

# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)

########################################################

fname = predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
# NOTE(review): dill.load can execute arbitrary code while unpickling - only
# load prediction files that were produced locally / come from a trusted source.
with open(fname, "rb") as f:
    pred_tagged_essays = dill.load(f)

print("Number of pred tagged essasy %i" % len(pred_tagged_essays))  # should be 902
print("Started at: " + str(datetime.datetime.now()))

# Root logger: timestamped messages at INFO level and above
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()
from IterableFP import flatten
from collections import defaultdict
from window_based_tagger_config import get_config
from results_procesor import ResultsProcessor, compute_metrics
from nltk.classify import maxent
# END Classifiers

import Settings
import logging

# Root logger: timestamped messages at INFO level and above
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

NUM_TRAIN_ITERATIONS = 5

# The settings between here and "end not hashed" are not hashed, as they
# don't affect persistence of feature processing
SPARSE_WD_FEATS = True
SPARSE_SENT_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
test_folds = [(pred_tagged_essays_train, pred_tagged_essays_test)] # type: List[Tuple[Any,Any]] cv_folds = cross_validation(pred_tagged_essays_train, CV_FOLDS) # type: List[Tuple[Any,Any]] result_test_essay_level = evaluate_model_essay_level( folds=cv_folds, extractor_fn_names_lst=best_extractor_names, all_extractor_fns=all_extractor_fns, ngrams=ngrams, beta=beta, stemmed=stemmed, down_sample_rate=1.0, max_epochs=max_epochs) models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_td_preds_by_sent, cv_sent_vd_ys_by_tag = result_test_essay_level mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag) print(get_micro_metrics(metrics_to_df(mean_metrics))) models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = result_test_essay_level mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag) print(get_micro_metrics(metrics_to_df(mean_metrics))) result_final_test = evaluate_model_essay_level( folds=test_folds, extractor_fn_names_lst=best_extractor_names, all_extractor_fns=all_extractor_fns, ngrams=ngrams, beta=beta, stemmed=stemmed, down_sample_rate=1.0,
NGRAMS = 3 MIN_FEAT_FREQ = 5 BETA = 0.2 MAX_EPOCHS = 10 settings = Settings() root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/" training_folder = root_folder + "Training" + "/" test_folder = root_folder + "Test" + "/" training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl" # NOTE: These predictions are generated from the "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb" notebook predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/" config = get_config(training_folder) processor = ResultsProcessor(dbname="metrics_causal") # Get Test Data In Order to Get Test CRELS # load the test essays to make sure we compute metrics over the test CR labels test_config = get_config(test_folder) tagged_essays_test = load_process_essays(**test_config) ######################################################## fname = predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill" with open(fname, "rb") as f: pred_tagged_essays = dill.load(f) print("Number of pred tagged essasy %i" % len(pred_tagged_essays)) # should be 902 print("Started at: " + str(datetime.datetime.now())) logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
def evaluate_tagger_on_fold(kfold, wd_train_tags, tag_history, tag_plus_word, tag_ngram, avg_weights=True, split=0.2):
    """Train a label-powerset tagger on one fold, choosing the iteration count by early stopping.

    The fold's training essays are randomly split into an internal train and
    held-out part. A tagger is trained one pass at a time on the train part
    and scored (micro F1) on the held-out part; training stops at the first
    pass that fails to improve F1. A fresh tagger is then trained on the
    full training essays for that many passes and used to predict the
    fold's training and validation essays.

    :param kfold: Key into the module-level ``k_fold_2data``.
    :param wd_train_tags: Tags passed to the tagger and used for scoring.
    :param tag_history: Passed through to ``PerceptronTaggerLabelPowerset``.
    :param tag_plus_word: Passed through to ``PerceptronTaggerLabelPowerset``.
    :param tag_ngram: Passed as ``tag_ngram_size``.
    :param avg_weights: Whether weights are averaged before predicting.
    :param split: Fraction of the training essays held out for early stopping.
    :return: ``(kfold, td_predictions_by_code, vd_predictions_by_code,
        optimal_num_iterations)``.
    """
    # logger.info("Loading data for fold %i" % kfold)
    k_fold_data = k_fold_2data[kfold]
    essays_TD, essays_VD, essays_TD_most_freq, wd_td_ys_bytag, wd_vd_ys_bytag = k_fold_data

    """ TRAINING """
    tagger = PerceptronTaggerLabelPowerset(wd_train_tags, combo_freq_threshold=1,
                                           tag_history=tag_history,
                                           tag_plus_word=tag_plus_word,
                                           tag_ngram_size=tag_ngram)

    # Split into train and test set
    np_essays = np.asarray(essays_TD_most_freq)
    ixs = np.arange(len(essays_TD_most_freq))
    np.random.shuffle(ixs)
    split_size = int(split * len(essays_TD_most_freq))
    test, train = np_essays[ixs[:split_size]], np_essays[ixs[split_size:]]

    _, test_tags = flatten_to_wordlevel_feat_tags(test)
    class2ys = get_wordlevel_ys_by_code(test_tags, wd_train_tags)

    max_iterations = 30
    # Fallback when F1 improves on every pass and the loop never breaks.
    # Previously this stayed at -1, so the final tagger below was trained
    # with nr_iter=-1, i.e. not trained at all.
    optimal_num_iterations = max_iterations
    last_f1 = -1

    """ EARLY STOPPING USING TEST SET """
    for i in range(max_iterations):
        tagger.train(train, nr_iter=1, verbose=False, average_weights=False)

        # Shallow copy of the un-averaged weights so training can resume from
        # them. NOTE(review): presumably average_weights() rebinds entries
        # rather than mutating nested values in place - verify.
        wts_copy = dict(tagger.model.weights.items())
        if avg_weights:
            tagger.model.average_weights()

        class2predictions = tagger.predict(test)
        # Compute F1 score, stop early if worse than previous
        class2metrics = ResultsProcessor.compute_metrics(class2ys, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        current_f1 = micro_metrics.f1_score
        if current_f1 <= last_f1:
            # Pass i (0-based) did not help, so i passes were optimal
            optimal_num_iterations = i
            break

        # Reset weights (as we are averaging weights)
        tagger.model.weights = wts_copy
        last_f1 = current_f1

    # print("fold %i - Optimal F1 obtained at iteration %i " % (kfold, optimal_num_iterations))

    """ Re-train model using stopping criterion on full training set """
    final_tagger = PerceptronTaggerLabelPowerset(wd_train_tags, combo_freq_threshold=1,
                                                 tag_history=tag_history,
                                                 tag_plus_word=tag_plus_word,
                                                 tag_ngram_size=tag_ngram)
    final_tagger.train(essays_TD_most_freq, nr_iter=optimal_num_iterations,
                       verbose=False, average_weights=avg_weights)

    """ PREDICT """
    td_wd_predictions_by_code = final_tagger.predict(essays_TD)
    vd_wd_predictions_by_code = final_tagger.predict(essays_VD)

    # logger.info("Fold %i finished" % kfold)
    """ Aggregate results """
    return kfold, td_wd_predictions_by_code, vd_wd_predictions_by_code, optimal_num_iterations
if USE_SVM: test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag) else: test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag) """ Write out the predicted classes """ with open(out_predictions_file, "w+") as f_output_file: f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n") predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags) with open(out_predicted_margins_file, "w+") as f_output_file: f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n") predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True) """ Write out the accuracy metrics """ train_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code) test_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code) train_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code) test_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code) with open(out_metrics_file, "w+") as f_metrics_file: s = "" pad = ResultsProcessor.pad_str s += ResultsProcessor.metrics_to_string(train_wd_metrics, test_wd_metrics, "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test"))) s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics, "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test"))) f_metrics_file.write(s) print s #TODO - need to add logic here for GW #write_categories(out_predictions_file, "CB", out_categories_file)
predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags) with open(out_predicted_margins_file, "w+") as f_output_file: f_output_file.write( "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n" ) predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True) """ Write out the accuracy metrics """ train_wd_metrics = ResultsProcessor.compute_mean_metrics( wd_td_ys_bytag, train_wd_predictions_by_code) test_wd_metrics = ResultsProcessor.compute_mean_metrics( wd_test_ys_bytag, test_wd_predictions_by_code) train_sent_metrics = ResultsProcessor.compute_mean_metrics( sent_test_ys_bycode, test_sent_predictions_by_code) test_sent_metrics = ResultsProcessor.compute_mean_metrics( sent_test_ys_bycode, test_sent_predictions_by_code) with open(out_metrics_file, "w+") as f_metrics_file: s = "" pad = ResultsProcessor.pad_str s += ResultsProcessor.metrics_to_string( train_wd_metrics, test_wd_metrics, "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test"))) s += ResultsProcessor.metrics_to_string(
td_predictions, regular_tags) vd_wd_predictions_by_code = to_flattened_binary_tags_by_code( vd_predictions, regular_tags) os.remove(model_filename) return wd_td_ys_bytag, wd_vd_ys_bytag, td_wd_predictions_by_code, vd_wd_predictions_by_code logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) logger = logging.getLogger() # Load the Essays # --------------- # Create persister (mongo client) - fail fast if mongo service not initialized processor = ResultsProcessor() # not hashed as don't affect persistence of feature processing SPARSE_WD_FEATS = True MIN_FEAT_FREQ = 5 # 5 best so far CV_FOLDS = 5 MIN_TAG_FREQ = 5 LOOK_BACK = 0 # how many sentences to look back when predicting tags # end not hashed # construct unique key using settings for pickling settings = Settings.Settings() root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/" training_folder = root_folder + "Training/"
def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
    '''Train a label-powerset averaged perceptron on the tagged essays.

    The distinct tag combinations returned by ``self.__get_tags_`` are
    counted over all words; those seen at least
    ``self.combo_freq_threshold`` times become ``self.classes``. A fresh
    ``AveragedPerceptron`` is then trained online for ``nr_iter`` passes,
    shuffling the essays after each pass, and its weights are averaged at
    the end.

    :param essay_feats: Iterable of essays. Each exposes ``sentences``; the
        words of a sentence expose ``features``, ``tags`` and ``word``.
    :param save_loc: Accepted for interface compatibility only; this method
        never reads it and nothing is saved.
    :param nr_iter: Number of training iterations.
    :param verbose: If true, log micro-averaged metrics after every pass.
    :return: ``None``.
    '''
    # Copy as we do an inplace shuffle below
    cp_essay_feats = list(essay_feats)

    # Frequency of each tag combination across the whole training set
    tag_freq = defaultdict(int)
    for essay in cp_essay_feats:
        for taggged_sentence in essay.sentences:
            for wd in taggged_sentence:
                tag_freq[self.__get_tags_(wd.tags)] += 1

    self.classes = {
        fs for fs, cnt in tag_freq.items()
        if cnt >= self.combo_freq_threshold
    }
    self.model = AveragedPerceptron(self.classes)

    for iter_ in range(nr_iter):
        class2predictions = defaultdict(list)
        class2tags = defaultdict(list)

        for essay in cp_essay_feats:
            for taggged_sentence in essay.sentences:
                # Start of sentence: reset the history of predicted tags
                prev = list(self.START)

                for wd in taggged_sentence:
                    # Don't mutate the feat dictionary
                    shared_features = dict(wd.features.items())
                    # get all tagger predictions for previous 2 tags
                    self._add_secondary_tag_features(shared_features, prev)

                    tagger_feats = dict(shared_features.items())
                    # add more in depth features for this tag
                    actual = self.__get_tags_(wd.tags)
                    if self.use_tag_features:
                        self._add_tag_features(tagger_feats, wd.word,
                                               prev[-1], prev[-2])

                    # Online perceptron step: predict, then update on the truth
                    guess = self.model.predict(tagger_feats)
                    self.model.update(actual, guess, tagger_feats)
                    prev.append(guess)

                    # Record per-individual-tag binary outcomes for the metrics
                    for cls in self.individual_tags:
                        class2predictions[cls].append(1 if cls in guess else 0)
                        class2tags[cls].append(1 if cls in actual else 0)

        random.shuffle(cp_essay_feats)
        class2metrics = ResultsProcessor.compute_metrics(
            class2tags, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        if verbose:
            logging.info("Iter {0}: Micro Avg Metrics: {1}".format(
                iter_, str(micro_metrics)))

    self.model.average_weights()
    return None
import Settings
from CrossValidation import cross_validation
from Decorators import memoize_to_disk
from IterableFP import flatten
from featureextractionfunctions import *
from featurevectorizer import FeatureVectorizer
from load_data import load_process_essays, extract_features
from results_procesor import ResultsProcessor,__MICRO_F1__
from window_based_tagger_config import get_config
from wordtagginghelper import *

# Root logger: timestamped messages at INFO level and above.
# NOTE(review): no `import logging` is visible in this fragment - confirm it
# is imported earlier in the file (or re-exported by a wildcard import).
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

# The settings between here and "end not hashed" are not hashed, as they
# don't affect persistence of feature processing
SPARSE_WD_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
settings = Settings.Settings()
def get_metrics_raw(essays, expected_tags, micro_only=False):
    """Compute mean word-level metrics of predicted versus actual tags.

    :param essays: Essays carrying both the actual and the predicted tags.
    :param expected_tags: Tags to score.
    :param micro_only: Accepted but not used by this function.
    :return: Result of ``ResultsProcessor.compute_mean_metrics``.
    """
    actual_by_code = ResultsProcessor.get_wd_level_lbs(
        essays, expected_tags=expected_tags)
    predicted_by_code = get_wd_level_preds(
        essays, expected_tags=expected_tags)
    return ResultsProcessor.compute_mean_metrics(actual_by_code, predicted_by_code)
def score_predictions(model, xs, ys_by_tag, seq_len):
    """Predict classes for ``xs`` and micro-average the per-tag metrics.

    Uses the module-level ``batch_size`` for prediction.

    :param model: Model exposing ``predict_classes``.
    :param xs: Model inputs.
    :param ys_by_tag: Actual labels by tag.
    :param seq_len: Passed to ``collapse_results`` along with the raw predictions.
    :return: ``(micro_metrics, pred_ys_by_tag)``.
    """
    raw_predictions = model.predict_classes(
        xs, batch_size=batch_size, verbose=0)
    pred_ys_by_tag = collapse_results(seq_len, raw_predictions)
    per_class = ResultsProcessor.compute_metrics(ys_by_tag, pred_ys_by_tag)
    return micro_rpfa(per_class.values()), pred_ys_by_tag
vd_predictions = model.tag_sents(to_sentences(vd)) # Delete model file now predictions obtained # Note, we are randomizing name above, so we need to clean up here os.remove(model_filename) td_wd_predictions_by_code[code] = to_flattened_binary_tags(td_predictions) vd_wd_predictions_by_code[code] = to_flattened_binary_tags(vd_predictions) return wd_td_ys_bytag, wd_vd_ys_bytag, td_wd_predictions_by_code, vd_wd_predictions_by_code logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) logger = logging.getLogger() # Load the Essays # --------------- # Create persister (mongo client) - fail fast if mongo service not initialized processor = ResultsProcessor() # not hashed as don't affect persistence of feature processing SPARSE_WD_FEATS = True MIN_FEAT_FREQ = 5 # 5 best so far CV_FOLDS = 5 MIN_TAG_FREQ = 5 LOOK_BACK = 0 # how many sentences to look back when predicting tags # end not hashed # construct unique key using settings for pickling settings = Settings.Settings() root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/" folder = root_folder + "Training/"
from IterableFP import flatten
from featureextractionfunctions import *
from featurevectorizer import FeatureVectorizer
from load_data import load_process_essays, extract_features
from predictions_to_file import predictions_to_file
from results_procesor import ResultsProcessor
from sent_feats_for_stacking import *
from window_based_tagger_config import get_config
from wordtagginghelper import *
# END Classifiers

# Root logger: timestamped messages at INFO level and above.
# NOTE(review): neither `import logging` nor `import Settings` is visible in
# this fragment - confirm both are imported earlier in the file.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

# The settings between here and "end not hashed" are not hashed, as they
# don't affect persistence of feature processing
SPARSE_WD_FEATS = True
SPARSE_SENT_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
settings = Settings.Settings()
from CrossValidation import cross_validation
from Decorators import memoize_to_disk
from IterableFP import flatten
from featureextractionfunctions import *
from featurevectorizer import FeatureVectorizer
from load_data import load_process_essays, extract_features
from results_procesor import ResultsProcessor, __MICRO_F1__
from window_based_tagger_config import get_config
from wordtagginghelper import *

# Root logger: timestamped messages at INFO level and above.
# NOTE(review): neither `import logging` nor `import Settings` is visible in
# this fragment - confirm both are imported earlier in the file.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor(dbname="metrics_coref_new")

# The settings between here and "end not hashed" are not hashed, as they
# don't affect persistence of feature processing
SPARSE_WD_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
settings = Settings.Settings()
from wordtagginghelper import *
from IterableFP import flatten
from collections import defaultdict
from window_based_tagger_config import get_config
from results_procesor import ResultsProcessor, compute_metrics
from nltk.classify import maxent
# END Classifiers

import Settings
import logging

# Root logger: timestamped messages at INFO level and above
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

NUM_TRAIN_ITERATIONS = 5

# The settings between here and "end not hashed" are not hashed, as they
# don't affect persistence of feature processing
SPARSE_WD_FEATS = True
SPARSE_SENT_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
def evaluate_tagger_on_fold(kfold, wd_train_tags, tag_history, tag_plus_word, tag_ngram,
                            avg_weights=True, split=0.2, max_iterations=30):
    '''Train a label-powerset perceptron tagger for one fold, with early stopping.

    A random ``split`` fraction of the fold's ``essays_TD_most_freq`` essays is
    held out. A tagger is trained one iteration at a time on the remainder and
    scored (micro-averaged F1) on the hold-out after each iteration; training
    stops as soon as the score fails to improve. A fresh tagger is then trained
    for that many iterations on all of ``essays_TD_most_freq`` and used to
    predict on the fold's training and validation essays.

    :param kfold: Index/key into the module-level ``k_fold_2data``.
    :param wd_train_tags: Tags handed to the tagger constructor and to
        ``get_wordlevel_ys_by_code`` when building the hold-out labels.
    :param tag_history: Forwarded to ``PerceptronTaggerLabelPowerset``.
    :param tag_plus_word: Forwarded to ``PerceptronTaggerLabelPowerset``.
    :param tag_ngram: Forwarded to ``PerceptronTaggerLabelPowerset`` as
        ``tag_ngram_size``.
    :param avg_weights: If true, weights are averaged before every hold-out
        evaluation and when training the final tagger.
    :param split: Fraction of the essays held out for early stopping.
    :param max_iterations: Upper bound on early-stopping iterations. Defaults
        to 30, the value that used to be hard-coded.
    :return: ``(kfold, td_predictions_by_code, vd_predictions_by_code,
        optimal_num_iterations)``.
    '''
    # The fold tuple also carries word-level ys for TD and VD; they are not
    # needed here.
    essays_TD, essays_VD, essays_TD_most_freq, _, _ = k_fold_2data[kfold]

    # ---- TRAINING ----
    tagger = PerceptronTaggerLabelPowerset(
        wd_train_tags, combo_freq_threshold=1, tag_history=tag_history,
        tag_plus_word=tag_plus_word, tag_ngram_size=tag_ngram)

    # Split into train and hold-out ("test") sets via a shuffled index.
    np_essays = np.asarray(essays_TD_most_freq)
    ixs = np.arange(len(essays_TD_most_freq))
    np.random.shuffle(ixs)
    split_size = int(split * len(essays_TD_most_freq))
    test, train = np_essays[ixs[:split_size]], np_essays[ixs[split_size:]]

    _, test_tags = flatten_to_wordlevel_feat_tags(test)
    class2ys = get_wordlevel_ys_by_code(test_tags, wd_train_tags)

    # If F1 improves on every iteration the loop never breaks, so default to
    # the cap. (The previous default of -1 made the final tagger train for
    # zero iterations in that case.)
    optimal_num_iterations = max_iterations
    last_f1 = -1

    # ---- EARLY STOPPING USING HOLD-OUT SET ----
    for i in range(max_iterations):
        tagger.train(train, nr_iter=1, verbose=False, average_weights=False)
        # Snapshot the un-averaged weights so training can resume from them.
        # NOTE(review): this is a shallow copy - it assumes average_weights()
        # rebinds entries rather than mutating them in place; confirm.
        wts_copy = dict(tagger.model.weights.items())
        if avg_weights:
            tagger.model.average_weights()

        class2predictions = tagger.predict(test)

        # Compute F1 score, stop early if not better than the previous one.
        class2metrics = ResultsProcessor.compute_metrics(class2ys, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        current_f1 = micro_metrics.f1_score
        if current_f1 <= last_f1:
            # i is 0-based, so i equals the count of iterations that improved.
            optimal_num_iterations = i
            break

        # Reset weights (as we are averaging weights).
        tagger.model.weights = wts_copy
        last_f1 = current_f1

    # print("fold %i - Optimal F1 obtained at iteration %i " % (kfold, optimal_num_iterations))

    # ---- Re-train using the stopping criterion on the full training set ----
    final_tagger = PerceptronTaggerLabelPowerset(
        wd_train_tags, combo_freq_threshold=1, tag_history=tag_history,
        tag_plus_word=tag_plus_word, tag_ngram_size=tag_ngram)
    final_tagger.train(essays_TD_most_freq, nr_iter=optimal_num_iterations,
                       verbose=False, average_weights=avg_weights)

    # ---- PREDICT ----
    td_wd_predictions_by_code = final_tagger.predict(essays_TD)
    vd_wd_predictions_by_code = final_tagger.predict(essays_VD)

    # ---- Aggregate results ----
    return kfold, td_wd_predictions_by_code, vd_wd_predictions_by_code, optimal_num_iterations
CV_FOLDS = 5

MIN_FEAT_FREQ = 5

# Global settings
settings = Settings()

# Dataset locations, all derived from the configured data directory.
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"

# NOTE: These predictions are generated from the
# "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb"
# notebook and are used as inputs to the parsing model.
rnn_predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"

config = get_config(training_folder)
processor = ResultsProcessor(dbname="metrics_causal")

# Get the test data in order to get the test CRELS: the test essays are
# loaded so that metrics are computed over the test CR labels.
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)

########################################################

# Predicted-tag essays, read from the dill file in the predictions folder.
fname = rnn_predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(fname, "rb") as f:
    pred_tagged_essays = dill.load(f)

logger.info("Started at: " + str(datetime.datetime.now()))
# should be 902
logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays))
def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file,
                         out_metrics_file, out_categories_file):
    '''Run the trained word tagger and stacked sentence model on the test essays.

    Writes three pipe-delimited / text outputs (predicted classes, predicted
    confidences, a metrics report), then calls ``write_categories`` and prints
    the metrics report.

    Relies on module-level state set up elsewhere in this script, including
    ``feature_transformer``, ``tag2word_classifier``, ``tag2sent_classifier``,
    the tag lists, ``USE_SVM`` and the training-side labels/predictions.

    :param test_essay_feats: Test essays, as accepted by
        ``flatten_to_wordlevel_feat_tags``.
    :param out_predictions_file: Path for the predicted sentence classes.
    :param out_predicted_margins_file: Path for the predicted confidences.
    :param out_metrics_file: Path for the train/test metrics report.
    :param out_categories_file: Passed to ``write_categories`` as its output.
    '''
    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)

    # ---- TEST Tagger ----
    test_wd_predictions_by_code = test_classifier_per_code(
        test_x, tag2word_classifier, wd_test_tags)

    # Parenthesised single-argument print: same output on Python 2, and the
    # module still compiles on Python 3.
    print("\nRunning Sentence Model")

    # ---- SENTENCE LEVEL PREDICTIONS FROM STACKING ----
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
        sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats,
        test_x, wd_test_ys_bytag, tag2word_classifier,
        SPARSE_SENT_FEATS, LOOK_BACK)

    # ---- Test Stack Classifier ----
    test_sent_predictions_by_code = test_classifier_per_code(
        sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)

    # Confidence scores: decision function when USE_SVM is set, otherwise
    # probabilities.
    confidence_fn = decision_function_for_tag if USE_SVM else probability_for_tag
    test_decision_functions_by_code = test_classifier_per_code(
        sent_test_xs, tag2sent_classifier, sent_output_train_test_tags,
        predict_fn=confidence_fn)

    output_tags = regular_tags + sent_output_train_test_tags

    # ---- Write out the predicted classes ----
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode,
                            test_sent_predictions_by_code, test_essay_feats,
                            output_tags)

    # ---- Write out the predicted confidences ----
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode,
                            test_decision_functions_by_code, test_essay_feats,
                            output_tags, output_confidence=True)

    # ---- Write out the accuracy metrics ----
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_test_ys_bytag, test_wd_predictions_by_code)

    # NOTE(review): the "train" sentence metrics are computed from the TEST
    # labels and predictions, so the Train and Test columns of the SENTENCE
    # section are always identical. This looks like a copy-paste slip; the
    # training-side sentence labels/predictions are not visible from this
    # function, so behaviour is left unchanged - confirm and fix at the source.
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)

    with open(out_metrics_file, "w+") as f_metrics_file:
        pad = ResultsProcessor.pad_str
        s = ResultsProcessor.metrics_to_string(
            train_wd_metrics, test_wd_metrics,
            "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(
            train_sent_metrics, test_sent_metrics,
            "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)

    write_categories(out_predictions_file, "CB", out_categories_file)
    print(s)