def generate_folds(
    n: int, td: TrainingData
) -> Iterator[Tuple[TrainingData, TrainingData]]:
    """Generates n cross validation folds for training data td."""
    # Local import, so scikit-learn is only required when this function is used.
    from sklearn.model_selection import StratifiedKFold

    skf = StratifiedKFold(n_splits=n, shuffle=True)
    x = td.intent_examples
    # Stratify by intent label so each fold roughly preserves the intent distribution.
    y = [example.get("intent") for example in x]

    for i_fold, (train_index, test_index) in enumerate(skf.split(x, y)):
        logger.debug("Fold: {}".format(i_fold))
        train = [x[i] for i in train_index]
        test = [x[i] for i in test_index]
        yield (
            TrainingData(
                training_examples=train,
                entity_synonyms=td.entity_synonyms,
                regex_features=td.regex_features,
            ),
            TrainingData(
                training_examples=test,
                entity_synonyms=td.entity_synonyms,
                regex_features=td.regex_features,
            ),
        )
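
# Illustrative usage sketch (not part of this module): consuming the generator in a
# manual cross-validation loop. `td` is assumed to be a loaded TrainingData instance;
# `trainer` and `evaluate_on` are hypothetical, already-configured helpers.
#
#     for train_data, test_data in generate_folds(5, td):
#         interpreter = trainer.train(train_data)
#         fold_result = evaluate_on(interpreter, test_data)
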
def drop_intents_below_freq(td: TrainingData, cutoff: int = 5) -> TrainingData:
    """Remove intent groups with fewer than cutoff instances."""
    logger.debug(
        "Raw data intent examples: {}".format(len(td.intent_examples))
    )
    # Keep only examples whose intent occurs at least `cutoff` times.
    keep_examples = [
        ex
        for ex in td.intent_examples
        if td.examples_per_intent[ex.get("intent")] >= cutoff
    ]

    return TrainingData(keep_examples, td.entity_synonyms, td.regex_features)
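
# Illustrative usage sketch (assumption: `td` is a loaded TrainingData instance):
# filtering out rare intents before stratified folding, so folding does not fail on
# intents that have fewer examples than there are folds.
#
#     td = drop_intents_below_freq(td, cutoff=5)
#     folds = generate_folds(5, td)
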
def compare_nlu(
    configs: List[Text],
    data: TrainingData,
    exclusion_percentages: List[int],
    f_score_results: Dict[Text, Any],
    model_names: List[Text],
    output: Text,
    runs: int,
) -> List[int]:
    """Trains and compares multiple NLU models.

    For each run and exclusion percentage, one model per config file is trained.
    Each model is trained only on the current percentage of the training data and
    is then tested on the complete test data of that run. All results are stored
    in the provided output directory.

    Args:
        configs: config files needed for training
        data: training data
        exclusion_percentages: percentages of training data to exclude during comparison
        f_score_results: dictionary of model name to f-score results per run
        model_names: names of the models to train
        output: the output directory
        runs: number of comparison runs

    Returns: training examples per run
    """
    training_examples_per_run = []

    for run in range(runs):
        logger.info("Beginning comparison run {}/{}".format(run + 1, runs))

        run_path = os.path.join(output, "run_{}".format(run + 1))
        create_path(run_path)

        test_path = os.path.join(run_path, TEST_DATA_FILE)
        create_path(test_path)

        # Hold out a fixed test set for this run and persist it.
        train, test = data.train_test_split()
        write_to_file(test_path, test.as_markdown())

        training_examples_per_run = []

        for percentage in exclusion_percentages:
            percent_string = "{}%_exclusion".format(percentage)

            # Exclude the given percentage from the remaining training data.
            _, train = train.train_test_split(percentage / 100)
            training_examples_per_run.append(len(train.training_examples))

            model_output_path = os.path.join(run_path, percent_string)
            train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE)
            create_path(train_split_path)
            write_to_file(train_split_path, train.as_markdown())

            for nlu_config, model_name in zip(configs, model_names):
                logger.info(
                    "Evaluating configuration '{}' with {} training data.".format(
                        model_name, percent_string
                    )
                )

                try:
                    model_path = train_nlu(
                        nlu_config,
                        train_split_path,
                        model_output_path,
                        fixed_model_name=model_name,
                    )
                except Exception as e:
                    logger.warning(
                        "Training model '{}' failed. Error: {}".format(
                            model_name, str(e)
                        )
                    )
                    f_score_results[model_name][run].append(0.0)
                    continue

                model_path = os.path.join(get_model(model_path), "nlu")

                report_path = os.path.join(
                    model_output_path, "{}_report".format(model_name)
                )
                errors_path = os.path.join(report_path, "errors.json")

                result = run_evaluation(
                    test_path, model_path, report=report_path, errors=errors_path
                )

                f1 = result["intent_evaluation"]["f1_score"]
                f_score_results[model_name][run].append(f1)

    return training_examples_per_run
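
# Illustrative usage sketch (assumptions: `configs`, `model_names`, and `data` are
# already loaded, and `output_dir` is a writable directory). The nested list layout
# of `f_score_results` matches how this function indexes it: one list of f1-scores
# per run, per model name.
#
#     runs = 3
#     exclusion_percentages = [0, 25, 50, 75]
#     f_score_results = {
#         model_name: [[] for _ in range(runs)] for model_name in model_names
#     }
#     examples_per_step = compare_nlu(
#         configs,
#         data,
#         exclusion_percentages,
#         f_score_results,
#         model_names,
#         output_dir,
#         runs,
#     )
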