third_order, label_set, size_features ] all_cost_functions = [ micro_f1_cost, micro_f1_cost_squared, micro_f1_cost_plusone, micro_f1_cost_plusepsilon, binary_cost, inverse_micro_f1_cost, uniform_cost ] all_extractor_fn_names = get_function_names(all_extractor_fns) base_extractor_fn_names = get_function_names(base_extractors) all_cost_fn_names = get_function_names(all_cost_functions) for ngrams in [1]: logger.info("*" * LINE_WIDTH) logger.info("NGRAM SIZE: {ngram}".format(ngram=ngrams)) for stemmed in [True]: logger.info("*" * LINE_WIDTH) logger.info("Stemmed: {stemmed}".format(stemmed=stemmed)) # update top level stem setting too config["stem"] = stemmed
# The base extractors are the MVP feature set needed to get a basic parser
# working; the additional 'meta' parse features in all_extractor_fns can be
# layered on top of them.
base_extractors = [
    single_words,
    word_pairs,
    three_words,
    between_word_features,
]

# Full extractor set: the MVP features plus the higher-order / meta features.
all_extractor_fns = base_extractors + [
    word_distance,
    valency,
    unigrams,
    third_order,
    label_set,
    size_features,
]

# Candidate cost functions to evaluate (various micro-F1 variants plus
# binary / inverse / uniform baselines).
all_cost_functions = [
    micro_f1_cost,
    micro_f1_cost_squared,
    micro_f1_cost_plusone,
    micro_f1_cost_plusepsilon,
    binary_cost,
    inverse_micro_f1_cost,
    uniform_cost,
]

# Derive printable name lists via get_function_names (defined elsewhere in
# this file) — presumably used for logging and result bookkeeping.
all_cost_fn_names = get_function_names(all_cost_functions)
base_extractor_fn_names = get_function_names(base_extractors)
all_extractor_fn_names = get_function_names(all_extractor_fns)

# NOTE(review): both sweep lists currently hold a single value, so each loop
# runs exactly once; the loop structure is kept for easy widening of the sweep.
for ngrams in [1]:
    logger.info("*" * LINE_WIDTH)
    logger.info("NGRAM SIZE: {ngram}".format(ngram=ngrams))

    for stemmed in [True]:
        logger.info("*" * LINE_WIDTH)
        logger.info("Stemmed: {stemmed}".format(stemmed=stemmed))

        # update top level stem setting too
        config["stem"] = stemmed