def test_combination_0010():
    """Config "00100" yields the base expansion plus the ampersand function."""
    collected = e.collect_expansion_functions(**_create_config_map("00100"))
    assert len(collected) == 2
    # The length check above guarantees that the unpacking cannot fail.
    base_fun, ampersand_fun = collected
    assert base_fun == e.base_expansion
    assert ampersand_fun.__name__ == _name_ampersand_fun
def test_combination_1000():
    """Config "10000" yields the base expansion plus a replacer.

    The replacer is expected to return ``lower_braces_string`` unchanged.
    """
    collected = e.collect_expansion_functions(**_create_config_map("10000"))
    assert len(collected) == 2
    assert collected[0] == e.base_expansion
    replacer = collected[1]
    assert replacer.__name__ == _name_replacer
    assert replacer(lower_braces_string) == lower_braces_string
def test_all_lower_braces():
    """Config "11110" reduces a label with lower case braces to "lower"."""
    steps = e.collect_expansion_functions(**_create_config_map("11110"))
    result = "(lower) lower case explanation"
    # Feed the output of each collected function into the next one.
    for step in steps:
        result = step(result)
    assert result == "lower"
def test_all_ampersand_from_braces():
    """Config "11110" turns "(R&D) research and discovery" into "R ?& ?D"."""
    steps = e.collect_expansion_functions(**_create_config_map("11110"))
    result = "(R&D) research and discovery"
    # Feed the output of each collected function into the next one.
    for step in steps:
        result = step(result)
    assert result == "R ?& ?D"
def test_all_abbreviation_from_braces():
    """Config "11110" turns "GDP (gross domestic product)" into a dotted form.

    Every letter of the remaining abbreviation is expected to be followed by
    an optional escaped full stop.
    """
    steps = e.collect_expansion_functions(**_create_config_map("11110"))
    result = "GDP (gross domestic product)"
    # Feed the output of each collected function into the next one.
    for step in steps:
        result = step(result)
    assert result == "G\\.?D\\.?P\\.?"
def test_combination_0101():
    """Config "01010" yields base expansion, replacer and abbreviation function.

    The replacer is expected to map ``lower_braces_string`` to
    ``lower_braces_content``.
    """
    collected = e.collect_expansion_functions(**_create_config_map("01010"))
    assert len(collected) == 3
    # The length check above guarantees that the unpacking cannot fail.
    base_fun, replacer, abbreviation_fun = collected
    assert base_fun == e.base_expansion
    assert replacer.__name__ == _name_replacer
    assert abbreviation_fun.__name__ == _name_abbreviation_fun
    assert replacer(lower_braces_string) == lower_braces_content
def _init(self):
    """Populate ``concept_map_``, ``dfa_`` and ``pipeline_`` from ``self.graph``.

    The method
      * collects the concepts and sub thesauri of the graph, leaving out
        everything reported as deprecated,
      * maps the string form of every concept to an integer index,
      * pushes every concept label through the configured expansion, case
        handling and plural functions and adds the result to an automaton,
      * converts that automaton into ``self.dfa_`` and
      * assembles the (still unfitted) feature/classifier pipeline.
    """
    deprecated = set(t.extract_deprecated(self.graph))
    concepts = set(
        t.extract_by_type_uri(
            self.graph, self.concept_type_uri, remove=deprecated))
    thesauri = set(
        t.extract_by_type_uri(
            self.graph, self.sub_thesaurus_type_uri, remove=deprecated))

    # Index every concept by its string representation.
    self.concept_map_ = {
        str(entry): position for position, entry in enumerate(concepts)}

    thesaurus_features = ThesaurusFeatureTransformation(
        self.graph,
        concepts,
        thesauri,
        self.thesaurus_relation_type_uri,
        self.thesaurus_relation_is_specialisation)
    concept_labels = t.retrieve_concept_labels(
        self.graph, allowed=concepts, langs=self.langs)
    automaton = nfa.Nfa()

    normalise_case = (
        case_handlers.title_case_handler
        if self.handle_title_case
        else case_handlers.sentence_case_handler)
    expansion_steps = expansion.collect_expansion_functions(
        extract_upper_case_from_braces=self.extract_upper_case_from_braces,
        extract_any_case_from_braces=self.extract_any_case_from_braces,
        expand_ampersand_with_spaces=self.expand_ampersand_with_spaces,
        expand_abbreviation_with_punctuation=(
            self.expand_abbreviation_with_punctuation),
    )
    if self.simple_english_plural_rules:
        pluralise = expansion.simple_english_plural_fun
    else:
        # Plural handling is switched off: hand the expression back untouched.
        def pluralise(expression):
            return expression

    for concept, label in concept_labels:
        expanded = label
        for expand in expansion_steps:
            expanded = expand(expanded)
        state = construction.ConstructionState(
            automaton, pluralise(normalise_case(expanded)), str(concept))
        _handle_construction(state, concept, label)

    automaton.remove_empty_transitions()
    self.dfa_ = conversion.NfaToDfaConverter(automaton).start_conversion()

    # Thesaurus features are assigned column 0, text features column 1.
    combined_features = ColumnTransformer([
        ("Thesaurus Features", thesaurus_features, 0),
        ("Text Features", mk_text_features(), 1),
    ])
    classifier = DecisionTreeClassifier(
        min_samples_leaf=25, max_leaf_nodes=100)
    self.pipeline_ = Pipeline([
        ("Combined Features", combined_features),
        ("Classifier", classifier),
    ])
def test_combination_0000():
    """Config "00000" yields only the base expansion."""
    collected = e.collect_expansion_functions(**_create_config_map("00000"))
    assert collected == [e.base_expansion]