def test_dict1_all_fields(self):
    """bibindex termcollectors - get_field_values - complicated field

    Collects every value found under ``self.dict1['all']`` and checks
    both the content and the order of the collected list.
    """
    expected = [
        'engine',
        'flat tyre',
        'windscreen',
        'Airplane',
        'x,y - plane',
        'Odin',
        'Eric',
        'Frank',
        'Theodor',
        'Richard',
    ]
    collected = []
    # The helper fills the list passed as its second argument.
    get_values_recursively(self.dict1['all'], collected)
    self.assertEqual(collected, expected)
def test_dict1_all_fields(self):
    """bibindex termcollectors - get_field_values - complicated field

    Collects every value found under ``self.dict1['all']`` and checks
    both the content and the order of the collected list.
    """
    expected = [
        'engine',
        'flat tyre',
        'windscreen',
        'Airplane',
        'x,y - plane',
        'Odin',
        'Eric',
        'Frank',
        'Theodor',
        'Richard',
    ]
    collected = []
    # The helper fills the list passed as its second argument.
    get_values_recursively(self.dict1['all'], collected)
    self.assertEqual(collected, expected)
def tokenize_via_recjson(self, recID):
    """
    Tokenizes journal info for the given record, using bibfield.

    The record is fetched with get_record(), the field named by
    ``self.nonmarc_tag`` is flattened with get_values_recursively(),
    and for each collected phrase only the text before its first "-"
    is kept (the whole phrase when it contains no "-").

    Returns the list of those leading parts, in collection order.
    """
    record = get_record(recID)
    collected = []
    get_values_recursively(record.get(self.nonmarc_tag), collected)
    # split("-", 1)[0] is everything before the first dash.
    return [value.split("-", 1)[0] for value in collected]
def tokenize_via_recjson(self, recID):
    """
    Tokenizes journal info for the given record, using bibfield.

    The record is fetched with get_record(), the field named by
    ``self.nonmarc_tag`` is flattened with get_values_recursively(),
    and for each collected phrase only the text before its first "-"
    is kept (the whole phrase when it contains no "-").

    Returns the list of those leading parts, in collection order.
    """
    record = get_record(recID)
    collected = []
    get_values_recursively(record.get(self.nonmarc_tag), collected)
    # split("-", 1)[0] is everything before the first dash.
    return [value.split("-", 1)[0] for value in collected]
def _collect_string(self, recIDs, termslist):
    """
    Collects terms from specific tags or fields.
    Used together with string tokenizer.

    For every record in ``recIDs`` the values of each tag in
    ``self.tags`` are flattened with get_values_recursively() and
    tokenized. A tag listed in ``self.special_tags`` uses the tokenizer
    registered there; any other tag uses ``self.tokenizing_function``.
    The resulting terms are merged into ``termslist[recID]`` with
    list_union(). Records that produce no terms leave ``termslist``
    untouched.

    Returns the same ``termslist`` object, updated in place.
    """
    wanted_tags = self.tags
    for recID in recIDs:
        record = get_record(recID)
        record_terms = []
        for tag in wanted_tags:
            tokenize = self.special_tags.get(tag, self.tokenizing_function)
            values = []
            get_values_recursively(record.get(tag), values)
            for value in values:
                record_terms.extend(tokenize(value))
        if not record_terms:
            # Nothing collected: do not create or modify the entry.
            continue
        if recID not in termslist:
            termslist[recID] = []
        termslist[recID] = list_union(record_terms, termslist[recID])
    return termslist
def _collect_string(self, recIDs, termslist):
    """
    Collects terms from specific tags or fields.
    Used together with string tokenizer.

    For every record in ``recIDs`` the values of each tag in
    ``self.tags`` are flattened with get_values_recursively() and
    tokenized. A tag listed in ``self.special_tags`` uses the tokenizer
    registered there; any other tag uses ``self.tokenizing_function``.
    The resulting terms are merged into ``termslist[recID]`` with
    list_union(). Records that produce no terms leave ``termslist``
    untouched.

    Returns the same ``termslist`` object, updated in place.
    """
    wanted_tags = self.tags
    for recID in recIDs:
        record = get_record(recID)
        record_terms = []
        for tag in wanted_tags:
            tokenize = self.special_tags.get(tag, self.tokenizing_function)
            values = []
            get_values_recursively(record.get(tag), values)
            for value in values:
                record_terms.extend(tokenize(value))
        if not record_terms:
            # Nothing collected: do not create or modify the entry.
            continue
        if recID not in termslist:
            termslist[recID] = []
        termslist[recID] = list_union(record_terms, termslist[recID])
    return termslist
def test_dict2_all_fields(self):
    """bibindex termcollectors - get_field_values - nested field

    Collects every value found under ``self.dict2['all']`` and checks
    the collected list against the expected names, in order.
    """
    expected = ['name1', 'name2', 'name4']
    collected = []
    # The helper fills the list passed as its second argument.
    get_values_recursively(self.dict2['all'], collected)
    self.assertEqual(collected, expected)
def test_dict1_subfield(self):
    """bibindex termcollectors - get_field_values - simple field

    Collects the values found under ``self.dict1['all']['people']``
    and checks the collected list against the expected names, in order.
    """
    expected = ['Frank', 'Theodor', 'Richard']
    people = self.dict1['all']['people']
    collected = []
    # The helper fills the list passed as its second argument.
    get_values_recursively(people, collected)
    self.assertEqual(collected, expected)
def test_dict2_all_fields(self):
    """bibindex termcollectors - get_field_values - nested field

    Collects every value found under ``self.dict2['all']`` and checks
    the collected list against the expected names, in order.
    """
    expected = ['name1', 'name2', 'name4']
    collected = []
    # The helper fills the list passed as its second argument.
    get_values_recursively(self.dict2['all'], collected)
    self.assertEqual(collected, expected)
def test_dict1_subfield(self):
    """bibindex termcollectors - get_field_values - simple field

    Collects the values found under ``self.dict1['all']['people']``
    and checks the collected list against the expected names, in order.
    """
    expected = ['Frank', 'Theodor', 'Richard']
    people = self.dict1['all']['people']
    collected = []
    # The helper fills the list passed as its second argument.
    get_values_recursively(people, collected)
    self.assertEqual(collected, expected)