def test_authority_record_string_by_control_no(self):
    """bibauthority - simple test of get_index_strings_by_control_no()"""
    # Build a full control number of the form "<type><sep>(SzGeCERN)aaa0005".
    _type = CFG_BIBAUTHORITY_TYPE_NAMES['AUTHOR']
    control_no = _type + CFG_BIBAUTHORITY_PREFIX_SEP + '(SzGeCERN)aaa0005'
    string = 'Ellis, Jonathan Richard'
    # assertIn reports both the missing item and the container on failure,
    # unlike assertTrue(x in y) which only prints "False is not true".
    self.assertIn(string, get_index_strings_by_control_no(control_no))
def test_authority_record_get_values_by_bibrecID_from_tag(self):
    """bibindex - find authors in authority records for given bibrecID"""
    tags = ['100__a']
    bibRecID = 9
    values = []
    for tag in tags:
        # The $0 subfield of the same MARC field carries the control number
        # linking the bibliographic record to its authority record.
        authority_tag = tag[0:3] + "__0"
        for control_no in get_fieldvalues(bibRecID, authority_tag):
            values.extend(get_index_strings_by_control_no(control_no))
    # assertIn gives an informative failure message (shows the container),
    # unlike the original assertTrue('...' in values).
    self.assertIn('Ellis, Jonathan Richard', values)
def _get_phrases_for_tokenizing(self, tag, recIDs):
    """
    Gets phrases for later tokenization for a range of records and
    specific tag.

    Returns (recID, value) pairs read from the bibXXx tables for the
    indexer's current record range (self.first_recID..self.last_recID),
    augmented with document URLs for tag 8564_u and with index strings
    harvested from linked authority records for controlled fields.

    @param tag: MARC tag
    @param recIDs: list of specific recIDs (not range)
    @return: empty tuple when recIDs is empty; otherwise a sequence (or
        set, once augmented) of (recID, value) tuples
    """
    # Pythonic emptiness test instead of len(recIDs) == 0.
    if not recIDs:
        return ()
    # Table names are derived from the first two tag digits (bib10x, ...).
    bibXXx = "bib" + tag[0] + tag[1] + "x"
    bibrec_bibXXx = "bibrec_" + bibXXx
    query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec BETWEEN %%s AND %%s AND bb.id_bibxxx=b.id AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx)
    phrases = run_sql(query, (self.first_recID, self.last_recID, tag))
    if tag == '8564_u':
        ## FIXME: Quick hack to be sure that hidden files are
        ## actually indexed.
        phrases = set(phrases)
        for recID in recIDs:
            for bibdocfile in BibRecDocs(recID).list_latest_files():
                phrases.add((recID, bibdocfile.get_url()))
    # Authority records: translate the SQL LIKE wildcard into fnmatch's
    # glob wildcard before matching against the controlled-field list.
    pattern = tag.replace('%', '*')
    matches = fnmatch.filter(
        CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.keys(),
        pattern)
    # Pythonic emptiness test instead of "not len(matches)".
    if not matches:
        return phrases
    phrases = set(phrases)
    for tag_match in matches:
        # The $0 subfield of the matched field holds the control number.
        authority_tag = tag_match[0:3] + "__0"
        for recID in recIDs:
            for control_no in get_fieldvalues(recID, authority_tag):
                for string_value in get_index_strings_by_control_no(control_no):
                    phrases.add((recID, string_value))
    return phrases
def _get_phrases_for_tokenizing(self, tag, recIDs):
    """Collect (recID, value) phrase pairs for later tokenization.

    Reads the values of ``tag`` for the indexer's current record range,
    then (for 8564_u) adds the latest document URLs, and finally adds
    index strings obtained from authority records linked via the $0
    subfield of any controlled field matching ``tag``.

    @param tag: MARC tag
    @param recIDs: list of specific recIDs (not range)
    """
    if not recIDs:
        return ()
    value_table = "bib" + tag[0] + tag[1] + "x"
    link_table = "bibrec_" + value_table
    query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec BETWEEN %%s AND %%s AND bb.id_bibxxx=b.id AND tag LIKE %%s""" % (value_table, link_table)
    phrases = run_sql(query, (self.first_recID, self.last_recID, tag))
    if tag == '8564_u':
        ## FIXME: Quick hack to be sure that hidden files are
        ## actually indexed.
        phrases = set(phrases)
        for rec_id in recIDs:
            for doc_file in BibRecDocs(rec_id).list_latest_files():
                phrases.add((rec_id, doc_file.get_url()))
    # authority records
    glob_pattern = tag.replace('%', '*')
    controlled_tags = fnmatch.filter(
        CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.keys(),
        glob_pattern)
    if not len(controlled_tags):
        return phrases
    phrases = set(phrases)
    for matched_tag in controlled_tags:
        authority_tag = matched_tag[0:3] + "__0"
        for rec_id in recIDs:
            for control_no in get_fieldvalues(rec_id, authority_tag):
                for index_string in get_index_strings_by_control_no(control_no):
                    phrases.add((rec_id, index_string))
    return phrases
def test_indexing_of_deleted_authority_record(self):
    """bibindex - no info for indexing from deleted authority record"""
    deleted_recID = 119  # record 119 is a deleted authority record
    first_control_no = get_control_nos_from_recID(deleted_recID)[0]
    # A deleted authority record must yield no index strings at all.
    self.assertEqual([], get_index_strings_by_control_no(first_control_no))