def index(datapath, dbpath):
    """Index every song found under ``datapath`` into the Xapian database at ``dbpath``.

    The database is created if it does not exist yet, otherwise it is opened
    for writing.  Each song returned by ``get_songs(datapath)`` becomes one
    document; re-running the indexer replaces existing documents instead of
    duplicating them, because each document is keyed by a unique ``Q`` term.

    :param datapath: location handed to ``get_songs`` to enumerate the songs.
    :param dbpath: path of the Xapian database to create or open.
    """
    database = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # One term generator (with English stemming) is shared by all documents.
    indexer = xapian.TermGenerator()
    indexer.set_stemmer(xapian.Stem("en"))

    # Field name -> term prefix, following the conventions listed at
    # http://xapian.org/docs/omega/termprefixes.html
    prefixed_fields = (('title', 'S'), ('artist', 'A'))

    for song in get_songs(datapath):
        document = xapian.Document()
        indexer.set_document(document)

        # Prefixed terms allow field-specific searches.
        for field, prefix in prefixed_fields:
            indexer.index_text(song[field], 1, prefix)

        # Unprefixed terms serve the general search.  The term position is
        # bumped between the two fields so they are not indexed as adjacent.
        indexer.index_text(song['title'])
        indexer.increase_termpos()
        indexer.index_text(song['artist'])

        # Keep the metadata in the document data for display purposes.
        document.set_data(unicode(song['mdata']))

        # NOTE: tags could be added the way notmuch does it, with
        # doc.add_term(str.join(K, "my tag"), 0).

        # The path-based identifier term guarantees that each song ends up in
        # the database only once, however often the indexer is run.
        unique_term = u"Q" + song['path']
        document.add_boolean_term(unique_term)
        database.replace_document(unique_term, document)
def test_dirtree_db_get_songs():
    """Check that every song yielded by ``dirtree.get_songs`` is well formed.

    Each song must be a non-``None`` container that does not contain ``None``
    as a member, and it must provide a non-``None`` value for every required
    key listed below.
    """
    from db import dirtree as dt

    MUSIC_DIR = os.path.join(TESTDIR, "music_dir/")

    # All song objects MUST at least have these keys:
    keys = ["length", "artist", "title", "mtime", "year", "album", "path"]

    for song in dt.get_songs(MUSIC_DIR):
        # Check the song itself first: a membership test on None would raise
        # TypeError instead of producing a meaningful assertion failure.
        assert song is not None
        assert None not in song
        for key in keys:
            assert key in song
            # These keys must also have real values:
            assert song[key] is not None
def index(datapath, dbpath):
    """Index every song found under ``datapath`` into the Xapian database at ``dbpath``.

    The database is created if it does not exist yet, otherwise it is opened
    for writing.  Every field named in ``PREFIXES`` is indexed twice per song:
    once with its prefix for field-specific search and once without a prefix
    for general search.  The whole song is stored as JSON in the document data.

    :param datapath: root location handed to ``get_songs``; song paths are
        made relative to it when building the unique identifier term.
    :param dbpath: path of the Xapian database to create or open.
    """
    database = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # One term generator (with English stemming) is shared by all documents.
    indexer = xapian.TermGenerator()
    indexer.set_stemmer(xapian.Stem("en"))

    for song in get_songs(datapath):
        document = xapian.Document()
        indexer.set_document(document)

        # Prefixed terms allow field-specific searches.
        for field, prefix in PREFIXES.items():
            indexer.index_text(unicode(song[field]), 1, prefix)

        # Unprefixed terms serve the general search.  The term position is
        # bumped after every field so consecutive fields are not indexed as
        # adjacent text.
        for field in PREFIXES:
            indexer.index_text(unicode(song[field]))
            indexer.increase_termpos()

        # Keep all fields, serialized as JSON, for display purposes.
        document.set_data(unicode(json.dumps(song)))

        # NOTE: tags could be added the way notmuch does it, with
        # doc.add_term(str.join(K, "my tag"), 0).

        # The identifier term guarantees that each song ends up in the
        # database only once, however often the indexer is run.  Paths are
        # taken relative to the data root to keep the term slightly shorter.
        # This may need better handling in the future, see the FAQ:
        # http://trac.xapian.org/wiki/FAQ/UniqueIds
        unique_term = "Q" + os.path.relpath(song['path'], datapath)
        document.add_boolean_term(unique_term)
        database.replace_document(unique_term, document)