def index(datapath, dbpath):
    """Index the records of a CSV file into a Xapian database.

    Every record yielded by ``parse_csv_file(datapath)`` becomes one
    document.  TITLE and DESCRIPTION are indexed as free text, both with
    the 'S' / 'XD' prefixes and without a prefix, and the whole record is
    stored as JSON in the document data.  Two sortable numeric values are
    added as well:

    * slot 0: the largest number extracted from the MEASUREMENTS field;
    * slot 1: the first number extracted from the DATE_MADE field.

    A slot is simply not set when ``numbers_from_string`` yields nothing
    for the corresponding field.

    :param datapath: path passed to ``parse_csv_file``.
    :param dbpath: path of the Xapian database (created if it is missing).
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # Set up a TermGenerator that we'll use in indexing.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        # Pick out the fields we're going to index.
        description = fields.get('DESCRIPTION', u'')
        title = fields.get('TITLE', u'')
        identifier = fields.get('id_NUMBER', u'')

        # We make a document and tell the term generator to use this.
        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, 'S')
        termgenerator.index_text(description, 1, 'XD')

        # Index fields without prefixes for general search.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        # Store all the fields for display purposes.
        # No 'encoding' keyword here: Python 3's json.dumps() rejects it
        # with a TypeError, and UTF-8 was already the Python 2 default.
        doc.set_data(json.dumps(fields))

        ### Start of example code.
        # Parse the two values we need.
        measurements = fields.get('MEASUREMENTS', u'')
        if measurements != u'':
            numbers = numbers_from_string(measurements)
            if len(numbers) > 0:
                doc.add_value(0, xapian.sortable_serialise(max(numbers)))

        date_made = fields.get('DATE_MADE', u'')
        years = numbers_from_string(date_made)
        if len(years) > 0:
            doc.add_value(1, xapian.sortable_serialise(years[0]))
        ### End of example code.

        # We use the identifier to ensure each object ends up in the
        # database only once no matter how many times we run the
        # indexer.
        idterm = u"Q" + identifier
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
def index(datapath, dbpath):
    """Index the records of a CSV file into a Xapian database.

    Every record yielded by ``parse_csv_file(datapath)`` becomes one
    document.  TITLE and DESCRIPTION are indexed as free text, both with
    the 'S' / 'XD' prefixes and without a prefix.  The COLLECTION field is
    stored in value slot 0 and the MAKER field in value slot 1, and the
    whole record is stored as JSON in the document data.

    :param datapath: path passed to ``parse_csv_file``.
    :param dbpath: path of the Xapian database (created if it is missing).
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # Set up a TermGenerator that we'll use in indexing.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        # Pick out the fields we're going to index.
        description = fields.get('DESCRIPTION', u'')
        title = fields.get('TITLE', u'')
        identifier = fields.get('id_NUMBER', u'')
        collection = fields.get('COLLECTION', u'')
        maker = fields.get('MAKER', u'')

        # We make a document and tell the term generator to use this.
        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, 'S')
        termgenerator.index_text(description, 1, 'XD')

        # Index fields without prefixes for general search.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        ### Start of example code.
        # Add the collection as a value in slot 0.
        doc.add_value(0, collection)

        # Add the maker as a value in slot 1.
        doc.add_value(1, maker)
        ### End of example code.

        # Store all the fields for display purposes.
        # No 'encoding' keyword here: Python 3's json.dumps() rejects it
        # with a TypeError, and UTF-8 was already the Python 2 default.
        doc.set_data(json.dumps(fields))

        # We use the identifier to ensure each object ends up in the
        # database only once no matter how many times we run the
        # indexer.
        idterm = u"Q" + identifier
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
def index(datapath, dbpath):
    """Build or refresh the Xapian database at ``dbpath`` from ``datapath``.

    One document is written per record produced by
    ``parse_csv_file(datapath)``: TITLE and DESCRIPTION are indexed as
    text (prefixed with 'S' / 'XD' and also unprefixed), COLLECTION and
    MAKER go into value slots 0 and 1, and the record itself is kept as
    JSON document data.
    """
    database = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # A single term generator with an "en" stemmer serves every document.
    indexer = xapian.TermGenerator()
    indexer.set_stemmer(xapian.Stem("en"))

    for record in parse_csv_file(datapath):
        # 'record' is a dictionary mapping from field name to value.
        title = record.get('TITLE', u'')
        description = record.get('DESCRIPTION', u'')

        # Fresh document for this record; the generator writes into it.
        document = xapian.Document()
        indexer.set_document(document)

        # Prefixed terms, so each field can be searched on its own.
        for text, prefix in ((title, 'S'), (description, 'XD')):
            indexer.index_text(text, 1, prefix)

        # Unprefixed terms for general search; the term position is
        # bumped between the two fields.
        indexer.index_text(title)
        indexer.increase_termpos()
        indexer.index_text(description)

        ### Start of example code.
        # Slot 0 receives the collection, slot 1 the maker.
        for slot, key in enumerate(('COLLECTION', 'MAKER')):
            document.add_value(slot, record.get(key, u''))
        ### End of example code.

        # Keep the complete record around for display purposes.
        document.set_data(json.dumps(record))

        # Replacing by the "Q"-prefixed identifier term keeps a single
        # copy of each object however often the indexer is re-run.
        unique_term = u"Q" + record.get('id_NUMBER', u'')
        document.add_boolean_term(unique_term)
        database.replace_document(unique_term, document)
def index(datapath, dbpath):
    """Index the records of a CSV file into a Xapian database.

    Every record yielded by ``parse_csv_file(datapath)`` becomes one
    document.  TITLE and DESCRIPTION are indexed as free text, both with
    the 'S' / 'XD' prefixes and without a prefix.  The MATERIALS field is
    split on semicolons and every non-empty entry, stripped and
    lower-cased, is added as a boolean term prefixed with 'XM'.  The whole
    record is stored as JSON in the document data.

    :param datapath: path passed to ``parse_csv_file``.
    :param dbpath: path of the Xapian database (created if it is missing).
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # Set up a TermGenerator that we'll use in indexing.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        # Pick out the fields we're going to index.
        description = fields.get('DESCRIPTION', u'')
        title = fields.get('TITLE', u'')
        identifier = fields.get('id_NUMBER', u'')

        # We make a document and tell the term generator to use this.
        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, 'S')
        termgenerator.index_text(description, 1, 'XD')

        # Index fields without prefixes for general search.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        ### Start of new indexing code.
        # Index the MATERIALS field, splitting on semicolons.
        for material in fields.get('MATERIALS', u'').split(';'):
            material = material.strip().lower()
            if material != '':
                doc.add_boolean_term('XM' + material)
        ### End of new indexing code.

        # Store all the fields for display purposes.
        # No 'encoding' keyword here: Python 3's json.dumps() rejects it
        # with a TypeError, and UTF-8 was already the Python 2 default.
        doc.set_data(json.dumps(fields))

        # We use the identifier to ensure each object ends up in the
        # database only once no matter how many times we run the
        # indexer.
        idterm = u"Q" + identifier
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
def index(datapath, dbpath):
    """Populate the Xapian database at ``dbpath`` from the CSV ``datapath``.

    For each record from ``parse_csv_file(datapath)`` a document is built
    with TITLE and DESCRIPTION indexed as text ('S' / 'XD' prefixes plus
    unprefixed), one 'XM'-prefixed boolean term per semicolon-separated
    MATERIALS entry, and the record serialised as JSON in its data.
    """
    writable_db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # The same generator, stemming with "en", is reused for all records.
    generator = xapian.TermGenerator()
    generator.set_stemmer(xapian.Stem("en"))

    for row in parse_csv_file(datapath):
        # 'row' is a dictionary mapping from field name to value.
        object_id = row.get('id_NUMBER', u'')
        title_text = row.get('TITLE', u'')
        description_text = row.get('DESCRIPTION', u'')

        # Point the generator at a brand-new document.
        xdoc = xapian.Document()
        generator.set_document(xdoc)

        # Field-specific (prefixed) terms first ...
        generator.index_text(title_text, 1, 'S')
        generator.index_text(description_text, 1, 'XD')

        # ... then the same text unprefixed for general search.
        generator.index_text(title_text)
        generator.increase_termpos()
        generator.index_text(description_text)

        ### Start of new indexing code.
        # MATERIALS is a semicolon-separated list; blank entries are
        # skipped and the rest become lower-cased boolean terms.
        raw_materials = row.get('MATERIALS', u'')
        for chunk in raw_materials.split(';'):
            cleaned = chunk.strip().lower()
            if cleaned == '':
                continue
            xdoc.add_boolean_term('XM' + cleaned)
        ### End of new indexing code.

        # The full record is stored so it can be displayed later.
        serialised = json.dumps(row)
        xdoc.set_data(serialised)

        # The identifier term doubles as the replacement key, so each
        # object is stored once regardless of how often we re-index.
        key_term = u"Q" + object_id
        xdoc.add_boolean_term(key_term)
        writable_db.replace_document(key_term, xdoc)
def index(datapath, dbpath):
    """Write one Xapian document per CSV record into the database.

    Records come from ``parse_csv_file(datapath)``.  TITLE and DESCRIPTION
    are indexed with the "S" and "XD" prefixes and again without a prefix;
    the record is stored as JSON document data.  Documents are replaced by
    their "Q" + id_NUMBER term, so re-running does not duplicate them.
    """
    target = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # Term generator configured once, with an "en" stemmer.
    tg = xapian.TermGenerator()
    stemmer = xapian.Stem("en")
    tg.set_stemmer(stemmer)

    # Field name -> prefix, in the order the prefixed text is indexed.
    prefixed_fields = (("TITLE", "S"), ("DESCRIPTION", "XD"))

    for entry in parse_csv_file(datapath):
        # 'entry' is a dictionary mapping from field name to value.
        new_doc = xapian.Document()
        tg.set_document(new_doc)

        # Prefixed indexing for per-field searches.
        for field_name, prefix in prefixed_fields:
            tg.index_text(entry.get(field_name, u""), 1, prefix)

        # Unprefixed indexing for general search, with the term position
        # advanced between title and description.
        tg.index_text(entry.get("TITLE", u""))
        tg.increase_termpos()
        tg.index_text(entry.get("DESCRIPTION", u""))

        # Store every field so results can be displayed.
        new_doc.set_data(json.dumps(entry))

        # Use the identifier as a unique term and replace on it.
        id_term = u"Q" + entry.get("id_NUMBER", u"")
        new_doc.add_boolean_term(id_term)
        target.replace_document(id_term, new_doc)