def build_schema(self, fields):
    """Translate a Haystack field mapping into a Whoosh ``Schema``.

    ``fields`` maps field names to Haystack field instances; each one is
    converted to the matching Whoosh field type keyed by its
    ``index_fieldname``.  Returns a ``(content_field_name, Schema)``
    tuple, where ``content_field_name`` is the index fieldname of the
    field flagged ``document=True`` (``''`` if none was).  Raises
    ``SearchBackendError`` when *fields* contributes nothing beyond the
    hard-coded Haystack keys.
    """
    schema_fields = {
        ID: WHOOSH_ID(stored=True, unique=True),
        DJANGO_CT: WHOOSH_ID(stored=True),
        DJANGO_ID: WHOOSH_ID(stored=True),
    }
    # Number of keys hard-coded into Haystack -- used below to detect
    # (and fail slightly more gracefully on) an effectively empty schema.
    baseline_count = len(schema_fields)
    content_field_name = ''

    for field_class in fields.values():
        index_name = field_class.index_fieldname
        boost = field_class.boost
        ftype = field_class.field_type

        # Multivalued fields take precedence over the field_type dispatch.
        if field_class.is_multivalued:
            if field_class.indexed is False:
                built = IDLIST(stored=True, field_boost=boost)
            else:
                built = KEYWORD(stored=True, commas=True, scorable=True,
                                field_boost=boost)
        elif ftype in ['date', 'datetime']:
            built = DATETIME(stored=field_class.stored)
        elif ftype == 'integer':
            built = NUMERIC(stored=field_class.stored, type=int,
                            field_boost=boost)
        elif ftype == 'float':
            built = NUMERIC(stored=field_class.stored, type=float,
                            field_boost=boost)
        elif ftype == 'boolean':
            # Field boost isn't supported on BOOLEAN as of 1.8.2.
            built = BOOLEAN(stored=field_class.stored)
        elif ftype == 'ngram':
            built = NGRAM(minsize=3, maxsize=15, stored=field_class.stored,
                          field_boost=boost)
        elif ftype == 'edge_ngram':
            built = NGRAMWORDS(minsize=2, maxsize=15, at='start',
                               stored=field_class.stored, field_boost=boost)
        else:
            # Default: stemmed full-text field (always stored).
            built = TEXT(stored=True, analyzer=StemmingAnalyzer(),
                         field_boost=boost)

        schema_fields[index_name] = built

        if field_class.document is True:
            content_field_name = index_name

    # Fail more gracefully than relying on the backend to die if no fields
    # are found.
    if len(schema_fields) <= baseline_count:
        raise SearchBackendError(
            "No fields were found in any search_indexes. Please correct this before attempting to search."
        )

    return (content_field_name, Schema(**schema_fields))
# ``cPickle`` exists only on Python 2 (its C implementation was folded into
# ``pickle`` in Python 3), so fall back instead of crashing on import.
try:
    import cPickle as pickle
except ImportError:
    import pickle

# Directory layout: Sphinx sources, the generated web build, and the
# search index directory that lives inside the build tree.
ROOT = os.path.dirname(os.path.abspath(__file__))
SRCDIR = os.path.join(ROOT, 'source')
BUILDDIR = os.path.join(ROOT, 'build', 'web')
INDEXDIR = os.path.join(BUILDDIR, "data", "db")
print("SRC:{0}, BUILD:{1}, INDEX:{2}".format(SRCDIR, BUILDDIR, INDEXDIR))

uri = os.environ.get('DATABASE_URL')  # DATABASE_URL is given
storage = SQLAlchemyStorage(uri)

# Customize the websupport search schema: unique document path, a
# boosted title field, and NGRAM-indexed body text.
whoosh = whooshsearch.WhooshSearch
whoosh.schema = Schema(path=ID(stored=True, unique=True),
                       title=TEXT(field_boost=2.0, stored=True),
                       text=NGRAM(stored=True))
search = whoosh(INDEXDIR)
support = WebSupport(srcdir=SRCDIR,
                     builddir=BUILDDIR,
                     search=search,
                     storage=storage)

#### flask part
from flask import Flask, render_template, abort, g, request, jsonify, url_for
from jinja2 import Environment, FileSystemLoader

app = Flask(__name__)
#app.debug = True
#
all_fields = ['info', 'value', 'comment', 'tags'] # If field is None, search in all if not fields: search_fields = all_fields elif isinstance(fields, list): for f in fields: if f not in all_fields: raise Exception('Invalid Fieldname') search_fields = fields else: search_fields = [fields] if not os.path.exists("indexdir"): os.mkdir("indexdir") ix = open_dir("indexdir") mparser = MultifieldParser(search_fields, schema=ix.schema, group=OrGroup) with ix.searcher() as searcher: q = mparser.parse(query) responses = searcher.search(q, limit=None) return Counter([r['eid'] for r in responses]) if __name__ == '__main__': from connector import SnapshotConnector connector = SnapshotConnector() schema = Schema(eid=ID(stored=True), info=NGRAM(minsize=5, phrase=True), value=KEYWORD(lowercase=True), comment=NGRAM(minsize=5, phrase=True), tags=KEYWORD(lowercase=True)) index_all(connector, schema)