Python NGRAM.NGRAM示例

编程语言: Python

命名空间/包名称: whoosh.fields

类/类型: NGRAM

方法/功能: NGRAM

hotexamples.com的示例: 3

Python NGRAM.NGRAM - 已找到3个示例。这些是从开源项目中提取的、评价最高的 whoosh.fields.NGRAM.NGRAM 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

NGRAM(3)

常用方法

NGRAM (3)

示例#1

显示文件

文件： whoosh_backend.py 项目： ierror/django-haystack-modeltranslation

    def build_schema(self, fields):
        """Build a Whoosh schema from a mapping of search-index fields.

        Each value in ``fields`` is inspected (``is_multivalued``,
        ``indexed``, ``field_type``, ``stored``, ``boost``, ``document``,
        ``index_fieldname``) and mapped onto a Whoosh field type, which is
        registered under the value's ``index_fieldname``.

        Returns a ``(content_field_name, schema)`` tuple, where
        ``content_field_name`` is the ``index_fieldname`` of the last field
        whose ``document`` attribute is ``True`` (``''`` when there is none).

        Raises ``SearchBackendError`` when the loop added no key beyond the
        three hard-coded ones.
        """
        schema_fields = {
            ID: WHOOSH_ID(stored=True, unique=True),
            DJANGO_CT: WHOOSH_ID(stored=True),
            DJANGO_ID: WHOOSH_ID(stored=True),
        }
        # Remember how many keys are hard-coded so that "nothing was added"
        # can be detected once every field has been processed.
        builtin_count = len(schema_fields)
        content_field_name = ''

        for _unused_name, field in fields.items():
            if field.is_multivalued:
                # Multi-valued fields are always stored; non-indexed ones
                # become an IDLIST, indexed ones a comma-separated KEYWORD.
                if field.indexed is False:
                    whoosh_field = IDLIST(
                        stored=True, field_boost=field.boost)
                else:
                    whoosh_field = KEYWORD(
                        stored=True,
                        commas=True,
                        scorable=True,
                        field_boost=field.boost)
            elif field.field_type in ('date', 'datetime'):
                whoosh_field = DATETIME(stored=field.stored)
            elif field.field_type == 'integer':
                whoosh_field = NUMERIC(
                    stored=field.stored,
                    type=int,
                    field_boost=field.boost)
            elif field.field_type == 'float':
                whoosh_field = NUMERIC(
                    stored=field.stored,
                    type=float,
                    field_boost=field.boost)
            elif field.field_type == 'boolean':
                # Field boost isn't supported on BOOLEAN as of 1.8.2.
                whoosh_field = BOOLEAN(stored=field.stored)
            elif field.field_type == 'ngram':
                whoosh_field = NGRAM(
                    minsize=3,
                    maxsize=15,
                    stored=field.stored,
                    field_boost=field.boost)
            elif field.field_type == 'edge_ngram':
                whoosh_field = NGRAMWORDS(
                    minsize=2,
                    maxsize=15,
                    at='start',
                    stored=field.stored,
                    field_boost=field.boost)
            else:
                # Anything else is indexed as stemmed, stored full text.
                whoosh_field = TEXT(
                    stored=True,
                    analyzer=StemmingAnalyzer(),
                    field_boost=field.boost)

            schema_fields[field.index_fieldname] = whoosh_field

            if field.document is True:
                content_field_name = field.index_fieldname

        # Fail with a clear message instead of letting the backend die later
        # when no field was contributed by the search indexes.
        if len(schema_fields) <= builtin_count:
            raise SearchBackendError(
                "No fields were found in any search_indexes. Please correct this before attempting to search."
            )

        return (content_field_name, Schema(**schema_fields))

示例#2

显示文件

文件： __init__.py 项目： shirou/sphinx-websupport-demo-for-heroku

import cPickle as pickle

# Directory layout, resolved relative to the directory containing this file.
ROOT = os.path.dirname(os.path.abspath(__file__))
SRCDIR = os.path.join(ROOT, 'source')
BUILDDIR = os.path.join(ROOT, 'build', 'web')
INDEXDIR = os.path.join(BUILDDIR, "data", "db")

# Report the resolved paths when the module is imported.
print("SRC:{0}, BUILD:{1}, INDEX:{2}".format(SRCDIR, BUILDDIR, INDEXDIR))

# DATABASE_URL is expected to be provided by the environment; `uri` is None
# when the variable is not set.
uri = os.environ.get('DATABASE_URL')
storage = SQLAlchemyStorage(uri)

# `whoosh` is bound to WhooshSearch itself (it is only called further down),
# so the schema is replaced on that object before the search instance is
# created. The `text` field uses NGRAM instead of a plain text field.
whoosh = whooshsearch.WhooshSearch
whoosh.schema = Schema(path=ID(stored=True, unique=True),
                       title=TEXT(field_boost=2.0, stored=True),
                       text=NGRAM(stored=True))
search = whoosh(INDEXDIR)

# Wire the source/build directories, the search object and the storage
# object into the web-support object.
support = WebSupport(srcdir=SRCDIR,
                     builddir=BUILDDIR,
                     search=search,
                     storage=storage)

#### Flask part: web application object

from flask import Flask, render_template, abort, g, request, jsonify, url_for
from jinja2 import Environment, FileSystemLoader

app = Flask(__name__)

# Debug mode is left disabled; uncomment the next line to enable it.
# app.debug = True

示例#3

显示文件

    all_fields = ['info', 'value', 'comment', 'tags']
    # If field is None, search in all
    if not fields:
        search_fields = all_fields
    elif isinstance(fields, list):
        for f in fields:
            if f not in all_fields:
                raise Exception('Invalid Fieldname')
        search_fields = fields
    else:
        search_fields = [fields]
    if not os.path.exists("indexdir"):
        os.mkdir("indexdir")
    ix = open_dir("indexdir")
    mparser = MultifieldParser(search_fields, schema=ix.schema, group=OrGroup)
    with ix.searcher() as searcher:
        q = mparser.parse(query)
        responses = searcher.search(q, limit=None)
        return Counter([r['eid'] for r in responses])


if __name__ == '__main__':
    # Script entry point: build the connector and the schema, then hand both
    # to index_all().
    from connector import SnapshotConnector

    connector = SnapshotConnector()

    # `eid` is the only field explicitly marked as stored. `info` and
    # `comment` are n-gram fields (minsize=5, phrase=True); `value` and
    # `tags` are lowercased keyword fields.
    schema = Schema(
        eid=ID(stored=True),
        info=NGRAM(minsize=5, phrase=True),
        value=KEYWORD(lowercase=True),
        comment=NGRAM(minsize=5, phrase=True),
        tags=KEYWORD(lowercase=True),
    )

    index_all(connector, schema)