def main(): """An example of using the Indexer wrapper. """ # TODO Command line argument passing # TODO e.g. # TODO -d directory to store index in # TODO -i directory to recursively index import time tt = time.time() filedir = 'aesop' indexName = 'aesopind' if os.path.exists(indexName): for f in os.listdir(indexName): os.remove(os.path.join(indexName, f)) # Remove results of previous runs os.rmdir(indexName) # Create a new Index index = Index(indexName, create=True) index.setMergeFactor(20) # Get the files files = os.listdir(filedir) for name in files: f = os.path.join(filedir, name) if os.path.isdir(f) or os.path.islink(f): continue text = open(f, 'rb').read().decode("latin-1") title = text.split('\n\n\n')[0] print 'indexing:', f # the next line creates a Document with 2 fields # one field is named text and the other is named # filename. The latter is created as Keyword since # the name is preceded by '_'. Naughty but expdient. index.index(text=text, __title=title, _filename=f) # Uncomment the following line to optimize the index. # Have a look in the index dir before you optimize. # You will probably see a dozens of files from # several segments. optimize() merges all the segments # into one. It can be quite an expensive operation, but # it can save space and speed up searches. #index.optimize() queries = [ 'fox', u'intô', 'python', 'fox python', '"the Fox and the"', 'the fox and python' ] for q in queries: hits = index.find(q) print q.encode('utf8'), hits for h in hits: print '\tFound in %s (%s)' % (h.get('filename'), h.get('title')) index.close() print 'Elapsed time:', time.time() - tt
def main(): """An example of using the Indexer wrapper. """ # TODO Command line argument passing # TODO e.g. # TODO -d directory to store index in # TODO -i directory to recursively index import time tt = time.time() filedir = 'aesop' indexName = 'aesopind' if os.path.exists(indexName): for f in os.listdir(indexName): os.remove(os.path.join(indexName, f)) # Remove results of previous runs os.rmdir(indexName) # Create a new Index index = Index(indexName, create = True) index.setMergeFactor(20) # Get the files files = os.listdir(filedir) for name in files: f = os.path.join(filedir, name) if os.path.isdir(f) or os.path.islink(f): continue text = open(f, 'rb').read().decode("latin-1") title = text.split('\n\n\n')[0] print 'indexing:', f # the next line creates a Document with 2 fields # one field is named text and the other is named # filename. The latter is created as Keyword since # the name is preceded by '_'. Naughty but expdient. index.index(text=text, __title=title, _filename=f) # Uncomment the following line to optimize the index. # Have a look in the index dir before you optimize. # You will probably see a dozens of files from # several segments. optimize() merges all the segments # into one. It can be quite an expensive operation, but # it can save space and speed up searches. #index.optimize() queries = ['fox', u'intô', 'python', 'fox python', '"the Fox and the"', 'the fox and python'] for q in queries: hits = index.find(q) print q.encode('utf8'), hits for h in hits: print '\tFound in %s (%s)' % (h.get('filename'), h.get('title')) index.close() print 'Elapsed time:', time.time() - tt
def search(self, results, user, path, query, sort, start, end):
    """ Perform a search in the 'text' field of each searchable item.

    If the search string is enclosed in double quotes, a phrase search
    will be run; otherwise, the search will be for documents containing
    all words specified.  This lupy implementation ignores the sort
    parameter, and always sorts by relevance.

    Matching, readable documents for the page [start, end] are appended
    to `results` (mutated in place) as SearchResult objects; the return
    value is the total number of hits the given user may read.
    # NOTE(review): exact 1-based vs 0-based meaning of start/end is
    # inferred from the arithmetic below — confirm against callers.
    """
    index = Index(self._lupy_index_dir, False)
    hits = index.findInField(text=query)
    numhits = len(hits)
    # lupy is totally brain-dead, as it returns the hits in reverse
    # order (least relevant first), so we have to retrieve *all* the
    # hits and work our way backwards.
    # Also, hits.doc() has an obvious < vs. <= error that makes
    # hits.doc() raise an index-out-of-range error, so we just call
    # hits.getMoreDocs() up front to fetch every hit.
    hits.getMoreDocs(numhits)
    # Go through each hit in reverse order (i.e. most relevant first)
    # and assemble our list of search results.
    skipped = 0
    numhits_accessible = 0
    for x in range(numhits-1, -1, -1):
        d = hits.doc(x)
        # Silently drop documents this user is not permitted to read.
        if not self._can_read(d, user, path):
            continue
        # We got a good one, so tally it up.
        numhits_accessible += 1
        # Paging: skip the first (start - 1) readable hits, then
        # collect at most (end - start + 1) results.
        if skipped < start-1:
            skipped += 1
        else:
            if len(results) < (end - start + 1):
                # Create a SearchResult object and append it.
                sr = SearchResult(d, hits.score(x))
                results.append(sr)
    return numhits_accessible
# import lucene # from lupyne import engine # don't forget to call lucene.initVM # from lupyne.engine.indexers import Indexer # indexer = Indexer() # create an in-memory index (no filename supplied) # indexer.set('name', stored=True) # create stored 'name' field # indexer.set('text') # create indexed 'text' field (the default) # indexer.add(name='sample', text='hello world') # add a document to the index # indexer.commit() # commit changes; document is now searchable # hits = indexer.search('text:hello') # run search and return sequence of documents # len(hits), hits.count # 1 hit retrieved (out of a total of 1) # hit, = hits # hit['name'] # hits support mapping interface for their stored fields # u'sample' # print(hit.id, hit.score) # plus internal doc number and score # # (0, 0.19178301095962524) # print(hit.dict()) # import lucene from lupy.indexer import Index # we create index named "foobar", create True = overwrite existing index = Index('foobar', create=True)
# General Public License as published by the Free Software Foundation. import os, sys import email, email.Iterators, email.Errors import time from lupy.indexer import Index filedir = sys.argv[1] indexName = 'emailindex' tt = time.time() # Create a new Index index = Index(indexName, create=True) # Get the files files = os.listdir(filedir) i=0 for f in files: print 'Indexing', f fp = open(os.path.join(filedir, f)) # Try to parse the message try: msg = email.message_from_file(fp) except email.Errors.MessageParseError: print 'Bad msg:', f continue