Python ConnectionManager примеры использования

Язык программирования: Python

Пространство имен/Пакет: hypatia.catalog

Класс/Тип: ConnectionManager

Примеров на hotexamples.com: 5

Python ConnectionManager - 5 примеров найдено. Это лучшие примеры Python кода для hypatia.catalog.ConnectionManager, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ConnectionManager(3)

close(2)

commit(2)

Пример #1

Показать файл

def prep_catalog():
    """Prepare the benchmark data set: mailing-list archive plus catalog.

    Steps, each skipped when its result already exists:

    1. Create ``BENCHMARK_DATA_DIR``.
    2. Fetch the mailing-list archive via ``MailListSucker`` when
       ``get_mailbox_filenames()`` reports no mailbox files.
    3. When ``test.zodb`` is absent, create a catalog with the
       ``subject``, ``date``, ``sender_email``, ``topics`` and ``text``
       indexes and index every message yielded by ``MessageIterator``.

    Progress is timed through the module-level ``profiler`` and the number
    of indexed messages is printed at the end.
    """
    if not os.path.exists(BENCHMARK_DATA_DIR):
        os.makedirs(BENCHMARK_DATA_DIR)

    # Check to see if mailing list data already present
    if len(get_mailbox_filenames()) == 0:
        MailListSucker(MAILLIST_INDEX, BENCHMARK_DATA_DIR).suck()

    # Create ZODB and index maillist messages, if not yet done
    zodb_file = os.path.join(BENCHMARK_DATA_DIR, 'test.zodb')
    if os.path.exists(zodb_file):
        return

    # Create a catalog
    manager = ConnectionManager()
    factory = FileStorageCatalogFactory(zodb_file, 'benchmark')
    c = factory(manager)

    try:
        # Create some indices
        c['subject'] = CatalogFieldIndex(get_subject)
        c['date'] = CatalogFieldIndex(get_date)
        c['sender_email'] = CatalogFieldIndex(get_sender_email)
        c['topics'] = CatalogFacetIndex(get_topics, topic_taxonomy)
        c['text'] = CatalogTextIndex(get_text)
        manager.commit()

        # Loop over messages to get base line
        profiler.start("Loop over messages without indexing")
        for _ in MessageIterator():
            pass
        profiler.stop("Loop over messages without indexing")

        profiler.start("Index messages")
        count = 0
        for count, msg in enumerate(MessageIterator(), 1):
            c.index_doc(count, msg)
            # Commit in batches of 100 documents.  The previous test,
            # ``id / 100 == 0``, used division instead of modulo and so
            # never fired under true division.
            if count % 100 == 0:
                manager.commit()
        manager.commit()
    finally:
        # Release the connection even when indexing fails part-way.
        manager.close()

    profiler.stop("Index messages")
    # ``count`` is the number of documents indexed (doc ids run 1..count).
    print("Indexed %d messages" % count)

Пример #2

Показать файл

def run():
    """Run the catalog search benchmark and print the profiler report.

    Ensures the benchmark catalog exists (``prep_catalog``), opens it, and
    times a fixed series of searches with the module-level ``profiler``:
    four date-range retrievals (unsorted, repeated, sorted by subject,
    reverse sorted), one facet-restricted search, and a facet count over
    that search's results.  The connection manager is closed on exit, even
    if a search raises.
    """
    # Download mailbox archive of python mailing list and build
    # catalog if needed
    prep_catalog()

    # Open a catalog
    manager = ConnectionManager()
    factory = FileStorageCatalogFactory(
        os.path.join(BENCHMARK_DATA_DIR, 'test.zodb'), 'benchmark')
    c = factory(manager)

    try:
        # Do some searches.  Each entry is (profiler label, search kwargs);
        # all four share the same date range and differ only in sorting.
        date_range = ('0', 'Z')
        timed_searches = (
            ("unsorted retrieval",
             {'date': date_range}),
            ("repeat unsorted retrieval",
             {'date': date_range}),
            ("sorted retrieval",
             {'date': date_range, 'sort_index': 'subject'}),
            ("reverse sorted retrieval",
             {'date': date_range, 'sort_index': 'subject', 'reverse': True}),
        )
        for label, query in timed_searches:
            profiler.start(label)
            n, results = c.search(**query)
            print('%d results ' % n)
            # Force generator to marshall brains
            for _ in results:
                pass
            profiler.stop(label)

        profiler.start('limit to topic=year:2000')
        n, results = c.search(topics=['year:2000'])
        print('%d results' % n)
        # Materialise the results: they are reused for the facet count.
        L = list(results)
        profiler.stop("limit to topic=year:2000")

        profiler.start('count limited to topic=year:2000')
        print(c['topics'].counts(L, ['year:2000']))
        profiler.stop('count limited to topic=year:2000')

        profiler.stop()
        profiler.print_stack()
    finally:
        # The original never closed the manager; release it explicitly.
        manager.close()

Пример #3

Показать файл

Файл: benchmark.py Проект: erowan/hypatia

def prep_catalog():
    """Prepare the benchmark data set: mailing-list archive plus catalog.

    Steps, each skipped when its result already exists:

    1. Create ``BENCHMARK_DATA_DIR``.
    2. Fetch the mailing-list archive via ``MailListSucker`` when
       ``get_mailbox_filenames()`` reports no mailbox files.
    3. When ``test.zodb`` is absent, create a catalog with the
       ``subject``, ``date``, ``sender_email``, ``topics`` and ``text``
       indexes and index every message yielded by ``MessageIterator``.

    Progress is timed through the module-level ``profiler`` and the number
    of indexed messages is printed at the end.
    """
    if not os.path.exists(BENCHMARK_DATA_DIR):
        os.makedirs(BENCHMARK_DATA_DIR)

    # Check to see if mailing list data already present
    if len(get_mailbox_filenames()) == 0:
        MailListSucker(MAILLIST_INDEX, BENCHMARK_DATA_DIR).suck()

    # Create ZODB and index maillist messages, if not yet done
    zodb_file = os.path.join(BENCHMARK_DATA_DIR, 'test.zodb')
    if os.path.exists(zodb_file):
        return

    # Create a catalog
    manager = ConnectionManager()
    factory = FileStorageCatalogFactory(zodb_file, 'benchmark')
    c = factory(manager)

    try:
        # Create some indices
        c['subject'] = CatalogFieldIndex(get_subject)
        c['date'] = CatalogFieldIndex(get_date)
        c['sender_email'] = CatalogFieldIndex(get_sender_email)
        c['topics'] = CatalogFacetIndex(get_topics, topic_taxonomy)
        c['text'] = CatalogTextIndex(get_text)
        manager.commit()

        # Loop over messages to get base line
        profiler.start("Loop over messages without indexing")
        for _ in MessageIterator():
            pass
        profiler.stop("Loop over messages without indexing")

        profiler.start("Index messages")
        count = 0
        for count, msg in enumerate(MessageIterator(), 1):
            c.index_doc(count, msg)
            # Commit in batches of 100 documents.  The previous test,
            # ``id / 100 == 0``, used division instead of modulo: with
            # Python 2 floor division it was true only for ids below 100,
            # so it committed per message at first and then never again.
            if count % 100 == 0:
                manager.commit()
        manager.commit()
    finally:
        # Release the connection even when indexing fails part-way.
        manager.close()

    profiler.stop("Index messages")
    # ``count`` is the number of documents indexed (doc ids run 1..count).
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Indexed %d messages" % count)

Пример #4

Показать файл

Файл: intersection.py Проект: timgates42/hypatia

def do_benchmark(fname, nd, nk1, nk2, out=sys.stdout):
    """Compare two intersection algorithms on a freshly built catalog.

    Builds a catalog stored at ``fname`` with two field indexes, indexes
    ``nd`` synthetic documents, then runs the same random two-term queries
    through ``Intersection1`` and ``Intersection2``, timing each and
    asserting that both return the same document ids.

    WARNING: every file matching ``fname + "*"`` is deleted both before
    and after the run.

    :param fname: path of the catalog storage file to create.
    :param nd: number of documents to index (doc ids ``0 .. nd - 1``).
    :param nk1: number of distinct keys in index ``one``.
    :param nk2: number of distinct keys in index ``two``.
    :param out: file-like object that receives the report.
    :returns: ``(cost1 / cost2, cumulative1 / cumulative2)`` -- the
        predicted and the measured cost ratio of algorithm 1 over 2.
    """
    cumulative1 = 0.0
    cumulative2 = 0.0

    print("Index 1:", file=out)
    print("\t# docs: %d" % nd, file=out)
    print("\t# distinct keys: %d" % nk1, file=out)
    print("Index 2:", file=out)
    print("\t# docs: %d" % nd, file=out)
    print("\t# distinct keys: %d" % nk2, file=out)
    print("", file=out)

    cost1, cost2 = predictions(nd, nk1, nk2)

    print('Cost1: %0.2f' % cost1, file=out)
    print('Cost2: %0.2f' % cost2, file=out)
    print("Prediction:", file=out)
    if cost1 > cost2:
        print("Algorithm 2 %0.2f times faster than Algorithm 1" %
              (cost1 / cost2),
              file=out)
    else:
        print("Algorithm 1 %0.2f times faster than Algorithm 2" %
              (cost2 / cost1),
              file=out)

    print("", file=out)
    print("Setting up indexes...", file=out)
    # Start from a clean slate: drop leftovers of a previous run.
    for fn in glob.glob(fname + "*"):
        os.remove(fn)

    manager = ConnectionManager()
    factory = FileStorageCatalogFactory(fname, 'intersection')
    catalog = factory(manager)

    catalog['one'] = CatalogFieldIndex('one')
    catalog['two'] = CatalogFieldIndex('two')

    class Document(object):
        """Synthetic document whose field values cycle through the keys."""

        def __init__(self, docid):
            self.one = str(docid % nk1)
            self.two = str(docid % nk2)

    for docid in xrange(nd):
        catalog.index_doc(docid, Document(docid))
    manager.commit()
    manager.close()

    N_QUERIES = 1000
    print("Running %d queries for each algorithm..." % N_QUERIES, file=out)
    # NOTE(review): the catalog is obtained again through the same manager
    # after close() -- presumably to query over a fresh connection; confirm.
    catalog = factory(manager)
    # Iterate N_QUERIES times so the loop matches the count reported above
    # (the loop bound used to be a separate hard-coded 1000).
    for _ in xrange(N_QUERIES):
        key1 = random.randrange(nk1)
        key2 = random.randrange(nk2)
        query1 = Intersection1(Eq('one', str(key1)), Eq('two', str(key2)))
        query2 = Intersection2(Eq('one', str(key1)), Eq('two', str(key2)))

        start = time.time()
        result1 = query1.apply(catalog)
        cumulative1 += time.time() - start

        start = time.time()
        result2 = query2.apply(catalog)
        cumulative2 += time.time() - start

        # Both algorithms must select exactly the same documents.
        s1 = sorted(result1)
        s2 = sorted(result2)

        assert s1 == s2, (s1, s2)

    manager.close()
    for fn in glob.glob(fname + "*"):
        os.remove(fn)

    print("", file=out)
    print("Result:", file=out)
    print("Time for algorithm1: %0.3f s" % cumulative1, file=out)
    print("Time for algorithm2: %0.3f s" % cumulative2, file=out)
    if cumulative1 > cumulative2:
        print("Algorithm 2 %0.2f times faster than Algorithm 1" %
              (cumulative1 / cumulative2),
              file=out)
    else:
        print("Algorithm 1 %0.2f times faster than Algorithm 2" %
              (cumulative2 / cumulative1),
              file=out)
    return cost1 / cost2, cumulative1 / cumulative2

Пример #5

Показать файл

Файл: intersection.py Проект: erowan/hypatia

def do_benchmark(fname, nd, nk1, nk2, out=sys.stdout):
    """Compare two intersection algorithms on a freshly built catalog.

    Builds a catalog stored at ``fname`` with two field indexes, indexes
    ``nd`` synthetic documents, then runs the same random two-term queries
    through ``Intersection1`` and ``Intersection2``, timing each and
    asserting that both return the same document ids.

    WARNING: every file matching ``fname + "*"`` is deleted both before
    and after the run.

    :param fname: path of the catalog storage file to create.
    :param nd: number of documents to index (doc ids ``0 .. nd - 1``).
    :param nk1: number of distinct keys in index ``one``.
    :param nk2: number of distinct keys in index ``two``.
    :param out: file-like object that receives the report.
    :returns: ``(cost1 / cost2, cumulative1 / cumulative2)`` -- the
        predicted and the measured cost ratio of algorithm 1 over 2.
    """
    cumulative1 = 0.0
    cumulative2 = 0.0

    print >>out, "Index 1:"
    print >>out, "\t# docs: %d" % nd
    print >>out, "\t# distinct keys: %d" % nk1
    print >>out, "Index 2:"
    print >>out, "\t# docs: %d" % nd
    print >>out, "\t# distinct keys: %d" % nk2
    print >>out, ""

    cost1, cost2 = predictions(nd, nk1, nk2)

    print >>out, 'Cost1: %0.2f' % cost1
    print >>out, 'Cost2: %0.2f' % cost2
    print >>out
    print >>out, "Prediction:"
    if cost1 > cost2:
        print >>out, "Algorithm 2 %0.2f times faster than Algorithm 1" % (
            cost1/cost2)
    else:
        print >>out, "Algorithm 1 %0.2f times faster than Algorithm 2" % (
            cost2/cost1)

    print >>out, ""
    print >>out, "Setting up indexes..."
    # Start from a clean slate: drop leftovers of a previous run.
    for fn in glob.glob(fname + "*"):
        os.remove(fn)

    manager = ConnectionManager()
    factory = FileStorageCatalogFactory(fname, 'intersection')
    catalog = factory(manager)

    catalog['one'] = CatalogFieldIndex('one')
    catalog['two'] = CatalogFieldIndex('two')

    class Document(object):
        """Synthetic document whose field values cycle through the keys."""

        def __init__(self, docid):
            self.one = str(docid % nk1)
            self.two = str(docid % nk2)

    for docid in xrange(nd):
        catalog.index_doc(docid, Document(docid))
    manager.commit()
    manager.close()

    N_QUERIES = 1000
    print >>out, "Running %d queries for each algorithm..." % N_QUERIES
    # NOTE(review): the catalog is obtained again through the same manager
    # after close() -- presumably to query over a fresh connection; confirm.
    catalog = factory(manager)
    # Iterate N_QUERIES times so the loop matches the count reported above
    # (the loop bound used to be a separate hard-coded 1000).
    for _ in xrange(N_QUERIES):
        key1 = random.randrange(nk1)
        key2 = random.randrange(nk2)
        query1 = Intersection1(Eq('one', str(key1)), Eq('two', str(key2)))
        query2 = Intersection2(Eq('one', str(key1)), Eq('two', str(key2)))

        start = time.time()
        result1 = query1.apply(catalog)
        cumulative1 += time.time() - start

        start = time.time()
        result2 = query2.apply(catalog)
        cumulative2 += time.time() - start

        # Both algorithms must select exactly the same documents.
        s1 = sorted(result1)
        s2 = sorted(result2)

        assert s1 == s2, (s1, s2)

    manager.close()
    for fn in glob.glob(fname + "*"):
        os.remove(fn)

    print >>out, ""
    print >>out, "Result:"
    print >>out, "Time for algorithm1: %0.3f s" % cumulative1
    print >>out, "Time for algorithm2: %0.3f s" % cumulative2
    if cumulative1 > cumulative2:
        print >>out, "Algorithm 2 %0.2f times faster than Algorithm 1" % (
            cumulative1/cumulative2)
    else:
        print >>out, "Algorithm 1 %0.2f times faster than Algorithm 2" % (
            cumulative2/cumulative1)
    return cost1 / cost2, cumulative1 / cumulative2