Python Index.add_document примеры использования

Язык программирования: Python

Пространство имен/Пакет: index

Класс/Тип: Index

Метод/Функция: add_document

Примеров на hotexamples.com: 22

Python Index.add_document - 22 примеров найдено. Это лучшие примеры Python кода для index.Index.add_document, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Index(17)

add_document(11)

add(8)

PUT_SCHEMA(3)

add_entry(3)

add_index_range(2)

read_index(2)

exists(2)

open_or_create(2)

get_status(1)

get_term(1)

get_or_create_instance(1)

get_net_interface(1)

get_keywords(1)

index_media(1)

index_object(1)

is_duplicate(1)

CreateIndex(1)

name(1)

post_syslog(1)

get_document_vector(1)

put_status(1)

remove_word(1)

rm_data(1)

status(1)

storeIndex(1)

train_path(1)

update_md5s(1)

verify(1)

get_items_generator(1)

getParserType(1)

get_data_by_id(1)

add_key(1)

SearchIndex(1)

__init__(1)

_fields(1)

_kw(1)

addTask(1)

add_data(1)

add_doc(1)

add_downloader(1)

add_index(1)

add_word(1)

get_all(1)

agenda(1)

append(1)

articles(1)

by_prefix(1)

calculate_tfidf(1)

construct_index(1)

Пример #1

Показать файл

 def test_passing_stopwords_should_remove_these_words_from_token_list(self):
     """Tokens listed in ``stopwords`` must not end up as index keys.

     One document is indexed with 'yes', 'no' and three punctuation marks
     configured as stopwords; only 'sir' and 'joyce' are expected to remain.
     """
     index = Index(stopwords=['yes', 'no', ',', '.', '!'])
     index.add_document('coffee', 'Yes, sir! No, Joyce.')
     expected_index = {
         'sir': set(['coffee']),
         'joyce': set(['coffee']),
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index._index, expected_index)

Пример #2

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_should_store_tokens_lowercase(self):
     """Mixed-case document text must be indexed under lowercase tokens.

     Checks both ``index.tokens()`` and the ``_index`` mapping after adding a
     single document named 'doc'.
     """
     index = Index()
     index.add_document('doc', 'This IS mY firsT DoCuMeNt')
     expected_tokens = set(['this', 'is', 'my', 'first', 'document'])
     expected_index = {
         'this': set(['doc']),
         'is': set(['doc']),
         'my': set(['doc']),
         'first': set(['doc']),
         'document': set(['doc']),
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.tokens(), expected_tokens)
     # _index is wrapped in dict() so it compares as a plain mapping.
     self.assertEqual(dict(index._index), expected_index)

Пример #3

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_calling_method_load_should_retrieve_object_from_pickle_file(self):
     """``Index.load`` must give back what ``dump`` wrote to a file.

     Two documents are indexed, dumped to a temporary file and loaded again;
     the loaded object must report length 2 and contain the same tokens.
     """
     # Only the file name is needed, so the handle is closed right away.
     # delete=False keeps the file on disk; its name is stored on self.
     fp = NamedTemporaryFile(delete=False)
     fp.close()
     self.filename = fp.name

     index = Index()
     index.add_document('coffee', 'I liked it')
     index.add_document('water', 'I need it')
     index.dump(self.filename)

     retrieved_index = Index.load(self.filename)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(retrieved_index), 2)
     self.assertEqual(set(retrieved_index._index.keys()),
                      set(['i', 'liked', 'need', 'it']))

Пример #4

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_passing_a_stemmer_should_index_tokens_stemmed(self):
     """A configured stemmer must be applied to tokens before indexing.

     With a ``PorterStemmer`` the word 'liked' is expected under 'like';
     with ``stemmer=None`` it is expected unchanged under 'liked'.
     """
     porter_stemmer = PorterStemmer()
     index = Index(stemmer=porter_stemmer)
     index.add_document('coffee', 'I liked it')
     stemmed_index = {
         'i': set(['coffee']),
         'like': set(['coffee']),
         'it': set(['coffee']),
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index._index, stemmed_index)

     index = Index(stemmer=None)
     index.add_document('coffee', 'I liked it')
     unstemmed_index = {
         'i': set(['coffee']),
         'liked': set(['coffee']),
         'it': set(['coffee']),
     }
     self.assertEqual(index._index, unstemmed_index)

Пример #5

Показать файл

 def test_calling_method_load_should_retrieve_object_from_pickle_file(self):
     """Round-trip an index through ``dump`` and ``Index.load``.

     The loaded index must have length 2 and the token set
     {'i', 'liked', 'need', 'it'}.
     """
     # Reserve a file name on disk; the handle itself is not used afterwards.
     fp = NamedTemporaryFile(delete=False)
     fp.close()
     self.filename = fp.name

     index = Index()
     index.add_document('coffee', 'I liked it')
     index.add_document('water', 'I need it')
     index.dump(self.filename)

     retrieved_index = Index.load(self.filename)
     expected_keys = set(['i', 'liked', 'need', 'it'])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(retrieved_index), 2)
     self.assertEqual(set(retrieved_index._index.keys()), expected_keys)

Пример #6

Показать файл

 def test_should_store_tokens_lowercase(self):
     """Tokens must be stored lowercase regardless of the input casing."""
     index = Index()
     index.add_document('doc', 'This IS mY firsT DoCuMeNt')

     expected_tokens = set(['this', 'is', 'my', 'first', 'document'])
     # Every token is expected to map to the single document 'doc'.
     expected_index = {
         'this': set(['doc']),
         'is': set(['doc']),
         'my': set(['doc']),
         'first': set(['doc']),
         'document': set(['doc']),
     }

     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.tokens(), expected_tokens)
     self.assertEqual(dict(index._index), expected_index)

Пример #7

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_calling_method_dump_should_pickle_the_index_object(self):
     """``dump`` must write a pickle that ``cPickle.load`` can read back.

     After dumping an index holding two documents, the file must exist and
     unpickle to an object of length 2 with the expected token set.
     """
     # Only the file name is needed, so the handle is closed right away.
     fp = NamedTemporaryFile(delete=False)
     fp.close()
     self.filename = fp.name

     index = Index()
     index.add_document('coffee', 'I liked it')
     index.add_document('water', 'I need it')
     index.dump(self.filename)
     self.assertTrue(file_exists(self.filename))

     # The context manager closes the handle even if unpickling fails;
     # the original left it open. The open mode is kept as it was.
     with open(self.filename) as fp:
         retrieved_index = cPickle.load(fp)

     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(retrieved_index), 2)
     self.assertEqual(set(retrieved_index._index.keys()),
                      set(['i', 'liked', 'need', 'it']))

Пример #8

Показать файл

 def test_calling_method_dump_should_pickle_the_index_object(self):
     """Dump an index to disk and verify the pickled content directly."""
     # Reserve a file name on disk; delete=False keeps the file after close.
     fp = NamedTemporaryFile(delete=False)
     fp.close()
     self.filename = fp.name

     index = Index()
     index.add_document('coffee', 'I liked it')
     index.add_document('water', 'I need it')
     index.dump(self.filename)
     self.assertTrue(file_exists(self.filename))

     # Read the pickle inside a with-block so the file handle is always
     # released; the original never closed it. Open mode is unchanged.
     with open(self.filename) as fp:
         retrieved_index = cPickle.load(fp)

     expected_keys = set(['i', 'liked', 'need', 'it'])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(retrieved_index), 2)
     self.assertEqual(set(retrieved_index._index.keys()), expected_keys)

Пример #9

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_should_automatically_index_when_add_documents(self):
     """Adding documents must update the token index without further calls.

     Two documents sharing most words are added; shared tokens must map to
     both document names, and 'first'/'second' to one each.
     """
     index = Index()
     index.add_document('test', 'this is my first document')
     index.add_document('test2', 'this is my second document')
     expected_tokens = set(['this', 'is', 'my', 'first', 'second',
                            'document'])
     expected_index = {
         'this': set(['test', 'test2']),
         'is': set(['test', 'test2']),
         'my': set(['test', 'test2']),
         'first': set(['test']),
         'second': set(['test2']),
         'document': set(['test', 'test2']),
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.tokens(), expected_tokens)
     self.assertEqual(dict(index._index), expected_index)

Пример #10

Показать файл

 def test_should_automatically_index_when_add_documents(self):
     """``add_document`` alone must be enough to populate the index."""
     index = Index()
     index.add_document('test', 'this is my first document')
     index.add_document('test2', 'this is my second document')

     both = set(['test', 'test2'])
     expected_tokens = set(
         ['this', 'is', 'my', 'first', 'second', 'document'])
     # Tokens present in both documents map to both names; set(both) gives
     # each key its own set object.
     expected_index = {
         'this': set(both),
         'is': set(both),
         'my': set(both),
         'first': set(['test']),
         'second': set(['test2']),
         'document': set(both),
     }

     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.tokens(), expected_tokens)
     self.assertEqual(dict(index._index), expected_index)

Пример #11

Показать файл

 def test_passing_a_stemmer_should_index_tokens_stemmed(self):
     """Index keys must be stemmed when a stemmer is given, raw otherwise."""
     # Case 1: with a Porter stemmer, 'liked' is expected under 'like'.
     porter_stemmer = PorterStemmer()
     index = Index(stemmer=porter_stemmer)
     index.add_document('coffee', 'I liked it')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         index._index,
         {
             'i': set(['coffee']),
             'like': set(['coffee']),
             'it': set(['coffee']),
         },
     )

     # Case 2: with stemmer=None the token is expected unchanged.
     index = Index(stemmer=None)
     index.add_document('coffee', 'I liked it')
     self.assertEqual(
         index._index,
         {
             'i': set(['coffee']),
             'liked': set(['coffee']),
             'it': set(['coffee']),
         },
     )

Пример #12

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_should_be_able_to_find_using_AND_OR_and_NOT(self):
     """``find`` must return the documents that contain every query term.

     NOTE(review): despite the name, the assertions below only exercise
     multi-term queries where all terms must match; no OR/NOT query appears
     here.
     """
     index = Index()
     index.add_document('doc1', 'this is my first document')
     index.add_document('doc2', 'this is my second document')
     index.add_document('doc3', 'another document')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.find('this document'), set(['doc1', 'doc2']))
     self.assertEqual(index.find('this another'), set())
     # Terms that were never indexed yield an empty result.
     self.assertEqual(index.find('a b'), set())
     self.assertEqual(index.find('another'), set(['doc3']))
     self.assertEqual(index.find('first another'), set([]))

Пример #13

Показать файл

 def test_should_be_able_to_find_using_AND_OR_and_NOT(self):
     """Check ``find`` results for single- and multi-term queries.

     NOTE(review): only queries requiring all terms to match are asserted
     here; the OR/NOT part of the test name is not covered by this body.
     """
     index = Index()
     index.add_document('doc1', 'this is my first document')
     index.add_document('doc2', 'this is my second document')
     index.add_document('doc3', 'another document')

     empty = set()
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.find('this document'), set(['doc1', 'doc2']))
     # No single document contains both 'this' and 'another'.
     self.assertEqual(index.find('this another'), empty)
     self.assertEqual(index.find('a b'), empty)
     self.assertEqual(index.find('another'), set(['doc3']))
     self.assertEqual(index.find('first another'), empty)

Пример #14

Показать файл

 def test_should_be_able_to_find_by_term(self):
     """``find_by_term`` must return every document containing the term.

     Also checks that an upper-case query ('DOCUMENT') gives the same result
     as its lower-case form.
     """
     index = Index()
     index.add_document('doc1', 'this is my first document')
     index.add_document('doc2', 'this is my second document')
     index.add_document('doc3', 'another document')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.find_by_term('document'),
                      set(['doc1', 'doc2', 'doc3']))
     self.assertEqual(index.find_by_term('DOCUMENT'),
                      set(['doc1', 'doc2', 'doc3']))
     self.assertEqual(index.find_by_term('this'), set(['doc1', 'doc2']))
     self.assertEqual(index.find_by_term('is'), set(['doc1', 'doc2']))
     self.assertEqual(index.find_by_term('my'), set(['doc1', 'doc2']))
     self.assertEqual(index.find_by_term('first'), set(['doc1']))
     self.assertEqual(index.find_by_term('second'), set(['doc2']))
     self.assertEqual(index.find_by_term('another'), set(['doc3']))

Пример #15

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_should_be_able_to_find_by_term(self):
     """Look up single terms and compare against the expected document sets."""
     index = Index()
     index.add_document('doc1', 'this is my first document')
     index.add_document('doc2', 'this is my second document')
     index.add_document('doc3', 'another document')

     all_docs = set(['doc1', 'doc2', 'doc3'])
     first_two = set(['doc1', 'doc2'])

     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.find_by_term('document'), all_docs)
     # The upper-case query is expected to match like the lower-case one.
     self.assertEqual(index.find_by_term('DOCUMENT'), all_docs)
     self.assertEqual(index.find_by_term('this'), first_two)
     self.assertEqual(index.find_by_term('is'), first_two)
     self.assertEqual(index.find_by_term('my'), first_two)
     self.assertEqual(index.find_by_term('first'), set(['doc1']))
     self.assertEqual(index.find_by_term('second'), set(['doc2']))
     self.assertEqual(index.find_by_term('another'), set(['doc3']))

Пример #16

Показать файл

 def test_should_add_documents_with_name_and_content(self):
     """Added documents must be counted by ``len`` and tracked by name."""
     index = Index()
     index.add_document('test', 'this is my first document')
     index.add_document('test2', 'this is my second document')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(index), 2)
     self.assertEqual(index._documents, set(['test', 'test2']))

Пример #17

Показать файл

#!/usr/bin/env python
# coding: utf-8
# Tip: run this script with `python -i example.py`
# (or `ipython -i example.py`), so you can interactively do searches by
# executing: `my_index.search('...search terms...')`

from nltk.corpus import machado
from index import Index


# print is called with one parenthesised argument so the script produces the
# same output on Python 2 and also parses on Python 3.
print('Creating index...')
my_index = Index()
# Index the Machado corpus files, skipping the first 50 file ids.
filenames = machado.fileids()[50:]
for filename in filenames:
    my_index.add_document(filename, machado.raw(filename))

print('Searching...')
print(my_index.find('brasil azul'))

Пример #18

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_should_add_documents_with_name_and_content(self):
     """Two ``add_document`` calls must yield length 2 and both names."""
     index = Index()
     index.add_document('test', 'this is my first document')
     index.add_document('test2', 'this is my second document')

     expected_documents = set(['test', 'test2'])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(index), 2)
     self.assertEqual(index._documents, expected_documents)

Пример #19

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_passing_stopwords_should_remove_these_words_from_token_list(self):
     """Stopwords passed to ``Index`` must be left out of the index keys."""
     index = Index(stopwords=['yes', 'no', ',', '.', '!'])
     index.add_document('coffee', 'Yes, sir! No, Joyce.')
     # Only the non-stopword tokens are expected, stored lowercase.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index._index, {'sir': set(['coffee']),
                                     'joyce': set(['coffee'])})

Пример #20

Показать файл

Файл: test_index.py Проект: sergio-garcia-clones/nlplaying

 def test_passing_a_stemmer_should_stem_search_term_before_matching(self):
     """An unstemmed query term must still match when a stemmer is set.

     The document is indexed with a ``PorterStemmer``; querying the original
     word 'liked' is expected to find it.
     """
     porter_stemmer = PorterStemmer()
     index = Index(stemmer=porter_stemmer)
     index.add_document('coffee', 'I liked it')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(index.find_by_term('liked'), set(['coffee']))

Пример #21

Показать файл

Файл: build_index.py Проект: sdmaslennikov/books-search

# coding: utf-8
from index import Index
import os

def iterate_over_dir(dir):
    """Yield the path of every file found under *dir*, recursing into subdirectories.

    Paths are built by joining the directory reported by ``os.walk`` with
    each file name, so they carry the same prefix as *dir*.
    """
    for parent, _subdirs, names in os.walk(dir):
        for name in names:
            yield os.path.join(parent, name)


# Build an index from every file under the books directory and save it.
ind = Index()
for s in iterate_over_dir('../../books_search_b/not_sort_book/T'):
    try:
        ind.add_document(s)
    except Exception as exc:
        # Best effort: a file that cannot be indexed is skipped, but it is
        # reported instead of being silently dropped. Catching Exception
        # rather than using a bare except lets Ctrl-C and SystemExit through.
        print('Skipping %r: %r' % (s, exc))

ind.save('test.ind')

Пример #22

Показать файл

 def test_passing_a_stemmer_should_stem_search_term_before_matching(self):
     """Querying 'liked' must find a document indexed through a stemmer."""
     index = Index(stemmer=PorterStemmer())
     index.add_document('coffee', 'I liked it')

     matches = index.find_by_term('liked')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(matches, set(['coffee']))