Example #1
    def test_resetsearchindexes_command_existing_dir(self,
                                                     getdefaultlocale_mock):
        self.options["interactive"] = False

        os.mkdir(self.new_index_dir)
        index.create_in(self.new_index_dir, fields.Schema(content=fields.TEXT),
                        'resource')
        self.assertTrue(os.path.exists(self.new_index_dir))

        with self.settings(WIRECLOUD_INDEX_DIR=self.new_index_dir):
            try:
                call_command('resetsearchindexes', **self.options)
            except SystemExit:
                raise CommandError('')

        self.options['stdout'].seek(0)
        self.assertEqual(self.options['stdout'].read(), '')
        self.options['stderr'].seek(0)
        self.assertEqual(self.options['stderr'].read(), '')
        self.assertTrue(os.path.exists(self.new_index_dir))
        for search_index in get_available_search_engines():
            self.assertTrue(
                index.exists_in(self.new_index_dir,
                                indexname=search_index.indexname))
Example #2
def test_merged_lengths():
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    with TempIndex(s, "mergedlengths") as ix:
        w = ix.writer()
        w.add_document(f1=u("A B C"), f2=u("X"))
        w.add_document(f1=u("B C D E"), f2=u("Y Z"))
        w.commit()

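        # Committing with NO_MERGE below keeps each batch in its own segment,
        # so the reader must resolve doc_field_length across several segments.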
        w = ix.writer()
        w.add_document(f1=u("A"), f2=u("B C D E X Y"))
        w.add_document(f1=u("B C"), f2=u("X"))
        w.commit(NO_MERGE)

        w = ix.writer()
        w.add_document(f1=u("A B X Y Z"), f2=u("B C"))
        w.add_document(f1=u("Y X"), f2=u("A B"))
        w.commit(NO_MERGE)

        with ix.reader() as dr:
            assert_equal(dr.stored_fields(0)["f1"], u("A B C"))
            assert_equal(dr.doc_field_length(0, "f1"), 3)
            assert_equal(dr.doc_field_length(2, "f2"), 6)
            assert_equal(dr.doc_field_length(4, "f1"), 5)
Example #3
def test_lengths_ram():
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    st = RamStorage()
    ix = st.create_index(s)
    w = ix.writer()
    w.add_document(f1=u("A B C D E"), f2=u("X Y Z"))
    w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
    w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))
    w.commit()

    dr = ix.reader()
    assert_equal(dr.stored_fields(0)["f1"], "A B C D E")
    assert_equal(dr.doc_field_length(0, "f1"), 5)
    assert_equal(dr.doc_field_length(1, "f1"), 8)
    assert_equal(dr.doc_field_length(2, "f1"), 3)
    assert_equal(dr.doc_field_length(0, "f2"), 3)
    assert_equal(dr.doc_field_length(1, "f2"), 4)
    assert_equal(dr.doc_field_length(2, "f2"), 7)

    assert_equal(dr.field_length("f1"), 16)
    assert_equal(dr.field_length("f2"), 14)
    assert_equal(dr.max_field_length("f1"), 8)
    assert_equal(dr.max_field_length("f2"), 7)
Example #4
def test_resultspage():
    schema = fields.Schema(id=fields.STORED, content=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = ("alfa", "bravo", "bravo", "charlie", "delta")
    w = ix.writer()
    for i, lst in enumerate(permutations(domain, 3)):
        w.add_document(id=text_type(i), content=u(" ").join(lst))
    w.commit()

    with ix.searcher() as s:
        q = query.Term("content", u("bravo"))
        r = s.search(q, limit=10)
        tops = list(r)

        rp = s.search_page(q, 1, pagelen=5)
        assert_equal(rp.scored_length(), 5)
        assert_equal(list(rp), tops[0:5])
        assert_equal(rp[10:], [])

        rp = s.search_page(q, 2, pagelen=5)
        assert_equal(list(rp), tops[5:10])

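        # len() of a results page is the total hit count, not the page size:
        # 54 hits at pagelen=10 give 6 pages, with 4 hits on the last page.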
        rp = s.search_page(q, 1, pagelen=10)
        assert_equal(len(rp), 54)
        assert_equal(rp.pagecount, 6)
        rp = s.search_page(q, 6, pagelen=10)
        assert_equal(len(list(rp)), 4)
        assert rp.is_last_page()

        assert_raises(ValueError, s.search_page, q, 0)
        assert_raises(ValueError, s.search_page, q, 7)

        rp = s.search_page(query.Term("content", "glonk"), 1)
        assert_equal(len(rp), 0)
        assert rp.is_last_page()
Example #5
def test_extend_empty():
    schema = fields.Schema(id=fields.STORED, words=fields.KEYWORD)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=1, words=u("alfa bravo charlie"))
    w.add_document(id=2, words=u("bravo charlie delta"))
    w.add_document(id=3, words=u("charlie delta echo"))
    w.add_document(id=4, words=u("delta echo foxtrot"))
    w.add_document(id=5, words=u("echo foxtrot golf"))
    w.commit()

    with ix.searcher() as s:
        # Get an empty results object
        r1 = s.search(query.Term("words", u("hotel")))
        # Copy it
        r1c = r1.copy()
        # Get a non-empty results object
        r2 = s.search(query.Term("words", u("delta")))
        # Copy it
        r2c = r2.copy()
        # Extend r1 with r2
        r1c.extend(r2c)
        assert_equal([hit["id"] for hit in r1c], [2, 3, 4])
        assert_equal(r1c.scored_length(), 3)
Example #6
    def test_lengths_ram(self):
        s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                          f2=fields.KEYWORD(stored=True, scorable=True))
        st = RamStorage()
        ix = st.create_index(s)
        w = ix.writer()
        w.add_document(f1=u"A B C D E", f2=u"X Y Z")
        w.add_document(f1=u"B B B B C D D Q", f2=u"Q R S T")
        w.add_document(f1=u"D E F", f2=u"U V A B C D E")
        w.commit()

        dr = ix.reader()
        ls1 = [dr.doc_field_length(i, "f1") for i in xrange(0, 3)]
        ls2 = [dr.doc_field_length(i, "f2") for i in xrange(0, 3)]
        self.assertEqual(dr.stored_fields(0)["f1"], "A B C D E")
        self.assertEqual(dr.doc_field_length(0, "f1"), 5)
        self.assertEqual(dr.doc_field_length(1, "f1"), 8)
        self.assertEqual(dr.doc_field_length(2, "f1"), 3)
        self.assertEqual(dr.doc_field_length(0, "f2"), 3)
        self.assertEqual(dr.doc_field_length(1, "f2"), 4)
        self.assertEqual(dr.doc_field_length(2, "f2"), 7)

        self.assertEqual(ix.field_length("f1"), 16)
        self.assertEqual(ix.field_length("f2"), 14)
Example #7
def test_euro_chars():
    schema = fields.Schema(text=fields.TEXT)
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("stra\xdfe"))
    assert q.__class__ == query.Term
    assert q.text == u("stra\xdfe")
Example #8
def test_andmaybe_none():
    schema = fields.Schema(f=fields.TEXT, year=fields.NUMERIC)
    qp = default.QueryParser("f", schema)
    _ = qp.parse(u("Dahmen ANDMAYBE @year:[2000 TO]"))
Example #9
    def get_schema(self):
        return fields.Schema(post_id=fields.ID(stored=True),
                             tags=fields.KEYWORD(commas=True),
                             title=fields.TEXT(stored=True),
                             text=fields.TEXT)
Example #10
DEFAULT_SIGNATURE = """
Thanks!
The Tree.io Team
http://www.tree.io
            """

#
# Search index (Whoosh)
#
SEARCH_DISABLED = False
SEARCH_ENGINE = 'db'

from whoosh import fields
WHOOSH_SCHEMA = fields.Schema(id=fields.ID(stored=True, unique=True),
                              name=fields.TEXT(stored=True),
                              type=fields.TEXT(stored=True),
                              content=fields.TEXT,
                              url=fields.ID(stored=True))

WHOOSH_INDEX = path.join(PROJECT_ROOT, 'storage/search')

#
# CACHING
#
#CACHE_BACKEND = 'dummy://'
CACHE_BACKEND = 'locmem://?timeout=30'
#CACHE_BACKEND = 'memcached://127.0.0.1:11211/?timeout=30'

#CACHE_BACKEND="johnny.backends.locmem://"

JOHNNY_MIDDLEWARE_KEY_PREFIX = 'jc_treeio'
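
A minimal sketch of how these settings might be consumed when the index is built (the helper name `get_search_index` is hypothetical, not part of Tree.io):

import os
from whoosh import index

def get_search_index():
    # Open the on-disk index if one exists; otherwise create it from
    # the schema defined in settings above.
    if index.exists_in(WHOOSH_INDEX):
        return index.open_dir(WHOOSH_INDEX)
    if not os.path.exists(WHOOSH_INDEX):
        os.makedirs(WHOOSH_INDEX)
    return index.create_in(WHOOSH_INDEX, WHOOSH_SCHEMA)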
Example #11
def test_analyzing_terms():
    schema = fields.Schema(text=fields.TEXT(analyzer=analysis.StemmingAnalyzer()))
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("Indexed!"))
    assert_equal(q.__class__, query.Term)
    assert_equal(q.text, "index")
Example #12
    def test_empty_index(self):
        schema = fields.Schema(key=fields.ID(stored=True), value=fields.TEXT)
        st = store.RamStorage()
        self.assertRaises(index.EmptyIndexError, index.Index, st, schema)
Example #13
def _do_merge(writerclass):
    schema = fields.Schema(key=fields.ID(stored=True, unique=True),
                           value=fields.TEXT(stored=True, spelling=True,
                                             vector=True))

    domain = {"a": "aa", "b": "bb cc", "c": "cc dd ee", "d": "dd ee ff gg",
              "e": "ee ff gg hh ii", "f": "ff gg hh ii jj kk",
              "g": "gg hh ii jj kk ll mm", "h": "hh ii jj kk ll mm nn oo",
              "i": "ii jj kk ll mm nn oo pp qq ww ww ww ww ww ww",
              "j": "jj kk ll mm nn oo pp qq rr ss",
              "k": "kk ll mm nn oo pp qq rr ss tt uu"}

    with TempIndex(schema) as ix:
        w = ix.writer()
        for key in "abc":
            w.add_document(key=u(key), value=u(domain[key]))
        w.commit()

        w = ix.writer()
        for key in "def":
            w.add_document(key=u(key), value=u(domain[key]))
        w.commit(merge=False)

        w = writerclass(ix, procs=3)
        del domain["b"]
        w.delete_by_term("key", u("b"))

        domain["e"] = "xx yy zz"
        w.update_document(key=u("e"), value=u(domain["e"]))

        for key in "ghijk":
            w.add_document(key=u(key), value=u(domain[key]))
        w.commit(optimize=True)

        assert len(ix._segments()) == 1

        with ix.searcher() as s:
            r = s.reader()

            assert s.doc_count() == len(domain)

            assert "".join(r.field_terms("key")) == "acdefghijk"
            assert " ".join(r.field_terms("value")) == "aa cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu ww xx yy zz"

            for key in domain:
                docnum = s.document_number(key=key)
                assert docnum is not None

                length = r.doc_field_length(docnum, "value")
                assert length
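                # _byten (defined alongside these tests) presumably rounds the
                # expected count through Whoosh's lossy byte encoding of lengths.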
                assert _byten(len(domain[key].split())) == length

                sf = r.stored_fields(docnum)
                assert domain[key] == sf["value"]

            words = sorted(set((" ".join(domain.values())).split()))
            assert words == list(r.field_terms("value"))

            for word in words:
                hits = s.search(query.Term("value", word))
                for hit in hits:
                    assert word in hit["value"].split()
Example #14
def test_short_prefix():
    s = fields.Schema(name=fields.ID, value=fields.TEXT)
    qp = qparser.QueryParser("value", schema=s)
    q = qp.parse(u("s*"))
    assert_equal(q.__class__.__name__, "Prefix")
    assert_equal(q.text, "s")
Example #15
def test_empty_index():
    schema = fields.Schema(key=fields.ID(stored=True), value=fields.TEXT)
    st = RamStorage()
    assert_raises(index.EmptyIndexError, st.open_index, schema=schema)
Example #16
def test_badnames():
    s = fields.Schema()
    with pytest.raises(fields.FieldConfigurationError):
        s.add("_test", fields.ID)
    with pytest.raises(fields.FieldConfigurationError):
        s.add("a f", fields.ID)
Example #17
    def __init__(self, notebooks):

        # Index directory of whoosh, located in notebookPath.
        self.schema = fields.Schema(
            path=fields.TEXT(stored=True),
            title=fields.TEXT(stored=True),
            content=fields.TEXT(stored=True),
            tags=fields.KEYWORD(commas=True))

        self.notebookName = notebooks[0][0]
        self.notebookPath = notebooks[0][1]
        self.notePath = os.path.join(self.notebookPath, "notes").replace(os.sep, '/')
        self.htmlPath = os.path.join(self.notebookPath, "html", "notes").replace(os.sep, '/')
        self.indexdir = os.path.join(self.notePath, ".indexdir").replace(os.sep, '/')
        self.attachmentPath = os.path.join(self.notebookPath, "attachments").replace(os.sep, '/')
        self.configfile = os.path.join(self.notebookPath, "notebook.conf").replace(os.sep, '/')
        cssPath = os.path.join(self.notebookPath, "css").replace(os.sep, '/')
        self.cssfile = os.path.join(cssPath, "notebook.css").replace(os.sep, '/')
        self.searchcssfile = os.path.join(cssPath, "search-window.css").replace(os.sep, '/')
        self.qsettings = QSettings(self.configfile, QSettings.IniFormat)

        if os.path.exists(self.configfile):
            self.extensions = readListFromSettings(self.qsettings,
                                                   "extensions")
            self.fileExt = self.qsettings.value("fileExt")
            self.attachmentImage = self.qsettings.value("attachmentImage")
            self.attachmentDocument = self.qsettings.value("attachmentDocument")
            self.version = self.qsettings.value("version")
            self.geometry = self.qsettings.value("geometry")
            self.windowstate = self.qsettings.value("windowstate")
            self.mathjax = self.qsettings.value('mathJax')
            if 'extensionsConfig' not in set(self.qsettings.childGroups()):
                self.extcfg = self.qsettings.value('extensionsConfig', defaultValue={})
                writeDictToSettings(self.qsettings, 'extensionsConfig', self.extcfg)
            else:
                self.extcfg = readDictFromSettings(self.qsettings, 'extensionsConfig')
        else:
            self.extensions = []
            self.fileExt = ""
            self.attachmentImage = []
            self.attachmentDocument = []
            self.version = None
            self.geometry = None
            self.windowstate = None
            self.mathjax = ''
            self.extcfg = {}

        self.faulty_exts = []

        # Default enabled python-markdown extensions.
        # http://pythonhosted.org/Markdown/extensions/index.html
        if not self.extensions:
            self.extensions = [
                   'nl2br'           # newline to break
                 , 'strkundr'        # bold-italics-underline-delete style
                 , 'codehilite'      # code syntax highlight
                 , 'fenced_code'     # code block
                 , 'headerid'        # add id to headers
                 , 'headerlink'      # add anchor to headers
                 , 'footnotes'
                 , 'asciimathml'
                 ]
            writeListToSettings(self.qsettings, "extensions", self.extensions)

        while True:
            print(self.extensions)
            try:
                markdown.markdown("", extensions=self.extensions)
            except AttributeError as e:
                remove_this = NOT_EXT.findall(e.args[0])[0]
                if remove_this in self.extensions:
                    print("Found invalid markdown extension", remove_this, ". Please consider removing it.")
                    print('If you want to permanently disable this, just hit OK in the Notebook Settings dialog')
                    self.extensions.remove(remove_this)
                    self.faulty_exts.append(remove_this)
            except ImportError as e:
                if e.name.startswith('mdx_') and e.name[4:] in self.extensions:
                    print('Found missing markdown extension', e.name[4:], ', temporarily disabling.')
                    print('If you want to permanently disable this, just hit OK in the Notebook Settings dialog')
                    self.extensions.remove(e.name[4:])
                    self.faulty_exts.append(e.name[4:])
                elif e.name in self.extensions:
                    print('Found missing markdown extension', e.name, ', temporarily disabling.')
                    print('If you want to permanently disable this, just hit OK in the Notebook Settings dialog')
                    self.extensions.remove(e.name)
                    self.faulty_exts.append(e.name)
            else:
                self.md = markdown.Markdown(self.extensions, extension_configs=self.extcfg)
                break

        # Default file extension name
        if not self.fileExt:
            self.fileExt = ".md"
            self.qsettings.setValue("fileExt", self.fileExt)

        # Image file types that will be copied to attachmentDir
        # Inserted as image link
        if not self.attachmentImage:
            self.attachmentImage = [".jpg", ".jpeg", ".png", ".gif", ".svg"]
            self.qsettings.setValue("attachmentImage", self.attachmentImage)

        # Document file types that will be copied to attachmentDir
        # Inserted as link
        if not self.attachmentDocument:
            self.attachmentDocument = [".pdf", ".doc", ".odt"]
            self.qsettings.setValue("attachmentDocument", self.attachmentDocument)

        # Migrate notebookPath to v0.3.0 folder structure
        if not self.version:
            notebookDir = QDir(self.notebookPath)

            # move all markdown files to notes/
            dirList = notebookDir.entryList(QDir.Dirs | QDir.NoDotAndDotDot)
            if 'css' in dirList:
                dirList.remove('css')
            fileList = notebookDir.entryList(['*.md', '*.mkd', '*.markdown'])
            notebookDir.mkdir('notes')
            for d in dirList + fileList:
                notebookDir.rename(d, os.path.join('notes', d).replace(os.sep, '/'))

            # remove .indexdir folder
            oldIndexDir = QDir(os.path.join(self.notebookPath, '.indexdir'.replace(os.sep, '/')))
            indexFileList = oldIndexDir.entryList()
            for f in indexFileList:
                oldIndexDir.remove(f)
            notebookDir.rmdir('.indexdir')

            # rename notes.css to css/notebook.css
            oldCssFile = os.path.join(self.notebookPath, 'notes.css').replace(os.sep, '/')
            QDir().mkpath(cssPath)
            if os.path.exists(oldCssFile):
                QFile.rename(oldCssFile, self.cssfile)

            self.version = '0'

        if not self.mathjax:
            self.mathjax = 'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'
            self.qsettings.setValue('mathJax', self.mathjax)
Example #18
def _do_basic(writerclass):
    # Create the domain data

    # List of individual words added to the index
    words = []
    # List of string values added to the index
    docs = []
    # A ring buffer for creating string values
    buf = deque()
    for ls in permutations(u("abcd")):
        word = "".join(ls)
        # Remember this word is in the index (to check lexicon)
        words.append(word)

        # Add this word on to the end, pop the first word off to create N word
        # documents where N <= 10
        buf.append(word)
        if len(buf) > 10:
            buf.popleft()
        # Create a copy of the buffer and shuffle it to create a document value
        # and add it to the list of document values
        doc = list(buf)
        random.shuffle(doc)
        docs.append(" ".join(doc))
    # Shuffle the list of document values
    random.shuffle(docs)

    schema = fields.Schema(text=fields.TEXT(stored=True, spelling=True,
                                            vector=True),
                           row=fields.NUMERIC(stored=True))

    with TempIndex(schema, storage_debug=True) as ix:
        # Add the domain data to the index
        with writerclass(ix, procs=3) as w:
            for i, value in enumerate(docs):
                w.add_document(text=value, row=i)

        with ix.searcher() as s:
            r = s.reader()

            # Check the lexicon
            for word, term in izip(words, r.field_terms("text")):
                assert word == term
            # Check the doc count
            assert r.doc_count_all() == len(docs)

            # Check the word graph
            assert r.has_word_graph("text")
            flat = [w.decode("latin1") for w in r.word_graph("text").flatten()]
            assert flat == words

            # Check there are lengths
            total = sum(r.doc_field_length(docnum, "text", 0)
                        for docnum in xrange(r.doc_count_all()))
            assert total > 0

            # Check per-doc info
            for i, value in enumerate(docs):
                pieces = value.split()
                docnum = s.document_number(row=i)

                # Check stored value
                sv = r.stored_fields(docnum)
                assert sv["text"] == value

                # Check vectors
                vr = r.vector(docnum, "text")
                # Get the terms and positions from the vector matcher
                iv = list(vr.items_as("positions"))
                # What the vector should look like
                ov = sorted((text, [i]) for i, text in enumerate(pieces))
                assert iv == ov

                # Check field length
                assert r.doc_field_length(docnum, "text") == len(pieces)
Example #19
def test_stopped():
    schema = fields.Schema(text=fields.TEXT)
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("a b"), debug=True)
    assert_equal(q, query.NullQuery)
Example #20
class ISPWhoosh(object):
    """
    Helper class to index the ISP model with Whoosh to allow full-text search
    """
    schema = fields.Schema(
        id=fields.ID(unique=True, stored=True),
        is_ffdn_member=fields.BOOLEAN(),
        is_disabled=fields.BOOLEAN(),
        name=fields.TEXT(),
        shortname=fields.TEXT(),
        description=fields.TEXT(),
        covered_areas=fields.KEYWORD(scorable=True, commas=True, lowercase=True),
        step=fields.NUMERIC(signed=False),
    )

    primary_key = schema._fields['id']

    @staticmethod
    def get_index_dir():
        return current_app.config.get('WHOOSH_INDEX_DIR', 'whoosh')

    @classmethod
    def get_index(cls):
        idxdir = cls.get_index_dir()
        if index.exists_in(idxdir):
            idx = index.open_dir(idxdir)
        else:
            if not os.path.exists(idxdir):
                os.makedirs(idxdir)
            idx = index.create_in(idxdir, cls.schema)
        return idx

    @classmethod
    def _search(cls, s, terms):
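        # `mask` excludes documents matching the given query from the
        # results, here filtering out ISPs flagged as disabled.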
        return s.search(qparser.MultifieldParser([
            'name', 'shortname', 'description', 'covered_areas'
        ], schema=cls.schema).parse(terms),
           mask=whoosh.query.Term('is_disabled', True))

    @classmethod
    def search(cls, terms):
        with ISPWhoosh.get_index().searcher() as s:
            sres = cls._search(s, terms)
            ranks = {}
            for rank, r in enumerate(sres):
                ranks[r['id']] = rank

            if not len(ranks):
                return []

            _res = ISP.query.filter(ISP.id.in_(ranks.keys()))

        return sorted(_res, key=lambda r: ranks[r.id])

    @classmethod
    def update_document(cls, writer, model):
        kw = {
            'id': unicode(model.id),
            '_stored_id': model.id,
            'is_ffdn_member': model.is_ffdn_member,
            'is_disabled': model.is_disabled,
            'name': model.name,
            'shortname': model.shortname,
            'description': model.json.get('description'),
            'covered_areas': ','.join(model.covered_areas_names()),
            'step': model.json.get('progressStatus')
        }
        writer.update_document(**kw)

    @classmethod
    def _after_flush(cls, app, changes):
        isp_changes = []
        for change in changes:
            if change[0].__class__ == ISP:
                update = change[1] in ('update', 'insert')
                isp_changes.append((update, change[0]))

        if not isp_changes:
            return

        idx = cls.get_index()
        with idx.writer() as writer:
            for update, model in isp_changes:
                if update:
                    cls.update_document(writer, model)
                else:
                    writer.delete_by_term(cls.primary_key, model.id)
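
`_after_flush` has the signature of a Flask-SQLAlchemy `models_committed` receiver (the app, then a list of `(model, operation)` changes). A minimal sketch of how it might be wired up, assuming a Flask app instance named `app`:

from flask_sqlalchemy import models_committed

models_committed.connect(ISPWhoosh._after_flush, sender=app)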
Example #21
def test_write_empty_vector():
    schema = fields.Schema(text=fields.TEXT(vector=True))
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(
                text=u". . . . . . . . . . . . . . . . . . . . . . . . 1")
Example #22
def test_nested_children():
    schema = fields.Schema(t=fields.ID(stored=True),
                           track=fields.NUMERIC(stored=True),
                           album_name=fields.TEXT(stored=True),
                           song_name=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        with w.group():
            w.add_document(t=u("album"), album_name=u("alfa bravo charlie"))
            w.add_document(t=u("track"),
                           track=1,
                           song_name=u("delta echo foxtrot"))
            w.add_document(t=u("track"),
                           track=2,
                           song_name=u("golf hotel india"))
            w.add_document(t=u("track"),
                           track=3,
                           song_name=u("juliet kilo lima"))
        with w.group():
            w.add_document(t=u("album"), album_name=u("mike november oskar"))
            w.add_document(t=u("track"),
                           track=1,
                           song_name=u("papa quebec romeo"))
            w.add_document(t=u("track"),
                           track=2,
                           song_name=u("sierra tango ultra"))
            w.add_document(t=u("track"),
                           track=3,
                           song_name=u("victor whiskey xray"))
        with w.group():
            w.add_document(t=u("album"), album_name=u("yankee zulu one"))
            w.add_document(t=u("track"),
                           track=1,
                           song_name=u("two three four"))
            w.add_document(t=u("track"),
                           track=2,
                           song_name=u("five six seven"))
            w.add_document(t=u("track"),
                           track=3,
                           song_name=u("eight nine ten"))

    with ix.searcher() as s:
        pq = query.Term("t", "album")
        aq = query.Term("album_name", "november")

        r = s.search(query.NestedChildren(pq, pq), limit=None)
        assert len(r) == 9
        assert [str(hit["t"]) for hit in r] == ["track"] * 9

        ncq = query.NestedChildren(pq, aq)
        assert list(ncq.docs(s)) == [5, 6, 7]
        r = s.search(ncq, limit=None)
        assert len(r) == 3
        assert [str(hit["song_name"]) for hit in r] == [
            "papa quebec romeo", "sierra tango ultra", "victor whiskey xray"
        ]

        zq = query.NestedChildren(pq, query.Term("album_name", "zulu"))
        f = sorting.StoredFieldFacet("song_name")
        r = s.search(zq, sortedby=f)
        assert [hit["track"] for hit in r] == [3, 2, 1]
Example #23
File: __init__.py, Project: somair/zorna
import os
from whoosh import index, store, fields
from whoosh.index import create_in
from whoosh.qparser import QueryParser
from django.db.models.signals import post_syncdb
from django.conf import settings

PAGES_WHOOSH_SCHEMA = fields.Schema(title=fields.TEXT(stored=True),
                                    content=fields.TEXT(stored=True),
                                    url=fields.ID(stored=True, unique=True))


def create_index(sender=None, **kwargs):
    if not os.path.exists(settings.HAYSTACK_WHOOSH_PATH):
        os.mkdir(settings.HAYSTACK_WHOOSH_PATH)
    ix = create_in(settings.HAYSTACK_WHOOSH_PATH, PAGES_WHOOSH_SCHEMA,
                   "ZORNA_PAGES")


post_syncdb.connect(create_index)
Example #24
from whoosh import index, fields, qparser
import os

schema = fields.Schema(title=fields.TEXT(stored=True),
                       description=fields.TEXT(stored=True),
                       transcript=fields.TEXT,
                       url=fields.STORED)

if not os.path.exists("index"):
    os.mkdir("index")
    search_index = index.create_in("index", schema)
else:
    search_index = index.open_dir("index")

queryparser = qparser.QueryParser("transcript", schema)


def search(q):
    with search_index.searcher() as s:
        results = s.search(queryparser.parse(q))
        r = list(results)
        r.sort(key=lambda x: x.rank)
        return [res.fields() for res in r]


def add_to_index(title, description, transcript, url):
    w = search_index.writer()
    w.add_document(title=title,
                   description=description,
                   transcript=transcript,
                   url=url)
    # Commit so the new document is actually written to the index;
    # without it the document is lost when the writer is discarded.
    w.commit()
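
The writer also works as a context manager, committing automatically on normal exit; the same operation could be sketched as:

def add_to_index(title, description, transcript, url):
    # The with-block commits the writer on success and cancels it
    # if an exception is raised.
    with search_index.writer() as w:
        w.add_document(title=title, description=description,
                       transcript=transcript, url=url)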
Example #25
def test_unicode_num():
    schema = fields.Schema(num=fields.NUMERIC)
    parser = default.QueryParser(u("num"), schema=schema)
    q = parser.parse(u("num:1"))

    _ = text_type(q)
Example #26
def test_add_reader():
    schema = fields.Schema(i=fields.ID(stored=True, unique=True),
                           a=fields.TEXT(stored=True, spelling=True),
                           b=fields.TEXT(vector=True))
    with TempIndex(schema, "addreader") as ix:
        with ix.writer() as w:
            w.add_document(i=u("0"),
                           a=u("alfa bravo charlie delta"),
                           b=u("able baker coxwell dog"))
            w.add_document(i=u("1"),
                           a=u("bravo charlie delta echo"),
                           b=u("elf fabio gong hiker"))
            w.add_document(i=u("2"),
                           a=u("charlie delta echo foxtrot"),
                           b=u("india joker king loopy"))
            w.add_document(i=u("3"),
                           a=u("delta echo foxtrot golf"),
                           b=u("mister noogie oompah pancake"))

        with ix.writer() as w:
            w.delete_by_term("i", "1")
            w.delete_by_term("i", "3")

        with ix.writer() as w:
            w.add_document(i=u("4"),
                           a=u("hotel india juliet kilo"),
                           b=u("quick rhubarb soggy trap"))
            w.add_document(i=u("5"),
                           a=u("india juliet kilo lima"),
                           b=u("umber violet weird xray"))

        with ix.reader() as r:
            assert_equal(r.doc_count_all(), 4)

            sfs = list(r.all_stored_fields())
            assert_equal(sfs, [
                {
                    "i": u("4"),
                    "a": u("hotel india juliet kilo")
                },
                {
                    "i": u("5"),
                    "a": u("india juliet kilo lima")
                },
                {
                    "i": u("0"),
                    "a": u("alfa bravo charlie delta")
                },
                {
                    "i": u("2"),
                    "a": u("charlie delta echo foxtrot")
                },
            ])

            assert_equal(list(r.lexicon("a")), [
                "alfa", "bravo", "charlie", "delta", "echo", "foxtrot",
                "hotel", "india", "juliet", "kilo", "lima"
            ])

            vs = []
            for docnum in r.all_doc_ids():
                v = r.vector(docnum, "b")
                vs.append(list(v.all_ids()))
            assert_equal(vs, [["quick", "rhubarb", "soggy", "trap"],
                              ["umber", "violet", "weird", "xray"],
                              ["able", "baker", "coxwell", "dog"],
                              ["india", "joker", "king", "loopy"]])

            gr = r.word_graph("a")
            assert_equal(list(gr.flatten_strings()), [
                "alfa",
                "bravo",
                "charlie",
                "delta",
                "echo",
                "foxtrot",
                "hotel",
                "india",
                "juliet",
                "kilo",
                "lima",
            ])
Example #27
def test_empty_querystring():
    s = fields.Schema(content=fields.TEXT, title=fields.TEXT, id=fields.ID)
    qp = default.QueryParser("content", s)
    q = qp.parse(u(""))
    assert q == query.NullQuery
Example #28
File: schema.py, Project: eukaryote/knowhow
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division

import hashlib

import whoosh.fields as F

# This schema defines the structure of a single knowhow snippet.
SCHEMA = F.Schema(
    # unique identifier
    id=F.ID(unique=True, stored=True),
    # a multi-valued analyzed field
    tag=F.KEYWORD(stored=True, field_boost=2.0),
    # the text content of the snippet
    content=F.TEXT(stored=True),
    # all searchable fields, for use as a default field
    text=F.TEXT(stored=False),
    # when the snippet was last modified
    updated=F.DATETIME(stored=True),
)

# Function to create a hasher object for generating id of a snippet.
IdGenerator = hashlib.sha256

# The number of hexadecimal characters in an id
ID_LENGTH = IdGenerator().digest_size * 2


def identifier(doc):
    """
Example #29
def test_stopped():
    schema = fields.Schema(text=fields.TEXT)
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("a b"))
    assert q == query.NullQuery
Example #30
def test_noscorables2():
    schema = fields.Schema(field=fields.ID)
    with TempIndex(schema, "noscorables2") as ix:
        writer = ix.writer()
        writer.add_document(field=u('foo'))
        writer.commit()