def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') os.mkdir('test-index/files') from roundup.backends.indexer_dbm import Indexer self.dex = Indexer(db) self.dex.load_index()
class IndexerTest(unittest.TestCase): def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') os.mkdir('test-index/files') from roundup.backends.indexer_dbm import Indexer self.dex = Indexer(db) self.dex.load_index() def assertSeqEqual(self, s1, s2): # First argument is the db result we're testing, second is the # desired result. Some db results don't have iterable rows, so we # have to work around that. # Also work around some dbs not returning items in the expected # order. s1 = list([tuple([r[n] for n in range(len(r))]) for r in s1]) s1.sort() if s1 != s2: self.fail('contents of %r != %r'%(s1, s2)) def test_basics(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah']), [('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah', 'hello']), []) def test_change(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), 'a the hello') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_clear(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), '') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_stopwords(self): """Test that we can find a text with a stopword in it.""" stopword = "with" self.assert_(self.dex.is_stopword(stopword.upper())) self.dex.add_text(('test', '1', 'bar'), '%s hello world' % stopword) self.dex.add_text(('test', '2', 'bar'), 'blah a %s world' % stopword) self.dex.add_text(('test', '3', 'bar'), 'blah Blub river') self.dex.add_text(('test', '4', 'bar'), 'blah river %s' % stopword) self.assertSeqEqual(self.dex.find(['with','world']), [('test', '1', 'bar'), ('test', '2', 'bar')]) def test_extremewords(self): """Testing too short or too long words.""" short = "b" long = "abcdefghijklmnopqrstuvwxyz" self.dex.add_text(('test', '1', 'a'), '%s hello world' % short) self.dex.add_text(('test', '2', 'a'), 'blah a %s world' % short) self.dex.add_text(('test', '3', 'a'), 'blah Blub river') self.dex.add_text(('test', '4', 'a'), 'blah river %s %s' % (short, long)) self.assertSeqEqual(self.dex.find([short,'world', long, short]), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual(self.dex.find([long]),[]) # special test because some faulty code indexed length(word)>=2 # but only considered length(word)>=3 to be significant self.dex.add_text(('test', '5', 'a'), 'blah py %s %s' % (short, long)) self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')]) def test_casesensitity(self): """Test if searches are case-in-sensitive.""" self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb') self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB') self.assertSeqEqual(self.dex.find(['aaaa']), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual(self.dex.find(['BBBB']), [('test', '1', 'a'), ('test', '2', 'a')]) def test_wordsplitting(self): """Test if word splitting works.""" self.dex.add_text(('test', '1', 'a'), 'aaaa-aaa bbbb*bbb') self.dex.add_text(('test', '2', 'a'), 'aaaA-aaa BBBB*BBB') for k in 'aaaa', 'aaa', 'bbbb', 'bbb': self.assertSeqEqual(self.dex.find([k]), [('test', '1', 'a'), ('test', '2', 'a')]) def tearDown(self): shutil.rmtree('test-index')
class IndexerTest(unittest.TestCase): def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') os.mkdir('test-index/files') from roundup.backends.indexer_dbm import Indexer self.dex = Indexer(db) self.dex.load_index() def assertSeqEqual(self, s1, s2): # First argument is the db result we're testing, second is the # desired result. Some db results don't have iterable rows, so we # have to work around that. # Also work around some dbs not returning items in the expected # order. s1 = list([tuple([r[n] for n in range(len(r))]) for r in s1]) s1.sort() if s1 != s2: self.fail('contents of %r != %r' % (s1, s2)) def test_basics(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah']), [('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah', 'hello']), []) def test_change(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), 'a the hello') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_clear(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), '') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_stopwords(self): """Test that we can find a text with a stopword in it.""" stopword = "with" self.assertTrue(self.dex.is_stopword(stopword.upper())) self.dex.add_text(('test', '1', 'bar'), '%s hello world' % stopword) self.dex.add_text(('test', '2', 'bar'), 'blah a %s world' % stopword) self.dex.add_text(('test', '3', 'bar'), 'blah Blub river') self.dex.add_text(('test', '4', 'bar'), 'blah river %s' % stopword) self.assertSeqEqual(self.dex.find(['with', 'world']), [('test', '1', 'bar'), ('test', '2', 'bar')]) def test_extremewords(self): """Testing too short or too long words.""" # skip this for FTS test if (isinstance(self, sqliteFtsIndexerTest) or isinstance(self, postgresqlFtsIndexerTest)): pytest.skip("extremewords not tested for native FTS backends") short = "b" long = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" self.dex.add_text(('test', '1', 'a'), '%s hello world' % short) self.dex.add_text(('test', '2', 'a'), 'blah a %s world' % short) self.dex.add_text(('test', '3', 'a'), 'blah Blub river') self.dex.add_text(('test', '4', 'a'), 'blah river %s %s' % (short, long)) self.assertSeqEqual(self.dex.find([short, 'world', long, short]), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual(self.dex.find([long]), []) # special test because some faulty code indexed length(word)>=2 # but only considered length(word)>=3 to be significant self.dex.add_text(('test', '5', 'a'), 'blah py %s %s' % (short, long)) self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')]) def test_casesensitivity(self): """Test if searches are case-in-sensitive.""" self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb') self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB') self.assertSeqEqual(self.dex.find(['aaaa']), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual(self.dex.find(['BBBB']), [('test', '1', 'a'), ('test', '2', 'a')]) def test_wordsplitting(self): """Test if word splitting works.""" self.dex.add_text(('test', '1', 'a'), 'aaaa-aaa bbbb*bbb') self.dex.add_text(('test', '2', 'a'), 'aaaA-aaa BBBB*BBB') for k in 'aaaa', 'aaa', 'bbbb', 'bbb': self.assertSeqEqual(self.dex.find([k]), [('test', '1', 'a'), ('test', '2', 'a')]) def test_manyresults(self): """Test if searches find many results.""" for i in range(123): self.dex.add_text(('test', str(i), 'many'), 'many') self.assertEqual(len(self.dex.find(['many'])), 123) def test_unicode(self): """Test with unicode words. see: https://issues.roundup-tracker.org/issue1344046""" russian = u'\u0440\u0443\u0441\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442Spr\xfcnge' german = u'Spr\xfcnge' self.dex.add_text(('test', '1', 'a'), german) self.dex.add_text(('test', '2', 'a'), russian + u' ' + german) self.assertSeqEqual(self.dex.find([u'Spr\xfcnge']), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual( self.dex.find([u'\u0440\u0443\u0441\u0441\u043a\u0438\u0439']), [('test', '2', 'a')]) def tearDown(self): shutil.rmtree('test-index')
class sqliteFtsIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest): def setUp(self): RDBMSIndexerTest.setUp(self) from roundup.backends.indexer_sqlite_fts import Indexer self.dex = Indexer(self.db) self.dex.db = self.db def test_phrase_and_near(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') # test two separate words for sanity self.assertSeqEqual(self.dex.find(['"hello" "world"']), [('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo')]) # now check the phrase self.assertSeqEqual(self.dex.find(['"hello world"']), [ ('test', '1', 'foo'), ]) # now check the phrase with near explicitly 0 intervening items self.assertSeqEqual(self.dex.find(['NEAR(hello world, 0)']), [ ('test', '1', 'foo'), ]) # now check the phrase with near explicitly 1 intervening item self.assertSeqEqual(self.dex.find(['NEAR(hello world, 1)']), [ ('test', '1', 'foo'), ('test', '3', 'foo'), ]) # now check the phrase with near explicitly 3 intervening item self.assertSeqEqual(self.dex.find(['NEAR(hello world, 3)']), [ ('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) def test_prefix(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.assertSeqEqual(self.dex.find(['hel*']), [('test', '1', 'foo'), ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo')]) def test_bool_start(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.assertSeqEqual(self.dex.find(['hel* NOT helh NOT blech']), [ ('test', '1', 'foo'), ('test', '3', 'foo'), ]) self.assertSeqEqual(self.dex.find(['hel* NOT helh NOT blech OR the']), [ ('test', '1', 'foo'), ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) self.assertSeqEqual(self.dex.find(['helh OR hello']), [ ('test', '1', 'foo'), ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) self.assertSeqEqual(self.dex.find(['helh AND hello']), []) # matches if line starts with hello self.assertSeqEqual(self.dex.find(['^hello']), [ ('test', '4', 'foo'), ]) self.assertSeqEqual(self.dex.find(['hello']), [ ('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) def test_query_errors(self): """test query phrases that generate an error. Also test the correction""" self.dex.add_text(('test', '1', 'foo'), 'a the hello-world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') # handle known error that roundup recognizes and tries to diagnose with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['the hello-world']) error = ("Search failed. Try quoting any terms that include a '-' " "and retry the search.") self.assertEqual(str(ctx.exception), error) self.assertSeqEqual(self.dex.find(['the "hello-world"']), [ ('test', '1', 'foo'), ]) # handle known error that roundup recognizes and tries to diagnose with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['hello world + ^the']) error = 'Query error: syntax error near "^"' self.assertEqual(str(ctx.exception), error)
def setUp(self): RDBMSIndexerTest.setUp(self) from roundup.backends.indexer_sqlite_fts import Indexer self.dex = Indexer(self.db) self.dex.db = self.db
class postgresqlFtsIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest): def setUp(self): postgresqlOpener.setUp(self) RDBMSIndexerTest.setUp(self) from roundup.backends.indexer_postgresql_fts import Indexer self.dex = Indexer(self.db) self.dex.db = self.db def tearDown(self): RDBMSIndexerTest.tearDown(self) postgresqlOpener.tearDown(self) def test_websearch_syntax(self): """Test searches using websearch_to_tsquery. These never throw errors regardless of how wacky the input. """ self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.dex.add_text(('test', '5', 'foo'), 'a car drove') self.dex.add_text(('test', '6', 'foo'), 'a car driving itself') self.dex.add_text(('test', '7', 'foo'), "let's drive in the car") self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie') # test two separate words for sanity self.assertSeqEqual(self.dex.find(['"hello" "world"']), [('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo')]) # now check the phrase self.assertSeqEqual(self.dex.find(['"hello world"']), [ ('test', '1', 'foo'), ]) # test negation self.assertSeqEqual(self.dex.find(['hello world -blech']), [ ('test', '1', 'foo'), ('test', '3', 'foo'), ]) # phrase negation self.assertSeqEqual(self.dex.find(['hello world -"blah hello"']), [ ('test', '1', 'foo'), ('test', '4', 'foo'), ]) # test without or self.assertSeqEqual(self.dex.find(['blah blech']), [ ('test', '4', 'foo'), ]) # test with or self.assertSeqEqual(self.dex.find(['blah or blech']), [ ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) # stemmer test for english self.assertSeqEqual(self.dex.find(['ts:drive']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo')]) # stemmer is not disabled by quotes 8-( self.assertSeqEqual(self.dex.find(['ts:"drive"']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo')]) # this is missing ts: at the start, so uses the websearch # parser. We search for operator characters and wanr the user # Otherwise "hello <-> world" is the same as "hello world" # and is not a phrase search. with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['hello <-> world']) self.assertIn('do a tsquery search', ctx.exception.args[0]) def test_tsquery_syntax(self): """Because websearch_to_tsquery doesn't allow prefix searches, near searches with any value except 1 (phrase search), allow use of to_tsquery by prefixing the search term wih ts:. However, unlike websearch_to_tsquery, this will throw a psycopg2.errors.SyntaxError on bad input. SyntaxError is re-raised as IndexerQueryError. But it makes a bunch of useful expert functionality available. """ self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.dex.add_text(('test', '5', 'foo'), 'a car drove') self.dex.add_text(('test', '6', 'foo'), 'a car driving itself') self.dex.add_text(('test', '7', 'foo'), "let's drive in the car") self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie') self.dex.db.commit() # test two separate words for sanity self.assertSeqEqual(self.dex.find(['ts:hello & world']), [('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo')]) # now check the phrase self.assertSeqEqual(self.dex.find(['ts:hello <-> world']), [ ('test', '1', 'foo'), ]) # test negation self.assertSeqEqual(self.dex.find(['ts:hello & world & !blech']), [ ('test', '1', 'foo'), ('test', '3', 'foo'), ]) self.assertSeqEqual( self.dex.find(['ts:hello & world & !(blah <-> hello)']), [ ('test', '1', 'foo'), ('test', '4', 'foo'), ]) # test without or self.assertSeqEqual(self.dex.find(['ts:blah & blech']), [ ('test', '4', 'foo'), ]) # test with or self.assertSeqEqual(self.dex.find(['ts:blah | blech']), [ ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) # stemmer test for english self.assertSeqEqual(self.dex.find(['ts:drive']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo')]) # stemmer is not disabled by quotes 8-( self.assertSeqEqual(self.dex.find(['ts:"drive"']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo')]) # test with syntax error with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['ts:blah blech']) self.assertEqual(ctx.exception.args[0], 'syntax error in tsquery: "blah blech"\n') # now check the phrase Note unlike sqlite, order matters, # hello must come first. self.assertSeqEqual(self.dex.find(['ts:hello <-> world']), [ ('test', '1', 'foo'), ]) # now check the phrase with explicitly 1 intervening item self.assertSeqEqual(self.dex.find(['ts:hello <2> world']), [ ('test', '3', 'foo'), ]) # now check the phrase with near explicitly 1 or 3 intervening items self.assertSeqEqual( self.dex.find(['ts:(hello <4> world) | (hello<2>world)']), [ ('test', '3', 'foo'), ('test', '4', 'foo'), ]) # now check the phrase with near explicitly 3 intervening item # with prefix for world. self.assertSeqEqual(self.dex.find(['ts:hello <4> wor:*']), [ ('test', '4', 'foo'), ]) def test_invalid_language(self): import psycopg2 from roundup.configuration import IndexerOption IndexerOption.valid_langs.append("foo") self.db.config["INDEXER_LANGUAGE"] = "foo" with self.assertRaises(psycopg2.errors.UndefinedObject) as ctx: # psycopg2.errors.UndefinedObject: text search configuration # "foo" does not exist self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.assertIn('search configuration "foo" does', ctx.exception.args[0]) self.db.rollback() with self.assertRaises(ValueError) as ctx: self.dex.find(['"hello" "world"']) self.assertIn('search configuration "foo" does', ctx.exception.args[0]) self.db.rollback() self.db.config["INDEXER_LANGUAGE"] = "english"
def setUp(self): postgresqlOpener.setUp(self) RDBMSIndexerTest.setUp(self) from roundup.backends.indexer_postgresql_fts import Indexer self.dex = Indexer(self.db) self.dex.db = self.db
def setUp(self): # remove previous test, ignore errors if os.path.exists(config.DATABASE): shutil.rmtree(config.DATABASE) self.db = self.module.Database(config, 'admin') self.dex = Indexer(self.db)
def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') from roundup.backends.indexer_xapian import Indexer self.dex = Indexer(db)
class IndexerTest(unittest.TestCase): def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') os.mkdir('test-index/files') from roundup.backends.indexer_dbm import Indexer self.dex = Indexer(db) self.dex.load_index() def assertSeqEqual(self, s1, s2): # first argument is the db result we're testing, second is the # desired result some db results don't have iterable rows, so we # have to work around that # Also work around some dbs not returning items in the expected # order. This would be *so* much easier with python2.4's sorted. s1 = list(s1) s1.sort() if [i for x,y in zip(s1, s2) for i,j in enumerate(y) if x[i] != j]: self.fail('contents of %r != %r'%(s1, s2)) def test_basics(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah']), [('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah', 'hello']), []) def test_change(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), 'a the hello') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_clear(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), '') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def tearDown(self): shutil.rmtree('test-index')
class IndexerTest(unittest.TestCase): def setUp(self): if os.path.exists("test-index"): shutil.rmtree("test-index") os.mkdir("test-index") os.mkdir("test-index/files") from roundup.backends.indexer_dbm import Indexer self.dex = Indexer(db) self.dex.load_index() def assertSeqEqual(self, s1, s2): # First argument is the db result we're testing, second is the # desired result. Some db results don't have iterable rows, so we # have to work around that. # Also work around some dbs not returning items in the expected # order. s1 = list([tuple([r[n] for n in range(len(r))]) for r in s1]) s1.sort() if s1 != s2: self.fail("contents of %r != %r" % (s1, s2)) def test_basics(self): self.dex.add_text(("test", "1", "foo"), "a the hello world") self.dex.add_text(("test", "2", "foo"), "blah blah the world") self.assertSeqEqual(self.dex.find(["world"]), [("test", "1", "foo"), ("test", "2", "foo")]) self.assertSeqEqual(self.dex.find(["blah"]), [("test", "2", "foo")]) self.assertSeqEqual(self.dex.find(["blah", "hello"]), []) def test_change(self): self.dex.add_text(("test", "1", "foo"), "a the hello world") self.dex.add_text(("test", "2", "foo"), "blah blah the world") self.assertSeqEqual(self.dex.find(["world"]), [("test", "1", "foo"), ("test", "2", "foo")]) self.dex.add_text(("test", "1", "foo"), "a the hello") self.assertSeqEqual(self.dex.find(["world"]), [("test", "2", "foo")]) def test_clear(self): self.dex.add_text(("test", "1", "foo"), "a the hello world") self.dex.add_text(("test", "2", "foo"), "blah blah the world") self.assertSeqEqual(self.dex.find(["world"]), [("test", "1", "foo"), ("test", "2", "foo")]) self.dex.add_text(("test", "1", "foo"), "") self.assertSeqEqual(self.dex.find(["world"]), [("test", "2", "foo")]) def test_stopwords(self): """Test that we can find a text with a stopword in it.""" stopword = "with" self.assert_(self.dex.is_stopword(stopword.upper())) self.dex.add_text(("test", "1", "bar"), "%s hello world" % stopword) self.dex.add_text(("test", "2", "bar"), "blah a %s world" % stopword) self.dex.add_text(("test", "3", "bar"), "blah Blub river") self.dex.add_text(("test", "4", "bar"), "blah river %s" % stopword) self.assertSeqEqual(self.dex.find(["with", "world"]), [("test", "1", "bar"), ("test", "2", "bar")]) def test_extremewords(self): """Testing too short or too long words.""" short = "b" long = "abcdefghijklmnopqrstuvwxyz" self.dex.add_text(("test", "1", "a"), "%s hello world" % short) self.dex.add_text(("test", "2", "a"), "blah a %s world" % short) self.dex.add_text(("test", "3", "a"), "blah Blub river") self.dex.add_text(("test", "4", "a"), "blah river %s %s" % (short, long)) self.assertSeqEqual(self.dex.find([short, "world", long, short]), [("test", "1", "a"), ("test", "2", "a")]) self.assertSeqEqual(self.dex.find([long]), []) # special test because some faulty code indexed length(word)>=2 # but only considered length(word)>=3 to be significant self.dex.add_text(("test", "5", "a"), "blah py %s %s" % (short, long)) self.assertSeqEqual(self.dex.find(["py"]), [("test", "5", "a")]) def test_casesensitity(self): """Test if searches are case-in-sensitive.""" self.dex.add_text(("test", "1", "a"), "aaaa bbbb") self.dex.add_text(("test", "2", "a"), "aAaa BBBB") self.assertSeqEqual(self.dex.find(["aaaa"]), [("test", "1", "a"), ("test", "2", "a")]) self.assertSeqEqual(self.dex.find(["BBBB"]), [("test", "1", "a"), ("test", "2", "a")]) def test_wordsplitting(self): """Test if word splitting works.""" self.dex.add_text(("test", "1", "a"), "aaaa-aaa bbbb*bbb") self.dex.add_text(("test", "2", "a"), "aaaA-aaa BBBB*BBB") for k in "aaaa", "aaa", "bbbb", "bbb": self.assertSeqEqual(self.dex.find([k]), [("test", "1", "a"), ("test", "2", "a")]) def tearDown(self): shutil.rmtree("test-index")
class IndexerTest(unittest.TestCase): def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') os.mkdir('test-index/files') from roundup.backends.indexer_dbm import Indexer self.dex = Indexer(db) self.dex.load_index() def assertSeqEqual(self, s1, s2): # First argument is the db result we're testing, second is the # desired result. Some db results don't have iterable rows, so we # have to work around that. # Also work around some dbs not returning items in the expected # order. s1 = list([tuple([r[n] for n in range(len(r))]) for r in s1]) s1.sort() if s1 != s2: self.fail('contents of %r != %r'%(s1, s2)) def test_basics(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah']), [('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah', 'hello']), []) def test_change(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), 'a the hello') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_clear(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), '') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def tearDown(self): shutil.rmtree('test-index')