Example #1
    def test_generates_vids_of_the_found_docs(self):

        class MyDict(dict):
            pass

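        # A minimal stand-in for a searcher: provides search() plus the context-manager protocol.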
        class FakeSearcher(object):
            def search(self, query, limit=20):
                # Returns search results in the form needed by search_datasets.
                result1 = MyDict({'vid': 'vid1', 'bvid': 'bvid1', 'type': 'type1'})
                result1.score = 0.5
                result2 = MyDict({'vid': 'vid2', 'bvid': 'bvid2', 'type': 'b'})
                result2.score = 0.6
                return [result1, result2]

            def __enter__(self, *args, **kwargs):
                return self

            def __exit__(self, *args, **kwargs):
                pass

        class FakeIdentifierIndex(object):
            schema = '?'

            def searcher(self, *args, **kwargs):
                return FakeSearcher()

        search = Search(self.lib)
        search._dataset_index = FakeIdentifierIndex()
        ret = search.search_partitions('about me')
        self.assertTrue(hasattr(ret, 'next'))
        vids = [x for x in ret]
        self.assertIn('vid1', vids)
        self.assertIn('vid2', vids)
Example #2
    def test_add_document_to_writer_for_each_given_identifier(self):
        # prepare state

        # TODO: This is overly complicated. Find another way to mock the indexer.
        fake_writer = fudge.Fake()\
            .expects('add_document')\
            .expects('commit')

        class FakeSearcher(object):
            pass

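        # documents() is expected to be called and returns an empty result set.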
        FakeSearcher.documents = fudge.Fake()\
            .expects_call()\
            .returns([])

        search = Search(self.lib)

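        # Fake index: writer() returns the fake writer and searcher() returns the fake searcher.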
        search._identifier_index = fudge.Fake('Index')\
            .provides('writer')\
            .returns(fake_writer)\
            .provides('searcher')\
            .returns(FakeSearcher())

        # testing
        identifiers = [
            {'identifier': 'ident1', 'type': 'type1', 'name': 'name1'},
            {'identifier': 'ident2', 'type': 'type2', 'name': 'name2'}]

        search.index_identifiers(identifiers)
        fudge.verify()
Example #3
    def test_returns_given_terms_if_place_vids_do_not_exist(self):
        # prepare state
        search = Search(self.lib)

        # testing
        ret = search.expand_place_ids('California')
        self.assertEquals(ret, 'California')
Example #4
    def test_uses_cached_identifier(self):
        # prepare state
        search = Search(self.lib)

        # Use a string as the index; it is not a valid index, but that does not matter here.
        search._identifier_index = 'INDEX'

        self.assertEquals(search.identifier_index, 'INDEX')
Example #5
    def test_removes_index_dir(self, fake_exists, fake_rmtree):
        # prepare state.
        search = Search(self.lib)
        fake_exists.expects_call().with_args(search.d_index_dir).returns(True)
        fake_rmtree.expects_call().with_args(search.d_index_dir)

        # testing
        search.reset()
        self.assertIsNone(search._dataset_index)
Example #6
    def test_returns_years_range(self):

        # prepare state
        search = Search(self.lib)

        # testing
        from_year = 1995
        to_year = 1996
        ret = search.from_to_as_term(from_year, to_year)
        self.assertEquals(ret, '[1995 TO 1996]')
Example #7
    def test_returns_second_year_if_wrong_first_given(self):

        # prepare state
        search = Search(self.lib)

        # testing
        from_year = '1996'
        to_year = 'not-year'
        ret = search.from_to_as_term(from_year, to_year)
        self.assertEquals(ret, '[1996 TO]')
Example #8
    def test_returns_None_if_both_are_wrong(self):

        # prepare state
        search = Search(self.lib)

        # testing
        from_year = 'not-year'
        to_year = 'not-year'
        ret = search.from_to_as_term(from_year, to_year)
        self.assertIsNone(ret)
Example #9
    def test_generates_vids_found_by_searcher(self):
        # prepare state.

        search = Search(self.lib)
        search._dataset_index = self._get_fake_identifier()

        # testing
        datasets_gen = search.datasets
        self.assertTrue(hasattr(datasets_gen, 'next'))
        datasets = [x for x in datasets_gen]
        self.assertEquals(datasets, ['vid2'])
Example #10
    def test_indexes_library_datasets(self):

        # prepare state.
        DatasetFactory()
        DatasetFactory()

        search = Search(self.lib)
        search.index_dataset = fudge.Fake().expects_call()

        # testing
        with fudge.patched_context(search, 'all_datasets', []):
            search.index_datasets()
Example #11
    def test_uses_library_driver_backend(self):
        self._my_library.config.services.search = None

        # switch to sqlite.
        self._my_library.database.driver = 'sqlite'
        search = Search(self._my_library)
        self.assertIsInstance(search.backend, SQLiteSearchBackend)

        # switch to postgres.
        self._my_library.database.driver = 'postgres'
        search = Search(self._my_library)
        self.assertIsInstance(search.backend, PostgreSQLSearchBackend)
Example #12
    def test_contains_generator_with_documents_found_by_searcher(self):
        # prepare state
        search = Search(self.lib)
        search._identifier_index = self._get_fake_identifier()

        # testing
        identifiers_gen = search.identifiers
        self.assertTrue(hasattr(identifiers_gen, 'next'))

        identifiers = [x for x in identifiers_gen]
        self.assertEquals(len(identifiers), 2)
        self.assertIn('identifier', identifiers[0])
        self.assertIn('identifier', identifiers[1])
Example #13
    def test_generates_vids_of_the_partitions_found_by_searcher(self):
        # prepare state.

        search = Search(self.lib)
        search._dataset_index = self._get_fake_identifier()

        # testing
        partitions_gen = search.partitions

        # it returns generator.
        self.assertTrue(hasattr(partitions_gen, 'next'))
        partitions = [x for x in partitions_gen]
        self.assertEquals(partitions, ['vid1'])
Example #14
    def test_generates_results_found_by_searcher(self):
        # prepare state
        search = Search(self.lib)
        search._identifier_index = self._get_fake_identifier()

        # testing
        ret = search.search_identifiers('about me')

        # it is a generator.
        self.assertTrue(hasattr(ret, 'next'))
        expected_result = [(0.5, 'bvid1', 'p', False), (0.6, 'bvid2', 'b', False)]
        result = [x for x in ret]
        self.assertEquals(result, expected_result)
Example #15
    def test_indexes_library_datasets(self):
        ds1 = MagicMock(spec=Dataset)
        ds2 = MagicMock(spec=Dataset)
        ds3 = MagicMock(spec=Dataset)
        self._my_library.datasets = [ds1, ds2, ds3]

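        # Back the Search instance with a fully mocked backend so no real index is touched.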
        fake_backend = MagicMock(spec=SQLiteSearchBackend)
        fake_backend.dataset_index = Mock()
        fake_backend.partition_index = Mock()
        fake_backend.identifier_index = Mock()
        search = Search(self._my_library, backend=fake_backend)
        search.index_library_datasets()
        self.assertEqual(len(fake_backend.dataset_index.index_one.mock_calls), 3)
Example #16
    def test_tick_fn_gets_each_vid(self):

        # prepare state.
        DatasetFactory()
        DatasetFactory()

        search = Search(self.lib)
        search.index_dataset = fudge.Fake().expects_call()
        tick_f = fudge.Fake()\
            .expects_call().with_args('datasets: 1 partitions: 0')\
            .next_call().with_args('datasets: 2 partitions: 0')

        # testing
        with fudge.patched_context(search, 'all_datasets', []):
            search.index_datasets(tick_f=tick_f)
Example #17
    def test_logs_error_to_library_logger(self, fake_exists):

        # prepare state.
        SCHEMA = 'schema'

        # We have to create the Search instance before mocking because __init__ uses the os module.
        search = Search(self.lib)

        fake_exists.expects_call().raises(Exception('My fake exception.'))
        fake_error = fudge.Fake('error').expects_call()

        # testing
        with fudge.patched_context(self.sqlite_db.logger, 'error', fake_error):
            with self.assertRaises(Exception):
                search.get_or_new_index(SCHEMA, search.d_index_dir)
Example #18
    def test_uses_backend_from_config(self, fake_init):
        # Disable backend initialization to reduce the amount of mocking.
        fake_init.return_value = None

        self._my_library.config.services.search = 'whoosh'
        search = Search(self._my_library)
        self.assertIsInstance(search.backend, WhooshSearchBackend)
Example #19
    def test_opens_existing_index_if_path_exists(self, fake_exists):
        # First, assert that the signatures of the functions we are going to mock have not changed.
        assert_spec(index.open_dir, ['dirname', 'indexname', 'readonly', 'schema'])

        # prepare state.
        SCHEMA = 'schema'

        # We have to create the Search instance before mocking because __init__ uses the os module.
        search = Search(self.lib)

        fake_exists.expects_call().with_args(search.d_index_dir).returns(True)
        fake_open_dir = fudge.Fake().expects_call().with_args(search.d_index_dir)

        # testing
        with fudge.patched_context(index, 'open_dir', fake_open_dir):
            search.get_or_new_index(SCHEMA, search.d_index_dir)
Example #20
    def test_raises_missing_backend_exception_if_config_contains_invalid_backend(
            self):
        # config.services.search is expected to contain an invalid backend name here.
        with self.assertRaises(Exception) as cm:
            Search(self._my_library)
        self.assertIn('Missing backend', str(cm.exception))
Example #21
    def test_returns_dict_with_datasets_found_by_searcher(self):

        search = Search(self.lib)
        search._dataset_index = self._get_fake_identifier()
        ret = search.search_datasets('about me')

        self.assertIsInstance(ret, dict)
        self.assertIn('bvid1', ret)
        self.assertIn('bvid2', ret)

        # scores copied properly
        self.assertEquals(ret['bvid1'].p_score, 0.5)
        self.assertEquals(ret['bvid1'].b_score, 0)

        self.assertEquals(ret['bvid2'].p_score, 0)
        self.assertEquals(ret['bvid2'].b_score, 0.6)
Example #22
    def test_uses_default_backend_if_library_database_search_is_not_implemented(
            self, fake_init):
        # Disable backend initialization to reduce the amount of mocking.
        fake_init.return_value = None
        self._my_library.config.services.search = None
        with patch.object(self._my_library.database, 'driver', 'mysql'):
            search = Search(self._my_library)
            self.assertIsInstance(search.backend, WhooshSearchBackend)
Example #23
    def test_creates_new_index_if_path_does_not_exist(self, fake_exists, fake_makedirs):
        # First, assert that the signatures of the functions we are going to mock have not changed.
        assert_spec(index.create_in, ['dirname', 'schema', 'indexname'])

        # prepare state.
        SCHEMA = 'schema'
        DIR = 'the-dir'

        # We have to create the Search instance before mocking because __init__ uses the os module.
        search = Search(self.lib)

        fake_exists.expects_call().with_args(DIR).returns(False)
        fake_makedirs.expects_call().with_args(DIR)
        fake_create_in = fudge.Fake().expects_call().with_args(DIR, SCHEMA)

        # testing
        with fudge.patched_context(index, 'create_in', fake_create_in):
            search.get_or_new_index(SCHEMA, DIR)
Example #24
    def test_returns_place_vids(self):
        # First, assert that the signatures of the functions we are going to mock have not changed.
        assert_spec(Search.search_identifiers, ['self', 'search_phrase', 'limit'])
        assert_spec(GVid.parse, ['cls', 'gvid'])

        # prepare state
        search = Search(self.lib)
        score = 1
        vid = 'vid-1'
        t = 'type'
        name = 'California1'
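        # search_identifiers is stubbed to return one (score, vid, type, name) hit; GVid.parse is stubbed to return an empty parse result.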
        fake_search = fudge.Fake().expects_call().returns([(score, vid, t, name)])
        fake_parse = fudge.Fake().expects_call().returns([])

        # testing
        with fudge.patched_context(Search, 'search_identifiers', fake_search):
            with fudge.patched_context(GVid, 'parse', fake_parse):
                ret = search.expand_place_ids('California')
                self.assertEquals(ret, [vid])
Example #25
    def test_feeds_tick_function_with_indexed_dataset(self):
        # prepare mocks
        fake_backend = MagicMock(spec=SQLiteSearchBackend)
        fake_backend.dataset_index = Mock()
        fake_backend.partition_index = Mock()
        fake_backend.identifier_index = Mock()

        tick_f = Mock()

        fake_library = MagicMock(spec=Library)
        fake_dataset = MagicMock(spec=Dataset)
        fake_library.datasets = [fake_dataset]

        # run
        search = Search(fake_library, backend=fake_backend)
        search.index_library_datasets(tick_f=tick_f)

        # test
        tick_f.assert_called_once_with('datasets: 1 partitions: 0')
Example #26
    def test_postgres_query(self):
        self._my_library.config.services.search = None

        # switch to postgres.
        self._my_library.database.driver = 'postgres'
        search = Search(self._my_library)
        self.assertIsInstance(search.backend, PostgreSQLSearchBackend)

        be = search.backend

        pg = be._get_dataset_index()

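        # Inspect the query that the PostgreSQL dataset index builds from a free-text search phrase.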
        q = pg._make_query_from_terms(
            'source healthindicators.gov diabetes asthma')

        print str(q[0])
        print q[1]
Example #27
    def __init__(self, config=None, search=None, echo=None, read_only=False):
        from sqlalchemy.exc import OperationalError
        from ambry.orm.exc import DatabaseMissingError

        if config:
            self._config = config
        else:
            self._config = get_runconfig()

        self.logger = logger

        self.read_only = read_only  # allow optimizations that assume we aren't building bundles.

        self._echo = echo

        self._fs = LibraryFilesystem(config)

        self._db = Database(self._fs.database_dsn, echo=echo)

        self._account_password = self.config.accounts.password

        self._warehouse = None  # Will be populated in the warehouse property.

        try:
            self._db.open()
        except OperationalError as e:

            raise DatabaseMissingError(
                "Failed to open database '{}': {} ".format(self._db.dsn, e))

        self.processes = None  # Number of multiprocessing processes. Defaults to all of them.

        if search:
            self._search = Search(self, search)
        else:
            self._search = None
Example #28
    def test_converts_by_to_terms(self):
        search = Search(self.lib)
        cterms = search.make_query_from_terms({'by': 'Beslan'})
        expected = '( type:p AND keywords:(Beslan) )'
        self.assertEquals(cterms, expected)
Example #29
    def test_converts_source_to_terms(self):
        search = Search(self.lib)
        cterms = search.make_query_from_terms({'source': 'Beslan'})
        expected = ' (type:b AND keywords:Beslan ) AND '
        self.assertEquals(cterms, expected)
Example #30
    def test_converts_with_to_terms(self):
        search = Search(self.lib)
        cterms = search.make_query_from_terms({'with': 'Beslan'})
        expected = '( type:p AND doc:(Beslan) )'
        self.assertEquals(cterms, expected)
Example #31
    def test_joins_terms_with_or(self):
        search = Search(self.lib)
        cterms = search.make_query_from_terms({'by': 'Beslan', 'about': 'Beslan'})
        expected = '( type:b AND doc:(Beslan) ) OR ( type:p AND keywords:(Beslan) AND doc:(Beslan) )'
        self.assertEquals(cterms, expected)
Example #32
    def search(self):
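        # Lazily create the Search instance on first access and cache it.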
        if not self._search:
            self._search = Search(self)

        return self._search
Example #33
    def test_converts_string_to_terms(self):
        search = Search(self.lib)
        cterms = search.make_query_from_terms('about Beslan')
        expected = '( type:b AND doc:(beslan) ) OR ( type:p AND doc:(beslan) )'
        self.assertEquals(cterms, expected)