def modify_datapackage(dp, parameters, *_):
    """Push the datapackage state to the package registry, then return ``dp``.

    ``parameters`` must contain ``'dataset-id'``; ``'loaded'``, ``'private'``,
    ``'owner'`` and ``'datapackage-url'`` are optional.  Nothing is sent to
    the registry when ``ES_ADDRESS`` is falsy.  ``dp`` itself is never
    modified: all changes are applied to a deep copy.
    """
    dataset_id = parameters['dataset-id']
    is_loaded = parameters.get('loaded')
    is_private = parameters.get('private')
    owner_id = parameters.get('owner')
    url = parameters.get('datapackage-url')

    if not ES_ADDRESS:
        return dp

    registry = PackageRegistry(ES_ADDRESS)
    descriptor = copy.deepcopy(dp)
    update_kwargs = {}

    # The babbage model is sent as its own field, not inside the descriptor.
    if 'babbageModel' in descriptor:
        update_kwargs['model'] = descriptor.pop('babbageModel')

    for key, value in (('private', is_private), ('owner', owner_id)):
        if value is not None:
            descriptor[key] = value

    # The descriptor is only sent together with a datapackage URL.
    if url:
        update_kwargs['datapackage_url'] = url
        update_kwargs['datapackage'] = descriptor

    if is_loaded is not None:
        update_kwargs['loaded'] = is_loaded
        update_kwargs['loading_status'] = (
            'done' if is_loaded else 'loading-data'
        )

    registry.update_model(dataset_id, **update_kwargs)
    return dp
 def update_model_in_registry(pkg, loaded):
     """Save ``pkg``'s descriptor to the package registry.

     Reads ``self`` from the enclosing scope.  If the registry cannot be
     constructed, the failure is logged and the function returns without
     saving.  ``loaded`` is passed through and also selects the status
     string ('done' or 'loading-data').
     """
     try:
         registry = PackageRegistry(self.output_es)
     except Exception as error:
         logger.info('STATUS: FAILED TO UPDATE MODEL')
         logger.exception(error)
         return

     owner = self.owner_id
     taxonomy_id = self.config.get(CONFIG_TAXONOMY_ID)
     metadata_name = self.config.get(CONFIG_EXTRA_METADATA_DATASET_NAME)
     dataset_name = '{}_{}'.format(taxonomy_id, metadata_name)
     dataset_id = '{}:{}'.format(owner, dataset_name)
     is_private = self.config.get(CONFIG_EXTRA_PRIVATE)

     # Work on a copy so the package's own descriptor stays untouched.
     descriptor = copy.deepcopy(pkg.descriptor)
     descriptor['private'] = is_private
     descriptor['owner'] = owner

     status = 'done' if loaded else 'loading-data'
     registry.save_model(
         dataset_id,
         # TODO: replace by real URL
         'datapackage-url',
         descriptor,
         descriptor.get('babbage_model'),
         dataset_name,
         'openspending',
         status,
         loaded
     )
     logger.info('STATUS: UPDATED MODEL')
    def setUp(self):
        """Drop the 'packages' index if present and build a fresh registry."""
        es_client = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        self.es = es_client
        try:
            es_client.indices.delete(index='packages')
        except NotFoundError:
            # Index did not exist yet, so there is nothing to clean.
            pass
        self.pr = PackageRegistry(es_instance=es_client)
示例#4
0
    def setUp(self):
        """Drop the packages index if present and build a fresh registry."""
        es_client = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        self.es = es_client
        try:
            es_client.indices.delete(index=PACKAGES_INDEX_NAME)
        except NotFoundError:
            # Index did not exist yet, so there is nothing to clean.
            pass
        self.pr = PackageRegistry(
            es_instance=es_client,
            index_name=PACKAGES_INDEX_NAME,
        )
示例#5
0
def remove_package(id, index, es_host):
    """Remove package(s) with id.

    ``id`` is an iterable of package ids.  A message is echoed for every id
    whose deletion returns ``False``.  Raises ``click.UsageError`` when
    ``es_host`` is ``None``.
    """
    if es_host is None:
        raise click.UsageError('No es-host provided. See help for details.')

    wants_ssl = 'https' in es_host
    client = Elasticsearch(hosts=[es_host], use_ssl=wants_ssl)
    registry = PackageRegistry(es_instance=client, index_name=index)

    for package_id in id:
        deleted = registry.delete_model(package_id)
        if deleted is not False:
            continue
        click.echo('Package "{}" not found in index.'.format(package_id))
示例#6
0
    def setUp(self):
        """Reset the 'users' and 'packages' indices and seed one package.

        The 'users' index is recreated empty, and a single model named
        ``self.DATASET_NAME`` is saved through a fresh ``PackageRegistry``.
        """
        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        # Delete each index in its own try block.  With a single shared
        # block, a missing 'users' index raised NotFoundError before
        # 'packages' was deleted, leaving stale packages from earlier runs.
        for index_name in ('users', 'packages'):
            try:
                self.es.indices.delete(index=index_name)
            except NotFoundError:
                pass
        self.es.indices.create('users')
        # presumably gives Elasticsearch time to make the new index usable
        time.sleep(1)

        self.pr = PackageRegistry(es_connection_string=LOCAL_ELASTICSEARCH)
        self.pr.save_model(self.DATASET_NAME, 'datapackage_url', {}, {},
                           'dataset', 'author', '', True)
class PublishAPITests(unittest.TestCase):
    """Tests for ``module.toggle_publish`` against a local Elasticsearch.

    Every test starts from one saved package called 'name' whose 'private'
    flag has not been set.
    """

    def setUp(self):
        """Reset the 'users' and 'packages' indices and seed one package."""
        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        # Delete each index in its own try block.  With a single shared
        # block, a missing 'users' index raised NotFoundError before
        # 'packages' was deleted, leaving stale packages from earlier runs.
        for index_name in ('users', 'packages'):
            try:
                self.es.indices.delete(index=index_name)
            except NotFoundError:
                pass
        self.es.indices.create('users')
        # presumably gives Elasticsearch time to make the new index usable
        time.sleep(1)

        self.pr = PackageRegistry(es_connection_string=LOCAL_ELASTICSEARCH)
        self.pr.save_model('name', 'datapackage_url', {}, {}, 'dataset', 'author', '', True)

    def test__initial_value__none(self):
        pkg = self.pr.get_package('name')
        assert pkg.get('private') is None

    def test__toggle__published(self):
        module.toggle_publish('name', token, toggle=True)
        pkg = self.pr.get_package('name')
        assert pkg.get('private') is True

    def test__toggle_twice__not_published(self):
        module.toggle_publish('name', token, toggle=True)
        module.toggle_publish('name', token, toggle=True)
        pkg = self.pr.get_package('name')
        assert pkg.get('private') is False

    def test__force_publish_initial__correct(self):
        module.toggle_publish('name', token, publish=True)
        pkg = self.pr.get_package('name')
        assert pkg.get('private') is False

    def test__force_publish__correct(self):
        module.toggle_publish('name', token, toggle=True)
        module.toggle_publish('name', token, publish=True)
        pkg = self.pr.get_package('name')
        assert pkg.get('private') is False

    def test__force_unpublish_initial__correct(self):
        module.toggle_publish('name', token, publish=False)
        pkg = self.pr.get_package('name')
        assert pkg.get('private') is True

    def test__force_unpublish__correct(self):
        module.toggle_publish('name', token, toggle=True)
        module.toggle_publish('name', token, toggle=True)
        module.toggle_publish('name', token, publish=False)
        pkg = self.pr.get_package('name')
        assert pkg.get('private') is True
    def setUp(self):
        """Reset the 'users' and 'packages' indices and seed one package.

        The 'users' index is recreated empty, and a single model called
        'name' is saved through a fresh ``PackageRegistry``.
        """
        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        # Delete each index in its own try block.  With a single shared
        # block, a missing 'users' index raised NotFoundError before
        # 'packages' was deleted, leaving stale packages from earlier runs.
        for index_name in ('users', 'packages'):
            try:
                self.es.indices.delete(index=index_name)
            except NotFoundError:
                pass
        self.es.indices.create('users')
        # presumably gives Elasticsearch time to make the new index usable
        time.sleep(1)

        self.pr = PackageRegistry(es_connection_string=LOCAL_ELASTICSEARCH)
        self.pr.save_model('name', 'datapackage_url', {}, {}, 'dataset', 'author', '', True)
示例#9
0
class UpdateDefaultParamsAPITests(unittest.TestCase):
    """Tests for ``module.update_params`` against a local Elasticsearch.

    Every test starts from one saved package, ``DATASET_NAME``, that has no
    'defaultParams' entry yet.
    """

    DATASET_NAME = 'owner:datasetid'
    DEFAULT_PARAMS = {'param1': True, 'param2': 'hello'}

    def setUp(self):
        """Reset the users and packages indices and seed one package."""
        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        # Delete each index in its own try block.  With a single shared
        # block, a missing users index raised NotFoundError before the
        # packages index was deleted, leaving stale packages behind.
        for index_name in (USERS_INDEX_NAME, PACKAGES_INDEX_NAME):
            try:
                self.es.indices.delete(index=index_name)
            except NotFoundError:
                pass
        self.es.indices.create(USERS_INDEX_NAME)
        # presumably gives Elasticsearch time to make the new index usable
        time.sleep(1)

        self.pr = PackageRegistry(es_connection_string=LOCAL_ELASTICSEARCH,
                                  index_name=PACKAGES_INDEX_NAME)
        self.pr.save_model(self.DATASET_NAME, 'datapackage_url', {}, {},
                           'dataset', 'author', '', True)

    def test__initial_value__none(self):
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('defaultParams') is None

    def test__update_params__empty_params(self):
        module.update_params(self.DATASET_NAME, token, {})
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('defaultParams') == {}

    def test__update_params__with_value(self):
        module.update_params(self.DATASET_NAME, token, self.DEFAULT_PARAMS)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('defaultParams') == self.DEFAULT_PARAMS

    def test__update_params__bad_owner(self):
        # An id with a different owner must leave the seeded package as is.
        module.update_params('badowner:datasetid', token, self.DEFAULT_PARAMS)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('defaultParams') is None

    def test__update_params__bad_package_id(self):
        # An unknown dataset id must leave the seeded package as is.
        module.update_params('owner:baddatasetid', token, self.DEFAULT_PARAMS)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('defaultParams') is None
示例#10
0
def create_app():
    """Build and return the 'os_api' Flask application.

    Wraps the WSGI app in ``ProxyFix``, creates a ``PackageRegistry`` from
    ``OS_ELASTICSEARCH_ADDRESS`` (default 'localhost:9200'), registers the
    babbage and info blueprints under '/api/3', stores the registry in
    ``extensions['model_registry']`` and enables CORS and Sentry.
    """
    logging.info('OS-API create_app')

    app = Flask('os_api')
    app.wsgi_app = ProxyFix(app.wsgi_app)

    es_address = os.environ.get('OS_ELASTICSEARCH_ADDRESS', 'localhost:9200')
    registry = PackageRegistry(es_connection_string=es_address)
    manager = OSCubeManager(get_engine(), registry)

    logging.info('OS-API configuring query blueprints')
    api_prefix = '/api/3'
    babbage_blueprint = configure_babbage_api(app, manager)
    app.register_blueprint(babbage_blueprint, url_prefix=api_prefix)
    app.register_blueprint(infoAPI, url_prefix=api_prefix)

    app.extensions['model_registry'] = registry

    CORS(app)
    sentry_dsn = os.environ.get('SENTRY_DSN', '')
    Sentry(app, dsn=sentry_dsn)

    logging.info('OS-API app created')
    return app
class SearchTest(unittest.TestCase):
    """Tests for ``module.search`` against a local 'packages' index.

    The ``indexSome*`` helpers populate the index, either directly through
    the Elasticsearch client or through ``PackageRegistry.save_model``.
    """

    # Actions

    def setUp(self):
        """Drop the 'packages' index if present and build a fresh registry."""
        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        try:
            self.es.indices.delete(index='packages')
        except NotFoundError:
            pass
        self.pr = PackageRegistry(es_instance=self.es)

    @staticmethod
    def _ids(recs):
        """Return the set of 'id' values found in ``recs``."""
        return {r['id'] for r in recs}

    def indexSomeRecords(self, amount):
        """Recreate the index with ``amount`` raw documents."""
        self.es.indices.delete(index='packages')
        for i in range(amount):
            body = {
                'id': True,
                'package': i,
                'model': 'str%s' % i,
                'origin_url': {
                    'name': 'innername'
                }
            }
            self.es.index('packages', 'package', body)
        self.es.indices.flush('packages')

    def indexSomeRecordsToTestMapping(self):
        """Save three packages whose author values look like e-mail addresses."""
        # NOTE(review): the '*****@*****.**' literals look like redacted
        # e-mail format strings.  They contain no '%' placeholder, so the
        # '%' operator raises TypeError as written -- restore the original
        # literals before relying on this helper.
        for i in range(3):
            self.pr.save_model('package-id-%d' % i,
                               '', {
                                   'author': '*****@*****.**' % i,
                                   'title': 'This dataset is number test%d' % i
                               }, {}, {},
                               '*****@*****.**' % (i+1), '', True)

    def indexSomeRealLookingRecords(self, amount):
        """Save ``amount`` packages with numbered authors and titles."""
        for i in range(amount):
            self.pr.save_model('package-id-%d' % i,
                               '', {
                                   'author': 'The one and only author number%d' % (i+1),
                                   'title': 'This dataset is number%d' % i
                               }, {}, {},
                               'The one and only author number%d' % (i+1), '', True)

    def indexSomePrivateRecords(self):
        """Save one package per owner/private/loaded/content combination."""
        i = 0
        for owner in ['owner1', 'owner2']:
            for private in [True, False]:
                for loaded in [True, False]:
                    for content in ['cat', 'dog']:
                        self.pr.save_model('%s-%s-%s-%s' % (owner, private, loaded, content),
                                           '', {
                                               'author': 'The one and only author number%d' % (i+1),
                                               'title': 'This dataset is number%d, content is %s' % (i, content),
                                               'owner': owner,
                                               'private': private
                                           }, {}, {},
                                           'The one and only author number%d' % (i+1), '', loaded)
                        i += 1
        self.es.indices.flush('packages')

    # Tests
    # assertEqual replaces the deprecated assertEquals alias throughout.
    def test___search___all_values_and_empty(self):
        self.assertEqual(len(module.search('package', None)), 0)

    def test___search___all_values_and_one_result(self):
        self.indexSomeRecords(1)
        self.assertEqual(len(module.search('package', None)), 1)

    def test___search___all_values_and_two_results(self):
        self.indexSomeRecords(2)
        self.assertEqual(len(module.search('package', None)), 2)

    def test___search___filter_simple_property(self):
        self.indexSomeRecords(10)
        self.assertEqual(len(module.search('package', None, {'model': ['"str7"']})), 1)

    def test___search___filter_numeric_property(self):
        self.indexSomeRecords(10)
        self.assertEqual(len(module.search('package', None, {'package': ["7"]})), 1)

    def test___search___filter_boolean_property(self):
        self.indexSomeRecords(10)
        self.assertEqual(len(module.search('package', None, {'id': ["true"]})), 10)

    def test___search___filter_multiple_properties(self):
        self.indexSomeRecords(10)
        self.assertEqual(len(module.search('package', None, {'model': ['"str6"'], 'package': ["6"]})), 1)

    def test___search___filter_multiple_values_for_property(self):
        self.indexSomeRecords(10)
        self.assertEqual(len(module.search('package', None, {'model': ['"str6"', '"str7"']})), 2)

    def test___search___filter_inner_property(self):
        self.indexSomeRecords(7)
        self.assertEqual(len(module.search('package', None, {"origin_url.name": ['"innername"']})), 7)

    def test___search___filter_no_results(self):
        self.assertEqual(len(module.search('package', None, {'model': ['"str6"'], 'package': ["7"]})), 0)

    def test___search___filter_bad_value(self):
        self.assertIsNone(module.search('package', None, {'model': ['str6'], 'package': ["6"]}))

    def test___search___filter_nonexistent_kind(self):
        self.assertIsNone(module.search('box', None, {'model': ['str6'], 'package': ["6"]}))

    def test___search___filter_nonexistent_property(self):
        self.assertIsNone(module.search('box', None, {'model': ['str6'], 'boxing': ["6"]}))

    def test___search___q_param_no_recs_no_results(self):
        self.indexSomeRealLookingRecords(0)
        self.assertEqual(len(module.search('package', None, {'q': ['"author"']})), 0)

    def test___search___q_param_some_recs_no_results(self):
        self.indexSomeRealLookingRecords(2)
        self.assertEqual(len(module.search('package', None, {'q': ['"writer"']})), 0)

    def test___search___q_param_some_recs_some_results(self):
        self.indexSomeRealLookingRecords(2)
        results = module.search('package', None, {'q': ['"number1"']})
        self.assertEqual(len(results), 1)

    def test___search___q_param_some_recs_all_results(self):
        self.indexSomeRealLookingRecords(10)
        results = module.search('package', None, {'q': ['"dataset shataset"']})
        self.assertEqual(len(results), 10)

    def test___search___empty_anonymous_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', None)
        self.assertEqual(len(recs), 4)
        self.assertSetEqual(self._ids(recs), {'owner1-False-True-cat',
                                              'owner2-False-True-cat',
                                              'owner1-False-True-dog',
                                              'owner2-False-True-dog',
                                              })

    def test___search___empty_authenticated_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', 'owner1')
        self.assertSetEqual(self._ids(recs), {'owner1-False-False-cat',
                                              'owner1-False-True-cat',
                                              'owner1-True-False-cat',
                                              'owner1-True-True-cat',
                                              'owner2-False-True-cat',
                                              'owner1-False-False-dog',
                                              'owner1-False-True-dog',
                                              'owner1-True-False-dog',
                                              'owner1-True-True-dog',
                                              'owner2-False-True-dog',
                                              })
        self.assertEqual(len(recs), 10)

    def test___search___q_param_anonymous_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', None, {'q': ['"cat"']})
        self.assertEqual(len(recs), 2)
        self.assertSetEqual(self._ids(recs), {'owner1-False-True-cat',
                                              'owner2-False-True-cat',
                                              })

    def test___search___q_param_anonymous_search_with_param(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', None, {'q': ['"cat"'], 'package.owner': ['"owner1"']})
        self.assertEqual(len(recs), 1)
        self.assertSetEqual(self._ids(recs), {'owner1-False-True-cat'})

    def test___search___q_param_authenticated_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', 'owner1', {'q': ['"cat"']})
        self.assertSetEqual(self._ids(recs), {'owner1-False-False-cat',
                                              'owner1-False-True-cat',
                                              'owner1-True-False-cat',
                                              'owner1-True-True-cat',
                                              'owner2-False-True-cat',
                                              })
        self.assertEqual(len(recs), 5)

    def test___search___q_param_with_similar_param(self):
        self.indexSomeRecordsToTestMapping()
        recs = module.search('package', None, {'q': ['"test2"']})
        self.assertSetEqual(self._ids(recs), {'package-id-2'})
        self.assertEqual(len(recs), 1)

        recs = module.search('package', None, {'q': ['"dataset"'], 'package.author': ['"*****@*****.**"']})
        self.assertSetEqual(self._ids(recs), {'package-id-2'})
        self.assertEqual(len(recs), 1)

        recs = module.search('package', None, {'package.author': ['"*****@*****.**"']})
        self.assertSetEqual(self._ids(recs), {'package-id-2'})
        self.assertEqual(len(recs), 1)
示例#12
0
class PublishDeleteAPITests(unittest.TestCase):
    """Tests for ``module.toggle_publish`` and ``module.delete_package``.

    Every test starts from one saved package, ``DATASET_NAME``, whose
    'private' flag has not been set.
    """

    DATASET_NAME = 'owner:datasetid'

    def setUp(self):
        """Reset the 'users' and 'packages' indices and seed one package."""
        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        # Delete each index in its own try block.  With a single shared
        # block, a missing 'users' index raised NotFoundError before
        # 'packages' was deleted, leaving stale packages from earlier runs.
        for index_name in ('users', 'packages'):
            try:
                self.es.indices.delete(index=index_name)
            except NotFoundError:
                pass
        self.es.indices.create('users')
        # presumably gives Elasticsearch time to make the new index usable
        time.sleep(1)

        self.pr = PackageRegistry(es_connection_string=LOCAL_ELASTICSEARCH)
        self.pr.save_model(self.DATASET_NAME, 'datapackage_url', {}, {},
                           'dataset', 'author', '', True)

    def test__initial_value__none(self):
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('private') is None

    def test__delete(self):
        ret = module.delete_package(self.DATASET_NAME, token)
        assert ret['success'] is True
        # After deletion the lookup may either return None or raise KeyError;
        # both outcomes are accepted.
        try:
            pkg = self.pr.get_package(self.DATASET_NAME)
            assert pkg is None
        except KeyError:
            pass
        # Deleting a second time must report failure.
        ret = module.delete_package(self.DATASET_NAME, token)
        assert ret['success'] is False

    def test__toggle__published(self):
        module.toggle_publish(self.DATASET_NAME, token, toggle=True)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('private') is True

    def test__toggle_twice__not_published(self):
        module.toggle_publish(self.DATASET_NAME, token, toggle=True)
        module.toggle_publish(self.DATASET_NAME, token, toggle=True)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('private') is False

    def test__force_publish_initial__correct(self):
        module.toggle_publish(self.DATASET_NAME, token, publish=True)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('private') is False

    def test__force_publish__correct(self):
        module.toggle_publish(self.DATASET_NAME, token, toggle=True)
        module.toggle_publish(self.DATASET_NAME, token, publish=True)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('private') is False

    def test__force_unpublish_initial__correct(self):
        module.toggle_publish(self.DATASET_NAME, token, publish=False)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('private') is True

    def test__force_unpublish__correct(self):
        module.toggle_publish(self.DATASET_NAME, token, toggle=True)
        module.toggle_publish(self.DATASET_NAME, token, toggle=True)
        module.toggle_publish(self.DATASET_NAME, token, publish=False)
        pkg = self.pr.get_package(self.DATASET_NAME)
        assert pkg.get('private') is True
示例#13
0
    # Fragment of a larger routine: `es`, `source_index`, `backup_index` and
    # `target_index` are defined above the visible part.
    if target_index is None:
        # Fall back to a fixed name when no target index was supplied.
        target_index = 'packages-1'

    # The target index is deleted before the copy and the source index after
    # it, so the two names must differ.
    assert source_index != target_index

    logging.info('SOURCE INDEX %s', source_index)
    logging.info('TARGET INDEX %s', target_index)
    logging.info('BACKUP INDEX %s', backup_index)

    # Start from a clean target; a missing target index is not an error.
    try:
        logging.info('DELETING TARGET INDEX')
        es.indices.delete(target_index)
    except NotFoundError:
        logging.info('TARGET INDEX NOT FOUND')

    source_pr = PackageRegistry(es_instance=es, index_name=source_index)
    backup_pr = PackageRegistry(es_instance=es, index_name=backup_index)

    # Source -> backup first, before anything is removed.
    # NOTE(review): reindex() is defined elsewhere; presumably it copies every
    # record from the first registry into the second -- confirm.
    reindex(source_pr, backup_pr)

    # Drop the 'packages' alias from the source index if it is set there.
    if es.indices.exists_alias(source_index, 'packages'):
        es.indices.delete_alias(source_index, 'packages')

    target_pr = PackageRegistry(es_instance=es, index_name=target_index)
    reindex(source_pr, target_pr)

    # Remove the source index and point the 'packages' alias at the target.
    es.indices.delete(source_index)
    es.indices.put_alias(target_index, 'packages')

    # Find orphan DB tables
    used_tables = set()
示例#14
0
import os

from elasticsearch import Elasticsearch

from os_package_registry import PackageRegistry

# ## ElasticSearch
# Module-wide Elasticsearch client, created on first use.
_es_engine = None


def _get_es_engine():
    """Return the shared Elasticsearch client, creating it on first call.

    The host is read from the ``OS_ELASTICSEARCH_ADDRESS`` environment
    variable; SSL is enabled when the host string contains 'https'.
    """
    global _es_engine
    if _es_engine is not None:
        return _es_engine
    host = os.environ['OS_ELASTICSEARCH_ADDRESS']
    _es_engine = Elasticsearch(hosts=[host], use_ssl='https' in host)
    return _es_engine


# Package Registry
package_registry = PackageRegistry(es_instance=_get_es_engine())
示例#15
0
class SearchTest(unittest.TestCase):
    """Tests for ``module.search`` against the ``PACKAGES_INDEX_NAME`` index.

    The ``indexSome*`` helpers populate the index, either directly through
    the Elasticsearch client or through ``PackageRegistry.save_model``.
    """

    # Actions

    def setUp(self):
        """Drop the packages index if present and build a fresh registry."""
        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        try:
            self.es.indices.delete(index=PACKAGES_INDEX_NAME)
        except NotFoundError:
            pass
        self.pr = PackageRegistry(es_instance=self.es,
                                  index_name=PACKAGES_INDEX_NAME)

    @staticmethod
    def _ids(recs):
        """Return the set of 'id' values found in ``recs``."""
        return {r['id'] for r in recs}

    def indexSomeRecords(self, amount):
        """Recreate the index with ``amount`` raw documents."""
        self.es.indices.delete(index=PACKAGES_INDEX_NAME)
        for i in range(amount):
            body = {
                'id': True,
                'package': i,
                'model': 'str%s' % i,
                'origin_url': {
                    'name': 'innername'
                }
            }
            self.es.index(PACKAGES_INDEX_NAME, 'package', body)
        self.es.indices.flush(PACKAGES_INDEX_NAME)

    def indexSomeRecordsToTestMapping(self):
        """Save three packages whose author values look like e-mail addresses."""
        # NOTE(review): the '*****@*****.**' literals look like redacted
        # e-mail format strings.  They contain no '%' placeholder, so the
        # '%' operator raises TypeError as written -- restore the original
        # literals before relying on this helper.
        for i in range(3):
            self.pr.save_model(
                'package-id-%d' % i, '', {
                    'author': '*****@*****.**' % i,
                    'title': 'This dataset is number test%d' % i
                }, {}, 'dataset-name-%s' % i, '*****@*****.**' % (i + 1),
                '', True)

    def indexSomeRealLookingRecords(self, amount):
        """Save ``amount`` packages with numbered authors and titles."""
        for i in range(amount):
            self.pr.save_model(
                'package-id-%d' % i, '', {
                    'author': 'The one and only author number%d' % (i + 1),
                    'title': 'This dataset is number%d' % i
                }, {}, 'dataset-name-%s' % i,
                'The one and only author number%d' % (i + 1), '', True)

    def indexSomePrivateRecords(self):
        """Save one package per owner/private/loaded/content combination."""
        i = 0
        for owner in ['owner1', 'owner2']:
            for private in [True, False]:
                for loaded in [True, False]:
                    for content in ['cat', 'dog']:
                        author = 'The one and only author number%d' % (i + 1)
                        title = ('This dataset is number%d, content is %s' %
                                 (i, content))
                        self.pr.save_model(
                            '%s-%s-%s-%s' % (owner, private, loaded, content),
                            '', {
                                'author': author,
                                'title': title,
                                'owner': owner,
                                'private': private
                            }, {}, 'dataset-name-%s' % content,
                            author, '',
                            loaded)
                        i += 1
        self.es.indices.flush(PACKAGES_INDEX_NAME)

    # Tests
    # assertEqual replaces the deprecated assertEquals alias throughout.
    def test___search___all_values_and_empty(self):
        self.assertEqual(len(module.search('package', None)), 0)

    def test___search___all_values_and_one_result(self):
        self.indexSomeRecords(1)
        self.assertEqual(len(module.search('package', None)), 1)

    def test___search___all_values_and_two_results(self):
        self.indexSomeRecords(2)
        self.assertEqual(len(module.search('package', None)), 2)

    def test___search___filter_simple_property(self):
        self.indexSomeRecords(10)
        self.assertEqual(
            len(module.search('package', None, {'model': ['"str7"']})), 1)

    def test___search___filter_numeric_property(self):
        self.indexSomeRecords(10)
        self.assertEqual(
            len(module.search('package', None, {'package': ["7"]})), 1)

    def test___search___filter_boolean_property(self):
        self.indexSomeRecords(10)
        self.assertEqual(
            len(module.search('package', None, {'id': ["true"]})), 10)

    def test___search___filter_multiple_properties(self):
        self.indexSomeRecords(10)
        recs = module.search('package', None, {
            'model': ['"str6"'],
            'package': ["6"]
        })
        self.assertEqual(len(recs), 1)

    def test___search___filter_multiple_values_for_property(self):
        self.indexSomeRecords(10)
        recs = module.search('package', None,
                             {'model': ['"str6"', '"str7"']})
        self.assertEqual(len(recs), 2)

    def test___search___filter_inner_property(self):
        self.indexSomeRecords(7)
        recs = module.search('package', None,
                             {"origin_url.name": ['"innername"']})
        self.assertEqual(len(recs), 7)

    def test___search___filter_no_results(self):
        recs = module.search('package', None, {
            'model': ['"str6"'],
            'package': ["7"]
        })
        self.assertEqual(len(recs), 0)

    def test___search___filter_bad_value(self):
        self.assertIsNone(
            module.search('package', None, {
                'model': ['str6'],
                'package': ["6"]
            }))

    def test___search___filter_nonexistent_kind(self):
        self.assertIsNone(
            module.search('box', None, {
                'model': ['str6'],
                'package': ["6"]
            }))

    def test___search___filter_nonexistent_property(self):
        self.assertIsNone(
            module.search('box', None, {
                'model': ['str6'],
                'boxing': ["6"]
            }))

    def test___search___q_param_no_recs_no_results(self):
        self.indexSomeRealLookingRecords(0)
        self.assertEqual(
            len(module.search('package', None, {'q': ['"author"']})), 0)

    def test___search___q_param_some_recs_no_results(self):
        self.indexSomeRealLookingRecords(2)
        self.assertEqual(
            len(module.search('package', None, {'q': ['"writer"']})), 0)

    def test___search___q_param_some_recs_some_results(self):
        self.indexSomeRealLookingRecords(2)
        results = module.search('package', None, {'q': ['"number1"']})
        self.assertEqual(len(results), 1)

    def test___search___q_param_some_recs_all_results(self):
        self.indexSomeRealLookingRecords(10)
        results = module.search('package', None, {'q': ['"dataset shataset"']})
        self.assertEqual(len(results), 10)

    def test___search___empty_anonymous_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', None)
        self.assertEqual(len(recs), 4)
        self.assertSetEqual(
            self._ids(recs), {
                'owner1-False-True-cat',
                'owner2-False-True-cat',
                'owner1-False-True-dog',
                'owner2-False-True-dog',
            })

    def test___search___empty_authenticated_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', 'owner1')
        self.assertSetEqual(
            self._ids(recs), {
                'owner1-False-False-cat',
                'owner1-False-True-cat',
                'owner1-True-False-cat',
                'owner1-True-True-cat',
                'owner2-False-True-cat',
                'owner1-False-False-dog',
                'owner1-False-True-dog',
                'owner1-True-False-dog',
                'owner1-True-True-dog',
                'owner2-False-True-dog',
            })
        self.assertEqual(len(recs), 10)

    def test___search___q_param_anonymous_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', None, {'q': ['"cat"']})
        self.assertEqual(len(recs), 2)
        self.assertSetEqual(self._ids(recs), {
            'owner1-False-True-cat',
            'owner2-False-True-cat',
        })

    def test___search___q_param_anonymous_search_with_param(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', None, {
            'q': ['"cat"'],
            'package.owner': ['"owner1"']
        })
        self.assertEqual(len(recs), 1)
        self.assertSetEqual(self._ids(recs), {'owner1-False-True-cat'})

    def test___search___q_param_authenticated_search(self):
        self.indexSomePrivateRecords()
        recs = module.search('package', 'owner1', {'q': ['"cat"']})
        self.assertSetEqual(
            self._ids(recs), {
                'owner1-False-False-cat',
                'owner1-False-True-cat',
                'owner1-True-False-cat',
                'owner1-True-True-cat',
                'owner2-False-True-cat',
            })
        self.assertEqual(len(recs), 5)

    def test___search___q_param_with_similar_param(self):
        self.indexSomeRecordsToTestMapping()
        recs = module.search('package', None, {'q': ['"test2"']})
        self.assertSetEqual(self._ids(recs), {'package-id-2'})
        self.assertEqual(len(recs), 1)

        recs = module.search('package', None, {
            'q': ['"dataset"'],
            'package.author': ['"*****@*****.**"']
        })
        self.assertSetEqual(self._ids(recs), {'package-id-2'})
        self.assertEqual(len(recs), 1)

        recs = module.search('package', None,
                             {'package.author': ['"*****@*****.**"']})
        self.assertSetEqual(self._ids(recs), {'package-id-2'})
        self.assertEqual(len(recs), 1)
示例#16
0
import os

from elasticsearch import Elasticsearch

from os_package_registry import PackageRegistry

# ## ElasticSearch
# Module-wide Elasticsearch client, created on first use.
_es_engine = None

# Name of the packages index; overridable through the environment.
PACKAGES_INDEX_NAME = os.environ.get('OS_ES_PACKAGES_INDEX_NAME', 'packages')


def _get_es_engine():
    """Return the shared Elasticsearch client, creating it on first call.

    The host is read from the ``OS_ELASTICSEARCH_ADDRESS`` environment
    variable; SSL is enabled when the host string contains 'https'.
    """
    global _es_engine
    if _es_engine is not None:
        return _es_engine
    host = os.environ['OS_ELASTICSEARCH_ADDRESS']
    _es_engine = Elasticsearch(hosts=[host], use_ssl='https' in host)
    return _es_engine


package_registry = PackageRegistry(
    es_instance=_get_es_engine(),
    index_name=PACKAGES_INDEX_NAME,
)
示例#17
0
    # Fragment of a larger routine: `es`, `source_index`, `backup_index` and
    # `target_index` are defined above the visible part.
    # Start from a clean target; a missing target index is not an error.
    try:
        logging.info('DELETING TARGET INDEX')
        es.indices.delete(target_index)
    except NotFoundError:
        logging.info('TARGET INDEX NOT FOUND')

    source_pr = PackageRegistry(es_instance=es, index_name=source_index)
    backup_pr = PackageRegistry(es_instance=es, index_name=backup_index)

    # Source -> backup first, before anything is removed.
    # NOTE(review): reindex() is defined elsewhere; presumably it copies every
    # record from the first registry into the second -- confirm.
    reindex(source_pr, backup_pr)

    # Drop the 'packages' alias from the source index if it is set there.
    if es.indices.exists_alias(source_index, 'packages'):
        es.indices.delete_alias(source_index, 'packages')

    target_pr = PackageRegistry(es_instance=es, index_name=target_index)
    reindex(source_pr, target_pr)

    # Remove the source index and point the 'packages' alias at the target.
    es.indices.delete(source_index)
    es.indices.put_alias(target_index, 'packages')

    # Find orphan DB tables
    # Collect every fact table still referenced by a model in the target index.
    used_tables = set()
    for pkg_id in target_pr.list_models():
        rec = target_pr.get_raw(pkg_id)
        fact_table = rec[3].get('fact_table')  # model
        if fact_table is not None:
            used_tables.add(fact_table)

    # Database connection string comes from the OS_CONDUCTOR_ENGINE variable.
    # NOTE(review): create_engine/MetaData look like SQLAlchemy -- confirm
    # against the file's imports.
    engine = create_engine(os.environ['OS_CONDUCTOR_ENGINE'])
    meta = MetaData()
    def test_save_and_update(self, package_registry: PackageRegistry):
        """Save a model, then check that each update is visible via get_raw."""
        name = 'owner:my-name'
        # Expected stored values; kept in step with every update below.
        expected = {
            'datapackage_url': 'http://dp_url',
            'datapackage': {'name': 'moses'},
            'model': {'my-model': 'is-great'},
            'dataset_name': 'ds-name',
            'author': 'authors-name',
            'status': 'FUNKY',
            'loaded': False,
        }

        def verify():
            (got_name, got_url, got_dp, got_model,
             got_dataset_name, got_author, got_status, got_loaded) = \
                package_registry.get_raw(name)
            assert got_name == name
            assert got_url == expected['datapackage_url']
            assert got_dp == expected['datapackage']
            assert got_model == expected['model']
            assert got_dataset_name == expected['dataset_name']
            assert got_author == expected['author']
            assert got_status == expected['status']
            assert got_loaded == expected['loaded']

        def update_and_verify(**changes):
            expected.update(changes)
            package_registry.update_model(name, **changes)
            verify()

        package_registry.save_model(
            name,
            expected['datapackage_url'],
            expected['datapackage'],
            expected['model'],
            expected['dataset_name'],
            expected['author'],
            expected['status'],
            expected['loaded'],
        )
        verify()

        update_and_verify(author='moses')
        update_and_verify(status='BLINKY')
        update_and_verify(status='AWESOME', loaded=True)

        # Update existing property of model
        update_and_verify(model={'my-model': 'replace model'},
                          datapackage={'name': 'replace name'})

        # Replace model
        update_and_verify(model={'completely': 'replace model'},
                          datapackage={'completely': 'replace dp'})