def setup_class(cls):
    """Create one resource per retention policy and open a DB session.

    Skips the whole test class when the datastore is not available.
    """
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    helpers.reset_db()
    # Create one resource per retention policy: each policy removes
    # 10, 90, 20 or 50% of the data when the resource reaches its
    # size limit.  (The old comment claimed 3 resources; there are 4.)
    cls.retention = [10, 90, 20, 50]
    cls.resource_ids = []
    package = factories.Dataset()
    # Iterate over the retention values directly instead of re-indexing
    # the list with an enumerate() counter.
    for retention in cls.retention:
        data = {
            'resource': {
                'retention': retention,
                'package_id': package['id']
            },
        }
        result = helpers.call_action('datastore_ts_create', **data)
        cls.resource_ids.append(result['resource_id'])
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
def setup_class(cls):
    """Prepare fixture users, one aliased resource payload and a session."""
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    helpers.reset_db()
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    first_resource = model.Package.get('annakarenina').resources[0]
    # Build the field and record lists separately for readability.
    fields = [
        {'id': 'book', 'type': 'text'},
        {'id': 'author', 'type': 'text'},
        {'id': 'rating with %', 'type': 'text'},
    ]
    records = [
        {'book': 'annakarenina', 'author': 'tolstoy',
         'rating with %': '90%'},
        {'book': 'warandpeace', 'author': 'tolstoy',
         'rating with %': '42%'},
    ]
    cls.data = {
        'resource_id': first_resource.id,
        'aliases': u'b\xfck2',
        'fields': fields,
        'records': records,
    }
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
    set_url_type(
        model.Package.get('annakarenina').resources, cls.sysadmin_user)
def _create_alias_table(self):
    """(Re)create the ``_table_metadata_ts`` view.

    The view maps every public table/view to its relation size, a stable
    16-char hashed id, its name and oid and, for views, the table it
    aliases.
    """
    mapping_sql = '''
        SELECT DISTINCT
            pg_relation_size(dependee.oid) AS size,
            substr(md5(dependee.relname || COALESCE(dependent.relname, '')), 0, 17) AS "_id",
            dependee.relname AS name,
            dependee.oid AS oid,
            dependent.relname AS alias_of
            -- dependent.oid AS oid
        FROM pg_class AS dependee
            LEFT OUTER JOIN pg_rewrite AS r ON r.ev_class = dependee.oid
            LEFT OUTER JOIN pg_depend AS d ON d.objid = r.oid
            LEFT OUTER JOIN pg_class AS dependent ON d.refobjid = dependent.oid
        WHERE
            (dependee.oid != dependent.oid OR dependent.oid IS NULL) AND
            (dependee.relname IN (SELECT tablename FROM pg_catalog.pg_tables)
                OR dependee.relname IN (SELECT viewname FROM pg_catalog.pg_views)) AND
            dependee.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname='public')
        ORDER BY dependee.oid DESC;
    '''
    create_alias_table_sql = u'CREATE OR REPLACE VIEW "_table_metadata_ts" AS {0}'.format(mapping_sql)
    # Connect OUTSIDE the try block: previously a failed connect() left
    # `connection` unbound and the finally clause raised UnboundLocalError,
    # masking the original error.
    connection = db._get_engine(
        {'connection_url': self.write_url}).connect()
    try:
        connection.execute(create_alias_table_sql)
    finally:
        connection.close()
def setup_class(cls):
    """Create a bare (no fields/records) timeseries datastore over the
    first test resource and open a DB session."""
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    cls.dataset = model.Package.get('annakarenina')
    cls.resource = cls.dataset.resources[0]
    cls.data = {'resource_id': cls.resource.id, 'force': True}
    payload = '{0}=1'.format(json.dumps(cls.data))
    headers = {'Authorization': str(cls.sysadmin_user.apikey)}
    response = cls.app.post('/api/action/datastore_ts_create',
                            params=payload, extra_environ=headers)
    body = json.loads(response.body)
    assert body['success'] is True
    write_engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=write_engine))
def setup_class(cls):
    """Create resources covering each retention policy plus a DB session.

    Skips the class entirely when no datastore is configured.
    """
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    helpers.reset_db()
    # One resource per retention policy: 10, 90, 20 and 50 percent of the
    # stored data is dropped when a resource hits its size limit.
    # (Stale comment fixed: there are 4 policies, not 3.)
    cls.retention = [10, 90, 20, 50]
    cls.resource_ids = []
    package = factories.Dataset()
    # Use the loop value itself; the previous code ignored it and
    # re-indexed cls.retention[i].
    for retention in cls.retention:
        data = {
            'resource': {
                'retention': retention,
                'package_id': package['id']
            },
        }
        result = helpers.call_action('datastore_ts_create', **data)
        cls.resource_ids.append(result['resource_id'])
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']}
    )
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
def _read_connection_has_correct_privileges(self):
    '''Return True if the right permissions are set for the read only user.

    A temp table is created by the write user and the read-only user's
    INSERT/UPDATE/DELETE privileges on it are probed; any granted write
    privilege means the permissions are wrong.
    '''
    write_connection = db._get_engine(
        {'connection_url': self.write_url}).connect()
    read_connection_user = sa_url.make_url(self.read_url).username

    drop_foo_sql = u'DROP TABLE IF EXISTS _foo'
    # Close the connection in all cases.  Previously close() was only
    # reached on the final `return True` path, leaking the connection on
    # the early `return False` and on any exception.
    try:
        write_connection.execute(drop_foo_sql)
        try:
            write_connection.execute(u'CREATE TEMP TABLE _foo ()')
            for privilege in ['INSERT', 'UPDATE', 'DELETE']:
                test_privilege_sql = u"SELECT has_table_privilege(%s, '_foo', %s)"
                have_privilege = write_connection.execute(
                    test_privilege_sql,
                    (read_connection_user, privilege)).first()[0]
                if have_privilege:
                    return False
        finally:
            write_connection.execute(drop_foo_sql)
    finally:
        write_connection.close()
    return True
def setup_class(cls):
    """Load the plugin, build fixtures and create timeseries datastores
    for the first two resources using the insert method."""
    p.load('timeseries')
    helpers.reset_db()
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
    cls.dataset = model.Package.get('annakarenina')
    sample_records = [
        {'author': 'tolstoy5', 'published': '2005-03-05'},
        {'author': 'tolstoy6'},
        {'author': 'tolstoy7', 'published': '2005-03-05'},
    ]
    cls.data = {
        'resource_id': '',
        'force': True,
        'method': 'insert',
        'records': sample_records,
    }
    # Register a timeseries datastore over each of the first two resources.
    for resource in cls.dataset.resources[:2]:
        cls.data['resource_id'] = resource.id
        helpers.call_action('datastore_ts_create', **cls.data)
    datastore_test_helpers.set_url_type(
        model.Package.get('annakarenina').resources, cls.sysadmin_user)
def _create_alias_table(self):
    """(Re)create the ``_table_metadata_ts`` view mapping public
    tables/views to size, hashed id, name, oid and alias target."""
    mapping_sql = '''
        SELECT DISTINCT
            pg_relation_size(dependee.oid) AS size,
            substr(md5(dependee.relname || COALESCE(dependent.relname, '')), 0, 17) AS "_id",
            dependee.relname AS name,
            dependee.oid AS oid,
            dependent.relname AS alias_of
            -- dependent.oid AS oid
        FROM pg_class AS dependee
            LEFT OUTER JOIN pg_rewrite AS r ON r.ev_class = dependee.oid
            LEFT OUTER JOIN pg_depend AS d ON d.objid = r.oid
            LEFT OUTER JOIN pg_class AS dependent ON d.refobjid = dependent.oid
        WHERE
            (dependee.oid != dependent.oid OR dependent.oid IS NULL) AND
            (dependee.relname IN (SELECT tablename FROM pg_catalog.pg_tables)
                OR dependee.relname IN (SELECT viewname FROM pg_catalog.pg_views)) AND
            dependee.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname='public')
        ORDER BY dependee.oid DESC;
    '''
    create_alias_table_sql = u'CREATE OR REPLACE VIEW "_table_metadata_ts" AS {0}'.format(
        mapping_sql)
    # Fix: connect before the try block.  If connect() raised while inside
    # the try, the finally clause hit UnboundLocalError on `connection`,
    # hiding the real failure.
    connection = db._get_engine({
        'connection_url': self.write_url
    }).connect()
    try:
        connection.execute(create_alias_table_sql)
    finally:
        connection.close()
def _read_connection_has_correct_privileges(self):
    '''Return True if the right permissions are set for the read only user.

    Creates a temp table as the write user and checks whether the
    read-only user holds any write privilege on it.
    '''
    write_connection = db._get_engine({
        'connection_url': self.write_url
    }).connect()
    read_connection_user = sa_url.make_url(self.read_url).username

    drop_foo_sql = u'DROP TABLE IF EXISTS _foo'
    # Wrap everything so the connection is always closed; the original
    # only closed it on the success path, leaking on early return and
    # on exceptions.
    try:
        write_connection.execute(drop_foo_sql)
        try:
            write_connection.execute(u'CREATE TEMP TABLE _foo ()')
            for privilege in ['INSERT', 'UPDATE', 'DELETE']:
                test_privilege_sql = u"SELECT has_table_privilege(%s, '_foo', %s)"
                have_privilege = write_connection.execute(
                    test_privilege_sql,
                    (read_connection_user, privilege)).first()[0]
                if have_privilege:
                    return False
        finally:
            write_connection.execute(drop_foo_sql)
    finally:
        write_connection.close()
    return True
def setup_class(cls):
    # Skip the whole class when the datastore is not configured.
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    cls.dataset = model.Package.get('annakarenina')
    cls.resource = cls.dataset.resources[0]
    # Payload for datastore_ts_create: an alias, typed and untyped fields
    # (including a unicode name and one containing '%') and two records.
    cls.data = {
        'resource_id': cls.resource.id,
        'force': True,
        'aliases': 'books3',
        'fields': [{'id': u'b\xfck', 'type': 'text'},
                   {'id': 'author', 'type': 'text'},
                   {'id': 'published'},
                   {'id': u'characters', u'type': u'_text'},
                   {'id': 'rating with %'}],
        'records': [{u'b\xfck': 'annakarenina',
                     'author': 'tolstoy',
                     'published': '2005-03-01',
                     'nested': ['b', {'moo': 'moo'}],
                     u'characters': [u'Princess Anna', u'Sergius'],
                     'rating with %': '60%'},
                    {u'b\xfck': 'warandpeace',
                     'author': 'tolstoy',
                     'nested': {'a': 'b'},
                     'rating with %': '99%'}]
    }
    postparams = '%s=1' % json.dumps(cls.data)
    auth = {'Authorization': str(cls.sysadmin_user.apikey)}
    # NOTE(review): cls.app is assumed to be provided by the enclosing
    # test infrastructure -- not created here; confirm.
    res = cls.app.post('/api/action/datastore_ts_create',
                       params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True

    # Make an organization, because private datasets must belong to one.
    cls.organization = tests.call_action_api(
        cls.app, 'organization_create',
        name='test_org', apikey=cls.sysadmin_user.apikey)

    # The rows the search API is expected to return for the records above
    # (note the generated _id and the ISO-formatted timestamp).
    cls.expected_records = [{u'published': u'2005-03-01T00:00:00',
                             u'_id': 1,
                             u'nested': [u'b', {u'moo': u'moo'}],
                             u'b\xfck': u'annakarenina',
                             u'author': u'tolstoy',
                             u'characters': [u'Princess Anna', u'Sergius'],
                             u'rating with %': u'60%'},
                            {u'published': None,
                             u'_id': 2,
                             u'nested': {u'a': u'b'},
                             u'b\xfck': u'warandpeace',
                             u'author': u'tolstoy',
                             u'characters': None,
                             u'rating with %': u'99%'}]

    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']}
    )
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
def setup_class(cls):
    """Build a TestApp, create a populated timeseries datastore with an
    alias, and open a DB session."""
    wsgiapp = middleware.make_app(config['global_conf'], **config)
    cls.app = paste.fixture.TestApp(wsgiapp)
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    target = model.Package.get('annakarenina').resources[0]
    fields = [
        {'id': u'b\xfck', 'type': 'text'},
        {'id': 'author', 'type': 'text'},
        {'id': 'published'},
        {'id': u'characters', u'type': u'_text'},
    ]
    records = [
        {u'b\xfck': 'annakarenina',
         'author': 'tolstoy',
         'published': '2005-03-01',
         'nested': ['b', {'moo': 'moo'}],
         u'characters': [u'Princess Anna', u'Sergius']},
        {u'b\xfck': 'warandpeace',
         'author': 'tolstoy',
         'nested': {'a': 'b'}},
    ]
    cls.data = {
        'resource_id': target.id,
        'force': True,
        'aliases': 'books',
        'fields': fields,
        'records': records,
    }
    payload = '{0}=1'.format(json.dumps(cls.data))
    headers = {'Authorization': str(cls.sysadmin_user.apikey)}
    response = cls.app.post('/api/action/datastore_ts_create',
                            params=payload, extra_environ=headers)
    assert json.loads(response.body)['success'] is True
    engine = db._get_engine(
        {'connection_url': config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
def _resource_exists(context, data_dict):
    '''True when the resource exists both in CKAN and in the datastore.'''
    model = _get_or_bust(context, 'model')
    res_id = _get_or_bust(data_dict, 'resource_id')
    # Not known to CKAN at all -> cannot exist in the datastore either.
    if model.Resource.get(res_id) is None:
        return False
    # Aliases are excluded: only real datastore tables count.
    lookup_sql = sqlalchemy.text(
        u'SELECT 1 FROM "_table_metadata" '
        u'WHERE name = :id AND alias_of IS NULL')
    found = db._get_engine(data_dict).execute(lookup_sql, id=res_id)
    return found.rowcount > 0
def _is_read_only_database(self):
    '''Return True when no configured connection may CREATE in the
    public schema -- the case when replication is enabled.'''
    for url in (self.ckan_url, self.write_url, self.read_url):
        connection = db._get_engine({'connection_url': url}).connect()
        try:
            writable = connection.execute(
                u"SELECT has_schema_privilege('public', 'CREATE')"
            ).first()[0]
        finally:
            connection.close()
        # One writable connection is enough to call the DB writable.
        if writable:
            return False
    return True
def configure(self, config):
    """Read datastore settings, verify URLs/permissions and create the
    timeseries metadata view.

    :raises DatastoreException: when ``ckan.datastore.write_url`` is
        missing from the config.
    """
    self.config = config
    # check for ckan.datastore.write_url and ckan.datastore.read_url
    # (idiom fix: use `not in` rather than `not ... in`)
    if 'ckan.datastore.write_url' not in config:
        error_msg = 'ckan.datastore.write_url not found in config'
        raise DatastoreException(error_msg)

    # Legacy mode means that we have no read url. Consequently sql search
    # is not available and permissions do not have to be changed. In
    # legacy mode, the datastore runs on PG prior to 9.0 (for example 8.4).
    self.legacy_mode = _is_legacy_mode(self.config)

    # Check whether users have disabled datastore_search_sql
    self.enable_sql_search = p.toolkit.asbool(
        self.config.get('ckan.datastore.sqlsearch.enabled', True))

    datapusher_formats = config.get('datapusher.formats', '').split()
    self.datapusher_formats = datapusher_formats or DEFAULT_FORMATS

    # Check whether we are running one of the paster commands which means
    # that we should ignore the following tests.
    if sys.argv[0].split(
            '/')[-1] == 'paster' and 'timeseries' in sys.argv[1:]:
        log.warn('Omitting permission checks because you are '
                 'running paster commands.')
        return

    self.ckan_url = self.config['sqlalchemy.url']
    self.write_url = self.config['ckan.datastore.write_url']
    if self.legacy_mode:
        self.read_url = self.write_url
        log.warn('Legacy mode active. '
                 'The sql search will not be available.')
    else:
        self.read_url = self.config['ckan.datastore.read_url']

    self.read_engine = db._get_engine({'connection_url': self.read_url})
    if not model.engine_is_pg(self.read_engine):
        log.warn('We detected that you do not use a PostgreSQL '
                 'database. The DataStore will NOT work and DataStore '
                 'tests will be skipped.')
        return

    if self._is_read_only_database():
        log.warn('We detected that CKAN is running on a read '
                 'only database. Permission checks and the creation '
                 'of _table_metadata are skipped.')
    else:
        self._check_urls_and_permissions()

    self._create_alias_table(
    )  # create another _table_metadata_ts for resource size
def configure(self, config):
    """Validate the datastore configuration, set up the read engine and
    build the ``_table_metadata_ts`` view.

    :raises DatastoreException: when ``ckan.datastore.write_url`` is
        missing from the config.
    """
    self.config = config
    # check for ckan.datastore.write_url and ckan.datastore.read_url
    # (PEP 8 idiom fix: `'key' not in config` instead of `not 'key' in`)
    if 'ckan.datastore.write_url' not in config:
        error_msg = 'ckan.datastore.write_url not found in config'
        raise DatastoreException(error_msg)

    # Legacy mode means that we have no read url. Consequently sql search
    # is not available and permissions do not have to be changed. In
    # legacy mode, the datastore runs on PG prior to 9.0 (for example 8.4).
    self.legacy_mode = _is_legacy_mode(self.config)

    # Check whether users have disabled datastore_search_sql
    self.enable_sql_search = p.toolkit.asbool(
        self.config.get('ckan.datastore.sqlsearch.enabled', True))

    datapusher_formats = config.get('datapusher.formats', '').split()
    self.datapusher_formats = datapusher_formats or DEFAULT_FORMATS

    # Check whether we are running one of the paster commands which means
    # that we should ignore the following tests.
    if sys.argv[0].split('/')[-1] == 'paster' and 'timeseries' in sys.argv[1:]:
        log.warn('Omitting permission checks because you are '
                 'running paster commands.')
        return

    self.ckan_url = self.config['sqlalchemy.url']
    self.write_url = self.config['ckan.datastore.write_url']
    if self.legacy_mode:
        self.read_url = self.write_url
        log.warn('Legacy mode active. '
                 'The sql search will not be available.')
    else:
        self.read_url = self.config['ckan.datastore.read_url']

    self.read_engine = db._get_engine(
        {'connection_url': self.read_url})
    if not model.engine_is_pg(self.read_engine):
        log.warn('We detected that you do not use a PostgreSQL '
                 'database. The DataStore will NOT work and DataStore '
                 'tests will be skipped.')
        return

    if self._is_read_only_database():
        log.warn('We detected that CKAN is running on a read '
                 'only database. Permission checks and the creation '
                 'of _table_metadata are skipped.')
    else:
        self._check_urls_and_permissions()

    self._create_alias_table()  # create another _table_metadata_ts for resource size
def _is_legacy_mode(config):
    '''Decide if the DataStore should run on legacy mode

    Returns True if `ckan.datastore.read_url` is not set in the provided
    config object or CKAN is running on Postgres < 9.x
    '''
    write_url = config.get('ckan.datastore.write_url')
    engine = db._get_engine({'connection_url': write_url})
    connection = engine.connect()
    try:
        return (not config.get('ckan.datastore.read_url') or
                not db._pg_version_is_at_least(connection, '9.0'))
    finally:
        # Fix: the connection was previously never closed and leaked on
        # every call; always return it to the pool.
        connection.close()
def setup_class(cls):
    """Build a TestApp, load the plugin and prepare fixture users."""
    wsgiapp = middleware.make_app(config['global_conf'], **config)
    cls.app = paste.fixture.TestApp(wsgiapp)
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    write_engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=write_engine))
    set_url_type(
        model.Package.get('annakarenina').resources, cls.sysadmin_user)
def before_show(self, resource_dict):
    """Rewrite the URL of timeseries datastore resources to point at the
    timeseries dump endpoint, and default the datastore_active flag."""
    if resource_dict.get('url_type') == 'datastore':
        connection = db._get_engine(
            {'connection_url': self.write_url}).connect()
        # Fix: the connection was opened on every call and never closed,
        # leaking a pool connection per shown resource.
        try:
            is_timeseries = db._is_timeseries(
                {"connection": connection}, resource_dict['id'])
        finally:
            connection.close()
        if is_timeseries:
            resource_dict['url'] = p.toolkit.url_for(
                controller='ckanext.timeseries.controller.datastore_ts:TimeseriesController',
                action='dump', resource_id=resource_dict['id'],
                qualified=True)

    if 'datastore_active' not in resource_dict:
        resource_dict[u'datastore_active'] = False

    return resource_dict
def setup_class(cls):
    """Create the helper tables used by the metadata-view tests."""
    if not pylons.config.get('ckan.datastore.read_url'):
        raise nose.SkipTest('Datastore runs on legacy mode, skipping...')
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
    datastore_test_helpers.clear_db(cls.Session)
    # Table names cover lower-case, quoted upper-case and unicode columns.
    for statement in (
            u'CREATE TABLE test_a (id_a text)',
            u'CREATE TABLE test_b (id_b text)',
            u'CREATE TABLE "TEST_C" (id_c text)',
            u'CREATE TABLE test_d ("α/α" integer)'):
        cls.Session.execute(statement)
def setup_class(cls):
    """Reset the datastore DB and create assorted fixture tables."""
    if not pylons.config.get('ckan.datastore.read_url'):
        raise nose.SkipTest('Datastore runs on legacy mode, skipping...')
    ds_engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=ds_engine))
    datastore_test_helpers.clear_db(cls.Session)
    # DDL covering plain, quoted-upper-case and unicode identifiers.
    table_ddl = [
        u'CREATE TABLE test_a (id_a text)',
        u'CREATE TABLE test_b (id_b text)',
        u'CREATE TABLE "TEST_C" (id_c text)',
        u'CREATE TABLE test_d ("α/α" integer)',
    ]
    for ddl in table_ddl:
        cls.Session.execute(ddl)
def setup_class(cls):
    """Prepare users, a DB session and an aliased resource payload."""
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    helpers.reset_db()
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    # The engine setup does not depend on the payload; do it first.
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
    res = model.Package.get('annakarenina').resources[0]
    cls.data = {
        'resource_id': res.id,
        'aliases': u'b\xfck2',
        'fields': [{'id': 'book', 'type': 'text'},
                   {'id': 'author', 'type': 'text'},
                   {'id': 'rating with %', 'type': 'text'}],
        'records': [{'book': 'annakarenina',
                     'author': 'tolstoy',
                     'rating with %': '90%'},
                    {'book': 'warandpeace',
                     'author': 'tolstoy',
                     'rating with %': '42%'}],
    }
    set_url_type(
        model.Package.get('annakarenina').resources, cls.sysadmin_user)
def before_show(self, resource_dict):
    """Point timeseries datastore resource URLs at the dump endpoint and
    make sure the datastore_active flag is present."""
    if resource_dict.get('url_type') == 'datastore':
        connection = db._get_engine({
            'connection_url': self.write_url
        }).connect()
        # Fix: previously this connection was never closed, leaking one
        # pool connection per datastore resource shown.
        try:
            is_timeseries = db._is_timeseries(
                {"connection": connection}, resource_dict['id'])
        finally:
            connection.close()
        if is_timeseries:
            resource_dict['url'] = p.toolkit.url_for(
                controller=
                'ckanext.timeseries.controller.datastore_ts:TimeseriesController',
                action='dump',
                resource_id=resource_dict['id'],
                qualified=True)

    if 'datastore_active' not in resource_dict:
        resource_dict[u'datastore_active'] = False

    return resource_dict
def setup_class(cls):
    """Load the plugin, reset the DB and register timeseries datastores
    over the first two test resources (insert method)."""
    p.load('timeseries')
    helpers.reset_db()
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
    cls.dataset = model.Package.get('annakarenina')
    cls.data = {
        'resource_id': '',
        'force': True,
        'method': 'insert',
        'records': [
            {'author': 'tolstoy5', 'published': '2005-03-05'},
            {'author': 'tolstoy6'},
            {'author': 'tolstoy7', 'published': '2005-03-05'},
        ],
    }
    first, second = cls.dataset.resources[0], cls.dataset.resources[1]
    cls.data['resource_id'] = first.id
    helpers.call_action('datastore_ts_create', **cls.data)
    cls.data['resource_id'] = second.id
    helpers.call_action('datastore_ts_create', **cls.data)
    datastore_test_helpers.set_url_type(
        model.Package.get('annakarenina').resources, cls.sysadmin_user)
def setup_class(cls):
    """Create the TestApp and a populated, aliased timeseries datastore."""
    wsgiapp = middleware.make_app(config['global_conf'], **config)
    cls.app = paste.fixture.TestApp(wsgiapp)
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    book_resource = model.Package.get('annakarenina').resources[0]
    cls.data = {
        'resource_id': book_resource.id,
        'force': True,
        'aliases': 'books',
        'fields': [{'id': u'b\xfck', 'type': 'text'},
                   {'id': 'author', 'type': 'text'},
                   {'id': 'published'},
                   {'id': u'characters', u'type': u'_text'}],
        'records': [{u'b\xfck': 'annakarenina',
                     'author': 'tolstoy',
                     'published': '2005-03-01',
                     'nested': ['b', {'moo': 'moo'}],
                     u'characters': [u'Princess Anna', u'Sergius']},
                    {u'b\xfck': 'warandpeace',
                     'author': 'tolstoy',
                     'nested': {'a': 'b'}}],
    }
    payload = '{0}=1'.format(json.dumps(cls.data))
    headers = {'Authorization': str(cls.sysadmin_user.apikey)}
    api_response = cls.app.post('/api/action/datastore_ts_create',
                                params=payload, extra_environ=headers)
    assert json.loads(api_response.body)['success'] is True
    engine = db._get_engine(
        {'connection_url': config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
def setup_class(cls):
    # Skip when the datastore is unavailable or running in legacy mode
    # (SQL search requires a read URL).
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    plugin = p.load('timeseries')
    if plugin.legacy_mode:
        # make sure we undo adding the plugin
        p.unload('timeseries')
        raise nose.SkipTest("SQL tests are not supported in legacy mode")
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    cls.dataset = model.Package.get('annakarenina')
    resource = cls.dataset.resources[0]
    # Payload for datastore_ts_create: alias plus typed/untyped fields
    # and two records with nested values.
    cls.data = {
        'resource_id': resource.id,
        'force': True,
        'aliases': 'books4',
        'fields': [{'id': u'b\xfck', 'type': 'text'},
                   {'id': 'author', 'type': 'text'},
                   {'id': 'published'}],
        'records': [{u'b\xfck': 'annakarenina',
                     'author': 'tolstoy',
                     'published': '2005-03-01',
                     'nested': ['b', {'moo': 'moo'}]},
                    {u'b\xfck': 'warandpeace',
                     'author': 'tolstoy',
                     'nested': {'a': 'b'}}]
    }
    postparams = '%s=1' % json.dumps(cls.data)
    auth = {'Authorization': str(cls.sysadmin_user.apikey)}
    # NOTE(review): cls.app appears to be provided by the enclosing test
    # infrastructure -- it is not created here; confirm.
    res = cls.app.post('/api/action/datastore_ts_create',
                       params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True

    # Make an organization, because private datasets must belong to one.
    cls.organization = tests.call_action_api(
        cls.app, 'organization_create',
        name='test_org', apikey=cls.sysadmin_user.apikey)

    # Expected rows including the generated _full_text search terms
    # (note the stemmed 'warandpeac') and _id values.
    cls.expected_records = [{u'_full_text': [u"'annakarenina'", u"'b'",
                                             u"'moo'", u"'tolstoy'",
                                             u"'2005'"],
                             u'_id': 1,
                             u'author': u'tolstoy',
                             u'b\xfck': u'annakarenina',
                             u'nested': [u'b', {u'moo': u'moo'}],
                             u'published': u'2005-03-01T00:00:00'},
                            {u'_full_text': [u"'tolstoy'", u"'warandpeac'",
                                             u"'b'"],
                             u'_id': 2,
                             u'author': u'tolstoy',
                             u'b\xfck': u'warandpeace',
                             u'nested': {u'a': u'b'},
                             u'published': None}]
    cls.expected_join_results = [{u'first': 1, u'second': 1},
                                 {u'first': 1, u'second': 2}]

    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
def datastore_search(context, data_dict):
    '''Search a DataStore resource.

    The datastore_search action allows you to search data in a resource.
    DataStore resources that belong to private CKAN resource can only be
    read by you if you have access to the CKAN resource and send the
    appropriate authorization.

    :param resource_id: id or alias of the resource to be searched against
    :type resource_id: string
    :param filters: matching conditions to select, e.g
                    {"key1": "a", "key2": "b"} (optional)
    :type filters: dictionary
    :param q: full text query. If it's a string, it'll search on all fields
              on each row. If it's a dictionary as {"key1": "a", "key2": "b"},
              it'll search on each specific field (optional)
    :type q: string or dictionary
    :param distinct: return only distinct rows (optional, default: false)
    :type distinct: bool
    :param plain: treat as plain text query (optional, default: true)
    :type plain: bool
    :param language: language of the full text query
                     (optional, default: english)
    :type language: string
    :param limit: maximum number of rows to return
                  (optional, default: 100)
    :type limit: int
    :param offset: offset this number of rows (optional)
    :type offset: int
    :param fields: fields to return
                   (optional, default: all fields in original order)
    :type fields: list or comma separated string
    :param sort: comma separated field names with ordering
                 e.g.: "fieldname1, fieldname2 desc"
    :type sort: string

    Setting the ``plain`` flag to false enables the entire PostgreSQL
    `full text search query language`_.

    A listing of all available resources can be found at the alias
    ``_table_metadata``.

    .. _full text search query language: http://www.postgresql.org/docs/9.1/static/datatype-textsearch.html#DATATYPE-TSQUERY

    If you need to download the full resource, read :ref:`dump`.

    **Results:**

    The result of this action is a dictionary with the following keys:

    :rtype: A dictionary with the following keys
    :param fields: fields/columns and their extra metadata
    :type fields: list of dictionaries
    :param offset: query offset value
    :type offset: int
    :param limit: query limit value
    :type limit: int
    :param filters: query filters
    :type filters: list of dictionaries
    :param total: number of total matching records
    :type total: int
    :param records: list of matching results
    :type records: list of dictionaries
    '''
    schema = context.get('schema', dsschema.datastore_search_schema())
    data_dict, errors = _validate(data_dict, schema, context)
    if errors:
        raise p.toolkit.ValidationError(errors)

    res_id = data_dict['resource_id']
    # NOTE(review): searching uses the *write* URL here -- confirm this is
    # intentional (legacy setups have no separate read URL).
    data_dict['connection_url'] = config['ckan.datastore.write_url']

    resources_sql = sqlalchemy.text(
        u'''SELECT alias_of FROM "_table_metadata" WHERE name = :id''')
    results = db._get_engine(data_dict).execute(resources_sql, id=res_id)

    # Resource only has to exist in the datastore (because it could be an alias)
    if not results.rowcount > 0:
        raise p.toolkit.ObjectNotFound(p.toolkit._(
            'Resource "{0}" was not found.'.format(res_id)
        ))

    if not data_dict['resource_id'] in WHITELISTED_RESOURCES:
        # Replace potential alias with real id to simplify access checks
        resource_id = results.fetchone()[0]
        if resource_id:
            data_dict['resource_id'] = resource_id

        p.toolkit.check_access('datastore_ts_search', context, data_dict)

    result = db.search(context, data_dict)
    # Strip internal keys and auto-generated columns from the response.
    result.pop('id', None)
    result.pop('connection_url')
    datastore_helpers.remove_autogen(result)
    return result
def datastore_delete(context, data_dict):
    '''Deletes a table or a set of records from the DataStore.

    :param resource_id: resource id that the data will be deleted from.
        (optional)
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param filters: filters to apply before deleting (eg {"name": "fred"}).
       If missing delete whole table and all dependent views. (optional)
    :type filters: dictionary

    **Results:**

    :returns: Original filters sent.
    :rtype: dictionary
    '''
    schema = context.get('schema', dsschema.datastore_upsert_schema())

    # Remove any applied filters before running validation.
    filters = data_dict.pop('filters', None)
    data_dict, errors = _validate(data_dict, schema, context)

    if filters is not None:
        if not isinstance(filters, dict):
            raise p.toolkit.ValidationError({
                'filters': [
                    'filters must be either a dict or null.'
                ]
            })
        data_dict['filters'] = filters

    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_ts_delete', context, data_dict)

    if not data_dict.pop('force', False):
        resource_id = data_dict['resource_id']
        _check_read_only(context, resource_id)

    data_dict['connection_url'] = config['ckan.datastore.write_url']

    res_id = data_dict['resource_id']
    resources_sql = sqlalchemy.text(
        u'''SELECT 1 FROM "_table_metadata"
            WHERE name = :id AND alias_of IS NULL''')
    results = db._get_engine(data_dict).execute(resources_sql, id=res_id)
    res_exists = results.rowcount > 0

    if not res_exists:
        raise p.toolkit.ObjectNotFound(p.toolkit._(
            u'Resource "{0}" was not found.'.format(res_id)
        ))

    result = db.delete(context, data_dict)

    # Unset the datastore_active flag when the whole table (no filters)
    # was deleted.
    model = _get_or_bust(context, 'model')
    resource = model.Resource.get(data_dict['resource_id'])
    if (not data_dict.get('filters') and
            resource.extras.get('datastore_active') is True):
        # Fix: the message previously claimed datastore_active=True even
        # though the patch below sets it to False.
        log.debug(
            'Setting datastore_active=False on resource {0}'.format(
                resource.id)
        )
        p.toolkit.get_action('resource_patch')(
            context,
            {'id': data_dict['resource_id'], 'datastore_active': False})

    result.pop('id', None)
    result.pop('connection_url')
    datastore_helpers.remove_autogen(result)
    return result
def datastore_info(context, data_dict):
    '''
    Returns information about the data imported, such as column names
    and types.

    :rtype: A dictionary describing the columns and their types.
    :param id: Id of the resource we want info about
    :type id: A UUID
    '''
    def _type_lookup(t):
        # Collapse Postgres types into the three coarse client-side types.
        if t in ['numeric', 'integer']:
            return 'number'
        if t.startswith('timestamp'):
            return "date"
        return "text"

    p.toolkit.check_access('datastore_ts_info', context, data_dict)

    resource_id = _get_or_bust(data_dict, 'id')
    # The call raises NotFound if the resource is unknown to CKAN; the
    # returned dict itself is not needed (the unused binding was removed).
    p.toolkit.get_action('resource_show')(context, {'id': resource_id})

    data_dict['connection_url'] = config['ckan.datastore.read_url']

    resources_sql = sqlalchemy.text(
        u'''SELECT 1 FROM "_table_metadata"
            WHERE name = :id AND alias_of IS NULL''')
    results = db._get_engine(data_dict).execute(resources_sql,
                                                id=resource_id)
    res_exists = results.rowcount > 0
    if not res_exists:
        raise p.toolkit.ObjectNotFound(p.toolkit._(
            u'Resource "{0}" was not found.'.format(resource_id)
        ))

    info = {'schema': {}, 'meta': {}}

    schema_results = None
    meta_results = None
    try:
        schema_sql = sqlalchemy.text(u'''
            SELECT column_name, data_type
            FROM INFORMATION_SCHEMA.COLUMNS
            WHERE table_name = :resource_id;
        ''')
        schema_results = db._get_engine(data_dict).execute(
            schema_sql, resource_id=resource_id)
        for row in schema_results.fetchall():
            k = row[0]
            v = row[1]
            if k.startswith('_'):
                # Skip internal rows
                continue
            info['schema'][k] = _type_lookup(v)

        # We need to make sure the resource_id is a valid resource_id
        # before we use it like this, we have done that above.
        meta_sql = sqlalchemy.text(u'''
            SELECT count(_id) FROM "{0}";
        '''.format(resource_id))
        meta_results = db._get_engine(data_dict).execute(
            meta_sql, resource_id=resource_id)
        info['meta']['count'] = meta_results.fetchone()[0]
    finally:
        if schema_results:
            schema_results.close()
        if meta_results:
            meta_results.close()

    return info
def datastore_upsert(context, data_dict):
    '''Updates or inserts into a table in the DataStore

    The datastore_upsert API action allows you to add or edit records to
    an existing DataStore resource. In order for the *upsert* and *update*
    methods to work, a unique key has to be defined via the datastore_create
    action. The available methods are:

    *upsert*
        Update if record with same key already exists, otherwise insert.
        Requires unique key.
    *insert*
        Insert only. This method is faster than upsert, but will fail if any
        inserted record matches an existing one. Does *not* require a unique
        key.
    *update*
        Update only. An exception will occur if the key that should be
        updated does not exist. Requires unique key.

    :param resource_id: resource id that the data is going to be stored under.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a","b"]}]
                    (optional)
    :type records: list of dictionaries
    :param method: the method to use to put the data into the datastore.
                   Possible options are: upsert, insert, update
                   (optional, default: upsert)
    :type method: string

    **Results:**

    :returns: The modified data object.
    :rtype: dictionary

    '''
    schema = context.get('schema', dsschema.datastore_upsert_schema())
    # Pop the records before validation so the (potentially large) payload
    # is not run through the schema; reattach it afterwards.
    records = data_dict.pop('records', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if records:
        data_dict['records'] = records
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_ts_upsert', context, data_dict)

    if not data_dict.pop('force', False):
        resource_id = data_dict['resource_id']
        _check_read_only(context, resource_id)

    data_dict['connection_url'] = config['ckan.datastore.write_url']

    res_id = data_dict['resource_id']
    # Confirm a real datastore table (not just an alias) exists for this id.
    resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata"
                                        WHERE name = :id AND alias_of IS NULL''')
    results = db._get_engine(data_dict).execute(resources_sql, id=res_id)
    try:
        res_exists = results.rowcount > 0
    finally:
        # Close explicitly so the pooled connection is released promptly
        # instead of waiting for garbage collection.
        results.close()

    if not res_exists:
        raise p.toolkit.ObjectNotFound(p.toolkit._(
            u'Resource "{0}" was not found.'.format(res_id)
        ))

    result = db.upsert(context, data_dict)

    # Strip internal bookkeeping fields before returning to the caller.
    result.pop('id', None)
    result.pop('connection_url')
    datastore_helpers.remove_autogen(result)
    return result
def _execute_sql(self, sql, *args):
    """Run *sql* (with optional positional args) against the datastore
    write database and return the raw result."""
    write_url = pylons.config['ckan.datastore.write_url']
    engine = db._get_engine({'connection_url': write_url})
    Session = orm.scoped_session(orm.sessionmaker(bind=engine))
    return Session.connection().execute(sql, *args)
def setup_class(cls):
    # Create one datastore resource and insert three batches of records in
    # three separate requests, capturing utcnow() timestamps around each
    # request so tests can query by insertion-time windows. Statement order
    # matters here: every start/end timestamp brackets exactly one POST.
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    helpers.reset_db()
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    cls.dataset = model.Package.get('annakarenina')
    cls.resource = cls.dataset.resources[0]
    # Batch 1: creates the table (with alias 'books3') and two records.
    cls.data = {
        'resource_id': cls.resource.id,
        'force': True,
        'aliases': 'books3',
        'fields': [{'id': 'author', 'type': 'text'},
                   {'id': 'published'}],
        'records': [{'author': 'tolstoy1', 'published': '2005-03-01'},
                    {'author': 'tolstoy2'}
                    ]
    }
    # Batch 2: plain insert of two more records into the same table.
    cls.data2 = {
        'resource_id': cls.resource.id,
        'force': True,
        'method': 'insert',
        'records': [{'author': 'tolstoy3', 'published': '2005-03-03'},
                    {'author': 'tolstoy4'}
                    ]
    }
    # Batch 3: insert of three more records.
    cls.data3 = {
        'resource_id': cls.resource.id,
        'force': True,
        'method': 'insert',
        'records': [{'author': 'tolstoy5', 'published': '2005-03-05'},
                    {'author': 'tolstoy6'},
                    {'author': 'tolstoy7', 'published': '2005-03-05'}
                    ]
    }
    cls.startdata = utcnow()  # window 1 opens
    postparams = '%s=1' % json.dumps(cls.data)
    auth = {'Authorization': str(cls.sysadmin_user.apikey)}
    res = cls.app.post('/api/action/datastore_ts_create', params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True
    cls.enddata = utcnow()  # window 1 closes
    cls.startdata2 = utcnow()  # window 2 opens
    # Sleep so the batches get clearly separated insertion timestamps.
    time.sleep(2)
    postparams = '%s=1' % json.dumps(cls.data2)
    res = cls.app.post('/api/action/datastore_ts_create', params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True
    cls.enddata2 = utcnow()  # window 2 closes
    cls.startdata3 = utcnow()  # window 3 opens
    time.sleep(2)
    postparams = '%s=1' % json.dumps(cls.data3)
    res = cls.app.post('/api/action/datastore_ts_create', params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True
    cls.enddata3 = utcnow()  # window 3 closes
    # Make an organization, because private datasets must belong to one.
    cls.organization = tests.call_action_api(
        cls.app, 'organization_create',
        name='test_org',
        apikey=cls.sysadmin_user.apikey)
    # Expected result sets for the various time windows:
    # all rows / batch 1 / batches 1+2 / batches 2+3 / batch 3.
    cls.expected_records = [{u'published': u'2005-03-01T00:00:00',
                             u'_id': 1,
                             u'author': u'tolstoy1'},
                            {u'published': None,
                             u'_id': 2,
                             u'author': u'tolstoy2'},
                            {u'published': u'2005-03-03T00:00:00',
                             u'_id': 3,
                             u'author': u'tolstoy3'},
                            {u'published': None,
                             u'_id': 4,
                             u'author': u'tolstoy4'},
                            {u'published': u'2005-03-05T00:00:00',
                             u'_id': 5,
                             u'author': u'tolstoy5'},
                            {u'published': None,
                             u'_id': 6,
                             u'author': u'tolstoy6'},
                            {u'published': u'2005-03-05T00:00:00',
                             u'_id': 7,
                             u'author': u'tolstoy7'}]
    cls.expected_records1 = [{u'published': u'2005-03-01T00:00:00',
                              u'_id': 1,
                              u'author': u'tolstoy1'},
                             {u'published': None,
                              u'_id': 2,
                              u'author': u'tolstoy2'}]
    cls.expected_records12 = [{u'published': u'2005-03-01T00:00:00',
                               u'_id': 1,
                               u'author': u'tolstoy1'},
                              {u'published': None,
                               u'_id': 2,
                               u'author': u'tolstoy2'},
                              {u'published': u'2005-03-03T00:00:00',
                               u'_id': 3,
                               u'author': u'tolstoy3'},
                              {u'published': None,
                               u'_id': 4,
                               u'author': u'tolstoy4'}]
    cls.expected_records23 = [{u'published': u'2005-03-03T00:00:00',
                               u'_id': 3,
                               u'author': u'tolstoy3'},
                              {u'published': None,
                               u'_id': 4,
                               u'author': u'tolstoy4'},
                              {u'published': u'2005-03-05T00:00:00',
                               u'_id': 5,
                               u'author': u'tolstoy5'},
                              {u'published': None,
                               u'_id': 6,
                               u'author': u'tolstoy6'},
                              {u'published': u'2005-03-05T00:00:00',
                               u'_id': 7,
                               u'author': u'tolstoy7'}]
    cls.expected_records3 = [{u'published': u'2005-03-05T00:00:00',
                              u'_id': 5,
                              u'author': u'tolstoy5'},
                             {u'published': None,
                              u'_id': 6,
                              u'author': u'tolstoy6'},
                             {u'published': u'2005-03-05T00:00:00',
                              u'_id': 7,
                              u'author': u'tolstoy7'}]
    # Direct session on the datastore write DB for raw-SQL verification.
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']}
    )
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
def setup_class(cls):
    """Bind a scoped SQLAlchemy session to the datastore write database."""
    write_url = pylons.config['ckan.datastore.write_url']
    cls.Session = orm.scoped_session(orm.sessionmaker(
        bind=db._get_engine({'connection_url': write_url})))
def setup_class(cls):
    # Create one datastore resource and insert three batches of records in
    # three back-to-back requests, capturing utcnow() timestamps around each
    # request so tests can query by insertion-time windows. Statement order
    # matters: every start/end timestamp brackets exactly one POST.
    if not tests.is_datastore_supported():
        raise nose.SkipTest("Datastore not supported")
    p.load('timeseries')
    helpers.reset_db()
    ctd.CreateTestData.create()
    cls.sysadmin_user = model.User.get('testsysadmin')
    cls.normal_user = model.User.get('annafan')
    cls.dataset = model.Package.get('annakarenina')
    cls.resource = cls.dataset.resources[0]
    # Batch 1: creates the table (with alias 'books3') and two records.
    cls.data = {
        'resource_id': cls.resource.id,
        'force': True,
        'aliases': 'books3',
        'fields': [{
            'id': 'author',
            'type': 'text'
        }, {
            'id': 'published'
        }],
        'records': [{
            'author': 'tolstoy1',
            'published': '2005-03-01'
        }, {
            'author': 'tolstoy2'
        }]
    }
    # Batch 2: plain insert of two more records into the same table.
    cls.data2 = {
        'resource_id': cls.resource.id,
        'force': True,
        'method': 'insert',
        'records': [{
            'author': 'tolstoy3',
            'published': '2005-03-03'
        }, {
            'author': 'tolstoy4'
        }]
    }
    # Batch 3: insert of three more records.
    cls.data3 = {
        'resource_id': cls.resource.id,
        'force': True,
        'method': 'insert',
        'records': [{
            'author': 'tolstoy5',
            'published': '2005-03-05'
        }, {
            'author': 'tolstoy6'
        }, {
            'author': 'tolstoy7',
            'published': '2005-03-05'
        }]
    }
    cls.startdata = utcnow()  # window 1 opens
    postparams = '%s=1' % json.dumps(cls.data)
    auth = {'Authorization': str(cls.sysadmin_user.apikey)}
    res = cls.app.post('/api/action/datastore_ts_create',
                       params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True
    cls.enddata = utcnow()  # window 1 closes
    cls.startdata2 = utcnow()  # window 2 opens
    postparams = '%s=1' % json.dumps(cls.data2)
    res = cls.app.post('/api/action/datastore_ts_create',
                       params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True
    cls.enddata2 = utcnow()  # window 2 closes
    cls.startdata3 = utcnow()  # window 3 opens
    postparams = '%s=1' % json.dumps(cls.data3)
    res = cls.app.post('/api/action/datastore_ts_create',
                       params=postparams,
                       extra_environ=auth)
    res_dict = json.loads(res.body)
    assert res_dict['success'] is True
    cls.enddata3 = utcnow()  # window 3 closes
    # Make an organization, because private datasets must belong to one.
    cls.organization = tests.call_action_api(
        cls.app, 'organization_create',
        name='test_org',
        apikey=cls.sysadmin_user.apikey)
    # Expected result sets for the various time windows:
    # all rows / batch 1 / batches 1+2 / batches 2+3 / batch 3.
    cls.expected_records = [{
        u'published': u'2005-03-01T00:00:00',
        u'_id': 1,
        u'author': u'tolstoy1'
    }, {
        u'published': None,
        u'_id': 2,
        u'author': u'tolstoy2'
    }, {
        u'published': u'2005-03-03T00:00:00',
        u'_id': 3,
        u'author': u'tolstoy3'
    }, {
        u'published': None,
        u'_id': 4,
        u'author': u'tolstoy4'
    }, {
        u'published': u'2005-03-05T00:00:00',
        u'_id': 5,
        u'author': u'tolstoy5'
    }, {
        u'published': None,
        u'_id': 6,
        u'author': u'tolstoy6'
    }, {
        u'published': u'2005-03-05T00:00:00',
        u'_id': 7,
        u'author': u'tolstoy7'
    }]
    cls.expected_records1 = [{
        u'published': u'2005-03-01T00:00:00',
        u'_id': 1,
        u'author': u'tolstoy1'
    }, {
        u'published': None,
        u'_id': 2,
        u'author': u'tolstoy2'
    }]
    cls.expected_records12 = [{
        u'published': u'2005-03-01T00:00:00',
        u'_id': 1,
        u'author': u'tolstoy1'
    }, {
        u'published': None,
        u'_id': 2,
        u'author': u'tolstoy2'
    }, {
        u'published': u'2005-03-03T00:00:00',
        u'_id': 3,
        u'author': u'tolstoy3'
    }, {
        u'published': None,
        u'_id': 4,
        u'author': u'tolstoy4'
    }]
    cls.expected_records23 = [{
        u'published': u'2005-03-03T00:00:00',
        u'_id': 3,
        u'author': u'tolstoy3'
    }, {
        u'published': None,
        u'_id': 4,
        u'author': u'tolstoy4'
    }, {
        u'published': u'2005-03-05T00:00:00',
        u'_id': 5,
        u'author': u'tolstoy5'
    }, {
        u'published': None,
        u'_id': 6,
        u'author': u'tolstoy6'
    }, {
        u'published': u'2005-03-05T00:00:00',
        u'_id': 7,
        u'author': u'tolstoy7'
    }]
    cls.expected_records3 = [{
        u'published': u'2005-03-05T00:00:00',
        u'_id': 5,
        u'author': u'tolstoy5'
    }, {
        u'published': None,
        u'_id': 6,
        u'author': u'tolstoy6'
    }, {
        u'published': u'2005-03-05T00:00:00',
        u'_id': 7,
        u'author': u'tolstoy7'
    }]
    # Direct session on the datastore write DB for raw-SQL verification.
    engine = db._get_engine(
        {'connection_url': pylons.config['ckan.datastore.write_url']})
    cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))