def get_hash(self, resource_id):
    """Looks up the stored hash of a datastore resource.

    Searches the hash table resource (`self.hash_table_id`) for the row
    whose `datastore_id` equals `resource_id` and returns its `hash` field.

    Args:
        resource_id (str): The datastore resource id.

    Returns:
        str: The datastore resource hash, or None when the hash table holds
            no row for `resource_id`.

    Raises:
        NotFound: If the hash table package or its resource isn't set, if
            the hash table isn't in the datastore, or if the search reports
            the resource as not found.
        ValidationError: Any other validation error from the search.

    Examples:
        >>> CKAN(hash_table='hash_jhb34rtj34t').get_hash('rid')
        Traceback (most recent call last):
        NotFound: {u'item': u'package', u'message': u'Package `hash_jhb34rtj34t` was not found!'}
    """
    # Guard clauses: both the package and its resource must be known.
    if not self.hash_table_pack:
        details = {
            'message': 'Package `%s` was not found!' % self.hash_table,
            'item': 'package',
        }
        raise NotFound(details)

    if not self.hash_table_id:
        details = {
            'message': (
                'No resources found in package `%s`!' % self.hash_table),
            'item': 'resource',
        }
        raise NotFound(details)

    query = dict(
        resource_id=self.hash_table_id,
        filters={'datastore_id': resource_id},
        fields='hash',
        limit=1,
    )

    missing_resource = 'Resource `%s` was not found' % resource_id
    missing_table = 'Hash table `%s` was not found' % self.hash_table_id

    try:
        rows = self.datastore_search(**query)['records']
        resource_hash = rows[0]['hash']
    except NotFound:
        details = {
            'message': '%s in datastore!' % missing_table,
            'item': 'datastore',
        }
        raise NotFound(details)
    except ValidationError as err:
        if err.error_dict.get('resource_id') != ['Not found: Resource']:
            raise err

        raise NotFound('%s in filestore.' % missing_resource)
    except IndexError:
        # The search succeeded but returned no rows: report and carry on
        # with an empty hash instead of failing.
        print('%s in hash table.' % missing_resource)
        resource_hash = None

    if self.verbose:
        print('Resource `%s` hash is `%s`.' % (resource_id, resource_hash))

    return resource_hash
def fetch_resource(self, resource_id, user_agent=None, stream=True):
    """Downloads a single filestore resource.

    Resolves the resource's metadata, then issues a GET request against its
    `perma_link` (falling back to `url`).

    Args:
        resource_id (str): The filestore resource id.

    Kwargs:
        user_agent (str): The user agent (default: `self.user_agent`).
        stream (bool): Stream content (default: True).

    Returns:
        obj: requests.Response object.

    Raises:
        NotFound: If unable to find the resource.
        NotAuthorized: If the final response status is 401, or any response
            in the redirect history carries a `403` in its `x-ckan-error`
            header.
        ValidationError: Any validation error other than "resource not
            found".

    Examples:
        >>> CKAN(quiet=True).fetch_resource('rid')
        Traceback (most recent call last):
        NotFound: Resource `rid` was not found in filestore.
    """
    agent = user_agent or self.user_agent
    not_found_msg = 'Resource `%s` was not found in filestore.' % resource_id

    try:
        resource = self.resource_show(id=resource_id)
    except NotFound:
        raise NotFound(not_found_msg)
    except ValidationError as err:
        if err.error_dict.get('resource_id') != ['Not found: Resource']:
            raise err

        raise NotFound(not_found_msg)

    url = resource.get('perma_link') or resource.get('url')

    if self.verbose:
        print('Downloading url %s...' % url)

    response = requests.get(
        url, stream=stream, headers={'User-Agent': agent})

    # A denial can surface either on an intermediate redirect (flagged in
    # the `x-ckan-error` header) or as a 401 on the final response.
    denied_msg = 'Access to fetch resource %s was denied.' % resource_id
    denied_on_redirect = any(
        '403' in hop.headers.get('x-ckan-error', '')
        for hop in response.history)

    if denied_on_redirect or response.status_code == 401:
        raise NotAuthorized(denied_msg)

    return response
def test_no_choices_on_not_found(self, LocalCKAN):
    """A datastore search that raises NotFound must yield no choices.

    `LocalCKAN` is replaced so that the instance it returns raises NotFound
    from its `datastore_search` action.
    """
    fake_ckan = Mock()
    LocalCKAN.return_value = fake_ckan
    search = fake_ckan.action.datastore_search
    search.side_effect = NotFound()

    field = {'datastore_choices_resource': 'not-found'}
    choices = scheming_datastore_choices(field)

    assert_equals(choices, [])
    search.assert_called_once()
def test_no_choices_on_not_authorized(self, LocalCKAN):
    """A datastore search that raises NotAuthorized must yield no choices.

    `LocalCKAN` is replaced so that the instance it returns raises
    NotAuthorized from its `datastore_search` action.
    """
    lc = Mock()
    # Raise NotAuthorized rather than NotFound: with NotFound this test was
    # a duplicate of the not-found case and never exercised the
    # authorization failure its name describes.
    lc.action.datastore_search.side_effect = NotAuthorized()
    LocalCKAN.return_value = lc

    choices = scheming_datastore_choices(
        {"datastore_choices_resource": "not-allowed"})

    assert choices == []
    lc.action.datastore_search.assert_called_once()
def get_package_id(self, resource_id):
    """Finds the package that a filestore resource belongs to.

    The id is taken from the first package listed on the resource's
    revision; if the revision lists no packages, the resource's own
    `package_id` is used.

    Args:
        resource_id (str): The filestore resource id.

    Returns:
        str: The package id, or None (after printing a message) when the
            resource lookup raises NotFound.

    Raises:
        NotFound: If the lookup fails validation with a "resource not
            found" error.
        ValidationError: Any other validation error from the lookup.

    Examples:
        >>> CKAN(quiet=True).get_package_id('rid')
        Resource `rid` was not found in filestore.
    """
    missing = 'Resource `%s` was not found in filestore.' % resource_id

    try:
        resource = self.resource_show(id=resource_id)
    except NotFound:
        print(missing)
        return None
    except ValidationError as err:
        if err.error_dict.get('resource_id') != ['Not found: Resource']:
            raise err

        raise NotFound(missing)

    revision = self.revision_show(id=resource['revision_id'])
    packages = revision['packages']

    try:
        package_id = packages[0]
    except IndexError:
        # The revision lists no packages; fall back to the resource's own.
        package_id = resource['package_id']

    return package_id
def setUp(self):
    """Builds the database fixtures and stubs out CKAN for each test.

    Creates a financial year with provincial and national spheres, a
    national government with one department, and an active superuser with
    a verified email address. The `ckan` object is then patched in both
    `budgetportal.datasets` and `budgetportal.models` so that package
    searches return no results and package/group lookups raise NotFound.
    """
    financial_year = FinancialYear.objects.create(slug="2030-31")
    Sphere.objects.create(financial_year=financial_year, name='Provincial')
    self.national = Sphere.objects.create(
        financial_year=financial_year, name='National')
    government = Government.objects.create(
        sphere=self.national, name='South Africa')
    Department.objects.create(
        government=government,
        name='The Presidency',
        vote_number=1,
        intro="",
    )

    superuser = User.objects.create_user(
        username=USERNAME,
        password=PASSWORD,
        is_staff=True,
        is_superuser=True,
        is_active=True,
    )
    EmailAddress.objects.create(user=superuser, email=EMAIL, verified=True)

    self.path = os.path.dirname(__file__)

    def start_ckan_patch(target):
        # Start a patch on `target`, give the mock "empty CKAN" behaviour
        # and make sure the patch is undone when the test finishes.
        patcher = patch(target)
        ckan_mock = patcher.start()
        ckan_mock.action.package_search.return_value = {'results': []}
        ckan_mock.action.package_show.side_effect = NotFound()
        ckan_mock.action.group_show.side_effect = NotFound()
        self.addCleanup(patcher.stop)
        return patcher, ckan_mock

    # Patch CKAN API
    self.ckan_patch, self.CKANMockClass = start_ckan_patch(
        'budgetportal.datasets.ckan')
    self.ckan_patch2, self.CKANMockClass2 = start_ckan_patch(
        'budgetportal.models.ckan')

    super(BulkUploadTestCase, self).setUp()
def _action_get_dataset(context, data_dict):
    '''
    Shared lookup for actions that operate on exactly one dataset.

    Delegates the search to _action_find_dataset and insists on a single
    match: no match raises NotFound, more than one match raises
    ValidationError keyed on 'owner_org'. On success returns the first two
    values from _action_find_dataset unchanged, followed by the one
    matching dataset.
    '''
    lc, geno, matches = _action_find_dataset(context, data_dict)

    if matches:
        if len(matches) > 1:
            problem = (
                _("Multiple datasets exist for type %s")
                % data_dict['dataset_type'])
            raise ValidationError({'owner_org': problem})

        return lc, geno, matches[0]

    raise NotFound()
def update_filestore(self, resource_id, **kwargs):
    """Updates a single resource on filestore.

    Fetches the resource, fills in its `package_id` via `get_package_id`,
    then hands it to `get_filestore_update_func` and runs the update that
    function selects.

    Args:
        resource_id (str): The filestore resource id.
        **kwargs: Keyword arguments that are passed to
            `get_filestore_update_func`.

    Kwargs:
        url (str): New file url (for file link).
        filepath (str): New file path (for file upload).
        fileobj (obj): New file like object (for file upload).
        post (bool): Post data using requests instead of ckanapi.
        name (str): The resource name.
        description (str): The resource description.
        hash (str): The resource hash.

    Returns:
        obj: requests.Response object if `post` option is specified,
            ckan resource object otherwise. None (after printing a
            message) when the resource lookup raises NotFound.

    Raises:
        NotFound: If the lookup fails validation with a "resource not
            found" error.
        ValidationError: Any other validation error from the lookup.

    Examples:
        >>> CKAN(quiet=True).update_filestore('rid')
        Resource `rid` was not found in filestore.
    """
    missing = 'Resource `%s` was not found in filestore.' % resource_id

    try:
        resource = self.resource_show(id=resource_id)
    except NotFound:
        print(missing)
        return None
    except ValidationError as err:
        if err.error_dict.get('resource_id') != ['Not found: Resource']:
            raise err

        raise NotFound(missing)

    resource['package_id'] = self.get_package_id(resource_id)

    if self.verbose:
        print('Updating resource %s...' % resource_id)

    func, args, data = self.get_filestore_update_func(resource, **kwargs)
    return self._update_filestore(func, *args, **data)
def create_table(self, resource_id, fields, **kwargs):
    """Creates a datastore table for an existing filestore resource.

    Args:
        resource_id (str): The filestore resource id.
        fields (List[dict]): fields/columns and their extra metadata.
        **kwargs: Keyword arguments that are passed to datastore_create.

    Kwargs:
        force (bool): Create resource even if read-only (default:
            `self.force`).
        aliases (List[str]): name(s) for read only alias(es) of the
            resource.
        primary_key (List[str]): field(s) that represent a unique key.
        indexes (List[str]): index(es) on table.

    Returns:
        dict: The newly created data object.

    Raises:
        ValidationError: Any validation error other than "resource not
            found".
        NotFound: If unable to find resource.

    Examples:
        >>> CKAN(quiet=True).create_table('rid', fields=[{'id': 'field', 'type': 'text'}])
        Traceback (most recent call last):
        NotFound: Resource `rid` was not found in filestore.
    """
    kwargs.setdefault('force', self.force)
    kwargs.update(resource_id=resource_id, fields=fields)
    missing = 'Resource `%s` was not found in filestore.' % resource_id

    if self.verbose:
        print('Creating table `%s` in datastore...' % resource_id)

    try:
        table = self.datastore_create(**kwargs)
    except ValidationError as err:
        if err.error_dict.get('resource_id') != ['Not found: Resource']:
            raise

        raise NotFound(missing)

    return table
def insert_records(self, resource_id, records, **kwargs):
    """Inserts records into a datastore table.

    The records are recoded for JSON, split into chunks and sent to
    `datastore_upsert` one chunk at a time.

    Args:
        resource_id (str): The datastore resource id.
        records (List[dict]): The records to insert.
        **kwargs: Keyword arguments that are passed to datastore_upsert.

    Kwargs:
        method (str): Insert method. One of ['update, 'insert', 'upsert']
            (default: 'insert').
        force (bool): Create resource even if read-only (default:
            `self.force`).
        start (int): Row number to start from (zero indexed).
        stop (int): Row number to stop at (zero indexed).
        chunksize (int): Number of rows to write at a time.

    Returns:
        int: Number of records inserted. 0 is returned (after printing a
            hint) when a chunk fails with a broken pipe, even if earlier
            chunks were already written.

    Raises:
        NotFound: If unable to find the resource.
        ValidationError: Any validation error other than "resource not
            found".
        requests.exceptions.ConnectionError: Any connection error other
            than a broken pipe.

    Examples:
        >>> CKAN(quiet=True).insert_records('rid', [{'field': 'value'}])
        Traceback (most recent call last):
        NotFound: Resource `rid` was not found in filestore.
    """
    recoded = pr.json_recode(records)
    chunksize = kwargs.pop('chunksize', 0)
    start = kwargs.pop('start', 0)
    stop = kwargs.pop('stop', None)

    kwargs.setdefault('force', self.force)
    kwargs.setdefault('method', 'insert')
    kwargs['resource_id'] = resource_id

    # Loop invariant, so build it once.
    err_msg = 'Resource `%s` was not found in filestore.' % resource_id

    # Counts records actually written. The progress message below is
    # 1-based, but the return value must not be: starting the counter at 1
    # made the function report one record more than it had inserted.
    inserted = 0

    for chunk in ft.chunk(recoded, chunksize, start=start, stop=stop):
        length = len(chunk)

        if self.verbose:
            print(
                'Adding records %i - %i to resource %s...' % (
                    inserted + 1, inserted + length, resource_id))

        kwargs['records'] = chunk

        try:
            self.datastore_upsert(**kwargs)
        except requests.exceptions.ConnectionError as err:
            # `err.message` only exists on Python 2; the string form of
            # the exception carries the underlying error text on both
            # Python 2 and 3.
            if 'Broken pipe' in str(err):
                print(
                    'Chunksize too large. Try using a smaller chunksize.')

                return 0

            raise
        except NotFound:
            # Keep exception message consistent with the others
            raise NotFound(err_msg)
        except ValidationError as err:
            if err.error_dict.get('resource_id') == ['Not found: Resource']:
                raise NotFound(err_msg)

            raise

        inserted += length

    return inserted