def test_munge_filename_multiple_pass(self):
    '''Munging filename multiple times produces same result.'''
    for original, expected in self.munge_list:
        once = munge_filename_legacy(original)
        assert_equal(once, expected)
        # Idempotence: a second pass over an already-munged name is a no-op.
        twice = munge_filename_legacy(once)
        assert_equal(twice, expected)
def test_munge_filename_multiple_pass(self):
    '''Munging filename multiple times produces same result.'''
    for original, expected in self.munge_list:
        once = munge_filename_legacy(original)
        nose_tools.assert_equal(once, expected)
        # Idempotence: re-munging an already-munged name must not change it.
        twice = munge_filename_legacy(once)
        nose_tools.assert_equal(twice, expected)
def update_data_dict(self, data_dict, url_field, file_field, clear_field):
    '''Manipulate data from the data_dict.  This needs to be called before
    it reaches any validators.

    `url_field` is the name of the field where the upload is going to be.

    `file_field` is name of the key where the FieldStorage is kept (i.e
    the field where the file data actually is).

    `clear_field` is the name of a boolean field which requests the upload
    to be deleted.
    '''
    self.url = data_dict.get(url_field, '')
    self.clear = data_dict.pop(clear_field, None)
    self.file_field = file_field
    self.upload_field_storage = data_dict.pop(file_field, None)

    # No storage path configured means uploads are disabled; leave the
    # data_dict as-is.
    if not self.storage_path:
        return

    if isinstance(self.upload_field_storage, ALLOWED_UPLOAD_TYPES):
        self.filename = self.upload_field_storage.filename
        # Prefix with a UTC timestamp so repeated uploads of the same name
        # do not collide, then sanitise the result for safe storage.
        self.filename = str(datetime.datetime.utcnow()) + self.filename
        self.filename = munge.munge_filename_legacy(self.filename)
        self.filepath = os.path.join(self.storage_path, self.filename)
        data_dict[url_field] = self.filename
        self.upload_file = _get_underlying_file(self.upload_field_storage)
    # keep the file if there has been no change
    elif self.old_filename and not self.old_filename.startswith('http'):
        if not self.clear:
            data_dict[url_field] = self.old_filename
        if self.clear and self.url == self.old_filename:
            data_dict[url_field] = ''
def update_data_dict(self, data_dict, url_field, file_field, clear_field):
    ''' Manipulate data from the data_dict.

    url_field is the name of the field where the upload is going to be.

    file_field is name of the key where the FieldStorage is kept (i.e
    the field where the file data actually is).

    clear_field is the name of a boolean field which requests the upload
    to be deleted.

    This needs to be called before it reaches any validators'''
    self.url = data_dict.get(url_field, '')
    self.clear = data_dict.pop(clear_field, None)
    self.file_field = file_field
    self.upload_field_storage = data_dict.pop(file_field, None)

    # No storage path configured means uploads are disabled; leave the
    # data_dict as-is.
    if not self.storage_path:
        return

    if isinstance(self.upload_field_storage, cgi.FieldStorage):
        self.filename = self.upload_field_storage.filename
        # Prefix with a UTC timestamp to avoid name collisions, then
        # sanitise the result for safe storage on disk.
        self.filename = str(datetime.datetime.utcnow()) + self.filename
        self.filename = munge.munge_filename_legacy(self.filename)
        self.filepath = os.path.join(self.storage_path, self.filename)
        data_dict[url_field] = self.filename
        self.upload_file = self.upload_field_storage.file
        # Uploaded data is written to a '~' temp path first so a failed
        # upload never clobbers the real file.
        self.tmp_filepath = self.filepath + '~'
    # keep the file if there has been no change
    elif self.old_filename and not self.old_filename.startswith('http'):
        if not self.clear:
            data_dict[url_field] = self.old_filename
        if self.clear and self.url == self.old_filename:
            data_dict[url_field] = ''
def _create_uploaded_filename(uploaded_file_field):
    # type: (UploadedFileWrapper) -> str
    """Create a filename for storage for the new uploaded file """
    timestamp = str(datetime.datetime.utcnow())
    stamped_name = '{}-{}'.format(timestamp, uploaded_file_field.filename)
    # Sanitise the timestamped name before it is used as a storage key.
    return munge_filename_legacy(stamped_name)
def download(self, filename):
    ''' Provide a download by either redirecting the user to the url
    stored or downloading the uploaded file from S3.

    Raises OSError(errno.ENOENT) when the key does not exist and no
    filesystem fallback is available.
    '''
    filename = munge.munge_filename_legacy(filename)
    key_path = os.path.join(self.storage_path, filename)
    # FIX: removed the dead `if key_path is None` check -- os.path.join
    # never returns None; a missing key is detected via ClientError below.
    try:
        url = self.get_signed_url_to_key(key_path)
        h.redirect_to(url)
    except ClientError as ex:
        if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
            log.warning("Key '%s' not found in bucket '%s'",
                        key_path, self.bucket_name)
            if config.get(
                    'ckanext.s3filestore.filesystem_download_fallback',
                    False):
                # FIX: was logging the builtin `id`, not the filename.
                log.info('Attempting filesystem fallback for resource %s',
                         filename)
                default_upload = DefaultUpload(self.upload_to)
                return default_upload.download(filename)
        # Uploader interface does not know about s3 errors
        raise OSError(errno.ENOENT)
def metadata(self, filename):
    ''' Provide metadata about the download, such as might be obtained from
    a HTTP HEAD request. Returns a dict that includes 'ContentType',
    'ContentLength', 'Hash', and 'LastModified', and may include other keys
    depending on the implementation.
    '''
    filename = munge.munge_filename_legacy(filename)
    key_path = os.path.join(self.storage_path, filename)
    # FIX: removed the dead `if filename is None` check -- munge has already
    # consumed filename by this point; missing keys surface as ClientError.
    try:
        client = self.get_s3_client()
        metadata = client.head_object(Bucket=self.bucket_name, Key=key_path)
        # Mirror the S3 HEAD fields under the lowercase names the uploader
        # interface expects.
        metadata['content_type'] = metadata['ContentType']
        metadata['size'] = metadata['ContentLength']
        metadata['hash'] = metadata['ETag']
        return self.as_clean_dict(metadata)
    except ClientError as ex:
        if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
            log.warning("Key '%s' not found in bucket '%s'",
                        key_path, self.bucket_name)
            if config.get(
                    'ckanext.s3filestore.filesystem_download_fallback',
                    False):
                # FIX: was logging the builtin `id`, not the filename.
                log.info('Attempting filesystem fallback for resource %s',
                         filename)
                default_upload = DefaultUpload(self.upload_to)
                return default_upload.metadata(filename)
        # Uploader interface does not know about s3 errors
        raise OSError(errno.ENOENT)
def delete(self, filename):
    ''' Delete file we are pointing at'''
    safe_name = munge.munge_filename_legacy(filename)
    key_path = os.path.join(self.storage_path, safe_name)
    try:
        self.clear_key(key_path)
    except ClientError:
        # Best-effort delete: a missing key only merits a warning.
        log.warning('Key \'%s\' not found in bucket \'%s\' for delete',
                    key_path, self.bucket_name)
def update_data_dict(self, data_dict, url_field, file_field, clear_field):
    '''Manipulate data from the data_dict.  This needs to be called before
    it reaches any validators.

    `url_field` is the name of the field where the upload is going to be.

    `file_field` is name of the key where the FieldStorage is kept (i.e
    the field where the file data actually is).

    `clear_field` is the name of a boolean field which requests the upload
    to be deleted.
    '''
    # FIX: the docstring above previously appeared after the first
    # statement, making it a no-op string expression rather than docs.
    log.debug(
        "ckanext.s3filestore.uploader: update_data_dic: %s, url %s, file %s, clear %s",
        data_dict, url_field, file_field, clear_field)
    self.url = data_dict.get(url_field, '')
    self.clear = data_dict.pop(clear_field, None)
    self.file_field = file_field
    self.upload_field_storage = data_dict.pop(file_field, None)
    self.upload_file = None
    self.preserve_filename = data_dict.get('preserve_filename', False)

    # No storage path configured means uploads are disabled.
    if not self.storage_path:
        return

    if isinstance(self.upload_field_storage, ALLOWED_UPLOAD_TYPES):
        self.filename = self.upload_field_storage.filename
        if not self.preserve_filename:
            # Timestamp prefix avoids collisions between same-named uploads.
            self.filename = str(datetime.datetime.utcnow()) + self.filename
        self.filename = munge.munge_filename_legacy(self.filename)
        self.filepath = os.path.join(self.storage_path, self.filename)
        if hasattr(self.upload_field_storage, 'mimetype'):
            self.mimetype = self.upload_field_storage.mimetype
        else:
            try:
                self.mimetype = mimetypes.guess_type(self.filename,
                                                     strict=False)[0]
            except Exception:
                # Mimetype detection is best-effort only.
                pass
        data_dict[url_field] = self.filename
        self.upload_file = _get_underlying_file(self.upload_field_storage)
        log.debug(
            "ckanext.s3filestore.uploader: is allowed upload type: filename: %s, upload_file: %s, data_dict: %s",
            self.filename, self.upload_file, data_dict)
    # keep the file if there has been no change
    elif self.old_filename and not self.old_filename.startswith('http'):
        if not self.clear:
            data_dict[url_field] = self.old_filename
        if self.clear and self.url == self.old_filename:
            data_dict[url_field] = ''
    else:
        # FIX: self.filename is never set on this path, so referencing it
        # raised AttributeError; the log string literal was also broken
        # across a physical line.
        log.debug(
            "ckanext.s3filestore.uploader: is not allowed upload type: filename: %s, upload_file: %s, data_dict: %s",
            getattr(self, 'filename', None), self.upload_file, data_dict)
def update_data_dict(self, data_dict, url_field, file_field, clear_field):
    '''Manipulate data from the data_dict.  This needs to be called before
    it reaches any validators.

    `url_field` is the name of the field where the upload is going to be.

    `file_field` is name of the key where the FieldStorage is kept (i.e
    the field where the file data actually is).

    `clear_field` is the name of a boolean field which requests the upload
    to be deleted.
    '''
    # FIX: the docstring above previously appeared after the log.debug
    # call, making it a no-op string expression; commented-out debug
    # lines removed.
    log.debug('update_data_dict')
    self.url = data_dict.get(url_field, '')
    self.clear = data_dict.pop(clear_field, None)
    self.file_field = file_field
    self.upload_field_storage = data_dict.pop(file_field, None)

    # No storage path configured means uploads are disabled.
    if not self.storage_path:
        return

    if hasattr(self.upload_field_storage, 'filename'):
        self.filename = self.upload_field_storage.filename
        # Timestamp prefix avoids collisions, then sanitise for storage.
        self.filename = str(datetime.datetime.utcnow()) + self.filename
        self.filename = munge.munge_filename_legacy(self.filename)
        self.filepath = os.path.join(self.storage_path, self.filename)
        bucket_endpoint = config.get('ckanext.cloud_storage.s3.endpoint')
        # The stored URL points at the object inside the bucket endpoint.
        self.remote_filepath = os.path.join(bucket_endpoint, self.filepath)
        data_dict[url_field] = self.remote_filepath
        self.upload_file = self.upload_field_storage.file
    # keep the file if there has been no change
    elif self.old_filename and not self.old_filename.startswith('http'):
        if not self.clear:
            data_dict[url_field] = self.old_filename
        if self.clear and self.url == self.old_filename:
            data_dict[url_field] = ''
def test_munge_filename(self):
    '''Munge a list of filenames gives expected results.'''
    for original, expected in self.munge_list:
        nose_tools.assert_equal(munge_filename_legacy(original), expected)
def test_munge_filename_legacy_pass(original, expected):
    """Munging filename multiple times produces same result."""
    once = munge_filename_legacy(original)
    assert once == expected
    # Idempotence: re-munging an already-munged name must not change it.
    assert munge_filename_legacy(once) == expected
def test_munge_filename(self):
    '''Munge a list of filenames gives expected results.'''
    for original, expected in self.munge_list:
        assert_equal(munge_filename_legacy(original), expected)
def group_dictize(group, context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''
    Turns a Group object and related into a dictionary. The related objects
    like tags are included unless you specify it in the params.

    :param packages_field: determines the format of the `packages` field - can
    be `datasets` or None.
    '''
    assert packages_field in ('datasets', 'dataset_count', None)
    if packages_field == 'dataset_count':
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict['extras'] = extras_dict_dictize(
            group._extras, context)

    context['with_capacity'] = True

    if packages_field:
        def get_packages_for_this_group(group_, just_the_count=False):
            # Ask SOLR for the list of packages for this org/group
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                                   authz.has_user_permission_for_group_or_org(
                                       group_.id, context.get('user'),
                                       'read'))
                if is_group_member:
                    context['ignore_capacity_check'] = True

            if not just_the_count:
                # Is there a packages limit in the context?
                try:
                    packages_limit = context['limits']['packages']
                except KeyError:
                    q['rows'] = 1000  # Only the first 1000 datasets are returned
                else:
                    q['rows'] = packages_limit

            search_context = dict((k, v) for (k, v) in context.items()
                                  if k != 'schema')
            search_results = logic.get_action('package_search')(
                search_context, q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(
                    group, just_the_count=True)
            else:
                # Use the pre-calculated package_counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)

        result_dict['package_count'] = package_count

    if include_tags:
        # group tags are not creatable via the API yet, but that was(/is) a
        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
        result_dict['tags'] = tag_list_dictize(
            _get_members(context, group, 'tags'), context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict['groups'] = group_list_dictize(
            _get_members(context, group, 'groups'),
            context, include_groups=True)

    if include_users:
        result_dict['users'] = user_list_dictize(
            _get_members(context, group, 'users'), context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have an effect only doing it incase
        # of potential vulnerability of dodgy api input
        image_url = munge.munge_filename_legacy(image_url)
        # FIX: interpolate the munged value; previously the raw
        # result_dict['image_url'] was used, silently discarding the
        # sanitisation performed just above.
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % image_url,
            qualified=True
        )
    return result_dict
def user_dictize(user: Union[model.User, tuple[model.User, str]],
                 context: Context,
                 include_password_hash: bool = False,
                 include_plugin_extras: bool = False) -> dict[str, Any]:
    '''Dictize a User (or a ``(User, capacity)`` pair when
    ``with_capacity`` is set in the context), hiding sensitive fields
    unless the requester is the user themselves or a sysadmin.
    '''
    model = context['model']
    if context.get('with_capacity'):
        # Fix type: "User" is not iterable
        user, capacity = user  # type: ignore
        result_dict = d.table_dictize(user, context, capacity=capacity)
    else:
        result_dict = d.table_dictize(user, context)
    assert isinstance(user, model.User)

    # Never expose credentials or reset tokens by default.
    password_hash = result_dict.pop('password')
    del result_dict['reset_key']
    # FIX: removed the later redundant result_dict.pop('reset_key', None)
    # -- the key is already deleted here.

    result_dict['display_name'] = user.display_name
    result_dict['email_hash'] = user.email_hash
    result_dict['number_created_packages'] = user.number_created_packages(
        include_private_and_draft=context.get(
            'count_private_and_draft_datasets', False))

    requester = context.get('user')
    apikey = result_dict.pop('apikey', None)
    email = result_dict.pop('email', None)
    plugin_extras = result_dict.pop('plugin_extras', None)

    if context.get('keep_email', False):
        result_dict['email'] = email

    if context.get('keep_apikey', False):
        result_dict['apikey'] = apikey

    # The user themselves and sysadmins may see email/apikey.
    if requester == user.name:
        result_dict['apikey'] = apikey
        result_dict['email'] = email

    if authz.is_sysadmin(requester):
        result_dict['apikey'] = apikey
        result_dict['email'] = email

    if include_password_hash:
        result_dict['password_hash'] = password_hash

    if include_plugin_extras:
        # Deep-copy so callers cannot mutate the stored extras.
        result_dict['plugin_extras'] = copy.deepcopy(
            plugin_extras) if plugin_extras else plugin_extras

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have any effect, only doing it in case
        # of potential vulnerability of dodgy api input.
        image_url = munge.munge_filename_legacy(image_url)
        # FIX: interpolate the munged value; previously the raw
        # result_dict['image_url'] was used, silently discarding the
        # sanitisation performed just above.
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/user/%s' % image_url,
            qualified=True)

    return result_dict
def group_dictize(group, context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''
    Turns a Group object and related into a dictionary. The related objects
    like tags are included unless you specify it in the params.

    :param packages_field: determines the format of the `packages` field - can
    be `datasets` or None.
    '''
    assert packages_field in ('datasets', 'dataset_count', None)
    if packages_field == 'dataset_count':
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict['extras'] = extras_dict_dictize(group._extras, context)

    context['with_capacity'] = True

    if packages_field:
        def get_packages_for_this_group(group_, just_the_count=False):
            # Ask SOLR for the list of packages for this org/group
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                                   authz.has_user_permission_for_group_or_org(
                                       group_.id, context.get('user'),
                                       'read'))
                if is_group_member:
                    q['include_private'] = True

            if not just_the_count:
                # Is there a packages limit in the context?
                try:
                    packages_limit = context['limits']['packages']
                except KeyError:
                    q['rows'] = 1000  # Only the first 1000 datasets are returned
                else:
                    q['rows'] = packages_limit

            search_context = dict(
                (k, v) for (k, v) in context.items() if k != 'schema')
            search_results = logic.get_action('package_search')(
                search_context, q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(
                    group, just_the_count=True)
            else:
                # Use the pre-calculated package_counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)

        result_dict['package_count'] = package_count

    if include_tags:
        # group tags are not creatable via the API yet, but that was(/is) a
        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
        result_dict['tags'] = tag_list_dictize(
            _get_members(context, group, 'tags'), context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict['groups'] = group_list_dictize(
            _get_members(context, group, 'groups'),
            context, include_groups=True)

    if include_users:
        result_dict['users'] = user_list_dictize(
            _get_members(context, group, 'users'), context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have an effect only doing it incase
        # of potential vulnerability of dodgy api input
        image_url = munge.munge_filename_legacy(image_url)
        # FIX: interpolate the munged value; previously the raw
        # result_dict['image_url'] was used, silently discarding the
        # sanitisation performed just above.
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % image_url,
            qualified=True)
    return result_dict
def group_dictize(group, context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''Turn a Group object and its related objects into a dictionary.

    :param packages_field: determines the format of the `packages` field -
        can be `datasets`, `dataset_count` or None.
    '''
    assert packages_field in ('datasets', 'dataset_count', None)
    if packages_field == 'dataset_count':
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict[
            'extras'] = ckan.lib.dictization.model_dictize.extras_dict_dictize(
                group._extras, context)

    context['with_capacity'] = True

    if packages_field:
        def get_packages_for_this_group(group_, just_the_count=False):
            # Ask the search backend for this org/group's packages.
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                                   authz.has_user_permission_for_group_or_org(
                                       group_.id, context.get('user'),
                                       'read'))
                if is_group_member:
                    q['include_private'] = True

            if not just_the_count:
                # Is there a packages limit in the context?
                try:
                    packages_limit = context['limits']['packages']
                except KeyError:
                    q['rows'] = 1000  # Only the first 1000 datasets are returned
                else:
                    q['rows'] = packages_limit

            search_context = dict(
                (k, v) for (k, v) in context.items() if k != 'schema')
            search_results = package_search(search_context, q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(
                    group, just_the_count=True)
            else:
                # Use the pre-calculated package counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)

        result_dict['package_count'] = package_count

    if include_tags:
        result_dict[
            'tags'] = ckan.lib.dictization.model_dictize.tag_list_dictize(
                ckan.lib.dictization.model_dictize._get_members(
                    context, group, 'tags'), context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict[
            'groups'] = ckan.lib.dictization.model_dictize.group_list_dictize(
                ckan.lib.dictization.model_dictize._get_members(
                    context, group, 'groups'),
                context, include_groups=True)

    if include_users:
        result_dict[
            'users'] = ckan.lib.dictization.model_dictize.user_list_dictize(
                ckan.lib.dictization.model_dictize._get_members(
                    context, group, 'users'), context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # Munge to guard against dodgy api input in the generated URL.
        image_url = munge.munge_filename_legacy(image_url)
        # FIX: interpolate the munged value; previously the raw
        # result_dict['image_url'] was used, silently discarding the
        # sanitisation performed just above.
        result_dict['image_display_url'] = helpers.url_for_static(
            'uploads/group/%s' % image_url,
            qualified=True)
    return result_dict