Example #1
File: test_munge.py Project: Hoedic/ckan
 def test_munge_filename_multiple_pass(self):
     '''Munging filename multiple times produces same result.'''
     for org, exp in self.munge_list:
         first_munge = munge_filename(org)
         nose_tools.assert_equal(first_munge, exp)
         second_munge = munge_filename(first_munge)
         nose_tools.assert_equal(second_munge, exp)
Example #2
 def test_munge_filename_multiple_pass(self):
     '''Munging filename multiple times produces same result.'''
     for org, exp in self.munge_list:
         first_munge = munge_filename(org)
         assert_equal(first_munge, exp)
         second_munge = munge_filename(first_munge)
         assert_equal(second_munge, exp)
Example #3
def test_munge_filename_pass(original, expected):
    """Munging filename multiple times produces same result."""
    first_munge = munge_filename(original)
    assert first_munge == expected
    assert isinstance(first_munge, text_type)
    second_munge = munge_filename(first_munge)
    assert second_munge == expected
    assert isinstance(second_munge, text_type)
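The property these tests pin down is idempotence: munging an already-munged filename must change nothing. A minimal standalone sketch of that property, assuming only that munge_filename is importable from ckan.lib.munge (the sample inputs are made up, not the CKAN fixtures):

from ckan.lib.munge import munge_filename

# Applying the munge a second time must be a no-op.
for original in ['unicode filename.txt', 'path/to/some file.csv', 'odd:chars?.xls']:
    once = munge_filename(original)
    assert munge_filename(once) == once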
Example #4
class ResourceUpload(object):
    def __init__(self, resource):
        path = get_storage_path()
        if not path:
            self.storage_path = None
            return
        self.storage_path = os.path.join(path, 'resources')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise
        self.filename = None

        url = resource.get('url')
        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            self.upload_file = upload_field_storage.file
        elif self.clear:
            resource['url_type'] = ''
Example #5
    def __init__(self, resource):
        """
        Support for uploading resources to any storage provider
        implemented by the apache-libcloud library.

        :param resource: The resource dict.
        """
        super(ResourceCloudStorage, self).__init__()

        self.filename = None
        self.old_filename = None
        self.file = None
        self.resource = resource

        upload_field_storage = resource.pop('upload', None)
        self._clear = resource.pop('clear_upload', None)

        # Check to see if a file has been provided
        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = munge.munge_filename(upload_field_storage.filename)
            self.file_upload = upload_field_storage.file
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
        elif self._clear and resource.get('id'):
            # Apparently, this is a created-but-not-committed resource whose
            # file upload has been canceled. We're copying the behaviour of
            # ckanext-s3filestore here.
            old_resource = model.Session.query(model.Resource).get(
                resource['id'])

            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #6
    def __init__(self, resource):
        """
        Support for uploading resources to any storage provider
        implemented by the apache-libcloud library.

        :param resource: The resource dict.
        """
        super(ResourceCloudStorage, self).__init__()

        self.filename = None
        self.old_filename = None
        self.file = None
        self.resource = resource

        upload_field_storage = resource.pop('upload', None)
        self._clear = resource.pop('clear_upload', None)

        # Check to see if a file has been provided
        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = munge.munge_filename(upload_field_storage.filename)
            self.file_upload = upload_field_storage.file
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
        elif self._clear and resource.get('id'):
            # Apparently, this is a created-but-not-committed resource whose
            # file upload has been canceled. We're copying the behaviour of
            # ckanext-s3filestore here.
            old_resource = model.Session.query(
                model.Resource
            ).get(
                resource['id']
            )

            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #7
    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        ''' Manipulate data from the data_dict.  url_field is the name of the
        field where the upload is going to be. file_field is the name of the key
        where the FieldStorage is kept (i.e. the field where the file data
        actually is). clear_field is the name of a boolean field which
        requests the upload to be deleted.  This needs to be called before
        it reaches any validators'''

        self.url = data_dict.get(url_field, '')
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)

        if not self.storage_path:
            return

        if isinstance(self.upload_field_storage, cgi.FieldStorage):
            self.filename = self.upload_field_storage.filename
            self.filename = str(datetime.datetime.utcnow()) + self.filename
            self.filename = munge.munge_filename(self.filename)
            self.filepath = os.path.join(self.storage_path, self.filename)
            data_dict[url_field] = self.filename
            self.upload_file = self.upload_field_storage.file
            self.tmp_filepath = self.filepath + '~'
        ### keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''
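For orientation, a hedged sketch of how update_data_dict is typically driven; Upload('group') and the image_url/image_upload/clear_upload field names follow CKAN's group-image convention but should be treated as assumptions here:

# Hypothetical caller: pops the FieldStorage out of data_dict before any
# validators run, leaving the munged filename under 'image_url'.
upload = Upload('group')
upload.update_data_dict(data_dict, 'image_url', 'image_upload', 'clear_upload')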
Example #8
    def __init__(self, resource):
        '''Setup the resource uploader. Actual uploading is performed by
        `upload()`.
        Create a storage path in the format:
        <ckanext.azurefilestore.storage_path>/resources/
        '''
        super(AzureResourceUploader, self).__init__()
        path = config.get('ckanext.azurefilestore.storage_path', '')
        self.storage_path = os.path.join(path, 'resources')
        self.filename = None
        self.old_filename = None

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()
            self.mimetype = resource.get('mimetype')
            if not self.mimetype:
                try:
                    self.mimetype = resource['mimetype'] = mimetypes.guess_type(self.filename, strict=False)[0]
                except Exception:
                    pass
            self.upload_file = _get_underlying_file(upload_field_storage)
        elif self.clear and resource.get('id'):
            # New, not yet created resources can be marked for deletion if the
            # user cancels an upload and enters a URL instead.
            old_resource = model.Session.query(model.Resource) \
                .get(resource['id'])
            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #9
    def download(self, id, filename=None):
        '''
        Provide a download by either redirecting the user to the url stored or
        downloading the uploaded file from S3.
        '''

        if not self.use_filename or filename is None:
            filename = os.path.basename(self.url)
        filename = munge.munge_filename(filename)
        key_path = self.get_path(id, filename)
        key = filename

        if key is None:
            log.warning("Key '%s' not found in bucket '%s'", key_path,
                        self.bucket_name)

        try:
            url = self.get_signed_url_to_key(key_path)
            h.redirect_to(url)

        except ClientError as ex:
            if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
                # attempt fallback
                default_resource_upload = DefaultResourceUpload(self.resource)
                return default_resource_upload.download(id, self.filename)
            else:
                # Controller will raise 404 for us
                raise OSError(errno.ENOENT)
Example #10
    def __init__(self, resource):
        '''Setup the resource uploader. Actual uploading performed by
        `upload()`.

        Create a storage path in the format:
        <ckanext.s3filestore.aws_storage_path>/resources/
        '''

        super(S3ResourceUploader, self).__init__()

        path = config.get('ckanext.s3filestore.aws_storage_path', '')
        self.storage_path = os.path.join(path, 'resources')
        self.filename = None
        self.old_filename = None

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            self.upload_file = upload_field_storage.file
        elif self.clear and resource.get('id'):
            # New, not yet created resources can be marked for deletion if the
            # user cancels an upload and enters a URL instead.
            old_resource = model.Session.query(model.Resource) \
                .get(resource['id'])
            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #11
File: uploader.py Project: tbalaz/test
    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        ''' Manipulate data from the data_dict.  url_field is the name of the
        field where the upload is going to be. file_field is the name of the key
        where the FieldStorage is kept (i.e. the field where the file data
        actually is). clear_field is the name of a boolean field which
        requests the upload to be deleted.  This needs to be called before
        it reaches any validators'''

        self.url = data_dict.get(url_field, '')
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)

        if not self.storage_path:
            return

        if isinstance(self.upload_field_storage, cgi.FieldStorage):
            self.filename = self.upload_field_storage.filename
            self.filename = str(datetime.datetime.utcnow()) + self.filename
            self.filename = munge.munge_filename(self.filename)
            self.filepath = os.path.join(self.storage_path, self.filename)
            data_dict[url_field] = self.filename
            self.upload_file = self.upload_field_storage.file
            self.tmp_filepath = self.filepath + '~'
        ### keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''
Example #12
    def __init__(self, resource):
        '''Setup the resource uploader. Actual uploading performed by
        `upload()`.

        Create a storage path in the format:
        <ckanext.s3filestore.aws_storage_path>/resources/
        '''
        super(S3ResourceUploader, self).__init__()
        path = config.get('ckanext.cloud_storage.path', '')
        self.resource = resource
        self.storage_path = os.path.join(path, 'resources')
        self.filename = None
        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            self.upload_file = upload_field_storage.file
        elif self.clear:
            old_resource = model.Session.query(model.Resource).get(resource['id'])
            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #13
def resource_dictize(res, context):
    model = context['model']
    resource = d.table_dictize(res, context)
    resource_group_id = resource['resource_group_id']
    extras = resource.pop("extras", None)
    if extras:
        resource.update(extras)
    # some urls do not have the protocol; this adds http:// to them
    url = resource['url']
    ## for_edit is only called at the times when the dataset is to be edited
    ## in the frontend. Without for_edit the whole qualified url is returned.
    if resource.get('url_type') == 'upload' and not context.get('for_edit'):
        resource_group = model.Session.query(
            model.ResourceGroup).get(resource_group_id)
        last_part = url.split('/')[-1]
        cleaned_name = munge.munge_filename(last_part)
        resource['url'] = h.url_for(controller='package',
                                    action='resource_download',
                                    id=resource_group.package_id,
                                    resource_id=res.id,
                                    filename=cleaned_name,
                                    qualified=True)
    elif not urlparse.urlsplit(url).scheme and not context.get('for_edit'):
        resource['url'] = u'http://' + url.lstrip('/')
    return resource
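The net effect for uploaded resources is that only the last path segment of the stored url is kept, re-munged, and wrapped in a fully qualified /dataset/.../resource/.../download/<filename> URL, while bare urls such as example.org/data.csv simply gain an http:// prefix.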
Example #14
def resource_dictize(res, context):
    model = context['model']
    resource = d.table_dictize(res, context)
    resource_group_id = resource['resource_group_id']
    extras = resource.pop("extras", None)
    if extras:
        resource.update(extras)
    resource['format'] = _unified_resource_format(res.format)
    # some urls do not have the protocol; this adds http:// to them
    url = resource['url']
    ## for_edit is only called at the times when the dataset is to be edited
    ## in the frontend. Without for_edit the whole qualified url is returned.
    if resource.get('url_type') == 'upload' and not context.get('for_edit'):
        resource_group = model.Session.query(
            model.ResourceGroup).get(resource_group_id)
        last_part = url.split('/')[-1]
        cleaned_name = munge.munge_filename(last_part)
        resource['url'] = h.url_for(controller='package',
                                    action='resource_download',
                                    id=resource_group.package_id,
                                    resource_id=res.id,
                                    filename=cleaned_name,
                                    qualified=True)
    elif not urlparse.urlsplit(url).scheme and not context.get('for_edit'):
        resource['url'] = u'http://' + url.lstrip('/')
    return resource
Example #15
    def __init__(self, resource):
        log.info('Start a fake upload to copy resource at {}'.format(
            resource['upload']))

        path = get_storage_path()
        config_mimetype_guess = config.get('ckan.mimetype_guess', 'file_ext')

        if not path:
            self.storage_path = None
            return

        self.storage_path = os.path.join(path, 'resources')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise

        self.filename = None
        self.mimetype = None

        url = resource.get('url')

        upload_path = resource.pop('upload', None)
        resource['upload'] = 'Local File'

        upload_field_storage = open(upload_path, 'rb')
        self.clear = resource.pop('clear_upload', None)

        if config_mimetype_guess == 'file_ext':
            self.mimetype = mimetypes.guess_type(url)[0]

        self.filesize = 0  # bytes

        # a plain file object has no .filename, so derive it from the path
        self.filename = os.path.basename(upload_path)
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        resource['last_modified'] = datetime.datetime.utcnow()

        self.upload_file = upload_field_storage
        self.upload_file.seek(0, os.SEEK_END)
        self.filesize = self.upload_file.tell()
        # go back to the beginning of the file buffer
        self.upload_file.seek(0, os.SEEK_SET)

        # check if the mimetype failed from guessing with the url
        if not self.mimetype and config_mimetype_guess == 'file_ext':
            self.mimetype = mimetypes.guess_type(self.filename)[0]

        if not self.mimetype and config_mimetype_guess == 'file_contents':
            try:
                self.mimetype = magic.from_buffer(self.upload_file.read(),
                                                  mime=True)
                self.upload_file.seek(0, os.SEEK_SET)
            except IOError as e:
                # Not that important if call above fails
                self.mimetype = None
Example #16
    def path_from_filename(self, rid, filename):
        """
        Returns a bucket path for the given resource_id and filename.

        :param rid: The resource ID.
        :param filename: The unmunged resource filename.
        """
        return os.path.join('resources', rid, munge.munge_filename(filename))
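A usage note (resource id and filename are hypothetical; the exact munged form depends on CKAN's munge rules):

# Keys are laid out as resources/<resource id>/<munged filename>:
uploader.path_from_filename('165900ba-3c60-43c5-9e9c-9f8acd0aa93f',
                            'My Data File.csv')
# -> 'resources/165900ba-3c60-43c5-9e9c-9f8acd0aa93f/<munged name>'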
Example #17
def _get_resource_s3_path(resource_dict):
    download_url = resource_dict.get("download_url") or resource_dict.get("hdx_rel_url")
    if "download/" in download_url:
        url = download_url.split("download/")[1]
    else:
        url = resource_dict.get("name")
    munged_resource_name = munge.munge_filename(url)
    return munged_resource_name
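The helper prefers the URL tail after 'download/' and only falls back to the resource name. A sketch of both branches, with invented dicts and URLs:

# Branch 1: key derived from the segment after 'download/'.
_get_resource_s3_path({'download_url':
                       'https://example.org/r/abc/download/My File.csv'})
# Branch 2: no 'download/' in the URL, so the resource name is munged instead.
_get_resource_s3_path({'download_url': 'https://example.org/r/abc',
                       'name': 'fallback name.csv'})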
Example #18
def group_list_dictize(obj_list, context,
                       sort_key=lambda x:x['display_name'], reverse=False,
                       with_package_counts=True):

    active = context.get('active', True)
    with_private = context.get('include_private_packages', False)

    if with_package_counts:
        query = search.PackageSearchQuery()
        q = {'q': '+capacity:public' if not with_private else '*:*',
             'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
             'facet.limit': -1, 'rows': 1}
        query.run(q)

    result_list = []

    for obj in obj_list:
        if context.get('with_capacity'):
            obj, capacity = obj
            group_dict = d.table_dictize(obj, context, capacity=capacity)
        else:
            group_dict = d.table_dictize(obj, context)
        group_dict.pop('created')
        if active and obj.state not in ('active', 'pending'):
            continue

        group_dict['display_name'] = (group_dict.get('title') or
                                      group_dict.get('name'))

        image_url = group_dict.get('image_url')
        group_dict['image_display_url'] = image_url
        if image_url and not image_url.startswith('http'):
            # munge here should not have an effect; only doing it in case
            # of a potential vulnerability from dodgy api input
            image_url = munge.munge_filename(image_url)
            group_dict['image_display_url'] = h.url_for_static(
                'uploads/group/%s' % group_dict.get('image_url'),
                qualified=True
            )

        if with_package_counts:
            facets = query.facets
            if obj.is_organization:
                group_dict['packages'] = facets['owner_org'].get(obj.id, 0)
            else:
                group_dict['packages'] = facets['groups'].get(obj.name, 0)

        if context.get('for_view'):
            if group_dict['is_organization']:
                plugin = plugins.IOrganizationController
            else:
                plugin = plugins.IGroupController
            for item in plugins.PluginImplementations(plugin):
                group_dict = item.before_view(group_dict)

        result_list.append(group_dict)
    return sorted(result_list, key=sort_key, reverse=reverse)
Example #19
File: uploader.py Project: tino097/ckan
    def __init__(self, resource: dict[str, Any]) -> None:
        path = get_storage_path()
        config_mimetype_guess = config.get_value('ckan.mimetype_guess')

        if not path:
            self.storage_path = None
            return
        self.storage_path = os.path.join(path, 'resources')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise
        self.filename = None
        self.mimetype = None

        url = resource.get('url')

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if url and config_mimetype_guess == 'file_ext' and urlparse(url).path:
            self.mimetype = mimetypes.guess_type(url)[0]

        if bool(upload_field_storage) and \
                isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filesize = 0  # bytes

            self.filename = upload_field_storage.filename
            assert self.filename is not None
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()
            self.upload_file = _get_underlying_file(upload_field_storage)
            assert self.upload_file is not None
            self.upload_file.seek(0, os.SEEK_END)
            self.filesize = self.upload_file.tell()
            # go back to the beginning of the file buffer
            self.upload_file.seek(0, os.SEEK_SET)

            # check if the mimetype failed from guessing with the url
            if not self.mimetype and config_mimetype_guess == 'file_ext':
                self.mimetype = mimetypes.guess_type(self.filename)[0]

            if not self.mimetype and config_mimetype_guess == 'file_contents':
                try:
                    self.mimetype = magic.from_buffer(self.upload_file.read(),
                                                      mime=True)
                    self.upload_file.seek(0, os.SEEK_SET)
                except IOError:
                    # Not that important if call above fails
                    self.mimetype = None

        elif self.clear:
            resource['url_type'] = ''
Example #21
    def __init__(self, resource):
        """
        Support for uploading resources to any storage provider
        implemented by the apache-libcloud library.

        :param resource: The resource dict.
        """
        super(ResourceCloudStorage, self).__init__()

        self.filename = None
        self.old_filename = None
        self.file = None
        self.resource = resource

        upload_field_storage = resource.pop('upload', None)
        self._clear = resource.pop('clear_upload', None)
        multipart_name = resource.pop('multipart_name', None)

        # Check to see if a file has been provided
        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = munge.munge_filename(upload_field_storage.filename)
            self.file_upload = upload_field_storage.file
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.utcnow()
        elif multipart_name and self.can_use_advanced_aws:
            # This means that file was successfully uploaded and stored
            # at cloud.
            # Currently implemented just AWS version
            resource['url'] = munge.munge_filename(multipart_name)
            resource['url_type'] = 'upload'
        elif self._clear and resource.get('id'):
            # Apparently, this is a created-but-not-committed resource whose
            # file upload has been canceled. We're copying the behaviour of
            # ckanext-s3filestore here.
            old_resource = model.Session.query(
                model.Resource
            ).get(
                resource['id']
            )

            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #22
 def path_from_filename(self, rid, filename):
     """
     Returns a bucket path for the given resource_id and filename.
     If there is no parent directory name specified, the root of 
     the bucket will contain the resources directory.
     :param rid: The resource ID.
     :param filename: The unmunged resource filename.
     """
     return os.path.join(self.parent_directory_name, 'resources', rid,
                         munge.munge_filename(filename))
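Compared with Example #16, this variant prefixes every key with a configurable parent directory, so several CKAN sites can share one bucket. A hypothetical call, assuming parent_directory_name is 'prod-ckan':

uploader.path_from_filename('165900ba-3c60-43c5-9e9c-9f8acd0aa93f',
                            'Annual Report.pdf')
# -> 'prod-ckan/resources/165900ba-3c60-43c5-9e9c-9f8acd0aa93f/<munged name>'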
Example #23
 def file_remove_s3(resource_id, resource_url):
     try:
         uploader = S3ResourceUploader({})
         # resource_name = find_filename_in_url(resource_url)
         munged_resource_name = munge.munge_filename(resource_url)
         filepath = uploader.get_path(resource_id, munged_resource_name)
         uploader.clear_key(filepath)
     except Exception as e:
         msg = 'Couldn\'t delete file from S3'
         log.warning(msg + str(e))
Example #24
def group_dictize(group, context):
    model = context['model']
    result_dict = d.table_dictize(group, context)

    result_dict['display_name'] = group.display_name

    result_dict['extras'] = extras_dict_dictize(
        group._extras, context)

    context['with_capacity'] = True

    result_dict['packages'] = d.obj_list_dictize(
        _get_members(context, group, 'packages'),
        context)

    query = search.PackageSearchQuery()
    if group.is_organization:
        q = {'q': 'owner_org:"%s" +capacity:public' % group.id, 'rows': 1}
    else:
        q = {'q': 'groups:"%s" +capacity:public' % group.name, 'rows': 1}
    result_dict['package_count'] = query.run(q)['count']

    result_dict['tags'] = tag_list_dictize(
        _get_members(context, group, 'tags'),
        context)

    result_dict['groups'] = group_list_dictize(
        _get_members(context, group, 'groups'),
        context)

    result_dict['users'] = user_list_dictize(
        _get_members(context, group, 'users'),
        context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have an effect; only doing it in case
        # of a potential vulnerability from dodgy api input
        image_url = munge.munge_filename(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % result_dict.get('image_url'),
            qualified=True
        )
    return result_dict
Example #25
    def __init__(self, resource):
        path = get_storage_path()
        config_mimetype_guess = config.get('ckan.mimetype_guess', 'file_ext')

        if not path:
            self.storage_path = None
            return
        self.storage_path = os.path.join(path, 'resources')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise
        self.filename = None
        self.mimetype = None

        url = resource.get('url')

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if config_mimetype_guess == 'file_ext':
            self.mimetype = mimetypes.guess_type(url)[0]

        if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filesize = 0  # bytes

            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()
            self.upload_file = _get_underlying_file(upload_field_storage)
            self.upload_file.seek(0, os.SEEK_END)
            self.filesize = self.upload_file.tell()
            # go back to the beginning of the file buffer
            self.upload_file.seek(0, os.SEEK_SET)

            # check if the mimetype failed from guessing with the url
            if not self.mimetype and config_mimetype_guess == 'file_ext':
                self.mimetype = mimetypes.guess_type(self.filename)[0]

            if not self.mimetype and config_mimetype_guess == 'file_contents':
                try:
                    self.mimetype = magic.from_buffer(self.upload_file.read(),
                                                      mime=True)
                    self.upload_file.seek(0, os.SEEK_SET)
                except IOError as e:
                    # Not that important if call above fails
                    self.mimetype = None

        elif self.clear:
            resource['url_type'] = ''
Example #26
    def path_from_filename(self, rid, filename):
        """
        Returns a bucket path for the given resource_id and filename.

        :param rid: The resource ID.
        :param filename: The unmunged resource filename.
        """
        return os.path.join(
            'resources',
            rid,
            munge.munge_filename(filename)
        )
Example #27
    def delete(self, id, filename=None):
        ''' Delete file we are pointing at'''

        if filename is None:
            filename = os.path.basename(self.url)
        filename = munge.munge_filename(filename)
        key_path = self.get_path(id, filename)
        try:
            self.clear_key(key_path)
        except ClientError as ex:
            log.warning("Key '%s' not found in bucket '%s' for delete",
                        key_path, self.bucket_name)
Example #28
    def delete(self, id, filename=None):
        ''' Delete file we are pointing at'''

        if filename is None:
            filename = os.path.basename(self.url)
        filename = munge.munge_filename(filename)
        key_path = self.get_path(id, filename)
        try:
            self.clear_key(key_path)
        except ClientError:
            log.warning('Key {0} not found in bucket {1} for delete'.format(
                key_path, self.bucket_name))
Example #29
    def __init__(self, resource):
        '''Setup the resource uploader. Actual uploading performed by
        `upload()`.

        Create a storage path in the format:
        <ckanext.s3filestore.aws_storage_path>/resources/
        '''

        super(S3ResourceUploader, self).__init__()

        self.use_filename = toolkit.asbool(
            config.get('ckanext.s3filestore.use_filename', False))
        path = config.get('ckanext.s3filestore.aws_storage_path', '')
        self.storage_path = os.path.join(path, 'resources')
        self.filename = None
        self.old_filename = None
        self.url = resource['url']
        # Hold onto resource just in case we need to fallback to Default ResourceUpload from core ckan.lib.uploader
        self.resource = resource

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filesize = 0  # bytes

            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()
            self.mimetype = resource.get('mimetype')
            if not self.mimetype:
                try:
                    self.mimetype = resource[
                        'mimetype'] = mimetypes.guess_type(self.filename,
                                                           strict=False)[0]
                except Exception:
                    pass
            self.upload_file = _get_underlying_file(upload_field_storage)
            self.upload_file.seek(0, os.SEEK_END)
            self.filesize = self.upload_file.tell()
            # go back to the beginning of the file buffer
            self.upload_file.seek(0, os.SEEK_SET)
        elif self.clear and resource.get('id'):
            # New, not yet created resources can be marked for deletion if the
            # user cancels an upload and enters a URL instead.
            old_resource = model.Session.query(model.Resource) \
                .get(resource['id'])
            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #30
    def metadata(self, id, filename=None):
        if filename is None:
            filename = os.path.basename(self.url)
        filename = munge.munge_filename(filename)
        key_path = self.get_path(id, filename)
        key = filename

        if key is None:
            log.warning("Key '%s' not found in bucket '%s'", key_path,
                        self.bucket_name)

        try:
            # Small workaround to manage downloading of large files
            # We are using redirect to minio's resource public URL
            client = self.get_s3_client()

            metadata = client.head_object(Bucket=self.bucket_name,
                                          Key=key_path)
            metadata['content_type'] = metadata['ContentType']

            # Drop non public metadata
            metadata.pop('ServerSideEncryption', None)
            metadata.pop('SSECustomerAlgorithm', None)
            metadata.pop('SSECustomerKeyMD5', None)
            metadata.pop('SSEKMSKeyId', None)
            metadata.pop('StorageClass', None)
            metadata.pop('RequestCharged', None)
            metadata.pop('ReplicationStatus', None)
            metadata.pop('ObjectLockLegalHoldStatus', None)

            metadata['size'] = metadata['ContentLength']
            metadata['hash'] = metadata['ETag']
            return self.as_clean_dict(metadata)
        except ClientError as ex:
            if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
                if config.get(
                        'ckanext.s3filestore.filesystem_download_fallback',
                        False):
                    log.info('Attempting filesystem fallback for resource %s',
                             id)

                    default_resource_upload = DefaultResourceUpload(
                        self.resource)
                    return default_resource_upload.metadata(id)

            # Uploader interface does not know about s3 errors
            raise OSError(errno.ENOENT)
Example #31
    def get_path(self, id, filename=None):
        '''Return the key used for this resource in S3.

        Keys are in the form:
        <ckanext.s3filestore.aws_storage_path>/resources/<resource id>/<filename>

        e.g.:
        my_storage_path/resources/165900ba-3c60-43c5-9e9c-9f8acd0aa93f/data.csv
        '''

        if filename is None:
            filename = os.path.basename(self.url)
        filename = munge.munge_filename(filename)

        directory = self.get_directory(id, self.storage_path)
        filepath = os.path.join(directory, filename)
        return filepath
Example #32
def _add_user_extras(user_obj, user_dict):
    for key, value in user_obj.extras.iteritems():
        if key in user_dict:
            log.warning(
                "Trying to override user data with extra variable '%s'", key)
            continue
        if key in ('blog', 'www_page', 'translations'):
            if value:
                user_dict[key] = json.loads(value)
        else:
            user_dict[key] = value

    image_url = user_dict.get('image_url', None)
    user_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        user_dict['image_display_url'] = helpers.url_for_static(
            'uploads/user/%s' % user_dict.get('image_url'), qualified=True)
    return user_dict
Example #33
def validate(context, resource, schema_config):

    schema_name = resource.get("validator_schema")
    if not schema_name:
        return
    if schema_name not in schema_config:
        raise IOError("Could not find schema")

    schema = schema_config.get(schema_name).schema

    upload_field_storage = resource.get("upload")
    log.debug(upload_field_storage)

    if isinstance(upload_field_storage, FileStorage):
        file_string = upload_field_storage._file.read()
    elif isinstance(upload_field_storage, cgi.FieldStorage):
        file_string = upload_field_storage.file.read()
    else:
        raise plugins.toolkit.ValidationError({
            "No file uploaded": [
                "Please choose a file to upload (not a link), you might need to reselect the file"
            ]
        })
    filename = munge.munge_filename(upload_field_storage.filename)
    extension = filename.split(".")[-1]
    scheme = "stream"
    file_upload = cStringIO.StringIO(file_string)
    if extension == "csv":
        scheme = "text"
        file_upload = file_string.decode("utf-8").encode("ascii", "ignore")
    checks = ["schema"]
    if schema.get("transpose"):
        file_upload = transpose(file_upload, extension)
    if "custom-constraint" in schema:
        checks.append(
            {"custom-constraint": schema.get("custom-constraint", {})})
    report = goodtables.validate(file_upload,
                                 format=extension,
                                 scheme=scheme,
                                 schema=schema,
                                 checks=checks)
    log.info(report)
    return report, schema
Example #34
def delete_ckan_record(package_id):
    """
    Remove a dataset and its associated resource from CKAN
    :param package_id:
    :return: Nothing
    """

    # First, verify and get the resource ID
    package_record = get_ckan_record(package_id)
    if len(package_record) == 0:
        logger.warn("Cannot find record {0} to delete".format(package_id))
        return

    # Get rid of the resource
    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')

    # Delete the local file if it exists

    gcdocs_file = os.path.join(
        doc_intake_dir,
        munge_filename(os.path.basename(
            package_record['resources'][0]['name'])))
    if os.path.exists(gcdocs_file):
        os.remove(gcdocs_file)

    with RemoteCKAN(remote_ckan_url,
                    user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            delete_blob(
                ckan_container, 'resources/{0}/{1}'.format(
                    package_record['resources'][0]['id'],
                    package_record['resources'][0]['name'].lower()))
            ckan_instance.action.package_delete(id=package_record['id'])
            ckan_instance.action.dataset_purge(id=package_record['id'])
            logger.info("Deleted expired CKAN record {0}".format(
                package_record['id']))
        except Exception as ex:
            logger.error("Unexpected error when deleting record {0}".format(
                ex.message))
Example #35
def resource_dictize(res, context):
    model = context['model']
    resource = d.table_dictize(res, context)
    extras = resource.pop("extras", None)
    if extras:
        resource.update(extras)
    # some urls do not have the protocol; this adds http:// to them
    url = resource['url']
    ## for_edit is only called at the times when the dataset is to be edited
    ## in the frontend. Without for_edit the whole qualified url is returned.
    if resource.get('url_type') == 'upload' and not context.get('for_edit'):
        cleaned_name = munge.munge_filename(url)
        resource['url'] = h.url_for('resource.download',
                                    id=resource['package_id'],
                                    resource_id=res.id,
                                    filename=cleaned_name,
                                    qualified=True)
    elif resource['url'] and not urlparse.urlsplit(url).scheme and not context.get('for_edit'):
        resource['url'] = u'http://' + url.lstrip('/')
    return resource
Example #37
File: logic.py Project: haphut/ytp
def _add_user_extras(user_obj, user_dict):
    for key, value in user_obj.extras.iteritems():
        if key in user_dict:
            log.warning("Trying to override user data with extra variable '%s'", key)
            continue
        if key in ('blog', 'www_page', 'translations'):
            if value:
                user_dict[key] = json.loads(value)
        else:
            user_dict[key] = value

    image_url = user_dict.get('image_url', None)
    user_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        user_dict['image_display_url'] = helpers.url_for_static(
            'uploads/user/%s' % user_dict.get('image_url'),
            qualified=True
        )
    return user_dict
Example #38
    def __init__(self, file_dict):
        path = uploader.get_storage_path()
        if not path:
            self.storage_path = None
            return
        self.storage_path = os.path.join(path, 'global')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise
        self.filename = os.path.basename(file_dict.get('filename')) if file_dict.get('filename') else None

        upload_field_storage = file_dict.pop('upload', None)

        if isinstance(upload_field_storage, cgi.FieldStorage):
            self._update_filename(upload_field_storage)
            self.filename = munge.munge_filename(self.filename)
            file_dict['filename'] = self.filename
            self.upload_file = upload_field_storage.file
Example #39
    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        """ Manipulate data from the data_dict.  url_field is the name of the
        field where the upload is going to be. file_field is the name of the key
        where the FieldStorage is kept (i.e. the field where the file data
        actually is). clear_field is the name of a boolean field which
        requests the upload to be deleted.  This needs to be called before
        it reaches any validators"""

        self.url = data_dict.get(url_field, '')
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)

        if not self.storage_path:
            return

        if self.old_filename:
            self.old_filepath = os.path.join(self.storage_path,
                                             data_dict.get('name'),
                                             self.old_filename)

        if isinstance(self.upload_field_storage, (ALLOWED_UPLOAD_TYPES)):
            self.filename = self.upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            organization_storagepath = os.path.join(self.storage_path,
                                                    data_dict.get('name'))
            _make_dirs_if_not_existing(organization_storagepath)
            self.filepath = os.path.join(organization_storagepath,
                                         self.filename)
            data_dict[url_field] = self.filename
            data_dict['url_type'] = 'upload'
            self.upload_file = _get_underlying_file(self.upload_field_storage)
            self.tmp_filepath = self.filepath + '~'
        # keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''
Example #40
    def __init__(self, resource):
        '''Setup the resource uploader. Actual uploading performed by
        `upload()`.

        Create a storage path in the format:
        <ckanext.s3filestore.aws_storage_path>/resources/
        '''

        super(S3ResourceUploader, self).__init__()

        path = config.get('ckanext.s3filestore.aws_storage_path', '')
        self.storage_path = os.path.join(path, 'resources')
        self.filename = None
        self.old_filename = None

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()
            self.mimetype = resource.get('mimetype')
            if not self.mimetype:
                try:
                    self.mimetype = resource['mimetype'] = mimetypes.guess_type(self.filename, strict=False)[0]
                except Exception:
                    pass
            self.upload_file = _get_underlying_file(upload_field_storage)
        elif self.clear and resource.get('id'):
            # New, not yet created resources can be marked for deletion if the
            # user cancels an upload and enters a URL instead.
            old_resource = model.Session.query(model.Resource) \
                .get(resource['id'])
            self.old_filename = old_resource.url
            resource['url_type'] = ''
Example #41
        def walk(bucket, dir, files):
            for file in files:
                full_path = os.path.join(resource_path, dir, file)
                if not os.path.isfile(full_path) or full_path.endswith('~'):
                    continue

                key_name = full_path[len(resource_path):]
                for key in bucket.list(prefix=key_name.lstrip('/')):
                    key.delete()

                resource_id = key_name.replace('/', '')
                resource = model.Resource.get(resource_id)
                if not resource:
                    continue
                last_part = resource.url.split('/')[-1]
                file_name = munge.munge_filename(last_part)
                key_name = key_name + '/' + file_name

                key = s3key.Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(full_path)

                print('Archived %s' % key_name)
                os.remove(full_path)
Example #43
 def test_munge_filename(self):
     '''Munge a list of filenames gives expected results.'''
     for org, exp in self.munge_list:
         munge = munge_filename(org)
         assert_equal(munge, exp)
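The munge_list fixture iterated over here is a list of (original, expected) pairs; a hypothetical shape, not the exact CKAN test data:

munge_list = [
    # (original filename, expected munged filename) - illustrative values,
    # assuming munge_filename turns spaces into hyphens
    ('unicode filename.txt', 'unicode-filename.txt'),
    ('already-clean.csv', 'already-clean.csv'),
]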
Example #44
    def _save_shape_file_as_resource(self, lat, lon, shape_file_name, watershed_des, organization):
        source = 'delineate.delineatewatershed._save_shape_file_as_resource():'
        ajax_response = d_helper.AJAXResponse()

        if not self._validate_file_name(shape_file_name):
            ajax_response.success = False
            ajax_response.message = 'Invalid shape file name:%s.' % shape_file_name + '\nFile name needs to have only ' \
                                                                                      'alphanumeric characters and ' \
                                                                                      'dash, hyphen or space characters.'
            return ajax_response.to_json()

        # TODO: make the saving of the file to temp directory a separate function
        ckan_default_dir = d_helper.StringSettings.ckan_user_session_temp_dir
        session_id = base.session['id']
        shape_files_source_dir = os.path.join(ckan_default_dir, session_id, 'ShapeFiles')
        target_zip_dir = os.path.join(ckan_default_dir, session_id, 'ShapeZippedFile') 
        shape_zip_file = os.path.join(target_zip_dir, shape_file_name + '.zip')

        if not os.path.isdir(shape_files_source_dir):
            log.error(source + 'CKAN error: Expected shape file source dir path (%s) is missing.'
                      % shape_files_source_dir)

            ajax_response.success = False
            ajax_response.message = _('Failed to save the watershed shape file.')
            return ajax_response.to_json()

        if not os.path.exists(shape_zip_file):
            #create the watershed zip file first
            if os.path.isdir(target_zip_dir):
                shutil.rmtree(target_zip_dir)
            
            os.makedirs(target_zip_dir)
            files_to_archive = shape_files_source_dir + '/' + 'Watershed.*'
            zipper = zipfile.ZipFile(shape_zip_file, 'w')
            for file_to_zip in glob.glob(files_to_archive):
                zipper.write(file_to_zip, os.path.basename(file_to_zip), compress_type=zipfile.ZIP_DEFLATED)
            
            zipper.close()

        # TODO: make the creation of a new package a new function
        # create a package
        package_create_action = tk.get_action('package_create')
        
        # create unique package name using the current time stamp as a postfix to any package name
        unique_postfix = datetime.now().isoformat().replace(':', '-').replace('.', '-').lower()
        pkg_title = shape_file_name  # + '_'
        pkg_name = shape_file_name.replace(' ', '-').lower()
        data_dict = {
                    'name': pkg_name + '_' + unique_postfix,
                    'type': 'geographic-feature-set',
                    'title': pkg_title,
                    'author': tk.c.userObj.name if tk.c.userObj else tk.c.author,   # TODO: userObj is None always. Need to retrieve user full name
                    'notes': 'This is a dataset that contains a watershed shape zip file for an outlet'
                             ' location at latitude:%s and longitude:%s. ' % (lat, lon) + watershed_des,
                    'owner_org': organization,
                    'variable_name': '',  # extra metadata field begins from here
                    'variable_unit': '',
                    'north_extent': '',
                    'south_extent': '',
                    'east_extent': '',
                    'west_extent': '',
                    'projection': 'WGS_1984',   # this what our delineation service sets for the watershed
                    'dataset_type': 'geographic-feature-set'
                    }
        
        context = {'model': base.model, 'session': base.model.Session, 'user': tk.c.user or tk.c.author, 'save': 'save'}
        try:
            pkg_dict = package_create_action(context, data_dict)
            log.info(source + 'A new dataset was created with name: %s' % data_dict['title'])
        except Exception as e:
            log.error(source + 'Failed to create a new dataset for saving watershed shape file as'
                               ' a resource.\n Exception: %s' % e)

            ajax_response.success = False
            ajax_response.message = _('Failed to create a new dataset for'
                                      ' saving watershed shape file as a resource.')
            return ajax_response.to_json()

        # TODO: make the add resource to a package a new function
        if 'resources' not in pkg_dict:
            pkg_dict['resources'] = []

        file_name = munge.munge_filename(shape_file_name + '.zip')
        resource = {'url': file_name, 'url_type': 'upload'}
        upload = uploader.ResourceUpload(resource)
        upload.filename = file_name
        upload.upload_file = open(shape_zip_file, 'rb')  # binary mode: the file is a zip archive
        data_dict = {'format': 'zip', 'name': file_name, 'url': file_name, 'url_type': 'upload'}
        pkg_dict['resources'].append(data_dict)

        try:
            context['defer_commit'] = True
            context['use_cache'] = False
            # update the package
            package_update_action = tk.get_action('package_update')
            package_update_action(context, pkg_dict)
            context.pop('defer_commit')
        except Exception as e:
            log.error(source + 'Failed to update the new dataset for adding watershed shape file as'
                               ' a resource.\n Exception: %s' % e)

            ajax_response.success = False
            ajax_response.message = _('Failed to save watershed shape file as a resource.')
            return ajax_response.to_json()

        # Get the resource out of the model, as it will not appear in
        # package_show until after the commit
        upload.upload(context['package'].resources[-1].id, uploader.get_max_resource_size())
        base.model.repo.commit()
        ajax_response.success = True
        ajax_response.message = _('Watershed shape file was saved as a resource.')
        return ajax_response.to_json()
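The tail of the snippet above walks through CKAN's programmatic upload protocol: munge the filename, build a ResourceUpload, append the resource dict, run package_update with defer_commit, then write the file against the new resource id and commit. A minimal sketch of just that sequence, assuming a CKAN 2.x environment where these imports resolve; attach_local_file_as_resource is a hypothetical helper name, not part of CKAN:

import ckan.lib.munge as munge
import ckan.lib.uploader as uploader
import ckan.plugins.toolkit as tk
import ckan.lib.base as base

def attach_local_file_as_resource(context, pkg_dict, local_path, file_name):
    # Normalise the filename the same way CKAN does for browser uploads.
    file_name = munge.munge_filename(file_name)
    upload = uploader.ResourceUpload({'url': file_name, 'url_type': 'upload'})
    upload.filename = file_name
    upload.upload_file = open(local_path, 'rb')  # binary mode for archives
    pkg_dict.setdefault('resources', []).append(
        {'format': 'zip', 'name': file_name, 'url': file_name, 'url_type': 'upload'})
    # Defer the commit so the resource row and the stored file land together.
    context['defer_commit'] = True
    context['use_cache'] = False
    tk.get_action('package_update')(context, pkg_dict)
    context.pop('defer_commit')
    # The new resource exists only on the model until the commit below.
    upload.upload(context['package'].resources[-1].id,
                  uploader.get_max_resource_size())
    base.model.repo.commit()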
Example #45
0
def group_dictize(group, context):
    result_dict = d.table_dictize(group, context)

    result_dict['display_name'] = group.display_name

    result_dict['extras'] = extras_dict_dictize(
        group._extras, context)

    include_datasets = context.get('include_datasets', True)

    q = {
        'facet': 'false',
        'rows': 0,
    }

    if group.is_organization:
        q['fq'] = 'owner_org:"{0}"'.format(group.id)
    else:
        q['fq'] = 'groups:"{0}"'.format(group.name)

    is_group_member = (context.get('user') and
                       new_authz.has_user_permission_for_group_or_org(
                           group.id, context.get('user'), 'read'))
    if is_group_member:
        context['ignore_capacity_check'] = True

    if include_datasets:
        q['rows'] = 1000    # Only the first 1000 datasets are returned

    context_ = dict((k, v) for (k, v) in context.items() if k != 'schema')
    search_results = logic.get_action('package_search')(context_, q)

    if include_datasets:
        result_dict['packages'] = search_results['results']

    result_dict['package_count'] = search_results['count']

    context['with_capacity'] = True
    result_dict['tags'] = tag_list_dictize(
        _get_members(context, group, 'tags'),
        context)

    result_dict['groups'] = group_list_dictize(
        _get_members(context, group, 'groups'),
        context)

    result_dict['users'] = user_list_dictize(
        _get_members(context, group, 'users'),
        context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munging here should have no effect; it is only done as a defence
        # against potentially dodgy API input
        image_url = munge.munge_filename(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % image_url,
            qualified=True)
    return result_dict
Example #46
0
File: tasks.py Project: CI-WATER/portal
def _save_ueb_package_as_dataset(service_call_results, model_config_dataset_id):
    source = 'uebpackage.tasks._save_ueb_package_as_dataset():'
    ckan_default_dir = uebhelper.StringSettings.ckan_user_session_temp_dir  # '/tmp/ckan'

    # get the matching model configuration dataset object
    model_config_dataset_obj = base.model.Package.get(model_config_dataset_id)
    model_config_dataset_title = model_config_dataset_obj.title
    model_config_dataset_owner_org = model_config_dataset_obj.owner_org
    model_config_dataset_author = model_config_dataset_obj.author

    # create a directory for saving the file
    # this will be a dir in the form of: /tmp/ckan/{random_id}
    random_id = base.model.types.make_uuid()
    destination_dir = os.path.join(ckan_default_dir, random_id)
    os.makedirs(destination_dir)

    model_pkg_filename = uebhelper.StringSettings.ueb_input_model_package_default_filename   # 'ueb_model_pkg.zip'
    model_pkg_file = os.path.join(destination_dir, model_pkg_filename)

    bytes_to_read = 16 * 1024

    try:
        with open(model_pkg_file, 'wb') as file_obj:
            while True:
                data = service_call_results.read(bytes_to_read)
                if not data:
                    break
                file_obj.write(data)
    except Exception as e:
        log.error(source + 'Failed to save the ueb_package zip file to temporary '
                           'location for UEB model configuration dataset ID: %s \n '
                           'Exception: %s' % (model_config_dataset_id, e))
        raise

    log.info(source + 'ueb_package zip file was saved to temporary location for '
                      'UEB model configuration dataset ID: %s' % model_config_dataset_id)

    # upload the file to CKAN file store
    # resource_metadata = _upload_file(model_pkg_file)
    # if resource_metadata:
    #     log.info(source + 'UEB model package zip file was uploaded for model configuration dataset ID:%s' % model_config_dataset_id)
    # else:
    #     log.error(source + 'Failed to upload UEB model package zip file '
    #                        'for model configuration dataset ID: %s' % model_config_dataset_id)
    #     return
    #
    # # retrieve some of the file meta data
    # resource_url = resource_metadata.get('_label')  # this will return datetime stamp/filename
    #
    # resource_url = '/storage/f/' + resource_url
    # if resource_url.startswith('/'):
    #     resource_url = base.config.get('ckan.site_url', '').rstrip('/') + resource_url
    # else:
    #     resource_url = base.config.get('ckan.site_url', '') + resource_url
    #
    # resource_created_date = resource_metadata.get('_creation_date')
    # resource_name = resource_metadata.get('filename_original')
    # resource_size = resource_metadata.get('_content_length')
    #
    # # add the uploaded ueb model pkg data file as a resource to the dataset
    # resource_create_action = tk.get_action('resource_create')
    # context = {'model': base.model, 'session': base.model.Session, 'save': 'save'}
    # user = uebhelper.get_site_user()
    # context['user'] = user.get('name')
    # context['ignore_auth'] = True
    # context['validate'] = False

    user = uebhelper.get_site_user()
    # create a package
    package_create_action = tk.get_action('package_create')

    # create unique package name using the current time stamp as a postfix to any package name
    unique_postfix = datetime.now().isoformat().replace(':', '-').replace('.', '-').lower()
    pkg_title = model_config_dataset_title

    data_dict = {
                    'name': 'model_package_' + unique_postfix,  # this needs to be unique as required by DB
                    'type': 'model-package',  # dataset type as defined in custom dataset plugin
                    'title': pkg_title,
                    'owner_org': model_config_dataset_owner_org,
                    'author': model_config_dataset_author,
                    'notes': 'UEB model package',
                    'pkg_model_name': 'UEB',
                    'model_version': '1.0',
                    'north_extent': '',
                    'south_extent': '',
                    'east_extent': '',
                    'west_extent': '',
                    'simulation_start_day': '',
                    'simulation_end_day': '',
                    'time_step': '',
                    'package_type': u'Input',
                    'package_run_status': 'Not yet submitted',
                    'package_run_job_id': '',
                    'dataset_type': 'model-package'
                 }

    context = {'model': base.model, 'session': base.model.Session, 'ignore_auth': True, 'user': user.get('name'), 'save': 'save'}
    try:
        uebhelper.register_translator()     # this is needed since we are creating a package in a background operation
        pkg_dict = package_create_action(context, data_dict)
        log.info(source + 'A new dataset was created for UEB input model package with name: %s' % data_dict['name'])
    except Exception as e:
        log.error(source + 'Failed to create a new dataset for ueb input model package for'
                           ' the related model configuration dataset title: %s \n Exception: %s' % (pkg_title, e))
        raise

    pkg_id = pkg_dict['id']

    if 'resources' not in pkg_dict:
        pkg_dict['resources'] = []

    file_name = munge.munge_filename(model_pkg_filename)
    resource = {'url': file_name, 'url_type': 'upload'}
    upload = uploader.ResourceUpload(resource)
    upload.filename = file_name
    upload.upload_file = open(model_pkg_file, 'rb')  # binary mode: the file is a zip archive
    data_dict = {'format': 'zip', 'name': file_name, 'url': file_name, 'url_type': 'upload'}
    pkg_dict['resources'].append(data_dict)

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        # update the package
        package_update_action = tk.get_action('package_update')
        package_update_action(context, pkg_dict)
        context.pop('defer_commit')
    except Exception as e:
        log.error(source + 'Failed to update the new dataset for adding the input model package zip file as'
                           ' a resource.\n Exception: %s' % e)

        raise

    # link this newly created model package dataset to the model configuration dataset
    package_relationship_create_action = tk.get_action('package_relationship_create')
    data_dict = {'subject': pkg_id, 'object': model_config_dataset_id, 'type': 'links_to'}
    package_relationship_create_action(context, data_dict)

    # Get the resource out of the model, as it will not appear in
    # package_show until after the commit
    upload.upload(context['package'].resources[-1].id, uploader.get_max_resource_size())
    base.model.repo.commit()

    # update the related model configuration dataset to show that the package is available

    data_dict = {'package_availability': 'Available'}
    update_msg = 'system auto updated ueb package dataset'
    background_task = True
    try:
        updated_package = uebhelper.update_package(model_config_dataset_id, data_dict, update_msg, background_task)
        log.info(source + 'UEB model configuration dataset was updated as a result of '
                          'receiving model input package for dataset:%s' % updated_package['name'])
    except Exception as e:
        log.error(source + 'Failed to update UEB model configuration dataset after '
                           'receiving model input package for dataset ID:%s \n'
                           'Exception: %s' % (model_config_dataset_id, e))
        raise
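The 16 KiB read/write loop near the top of example #46 is the stock way to stream a response to disk without buffering the whole payload in memory. The standard library's shutil.copyfileobj implements the same loop, so, using the snippet's own service_call_results and model_pkg_file variables, the save step could equally be written as:

import shutil

bytes_to_read = 16 * 1024

with open(model_pkg_file, 'wb') as file_obj:
    # copyfileobj reads from the file-like service response in
    # bytes_to_read chunks and writes each chunk straight to disk.
    shutil.copyfileobj(service_call_results, file_obj, bytes_to_read)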
Example #47
0
File: test_munge.py Project: Hoedic/ckan
 def test_munge_filename(self):
     '''Munge a list of filenames gives expected results.'''
     for org, exp in self.munge_list:
         munge = munge_filename(org)
         nose_tools.assert_equal(munge, exp)
Example #48
0
    def check_and_create_csv(self, context, resource):

        log.debug("Resource: %s" % str(resource))

        resource_filename = os.path.basename(resource.get('url'))
        if not resource_filename:
            return
        log.debug('resource_filename: %s' % resource_filename)

        try:

            # get the config of this plugin
            infoplus_schema_file = pluginconf.get('ckanext.resourcecsv.schemas.infoplus', False)
            if not infoplus_schema_file:
                return  # without a configured schema there is nothing to do
            # log.debug('Infoplus_schema_file: %s' % str(infoplus_schema_file))
            infoplus_schema = json.load(self._load_schema_module_path(infoplus_schema_file))

        except Exception as e:
            log.error('ResourceCSV Plugin scheming error: %s' % str(e))
            return

        # log.debug('infoplus_schema: %s' % str(infoplus_schema))

        # check if the file is in the schema

        munged_filename = munge_filename(resource_filename)
        log.debug('munged_filename: %s' % munged_filename)
        coldef = []
        for key, cdef in infoplus_schema.iteritems():
            if munge_filename(key) != munged_filename:
                continue # skip
            coldef = cdef
            break

        if not coldef:
            log.info("Key %s not found in munged infoplus_schema" % str(munged_filename))
            return

        # download the file to a tmp location

        # with tempfile.NamedTemporaryFile(mode='ab+') as tmpfile:
        # tmpfile = tempfile.NamedTemporaryFile(mode='ab+')

        uploadfile = os.path.join('/tmp/', resource_filename) + '.csv'

        tmpfile = open(uploadfile, 'ab+')

        log.info("Downloading %s" % os.path.basename(resource.get('url')))

        data = urllib2.urlopen(resource.get('url')).readlines()

        # write a dummy header row
        tmpfile.write(self.DELIMITER.join('%s %d' % (_('Column'), i + 1) for i in range(len(coldef) + 1)) + "\r")

        # replace the defined characters in each line with a delimiter
        for line in data:

            # ignore any lines that start with a comment
            if line.startswith('#'):
                continue # skip
            if line.startswith('*'):
                continue # skip
            if line.startswith('%'):
                continue # skip

            # line = line.encode('utf-8')

            tl = list(line) # explode

            # inject the delimiter
            for col in coldef:
                tl[col] = self.DELIMITER

            line = "".join(tl) # implode

            log.debug(line)

            tmpfile.write(line)

        # establish a connection to ckan
        try:
            site_url = pluginconf.get('ckan.site_url', None)
            api_key = model.User.get(context['user']).apikey.encode('utf8')
            ckan = ckanapi.RemoteCKAN(site_url,
                apikey=api_key,
                user_agent='ckanapi/1.0 (+%s)' % site_url
            )
            log.debug("Connected to %s" % site_url)
        except ckanapi.NotAuthorized:
            log.error('User not authorized')
            return False
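The core of check_and_create_csv in example #48 is the explode/inject/implode trick: each schema entry lists the character positions at which a delimiter should overwrite the fixed-width padding. A self-contained sketch of just that transformation, with a made-up column definition and delimiter:

DELIMITER = ';'

def fixed_width_to_csv_line(line, coldef, delimiter=DELIMITER):
    """Overwrite the characters at the positions in coldef with a delimiter."""
    chars = list(line)           # explode the line into characters
    for col in coldef:
        chars[col] = delimiter   # the character AT the index is replaced,
                                 # not shifted, so coldef must point at padding
    return ''.join(chars)        # implode back into a string

print(fixed_width_to_csv_line('AAAAAAA BBBBBBB CC', [7, 15]))
# -> 'AAAAAAA;BBBBBBB;CC'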
Example #49
0
def group_dictize(group, context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''
    Turns a Group object and its related objects into a dictionary. Related
    objects such as tags are included unless disabled via the parameters.

    :param packages_field: determines the format of the `packages` field - can
    be `datasets`, `dataset_count`, `none_but_include_package_count` or None.
    If set to `dataset_count` or `none_but_include_package_count` then you
    can precalculate dataset counts in advance by supplying:
    context['dataset_counts'] = get_group_dataset_counts()
    (a usage sketch follows this example)
    '''
    assert packages_field in ('datasets', 'dataset_count',
                              'none_but_include_package_count', None)
    if packages_field in ('dataset_count', 'none_but_include_package_count'):
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict['extras'] = extras_dict_dictize(
            group._extras, context)

    context['with_capacity'] = True

    if packages_field:
        def get_packages_for_this_group(group_):
            # Ask SOLR for the list of packages for this org/group
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                    new_authz.has_user_permission_for_group_or_org(
                        group_.id, context.get('user'), 'read'))
                if is_group_member:
                    context['ignore_capacity_check'] = True

            if not context.get('for_view'):
                q['rows'] = 1000    # Only the first 1000 datasets are returned

            search_context = dict((k, v) for (k, v) in context.items() if k != 'schema')
            search_results = logic.get_action('package_search')(search_context, q)
            return search_results['count'], search_results['results']
        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            # i.e. packages_field is 'dataset_count' or
            # 'none_but_include_package_count'
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(group)
            else:
                # Use the pre-calculated package_counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)
            if packages_field != 'none_but_include_package_count':
                result_dict['packages'] = package_count

        result_dict['package_count'] = package_count

    if include_tags:
        # group tags are not creatable via the API yet, but that was(/is) a
        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
        result_dict['tags'] = tag_list_dictize(
            _get_members(context, group, 'tags'),
            context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict['groups'] = group_list_dictize(
            _get_members(context, group, 'groups'),
            context, include_groups=True)

    if include_users:
        result_dict['users'] = user_list_dictize(
            _get_members(context, group, 'users'),
            context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munging here should have no effect; it is only done as a defence
        # against potentially dodgy API input
        image_url = munge.munge_filename(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % image_url,
            qualified=True)
    return result_dict
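As the docstring in example #49 notes, the count-only modes can reuse precalculated facets instead of issuing one package_search per group. A sketch of that calling pattern, assuming get_group_dataset_counts() from the same module and a context already populated as in the callers above; `groups` is a hypothetical iterable of Group model objects:

# `context` carries 'model', 'session' and 'user' as usual.
context['dataset_counts'] = get_group_dataset_counts()
group_dicts = [
    group_dictize(group, context, packages_field='dataset_count')
    for group in groups
]
# Each dict gets 'package_count' from the precalculated facets, so no
# per-group SOLR round trip is made.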
Example #50
0
def package_update(context, data_dict):

    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
              context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary

    '''


    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # FIXME: first modifications to package_update begin here:
    # tag strings are reconstructed because the validators strip the tags
    # passed in and only take tags from tag_string values;
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})

    '''
    Construct the tag_string from the given tags.
    There must be at least one tag; otherwise the tag_string will be empty
    and a validation error will be raised.
    '''
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
                h.dict_list_reduce(data_dict.get('tags', {}), 'name'))


    for key, value in old_data.iteritems():
        if key not in data_dict:
            data_dict[key] = value

    #data_dict['resources'] = data_dict.get('resources', old_data.get('resources'))


#     iso_topic_cat = data_dict.get('iso_topic_string', [])
#     if isinstance(iso_topic_cat, basestring):
#         iso_topic_cat = [iso_topic_cat]
#
#     data_dict['iso_topic_string'] = ','.join(iso_topic_cat)


    #Set the package last modified date
    data_dict['record_last_modified'] = str(datetime.date.today())

    # If the Created Date has not yet been set, then set it
    if data_dict['edc_state'] == 'DRAFT' and not data_dict.get('record_create_date'):
        data_dict['record_create_date'] = str(datetime.date.today())

    # If the Publish Date has not yet been set, then set it
    if data_dict['edc_state'] == 'PUBLISHED' and not data_dict.get('record_publish_date'):
        data_dict['record_publish_date'] = str(datetime.date.today())

    # If the Archive Date has not yet been set, then set it
    if data_dict['edc_state'] == 'ARCHIVED' and not data_dict.get('record_archive_date'):
        data_dict['record_archive_date'] = str(datetime.date.today())

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    image_url = old_data.get('image_url', None)

    upload = uploader.Upload('edc', image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload', 'clear_upload')

    #Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url

    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        data_dict['image_display_url'] = h.url_for_static('uploads/edc/%s' % data_dict.get('image_url'), qualified=True)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^

    data, errors = _validate(data_dict, schema, context)
#     log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
#               errors, context.get('user'),
#               context.get('package').name if context.get('package') else '',
#               data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")



    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)


    upload.upload(uploader.get_max_image_size())

    # TODO: the next two blocks are copied from ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of the
    # CKAN action API and the CKAN controllers are simply overridden. This is
    # probably worse than just forking CKAN would have been, because in that
    # case at least we could track changes. - @deniszgonjanin

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we were allowed to update the dataset, so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
            else _get_action('package_show')(context, {'id': data_dict['id']})


    '''
    Send state-change notifications if required (added by Khalegh Mamakani).
    A thread runs the job in the background so that package_update does not
    wait for the notifications to be sent.
    '''

    old_state = old_data.get('edc_state')

    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}

    dataset_url = config.get('ckan.site_url') + h.url_for(controller='package', action='read', id=data_dict['name'])
    import threading

    notify_thread = threading.Thread(target=check_record_state,
                                     args=(context, old_state, data_dict, g.site_title, g.site_url, dataset_url))
    notify_thread.start()

    return output
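For reference, the override in example #50 is invoked like any stock CKAN action. A minimal, hypothetical call, assuming the action is registered and the dataset name below exists; the context keys omitted here are back-filled by get_action:

import ckan.plugins.toolkit as tk

context = {'user': 'admin'}  # 'model'/'session' are filled in by get_action
updated = tk.get_action('package_update')(context, {
    'id': 'my-dataset',                  # hypothetical dataset name
    'title': 'My dataset (revised)',
})
# The override back-fills every unchanged field from package_show, stamps
# record_last_modified, and fires the state-change notification thread.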