Пример #1
0
    def check_config(self):
        exit = False
        for key in ('ckanext.s3filestore.aws_access_key_id',
                    'ckanext.s3filestore.aws_secret_access_key',
                    'ckanext.s3filestore.aws_bucket_name'):
            if not config.get(key):
                print 'You must set the "{0}" option in your ini file'.format(
                    key)
                exit = True
        if exit:
            sys.exit(1)

        print 'All configuration options defined'
        bucket_name = config.get('ckanext.s3filestore.aws_bucket_name')
        public_key = config.get('ckanext.s3filestore.aws_access_key_id')
        secret_key = config.get('ckanext.s3filestore.aws_secret_access_key')

        S3_conn = boto.connect_s3(public_key, secret_key)

        # Check if bucket exists
        bucket = S3_conn.lookup(bucket_name)
        if bucket is None:
            print 'Bucket {0} does not exist, trying to create it...'.format(
                bucket_name)
            try:
                bucket = S3_conn.create_bucket(bucket_name)
            except boto.exception.StandardError as e:
                print 'An error was found while creating the bucket:'
                print str(e)
                sys.exit(1)
        print 'Configuration OK!'
Пример #2
0
def catalog_uri():
    '''
    Return a URI that uniquely identifies this CKAN catalog.

    Used as the catalog node in RDF serializations and as the base for
    derived URIs (e.g. datasets without explicit URIs in their metadata).

    Resolution order:

        1. The `ckanext.dcat.base_uri` config option (recommended)
        2. The `ckan.site_url` config option
        3. `http://` + the `app_instance_uuid` config option (brackets
           stripped), or a random UUID as a last resort

    A critical log message is emitted whenever option 3 is used.

    Returns a string with the catalog URI.
    '''
    configured = (config.get('ckanext.dcat.base_uri')
                  or config.get('ckan.site_url'))
    if configured:
        return configured

    app_uuid = config.get('app_instance_uuid')
    if app_uuid:
        log.critical('Using app id as catalog URI, you should set the ' +
                     '`ckanext.dcat.base_uri` or `ckan.site_url` option')
        return 'http://' + app_uuid.replace('{', '').replace('}', '')

    log.critical('Using a random id as catalog URI, you should set ' +
                 'the `ckanext.dcat.base_uri` or `ckan.site_url` ' +
                 'option')
    return 'http://' + str(uuid.uuid4())
    def test_graph_from_catalog(self):
        # Serialize the catalog and verify the resulting RDF graph.
        s = RDFSerializer()
        g = s.g

        catalog = s.graph_from_catalog()

        # The catalog node URI must match the config-derived catalog URI.
        # NOTE(review): `unicode` makes this Python 2 only.
        eq_(unicode(catalog), utils.catalog_uri())

        # Basic fields
        assert self._triple(g, catalog, RDF.type, DCAT.Catalog)
        assert self._triple(g, catalog, DCT.title, config.get('ckan.site_title'))
        assert self._triple(g, catalog, FOAF.homepage, URIRef(config.get('ckan.site_url')))
        assert self._triple(g, catalog, DCT.language, 'en')
Пример #4
0
    def test_pagination_keeps_only_supported_params(self, mock_request):
        '''Pagination links keep `modified_since`/`profiles` but drop
        arbitrary request params such as `a` and `b`.'''
        mock_request.params = {
            'a': 1,
            'b': 2,
            'modified_since': '2018-03-22',
            'profiles': 'schemaorg'
        }
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in range(10)],
        }
        data_dict = {'page': None}

        pagination = _pagination_info(query, data_dict)

        assert pagination['count'] == 12
        assert pagination['items_per_page'] == config.get(
            'ckanext.dcat.datasets_per_page')
        # NOTE(review): links are built on http://example.com rather than the
        # mocked host -- presumably `ckan.site_url` is set by the fixture.
        assert pagination['current'] == 'http://example.com/feed/catalog.xml?' \
                                        'modified_since=2018-03-22&profiles=schemaorg&page=1'
        assert pagination[
            'first'] == 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=1'
        assert pagination[
            'last'] == 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2'
        assert pagination[
            'next'] == 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2'
        assert 'previous' not in pagination
Пример #5
0
    def test_resource_upload(self):
        '''Test a basic resource file upload.

        Uploads data.csv as a resource, then verifies the object landed
        under the expected key in the S3 bucket with the same contents.
        '''
        factories.Sysadmin(apikey="my-test-key")

        app = self._get_test_app()
        demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        factories.Dataset(name="my-dataset")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        # Close the upload handle instead of leaking it.
        with open(file_path) as upload_file:
            resource = demo.action.resource_create(package_id='my-dataset',
                                                   upload=upload_file,
                                                   url='file.txt')

        key = '{1}/resources/{0}/data.csv' \
            .format(resource['id'],
                    config.get('ckanext.s3filestore.aws_storage_path'))

        s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)

        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket='my-bucket', Key=key)

        # test the file contains what's expected
        obj = s3.get_object(Bucket='my-bucket', Key=key)
        data = obj['Body'].read()
        # The S3 body is bytes, so read the local file in binary mode.
        with open(file_path, 'rb') as expected_file:
            assert_equal(data, expected_file.read())
Пример #6
0
    def test_resource_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        app = self._get_test_app()
        demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        dataset = factories.Dataset(name="my-dataset")

        # NOTE(review): the file handle passed as `upload` is never closed.
        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        resource = demo.action.resource_create(package_id='my-dataset',
                                               upload=open(file_path),
                                               url='file.txt')

        # Expected S3 key: <storage_path>/resources/<resource_id>/data.csv
        key = '{1}/resources/{0}/data.csv' \
            .format(resource['id'],
                    config.get('ckanext.s3filestore.aws_storage_path'))

        conn = boto.connect_s3()
        bucket = conn.get_bucket('my-bucket')
        # test the key exists
        assert_true(bucket.lookup(key))

        # clear upload via the resource edit form
        url = toolkit.url_for(controller='package', action='resource_edit',
                              id=dataset['id'], resource_id=resource['id'])
        env = {'REMOTE_USER': sysadmin['name'].encode('ascii')}
        app.post(url, {'clear_upload': True,
                       'url': 'http://asdf', 'save': 'save'},
                 extra_environ=env)

        # key shouldn't exist after the upload was cleared
        assert_false(bucket.lookup(key))
Пример #7
0
    def test_group_image_upload(self):
        '''Test a group image file upload'''
        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = 'somename.png'

        img_uploader = Uploader(file_name, file=open(file_path))

        # Freeze the uploader's clock so the timestamped S3 key below is
        # deterministic.
        with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
            mock_date.datetime.utcnow.return_value = \
                datetime.datetime(2001, 1, 29)
            context = {'user': sysadmin['name']}
            helpers.call_action('group_create', context=context,
                                name="my-group",
                                image_upload=img_uploader,
                                image_url=file_name,
                                save='save')

        # Key layout: <storage_path>/storage/uploads/group/<timestamp><name>
        key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
            .format(config.get('ckanext.s3filestore.aws_storage_path'), file_name)

        conn = boto.connect_s3()
        bucket = conn.get_bucket('my-bucket')
        # test the key exists
        assert_true(bucket.lookup(key))

        # requesting image redirects to s3
        app = self._get_test_app()
        # attempt redirect to linked url
        image_file_url = '/uploads/group/{0}'.format(file_name)
        r = app.get(image_file_url, status=[302, 301])
        # NOTE(review): 'my-path' implies the fixture sets
        # ckanext.s3filestore.aws_storage_path = my-path -- confirm.
        assert_equal(r.location, 'https://my-bucket.s3.amazonaws.com/my-path/storage/uploads/group/{0}'
                                 .format(file_name))
Пример #8
0
    def test_pagination_keeps_only_supported_params(self, mock_request):
        # Python 2 variant (xrange/eq_) of the same check: unsupported
        # params `a`/`b` are dropped, `modified_since`/`profiles` are kept.
        mock_request.params = {'a': 1, 'b': 2, 'modified_since': '2018-03-22', 'profiles': 'schemaorg'}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 12)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        eq_(pagination['current'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=1')
        eq_(pagination['first'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=1')
        eq_(pagination['last'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2')
        eq_(pagination['next'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2')
        assert 'previous' not in pagination
Пример #9
0
    def test_pagination_same_results_than_page_size(self, mock_request):
        # When the total count fits in one page there must be no
        # next/previous links and first == last == current.
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = ''

        # No page defined (defaults to 1)
        query = {
            'count': 10,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 10)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        eq_(pagination['current'], 'http://example.com?page=1')
        eq_(pagination['first'], 'http://example.com?page=1')
        eq_(pagination['last'], 'http://example.com?page=1')
        assert 'next' not in pagination
        assert 'previous' not in pagination
Пример #10
0
def prepare_summary_mail(context, source_id, status):
    """Build the (subject, body) pair for a harvest summary e-mail."""
    template_vars = get_mail_extra_vars(context, source_id, status)
    site_title = config.get('ckan.site_title')
    subject = '{} - Harvesting Job Successful - Summary Notification'.format(
        site_title)
    body = render('emails/summary_email.txt', template_vars)
    return subject, body
Пример #11
0
def prepare_error_mail(context, source_id, status):
    """Build the (subject, body) pair for a harvest error e-mail."""
    template_vars = get_mail_extra_vars(context, source_id, status)
    site_title = config.get('ckan.site_title')
    subject = '{} - Harvesting Job - Error Notification'.format(site_title)
    body = render('emails/error_email.txt', template_vars)
    return subject, body
Пример #12
0
    def _gen_new_name(cls, title, existing_name=None, append_type=None):
        '''
        Derive a URL-friendly dataset 'name' from the title.

        If the ideal name is taken, characters are appended until it is
        unique. When renaming an existing dataset, pass its current name so
        it can be kept when no change is actually needed.

        :param existing_name: the current name of the dataset - only specify
                              this if the dataset exists
        :type existing_name: string
        :param append_type: how to disambiguate clashes - either
                            'number-sequence' or 'random-hex'. Falls back to
                            the `ckanext.harvest.default_dataset_name_append`
                            config option, then to 'number-sequence'.
        :type append_type: string
        '''
        chosen_append_type = append_type or config.get(
            'ckanext.harvest.default_dataset_name_append',
            'number-sequence')

        # Munge the title and collapse any runs of dashes it produced.
        candidate = re.sub('-+', '-', munge_title_to_name(title))
        return cls._ensure_name_is_unique(candidate,
                                          existing_name=existing_name,
                                          append_type=chosen_append_type)
Пример #13
0
def get_dataset_rdf_url(default=False):
    '''Return the RDF URI for the dataset named in the current request URL.

    :param default: kept for backward compatibility - both branches of the
        original implementation produced the same URL, so the flag currently
        has no effect (it only selected between commented-out profile
        suffixes).
    '''
    url = toolkit.request.url.split('?')[0]
    dataset_name = url.split('/')[-1]
    # Looked up so a missing dataset still raises, and because the
    # commented-out suffix below referenced the dataset type.
    dataset_type = toolkit.get_action('package_show')(None, {
        'id': dataset_name
    })['type']

    uri = config.get('ckanext.dcat.base_uri') or config.get('ckan.site_url')

    # Preserved suffixes from the original (both branches returned the same
    # base URL):
    #   default:  + '.ttl?profiles=gsq_dataset'
    #   else:     + '.ttl?profiles=gsq_' + dataset_type + ',gsq_dataset'
    return uri + '/' + 'dataset' + '/' + dataset_name
Пример #14
0
def get_validation_badge(resource, in_listing=False):
    """Return an HTML snippet linking the resource's validation report,
    or '' when no badge should be shown.

    :param resource: resource dict (needs 'validation_status', 'id',
        'package_id' and optionally 'validation_timestamp')
    :param in_listing: True when rendering inside a resource listing,
        where badges can be disabled via config
    """
    if in_listing and not asbool(
            config.get('ckanext.validation.show_badges_in_listings', True)):
        return ''

    validation_status = resource.get('validation_status')
    if not validation_status:
        return ''

    # Alt texts for each badge variant.
    messages = {
        'success': _('Valid data'),
        'failure': _('Invalid data'),
        'error': _('Error during validation'),
        'unknown': _('Data validation unknown'),
    }

    # Anything outside the three known statuses renders the 'unknown' badge.
    if validation_status in ('success', 'failure', 'error'):
        status = validation_status
    else:
        status = 'unknown'

    validation_url = url_for('validation_read',
                             id=resource['package_id'],
                             resource_id=resource['id'])
    badge_url = url_for_static(
        '/images/badges/data-{}-flat.svg'.format(status))

    return u'''
<a href="{validation_url}" class="validation-badge">
    <img src="{badge_url}" alt="{alt}" title="{title}"/>
</a>'''.format(validation_url=validation_url,
               badge_url=badge_url,
               alt=messages[status],
               title=resource.get('validation_timestamp', ''))
Пример #15
0
def show_validation_schemas():
    """Return the names of the available validation schemas.

    Reads `ckanext.validation.schema_directory`; when unset, there are no
    schemas. The result is materialized as a list so callers get the same
    type whether or not the option is configured (previously a `dict_keys`
    view was returned on Python 3).

    :rtype: list
    """
    schema_directory = config.get('ckanext.validation.schema_directory')
    if not schema_directory:
        return []
    return list(_files_from_directory(schema_directory).keys())
    def test_pagination_keeps_params(self, mock_request):
        # Existing request params (a, b) must be propagated into every
        # pagination link.
        mock_request.params = {'a': 1, 'b': 2}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in xrange(10)],
        }
        data_dict = {'page': None}

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 12)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        eq_(pagination['current'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=1')
        eq_(pagination['first'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=1')
        eq_(pagination['last'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=2')
        eq_(pagination['next'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=2')
        assert 'previous' not in pagination
Пример #17
0
    def test_pagination_same_results_than_page_size(self, mock_request):
        # Single-page result set: no next/previous links expected.
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = ''

        # No page defined (defaults to 1)
        query = {
            'count': 10,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 10)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        eq_(pagination['current'], 'http://example.com?page=1')
        eq_(pagination['first'], 'http://example.com?page=1')
        eq_(pagination['last'], 'http://example.com?page=1')
        assert 'next' not in pagination
        assert 'previous' not in pagination
Пример #18
0
    def after_map(self, map):
        """Register the configurable DCAT JSON endpoint on the route map."""
        endpoint = config.get('ckanext.dcat.json_endpoint', '/dcat.json')
        dcat_controller = 'ckanext.dcat.controllers:DCATController'
        map.connect(endpoint, controller=dcat_controller, action='dcat_json')
        return map
Пример #19
0
    def test_group_image_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = "somename.png"

        img_uploader = Uploader(file_name, file=open(file_path))

        # Freeze the uploader's clock so the timestamped S3 key below is
        # deterministic.
        with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
            mock_date.datetime.utcnow.return_value = \
                datetime.datetime(2001, 1, 29)
            context = {'user': sysadmin['name']}
            helpers.call_action('group_create', context=context,
                                name="my-group",
                                image_upload=img_uploader,
                                image_url=file_name)

        # Key layout: <storage_path>/storage/uploads/group/<timestamp><name>
        key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
            .format(config.get('ckanext.s3filestore.aws_storage_path'), file_name)

        conn = boto.connect_s3()
        bucket = conn.get_bucket('my-bucket')
        # test the key exists
        assert_true(bucket.lookup(key))

        # clear upload: group_update with clear_upload must delete the object
        helpers.call_action('group_update', context=context,
                            id='my-group', name='my-group',
                            image_url="http://asdf", clear_upload=True)

        # key shouldn't exist
        assert_false(bucket.lookup(key))
    def test_resource_upload(self):
        '''Test a basic resource file upload'''
        factories.Sysadmin(apikey="my-test-key")

        app = helpers._get_test_app()
        demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        factories.Dataset(name="my-dataset")

        # NOTE(review): the file handle passed as `upload` is never closed.
        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        resource = demo.action.resource_create(package_id='my-dataset',
                                               upload=open(file_path),
                                               url='file.txt')

        # Expected S3 key: <storage_path>/resources/<resource_id>/data.csv
        key = '{1}/resources/{0}/data.csv' \
            .format(resource['id'],
                    config.get('ckanext.s3filestore.aws_storage_path'))

        # `s3` and `BUCKET_NAME` come from module scope (not visible here).
        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket=BUCKET_NAME, Key=key)

        # test the file contains what's expected
        obj = s3.get_object(Bucket=BUCKET_NAME, Key=key)
        data = obj['Body'].read()
        # NOTE(review): on Python 3 this compares bytes to str -- the file
        # should probably be opened in 'rb' mode.
        assert_equal(data, open(file_path).read())
Пример #21
0
def datarequest_suggested_description():
    """Return the admin-configured suggested description for data requests,
    or '' when the option is unset.

    :rtype: string
    """
    option = 'ckanext.data_qld.datarequest_suggested_description'
    return config.get(option, '')
    def test_graph_from_catalog(self):
        # Serialize the catalog and verify the resulting RDF graph.
        s = RDFSerializer()
        g = s.g

        catalog = s.graph_from_catalog()

        # The catalog node URI must match the config-derived catalog URI.
        assert str(catalog) == utils.catalog_uri()

        # Basic fields
        assert self._triple(g, catalog, RDF.type, DCAT.Catalog)
        assert self._triple(g, catalog, DCT.title,
                            config.get('ckan.site_title'))
        assert self._triple(g, catalog, FOAF.homepage,
                            URIRef(config.get('ckan.site_url')))
        assert self._triple(g, catalog, DCT.language, 'en')
Пример #23
0
    def _get_user_name(self):
        '''
        Return (and cache) the name of the user performing harvest actions
        (deleting, updating and creating datasets).

        Defaults to the internal site admin user, which is the recommended
        setting; `ckanext.spatial.harvest.user_name` overrides it, e.g. to
        support the old hardcoded 'harvest' user:

           ckanext.spatial.harvest.user_name = harvest

        '''
        # Cached from a previous call?
        if self._user_name:
            return self._user_name

        site_user_context = {
            'model': model,
            'ignore_auth': True,
            'defer_commit': True,  # See ckan/ckan#1714
        }
        self._site_user = p.toolkit.get_action('get_site_user')(
            site_user_context, {})

        configured_name = config.get('ckanext.spatial.harvest.user_name')
        if configured_name:
            self._user_name = configured_name
        else:
            self._user_name = self._site_user['name']

        return self._user_name
Пример #24
0
    def test_pagination_without_site_url(self, mock_request):
        # With no ckan.site_url configured, links fall back to the
        # request's host URL.
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 12)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        eq_(pagination['current'], 'http://ckan.example.com/feed/catalog.xml?page=1')
        eq_(pagination['first'], 'http://ckan.example.com/feed/catalog.xml?page=1')
        eq_(pagination['last'], 'http://ckan.example.com/feed/catalog.xml?page=2')
        eq_(pagination['next'], 'http://ckan.example.com/feed/catalog.xml?page=2')
        assert 'previous' not in pagination
Пример #25
0
    def test_pagination_with_dcat_base_uri(self, mock_request):
        # Links are built on the configured base URI
        # (presumably ckanext.dcat.base_uri = http://example.com/data,
        # set by the fixture) instead of the request host.
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in range(10)],
        }
        data_dict = {'page': None}

        pagination = _pagination_info(query, data_dict)

        assert pagination['count'] == 12
        assert pagination['items_per_page'] == config.get(
            'ckanext.dcat.datasets_per_page')
        assert pagination[
            'current'] == 'http://example.com/data/feed/catalog.xml?page=1'
        assert pagination[
            'first'] == 'http://example.com/data/feed/catalog.xml?page=1'
        assert pagination[
            'last'] == 'http://example.com/data/feed/catalog.xml?page=2'
        assert pagination[
            'next'] == 'http://example.com/data/feed/catalog.xml?page=2'
        assert 'previous' not in pagination
Пример #26
0
 def uploaded_file_redirect(self, upload_to, filename):
     '''Redirect static file requests to their location on S3.'''
     # NOTE(review): raises TypeError if the host_name option is unset.
     host_name = config.get('ckanext.s3filestore.host_name')
     # Strip a trailing slash from the configured host name.
     if host_name[-1] == '/':
         host_name = host_name[:-1]
     storage_path = S3Uploader.get_storage_path(upload_to)
     filepath = os.path.join(storage_path, filename)
     #host = config.get('ckanext.s3.filestore.hostname')
     # redirect_url = 'https://{bucket_name}.minio.omc.ckan.io/{filepath}' \
     #     .format(bucket_name=config.get('ckanext.s3filestore.aws_bucket_name'),
     #             filepath=filepath)
     # Final URL: <host>/<bucket>/<storage_path>/<filename>
     redirect_url = '{host_name}/{bucket_name}/{filepath}'\
                       .format(bucket_name=config.get('ckanext.s3filestore.aws_bucket_name'),
                       filepath=filepath,
                       host_name=host_name)
     redirect(redirect_url)
Пример #27
0
 def uploaded_file_redirect(self, upload_to, filename):
     '''Redirect static file requests to their location on S3.'''
     # (Duplicate of the snippet above.)
     # NOTE(review): raises TypeError if the host_name option is unset.
     host_name = config.get('ckanext.s3filestore.host_name')
     # Strip a trailing slash from the configured host name.
     if host_name[-1] == '/':
         host_name = host_name[:-1]
     storage_path = S3Uploader.get_storage_path(upload_to)
     filepath = os.path.join(storage_path, filename)
     #host = config.get('ckanext.s3.filestore.hostname')
     # redirect_url = 'https://{bucket_name}.minio.omc.ckan.io/{filepath}' \
     #     .format(bucket_name=config.get('ckanext.s3filestore.aws_bucket_name'),
     #             filepath=filepath)
     # Final URL: <host>/<bucket>/<storage_path>/<filename>
     redirect_url = '{host_name}/{bucket_name}/{filepath}'\
                       .format(bucket_name=config.get('ckanext.s3filestore.aws_bucket_name'),
                       filepath=filepath,
                       host_name=host_name)
     redirect(redirect_url)
Пример #28
0
def profanity_checking_enabled():
    """Check to see if YTP comments extension is enabled and `check_for_profanity` is enabled

    :rtype: bool

    """
    comments_active = ytp_comments_enabled()
    check_flag = config.get('ckan.comments.check_for_profanity', False)
    return comments_active and toolkit.asbool(check_flag)
Пример #29
0
    def __init__(self, filename, url):
        """Record where *filename* (fetched from *url*) is cached on disk.

        The cache folder comes from `hdx.download_with_cache.folder`
        (default '/tmp/') and is normalized to end with a slash.
        """
        folder = config.get('hdx.download_with_cache.folder', '/tmp/')
        if not folder.endswith('/'):
            folder = folder + '/'
        self.folder = folder
        self.filename = filename
        self.full_file_path = self.folder + self.filename
        self.url = url
Пример #30
0
def harvest_source_index_clear(context, data_dict):
    '''
    Clears all datasets, jobs and objects related to a harvest source, but
    keeps the source itself.  This is useful to clean history of long running
    harvest sources to start again fresh.

    :param id: the id of the harvest source to clear
    :type id: string
    '''

    check_access('harvest_source_clear', context, data_dict)
    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)

    # Normalize to the canonical id (the lookup may match by name).
    harvest_source_id = source.id

    # Delete everything this source produced from the search index, scoped
    # to this site's documents.
    conn = make_connection()
    query = ''' +%s:"%s" +site_id:"%s" ''' % (
        'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))

    solr_commit = toolkit.asbool(config.get('ckan.search.solr_commit', 'true'))
    if toolkit.check_ckan_version(max_version='2.5.99'):
        # conn is solrpy (CKAN <= 2.5.x)
        try:
            conn.delete_query(query)
            if solr_commit:
                conn.commit()
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)
        finally:
            # solrpy connections must be closed explicitly.
            conn.close()
    else:
        # conn is pysolr (CKAN >= 2.6)
        try:
            conn.delete(q=query, commit=solr_commit)
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)

    return {'id': harvest_source_id}
Пример #31
0
 def _should_use_download_with_cache(self, dataset_name):
     """Report whether *dataset_name* is in the configured download-cache
     whitelist (`hdx.download_with_cache.datasets`), loading and caching
     the list on the class on first use."""
     whitelist = S3Controller.datasets_for_download_with_cache
     if not whitelist:
         configured = config.get('hdx.download_with_cache.datasets')
         if configured:
             S3Controller.datasets_for_download_with_cache = configured.split(',')
             whitelist = S3Controller.datasets_for_download_with_cache
     if whitelist and dataset_name in whitelist:
         return True
     return False
Пример #32
0
 def graph_from_catalog(self, catalog_dict, catalog_ref):
   '''Swap the catalog's plain-locale dct:language literal for the mapped
   MDR language term, when a mapping exists for the default locale.'''
   g = self.g

   # dct:language
   language = config.get('ckan.locale_default', 'en')
   if language in self.language_mapping:
     mdrlang_language = self.language_mapping[language]
     # Replace the plain code (e.g. 'en') with the MDR vocabulary term.
     g.remove((catalog_ref, DCT.language, Literal(language)))
     g.add((catalog_ref, DCT.language, Literal(getattr(MDRLANG, mdrlang_language))))
Пример #33
0
def _send_mail(user_ids, action_type, datarequest, job_title):
    """Queue a notification e-mail about *datarequest* for each user id.

    A failure for one recipient is logged and does not stop the others.
    """
    for user_id in user_ids:
        try:
            recipient = model.User.get(user_id)
            template_vars = {
                'datarequest': datarequest,
                'user': recipient,
                'site_title': config.get('ckan.site_title'),
                'site_url': config.get('ckan.site_url')
            }
            subject_template = 'emails/subjects/{0}.txt'.format(action_type)
            body_template = 'emails/bodies/{0}.txt'.format(action_type)
            subject = base.render_jinja2(subject_template, template_vars)
            body = base.render_jinja2(body_template, template_vars)
            tk.enqueue_job(mailer.mail_user, [recipient, subject, body],
                           title=job_title)
        except Exception:
            logging.exception(
                "Error sending notification to {0}".format(user_id))
Пример #34
0
def clean_harvest_log():
    """Delete harvest log entries older than `ckan.harvest.log_timeframe`
    days (default 30)."""
    from datetime import datetime, timedelta
    from ckantoolkit import config
    from ckanext.harvest.model import clean_harvest_log

    # Log time frame - in days
    days_to_keep = tk.asint(config.get("ckan.harvest.log_timeframe", 30))
    cutoff = datetime.utcnow() - timedelta(days=days_to_keep)

    # Delete logs older than the cutoff date
    clean_harvest_log(condition=cutoff)
Пример #35
0
    def validator(key, data, errors, context):
        """
        Return a value for a core field using a multilingual dict.
        """
        # Normalize the stored value to a {lang: text} dict.
        data[key] = fluent_text_output(data[key])

        # Strip the language suffix from the last key element to locate the
        # corresponding core (un-suffixed) field.
        k = key[-1]
        new_key = key[:-1] + (k[:-len(LANG_SUFFIX)],)

        # If the core field exists, fill it with the default-locale text.
        if new_key in data:
            data[new_key] = scheming_language_text(data[key], config.get('ckan.locale_default', 'en'))
Пример #36
0
def resource_formats(field):
    """Returns a list of resource formats from admin config.

    :param field: scheming field definition (unused, kept for the
        choices-helper signature)
    :rtype: Array resource formats
    """
    configured = config.get('ckanext.data_qld.resource_formats', '')
    formats = [f.strip().upper() for f in configured.split('\r\n')]
    # Skip blank lines so an empty or padded config option does not
    # produce an empty {'value': '', 'label': ''} choice.
    return [{
        'value': resource_format,
        'label': resource_format
    } for resource_format in formats if resource_format]
Пример #37
0
def is_prod():
    """Heuristic environment check: the site counts as production unless
    its URL contains one of the non-production markers below."""
    site_url = config.get('ckan.site_url', '')
    non_prod_markers = ('training', 'dev', 'staging', 'ckan')
    return not any(marker in site_url for marker in non_prod_markers)
Пример #38
0
def fluent_text_output(value):
    """
    Return stored json representation as a multilingual dict, if
    value is already a dict just pass it through.

    A plain (non-JSON) string is wrapped under the default locale.
    """
    if isinstance(value, dict):
        return value
    try:
        decoded = json.loads(value)
    except ValueError:
        # plain string in the db, assume default locale
        return {config.get('ckan.locale_default', 'en'): value}
    return decoded
Пример #39
0
    def test_group_image_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = "somename.png"

        # Open in binary mode: an upload stream carries raw bytes, not text.
        img_uploader = FlaskFileStorage(filename=file_name,
                                        stream=open(file_path, 'rb'),
                                        content_type='image/png')

        # Freeze the upload timestamp so the generated S3 key is predictable.
        with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
            mock_date.datetime.utcnow.return_value = \
                datetime.datetime(2001, 1, 29)
            context = {'user': sysadmin['name']}
            helpers.call_action('group_create',
                                context=context,
                                name="my-group",
                                image_upload=img_uploader,
                                image_url=file_name)

        key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
            .format(config.get('ckanext.s3filestore.aws_storage_path'), file_name)

        s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)

        # head_object raises if the key is missing, so reaching the next
        # statement proves the upload landed in S3.
        s3.head_object(Bucket='my-bucket', Key=key)

        # clear upload
        helpers.call_action('group_update',
                            context=context,
                            id='my-group',
                            name='my-group',
                            image_url="http://asdf",
                            clear_upload=True)

        # After clearing, the key must be gone, i.e. head_object must raise.
        # The assert lives in the `else` branch: the previous bare `except`
        # also swallowed the AssertionError, so this check could never fail.
        try:
            s3.head_object(Bucket='my-bucket', Key=key)
        except Exception:
            # Expected: object no longer exists.  `except Exception` (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            pass
        else:
            assert_false(True, "file should not exist")
Пример #40
0
    def _publisher_graph(self, dataset_ref, dataset_dict):
        """Add publisher triples for the dataset to the serializer graph.

        Only runs when the dataset has a publisher URI, a publisher name,
        or an owning organization.  Emits a schema:Organization node
        (URIRef when a publisher URI can be derived, otherwise a blank
        node), links it as publisher/sourceOrganization of the dataset,
        and attaches a contactPoint node with URL/email/name details.
        """
        if any([
                self._get_dataset_value(dataset_dict, 'publisher_uri'),
                self._get_dataset_value(dataset_dict, 'publisher_name'),
                dataset_dict.get('organization'),
        ]):

            publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
            if publisher_uri:
                publisher_details = URIRef(publisher_uri)
            else:
                # No organization nor publisher_uri
                publisher_details = BNode()

            self.g.add((publisher_details, RDF.type, SCHEMA.Organization))
            self.g.add((dataset_ref, SCHEMA.publisher, publisher_details))
            self.g.add((dataset_ref, SCHEMA.sourceOrganization,
                        publisher_details))  # noqa

            publisher_name = self._get_dataset_value(dataset_dict,
                                                     'publisher_name')
            if not publisher_name and dataset_dict.get('organization'):
                # Fall back to the organization title, which may be a
                # multilingual dict.
                publisher_name = dataset_dict['organization']['title']
                self._add_multilang_value(publisher_details,
                                          SCHEMA.name,
                                          multilang_values=publisher_name)
            else:
                # Bug fix: was `g.add(...)` (NameError) — the graph lives
                # on the serializer instance as `self.g`.
                self.g.add((publisher_details, SCHEMA.name,
                            Literal(publisher_name)))  # noqa

            contact_point = BNode()
            self.g.add((publisher_details, SCHEMA.contactPoint, contact_point))

            self.g.add((contact_point, SCHEMA.contactType,
                        Literal('customer service')))  # noqa

            publisher_url = self._get_dataset_value(dataset_dict,
                                                    'publisher_url')  # noqa
            if not publisher_url and dataset_dict.get('organization'):
                publisher_url = dataset_dict['organization'].get(
                    'url') or config.get('ckan.site_url', '')  # noqa

            self.g.add((contact_point, SCHEMA.url, Literal(publisher_url)))
            items = [
                ('publisher_email', SCHEMA.email,
                 ['contact_email', 'maintainer_email',
                  'author_email'], Literal),  # noqa
                ('publisher_name', SCHEMA.name,
                 ['contact_name', 'maintainer', 'author'], Literal),  # noqa
            ]

            self._add_triples_from_dict(dataset_dict, contact_point, items)
Пример #41
0
def scheming_language_text(text, prefer_lang=None):
    """
    :param text: {lang: text} dict or text string
    :param prefer_lang: choose this language version if available

    Convert "language-text" to users' language by looking up
    language in dict or using gettext if not a dict
    """
    if not text:
        return u''

    assert text != {}
    if hasattr(text, 'get'):
        if prefer_lang is None:
            try:
                prefer_lang = lang()
            except TypeError:
                # lang() call will fail when no user language available;
                # leave prefer_lang unset and fall through to the default.
                prefer_lang = None

        if prefer_lang is not None and prefer_lang in text:
            return text[prefer_lang]

        default_locale = config.get('ckan.locale_default', 'en')
        if default_locale in text:
            return text[default_locale]

        # Deterministic fallback: first language in sorted key order.
        _unused, fallback_text = sorted(text.items())[0]
        return fallback_text

    # Plain string: decode to unicode, then translate via gettext.
    if isinstance(text, str):
        text = text.decode('utf-8')
    return _(text)
Пример #42
0
    def test_resource_upload(self):
        '''Test a basic resource file upload'''
        factories.Sysadmin(apikey="my-test-key")

        app = self._get_test_app()
        demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        factories.Dataset(name="my-dataset")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        # Context manager ensures the upload handle is closed even if the
        # API call fails (the original leaked both open() handles).
        with open(file_path) as upload_file:
            resource = demo.action.resource_create(package_id='my-dataset',
                                                   upload=upload_file,
                                                   url='file.txt')

        key = '{1}/resources/{0}/data.csv' \
            .format(resource['id'],
                    config.get('ckanext.s3filestore.aws_storage_path'))

        conn = boto.connect_s3()
        bucket = conn.get_bucket('my-bucket')
        # test the key exists
        assert_true(bucket.lookup(key))
        # test the file contains what's expected
        with open(file_path) as expected_file:
            assert_equal(bucket.get_key(key).get_contents_as_string(),
                         expected_file.read())
Пример #43
0
    def resource_download(self, id, resource_id, filename=None):
        '''
        Provide a download by either redirecting the user to the url stored or
        downloading the uploaded file from S3.
        '''
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'auth_user_obj': c.userobj}

        try:
            rsc = get_action('resource_show')(context, {'id': resource_id})
            get_action('package_show')(context, {'id': id})
        except NotFound:
            abort(404, _('Resource not found'))
        except NotAuthorized:
            abort(401, _('Unauthorized to read resource %s') % id)

        if rsc.get('url_type') == 'upload':
            upload = uploader.get_resource_uploader(rsc)
            bucket_name = config.get('ckanext.s3filestore.aws_bucket_name')
            region = config.get('ckanext.s3filestore.region_name')
            host_name = config.get('ckanext.s3filestore.host_name')
            bucket = upload.get_s3_bucket(bucket_name)

            if filename is None:
                filename = os.path.basename(rsc['url'])
            key_path = upload.get_path(rsc['id'], filename)
            key = filename

            if key is None:
                log.warn('Key \'{0}\' not found in bucket \'{1}\''
                         .format(key_path, bucket_name))

            try:
                # Small workaround to manage downloading of large files
                # We are using redirect to minio's resource public URL
                s3 = upload.get_s3_session()
                client = s3.client(service_name='s3', endpoint_url=host_name)
                url = client.generate_presigned_url(ClientMethod='get_object',
                                                    Params={'Bucket': bucket.name,
                                                            'Key': key_path},
                                                    ExpiresIn=60)
                redirect(url)

            except ClientError as ex:
                if ex.response['Error']['Code'] == 'NoSuchKey':
                    # attempt fallback
                    if config.get(
                            'ckanext.s3filestore.filesystem_download_fallback',
                            False):
                        log.info('Attempting filesystem fallback for resource {0}'
                                 .format(resource_id))
                        url = toolkit.url_for(
                            controller='ckanext.s3filestore.controller:S3Controller',
                            action='filesystem_resource_download',
                            id=id,
                            resource_id=resource_id,
                            filename=filename)
                        redirect(url)

                    abort(404, _('Resource data not found'))
                else:
                    raise ex