Exemplo n.º 1
0
def create_sort_groups():
    """Store one ``SortGroupsModel`` row for every active group.

    The group names come from ``_group_list``, called with the first
    sysadmin's name and ``ignore_auth`` set.  Each group is then loaded
    through the ``group_show`` action and, when its ``state`` is
    ``'active'``, a row holding the group id and its ``mk``/``en``/``sq``
    translated titles is saved.  Missing titles are stored as ``''``.

    :raises IndexError: if ``get_sysadmins()`` returns an empty sequence.
    :returns: ``None``
    """
    sysadmin = get_sysadmins()[0].name
    context = {'user': sysadmin, 'ignore_auth': True}
    groups = _group_list(context, {})

    # The action lookup does not depend on the group, so resolve it once.
    group_show = toolkit.get_action('group_show')

    for group_name in groups:
        # A fresh context dict is passed on every call, as before.
        gr = group_show({
            'user': None
        }, {
            'id': group_name,
            'include_datasets': True,
            'include_dataset_count': False,
            'include_extras': True,
            'include_users': False,
            'include_groups': False,
            'include_tags': False,
            'include_followers': False
        })
        if gr.get('state') != 'active':
            continue

        # ``title_translated`` may be absent *or* present with a ``None``
        # value; ``or {}`` covers both, a plain ``.get(key, {})`` does not.
        titles = gr.get('title_translated') or {}
        sort_gr = {
            'group_id': gr.get('id', ''),
            'title_mk': titles.get('mk', ''),
            'title_en': titles.get('en', ''),
            'title_sq': titles.get('sq', '')
        }

        sg = SortGroupsModel(**sort_gr)
        sg.save()
Exemplo n.º 2
0
def create_sort_organizations():
    """Store one ``SortOrganizationsModel`` row for every active organization.

    The organization names come from ``ckan_organization_list``, called
    with the first sysadmin's name and ``ignore_auth`` set.  Each
    organization is then loaded through the ``organization_show`` action
    and, when its ``state`` is ``'active'``, a row holding the organization
    id and its ``mk``/``en``/``sq`` translated titles is saved.  Missing
    titles are stored as ``''``.

    :raises IndexError: if ``get_sysadmins()`` returns an empty sequence.
    :returns: ``None``
    """
    sysadmin = get_sysadmins()[0].name
    context = {'user': sysadmin, 'ignore_auth': True}
    orgs = ckan_organization_list(context, {})

    # The action lookup does not depend on the organization; resolve it once.
    organization_show = toolkit.get_action('organization_show')

    for org_name in orgs:
        # A fresh context dict is passed on every call, as before.
        org = organization_show(
            {
                'user': None
            }, {
                'id': org_name,
                'include_datasets': True,
                'include_dataset_count': False,
                'include_extras': True,
                'include_users': False,
                'include_groups': False,
                'include_tags': False,
                'include_followers': False
            })
        if org.get('state') != 'active':
            continue

        # ``title_translated`` may be absent *or* present with a ``None``
        # value; ``or {}`` covers both, a plain ``.get(key, {})`` does not.
        titles = org.get('title_translated') or {}
        sort_org = {
            'org_id': org.get('id', ''),
            'title_mk': titles.get('mk', ''),
            'title_en': titles.get('en', ''),
            'title_sq': titles.get('sq', '')
        }

        so = SortOrganizationsModel(**sort_org)
        so.save()
Exemplo n.º 3
0
def prepare_zip_resources(context, data_dict):
    """Create a zip archive of resources under CKAN's storage path.

    Every resource id in ``data_dict['resources']`` is looked up with the
    ``resource_show`` action and fetched over HTTP.  A response is written
    into the archive only when its content type (the part of the
    ``Content-Type`` header before any ``;``) is listed in
    ``SUPPORTED_RESOURCE_MIMETYPES``.  Resources whose download raises are
    skipped.

    :param context: action context; not used by the code shown here
    :param data_dict: must provide ``resources``, a list of resource ids
    :type data_dict: dict

    :raises Exception: any error raised while building the archive is
        logged and re-raised unchanged.

    NOTE(review): the code visible here ends once the archive is written
    and therefore returns ``None``; the original docstring promised a dict
    containing the ``zip_id`` of the created archive.  Confirm whether the
    tail of this function is missing.
    """
    file_name = uuid.uuid4().hex + '.{ext}'.format(ext='zip')
    file_path = helpers.get_storage_path_for(
        'temp-us_ed_theme') + '/' + file_name
    # NOTE(review): ``resourceArchived`` and ``package_id`` are assigned but
    # never read in the visible code; they are kept for the presumed tail.
    resourceArchived = False
    package_id = None

    try:
        resource_ids = data_dict.get('resources')

        # The credentials do not change per resource, so look them up once.
        # Without any sysadmin the downloads are attempted unauthenticated
        # instead of failing the whole archive.
        # NOTE(review): the sysadmin API key is sent to every resource URL,
        # including externally hosted ones — consider restricting it to
        # URLs on this site.
        sysadmins = get_sysadmins()
        headers = {}
        if sysadmins:
            headers['Authorization'] = sysadmins[0].apikey

        with zipfile.ZipFile(file_path, 'w') as archive:
            for resource_id in resource_ids:
                resource = toolkit.get_action('resource_show')(
                    {}, {'id': resource_id})

                url = resource.get('url')
                if resource['url_type'] == 'upload':
                    # Uploaded files are named after the last URL segment.
                    name = url.split('/')[-1]
                else:
                    name = resource['name']
                    # Add the resource format as extension when the name
                    # has none.
                    if os.path.splitext(name)[-1] == '':
                        _format = resource['format']
                        if _format:
                            name += '.{ext}'.format(ext=_format.lower())

                if package_id is None:
                    package_id = resource['package_id']

                try:
                    r = requests.get(url, headers=headers)
                except Exception:
                    # Best effort: one unreachable resource must not abort
                    # the archive, but leave a trace of what was skipped.
                    log.warning('Skipping resource %s: could not fetch %r',
                                resource_id, url)
                    continue

                # A response without a Content-Type header is treated as
                # unsupported rather than raising KeyError.
                content_type = r.headers.get('Content-Type', '').split(';')[0]

                if content_type in SUPPORTED_RESOURCE_MIMETYPES:
                    resourceArchived = True
                    archive.writestr(name, r.content)
    except Exception as ex:
        log.error('An error occured while preparing zip archive. Error: %s' %
                  ex)
        raise
Exemplo n.º 4
0
def get_admin_email():
    """Return the contact details of the site administrator.

    The ``ckanext.datagovmk.site_admin_email`` configuration option takes
    precedence over the CKAN sysadmins.  When it is set, the contact name
    is the part of the address before the first ``@``.

    Otherwise the first entry returned by ``get_sysadmins()`` is used, its
    ``fullname`` being preferred over its ``name``.

    :returns: a ``dict`` with the keys ``email`` and ``name``, or ``None``
        when no address is configured and there are no sysadmins.

    """
    configured = config.get('ckanext.datagovmk.site_admin_email', False)
    if configured:
        local_part, _sep, _domain = configured.partition('@')
        return {'email': configured, 'name': local_part}

    admins = get_sysadmins()
    if not admins:
        return None

    first = admins[0]
    return {
        'email': first.email,
        'name': first.fullname or first.name
    }
Exemplo n.º 5
0
def download(context,
             resource,
             url_timeout=30,
             max_content_length='default',
             method='GET'):
    '''Given a resource, tries to download it.

    Params:
      context - dict; ``site_url`` is read for uploaded resources with a
                relative URL, and ``previous`` (optional) supplies the
                ``etag`` of an earlier download
      resource - dict of the resource
      url_timeout - timeout passed on to the HTTP request
      max_content_length - size limit; ``'default'`` means
                ``settings.MAX_CONTENT_LENGTH``
      method - ``'GET'`` or ``'POST'``

    Exceptions from tidy_url may be propagated:
       LinkInvalidError if the URL is invalid

    If there is an error performing the download, raises:
       DownloadException - connection problems etc.
       DownloadError - HTTP status code is an error or 0 length

    If download is not suitable (e.g. too large), raises:
       ChooseNotToDownload

    If the response ETag equals the one of the previous download, raises:
       NotChanged

    If the basic GET fails then it will try it with common API
    parameters (SPARQL, WMS etc) to get a better response.

    Returns a dict of results of a successful download:
      mimetype, size, hash, headers, saved_file, url_redirected_to

    NOTE(review): the code visible here stops after the resource has been
    saved and contains no ``return`` statement; the result dict described
    above is presumably built in a tail that is not shown - confirm.
    '''
    from ckanext.archiver import default_settings as settings
    from pylons import config

    if max_content_length == 'default':
        max_content_length = settings.MAX_CONTENT_LENGTH

    url = resource['url']
    url = tidy_url(url)

    # Uploaded files may be stored with a site-relative URL.
    if (resource.get('url_type') == 'upload' and not url.startswith('http')):
        url = context['site_url'].rstrip('/') + url

    hosted_externally = not url.startswith(config['ckan.site_url'])
    if resource.get('url_type') == 'upload' and hosted_externally:
        # ckanext-cloudstorage for example does that

        # enable ckanext.archiver.archive_cloud for qa to work on cloud resources
        # till https://github.com/ckan/ckanext-qa/issues/48 is resolved
        # Warning: this will result in double storage of all files below archival filesize limit

        if not config.get('ckanext.archiver.archive_cloud', False):
            raise ChooseNotToDownload(
                'Skipping resource hosted externally to download resource: %s'
                % url, url)

    headers = _set_user_agent_string({})

    # Authenticate as the first sysadmin, but only towards this site.  An
    # empty site URL is a prefix of every URL, so it must not count as a
    # match - otherwise the API key would be sent to arbitrary hosts.
    site_url = config.get('ckan.site_url', '')
    if site_url and url.startswith(site_url):
        sysadmins = get_sysadmins()
        if len(sysadmins) > 0:
            headers['Authorization'] = sysadmins[0].apikey

    # start the download - just get the headers
    # May raise DownloadException
    method_func = {'GET': requests.get, 'POST': requests.post}[method]
    res = requests_wrapper(
        log,
        method_func,
        url,
        timeout=url_timeout,
        stream=True,
        headers=headers,
        verify=verify_https(),
    )
    url_redirected_to = res.url if url != res.url else None

    if context.get('previous') and ('etag' in res.headers):
        if context.get('previous').etag == res.headers['etag']:
            log.info("ETAG matches, not downloading content")
            raise NotChanged("etag suggests content has not changed")

    if not res.ok:  # i.e. 404 or something
        raise DownloadError(
            'Server reported status error: %s %s' %
            (res.status_code, res.reason), url_redirected_to)
    log.info('GET started successfully. Content headers: %r', res.headers)

    # record headers
    mimetype = _clean_content_type(res.headers.get('content-type', '').lower())

    # make sure resource content-length does not exceed our maximum
    content_length = res.headers.get('content-length')

    if content_length:
        try:
            content_length = int(content_length)
        except ValueError:
            # if there are multiple Content-Length headers, requests
            # will return all the values, comma separated
            if ',' in content_length:
                try:
                    content_length = int(content_length.split(',')[0])
                except ValueError:
                    pass
    # An unparsable header leaves ``content_length`` as a string, in which
    # case the early size check is skipped.
    if isinstance(content_length, int) and \
       content_length >= max_content_length:
        # record fact that resource is too large to archive
        log.warning(
            'Resource too large to download: %s > max (%s). '
            'Resource: %s %r', content_length, max_content_length,
            resource['id'], url)
        raise ChooseNotToDownload(
            _('Content-length %s exceeds maximum '
              'allowed value %s') % (content_length, max_content_length),
            url_redirected_to)
    # content_length in the headers is useful but can be unreliable, so when we
    # download, we will monitor it doesn't go over the max.

    # continue the download - stream the response body
    def get_content():
        return res.content

    log.info('Downloading the body')
    content = requests_wrapper(log, get_content)

    # APIs can return status 200, but contain an error message in the body
    if response_is_an_api_error(content):
        raise DownloadError(_('Server content contained an API error message: %s') % \
                            content[:250],
                            url_redirected_to)

    content_length = len(content)
    if content_length > max_content_length:
        raise ChooseNotToDownload(
            _("Content-length %s exceeds maximum allowed value %s") %
            (content_length, max_content_length), url_redirected_to)

    log.info('Saving resource')
    try:
        length, hash, saved_file_path = _save_resource(resource, res,
                                                       max_content_length)
    except ChooseNotToDownload as e:
        # Re-raise with the redirect target attached.
        raise ChooseNotToDownload(str(e), url_redirected_to)
    def send_request(self):
        '''Create a data request and e-mail the dataset maintainers.

        The contact form is read from the POST body and recorded through
        the ``requestdata_request_create`` action.  Every user named in the
        dataset's comma-separated ``maintainer`` field is then e-mailed;
        when none of them can be found, the admins returned by
        ``self._org_admins_for_dataset`` are e-mailed instead.  Finally a
        notification is created and the request counters are incremented.

        The maintainers are looked up on behalf of the first sysadmin, so
        no e-mail is sent when ``get_sysadmins()`` returns nothing.

        Requests that are not POST are rejected with HTTP 405; an
        unauthorized request is rejected with HTTP 403.

        :returns: a JSON document - the validation errors, a "maintainer
            not found" error, the result of ``emailer.send_email``, or a
            success message stating that no e-mail was sent.
        :rtype: str
        '''
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user,
            'auth_user_obj': c.userobj
        }
        if p.toolkit.request.method != 'POST':
            # There is no form data to work with; previously this fell
            # through and failed on the unbound ``data`` variable.
            return abort(405, _('Only POST requests are supported.'))

        try:
            data = dict(toolkit.request.POST)
            _get_action('requestdata_request_create', data)
        except NotAuthorized:
            abort(403, _('Unauthorized to update this dataset.'))
        except ValidationError as e:
            error = {'success': False, 'error': {'fields': e.error_dict}}

            return json.dumps(error)

        package = _get_action('package_show', {'id': data['package_id']})
        sender_name = data.get('sender_name', '')
        user_obj = context['auth_user_obj']

        organizations = _get_action(
            'organization_list_for_user',
            {'id': user_obj.id, 'permission': 'read'})
        org = ','.join([o['display_name'] for o in organizations])

        dataset_name = package['name']
        dataset_title = package['title']
        email = user_obj.email
        message = data['message_content']
        creator_user_id = package['creator_user_id']
        data_owner =\
            _get_action('user_show', {'id': creator_user_id}).get('name')

        sysadmins = get_sysadmins()
        if not sysadmins:
            return json.dumps({
                'success': True,
                'message': 'Request sent, but email message was not sent.'
            })

        context_sysadmin = {
            'model': model,
            'session': model.Session,
            'user': sysadmins[0].name,
            'auth_user_obj': c.userobj
        }
        to = package['maintainer']
        if to is None:
            return json.dumps({
                'success': False,
                'error': {
                    'fields': {
                        'email': 'Dataset maintainer email not found.'
                    }
                }
            })

        notification_data = {'users': []}
        users_email = []
        only_org_admins = False
        data_maintainers = []
        user_show = toolkit.get_action('user_show')
        # Get users objects from maintainers list
        for raw_id in to.split(','):
            # "a, b" must resolve both users, so drop surrounding
            # whitespace and ignore empty entries.
            maintainer_id = raw_id.strip()
            if not maintainer_id:
                continue
            try:
                user = user_show(context_sysadmin, {'id': maintainer_id})
            except NotFound:
                continue
            notification_data['users'].append(user)
            users_email.append(user['email'])
            data_maintainers.append(user['fullname'] or user['name'])

        # ``ckan.site_title`` may be unset; do not fail on ``None + str``.
        mail_subject = (config.get('ckan.site_title') or '') \
            + ': New data request "' + dataset_title + '"'

        if not users_email:
            # No maintainer could be resolved: fall back to the admins of
            # the dataset's organization.
            for admin in self._org_admins_for_dataset(dataset_name):
                users_email.append(admin.get('email'))
                data_maintainers.append(admin.get('fullname'))
            only_org_admins = True

        content = _get_email_configuration(sender_name,
                                           data_owner,
                                           dataset_name,
                                           email,
                                           message,
                                           org,
                                           data_maintainers,
                                           only_org_admins=only_org_admins)

        response_message = \
            emailer.send_email(content, users_email, mail_subject)

        # notify package creator that new data request was made
        _get_action('requestdata_notification_create', notification_data)

        _get_action('requestdata_increment_request_data_counters',
                    {'package_id': data['package_id'], 'flag': 'request'})

        return json.dumps(response_message)
Exemplo n.º 7
0
    def send_request(self):
        '''Create a data request and e-mail the dataset maintainers.

        The contact form is read from the POST body and recorded through
        the ``requestdata_request_create`` action.  Two e-mails are then
        sent with ``hdx_mailer.mail_recipient`` to every user named in the
        dataset's comma-separated ``maintainer`` field, or - when none of
        them can be found - to the admins returned by
        ``self._org_admins_for_dataset``.  Finally a notification is
        created and the request counters are incremented.

        The maintainers are looked up on behalf of the first sysadmin, so
        no e-mail is sent when ``get_sysadmins()`` returns nothing.

        Requests that are not POST are rejected with HTTP 405; an
        unauthorized request is rejected with HTTP 403.

        :returns: a JSON document - the validation errors, a "maintainer
            not found" error, or a success message.
        :rtype: str
        '''
        context = {'model': model, 'session': model.Session,
                   'user': c.user, 'auth_user_obj': c.userobj}
        if p.toolkit.request.method != 'POST':
            # There is no form data to work with; previously this fell
            # through and failed on the unbound ``data`` variable.
            return abort(405, _('Only POST requests are supported.'))

        try:
            data = dict(toolkit.request.POST)
            _get_action('requestdata_request_create', data)
        except NotAuthorized:
            abort(403, _('Unauthorized to update this dataset.'))
        except ValidationError as e:
            error = {
                'success': False,
                'error': {
                    'fields': e.error_dict
                }
            }

            return json.dumps(error)

        package = _get_action('package_show', {'id': data['package_id']})
        sender_name = data.get('sender_name', '')
        user_obj = context['auth_user_obj']

        # NOTE(review): the results of the next action call and of the
        # ``user_show`` call below are not used any more (they only fed the
        # previous e-mail template).  The calls are kept in case their
        # errors are relied upon - confirm and remove.
        _get_action('hdx_organization_list_for_user',
                    {'id': user_obj.id, 'permission': 'read'})

        dataset_name = package['name']
        dataset_title = package['title']
        email = user_obj.email
        message = data['message_content']
        _get_action('user_show', {'id': package['creator_user_id']})

        sysadmins = get_sysadmins()
        if not sysadmins:
            return json.dumps({
                'success': True,
                'message': 'Request sent, but email message was not sent.'
            })

        context_sysadmin = {
            'model': model,
            'session': model.Session,
            'user': sysadmins[0].name,
            'auth_user_obj': c.userobj
        }
        to = package['maintainer']
        if to is None:
            return json.dumps({
                'success': False,
                'error': {
                    'fields': {
                        'email': 'Dataset maintainer email not found.'
                    }
                }
            })

        notification_data = {
            'users': []
        }
        users_email = []
        # The last maintainer that could be resolved; it is addressed by
        # name in the first e-mail.  Stays empty when falling back to the
        # organization admins.
        user = {}
        user_show = toolkit.get_action('user_show')
        # Get users objects from maintainers list
        for raw_id in to.split(','):
            # "a, b" must resolve both users, so drop surrounding
            # whitespace and ignore empty entries.
            maintainer_id = raw_id.strip()
            if not maintainer_id:
                continue
            try:
                found = user_show(context_sysadmin, {'id': maintainer_id})
            except NotFound:
                continue
            user = found
            notification_data['users'].append(found)
            users_email.append({'display_name': found.get('fullname'),
                                'email': found.get('email')})

        if not users_email:
            # No maintainer could be resolved: fall back to the admins of
            # the dataset's organization.
            for admin in self._org_admins_for_dataset(dataset_name):
                users_email.append({'display_name': admin.get('fullname'),
                                    'email': admin.get('email')})

        # A dataset without organization must not break the e-mail.
        org_title = (package.get('organization') or {}).get('title')
        dataset_link = h.url_for('dataset_read', id=dataset_name,
                                 qualified=True)
        if user:
            maintainer_fullname = \
                user.get('display_name') or user.get('fullname')
        else:
            maintainer_fullname = 'HDX user'

        subject = sender_name + u' has requested access to one of your datasets'
        email_data = {
            'user_fullname': sender_name,
            'user_email': email,
            'msg': message,
            'org_name': org_title,
            'dataset_link': dataset_link,
            'dataset_title': dataset_title,
            'maintainer_fullname': maintainer_fullname,
            'requestdata_org_url': h.url_for(
                'requestdata_organization_requests',
                id=package.get('owner_org'), qualified=True)
        }
        hdx_mailer.mail_recipient(users_email, subject, email_data, footer='*****@*****.**',
                                  snippet='email/content/request_data_to_admins.html')

        # NOTE(review): this second message uses the "to_user" template but
        # is sent to the same maintainer list as the first one; presumably
        # it is meant for the requesting user - confirm the recipients.
        subject = u'Request for access to metadata-only dataset'
        email_data = {
            'user_fullname': sender_name,
            'msg': message,
            'org_name': org_title,
            'dataset_link': dataset_link,
            'dataset_title': dataset_title,
        }
        hdx_mailer.mail_recipient(users_email, subject, email_data, footer=email,
                                  snippet='email/content/request_data_to_user.html')

        # notify package creator that new data request was made
        _get_action('requestdata_notification_create', notification_data)

        _get_action('requestdata_increment_request_data_counters',
                    {'package_id': data['package_id'], 'flag': 'request'})

        response_dict = {
            'success': True,
            'message': 'Email message was successfully sent.'
        }
        return json.dumps(response_dict)
Exemplo n.º 8
0
    def search(self):
        """Render the dataset search page.

        Steps, in order:

        * check ``site_read`` access (HTTP 403 when not authorized);
        * when a query text was given, record it with the
          ``kwh_data_create`` action and - unless ``_search-for`` is listed
          in ``SKIP_SEARCH_FOR`` - with ``user_query_create``; any error
          while recording is logged at debug level and ignored;
        * build the filter query from the request parameters and run
          ``package_search``;
        * expose the results, facets and URL helpers on ``c`` and render
          the search template.

        An invalid search query or a non-integer ``_<facet>_limit``
        parameter results in HTTP 400.  Other search errors set
        ``c.query_error`` and render an empty result page.
        """
        package_type = self._guess_package_type()

        try:
            context = {'model': model, 'user': c.user,
                       'auth_user_obj': c.userobj}
            check_access('site_read', context)
        except NotAuthorized:
            abort(403, _('Not authorized to see this page'))

        # unicode format (decoded from utf8)
        q = c.q = request.params.get('q', u'')
        search_for = request.params.get('_search-for', u'datasets')

        # Store search query in KWH data
        try:
            if q:
                # Recorded on behalf of the first sysadmin with auth checks
                # disabled.
                sysadmin = get_sysadmins()[0].name
                sysadmin_context = {
                    'user': sysadmin,
                    'ignore_auth': True
                }

                kwh_data = {
                    'type': 'search_query',
                    'title': q
                }
                logic.get_action(u'kwh_data_create')(
                    sysadmin_context, kwh_data
                )

                if search_for not in SKIP_SEARCH_FOR:
                    # Same context as the page request, but with auth
                    # checks disabled.
                    query_ctx = {
                        'ignore_auth': True
                    }
                    query_ctx.update(context)
                    query_data = {
                        'query_text': q,
                        'query_type': 'dataset'
                    }
                    logic.get_action('user_query_create')(
                        query_ctx, query_data
                    )
        except Exception as e:
            # Best effort: failing to record the query must not break the
            # search itself.
            log.debug('Error while storing data: %s' % str(e))

        c.query_error = False
        page = h.get_page_number(request.params)

        limit = int(config.get('ckan.datasets_per_page', 20))

        # most search operations should reset the page counter:
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']

        def drill_down_url(alternative_url=None, **by):
            return h.add_url_param(alternative_url=alternative_url,
                                   controller='package', action='search',
                                   new_params=by)

        c.drill_down_url = drill_down_url

        def remove_field(key, value=None, replace=None):
            return h.remove_url_param(key, value=value, replace=replace,
                                      controller='package', action='search',
                                      alternative_url=package_type)

        c.remove_field = remove_field

        sort_by = request.params.get('sort', None)
        params_nosort = [(k, v) for k, v in params_nopage if k != 'sort']

        def _sort_by(fields):
            """
            Sort by the given list of fields.
            Each entry in the list is a 2-tuple: (fieldname, sort_order)
            eg - [('metadata_modified', 'desc'), ('name', 'asc')]
            If fields is empty, then the default ordering is used.
            """
            params = params_nosort[:]

            if fields:
                sort_string = ', '.join('%s %s' % f for f in fields)
                params.append(('sort', sort_string))
            return search_url(params, package_type)

        c.sort_by = _sort_by
        if not sort_by:
            c.sort_by_fields = []
        else:
            # Keep only the field name of every "<field> <order>" entry.
            c.sort_by_fields = [field.split()[0]
                                for field in sort_by.split(',')]

        # ``q`` is accepted but not used; the URL is built from the current
        # request parameters plus the requested page.
        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params, package_type)

        c.search_url_params = urlencode(_encode_params(params_nopage))

        try:
            c.fields = []
            # c.fields_grouped will contain a dict of params containing
            # a list of values eg {'tags':['tag1', 'tag2']}
            c.fields_grouped = {}
            search_extras = {}
            fq = ''
            for (param, value) in request.params.items():
                # Every non-empty parameter that is neither a reserved one
                # nor prefixed with "_" becomes a filter; "ext_" parameters
                # are passed on as search extras instead.
                if param not in ['q', 'page', 'sort'] \
                        and len(value) and not param.startswith('_'):
                    if not param.startswith('ext_'):
                        c.fields.append((param, value))
                        fq += ' %s:"%s"' % (param, value)
                        if param not in c.fields_grouped:
                            c.fields_grouped[param] = [value]
                        else:
                            c.fields_grouped[param].append(value)
                    else:
                        search_extras[param] = value

            context = {'model': model, 'session': model.Session,
                       'user': c.user, 'for_view': True,
                       'auth_user_obj': c.userobj}

            # Unless changed via config options, don't show other dataset
            # types any search page. Potential alternatives are do show them
            # on the default search page (dataset) or on one other search page
            search_all_type = config.get(
                'ckan.search.show_all_types', 'dataset')
            search_all = False

            try:
                # If the "type" is set to True or False, convert to bool
                # and we know that no type was specified, so use traditional
                # behaviour of applying this only to dataset type
                search_all = asbool(search_all_type)
                search_all_type = 'dataset'
            # Otherwise we treat as a string representing a type
            except ValueError:
                search_all = True

            if not package_type:
                package_type = 'dataset'

            if not search_all or package_type != search_all_type:
                # Only show datasets of this particular type
                fq += ' +dataset_type:{type}'.format(type=package_type)

            facets = OrderedDict()

            default_facet_titles = {
                'organization': _('Functional Units'),
                'groups': _('Joint Analysis'),
                'tags': _('Tags')
            }

            # Facets without a default title are labelled with their own
            # name.
            for facet in h.facets():
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet

            # Facet titles
            for plugin in p.PluginImplementations(p.IFacets):
                facets = plugin.dataset_facets(facets, package_type)

            c.facet_titles = facets

            data_dict = {
                'q': q,
                'fq': fq.strip(),
                'facet.field': facets.keys(),
                'rows': limit,
                'start': (page - 1) * limit,
                'sort': sort_by,
                'extras': search_extras,
                'include_private': asbool(config.get(
                    'ckan.search.default_include_private', True)),
            }

            query = get_action('package_search')(context, data_dict)
            c.sort_by_selected = query['sort']

            c.page = h.Page(
                collection=query['results'],
                page=page,
                url=pager_url,
                item_count=query['count'],
                items_per_page=limit
            )
            # Facets are only exposed when searching for datasets.
            if search_for == 'datasets':
                c.search_facets = query['search_facets']
            else:
                c.search_facets = {}
            c.page.items = query['results']
        except SearchQueryError as se:
            # User's search parameters are invalid, in such a way that is not
            # achievable with the web interface, so return a proper error to
            # discourage spiders which are the main cause of this.
            log.info('Dataset search query rejected: %r', se.args)
            abort(400, _('Invalid search query: {error_message}')
                  .format(error_message=str(se)))
        except SearchError as se:
            # May be bad input from the user, but may also be more serious like
            # bad code causing a SOLR syntax error, or a problem connecting to
            # SOLR
            log.error('Dataset search error: %r', se.args)
            c.query_error = True
            c.search_facets = {}
            c.page = h.Page(collection=[])
        except NotAuthorized:
            abort(403, _('Not authorized to see this page'))

        # Per-facet display limit, taken from the "_<facet>_limit" request
        # parameter or the ``search.facets.default`` setting.
        c.search_facets_limits = {}
        for facet in c.search_facets.keys():
            try:
                limit = int(request.params.get('_%s_limit' % facet,
                                               int(config.get('search.facets.default', 10))))
            except ValueError:
                abort(400, _('Parameter "{parameter_name}" is not '
                             'an integer').format(
                      parameter_name='_%s_limit' % facet))
            c.search_facets_limits[facet] = limit

        self._setup_template_variables(context, {},
                                       package_type=package_type)

        return render(self._search_template(package_type),
                      extra_vars={'dataset_type': package_type})