示例#1
0
    def index(self):
        """Render the paginated user index, optionally filtered by a
        search string and ordered by a chosen field."""
        PAGE_SIZE = 20

        page = int(request.params.get('page', 1))
        c.q = request.params.get('q', '')
        c.order_by = request.params.get('order_by', 'name')

        context = {
            'return_query': True,
            'user': c.user or c.author,
            'auth_user_obj': c.userobj,
        }
        data_dict = {
            'q': c.q,
            'limit': PAGE_SIZE,
            'offset': PAGE_SIZE * (page - 1),
            'order_by': c.order_by,
        }

        try:
            logic.check_access('user_list', context, data_dict)
        except logic.NotAuthorized:
            base.abort(401, _('Not authorized to see this page'))

        c.users = logic.get_action('user_list')(context, data_dict)

        # item_count is taken from the collection's own count() (the
        # action is called with 'return_query': True in the context).
        c.page = h.Page(
            collection=c.users,
            page=page,
            url=h.pager_url,
            item_count=c.users.count(),
            items_per_page=PAGE_SIZE,
        )
        return base.render('muser/index.html')
示例#2
0
文件: update.py 项目: tbalaz/test
def harvest_jobs_run(context, data_dict):
    """Send every pending ('New') harvest job — optionally restricted to
    one source — to the gather queue; return the list of jobs sent."""
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    source_id = data_dict.get('source_id', None)

    # Look for pending harvest jobs. An empty result is returned rather
    # than raised, so cron-driven runs do not produce error emails.
    pending = harvest_job_list(context, {'source_id': source_id,
                                         'status': u'New'})
    log.info('Number of jobs: %i', len(pending))
    if not pending:
        log.info('No new harvest jobs.')
        return []

    # Dispatch each job whose source is still active to the gather queue.
    publisher = get_gather_publisher()
    sent_jobs = []
    for job in pending:
        context['detailed'] = False
        source = harvest_source_show(context, {'id': job['source']})
        if source['active']:
            publisher.send({'harvest_job_id': job['id']})
            log.info('Sent job %s to the gather queue' % job['id'])
            sent_jobs.append(job)

    publisher.close()

    log.info('%i jobs sent to the gather queue to be harvested',
             len(sent_jobs))
    return sent_jobs
示例#3
0
文件: package.py 项目: icmurray/ckan
    def edit(self, id, data=None, errors=None, error_summary=None):
        """Render the dataset edit form, or dispatch to save on POST.

        :param id: dataset name or id
        :param data: form state from a previous failed save (optional)
        :param errors: validation errors to redisplay (optional)
        :param error_summary: summary of those errors (optional)
        """
        package_type = self._get_package_type(id)
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'extras_as_string': True,
                   'save': 'save' in request.params,
                   'moderated': config.get('moderated'),
                   'pending': True}

        # A POSTed form with no prior form state goes straight to save.
        if context['save'] and not data:
            return self._save_edit(id, context)
        try:
            old_data = get_action('package_show')(context, {'id': id})
            schema = self._db_to_form_schema(package_type=package_type)
            if schema and not data:
                old_data, errors = validate(old_data, schema, context=context)
            data = data or old_data
            # Merge all elements for the complete package dictionary;
            # values in `data` override `old_data`.  (copy()/update()
            # instead of `dict(a.items() + b.items())`, which is
            # Python-2-only — items() returns a view on Python 3.)
            pkg_dict = old_data.copy()
            pkg_dict.update(data)
            c.pkg_dict = pkg_dict
        except NotAuthorized:
            abort(401, _('Unauthorized to read package %s') % '')
        except NotFound:
            abort(404, _('Dataset not found'))

        c.pkg = context.get("package")
        c.pkg_json = json.dumps(data)

        try:
            check_access('package_update', context)
        except NotAuthorized:
            # was `except NotAuthorized, e:` — Python-2-only syntax, and
            # the bound exception was never used.
            abort(401, _('User %r not authorized to edit %s') % (c.user, id))
示例#4
0
    def setup_template_variables(self, context, data_dict):
        """Populate the template context (c) with group, license, sysadmin,
        revision and state-change-auth information before rendering."""
        group_list_authz = logic.get_action("group_list_authz")
        c.groups_authz = group_list_authz(context, data_dict)
        data_dict["available_only"] = True
        c.groups_available = group_list_authz(context, data_dict)

        c.licenses = [("", "")] + base.model.Package.get_license_options()
        # CS: bad_spelling ignore 2 lines
        c.licences = c.licenses
        maintain.deprecate_context_item("licences", "Use `c.licenses` instead")
        c.is_sysadmin = ckan.authz.is_sysadmin(c.user)

        if c.pkg:
            # Used by the disqus plugin
            c.related_count = c.pkg.related_count

        revision_id = context.get("revision_id")
        if revision_id or context.get("revision_date"):
            if revision_id:
                rev = (base.model.Session.query(base.model.Revision)
                       .filter_by(id=revision_id)
                       .first())
                c.revision_date = rev.timestamp if rev else "?"
            else:
                c.revision_date = context.get("revision_date")

        # Auth functions take a domain object rather than a data_dict, so
        # make sure the package is present in the context before checking.
        context_pkg = context.get("package", None)
        pkg = context_pkg or c.pkg
        if pkg:
            if not context_pkg:
                context["package"] = pkg
            try:
                logic.check_access("package_change_state", context)
                c.auth_for_change_state = True
            except logic.NotAuthorized:
                c.auth_for_change_state = False
示例#5
0
def ignore_not_package_admin(key, data, errors, context):
    '''Drop `key` from the data unless the user may administer the package.

    The value is kept when auth is explicitly ignored, when the user is a
    sysadmin, or when the user may change the package's state.
    '''
    model = context['model']
    user = context.get('user')

    # Caller asked for auth to be bypassed entirely.
    if 'ignore_auth' in context:
        return

    # Sysadmins always keep the value.
    if user and authz.is_sysadmin(user):
        return

    pkg = context.get('package')
    authorized = False
    if pkg:
        try:
            logic.check_access('package_change_state', context)
            authorized = True
        except logic.NotAuthorized:
            authorized = False

    if user and pkg and authorized:
        return

    # allow_state_change in the context will allow the state to be changed
    # FIXME is this the best way to check for state only?
    if key == ('state',) and context.get('allow_state_change'):
        return
    data.pop(key)
示例#6
0
    def organization_index(self):
        """Render the organization index, sorted by locale-aware title."""
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'for_view': True,
                   'with_private': False}

        try:
            check_access('site_read', context)
        except NotAuthorized:
            abort(401, _('Not authorized to see this page'))

        # pass user info to context as needed to view private datasets of
        # orgs correctly
        if c.userobj:
            context['user_id'] = c.userobj.id
            context['user_is_admin'] = c.userobj.sysadmin

        results = get_action('organization_list')(context,
                                                  {'all_fields': True})

        # Titles look like "EN | FR"; pick the half for the current
        # language before normalizing accents for the sort key.
        part = -1 if c.language == 'fr' else 0

        def org_key(org):
            return normalize_strip_accents(org['title'].split(' | ')[part])

        results.sort(key=org_key)

        c.page = Page(
            collection=results,
            page=request.params.get('page', 1),
            url=h.pager_url,
            items_per_page=1000
        )
        return render('organization/index.html')
示例#7
0
文件: plugins.py 项目: AQUACROSS/ckan
    def setup_template_variables(self, context, data_dict):
        """Fill the template context (c) with group, license, sysadmin,
        revision and package-state-auth details before rendering."""
        authz_fn = logic.get_action('group_list_authz')
        c.groups_authz = authz_fn(context, data_dict)
        data_dict['available_only'] = True
        c.groups_available = authz_fn(context, data_dict)

        c.licenses = [('', '')] + base.model.Package.get_license_options()
        # CS: bad_spelling ignore 2 lines
        c.licences = c.licenses
        maintain.deprecate_context_item('licences', 'Use `c.licenses` instead')
        c.is_sysadmin = ckan.authz.is_sysadmin(c.user)

        if context.get('revision_id') or context.get('revision_date'):
            if context.get('revision_id'):
                query = base.model.Session.query(base.model.Revision)
                rev = query.filter_by(id=context['revision_id']).first()
                c.revision_date = rev.timestamp if rev else '?'
            else:
                c.revision_date = context.get('revision_date')

        # Auth functions take a domain object rather than a data_dict, so
        # ensure the package is in the context before checking access.
        context_pkg = context.get('package', None)
        pkg = context_pkg or c.pkg
        if pkg:
            if not context_pkg:
                context['package'] = pkg
            try:
                logic.check_access('package_change_state', context)
                c.auth_for_change_state = True
            except logic.NotAuthorized:
                c.auth_for_change_state = False
示例#8
0
    def setup_template_variables(self, context, data_dict):
        """
        Add variables to c just prior to the template being rendered. We
        should use the available groups for the current user, but should be
        optional in case this is a top level group.
        """
        c.user_groups = c.userobj.get_groups('organization')

        # Throwaway context, used only for the group_create auth check.
        local_ctx = {'model': model, 'session': model.Session,
                     'user': c.user or c.author}
        try:
            check_access('group_create', local_ctx)
            c.is_superuser_or_groupadmin = True
        except NotAuthorized:
            c.is_superuser_or_groupadmin = False

        if 'group' in context:
            group = context['group']
            # Only show possible groups where the current user is a member
            c.possible_parents = c.userobj.get_groups('organization', 'admin')

            parents = group.get_groups('organization')
            c.parent = parents[0] if parents else None
            c.users = group.members_of_type(model.User)
示例#9
0
    def setup_template_variables(self, context, data_dict):
        """Prepare the template context: user groups, superuser flag,
        possible parent organizations, CMAP group types and the group's
        website URL."""
        tc = toolkit.c
        tc.user_groups = tc.userobj.get_groups('organization')

        # Throwaway context, used only for the group_create auth check.
        local_ctx = {'model': base.model, 'session': base.model.Session,
                     'user': tc.user or tc.author}
        try:
            logic.check_access('group_create', local_ctx)
            tc.is_superuser_or_groupadmin = True
        except logic.NotAuthorized:
            tc.is_superuser_or_groupadmin = False

        if 'group' in context:
            group = context['group']
            # Only show possible groups where the current user is a member
            tc.possible_parents = tc.userobj.get_groups('organization',
                                                        'admin')
            parent_groups = group.get_groups('organization')
            tc.parent = parent_groups[0] if parent_groups else None
            tc.users = group.members_of_type(base.model.User)

        # Options for the custom 'Group Type' metadata field.
        tc.cmap_group_types = ("Municipality", "County",
                               "Other Government", "CMAP Project Team",
                               "Nonprofit Organization", "Other")

        # The group's website URL, so the organization's logo in the site
        # header can link to its website on an organization edit page.
        if 'website_url' in data_dict:
            tc.group_website_url = data_dict['website_url']
示例#10
0
文件: api.py 项目: berlinonline/ckan
    def __call__(self, environ, start_response):
        """WSGI entry point: normalize the matched API version, then gate
        the request behind the site_read auth check."""
        # The matched 'ver' has a leading "/" (e.g. "/3"); strip it and
        # store the numeric version back into the routes dict.
        routes_dict = environ['pylons.routes_dict']
        api_version = routes_dict.get('ver')
        if api_version:
            routes_dict['ver'] = int(api_version[1:])

        identify_user()
        context = {'model': model, 'user': c.user,
                   'auth_user_obj': c.userobj}
        try:
            logic.check_access('site_read', context)
        except NotAuthorized:
            response_msg = self._finish(403,
                                        _('Not authorized to see this page'))
            # Call start_response manually instead of the parent __call__
            # because we want to end the request instead of continuing.
            response_msg = response_msg.encode('utf8')
            status_line = '%i %s' % (response.status_int, response_msg)
            start_response(status_line, response.headers.items())
            return [response_msg]

        # avoid status_code_redirect intercepting error responses
        environ['pylons.status_code_redirect'] = True
        return base.BaseController.__call__(self, environ, start_response)
示例#11
0
文件: related.py 项目: ACTillage/ckan
    def list(self, id):
        """ List all related items for a specific dataset """
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author,
                   'auth_user_obj': c.userobj,
                   'for_view': True}
        data_dict = {'id': id}

        # Pure auth check first, so the user gets a 401/404 up front.
        try:
            logic.check_access('package_show', context, data_dict)
        except logic.NotFound:
            base.abort(404, base._('Dataset not found'))
        except logic.NotAuthorized:
            base.abort(401, base._('Not authorized to see this page'))

        try:
            show_package = logic.get_action('package_show')
            list_related = logic.get_action('related_list')
            c.pkg_dict = show_package(context, data_dict)
            c.related_list = list_related(context, data_dict)
            c.pkg = context['package']
            c.resources_json = h.json.dumps(c.pkg_dict.get('resources', []))
        except logic.NotFound:
            base.abort(404, base._('Dataset not found'))
        except logic.NotAuthorized:
            base.abort(401, base._('Unauthorized to read package %s') % id)

        return base.render("package/related_list.html")
示例#12
0
def ignore_not_package_admin(key, data, errors, context):
    '''Remove `key` from the data unless the user may administer the
    package in the context (sysadmins and `ignore_auth` always pass).'''
    model = context['model']
    user = context.get('user')

    # Caller asked for auth to be bypassed entirely.
    if 'ignore_auth' in context:
        return

    # Sysadmins always keep the value.
    if user and Authorizer.is_sysadmin(user):
        return

    pkg = context.get('package')
    authorized = False
    if pkg:
        try:
            check_access('package_change_state', context)
            authorized = True
        except NotAuthorized:
            authorized = False

    if user and pkg and authorized:
        return

    data.pop(key)
示例#13
0
文件: plugins.py 项目: NeCTAR-RC/ckan
    def setup_template_variables(self, context, data_dict):
        """Set up the template context: authorized/available groups,
        license options, sysadmin flag, related count and state-change
        authorization."""
        fetch_groups = logic.get_action('group_list_authz')
        c.groups_authz = fetch_groups(context, data_dict)
        data_dict['available_only'] = True
        c.groups_available = fetch_groups(context, data_dict)

        c.licenses = [('', '')] + base.model.Package.get_license_options()
        # CS: bad_spelling ignore 2 lines
        c.licences = c.licenses
        maintain.deprecate_context_item('licences', 'Use `c.licenses` instead')
        c.is_sysadmin = ckan.new_authz.is_sysadmin(c.user)

        if c.pkg:
            c.related_count = c.pkg.related_count

        # Auth functions take a domain object rather than a data_dict, so
        # ensure the package is in the context before checking access.
        context_pkg = context.get('package', None)
        pkg = context_pkg or c.pkg
        if pkg:
            if not context_pkg:
                context['package'] = pkg
            try:
                logic.check_access('package_change_state', context)
                c.auth_for_change_state = True
            except logic.NotAuthorized:
                c.auth_for_change_state = False
示例#14
0
    def new(self, data=None, errors=None, error_summary=None):
        """Render the new-group form, or dispatch to save on POST."""
        group_type = self._guess_group_type(True)
        if data:
            data['type'] = group_type

        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'extras_as_string': True,
                   'save': 'save' in request.params,
                   'parent': request.params.get('parent', None)}
        try:
            check_access('group_create', context)
        except NotAuthorized:
            abort(401, _('Unauthorized to create a group'))

        # A POSTed form with no prior form state goes straight to save.
        if context['save'] and not data:
            return self._save_new(context, group_type)

        vars = {'data': data or {},
                'errors': errors or {},
                'error_summary': error_summary or {}}

        self._setup_template_variables(context, vars['data'],
                                       group_type=group_type)
        c.form = render(self._group_form(group_type=group_type),
                        extra_vars=vars)
        return render(self._new_template(group_type))
示例#15
0
    def edit(self, id, data=None, errors=None, error_summary=None):
        """Render the group edit form, or dispatch to save on POST.

        :param id: group name or id (the part before any '@' is used to
            derive the group type)
        :param data: form state from a previous failed save (optional)
        :param errors: validation errors to redisplay (optional)
        :param error_summary: summary of those errors (optional)
        """
        group_type = self._get_group_type(id.split('@')[0])
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'extras_as_string': True,
                   'save': 'save' in request.params,
                   'for_edit': True,
                   'parent': request.params.get('parent', None)
                   }
        data_dict = {'id': id}

        # A POSTed form with no prior form state goes straight to save.
        if context['save'] and not data:
            return self._save_edit(id, context)

        try:
            old_data = get_action('group_show')(context, data_dict)
            c.grouptitle = old_data.get('title')
            c.groupname = old_data.get('name')
            data = data or old_data
        except NotFound:
            abort(404, _('Group not found'))
        except NotAuthorized:
            abort(401, _('Unauthorized to read group %s') % '')

        group = context.get("group")
        c.group = group

        try:
            check_access('group_update', context)
        except NotAuthorized:
            # was `except NotAuthorized, e:` — Python-2-only syntax, and
            # the bound exception was never used.
            abort(401, _('User %r not authorized to edit %s') % (c.user, id))
示例#16
0
def harvest_sources_reindex(context, data_dict):
    '''
        Reindexes all harvest source datasets with the latest status
    '''
    log.info('Reindexing all harvest sources')
    check_access('harvest_sources_reindex', context, data_dict)

    model = context['model']

    packages = (model.Session.query(model.Package)
                .filter(model.Package.type == DATASET_TYPE_NAME)
                .filter(model.Package.state == u'active')
                .all())

    package_index = PackageSearchIndex()
    source_show = logic.get_action('harvest_source_show')
    for package in packages:
        # Drop any stale extras flag and disable auth for the internal
        # harvest_source_show call.
        context.pop('extras_as_string', None)
        context.update({'ignore_auth': True})
        package_dict = source_show(context, {'id': package.id})
        log.debug(
            'Updating search index for harvest source {0}'.format(package.id))
        package_index.index_package(package_dict, defer_commit=True)

    package_index.commit()
    log.info(
        'Updated search index for {0} harvest sources'.format(len(packages)))
示例#17
0
def harvest_sources_reindex(context, data_dict):
    """
        Reindexes all harvest source datasets with the latest status
    """
    log.info("Reindexing all harvest sources")
    check_access("harvest_sources_reindex", context, data_dict)

    model = context["model"]

    query = model.Session.query(model.Package)
    query = query.filter(model.Package.type == DATASET_TYPE_NAME)
    query = query.filter(model.Package.state == u"active")
    harvest_sources = query.all()

    package_index = PackageSearchIndex()
    show_package = logic.get_action("package_show")
    for pkg in harvest_sources:
        # Drop any stale extras flag and disable validation/auth for the
        # internal package_show call.
        if "extras_as_string" in context:
            del context["extras_as_string"]
        context.update({"validate": False, "ignore_auth": True})
        package_dict = show_package(context, {"id": pkg.id})
        log.debug("Updating search index for harvest source {0}".format(pkg.id))
        package_index.index_package(package_dict, defer_commit=True)

    package_index.commit()
    log.info("Updated search index for {0} harvest sources".format(
        len(harvest_sources)))
示例#18
0
文件: api.py 项目: jgrocha/ckan
    def __call__(self, environ, start_response):
        """WSGI entry point: fix up the matched API version, then gate the
        request behind the site_read auth check."""
        # The matched "ver" has a leading "/" (e.g. "/3"); strip it and
        # store the numeric version back into the routes dict.
        routes_dict = environ["pylons.routes_dict"]
        api_version = routes_dict.get("ver")
        if api_version:
            routes_dict["ver"] = int(api_version[1:])

        self._identify_user()
        context = {"model": model, "user": c.user or c.author}
        try:
            logic.check_access("site_read", context)
        except NotAuthorized:
            response_msg = self._finish(403, _("Not authorized to see this page"))
            # Call start_response manually instead of the parent __call__
            # because we want to end the request instead of continuing.
            response_msg = response_msg.encode("utf8")
            status_line = "%i %s" % (response.status_int, response_msg)
            start_response(status_line, response.headers.items())
            return [response_msg]

        # avoid status_code_redirect intercepting error responses
        environ["pylons.status_code_redirect"] = True
        return base.BaseController.__call__(self, environ, start_response)
示例#19
0
    def register(self, data=None, errors=None, error_summary=None):
        '''GET to display a form for registering a new user, or POST the
        form data to actually do the user registration.

        The bulk of this code is pulled directly from ckan/controllers/user.py
        '''
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author,
                   'schema': schema.user_new_form_schema(),
                   'save': 'save' in request.params}

        try:
            check_access('user_create', context)
        except NotAuthorized:
            abort(401, _('Unauthorized to create a user'))

        # POSTed form: delegate the actual save to the core controller.
        if context['save'] and not data:
            return UserController()._save_new(context)

        if c.user and not data:
            # #1799 Don't offer the registration form if already logged in
            return render('user/logout_first.html')

        vars = {'data': data or {},
                'errors': errors or {},
                'error_summary': error_summary or {}}
        c.is_sysadmin = new_authz.is_sysadmin(c.user)
        c.form = render('user/new_user_form.html', extra_vars=vars)
        return render('user/new.html')
示例#20
0
    def read(self, id):
        """Count a view of a related item, then redirect to its URL."""
        context = {
            "model": model,
            "session": model.Session,
            "user": c.user or c.author,
            "extras_as_string": True,
            "for_view": True,
        }
        data_dict = {"id": id}

        try:
            logic.check_access("related_show", context, data_dict)
        except logic.NotAuthorized:
            abort(401, _("Not authorized to see this page"))

        related = (model.Session.query(model.Related)
                   .filter(model.Related.id == id)
                   .first())
        if not related:
            abort(404, _("The requested related item was not found"))

        # Increment via the class-level attribute (a SQL expression)
        # rather than the loaded Python value.
        related.view_count = model.Related.view_count + 1
        model.Session.add(related)
        model.Session.commit()

        base.redirect(related.url)
示例#21
0
    def list(self, id):
        """ List all related items for a specific dataset """
        context = {
            "model": model,
            "session": model.Session,
            "user": c.user or c.author,
            "extras_as_string": True,
            "for_view": True,
        }
        data_dict = {"id": id}

        # Pure auth check first, so the user gets a 401/404 up front.
        try:
            logic.check_access("package_show", context, data_dict)
        except logic.NotFound:
            base.abort(404, base._("Dataset not found"))
        except logic.NotAuthorized:
            base.abort(401, base._("Not authorized to see this page"))

        try:
            c.pkg_dict = logic.get_action("package_show")(context, data_dict)
            c.pkg = context["package"]
            c.resources_json = h.json.dumps(c.pkg_dict.get("resources", []))
        except logic.NotFound:
            base.abort(404, base._("Dataset not found"))
        except logic.NotAuthorized:
            base.abort(401, base._("Unauthorized to read package %s") % id)

        c.action = "related"
        c.related_count = c.pkg.related_count
        follower_count = _get_action("dataset_follower_count")
        c.num_followers = follower_count(context, {"id": c.pkg.id})
        return base.render("related/related_list.html")
示例#22
0
def harvest_object_create(context, data_dict):
    """ Create a new harvest object

    :type guid: string (optional)
    :type content: string (optional)
    :type job_id: string
    :type source_id: string (optional)
    :type package_id: string (optional)
    :type extras: dict (optional)
    """
    check_access('harvest_object_create', context, data_dict)

    data, errors = _validate(data_dict, harvest_object_create_schema(),
                             context)
    if errors:
        raise logic.ValidationError(errors)

    # The 'extras' dict becomes a list of HarvestObjectExtra rows.
    extras = [HarvestObjectExtra(key=k, value=v)
              for k, v in data.get('extras', {}).items()]

    obj = HarvestObject(
        guid=data.get('guid'),
        content=data.get('content'),
        job=data['job_id'],
        harvest_source_id=data.get('source_id'),
        package_id=data.get('package_id'),
        extras=extras,
    )
    obj.save()
    return harvest_object_dictize(obj, context)
示例#23
0
def package_autocomplete(context, data_dict):
    '''Returns packages containing the provided string in either the name
    or the title'''
    model = context['model']
    session = context['session']
    user = context['user']
    q = data_dict['q']

    like_q = u"%s%%" % q

    check_access('package_autocomplete', context, data_dict)

    query = (model.Session.query(model.PackageRevision)
             .filter(model.PackageRevision.state == 'active')
             .filter(model.PackageRevision.current == True)
             .filter(or_(model.PackageRevision.name.ilike(like_q),
                         model.PackageRevision.title.ilike(like_q)))
             .limit(10))

    q_lower = q.lower()
    pkg_list = []
    for package in query:
        # Prefer a name match for display; otherwise show "title (name)".
        if package.name.startswith(q_lower):
            match_field = 'name'
            match_displayed = package.name
        else:
            match_field = 'title'
            match_displayed = '%s (%s)' % (package.title, package.name)
        pkg_list.append({'name': package.name,
                         'title': package.title,
                         'match_field': match_field,
                         'match_displayed': match_displayed})

    return pkg_list
示例#24
0
def group_package_show(context, data_dict):
    """
    Shows all packages belonging to a group.
    """
    model = context["model"]
    user = context["user"]
    id = data_dict['id']
    limit = data_dict.get("limit")

    group = model.Group.get(id)
    context['group'] = group
    if group is None:
        raise NotFound

    check_access('group_show', context, data_dict)

    query = (model.Session.query(model.PackageRevision)
             .filter(model.PackageRevision.state == 'active')
             .filter(model.PackageRevision.current == True)
             .join(model.Member,
                   model.Member.table_id == model.PackageRevision.id)
             .join(model.Group, model.Group.id == model.Member.group_id)
             .filter_by(id=group.id)
             .order_by(model.PackageRevision.name))

    if limit:
        query = query.limit(limit)

    # Callers may ask for the raw query rather than dictized results.
    if context.get('return_query'):
        return query

    return [package_dictize(pkg_rev, context) for pkg_rev in query.all()]
示例#25
0
def tag_show(context, data_dict):
    '''Shows tag details'''
    model = context['model']
    api = context.get('api_version') or '1'
    id = data_dict['id']

    tag = model.Tag.get(id)
    context['tag'] = tag
    if tag is None:
        raise NotFound

    check_access('tag_show', context, data_dict)

    tag_dict = tag_dictize(tag, context)

    # Replace the shallow package entries with fully dictized packages.
    tag_dict['packages'] = [
        package_dictize(model.Package.get(package['id']), context)
        for package in tag_dict['packages']]

    return tag_dict
示例#26
0
def group_list(context, data_dict):
    '''Returns a list of groups'''
    model = context['model']
    user = context['user']
    api = context.get('api_version') or '1'
    # API v2 refers to groups by id, v1 by name.
    ref_group_by = 'id' if api == '2' else 'name'
    order_by = data_dict.get('order_by', 'name')
    if order_by not in ('name', 'packages'):
        raise ValidationError('"order_by" value %r not implemented.' % order_by)
    all_fields = data_dict.get('all_fields', None)

    check_access('group_list', context, data_dict)

    query = model.Session.query(model.Group).join(model.GroupRevision)
    query = query.filter(model.GroupRevision.state == 'active')
    query = query.filter(model.GroupRevision.current == True)
    groups = query.all()

    # 'name' sorts ascending, 'packages' by count descending.
    if order_by == 'name':
        sort_by, reverse = 'name', False
    else:
        sort_by, reverse = 'packages', True

    group_list = group_list_dictize(groups, context,
                                    lambda x: x[sort_by], reverse)

    if not all_fields:
        group_list = [group[ref_group_by] for group in group_list]

    return group_list
示例#27
0
def tag_list(context, data_dict):
    '''Returns a list of tags'''
    model = context['model']
    user = context['user']
    all_fields = data_dict.get('all_fields', None)

    check_access('tag_list', context, data_dict)

    q = data_dict.get('q', '')
    if q:
        # Free-text search goes through the tag search backend.
        query = query_for(model.Tag)
        query.run(query=q,
                  limit=data_dict.get('limit', 25),
                  offset=data_dict.get('offset', 0),
                  return_objects=data_dict.get('return_objects', True),
                  username=user)
        tags = query.results
    else:
        tags = model.Session.query(model.Tag).all()

    if all_fields:
        return [tag_dictize(tag, context) for tag in tags]
    return [tag.name for tag in tags]
示例#28
0
def format_autocomplete(context, data_dict):
    '''Returns formats containing the provided string'''
    model = context['model']
    session = context['session']
    user = context['user']

    check_access('format_autocomplete', context, data_dict)

    q = data_dict.get('q', None)
    if not q:
        return []

    limit = data_dict.get('limit', 5)
    like_q = u'%' + q + u'%'

    # Count each matching format among current, active resources and
    # return the most frequent first.
    total = func.count(model.ResourceRevision.format).label('total')
    query = (session.query(model.ResourceRevision.format, total)
             .filter(and_(
                 model.ResourceRevision.state == 'active',
                 model.ResourceRevision.current == True,
             ))
             .filter(model.ResourceRevision.format.ilike(like_q))
             .group_by(model.ResourceRevision.format)
             .order_by('total DESC')
             .limit(limit))

    return [resource.format for resource in query]
示例#29
0
def harvest_jobs_run(context, data_dict):
    """Send every pending ('New') harvest job — optionally restricted to
    one source — to the gather queue; return the list of jobs sent."""
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    source_id = data_dict.get('source_id', None)

    # There must be at least one pending ('New') job to run.
    jobs = harvest_job_list(context, {'source_id': source_id,
                                      'status': u'New'})
    if not jobs:
        log.info('No new harvest jobs.')
        raise Exception('There are no new harvesting jobs')

    # Dispatch each job whose source is still active to the gather queue.
    publisher = get_gather_publisher()
    sent_jobs = []
    for job in jobs:
        context['detailed'] = False
        source = harvest_source_show(context, {'id': job['source']})
        if source['active']:
            publisher.send({'harvest_job_id': job['id']})
            log.info('Sent job %s to the gather queue' % job['id'])
            sent_jobs.append(job)

    publisher.close()
    return sent_jobs
示例#30
0
def harvest_job_create(context, data_dict):
    '''Create a new harvest job for the given harvest source.

    :param source_id: id of the harvest source the job belongs to
    :raises NotFound: if the source does not exist
    :raises Exception: if the source is inactive
    :raises HarvestJobExists: if an unrun job is already queued
    :returns: the new job, dictized
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # The source must exist ...
    source = HarvestSource.get(source_id)
    if not source:
        log.warn('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # ... and be active ...
    if not source.active:
        log.warn('Harvest job cannot be created for inactive source %s', source_id)
        raise Exception('Can not create jobs on inactive sources')

    # ... and must not already have an unrun or currently running job.
    existing = _check_for_existing_jobs(context, source_id)
    if existing:
        log.warn('There is already an unrun job %r for this source %s', existing, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    new_job = HarvestJob()
    new_job.source = source
    new_job.save()
    log.info('Harvest job saved %s', new_job.id)
    return harvest_job_dictize(new_job, context)
示例#31
0
def before_request():
    """Restrict this blueprint to sysadmins; abort with 403 otherwise."""
    try:
        ctx = {'model': model, 'user': g.user, 'auth_user_obj': g.userobj}
        logic.check_access(u'sysadmin', ctx)
    except logic.NotAuthorized:
        base.abort(403, _(u'Need to be system administrator to administer'))
示例#32
0
def comment_create(context, data_dict):
    '''Create a new comment on a dataset's comment thread.

    Requires a 'comment' value, plus either 'thread_id' or a 'url' from
    which the thread can be resolved.  Optional keys: 'subject',
    'parent_id'.  Sends notification mails to thread subscribers and, if
    configured, to the site admin address.

    :raises logic.ValidationError: if the comment text or thread is missing
    :returns: the created comment as a dict
    '''
    model = context['model']
    user = context['user']

    userobj = model.User.get(user)

    logic.check_access("comment_create", context, data_dict)

    # Validate that we have the required fields.
    if not all([data_dict.get('comment')]):
        raise logic.ValidationError("Comment text is required")

    thread_id = data_dict.get('thread_id')

    if not thread_id:
        # Fall back to resolving the thread from the page URL.
        url = data_dict.get('url')
        if url:
            thread = comment_model.CommentThread.from_url(url)
            thread_id = thread.id if thread else None

    if not thread_id:
        raise logic.ValidationError("Thread identifier or URL is required")

    # Cleanup the comment
    cleaned_comment = util.clean_input(data_dict.get('comment'))

    # Create the object
    cmt = comment_model.Comment(thread_id=thread_id,
                                comment=cleaned_comment)
    cmt.user_id = userobj.id
    cmt.subject = data_dict.get('subject', 'No subject')

    # Allow callers (e.g. migrations) to backdate the comment via context.
    if 'creation_date' in context:
        cmt.creation_date = datetime.datetime.fromtimestamp(context['creation_date'])

    # Check if there is a parent ID and that it is valid
    # TODO, validity in this case includes checking parent is not
    # deleted.
    prt = data_dict.get('parent_id')
    if prt:
        parent = comment_model.Comment.get(prt)
        if parent:
            cmt.parent_id = parent.id

    # approval and spam checking removed

    model.Session.add(cmt)
    model.Session.commit()

    # Send a notification mail to subscribed users
    # NOTE(review): assumes the caller placed 'package' in the context;
    # a KeyError is raised otherwise — confirm all callers provide it.
    package = context['package']
    users = comment_model.CommentSubscription.get_subscribers(package)

    if users:
        # Loop variable renamed from 'user', which shadowed the username
        # read from the context above.
        for subscriber in users:
            log.debug("Sending comment notification mail now to:" + str(subscriber.name))
            util.send_comment_notification_mail(subscriber.display_name, subscriber.email, package, cmt)

    # Always send a notification mail to website admin
    admin_email = config.get("ckanext-comments.comment_notifications_admin_email", None)
    if admin_email:
        util.send_comment_notification_mail("Avoindata-admin", admin_email, package, cmt)

    return cmt.as_dict()
示例#33
0
def harvest_source_clear(context, data_dict):
    '''
    Clears all datasets, jobs and objects related to a harvest source, but
    keeps the source itself.  This is useful to clean history of long running
    harvest sources to start again fresh.

    :param id: the id of the harvest source to clear
    :type id: string
    '''

    check_access('harvest_source_clear', context, data_dict)

    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)

    # From here on use the canonical id from the database record (the caller
    # may have supplied a name); this is also what makes the raw string
    # interpolation into the SQL below safe — the value comes from the DB,
    # not directly from user input.
    harvest_source_id = source.id

    # Clear all datasets from this source from the index
    harvest_source_index_clear(context, data_dict)

    model = context['model']

    # CKAN-2.6 or above: related don't exist any more
    if toolkit.check_ckan_version(max_version='2.5.99'):

        # Collect the ids of 'related' items attached to this source's
        # datasets so they can be deleted in the final SQL batch below.
        sql = '''select id from related where id in (
                  select related_id from related_dataset where dataset_id in (
                      select package_id from harvest_object
                      where harvest_source_id = '{harvest_source_id}'));'''.format(
            harvest_source_id=harvest_source_id)
        result = model.Session.execute(sql)
        ids = []
        for row in result:
            ids.append(row[0])
        related_ids = "('" + "','".join(ids) + "')"

    # Build one SQL batch: first mark this source's packages 'to_delete',
    # then remove all dependent rows, then delete the packages themselves.
    sql = '''begin;
        update package set state = 'to_delete' where id in (
            select package_id from harvest_object
            where harvest_source_id = '{harvest_source_id}');'''.format(
        harvest_source_id=harvest_source_id)

    # CKAN-2.3 or above: delete resource views, resource revisions & resources
    if toolkit.check_ckan_version(min_version='2.3'):
        sql += '''
        delete from resource_view where resource_id in (
            select id from resource where package_id in (
                select id from package where state = 'to_delete'));
        delete from resource_revision where package_id in (
            select id from package where state = 'to_delete');
        delete from resource where package_id in (
            select id from package where state = 'to_delete');
        '''
    # Backwards-compatibility: support ResourceGroup (pre-CKAN-2.3)
    else:
        sql += '''
        delete from resource_revision where resource_group_id in (
            select id from resource_group where package_id in (
                select id from package where state = 'to_delete'));
        delete from resource where resource_group_id in (
            select id from resource_group where package_id in (
                select id from package where state = 'to_delete'));
        delete from resource_group_revision where package_id in (
            select id from package where state = 'to_delete');
        delete from resource_group where package_id in (
            select id from package where state = 'to_delete');
        '''
    # CKAN pre-2.5: authz models were removed in migration 078
    if toolkit.check_ckan_version(max_version='2.4.99'):
        sql += '''
        delete from package_role where package_id in (
            select id from package where state = 'to_delete');
        delete from user_object_role where id not in (
            select user_object_role_id from package_role)
            and context = 'Package';
        '''

    # Harvest bookkeeping rows, then every package-related table that
    # references the packages marked 'to_delete'.
    sql += '''
    delete from harvest_object_error where harvest_object_id in (
        select id from harvest_object
        where harvest_source_id = '{harvest_source_id}');
    delete from harvest_object_extra where harvest_object_id in (
        select id from harvest_object
        where harvest_source_id = '{harvest_source_id}');
    delete from harvest_object where harvest_source_id = '{harvest_source_id}';
    delete from harvest_gather_error where harvest_job_id in (
        select id from harvest_job where source_id = '{harvest_source_id}');
    delete from harvest_job where source_id = '{harvest_source_id}';
    delete from package_tag_revision where package_id in (
        select id from package where state = 'to_delete');
    delete from member_revision where table_id in (
        select id from package where state = 'to_delete');
    delete from package_extra_revision where package_id in (
        select id from package where state = 'to_delete');
    delete from package_revision where id in (
        select id from package where state = 'to_delete');
    delete from package_tag where package_id in (
        select id from package where state = 'to_delete');
    delete from package_extra where package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship_revision where subject_package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship_revision where object_package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship where subject_package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship where object_package_id in (
        select id from package where state = 'to_delete');
    delete from member where table_id in (
        select id from package where state = 'to_delete');
     '''.format(harvest_source_id=harvest_source_id)

    # Finally delete the packages; on old CKAN also delete 'related' rows
    # collected earlier.
    if toolkit.check_ckan_version(max_version='2.5.99'):
        sql += '''
        delete from related_dataset where dataset_id in (
            select id from package where state = 'to_delete');
        delete from related where id in {related_ids};
        delete from package where id in (
            select id from package where state = 'to_delete');
        '''.format(related_ids=related_ids)
    else:
        # CKAN-2.6 or above: related don't exist any more
        sql += '''
        delete from package where id in (
            select id from package where state = 'to_delete');
        '''

    sql += '''
    commit;
    '''
    model.Session.execute(sql)

    # Refresh the index for this source to update the status object
    get_action('harvest_source_reindex')(context, {'id': harvest_source_id})

    return {'id': harvest_source_id}
示例#34
0
def harvest_job_abort(context, data_dict):
    '''
    Aborts a harvest job. Given a harvest source_id, it looks for the latest
    one and (assuming it not already Finished) marks it as Finished. It also
    marks any of that source's harvest objects and (if not complete or error)
    marks them "ERROR", so any left in limbo are cleaned up. Does not actually
    stop running any queued harvest fetchs/objects.

    Specify either id or source_id.

    :param id: the job id to abort, or the id or name of the harvest source
               with a job to abort
    :type id: string
    :param source_id: the name or id of the harvest source with a job to abort
    :type source_id: string
    '''

    check_access('harvest_job_abort', context, data_dict)

    model = context['model']

    source_or_job_id = data_dict.get('source_id') or data_dict.get('id')
    if not source_or_job_id:
        # Bug fix: previously this fell through with `job` unbound, crashing
        # with a NameError below; fail with an explicit, catchable error.
        raise NotFound('Specify either "id" or "source_id"')

    try:
        source = harvest_source_show(context, {'id': source_or_job_id})
    except NotFound:
        # Not a source id/name: treat the value as a job id.
        job = get_action('harvest_job_show')(context, {
            'id': source_or_job_id
        })
    else:
        # HarvestJob set status to 'Aborted'
        # Do not use harvest_job_list since it can use a lot of memory
        # Get the most recent job for the source
        job = model.Session.query(HarvestJob) \
                   .filter_by(source_id=source['id']) \
                   .order_by(HarvestJob.created.desc()).first()
        if not job:
            raise NotFound('Error: source has no jobs')
        job_id = job.id
        job = get_action('harvest_job_show')(context, {'id': job_id})

    if job['status'] != 'Finished':
        # i.e. New or Running
        job_obj = HarvestJob.get(job['id'])
        job_obj.status = new_status = 'Finished'
        model.repo.commit_and_remove()
        log.info('Harvest job changed status from "%s" to "%s"', job['status'],
                 new_status)
    else:
        log.info('Harvest job unchanged. Source %s status is: "%s"', job['id'],
                 job['status'])

    # HarvestObjects set to ERROR
    job_obj = HarvestJob.get(job['id'])
    objs = job_obj.objects
    for obj in objs:
        if obj.state not in ('COMPLETE', 'ERROR'):
            old_state = obj.state
            obj.state = 'ERROR'
            log.info('Harvest object changed state from "%s" to "%s": %s',
                     old_state, obj.state, obj.id)
        else:
            log.info('Harvest object not changed from "%s": %s', obj.state,
                     obj.id)
    model.repo.commit_and_remove()

    job_obj = HarvestJob.get(job['id'])
    return harvest_job_dictize(job_obj, context)
示例#35
0
def harvest_jobs_run(context, data_dict):
    '''
    Runs scheduled jobs, checks if any jobs need marking as finished, and
    resubmits queue items if needed.

    This should be called every few minutes (e.g. by a cron), or else jobs
    will never show as finished.

    This used to also 'run' new jobs created by the web UI, putting them onto
    the gather queue, but now this is done by default when you create a job. If
    you need to send do this explicitly, then use
    ``harvest_send_job_to_gather_queue``.

    :param source_id: the id of the harvest source, if you just want to check
                      for its finished jobs (optional)
    :type source_id: string
    '''
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    session = context['session']

    source_id = data_dict.get('source_id')

    # Scheduled jobs
    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Flag finished jobs as such
    jobs = harvest_job_list(context, {
        'source_id': source_id,
        'status': u'Running'
    })
    if len(jobs):
        for job in jobs:
            if job['gather_finished']:
                # A job is done once gathering finished and none of its
                # objects are still in a non-terminal state.
                num_objects_in_progress = \
                    session.query(HarvestObject.id) \
                           .filter(HarvestObject.harvest_job_id == job['id']) \
                           .filter(and_((HarvestObject.state != u'COMPLETE'),
                                        (HarvestObject.state != u'ERROR'))) \
                           .count()

                if num_objects_in_progress == 0:
                    job_obj = HarvestJob.get(job['id'])
                    job_obj.status = u'Finished'
                    log.info('Marking job as finished %s %s',
                             job_obj.source.url, job_obj.id)

                    # save the time of finish, according to the last running
                    # object
                    last_object = session.query(HarvestObject) \
                        .filter(HarvestObject.harvest_job_id == job['id']) \
                        .filter(HarvestObject.import_finished != None) \
                        .order_by(HarvestObject.import_finished.desc()) \
                        .first()
                    if last_object:
                        job_obj.finished = last_object.import_finished
                    else:
                        # No object recorded an import time; fall back to the
                        # gather-finished timestamp.
                        job_obj.finished = job['gather_finished']
                    job_obj.save()

                    # Reindex the harvest source dataset so it has the latest
                    # status
                    get_action('harvest_source_reindex')(
                        context, {
                            'id': job_obj.source.id
                        })
                else:
                    log.debug('Ongoing job:%s source:%s', job['id'],
                              job['source_id'])

    # resubmit old redis tasks
    resubmit_jobs()

    return []  # merely for backwards compatibility
示例#36
0
def harvest_objects_import(context, data_dict):
    '''
    Reimports the existing harvest objects, specified by either source_id,
    harvest_object_id or package_id.

    It performs the import stage with the last fetched objects, optionally
    belonging to a certain source.

    Please note that no objects will be fetched from the remote server.

    It will only affect the last fetched objects already present in the
    database.

    :param source_id: the id of the harvest source to import
    :type source_id: string
    :param guid: the guid of the harvest object to import
    :type guid: string
    :param harvest_object_id: the id of the harvest object to import
    :type harvest_object_id: string
    :param package_id: the id or name of the package to import
    :type package_id: string
    '''
    log.info('Harvest objects import: %r', data_dict)
    check_access('harvest_objects_import', context, data_dict)

    model = context['model']
    session = context['session']
    source_id = data_dict.get('source_id')
    guid = data_dict.get('guid')
    harvest_object_id = data_dict.get('harvest_object_id')
    package_id_or_name = data_dict.get('package_id')

    # Optional sharding: only objects whose md5(id) hex digest starts with
    # one of these characters are processed (splits work across workers).
    segments = context.get('segments')

    join_datasets = context.get('join_datasets', True)

    # Build the query of object ids to reimport.  The selectors are checked
    # in priority order: guid, source, object id, package, then "all
    # current objects".
    if guid:
        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .filter(HarvestObject.guid == guid) \
                   .filter(HarvestObject.current == True)

    elif source_id:
        source = HarvestSource.get(source_id)
        if not source:
            log.error('Harvest source %s does not exist', source_id)
            raise NotFound('Harvest source %s does not exist' % source_id)

        if not source.active:
            log.warn('Harvest source %s is not active.', source_id)
            raise Exception('This harvest source is not active')

        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .join(HarvestSource) \
                   .filter(HarvestObject.source == source) \
                   .filter(HarvestObject.current == True)

    elif harvest_object_id:
        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .filter(HarvestObject.id == harvest_object_id)
    elif package_id_or_name:
        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .join(Package) \
                   .filter(HarvestObject.current == True) \
                   .filter(Package.state == u'active') \
                   .filter(or_(Package.id == package_id_or_name,
                               Package.name == package_id_or_name))
        # Already joined against Package above; don't join twice below.
        join_datasets = False
    else:
        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .filter(HarvestObject.current == True)

    if join_datasets:
        last_objects_ids = last_objects_ids.join(Package) \
            .filter(Package.state == u'active')

    last_objects_ids = last_objects_ids.all()

    last_objects_count = 0

    for obj_id in last_objects_ids:
        # NOTE(review): hashlib.md5 requires bytes under Python 3; obj_id[0]
        # is presumably a str here — confirm before running on py3.
        if segments and \
                str(hashlib.md5(obj_id[0]).hexdigest())[0] not in segments:
            continue

        obj = session.query(HarvestObject).get(obj_id)

        # Dispatch to the harvester plugin matching this object's source
        # type and run only its import stage.
        for harvester in PluginImplementations(IHarvester):
            if harvester.info()['name'] == obj.source.type:
                if hasattr(harvester, 'force_import'):
                    harvester.force_import = True
                harvester.import_stage(obj)
                break
        last_objects_count += 1
    log.info('Harvest objects imported: %s', last_objects_count)
    return last_objects_count
示例#37
0
def harvest_jobs_run(context, data_dict):
    '''
    Runs scheduled jobs, checks if any jobs need marking as finished, and
    resubmits queue items if needed.

    If ckan.harvest.timeout is set:
    Check if the duration of the job is longer than the timeout, then mark
    that job as finished as there is probably an underlying issue with the
    harvest process.

    This should be called every few minutes (e.g. by a cron), or else jobs
    will never show as finished.

    This used to also 'run' new jobs created by the web UI, putting them onto
    the gather queue, but now this is done by default when you create a job. If
    you need to send do this explicitly, then use
    ``harvest_send_job_to_gather_queue``.

    :param source_id: the id of the harvest source, if you just want to check
                      for its finished jobs (optional)
    :type source_id: string
    '''
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)
    # Timeout in minutes after which a still-running job is force-finished.
    timeout = config.get('ckan.harvest.timeout')

    session = context['session']

    source_id = data_dict.get('source_id')

    # Scheduled jobs
    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Flag finished jobs as such
    jobs = harvest_job_list(
        context, {'source_id': source_id, 'status': u'Running'})
    if len(jobs):
        for job in jobs:
            job_obj = HarvestJob.get(job['id'])
            if timeout:
                last_time = job_obj.get_last_action_time()
                now = datetime.datetime.now()
                if now - last_time > datetime.timedelta(minutes=int(timeout)):
                    msg = 'Job {} timeout ({} minutes)\n'.format(job_obj.id, timeout)
                    msg += '\tJob created: {}\n'.format(job_obj.created)
                    # Bug fix: this line previously reported the creation
                    # time (job_obj.created) instead of the gather-finished
                    # timestamp.
                    msg += '\tJob gather finished: {}\n'.format(job['gather_finished'])
                    msg += '\tJob last action time: {}\n'.format(last_time)

                    job_obj.status = u'Finished'
                    job_obj.finished = now
                    job_obj.save()

                    err = HarvestGatherError(message=msg, job=job_obj)
                    err.save()
                    log.info('Marking job as finished due to error: %s %s',
                             job_obj.source.url, job_obj.id)
                    continue

            if job['gather_finished']:
                # A job is done once gathering finished and no objects
                # remain in a non-terminal state.
                num_objects_in_progress = \
                    session.query(HarvestObject.id) \
                           .filter(HarvestObject.harvest_job_id == job['id']) \
                           .filter(and_((HarvestObject.state != u'COMPLETE'),
                                        (HarvestObject.state != u'ERROR'))) \
                           .count()

                if num_objects_in_progress == 0:

                    job_obj.status = u'Finished'
                    log.info('Marking job as finished %s %s',
                             job_obj.source.url, job_obj.id)

                    # save the time of finish, according to the last running
                    # object
                    last_object = session.query(HarvestObject) \
                        .filter(HarvestObject.harvest_job_id == job['id']) \
                        .filter(
                        HarvestObject.import_finished != None  # noqa: E711
                    ).order_by(HarvestObject.import_finished.desc()) \
                        .first()
                    if last_object:
                        job_obj.finished = last_object.import_finished
                    else:
                        job_obj.finished = job['gather_finished']
                    job_obj.save()

                    # Reindex the harvest source dataset so it has the latest
                    # status
                    get_action('harvest_source_reindex')(
                        context, {'id': job_obj.source.id})

                    status = get_action('harvest_source_show_status')(
                        context, {'id': job_obj.source.id})

                    # Optionally email a summary or error report.
                    notify_all = toolkit.asbool(config.get('ckan.harvest.status_mail.all'))
                    notify_errors = toolkit.asbool(config.get('ckan.harvest.status_mail.errored'))
                    last_job_errors = status['last_job']['stats'].get('errored', 0)
                    log.debug('Notifications: All:{} On error:{} Errors:{}'.format(notify_all, notify_errors, last_job_errors))

                    if last_job_errors > 0 and (notify_all or notify_errors):
                        send_error_email(context, job_obj.source.id, status)
                    elif notify_all:
                        send_summary_email(context, job_obj.source.id, status)
                else:
                    log.debug('%d Ongoing jobs for %s (source:%s)',
                              num_objects_in_progress, job['id'], job['source_id'])
    log.debug('No jobs to send to the gather queue')

    # Resubmit old redis tasks
    resubmit_jobs()

    # Resubmit pending objects missing from Redis
    resubmit_objects()

    return []  # merely for backwards compatibility
示例#38
0
    def _hdx_edit(self, id, related_id, is_edit):
        #Taken from ckan/controller/related.py, paired down to just edits
        """
        Edit and New were too similar and so I've put the code together
        and try and do as much up front as possible.
        """
        # NOTE(review): uses Python 2 `except X, e` syntax below — needs
        # `except X as e` to run under Python 3.
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user or c.author,
            'auth_user_obj': c.userobj,
            'for_view': True
        }
        data_dict = {}

        tpl = 'related/edit.html'
        auth_name = 'related_update'
        auth_dict = {'id': related_id}
        action_name = 'related_update'

        # The related item must exist before anything else.
        try:
            related = logic.get_action('related_show')(context, {
                'id': related_id
            })
        except logic.NotFound:
            base.abort(404, _('Related item not found'))

        try:
            logic.check_access(auth_name, context, auth_dict)
        except logic.NotAuthorized:
            #If user can edit package, user can edit related item
            try:
                logic.check_access(
                    'package_update', {
                        'model': model,
                        'session': model.Session,
                        'user': c.user or c.author,
                        'for_view': True,
                        'auth_user_obj': c.userobj
                    }, {'id': id})
            except logic.NotAuthorized:
                base.abort(403, base._('Not authorized'))

        try:
            c.pkg_dict = logic.get_action('package_show')(context, {'id': id})
        except logic.NotFound:
            base.abort(404, _('Package not found'))

        data, errors, error_summary = {}, {}, {}

        if base.request.method == "POST":
            # Parse the submitted form, apply the update, then redirect
            # back to the dataset page.
            try:
                data = logic.clean_dict(
                    df.unflatten(
                        logic.tuplize_dict(
                            logic.parse_params(base.request.params))))

                data['id'] = related_id
                related = self.related_update(context, data)
                h.flash_success(_("Related item was successfully updated"))

                h.redirect_to(
                    controller=
                    'ckanext.hdx_package.controllers.dataset_controller:DatasetController',
                    action='read',
                    id=c.pkg_dict['name'])
            except df.DataError:
                base.abort(400, _(u'Integrity Error'))
            except logic.ValidationError, e:
                # Validation errors are collected for re-rendering the form.
                errors = e.error_dict
                error_summary = e.error_summary
 def test_user_create_chained_auth(self):
     """Chained user_create auth should fall back to the built-in auth
     function (i.e. not raise) for an ordinary logged-in user."""
     ctd.CreateTestData.create()
     # check if chained auth fallbacks to built-in user_create
     check_access(u'user_create', {u'user': u'annafan'}, {})
示例#40
0
    def edit(self, id=None, data=None, errors=None, error_summary=None):
        """Render (or on save, persist) the edit form for a user account."""
        context = {
            'save': 'save' in request.params,
            'schema': self._edit_form_to_db_schema(),
            'model': model,
            'session': model.Session,
            'user': c.user,
            'auth_user_obj': c.userobj,
            'keep_apikey': True,
            'keep_email': True
        }
        # Default to the logged-in user when no id is given.
        if id is None:
            if c.userobj:
                id = c.userobj.id
            else:
                abort(400, _('No user specified'))
        data_dict = {'id': id}

        try:
            check_access('user_update', context, data_dict)
        except NotAuthorized:
            abort(403, _('Unauthorized to edit a user.'))

        # Save submission with no validation-error data: persist and return.
        if (context['save']) and not data:
            return self._save_edit(id, context)

        try:
            old_data = get_action('user_show')(context, data_dict)

            schema = self._db_to_edit_form_schema()
            if schema:
                old_data, errors = \
                    dictization_functions.validate(old_data, schema, context)

            c.display_name = old_data.get('display_name')
            c.user_name = old_data.get('name')

            # Prefer data passed in (e.g. after a validation error) over
            # the stored values.
            data = data or old_data

        except NotAuthorized:
            abort(403, _('Unauthorized to edit user %s') % '')
        except NotFound:
            abort(404, _('User not found'))

        # presumably 'user_show' placed the User object in the context —
        # verify against the action implementation.
        user_obj = context.get('user_obj')

        if not (authz.is_sysadmin(c.user) or c.user == user_obj.name):
            abort(403,
                  _('User %s not authorized to edit %s') % (str(c.user), id))

        errors = errors or {}
        vars = {'data': data, 'errors': errors, 'error_summary': error_summary}

        self._setup_template_variables(
            {
                'model': model,
                'session': model.Session,
                'user': c.user
            }, data_dict)

        c.is_myself = True
        c.show_email_notifications = asbool(
            config.get('ckan.activity_streams_email_notifications'))
        c.form = render(self.edit_user_form, extra_vars=vars)

        return render('user/edit.html')
示例#41
0
    def edit(self, id, data=None, errors=None, error_summary=None):
        """Render (or on save, persist) the edit form for dataset `id`."""
        package_type = self._get_package_type(id)
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user or c.author,
            'auth_user_obj': c.userobj,
            'save': 'save' in request.params
        }

        # Save submission with no validation-error data: persist and return.
        if context['save'] and not data:
            return self._save_edit(id, context, package_type=package_type)
        try:
            c.pkg_dict = get_action('package_show')(context, {'id': id})
            context['for_edit'] = True
            old_data = get_action('package_show')(context, {'id': id})
            # old data is from the database and data is passed from the
            # user if there is a validation error. Use users data if there.
            if data:
                old_data.update(data)
            data = old_data
        except NotAuthorized:
            abort(401, _('Unauthorized to read package %s') % '')
        except NotFound:
            abort(404, _('Dataset not found'))
        # are we doing a multiphase add?
        if data.get('state', '').startswith('draft') and len(
                data.get('resources')) == 0:
            # Draft with no resources yet: hand off to the 'new' flow.
            c.form_action = h.url_for(controller='package', action='new')
            c.form_style = 'new'
            return self.new(data=data,
                            errors=errors,
                            error_summary=error_summary)

        c.pkg = context.get("package")
        c.resources_json = h.json.dumps(data.get('resources', []))

        try:
            check_access('package_update', context)
        except NotAuthorized:
            abort(401, _('User %r not authorized to edit %s') % (c.user, id))
        # convert tags if not supplied in data
        if data and not data.get('tag_string'):
            data['tag_string'] = ', '.join(
                h.dict_list_reduce(c.pkg_dict.get('tags', {}), 'name'))
        errors = errors or {}
        form_snippet = self._package_form(package_type=package_type)
        form_vars = {
            'data': data,
            'errors': errors,
            'error_summary': error_summary,
            'action': 'edit',
            'dataset_type': package_type,
        }
        c.errors_json = h.json.dumps(errors)

        self._setup_template_variables(context, {'id': id},
                                       package_type=package_type)
        c.related_count = c.pkg.related_count

        # we have already completed stage 1
        form_vars['stage'] = ['active']
        if data.get('state', '').startswith('draft') and len(
                data.get('resources')) == 0:
            form_vars['stage'] = ['active', 'complete']

        edit_template = self._edit_template(package_type)
        c.form = ckan.lib.render.deprecated_lazy_render(
            edit_template, form_snippet,
            lambda: render(form_snippet, extra_vars=form_vars),
            'use of c.form is deprecated. please see '
            'ckan/templates/package/edit.html for an example '
            'of the new way to include the form snippet')
        return render(edit_template,
                      extra_vars={
                          'form_vars': form_vars,
                          'form_snippet': form_snippet,
                          'dataset_type': package_type
                      })
示例#42
0
def harvest_jobs_run(context, data_dict):
    '''Run the harvester: flag finished jobs, resubmit stale redis tasks
    and send any pending ('New') jobs to the gather queue.

    :param source_id: restrict the run to jobs of one harvest source
        (optional; when omitted, scheduled jobs are also created)
    :type source_id: string

    :returns: the jobs that were sent to the gather queue (may be empty)
    :rtype: list of dictionaries
    '''
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    session = context['session']

    source_id = data_dict.get('source_id', None)

    # A global run (no source filter) also creates any jobs due by schedule.
    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Flag finished jobs as such
    jobs = harvest_job_list(context, {
        'source_id': source_id,
        'status': u'Running'
    })
    if len(jobs):
        for job in jobs:
            if job['gather_finished']:
                # A job is done importing once no object remains outside
                # the COMPLETE/ERROR terminal states.
                objects = session.query(HarvestObject.id) \
                          .filter(HarvestObject.harvest_job_id==job['id']) \
                          .filter(and_((HarvestObject.state!=u'COMPLETE'),
                                       (HarvestObject.state!=u'ERROR'))) \
                          .order_by(HarvestObject.import_finished.desc())

                if objects.count() == 0:
                    job_obj = HarvestJob.get(job['id'])
                    job_obj.status = u'Finished'

                    # Use the most recent object's import timestamp as the
                    # job finish time, when one exists.
                    last_object = session.query(HarvestObject) \
                          .filter(HarvestObject.harvest_job_id==job['id']) \
                          .filter(HarvestObject.import_finished!=None) \
                          .order_by(HarvestObject.import_finished.desc()) \
                          .first()
                    if last_object:
                        job_obj.finished = last_object.import_finished
                    job_obj.save()
                    # Reindex the harvest source dataset so it has the latest
                    # status
                    get_action('harvest_source_reindex')(
                        context, {
                            'id': job_obj.source.id
                        })

    # resubmit old redis tasks
    resubmit_jobs()

    # Check if there are pending harvest jobs
    jobs = harvest_job_list(context, {
        'source_id': source_id,
        'status': u'New'
    })
    sent_jobs = []
    if len(jobs) == 0:
        log.info('No new harvest jobs.')
        # Do not raise an exception here: this action is typically invoked
        # from cron, and a raised exception would produce a spurious error
        # email. An empty run is a normal outcome, not an error.
        return sent_jobs  # i.e. []

    # Send each job to the gather queue
    publisher = get_gather_publisher()
    for job in jobs:
        context['detailed'] = False
        source = harvest_source_show(context, {'id': job['source_id']})
        if source['active']:
            # Mark the job Running before queueing so a concurrent run
            # does not pick it up again.
            job_obj = HarvestJob.get(job['id'])
            job_obj.status = job['status'] = u'Running'
            job_obj.save()
            publisher.send({'harvest_job_id': job['id']})
            log.info('Sent job %s to the gather queue' % job['id'])
            sent_jobs.append(job)

    publisher.close()
    return sent_jobs
示例#43
0
def issues_report_csv(context, data_dict):
    '''Build a report of datasets flagged with issues, grouped by publisher.

    :param is_download: when truthy, return the report rendered as CSV
        instead of a list of row dicts (optional)
    :type is_download: bool

    :param publisher: restrict the report to one publisher (optional);
        when omitted, every publisher with flagged datasets is included
    :type publisher: string

    :returns: CSV content when ``is_download`` is set, otherwise a list of
        row dictionaries
    '''
    # pop() with a default instead of get()+del: 'is_download' is optional,
    # and the unconditional delete raised KeyError when it was absent.
    is_download = data_dict.pop('is_download', None)

    logic.check_access('issues_report_csv', context, data_dict)

    publisher_name = data_dict.get('publisher', None)

    issues = {}
    field_names = [
        'publisher', 'dataset', 'url', 'file_url', 'issue_type', 'issue_date',
        'issue_message'
    ]
    issues_report = []

    # Get packages with issues
    if publisher_name:
        result = packages_with_issues_for_a_publisher(context, publisher_name)
        if result['count'] > 0:
            issues[publisher_name] = result['results']

    else:
        # Get all the publishers whose datasets have issues
        data_dict = {
            'q': 'issue_type:[\'\' TO *]',
            'facet.field': ['organization'],
            'rows': 0,
        }
        result = logic.get_action('package_search')(context, data_dict)
        if result['count'] > 0:
            publishers = result['facets']['organization']
            for publisher_name, count in publishers.iteritems():
                result = packages_with_issues_for_a_publisher(
                    context, publisher_name)
                issues[publisher_name] = result['results']

    def get_extra(pkg_dict, key, default=None):
        # Look up a package extra by key; values may arrive JSON-encoded
        # (leading double quote), in which case decode them in place.
        for extra in pkg_dict['extras']:
            if extra['key'] == key:
                if extra['value'][:1] == '"':
                    extra['value'] = json.loads(extra['value'])
                return extra['value']

        return default

    for publisher, datasets in issues.iteritems():
        for dataset in datasets:
            url = urljoin(site_url, '/dataset/' + dataset['name'])
            # Use the first resource's URL as a representative file link.
            if len(dataset['resources']):
                file_url = dataset['resources'][0]['url']
            else:
                file_url = ''

            issues_report.append({
                'publisher': publisher,
                'dataset': dataset['name'],
                'url': url,
                'file_url': file_url,
                'issue_type': get_extra(dataset, 'issue_type', ''),
                'issue_date': get_extra(dataset, 'issue_date', ''),
                'issue_message': get_extra(dataset, 'issue_message', ''),
                'publisher_title': dataset['organization']['title'],
                'dataset_title': format(dataset['title'])
            })
    if is_download:
        return issues_write_to_csv(field_names, issues_report)
    else:
        return issues_report
示例#44
0
    def _edit_or_new(self, id, related_id, is_edit):
        """
        Edit and New were too similar and so I've put the code together
        and try and do as much up front as possible.

        :param id: name or id of the dataset the related item belongs to
        :param related_id: id of the related item (only used when editing)
        :param is_edit: True handles the edit form, False the creation form
        """
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user or c.author,
            'auth_user_obj': c.userobj,
            'for_view': True
        }

        if is_edit:
            tpl = 'related/edit.html'
            auth_name = 'related_update'
            auth_dict = {'id': related_id}
            action_name = 'related_update'

            # Editing requires the related item to exist up front.
            try:
                related = get_action('related_show')(context, {
                    'id': related_id
                })
            except NotFound:
                abort(404, _('Related item not found'))
        else:
            tpl = 'related/new.html'
            auth_name = 'related_create'
            auth_dict = {}
            action_name = 'related_create'

        try:
            check_access(auth_name, context, auth_dict)
        except NotAuthorized:
            abort(401, _('Not authorized'))

        try:
            c.pkg_dict = get_action('package_show')(context, {'id': id})
        except NotFound:
            abort(404, _('Package not found'))

        data, errors, error_summary = {}, {}, {}

        if request.method == "POST":
            try:
                # Request params arrive flattened; rebuild the nested dict.
                data = clean_dict(
                    unflatten(tuplize_dict(parse_params(request.params))))

                if is_edit:
                    data['id'] = related_id
                else:
                    data['dataset_id'] = id
                    data['owner_id'] = c.userobj.id

                related = get_action(action_name)(context, data)

                if not is_edit:
                    h.flash_success(_("Related item was successfully created"))
                else:
                    h.flash_success(_("Related item was successfully updated"))

                # On success, redirect back to the dataset page.
                h.redirect_to(controller='package',
                              action='read',
                              id=c.pkg_dict['name'])
            except DataError:
                abort(400, _(u'Integrity Error'))
            except ValidationError, e:
                # Fall through with the errors so the form (tpl) can be
                # re-rendered; the render step is presumably below this
                # excerpt — TODO confirm.
                errors = e.error_dict
                error_summary = e.error_summary
示例#45
0
def dge_dashboard_json_visited_datasets(context, data_dict):
    '''
    Get general visits to datos.gob.es or visits to datos.gob.es by sections

    :param what: type of data to get, 'total' = total, 'org' = by organization
    :type what: string

    :param destination: directory destination of json file
    :type destination: string

    :param prefix: filename prefix of json file
    :type prefix: string

    :returns: result of _write_file() for the assembled JSON string
    '''

    check_access('dge_dashboard_json_visited_datasets', context, data_dict)

    what = data_dict.get('what')
    destination = data_dict.get('destination')
    prefix = data_dict.get('prefix')

    model = context['model']
    results = []
    string_result = None

    if what in DgeDashboardJsonCommand.VISITED_DATASET_TYPES:
        # Collect every (year_month, end_day) snapshot available in the
        # analytics stats table; one query per snapshot follows below.
        year_month_day_list = []
        sql = '''select distinct year_month, end_day from dge_ga_packages
                     order by year_month desc;'''
        result = model.Session.execute(sql)
        if result:
            for row in result:
                year_month_day_list.append({'y_m': row[0], 'day': row[1]})
        if year_month_day_list:
            for item in year_month_day_list:
                y_m = item.get('y_m', '')
                # NOTE(review): 'day' is read but never used below — confirm
                # whether end_day should constrain the queries.
                day = item.get('day', 0)
                sql = None
                if what == 'total':
                    # Top 10 most visited datasets for this month; the
                    # concat() chain emits each row as a JSON object
                    # fragment (without the surrounding braces).
                    sql = '''select concat('"month": "', concat(s1.year_month, 
                             concat('", "day": ', concat(s1.end_day, 
                             concat(', "name": "', concat(p.name, 
                             concat('", "title": "', concat(replace(p.title, '"', E'\\''), 
                             concat('", "publisher": "', concat(g.title, 
                             concat('", "visits": ', s1.pageviews))))))))))) as dict from 
                             package p, "group" g, (select year_month, end_day, 
                             package_name, pageviews, publisher_id from 
                             dge_ga_packages where year_month like '{p0}'
                             and organization_id is not null
                             and publisher_id is not null)s1
                             where p.name = s1.package_name
                             and g.id like s1.publisher_id
                             order by s1.pageviews desc, p.title asc
                             limit 10;'''.format(p0=y_m)

                elif what == 'org':
                    # Top 10 most visited datasets per organization
                    # (ROW_NUMBER partitioned by organization_id), each row
                    # including the aggregated resource download count.
                    sql = '''select concat('"org_id": "', organization_id, 
                             concat(concat('", "month": "', concat(s1.year_month, 
                             concat('", "day": ', concat(s1.end_day, 
                             concat(', "name": "', concat(p.name, 
                             concat('", "title": "', concat(replace(p.title, '"', E'\\''), 
                             concat('", "publisher": "', concat(g.title, 
                             concat('", "visits": ', concat(s1.pageviews,
                             concat(', "downloads": ', 
                             (select coalesce(sum(dgr.total_events),0) from 
                             dge_ga_resources dgr 
                             where dgr.organization_id = s1.organization_id
                             and dgr.package_name = s1.package_name
                             and dgr.publisher_id = s1.publisher_id
                             and dgr.year_month = s1.year_month
                             and dgr.end_day = s1.end_day
                             and dgr.resource_id is not null)))))))))))))))) as dict 
                             from package p, "group" g,
                             (select year_month, end_day, organization_id, 
                             package_name, pageviews, publisher_id, 
                             ROW_NUMBER() OVER (PARTITION BY organization_id 
                             order by pageviews DESC) as row_id
                             from dge_ga_packages where year_month like '{p0}'
                             and organization_id is not null
                             and publisher_id is not null
                             order by organization_id asc, pageviews desc, 
                             package_name asc)s1
                             where s1.row_id <= 10
                             and p.name = s1.package_name
                             and g.id like s1.publisher_id
                             order by s1.organization_id asc, s1.pageviews, 
                             p.title asc;'''.format(p0=y_m)
                if sql:
                    result = model.Session.execute(sql)
                    if result:
                        # Number the rows and close each JSON fragment into
                        # a full object.
                        i = 1
                        for row in result:
                            results.append('{"order": %s, %s}' % (i, row[0]))
                            i = i + 1

    # Assemble the final JSON array (empty '[]' when nothing matched).
    string_result = "[" + ",".join(results) + "]"
    string_result = string_result.encode('utf-8')

    return _write_file(string_result, destination, prefix, what)
示例#46
0
                              action='new_resource',
                              id=id)

        # get resources for sidebar
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user or c.author,
            'auth_user_obj': c.userobj
        }
        try:
            pkg_dict = get_action('package_show')(context, {'id': id})
        except NotFound:
            abort(404, _('The dataset {id} could not be found.').format(id=id))
        try:
            check_access('resource_create', context,
                         {'package_id': pkg_dict['id']})
        except NotAuthorized:
            abort(401, _('Unauthorized to create a resource for this package'))

        package_type = pkg_dict['type'] or 'dataset'

        errors = errors or {}
        error_summary = error_summary or {}
        vars = {
            'data': data,
            'errors': errors,
            'error_summary': error_summary,
            'action': 'new',
            'resource_form_snippet': self._resource_form(package_type),
            'dataset_type': package_type
        }
示例#47
0
def dge_dashboard_json_drupal_published_contents(context, data_dict):
    '''
    Get dge_dashboard_drupal_contents table data and write json files

    :param what: what data must be get
    :type what: values: total|comments|org_comments

    :param destination: directory destination of json file
    :type destination: string

    :param prefix: filename prefix of json file
    :type prefix: string
    '''

    check_access('dge_dashboard_json_drupal_published_contents', context,
                 data_dict)

    what = data_dict.get('what')
    destination = data_dict.get('destination')
    prefix = data_dict.get('prefix')

    model = context['model']

    sql = None

    if what in DgeDashboardJsonCommand.DRUPAL_PUBLISHED_CONTENTS:
        if what == 'contents':
            sql = '''select concat('[', concat(string_agg(s2.dict, ','), ']')) 
                     from (select concat('{"date": "', concat(s1.year_month, 
                     concat('", ', concat(string_agg(s1.c , ', '), '}')))) as dict
                     from (select concat('"', concat(content_type, 
                     concat('": ', num_contents))) c, year_month
                     from dge_dashboard_drupal_contents where key like 'total'
                     and (content_type like 'app' OR content_type like 'success' 
                     OR content_type like 'initiative' OR content_type like 'request'))s1
                     group by s1.year_month order by s1.year_month)s2;'''
        elif what == 'comments':
            sql = '''select concat('[', concat(string_agg(s2.dict, ','), ']')) 
                      from(select concat('{"year": "', concat(s1.year_month, 
                      concat('", ', concat(string_agg(s1.c , ', '), '}')))) as dict 
                      from (select concat('"', concat(content_type, 
                      concat('": ', num_contents))) c, year_month
                      from dge_dashboard_drupal_contents where key like 'total'
                      and (content_type like 'dataset_comments' OR 
                      content_type like 'content_comments'))s1
                      group by s1.year_month order by s1.year_month)s2;'''
        elif what == 'org_comments':
            sql = '''select concat('[', concat(string_agg(s2.dict, ','), ']')) 
                     from (select concat('{"year": "', concat(s1.year_month,
                     concat('", "org": "', concat(key_value, '", ',
                     concat(string_agg(s1.c , ', '), '}'))))) as dict from
                     (select concat('"', concat(content_type, 
                     concat('": ', num_contents))) c, key_value, year_month from 
                     dge_dashboard_drupal_contents where key like 'org' and
                     (content_type like 'dataset_comments' OR 
                     content_type like 'content_comments'))s1 
                     group by s1.year_month, s1.key_value 
                     order by s1.year_month)s2;'''

    result = _execute_fetchone_sql(model, sql)
    if result and (what == 'comments' or what == 'org_comments'):
        data = json.loads(result)
        for row in data:
            if not "dataset_comments" in row:
                row['dataset_comments'] = 0L
            if not "content_comments" in row:
                row['content_comments'] = 0L
        result = json.dumps(data)
    return _write_file(result, destination, prefix, what)
示例#48
0
def dge_dashboard_csv_published_datasets_by_root_org(context, data_dict):
    '''
    Get dataset number by root organization

    :param date: the creation date of datasets must be before this
    :type date: string

    :param import_date: year-month of creation date of datasets
    :type import_date: string

    :param save: True only if save in file, False if only print
    :type save: boolean.

    :param destination: directory destination of csv file
    :type destination: string

    :param filename: filename prefix of csv file
    :type filename: string

    :returns: path of the written csv file, or None when printing only or
        when an error occurred
    '''
    check_access('dge_dashboard_csv_published_datasets_by_root_org', context,
                 data_dict)

    date = data_dict.get('date')
    import_date = data_dict.get('import_date')
    save = data_dict.get('save', False)
    destination = data_dict.get('destination')
    filename = data_dict.get('filename')
    model = context['model']
    results = []
    log.debug("Getting root organizations ....")
    # Root organizations: groups whose C_ID_UD_ORGANICA value appears as
    # some group's C_ID_DEP_UD_PRINCIPAL (i.e. they head a hierarchy).
    sql = '''select ge.group_id, g.title, ge.value from group_extra ge, "group" g 
             where key like 'C_ID_UD_ORGANICA' and value in 
             (select distinct value from group_extra 
              where key like 'C_ID_DEP_UD_PRINCIPAL' and value is not null)
             and ge.group_id like g.id order by g.title asc;'''
    result = model.Session.execute(sql)
    for row in result:
        log.debug("Getting datasets number from root organization: %s ....",
                  row[1])
        dir3 = row[2] if row[2] else ''
        # Count, per child organization of this root, the active public
        # datasets that existed before 'date' (reconstructed from
        # package_revision history via first/last revision per dataset).
        # NOTE(review): sql is built with str.format; 'date' and 'dir3'
        # appear to come from an internal command — confirm they are
        # trusted (not user-supplied) input.
        sql2 = '''select g.title, count(p.*) num_datasets from "group" g 
                  left OUTER JOIN ( (select slu.id, slu.owner_org from 
                  (select h.* from (select ROW_NUMBER() OVER 
                  (PARTITION BY pr.continuity_id 
                  order by pr.revision_timestamp asc) as rn, * 
                  from package_revision pr where 
                  pr.revision_timestamp < '{p0}'::timestamp 
                  and pr.type like 'dataset') h where h.rn = 1 ) sc, 
                  (select h.* from (select ROW_NUMBER() OVER 
                  (PARTITION BY pr.continuity_id 
                  order by pr.revision_timestamp desc) as rn, * 
                  from package_revision pr where 
                  pr.revision_timestamp < '{p0}'::timestamp 
                  and pr.type like 'dataset') h where h.rn = 1
                  and h.state like 'active' and h.type like 'dataset'
                  and h.private = false
                  and h.expired_timestamp >= '{p0}'::timestamp) slu 
                  where sc.continuity_id = slu.continuity_id)) as p
                  on g.id = p.owner_org where 
                  g.id in (select distinct ge.group_id
                  from group_extra ge
                  where key like 'C_ID_DEP_UD_PRINCIPAL' and value like '{p1}' 
                  and group_id not like (select ge.group_id from group_extra ge
                  where key like 'C_ID_UD_ORGANICA' and value like '{p1}')) 
                  group by g.id, g.title
                  order by num_datasets desc, g.title asc;'''.format(p0=date,
                                                                     p1=dir3)
        subresult = model.Session.execute(sql2)
        for subrow in subresult:
            results.append((import_date, row[1].encode('utf-8'),
                            subrow[0].encode('utf-8'), subrow[1]))

    if results:
        titleRow = ('Date', 'Root Organization', 'Organization',
                    'Dataset Number')
        if save:
            filename = _get_complete_filename(destination, filename, None,
                                              'csv')
            if filename:
                try:
                    outfile = open(filename, 'w')
                    try:
                        writer = csv.writer(outfile)
                        writer.writerow(titleRow)
                        for row in results:
                            writer.writerow(row)
                    except Exception as e:
                        # Write failure: log and signal by returning None.
                        log.error('Exception %s', e)
                        filename = None
                    finally:
                        outfile.close()
                except Exception as e:
                    # open() failure: same best-effort handling.
                    log.error('Exception %s', e)
                    filename = None
        else:
            # Print-only mode never produces a file path.
            filename = None
            print 'Results:'
            print titleRow
            for row in results:
                print(row)
    return filename
示例#49
0
def dge_dashboard_json_published_datasets(context, data_dict):
    '''
    Get dge_dashboard_published_datasets table data and write json files

    :param what: what data must be get
    :type what: values: total|all|org|adm_level|num_res

    :param destination: directory destination of json file
    :type destination: string

    :param prefix: filename prefix of json file
    :type prefix: string

    :returns: result of _write_file() for the assembled JSON string
    '''

    check_access('dge_dashboard_json_published_datasets', context, data_dict)

    what = data_dict.get('what')
    destination = data_dict.get('destination')
    prefix = data_dict.get('prefix')

    model = context['model']

    sql = None

    # Each branch builds the complete JSON array in SQL (string_agg over
    # per-row concat() fragments) from dge_dashboard_published_datasets,
    # filtered by the table's 'key' column and ordered by year_month.
    if what in DgeDashboardJsonCommand.PUBLISHED_DATASETS_TYPES:
        if what == 'all':
            # Totals including the key/key_value columns.
            sql = '''select concat('[', concat(string_agg(r.dict, ','), ']'))
                     from (select concat('{', concat(s.ym, concat(', ', 
                     concat(s.num, concat(', ', concat(s.key, concat(', ', 
                     concat(s.key_value, '}')))))))) dict from 
                     (select concat('"year": "', concat(d.year_month, '"')) as ym, 
                     concat('"num_datasets": ', d.num_datasets) as num, 
                     concat('"key": "', concat(d.key, '"')) as key, 
                     concat('"key_value": "', concat(d.key_value, '"')) as key_value
                     from dge_dashboard_published_datasets d
                     where key like 'total' order by year_month asc) s)r;'''
        elif what == 'total':
            # Plain per-month totals: {"year": ..., "value": ...}.
            sql = '''select concat('[', concat(string_agg(r.dict, ','), ']'))
                     from (select concat('{', concat(s.ym, concat(', ', 
                     concat(s.value, '}')))) dict from 
                     (select concat('"year": "', concat(d.year_month, '"')) as ym, 
                     concat('"value": ', d.num_datasets) as value
                     from dge_dashboard_published_datasets d
                     where key like 'total' order by year_month asc) s)r;'''
        elif what == 'org':
            # Per-month totals broken down by organization_id.
            sql = '''select concat('[', concat(string_agg(r.dict, ','), ']'))
                     from (select concat('{', concat(s.ym, concat(', ', 
                     concat(s.value, concat(', ', concat(s.org, '}')))))) dict 
                     from (select concat('"year": "', concat(d.year_month, '"')) as ym, 
                     concat('"value": ', d.num_datasets) as value, 
                     concat('"org": "', concat(d.key_value, '"')) as org
                     from dge_dashboard_published_datasets d where
                     key like 'organization_id' order by year_month asc) s)r;'''
        elif what == 'adm_level':
            # Per-month totals grouped by administration level, one JSON
            # object per month with one field per level.
            sql = '''select concat('[', concat(string_agg(r.dict, ','), ']'))
                     from (select concat('{"year": "', concat(s.ym, 
                     concat('", ', concat(string_agg(s.ff , ', '), '}')))) as dict 
                     from (select concat('"', concat(d.key_value, concat('"', 
                     concat(': ', d.num_datasets)))) as ff, d.year_month  as ym
                     from dge_dashboard_published_datasets d where 
                     key like 'administration_level'order by year_month asc )s 
                     group by ym )r;'''
        elif what == 'num_res':
            # Per-month totals broken down by resource count bucket.
            sql = '''select concat('[', concat(string_agg(r.dict, ','), ']'))
                     from (select concat('{', concat(s.ym, concat(', ', 
                     concat(s.value, concat(', ', concat(s.num_res, '}')))))) dict 
                     from (select concat('"year": "', concat(d.year_month, '"')) as ym, 
                     concat('"value": ', d.num_datasets) as value, 
                     concat('"num_res": "', concat(d.key_value, '"')) as num_res
                     from dge_dashboard_published_datasets d where
                     key like 'num_resources' order by year_month asc) s)r;'''

    # sql stays None for unknown 'what' values; _execute_fetchone_sql is
    # expected to cope with that.
    result = _execute_fetchone_sql(model, sql)
    return _write_file(result, destination, prefix, what)
示例#50
0
def dge_dashboard_json_current_users(context, data_dict):
    '''
    Get active users by organization and write them into json file

    :param what: what data must be get
    :type what: values: org|adm_level|num_org

    :param destination: directory destination of json file
    :type destination: string

    :param prefix: filename prefix of json file
    :type prefix: string

    :returns: result of _write_file() for the assembled JSON string
    '''

    # NOTE(review): auth function name ('..._by_org') differs from this
    # action's name — confirm that is the registered auth function.
    check_access('dge_dashboard_json_current_users_by_org', context, data_dict)

    what = data_dict.get('what')
    destination = data_dict.get('destination')
    prefix = data_dict.get('prefix')

    model = context['model']

    sql = None
    # The queries below run against the Drupal database (see
    # _execute_drupal_sql); DATE_FORMAT/group_concat are MySQL functions.
    # The doubled %% escapes survive a later %-format/driver substitution.
    if what in DgeDashboardJsonCommand.USERS_TYPES:
        if what == 'org':
            log.debug("Getting drupal active users by organization")

            # One row per CKAN organization id with the comma-joined list
            # of its active usernames.
            sql = '''SELECT concat('{"date": "', 
                     concat((DATE_FORMAT(now(), '%%Y-%%m-%%d')), 
                     concat('", "org_id":"', concat(s1.ckan_id, 
                     concat('", "users": "', 
                     concat(group_concat(s1.username), '"}')))))) as dict FROM
                     (SELECT fc.field_ckan_organization_id_value ckan_id, 
                     u.name username FROM profile p, field_data_field_root_agency ra, 
                     users u, taxonomy_term_data vo,
                     field_data_field_c_id_ud_organica fo, 
                     field_data_field_ckan_organization_id fc WHERE
                     p.pid = ra.entity_id AND p.uid = u.uid 
                     AND ra.field_root_agency_tid = vo.tid AND
                     ra.entity_type = 'profile2' AND p.type = 'agency_data' 
                     AND ra.bundle = 'agency_data' AND u.status=1 AND vo.vid = 3 
                     AND fo.entity_id = fc.entity_id 
                     AND fo.entity_id = ra.field_root_agency_tid
                     ORDER BY ckan_id, username asc)s1 GROUP BY s1.ckan_id;'''
        elif what == 'adm_level':
            # Active-user counts grouped by administration level (the first
            # character of the organic unit id).
            sql = '''SELECT concat('{"date": "', 
                     concat((DATE_FORMAT(now(), '%%Y-%%m-%%d')), 
                     concat('", "adm_level":"', concat(s1.adm_level, 
                     concat('", "num_users": ', concat(s1.total_users, '}')))))) as dict 
                     FROM (SELECT LEFT(uo.field_c_id_ud_organica_value,1) adm_level, 
                     COUNT(u.name) total_users FROM profile p, 
                     field_data_field_root_agency ra, users u, 
                     taxonomy_term_data vo, field_data_field_c_id_ud_organica uo
                     WHERE p.pid = ra.entity_id AND p.uid = u.uid 
                     AND ra.field_root_agency_tid = vo.tid 
                     AND ra.entity_type = 'profile2' AND p.type = 'agency_data' 
                     AND ra.bundle = 'agency_data' AND uo.entity_type = 'taxonomy_term' 
                     AND uo.entity_id = vo.tid AND u.status=1 AND vo.vid = 3 
                     GROUP BY adm_level ORDER BY total_users DESC) s1;'''
        elif what == 'num_org':
            # Active-user counts per organization name.
            sql = '''SELECT concat('{"date": "', 
                     concat((DATE_FORMAT(now(), '%%Y-%%m-%%d')), 
                     concat('", "org_name":"', concat(s1.org, 
                     concat('", "num_users": ', concat(s1.total_users, '}')))))) as dict 
                     FROM (SELECT vo.name org, Count(u.uid) AS total_users FROM
                     profile p, field_data_field_root_agency ra, users u, 
                     taxonomy_term_data vo WHERE p.pid = ra.entity_id 
                     AND p.uid = u.uid AND ra.field_root_agency_tid = vo.tid 
                     AND ra.entity_type = 'profile2' AND p.type = 'agency_data' 
                     AND ra.bundle = 'agency_data' AND u.status=1 AND vo.vid = 3 
                     GROUP BY vo.tid ORDER BY total_users DESC, org ASC) s1;'''

    result = _execute_drupal_sql(sql)
    if result:
        # Drupal returns latin1-encoded bytes; re-encode as UTF-8 before
        # writing the file.
        result = result.decode('latin1').encode('utf-8')
    return _write_file(result, destination, prefix, what)
示例#51
0
def news_create(context, data_dict):
    '''Create a news.

    :param title: The title of the news.
    :type title: string

    :param content: Content/body of the news (optional, defaults to '').
    :type content: string

    :param meta: Additional meta data for the news such as latitude/longitude etc.
    :type meta: string in JSON format

    :param expiration_date: Date after which the news expires (optional).

    :param image_url: URL of an image for the news (optional, defaults to '').
    :type image_url: string

    :returns: the newly created news object
    :rtype: dictionary

    '''

    log.info('News create: %r', data_dict)
    l.check_access('news_create', context, data_dict)
    data, errors = df.validate(data_dict, schema.news_create_schema(), context)

    if errors:
        raise t.ValidationError(errors)

    title = data.get('title')
    # Derive the item's URL-friendly name from its title.
    name = gen_news_name(title)
    content = data.get('content', u'')
    meta = data.get('meta', u'{}')
    expiration_date = data.get('expiration_date')
    image_url = data.get('image_url', u'')

    m = context.get('model')
    user_obj = m.User.get(context.get('user'))

    news = ckanextNews(title=title,
                       name=name,
                       content=content,
                       meta=meta,
                       expiration_date=expiration_date,
                       image_url=image_url,
                       creator_id=user_obj.id)
    news.save()
    out = news_dictize(news)

    # Send mail notification to all news subscribed users except the creator of the news
    # TODO: Email notifications should be sent asynchronous using celery tasks
    # TODO: Setup email server for testing mode
    send_email_condition = config.get('testing', False)
    if not send_email_condition:
        users = _get_action('news_mail_subscribed_users_show')(
            {
                'ignore_auth': True
            }, {})
        # Named 'email_vars' (not 'vars') so the builtin is not shadowed.
        email_vars = {
            'site_title_dk': config_option_show('ckan.site_title', 'da_DK'),
            'site_title_en': config_option_show('ckan.site_title', 'en'),
            'site_url': config.get('ckan.site_url'),
            'news_item_title': out['title'],
            'news_item_content': render_markdown(out['content'], True)
        }

        for u in users:
            # Never notify the author about their own news item.
            if user_obj.id == u['subscriber_id']:
                continue

            u_obj = context['model'].User.get(u['subscriber_id'])
            if u_obj is None:
                continue

            email_vars['user_name'] = u_obj.name
            msg_body = render_jinja2('emails/news_published.txt', email_vars)
            msg_subject = render_jinja2('emails/news_published_subject.txt',
                                        email_vars)
            send_email(msg_body, u_obj.email, msg_subject)

    return out
示例#52
0
def dge_dashboard_json_current_distribution_format(context, data_dict):
    '''
    Get current distribution format counts, either globally, by
    administration level or by organization, and dump the result to a
    JSON file.

    :param what: type of data to get, 'adm_level' = administration_level,
        'total' = total, 'org' = organization
    :type what: string

    :param destination: directory destination of json file
    :type destination: string

    :param prefix: filename prefix of json file
    :type prefix: string

    :returns: result of ``_write_file`` for the generated JSON.  When
        ``what`` is not a recognised type, ``sql`` stays ``None`` --
        presumably ``_execute_fetchone_sql`` tolerates that; TODO confirm.
    '''

    check_access('dge_dashboard_json_current_distribution_format', context,
                 data_dict)

    what = data_dict.get('what')
    destination = data_dict.get('destination')
    prefix = data_dict.get('prefix')

    model = context['model']
    sql = None

    if what in DgeDashboardJsonCommand.DISTRIBUTION_FORMAT_TYPES:
        # Default query: one JSON object per distribution format with
        # today's date, the format name and the count of active resources
        # belonging to public, active datasets.
        sql = '''select concat('[', concat(string_agg(s2.dict, ','), ']')) from
                 (select concat('{"date": "', concat((to_char(now(), 'YYYY-MM-DD')), 
                 concat('", "format": "', concat(s1.f, concat('", "value": ', 
                 concat(s1.num, '}')))))) as dict from (select r.format as f, 
                 count(*) num from package p, resource r where
                 p.private = False and p.type like 'dataset' 
                 and p.state like 'active' and p.id = r.package_id 
                 and r.state like 'active' group by f
                 order by num desc)s1)s2; '''
        if what == 'adm_level':
            # Same counts, additionally broken down by administration level
            # (first character of the group's C_ID_UD_ORGANICA extra, joined
            # through the dataset's 'publisher' extra).
            sql = '''select concat('[', concat(string_agg(s2.dict, ','), ']')) 
                     from (select concat('{"date":"', 
                     concat((to_char(now(), 'YYYY-MM-DD')), 
                     concat('", "format":"', concat(s1.f, concat('", "level":"', 
                     concat(s1.v, concat('", "value":', concat(num, '}')))))))) as dict 
                     from (select substring(g.value, 0, 2) as v, r.format as f, 
                     count(*) num from package_extra pe, package p, 
                     group_extra g, resource r where p.private = False 
                     and p.type like 'dataset' and p.state like 'active'
                     and p.id = pe.package_id and pe.key like 'publisher' 
                     and pe.state like 'active' and g.group_id = pe.value 
                     and g.key like 'C_ID_UD_ORGANICA'
                     and p.id = r.package_id and r.state like 'active'
                     group by v, f
                     order by v, num desc, f asc)s1)s2;'''
        elif what == 'org':
            # Same counts, broken down by owning organization id.
            sql = '''select concat('[', concat(string_agg(s2.dict, ','), ']')) 
                     from (select concat('{"date": "', 
                     concat((to_char(now(), 'YYYY-MM-DD')), 
                     concat('", "format": "', concat(s1.f, 
                     concat('", "org_id": "', concat(s1.o ,
                     concat('", "value": ', concat(s1.num, '}')))))))) as dict 
                     from (select r.format as f, p.owner_org as o, count(*) num 
                     from package p, resource r where p.private = False and 
                     p.type like 'dataset' and p.state like 'active'
                     and p.id = r.package_id and r.state like 'active'
                     group by f, o
                     order by num desc, o asc, f desc)s1)s2;'''

    # Run the (single-row) query and write the JSON payload out under
    # <destination>/<prefix>...<what>.
    result = _execute_fetchone_sql(model, sql)
    return _write_file(result, destination, prefix, what)
def audited_datastore_update_auth(context, data_dict=None):
    """Auth function for updating an audited datastore table.

    Anyone allowed to perform a regular ``datastore_upsert`` is allowed;
    ``check_access`` raises NotAuthorized otherwise.
    """
    # Delegate the decision entirely to the core datastore_upsert auth.
    logic.check_access('datastore_upsert', context, data_dict)
    result = {'success': True}
    return result
示例#54
0
def group_list(context, data_dict):
    """A fix for the efficiency of group_list.

    Lists active groups (never organizations -- ``is_org`` is fixed to
    ``False``) and answers each one through a short-term-cached
    ``group_show`` call instead of dictizing every group on every request.

    Recognised ``data_dict`` keys mirror core ``group_list``: ``groups``,
    ``type``, ``lite_list``, ``sort``, ``hide_empty``, ``q``,
    ``order_by`` (deprecated), ``all_fields`` and ``include_extras``.

    :returns: a list of group dicts when ``all_fields`` is truthy,
        otherwise a list of group names (or ids for API v2).
    """
    is_org = False

    check_access('group_list', context, data_dict)

    model = context['model']
    api = context.get('api_version')
    groups = data_dict.get('groups')
    group_type = data_dict.get('type', 'group')
    ref_group_by = 'id' if api == 2 else 'name'
    lite_list = data_dict.get('lite_list', False)

    sort = data_dict.get('sort', 'name')
    hide_empty = data_dict.get('hide_empty', False)
    q = data_dict.get('q')

    # order_by deprecated in ckan 1.8
    # if it is supplied and sort isn't use order_by and raise a warning
    order_by = data_dict.get('order_by', '')
    if order_by:
        # log.warn is a deprecated alias; use log.warning
        log.warning('`order_by` deprecated please use `sort`')
        if not data_dict.get('sort'):
            sort = order_by

    # if the sort is packages and no sort direction is supplied we want to do a
    # reverse sort to maintain compatibility.
    if sort.strip() in ('packages', 'package_count'):
        sort = 'package_count desc'

    sort_info = _unpick_search(
        sort,
        allowed_fields=['name', 'packages', 'package_count', 'title'],
        total=1)

    all_fields = data_dict.get('all_fields', None)
    include_extras = all_fields and \
                     asbool(data_dict.get('include_extras', False))

    query = model.Session.query(model.Group)
    if include_extras:
        # this does an eager load of the extras, avoiding an sql query every
        # time group_list_dictize accesses a group's extra.
        query = query.options(sqlalchemy.orm.joinedload(model.Group._extras))

    query = query.filter(model.Group.state == 'active')
    if groups:
        query = query.filter(model.Group.name.in_(groups))
    if q:
        q = u'%{0}%'.format(q)
        query = query.filter(
            sqlalchemy.or_(
                model.Group.name.ilike(q),
                model.Group.title.ilike(q),
                model.Group.description.ilike(q),
            ))

    query = query.filter(model.Group.is_organization == is_org)
    if not is_org and group_type != 'group':
        query = query.filter(model.Group.type == group_type)

    groups = query.all()

    action = 'organization_show' if is_org else 'group_show'

    # NOTE(review): the cache key does not include the requesting user, so
    # cached entries may leak private group information -- confirm before
    # relying on this with private groups.
    @cache.region('short_term', 'action_group_show')
    def group_show_cached(action, group_id):
        # Work on a copy: the previous code assigned data_dict['id'],
        # mutating the caller's dict and leaking the last group id back
        # to the caller and across iterations.
        show_dict = dict(data_dict)
        show_dict['id'] = group_id
        return get_action(action)(context, show_dict)

    @cache.region('short_term', 'action_group_show_list')
    def group_show_list_cached(action, group_ids):
        g_list = []
        for group in group_ids:
            g_list.append(group_show_cached(action, group))
        return g_list

    g_list = group_show_list_cached(action, [g.id for g in groups])

    g_list = sorted(g_list,
                    key=lambda x: x[sort_info[0][0]],
                    reverse=sort_info[0][1] == 'desc')

    if hide_empty:
        g_list = [g for g in g_list if g['package_count'] > 0]

    if not all_fields:
        g_list = [group[ref_group_by] for group in g_list]

    return g_list
示例#55
0
def group_list(context, data_dict):
    """Return the list of groups, delegating to the shared helper."""
    # Same permission check core CKAN uses for group listing.
    logic.check_access('group_list', context, data_dict)
    listing = _domain_or_group_list(context, data_dict, "group")
    return listing
def audited_datastore_create(context, data_dict=None):
    """Create an audited datastore table and insert the initial records.

    Wraps core ``datastore_create``: appends the audit bookkeeping columns
    (last-modified and deleted-time timestamps) to the field list, creates
    the table, then bulk-inserts the supplied records in chunks inside a
    single transaction, stamping each record with the update timestamp.

    :param data_dict: ``datastore_create`` payload; must contain ``fields``
        and ``primary_key``; may contain ``records`` and an update
        timestamp under ``UPDATE_TIMESTAMP_FIELD``.
    :returns: the ``datastore_create`` response, extended with
        ``created_records`` (number of records inserted).
    """
    logic.check_access('audited_datastore_create', context, data_dict)
    log.debug('starting: audited_datastore_create')

    check_and_bust('fields', data_dict)
    check_and_bust('primary_key', data_dict)

    # Audit columns maintained by this extension.
    data_dict['fields'].append({
        "id": LAST_MODIFIED_COLUMN,
        "type": "timestamp"
    })
    data_dict['fields'].append({
        "id": DELETED_TIME_COLUMN,
        "type": "timestamp"
    })

    update_time = data_dict.pop(UPDATE_TIMESTAMP_FIELD, str(datetime.utcnow()))

    # Normalise timezone-suffixed timestamps (e.g. "...+02:00") to naive
    # ones.  Raw string avoids the invalid '\d' escape warning on Python 3;
    # re.search caches the compiled pattern internally.
    if re.search(r'[+-]\d\d:\d\d$', update_time):
        update_time = to_timestamp_naive(update_time)

    records = data_dict.pop('records', [])
    # first create
    response = get_action('datastore_create')(context, data_dict)

    # this is needed when creating new resource in this step too
    data_dict.pop('resource', None)
    data_dict['resource_id'] = response['resource_id']

    records_size = 0

    if records:
        data_dict['connection_url'] = pylons.config['ckan.datastore.write_url']
        engine = db._get_engine(data_dict)
        context['connection'] = engine.connect()
        timeout = context.get('query_timeout', db._TIMEOUT)
        trans = context['connection'].begin()

        start = datetime.utcnow()

        # upsert
        chunk_index = 1
        records_size = len(records)
        # Ceiling division; '//' keeps this an int under Python 3 as well
        # ('/' would yield a float and break the chunk count).
        chunk_num = records_size // CHUNK_UPSERT_NUMBER
        if records_size % CHUNK_UPSERT_NUMBER > 0:
            chunk_num += 1

        try:
            context['connection'].execute(
                u'SET LOCAL statement_timeout TO {0}'.format(timeout))

            records_chunk = []
            num_records = 0
            for record in records:
                record[LAST_MODIFIED_COLUMN] = update_time
                record[DELETED_TIME_COLUMN] = None
                records_chunk.append(record)
                num_records += 1
                if num_records >= CHUNK_UPSERT_NUMBER:
                    log.debug('insert chunk {0}/{1}'.format(
                        chunk_index, chunk_num))
                    insert_chunk_data_dict = get_upsert_data_dict(
                        data_dict, records_chunk, db._INSERT)
                    transaction_upsert(context, insert_chunk_data_dict,
                                       timeout, trans)
                    records_chunk = []
                    num_records = 0
                    chunk_index += 1

            # insert the leftover records
            if records_chunk:
                log.debug('insert chunk {0}/{1}'.format(
                    chunk_index, chunk_num))
                insert_chunk_data_dict = get_upsert_data_dict(
                    data_dict, records_chunk, db._INSERT)
                transaction_upsert(context, insert_chunk_data_dict, timeout,
                                   trans)

            trans.commit()
        finally:
            context['connection'].close()
        # don't need to catch exception and rollback, it's done in the transaction_upsert method

        end = datetime.utcnow()
        log.debug("create [db] lasted = {0}".format(end - start))

        data_dict.pop('connection_url', None)

    response['created_records'] = records_size
    return response
示例#57
0
def package_create(context, data_dict):
    """Create a dataset, substituting a custom owner_org validator.

    Mirrors core CKAN ``package_create`` but replaces ``default_oov``
    with ``uds_oov`` in the schema's ``owner_org`` validator chain before
    validation runs.

    :raises ValidationError: if the data fails schema validation.
    :returns: the created package dict, or only its id when
        ``context['return_id_only']`` is set.
    """
    model = context['model']
    user = context['user']

    package_type = data_dict.get('type')
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()
    # We modify the schema here to replace owner_org_validator by our own
    if 'owner_org' in schema:
        schema['owner_org'] = [
            uds_oov if f is default_oov else f for f in schema['owner_org']
        ]

    check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_create')
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    # Record the creation as a new revision attributed to the current user.
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    admins = []
    if user:
        # NOTE(review): user.decode('utf8') implies Python 2 byte-string
        # usernames -- confirm before running under Python 3.
        user_obj = model.User.by_name(user.decode('utf8'))
        if user_obj:
            admins = [user_obj]
            data['creator_user_id'] = user_obj.id

    pkg = model_save.package_dict_save(data, context)

    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id

    # Assign the owning organisation without re-checking auth and without
    # committing yet (the commit below covers it).
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    # Notify all IPackageController plugins of the new package.
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)

        item.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    return_id_only = context.get('return_id_only', False)

    output = context['id'] if return_id_only \
        else get_action('package_show')(context, {'id': context['id']})

    return output
示例#58
0
def package_show(context, data_dict):
    """Version-aware replacement for ``package_show``.

    Works out whether the request is for a specific version, the latest
    version of a versioned dataset, or an unversioned dataset, and returns
    the version to display annotated with a ``_versions`` list of sibling
    version names/urls.
    """
    # Small enum: what kind of dataset the request resolved to.
    class DatasetType:
        (specific_version, latest_version, unversioned) = range(3)

    #logging.warning('DATASETVERSIONS PACKAGE_SHOW')

    # The parent dataset is private so it doesn't appear in the lists
    # but we want to override the authentication checks so we can
    # access the child datasets that represent the different versions
    ignore_auth = context.get('ignore_auth')
    context['ignore_auth'] = True

    # Get the dataset we actually asked for
    requested_dataset = ckan_package_show(context, data_dict)

    version_to_display = requested_dataset

    parent_names = _get_parent_dataset_names(
        get_context(context), requested_dataset['id'])

    if len(parent_names) > 0:
        # Has a parent dataset: the request was for one specific version.
        base_name = parent_names[0]
        dataset_type = DatasetType.specific_version
        all_version_names = _get_child_dataset_names(
            get_context(context), base_name)
    else:
        # Requesting the latest version or an unversioned dataset
        base_name = requested_dataset['name']

        all_version_names = _get_child_dataset_names(
            get_context(context), base_name)

        if len(all_version_names) > 0:
            dataset_type = DatasetType.latest_version
        else:
            dataset_type = DatasetType.unversioned

    all_active_versions = _get_ordered_active_dataset_versions(
        get_context(context),
        data_dict.copy(),  # Will get modified so make a copy
        all_version_names)

    # Show the most recent, public active version
    if dataset_type == DatasetType.latest_version and \
       len(all_active_versions) > 0:
        version_to_display = all_active_versions[0]

    if dataset_type in (DatasetType.unversioned, DatasetType.specific_version):
        # Do default CKAN authentication
        # NOTE(review): the original ignore_auth value is only restored on
        # this branch, and never restored if an earlier call raises --
        # confirm that is intentional.
        context['ignore_auth'] = ignore_auth
        logic.check_access('package_show', get_context(context), data_dict)

    version_to_display['_versions'] = _get_version_names_and_urls(
        all_active_versions, base_name)

    # Reindexing fails if we don't do this
    # Later versions of CKAN will not include these in the package
    # See https://github.com/ckan/ckan/issues/3114
    version_to_display.pop('relationships_as_subject', False)
    version_to_display.pop('relationships_as_object', False)

    return version_to_display
示例#59
0
def event_create(context, data_dict):
    '''Create an event.

    :param title: The title of the event.
    :type title: string

    :param description: Description of the event.
    :type description: string

    :param venue: Venue of the event.
    :type venue: string

    :param start: Start date of the event.
    :type start: string

    :param end: End date of the event.
    :type end: string

    :param active: State of the event (optional).
    :type active: boolean

    :param meta: Additional meta data for the event such as latitude/longitude etc.
    :type meta: string in JSON format

    :returns: the newly created event
    :rtype: dictionary

    '''

    log.info('Event create: %r', data_dict)

    l.check_access('event_create', context, data_dict)

    data, errors = df.validate(data_dict, schema.event_create_schema(),
                               context)
    if errors:
        raise t.ValidationError(errors)

    model = context.get('model')
    creator = model.User.get(context.get('user'))

    title = data.get('title')
    event = ckanextEvent(
        title=title,
        name=gen_event_name(title),
        description=data.get('description', u''),
        start=data.get('start'),
        end=data.get('end'),
        venue=data.get('venue', u''),
        meta=data.get('meta', u'{}'),
        # NOTE(review): the original docstring claimed active defaults to
        # true, yet the code falls back to False here -- presumably the
        # validation schema supplies the real default; confirm.
        active=data.get('active', False),
        creator_id=creator.id)
    event.save()

    return event_dictize(event)
示例#60
0
def domain_list(context, data_dict):
    """Return the list of Eurovoc domains, delegating to the shared helper."""
    # Domains reuse the group listing permission.
    logic.check_access('group_list', context, data_dict)
    domains = _domain_or_group_list(context, data_dict, "eurovoc_domain")
    return domains