Example #1
    def edit(self, id, data=None, errors=None, error_summary=None):
        package_type = self._get_package_type(id)
        context = {'model': model, 'session': model.Session,
                   'user': c.user, 'auth_user_obj': c.userobj,
                   'save': 'save' in request.params}

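        # If the form was submitted ('save' in params) and no form data was
        # passed back in, save immediately.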
        if context['save'] and not data:
            return self._save_edit(id, context, package_type=package_type)
        try:
            c.pkg_dict = get_action('package_show')(dict(context,
                                                         for_view=True),
                                                    {'id': id})
            context['for_edit'] = True
            old_data = get_action('package_show')(context, {'id': id})
            # old_data comes from the database; data is passed in from the
            # user when there is a validation error. Prefer the user's data if present.
            if data:
                old_data.update(data)
            data = old_data
        except (NotFound, NotAuthorized):
            abort(404, _('Dataset not found'))
        # are we doing a multiphase add?
        if data.get('state', '').startswith('draft'):
            c.form_action = h.url_for(controller='package', action='new')
            c.form_style = 'new'
            return self.new(data=data, errors=errors,
                            error_summary=error_summary)

        c.pkg = context.get("package")
        c.resources_json = h.json.dumps(data.get('resources', []))

        try:
            check_access('package_update', context)
        except NotAuthorized:
            abort(403, _('User %r not authorized to edit %s') % (c.user, id))
        # convert tags if not supplied in data
        if data and not data.get('tag_string'):
            data['tag_string'] = ', '.join(h.dict_list_reduce(
                c.pkg_dict.get('tags', {}), 'name'))
        errors = errors or {}
        form_snippet = self._package_form(package_type=package_type)
        form_vars = {'data': data, 'errors': errors,
                     'error_summary': error_summary, 'action': 'edit',
                     'dataset_type': package_type,
                     }
        c.errors_json = h.json.dumps(errors)

        self._setup_template_variables(context, {'id': id},
                                       package_type=package_type)

        # we have already completed stage 1
        form_vars['stage'] = ['active']
        if data.get('state', '').startswith('draft'):
            form_vars['stage'] = ['active', 'complete']

        edit_template = self._edit_template(package_type)
        return render(edit_template,
                      extra_vars={'form_vars': form_vars,
                                  'form_snippet': form_snippet,
                                  'dataset_type': package_type})
Example #2
    def _save_new(self, context, package_type=None):
        # The staged add-dataset flow reuses the 'new' functionality while the
        # dataset is only partially created, so we need to know whether this is
        # really an update or a genuinely new dataset.
        is_an_update = False
        ckan_phase = request.params.get('_ckan_phase')
        from ckan.lib.search import SearchIndexError
        try:
            data_dict = clean_dict(dict_fns.unflatten(
                tuplize_dict(parse_params(request.POST))))
            if ckan_phase:
                # prevent clearing of groups etc
                context['allow_partial_update'] = True
                # convert the comma-separated tag string into a list of tags
                if 'tag_string' in data_dict:
                    data_dict['tags'] = self._tag_string_to_list(data_dict['tag_string'])

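                # Project-specific validation hook for the submitted dataset fields.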
                self._validate_dataset(data_dict)

                if data_dict.get('pkg_name'):
                    is_an_update = True
                    # This is actually an update not a save
                    data_dict['id'] = data_dict['pkg_name']
                    del data_dict['pkg_name']
                    # don't change the dataset state
                    data_dict['state'] = 'draft'
                    # this is actually an edit not a save
                    pkg_dict = get_action('package_update')(context, data_dict)

                    if request.params['save'] == 'go-metadata':
                        # redirect to add metadata
                        url = h.url_for(controller='package', action='new_metadata', id=pkg_dict['name'])
                    elif request.params['save'] == 'save-draft':
                        url = h.url_for(controller='package', action='read', id=pkg_dict['name'])
                    else:
                        # redirect to add dataset resources
                        url = h.url_for(controller='package', action='new_resource', id=pkg_dict['name'])
                    redirect(url)
                # Make sure we don't index this dataset
                if request.params['save'] not in ['go-resource', 'go-metadata']:
                    data_dict['state'] = 'draft'
                # allow the state to be changed
                context['allow_state_change'] = True

            data_dict['type'] = package_type
            context['message'] = data_dict.get('log_message', '')
            pkg_dict = get_action('package_create')(context, data_dict)

            if ckan_phase and request.params['save'] != 'save-draft':
                url = h.url_for(controller='package', action='new_resource', id=pkg_dict['name'])
                redirect(url)
            elif request.params['save'] == 'save-draft':
                url = h.url_for(controller='package', action='read', id=pkg_dict['name'])
                redirect(url)
            self._form_save_redirect(pkg_dict['name'], 'new', package_type=package_type)
        except NotAuthorized:
            abort(401, _('Unauthorized to read package %s') % '')
        except NotFound, e:
            abort(404, _('Dataset not found'))
        except dict_fns.DataError:
            abort(400, _(u'Integrity Error'))
        except SearchIndexError, e:
            try:
                exc_str = unicode(repr(e.args))
            except Exception:  # We don't like bare excepts
                exc_str = unicode(str(e))
            abort(500, _(u'Unable to add package to search index.') + exc_str)
        except ValidationError, e:
            errors = e.error_dict
            error_summary = e.error_summary
            if is_an_update:
                # we need to get the state of the dataset to show the stage we
                # are on.
                pkg_dict = get_action('package_show')(context, data_dict)
                data_dict['state'] = pkg_dict['state']
                return self.edit(data_dict['id'], data_dict,
                                 errors, error_summary)
            data_dict['state'] = 'none'
            return self.new(data_dict, errors, error_summary)
    def _save_edit(self, name_or_id, context, package_type=None):
        from ckan.lib.search import SearchIndexError
        log.debug('Package save request name: %s POST: %r', name_or_id,
                  request.POST)
        try:
            data_dict = clean_dict(
                dict_fns.unflatten(tuplize_dict(parse_params(request.POST))))

            self._validate_dataset(data_dict)

            if '_ckan_phase' in data_dict:
                # we allow partial updates to not destroy existing resources
                context['allow_partial_update'] = True
                if 'tag_string' in data_dict:
                    data_dict['tags'] = self._tag_string_to_list(
                        data_dict['tag_string'])
                del data_dict['_ckan_phase']
                del data_dict['save']
            context['message'] = data_dict.get('log_message', '')
            data_dict['id'] = name_or_id

            # Get the dataset's current list of extras and add to data_dict any
            # extras that are missing (i.e. not present in request.POST),
            # replacing outdated values
            extra_fields = get_action('package_show')(dict(context,
                                                           for_view=True), {
                                                               'id': name_or_id
                                                           })['extras']
            if 'extras' not in data_dict.keys():
                data_dict['extras'] = []
            for extra_field in extra_fields:
                found_extra_field = filter(
                    lambda x: x['key'] == extra_field['key'],
                    data_dict['extras'])
                if len(found_extra_field) == 0:
                    data_dict['extras'].append(extra_field)

            time_now = moment.now().isoformat()

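            # Stamp the dataset's 'modified' extra with the current timestamp.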
            self._add_or_replace_extra(key='modified',
                                       value=time_now,
                                       extras=data_dict['extras'])

            self.__generate_spatial_extra_field(data_dict)

            pkg = get_action('package_update')(context, data_dict)
            c.pkg = context['package']
            c.pkg_dict = pkg

            self._form_save_redirect(pkg['name'],
                                     'edit',
                                     package_type=package_type)
        except NotAuthorized:
            abort(403, _('Unauthorized to read package %s') % name_or_id)
        except NotFound, e:
            abort(404, _('Dataset not found'))
    def resource_edit(self,
                      id,
                      resource_id,
                      data=None,
                      errors=None,
                      error_summary=None):
        context = {
            'model': model,
            'session': model.Session,
            'api_version': 3,
            'for_edit': True,
            'user': c.user,
            'auth_user_obj': c.userobj
        }
        data_dict = {'id': id}

        try:
            check_access('package_update', context, data_dict)
        except NotAuthorized:
            abort(403, _('User %r not authorized to edit %s') % (c.user, id))

        if request.method == 'POST' and not data:
            data = data or \
                clean_dict(dict_fns.unflatten(tuplize_dict(parse_params(
                                                           request.POST))))
            # we don't want to include save as it is part of the form
            del data['save']

            # Save the "issued" and "modified" fields
            package_data = get_action('resource_show')(context, {
                'id': resource_id
            })
            # Packages created without the "issued" extra field should default to the "created" field
            issued = package_data.get('issued',
                                      None) or package_data.get('created')
            data['issued'] = issued
            data['modified'] = moment.now().isoformat()

            data['package_id'] = id
            try:
                if resource_id:
                    data['id'] = resource_id
                    get_action('resource_update')(context, data)
                else:
                    get_action('resource_create')(context, data)
            except ValidationError, e:
                errors = e.error_dict
                error_summary = e.error_summary
                return self.resource_edit(id, resource_id, data, errors,
                                          error_summary)
            except NotAuthorized:
                abort(401, _('Unauthorized to edit this resource'))
    def _save_edit(self, name_or_id, context, package_type=None):
        from ckan.lib.search import SearchIndexError
        log.debug('Package save request name: %s POST: %r', name_or_id,
                  request.POST)
        try:
            data_dict = clean_dict(
                dict_fns.unflatten(tuplize_dict(parse_params(request.POST))))

            self._validate_dataset(data_dict)

            if '_ckan_phase' in data_dict:
                # we allow partial updates to not destroy existing resources
                context['allow_partial_update'] = True
                if 'tag_string' in data_dict:
                    data_dict['tags'] = self._tag_string_to_list(
                        data_dict['tag_string'])
                del data_dict['_ckan_phase']
                del data_dict['save']
            context['message'] = data_dict.get('log_message', '')
            data_dict['id'] = name_or_id

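            # Derive the 'spatial' extra from the submitted data (project-specific helper).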
            self.__generate_spatial_extra_field(data_dict)

            pkg = get_action('package_update')(context, data_dict)
            c.pkg = context['package']
            c.pkg_dict = pkg

            self._form_save_redirect(pkg['name'],
                                     'edit',
                                     package_type=package_type)
        except NotAuthorized:
            abort(403, _('Unauthorized to read package %s') % name_or_id)
        except NotFound, e:
            abort(404, _('Dataset not found'))
    def search(self):
        package_type = self._guess_package_type()

        try:
            context = {'model': model, 'user': c.user or c.author, 'auth_user_obj': c.userobj}
            check_access('site_read', context)
        except NotAuthorized:
            abort(401, _('Not authorized to see this page'))

        q = c.q = request.params.get('q', u'')
        c.query_error = False
        page = self._get_page_number(request.params)
        limit = g.datasets_per_page
        params_nopage = [(k, v) for k, v in request.params.items() if k != 'page']

        def drill_down_url(alternative_url=None, **by):
            return h.add_url_param(
                alternative_url=alternative_url,
                controller='package',
                action='search',
                new_params=by
            )

        c.drill_down_url = drill_down_url

        def remove_field(key, value=None, replace=None):
            return h.remove_url_param(key, value=value, replace=replace, controller='package', action='search')

        c.remove_field = remove_field

        sort_by = request.params.get('sort', None)
        params_nosort = [(k, v) for k, v in params_nopage if k != 'sort']

        def _sort_by(fields):
            params = params_nosort[:]
            if fields:
                sort_string = ', '.join('%s %s' % f for f in fields)
                params.append(('sort', sort_string))
            return search_url(params, package_type)

        c.sort_by = _sort_by
        if not sort_by:
            c.sort_by_fields = []
        else:
            c.sort_by_fields = [field.split()[0] for field in sort_by.split(',')]

        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params, package_type)

        c.search_url_params = urlencode(_encode_params(params_nopage))

        try:
            c.fields = []
            c.fields_grouped = {}
            search_extras = {}
            fq = ''
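            # Build the Solr filter query from the remaining request params;
            # 'ext_' params are passed through as search extras instead.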
            for (param, value) in request.params.items():
                if param not in ['q', 'page', 'sort'] \
                        and len(value) and not param.startswith('_'):
                    if not param.startswith('ext_'):
                        c.fields.append((param, value))
                        if param != 'organization':
                            fq += ' %s:"%s"' % (param, value)
                        else:
                            fq += custom_organization_filter(value)
                        if param not in c.fields_grouped:
                            c.fields_grouped[param] = [value]
                        else:
                            c.fields_grouped[param].append(value)
                    else:
                        search_extras[param] = value

            context = {'model': model, 'session': model.Session,
                       'user': c.user or c.author, 'for_view': True,
                       'auth_user_obj': c.userobj}

            if package_type and package_type != 'dataset':
                fq += ' +dataset_type:{type}'.format(type=package_type)
            elif not asbool(config.get('ckan.search.show_all_types', 'False')):
                fq += ' +dataset_type:dataset'

            facets = OrderedDict()

            default_facet_titles = {
                'organization': _('Organizations'),
                'groups': _('Groups'),
                'tags': _('Tags'),
                'res_format': _('Formats'),
                'license_id': _('Licenses'),
            }

            for facet in g.facets:
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet

            for plugin in p.PluginImplementations(p.IFacets):
                facets = plugin.dataset_facets(facets, package_type)

            c.facet_titles = facets

            data_dict = {
                'q': q,
                'fq': fq.strip(),
                'facet.field': facets.keys(),
                'rows': limit,
                'start': (page - 1) * limit,
                'sort': sort_by,
                'extras': search_extras
            }

            query = get_action('package_search')(context, data_dict)
            c.sort_by_selected = query['sort']

            c.page = h.Page(
                collection=query['results'],
                page=page,
                url=pager_url,
                item_count=query['count'],
                items_per_page=limit
            )
            c.facets = query['facets']
            c.search_facets = query['search_facets']
            c.page.items = query['results']
        except SearchError, se:
            log.error('Dataset search error: %r', se.args)
            c.query_error = True
            c.facets = {}
            c.search_facets = {}
            c.page = h.Page(collection=[])
Example #9
    def search(self):
        package_type = self._guess_package_type()

        try:
            context = {
                'model': model,
                'user': c.user or c.author,
                'auth_user_obj': c.userobj
            }
            check_access('site_read', context)
        except NotAuthorized:
            abort(401, _('Not authorized to see this page'))

        q = c.q = request.params.get('q', u'')
        c.query_error = False
        page = self._get_page_number(request.params)
        limit = g.datasets_per_page
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']

        def drill_down_url(alternative_url=None, **by):
            return h.add_url_param(alternative_url=alternative_url,
                                   controller='package',
                                   action='search',
                                   new_params=by)

        c.drill_down_url = drill_down_url

        def remove_field(key, value=None, replace=None):
            return h.remove_url_param(key,
                                      value=value,
                                      replace=replace,
                                      controller='package',
                                      action='search')

        c.remove_field = remove_field

        sort_by = request.params.get('sort', None)
        params_nosort = [(k, v) for k, v in params_nopage if k != 'sort']

        def _sort_by(fields):
            params = params_nosort[:]
            if fields:
                sort_string = ', '.join('%s %s' % f for f in fields)
                params.append(('sort', sort_string))
            return search_url(params, package_type)

        c.sort_by = _sort_by
        if not sort_by:
            c.sort_by_fields = []
        else:
            c.sort_by_fields = [
                field.split()[0] for field in sort_by.split(',')
            ]

        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params, package_type)

        c.search_url_params = urlencode(_encode_params(params_nopage))

        try:
            c.fields = []
            c.fields_grouped = {}
            search_extras = {}
            fq = ''
            for (param, value) in request.params.items():
                if param not in ['q', 'page', 'sort'] \
                        and len(value) and not param.startswith('_'):
                    if not param.startswith('ext_'):
                        c.fields.append((param, value))
                        if param != 'organization':
                            fq += ' %s:"%s"' % (param, value)
                        else:
                            fq += custom_organization_filter(value)
                        if param not in c.fields_grouped:
                            c.fields_grouped[param] = [value]
                        else:
                            c.fields_grouped[param].append(value)
                    else:
                        search_extras[param] = value

            context = {
                'model': model,
                'session': model.Session,
                'user': c.user or c.author,
                'for_view': True,
                'auth_user_obj': c.userobj
            }

            if package_type and package_type != 'dataset':
                fq += ' +dataset_type:{type}'.format(type=package_type)
            elif not asbool(config.get('ckan.search.show_all_types', 'False')):
                fq += ' +dataset_type:dataset'

            facets = OrderedDict()

            default_facet_titles = {
                'organization': _('Organizations'),
                'groups': _('Groups'),
                'tags': _('Tags'),
                'res_format': _('Formats'),
                'license_id': _('Licenses'),
            }

            for facet in g.facets:
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet

            for plugin in p.PluginImplementations(p.IFacets):
                facets = plugin.dataset_facets(facets, package_type)

            c.facet_titles = facets

            data_dict = {
                'q': q,
                'fq': fq.strip(),
                'facet.field': facets.keys(),
                'rows': limit,
                'start': (page - 1) * limit,
                'sort': sort_by,
                'extras': search_extras
            }

            query = get_action('package_search')(context, data_dict)
            c.sort_by_selected = query['sort']

            c.page = h.Page(collection=query['results'],
                            page=page,
                            url=pager_url,
                            item_count=query['count'],
                            items_per_page=limit)
            c.facets = query['facets']
            c.search_facets = query['search_facets']
            c.page.items = query['results']
        except SearchError, se:
            log.error('Dataset search error: %r', se.args)
            c.query_error = True
            c.facets = {}
            c.search_facets = {}
            c.page = h.Page(collection=[])
    def import_stage(self, harvest_object):
        # The import stage actually creates the dataset.

        log.debug('In %s import_stage' % repr(self))

        if harvest_object.content is None:
            return True

        dataset = json.loads(harvest_object.content)
        schema_version = '1.2'  # default when no schema_version extra is present
        is_collection = False
        parent_pkg_id = ''
        catalog_extras = {}
        for extra in harvest_object.extras:
            if extra.key == 'schema_version':
                schema_version = extra.value
            if extra.key == 'is_collection' and extra.value:
                is_collection = True
            if extra.key == 'collection_pkg_id' and extra.value:
                parent_pkg_id = extra.value
            if extra.key.startswith('catalog_'):
                catalog_extras[extra.key] = extra.value

        # if this dataset is part of a collection, we need to check whether the
        # parent dataset exists. We don't support any deeper hierarchy, so the
        # check does not apply to datasets that are themselves collections.
        if parent_pkg_id and not is_collection:
            parent_pkg = None
            try:
                parent_pkg = get_action('package_show')(self.context(), {
                    "id": parent_pkg_id
                })
            except:
                pass
            if not parent_pkg:
                parent_check_message = "isPartOf identifier '%s' not found." \
                                       % dataset.get('isPartOf')
                self._save_object_error(parent_check_message, harvest_object,
                                        'Import')
                return None

        # Get default values.
        dataset_defaults = self.load_config(harvest_object.source)["defaults"]

        source_config = json.loads(harvest_object.source.config or '{}')
        validator_schema = source_config.get('validator_schema')
        if schema_version == '1.2' and validator_schema != 'non-federal':
            lowercase_conversion = True
        else:
            lowercase_conversion = False

        MAPPING = {
            "title": "title",
            "identifier": "extras__identifier",
            "description": "notes",
            "keyword": "tags",
            "modified": "modified",
            "author": "author",
            "author_email": "author_email",
            "maintainer": "maintainer",
            "maintainer_email": "maintainer_email",
            "dataQuality": "extras__dataQuality",
            "license": "license_title",
            "spatial": "extras__spatial",
            "temporal": "extras__temporal",
            "superTheme": "groups",
            "primaryITInvestmentUII": "extras__primaryITInvestmentUII",
            "accrualPeriodicity": "extras__updateFrequency",
            "landingPage": "url",
            "language": "extras__language",
            "references": "extras__references",
            "issued": "extras__issued",
            "distribution": None,
        }

        MAPPING_V1_1 = {
            "title": "title",
            "identifier": "extras__identifier",
            "description": "notes",
            "keyword": "tags",
            "modified": "modified",
            "author": "author",
            "author_email": "author_email",
            "maintainer": "maintainer",
            "maintainer_email": "maintainer_email",
            "dataQuality": "extras__dataQuality",
            "license": "license_title",
            "spatial": "extras__spatial",
            "temporal": "extras__temporal",
            "superTheme": "groups",
            "primaryITInvestmentUII": "extras__primaryITInvestmentUII",
            "accrualPeriodicity": "extras__updateFrequency",
            "landingPage": "url",
            "language": "extras__language",
            "references": "extras__references",
            "issued": "extras__issued",
            "distribution": None,
        }
        MAPPING_V1_2 = {
            "title": "title",
            "identifier": "extras__identifier",
            "description": "notes",
            "keyword": "tags",
            "modified": "modified",
            "author": "author",
            "author_email": "author_email",
            "maintainer": "maintainer",
            "maintainer_email": "maintainer_email",
            "dataQuality": "extras__dataQuality",
            "license": "license_title",
            "spatial": "extras__spatial",
            "temporal": "extras__temporal",
            "superTheme": "groups",
            "primaryITInvestmentUII": "extras__primaryITInvestmentUII",
            "accrualPeriodicity": "extras__updateFrequency",
            "landingPage": "url",
            "language": "extras__language",
            "references": "extras__references",
            "issued": "extras__issued",
            "distribution": None,
        }

        SKIP = [
            "accessURL", "webService", "format", "distribution",
            "processed_how"
        ]

        SKIP_V1_1 = [
            "@type", "isPartOf", "license", "distribution", "processed_how"
        ]

        if lowercase_conversion:

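            # Lowercase both the mapping keys and the incoming dataset keys so
            # case differences in the remote data.json do not break the mapping.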
            mapping_processed = {}
            for k, v in MAPPING.items():
                mapping_processed[k.lower()] = v

            skip_processed = [k.lower() for k in SKIP]

            dataset_processed = {'processed_how': ['lowercase']}
            for k, v in dataset.items():
                if k.lower() in mapping_processed.keys():
                    dataset_processed[k.lower()] = v
                else:
                    dataset_processed[k] = v

            if 'distribution' in dataset and dataset[
                    'distribution'] is not None:
                dataset_processed['distribution'] = []
                for d in dataset['distribution']:
                    d_lower = {}
                    for k, v in d.items():
                        if k.lower() in mapping_processed.keys():
                            d_lower[k.lower()] = v
                        else:
                            d_lower[k] = v
                    dataset_processed['distribution'].append(d_lower)
        else:
            dataset_processed = dataset
            mapping_processed = MAPPING
            skip_processed = SKIP

        if schema_version == '1.1':
            mapping_processed = MAPPING_V1_1
            skip_processed = SKIP_V1_1
        if schema_version == '1.2':
            mapping_processed = MAPPING_V1_2
            skip_processed = SKIP_V1_1

        validate_message = self._validate_dataset(validator_schema,
                                                  schema_version,
                                                  dataset_processed)
        if validate_message:
            self._save_object_error(validate_message, harvest_object, 'Import')
            return None

        # We need to get the owner organization (if any) from the harvest
        # source dataset
        owner_org = None
        source_dataset = model.Package.get(harvest_object.source.id)
        if source_dataset.owner_org:
            owner_org = source_dataset.owner_org

        source_config = json.loads(harvest_object.source.config or '{}')
        # group_name = source_config.get('default_groups', '')
        group_name = [{
            'name': theme.lower()
        } for theme in dataset['superTheme']]

        # Assemble basic information about the dataset.

        pkg = {
            "state":
            "active",  # in case was previously deleted
            "owner_org":
            owner_org,
            "groups":
            group_name,
            "resources": [],
            "extras": [
                {
                    "key": "resource-type",
                    "value": "Dataset",
                },
                {
                    "key":
                    "source_hash",
                    "value":
                    self.make_upstream_content_hash(dataset,
                                                    harvest_object.source,
                                                    catalog_extras,
                                                    schema_version),
                },
                {
                    "key": "source_datajson_identifier",
                    "value": True,
                },
                {
                    "key": "harvest_source_id",
                    "value": harvest_object.harvest_source_id,
                },
                {
                    "key": "harvest_object_id",
                    "value": harvest_object.id,
                },
                {
                    "key": "harvest_source_title",
                    "value": harvest_object.source.title,
                },
                {
                    "key": "source_schema_version",
                    "value": schema_version,
                },
            ]
        }

        extras = pkg["extras"]
        unmapped = []

        for key, value in dataset_processed.iteritems():
            if key in skip_processed:
                continue
            new_key = mapping_processed.get(key)
            if not new_key:
                unmapped.append(key)
                continue

            # after schema 1.0+, we need to deal with multiple new_keys
            new_keys = []
            values = []
            if isinstance(new_key, dict):  # when schema is not 1.0
                _new_key_keys = new_key.keys()
                new_keys = new_key.values()
                values = []
                for _key in _new_key_keys:
                    values.append(value.get(_key))
            else:
                new_keys.append(new_key)
                values.append(value)

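            # Skip this field entirely if every mapped value is empty.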
            if not any(item for item in values):
                continue

            mini_dataset = dict(zip(new_keys, values))
            for mini_key, mini_value in mini_dataset.iteritems():
                if not mini_value:
                    continue
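                # Keys with the 'extras__' prefix (8 characters) become package
                # extras; everything else is a top-level package field.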
                if mini_key.startswith('extras__'):
                    extras.append({"key": mini_key[8:], "value": mini_value})
                else:
                    pkg[mini_key] = mini_value

        # pick a fixed number of unmapped entries and put them into extras
        if unmapped:
            unmapped.sort()
            del unmapped[100:]
            for key in unmapped:
                value = dataset_processed.get(key, "")
                if value is not None:
                    extras.append({"key": key, "value": value})

        # if theme is geospatial/Geospatial, we tag it in metadata_type.
        themes = self.find_extra(pkg, "theme")
        if themes and ('geospatial' in [x.lower() for x in themes]):
            extras.append({'key': 'metadata_type', 'value': 'geospatial'})

        if is_collection:
            extras.append({'key': 'collection_metadata', 'value': 'true'})
        elif parent_pkg_id:
            extras.append({
                'key': 'collection_package_id',
                'value': parent_pkg_id
            })

        for k, v in catalog_extras.iteritems():
            extras.append({'key': k, 'value': v})

        # Set specific information about the dataset.
        self.set_dataset_info(pkg, dataset_processed, dataset_defaults,
                              schema_version)

        # Try to update an existing package with the ID set in harvest_object.guid. If that GUID
        # corresponds with an existing package, get its current metadata.
        try:
            existing_pkg = get_action('package_show')(self.context(), {
                "id": harvest_object.guid
            })
        except NotFound:
            existing_pkg = None

        if existing_pkg:
            # Update the existing metadata with the new information.

            # But before doing that, try to avoid replacing existing resources with new resources
            # by assigning resource IDs where they match up.
            for res in pkg.get("resources", []):
                for existing_res in existing_pkg.get("resources", []):
                    if res["url"] == existing_res["url"]:
                        res["id"] = existing_res["id"]
            pkg['groups'] = existing_pkg['groups']
            existing_pkg.update(
                pkg
            )  # preserve other fields that we're not setting, but clobber extras
            pkg = existing_pkg

            log.warn('updating package %s (%s) from %s' %
                     (pkg["name"], pkg["id"], harvest_object.source.url))
            pkg = get_action('package_update')(self.context(), pkg)
        else:
            # It doesn't exist yet. Create a new one.
            pkg['name'] = self.make_package_name(dataset_processed["title"],
                                                 harvest_object.guid)
            try:
                pkg = get_action('package_create')(self.context(), pkg)
                log.warn('created package %s (%s) from %s' %
                         (pkg["name"], pkg["id"], harvest_object.source.url))
            except IntegrityError:
                # sometimes one fetch worker does not see new pkg added
                # by other workers. it gives db error for pkg with same title.
                model.Session.rollback()
                pkg['name'] = self.make_package_name(
                    dataset_processed["title"], harvest_object.guid)
                pkg = get_action('package_create')(self.context(), pkg)
                log.warn('created package %s (%s) from %s' %
                         (pkg["name"], pkg["id"], harvest_object.source.url))
            except:
                log.error('failed to create package %s from %s' %
                          (pkg["name"], harvest_object.source.url))
                raise

        # Flag the other HarvestObjects linking to this package as not current anymore
        for ob in model.Session.query(HarvestObject).filter_by(
                package_id=pkg["id"]):
            ob.current = False
            ob.save()

        # Flag this HarvestObject as the current harvest object
        harvest_object.package_id = pkg['id']
        harvest_object.current = True
        harvest_object.save()
        model.Session.commit()

        # Now that the package and the harvest source are associated, re-index the
        # package so it knows it is part of the harvest source. The CKAN harvester
        # does this by creating the association before the package is saved by
        # overriding the GUID creation on a new package. That's too difficult.
        # So here we end up indexing twice.
        PackageSearchIndex().index_package(pkg)

        return True
class DatasetHarvesterBase(HarvesterBase):
    """
    A Harvester for datasets.
    """
    _user_name = None

    def validate_config(self, config):
        if not config:
            return config
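        # Parsing acts as validation here: yaml.load raises if the config
        # string is not valid YAML.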
        config_obj = yaml.load(config)
        return config

    def load_config(self, harvest_source):
        # Load the harvest source's configuration data. We expect it to be a YAML
        # string. Unfortunately I went ahead of CKAN on this. The stock CKAN harvester
        # only allows JSON in the configuration box. My fork is necessary for this
        # to work: https://github.com/joshdata/ckanext-harvest

        ret = {
            "filters":
            {},  # map data.json field name to list of values one of which must be present
            "defaults": {},
        }

        source_config = yaml.load(harvest_source.config)

        try:
            ret["filters"].update(source_config["filters"])
        except TypeError:
            pass
        except KeyError:
            pass

        try:
            ret["defaults"].update(source_config["defaults"])
        except TypeError:
            pass
        except KeyError:
            pass

        return ret

    def _get_user_name(self):
        if not self._user_name:
            user = p.toolkit.get_action('get_site_user')({
                'model': model,
                'ignore_auth': True
            }, {})
            self._user_name = user['name']

        return self._user_name

    def context(self):
        return {"user": self._get_user_name(), "ignore_auth": True}

    # SUBCLASSES MUST IMPLEMENT
    def load_remote_catalog(self, harvest_job):
        raise Exception("Not implemented")

    def extra_schema(self):
        return {
            'validator_schema': [ignore_empty, unicode, validate_schema],
        }

    def gather_stage(self, harvest_job):
        log.debug('In %s gather_stage (%s)' %
                  (repr(self), harvest_job.source.url))
        try:
            source_datasets, catalog_values = self.load_remote_catalog(
                harvest_job)
        except ValueError as e:
            self._save_gather_error("Error loading json content: %s." % (e),
                                    harvest_job)
            return []
        tmp_superThemes = [
            "agri", "educ", "econ", "ener", "envi", "gove", "heal", "intr",
            "just", "regi", "soci", "tech", "tran"
        ]
        ckan_host = ''
        # Call to config.ini to load superTheme list
        if 'CKAN_CONFIG' in environ:
            if path.exists(environ['CKAN_CONFIG']):
                try:
                    tmp_ckan_config = ConfigParser()
                    tmp_ckan_config.read(environ['CKAN_CONFIG'])
                except IOError:
                    log.warn(
                        'Error loading CKAN config.ini file [%s]. '
                        'Loading default SuperThemes', environ['CKAN_CONFIG'])
                except Exception:
                    log.warn(
                        'Unknown error loading CKAN config.ini file [%s]. '
                        'Loading default SuperThemes', environ['CKAN_CONFIG'])
                try:
                    ckan_host = tmp_ckan_config.get('app:main',
                                                    'ckan.site_url')
                except Exception:
                    log.warn(
                        'Error loading \"ckan.site_url\" from CKAN config.ini file [%s]. '
                        'Loading default SuperThemes', environ['CKAN_CONFIG'])
                # Get superThemeTaxonomy
                try:
                    if len(ckan_host) > 0:
                        stt_url = '{site_url}/superThemeTaxonomy.json'.format(
                            site_url=ckan_host)
                        superThemeTaxonomy = requests.get(stt_url)
                        superThemeTaxonomy = superThemeTaxonomy.json()
                        if len(superThemeTaxonomy) == 0:
                            raise Exception('SuperThemeTaxonomy JSON is empty')
                        if not all('id' in theme
                                   for theme in superThemeTaxonomy):
                            raise Exception(
                                'SuperThemeTaxonomy JSON does not contain an '
                                '"id" field')
                        tmp_superThemes = [
                            theme['id'] for theme in superThemeTaxonomy
                        ]
                        log.info("superThemeTaxonomy loaded!")
                    else:
                        raise Exception(
                            'The field of config.ini \"site_url\" is empty.')
                except Exception, e:
                    log.warn("Error getting \"ThemeTaxonomy.json\", err: %s.",
                             e)
        superThemes = tmp_superThemes
        for dataset in source_datasets:
            # Delete the @type key if it exists
            try:
                del dataset['@type']
            except Exception:
                pass

            try:
                theme = dataset['theme']
                log.info('Theme exists and its value is: {0}.'.format(theme))
            except KeyError:
                log.warn('The "theme" field does not exist; defaulting to an empty list.')
                dataset.update({'theme': []})
            try:
                tags = dataset['keyword']
                themes = dataset['theme']
                if len(themes) > 0:
                    if type(tags) is list:
                        dataset['keyword'] = tags + themes
                    else:
                        dataset['keyword'] = [tags] + themes
            except KeyError:
                pass
            try:
                dataset.update({'author_email': dataset['publisher']['mbox']})
            except KeyError:
                log.warn(
                    'The "publisher" field for "{0}" does not contain an "mbox" field.'
                    .format(dataset['title']))
                dataset.update({'author_email': "unknown"})
            except Exception:
                log.warn(
                    'The "publisher" field for "{0}" failed. This error is critical, '
                    'so the "mbox" field will be filled in to avoid future errors.'
                    .format(dataset['title']))
                dataset.update({'author_email': "unknown"})

            try:
                dataset.update({'author': dataset['publisher']['name']})
            except KeyError:
                log.warn(
                    'The "publisher" field for "{0}" does not contain a "name" field.'
                    .format(dataset['title']))
                dataset.update({'author': "unknown"})
            except Exception:
                log.warn(
                    'The "publisher" field for "{0}" failed. This error is critical, '
                    'so the "name" field will be filled in to avoid future errors.'
                    .format(dataset['title']))
                dataset.update({'author': "unknown"})
            try:
                del dataset['publisher']
            except Exception:
                pass

            try:
                dataset.update(
                    {'maintainer_email': dataset['contactPoint']['hasEmail']})
                dataset.update({'maintainer': dataset['contactPoint']['fn']})
                del dataset['contactPoint']
            except Exception:
                dataset.update({'maintainer_email': ""})
                dataset.update({'maintainer': ""})
                dataset.pop('contactPoint', None)

        DATAJSON_SCHEMA = source_datasets
        schema_version = '1.2'
        parent_identifiers = set()
        child_identifiers = set()
        catalog_extras = {}
        if isinstance(catalog_values, dict):
            schema_version = '1.2'
            for dataset in source_datasets:
                parent_identifier = dataset.get('isPartOf')
                if parent_identifier:
                    parent_identifiers.add(parent_identifier)
                    child_identifiers.add(dataset.get('identifier'))

            # get a list of needed catalog values and put into hobj
            catalog_fields = ['title', 'description']
            catalog_extras = dict(('catalog_' + k, v)
                                  for (k, v) in catalog_values.iteritems()
                                  if k in catalog_fields)

        # Loop through the packages we've already imported from this source
        # and go into their extra fields to get their source_identifier,
        # which corresponds to the remote catalog's 'identifier' field.
        # Make a mapping so we know how to update existing records.
        # Added: mark all existing parent datasets.
        existing_datasets = {}
        existing_parents = {}
        for hobj in model.Session.query(HarvestObject).filter_by(
                source=harvest_job.source, current=True):
            try:
                pkg = get_action('package_show')(self.context(), {
                    "id": hobj.package_id
                })
            except:
                # reference is broken
                continue
            sid = self.find_extra(pkg, "identifier")
            is_parent = self.find_extra(pkg, "collection_metadata")
            if sid:
                existing_datasets[sid] = pkg
            if is_parent and pkg.get("state") == "active":
                existing_parents[sid] = pkg

        # which parent has been demoted to child level?
        existing_parents_demoted = set(
            identifier for identifier in existing_parents.keys() \
            if identifier not in parent_identifiers)

        # which dataset has been promoted to parent level?
        existing_datasets_promoted = set(
            identifier for identifier in existing_datasets.keys() \
            if identifier in parent_identifiers \
            and identifier not in existing_parents.keys())

        # If there are any new parents, we have to harvest the parents first.
        # Mark the status in the harvest_source config, which triggers a
        # children harvest_job after the parents job is finished.
        source = harvest_job.source
        source_config = json.loads(source.config or '{}')
        # run status: None, or parents_run, or children_run?
        run_status = source_config.get('datajson_collection')
        if parent_identifiers:
            for parent in parent_identifiers & child_identifiers:
                self._save_gather_error("Collection identifier '%s' \
                    cannot be isPartOf another collection." \
                                        % parent, harvest_job)

            new_parents = set(identifier for identifier in parent_identifiers \
                              if identifier not in existing_parents.keys())
            if new_parents:
                if not run_status:
                    # fresh start
                    run_status = 'parents_run'
                    source_config['datajson_collection'] = run_status
                    source.config = json.dumps(source_config)
                    source.save()
                elif run_status == 'children_run':
                    # it means new parents are tried and failed.
                    # but skip some which have previously reported with
                    # parent_identifiers & child_identifiers
                    for parent in new_parents - \
                            (parent_identifiers & child_identifiers):
                        self._save_gather_error("Collection identifier '%s' \
                            not found. Records which are part of this \
                            collection will not be harvested." \
                                                % parent, harvest_job)
                else:
                    # run_status was parents_run, and did not finish.
                    # something wrong but not sure what happened.
                    # let's leave it as it is, let it run one more time.
                    pass
            else:
                # all parents are already in place. run it as usual.
                run_status = None
        elif run_status:
            # need to clear run_status
            run_status = None
            source_config['datajson_collection'] = run_status
            source.config = json.dumps(source_config)
            source.save()

        # Create HarvestObjects for any records in the remote catalog.

        object_ids = []
        seen_datasets = set()
        unique_datasets = set()

        filters = self.load_config(harvest_job.source)["filters"]

        for dataset in source_datasets:
            # Create a new HarvestObject for this dataset and save the
            # dataset metadata inside it for later.

            # Check the config's filters to see if we should import this dataset.
            # For each filter, check that the value specified in the data.json file
            # is among the permitted values in the filter specification.
            matched_filters = True
            for k, v in filters.items():
                if dataset.get(k) not in v:
                    matched_filters = False
            if not matched_filters:
                continue

            if parent_identifiers and new_parents \
                    and dataset['identifier'] not in parent_identifiers \
                    and dataset.get('isPartOf') in new_parents:
                if run_status == 'parents_run':
                    # skip those whose parents still need to run.
                    continue
                else:
                    # which is 'children_run'.
                    # error out since parents got issues.
                    self._save_gather_error(
                        "Record with identifier '%s': isPartOf '%s' points to \
                        an erroneous record." %
                        (dataset['identifier'], dataset.get('isPartOf')),
                        harvest_job)
                    continue

            # Some sources contain duplicate identifiers; skip all but the first one
            if dataset['identifier'] in unique_datasets:
                self._save_gather_error(
                    "Duplicate entry ignored for identifier: '%s'." %
                    (dataset['identifier']), harvest_job)
                continue
            unique_datasets.add(dataset['identifier'])

            # Get the package_id of this resource if we've already imported
            # it into our system. Otherwise, assign a brand new GUID to the
            # HarvestObject. I'm not sure what the point is of that.

            if dataset['identifier'] in existing_datasets:
                pkg = existing_datasets[dataset["identifier"]]
                pkg_id = pkg["id"]
                seen_datasets.add(dataset['identifier'])

                # We store a hash of the dict associated with this dataset
                # in the package so we can avoid updating datasets that
                # don't look like they've changed.
                if pkg.get("state") == "active" \
                        and dataset['identifier'] not in existing_parents_demoted \
                        and dataset['identifier'] not in existing_datasets_promoted \
                        and self.find_extra(pkg, "source_hash") == self.make_upstream_content_hash(dataset,
                                                                                                   harvest_job.source,
                                                                                                   catalog_extras,
                                                                                                   schema_version):
                    continue
            else:
                pkg_id = uuid.uuid4().hex

            # Create a new HarvestObject and store in it the GUID of the
            # existing dataset (if it exists here already) and the dataset's
            # metadata from the remote catalog file.
            extras = [
                HarvestObjectExtra(key='schema_version', value=schema_version)
            ]
            if dataset['identifier'] in parent_identifiers:
                extras.append(
                    HarvestObjectExtra(key='is_collection', value=True))
            elif dataset.get('isPartOf'):
                parent_pkg_id = existing_parents[dataset.get('isPartOf')]['id']
                extras.append(
                    HarvestObjectExtra(key='collection_pkg_id',
                                       value=parent_pkg_id))
            # FIX EXTRAS: copy the catalog-level extras onto every harvest object
            for k, v in catalog_extras.iteritems():
                extras.append(HarvestObjectExtra(key=k, value=v))
            obj = HarvestObject(
                guid=pkg_id,
                job=harvest_job,
                extras=extras,
                content=json.dumps(dataset, sort_keys=True)
            )  # use sort_keys to preserve field order so hashes of this string are constant from run to run
            obj.save()
            object_ids.append(obj.id)

        # Remove packages no longer in the remote catalog.
        for upstreamid, pkg in existing_datasets.items():
            if upstreamid in seen_datasets: continue  # was just updated
            if pkg.get("state") == "deleted": continue  # already deleted
            pkg["state"] = "deleted"
            log.warn('deleting package %s (%s) because it is no longer in %s' %
                     (pkg["name"], pkg["id"], harvest_job.source.url))
            get_action('package_update')(self.context(), pkg)
            obj = HarvestObject(
                # use the deleted package's own id as the guid, not the pkg_id
                # left over from the loop above
                guid=pkg["id"],
                package_id=pkg["id"],
                job=harvest_job,
            )
            obj.save()
            object_ids.append(obj.id)

        return object_ids
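
    # The "filters" entry read above via
    # self.load_config(harvest_job.source)["filters"] is expected to map
    # data.json fields to lists of permitted values; datasets whose value for a
    # field is not in the corresponding list are skipped. A minimal, purely
    # illustrative example of such a source config (field names and values are
    # assumptions, not taken from this code):
    #
    #   {
    #       "filters": {
    #           "publisher": ["Example Bureau"],
    #           "accessLevel": ["public"]
    #       }
    #   }
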
    def new_resource(self, id, data=None, errors=None, error_summary=None):
        ''' FIXME: This is a temporary action to allow styling of the
        forms. '''
        if request.method == 'POST' and not data:
            save_action = request.params.get('save')
            data = data or clean_dict(
                dict_fns.unflatten(tuplize_dict(parse_params(request.POST))))
            # we don't want to include save as it is part of the form
            del data['save']
            resource_id = data['id']
            del data['id']

            self._validate_resource(data)

            context = {
                'model': model,
                'session': model.Session,
                'user': c.user,
                'auth_user_obj': c.userobj
            }

            if save_action == 'go-dataset':
                # go to first stage of add dataset
                h.redirect_to(controller='package', action='edit', id=id)

            # see if we have any data that we are trying to save
            data_provided = False
            for key, value in data.iteritems():
                if ((value
                     or isinstance(value, cgi.FieldStorage)) and key not in [
                         'resource_type', 'license_id', 'attributesDescription'
                     ]):
                    data_provided = True
                    break

            if not data_provided and save_action != "go-dataset-complete":
                if save_action == 'go-dataset':
                    # go to first stage of add dataset
                    h.redirect_to(controller='package', action='edit', id=id)
                try:
                    data_dict = get_action('package_show')(context, {'id': id})
                except NotAuthorized:
                    abort(403, _('Unauthorized to update dataset'))
                except NotFound:
                    abort(
                        404,
                        _('The dataset {id} could not be found.').format(
                            id=id))
                if not len(data_dict['resources']):
                    # no data so keep on page
                    msg = _('You must add at least one data resource')
                    # On new templates do not use flash message

                    if asbool(config.get('ckan.legacy_templates')):
                        h.flash_error(msg)
                        h.redirect_to(controller='package',
                                      action='new_resource',
                                      id=id)
                    else:
                        errors = {}
                        error_summary = {_('Error'): msg}
                        return self.new_resource(id, data, errors,
                                                 error_summary)
                # XXX race condition if another user edits/deletes
                data_dict = get_action('package_show')(context, {'id': id})
                get_action('package_update')(dict(context,
                                                  allow_state_change=True),
                                             dict(data_dict, state='active'))
                h.redirect_to(controller='package', action='read', id=id)

            data['package_id'] = id
            try:
                if resource_id:
                    data['id'] = resource_id
                    get_action('resource_update')(context, data)
                else:
                    get_action('resource_create')(context, data)
            except ValidationError, e:
                errors = e.error_dict
                error_summary = e.error_summary
                return self.new_resource(id, data, errors, error_summary)
            except NotAuthorized:
                abort(403, _('Unauthorized to create a resource'))
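
    # For reference, the 'save' form parameter drives the branches above:
    # 'go-dataset' redirects back to the dataset edit form; when no resource
    # data was provided (and 'save' is not 'go-dataset-complete') the dataset
    # is switched to 'active' and the user is sent to its read page; otherwise
    # the posted fields are passed to resource_update (when a resource 'id' was
    # posted) or resource_create. A hedged sketch of the equivalent direct
    # action call (the url/name values are illustrative only):
    #
    #   get_action('resource_create')(context, {
    #       'package_id': id,
    #       'url': 'http://example.com/data.csv',
    #       'name': 'data.csv',
    #   })
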
    def _save_new(self, context, package_type=None):
        # The staged add-dataset flow reuses this "new" functionality while the
        # dataset is only partially created, so we need to know whether we are
        # actually updating an existing draft or creating a brand new dataset.
        is_an_update = False
        ckan_phase = request.params.get('_ckan_phase')
        from ckan.lib.search import SearchIndexError

        def pop_groups_from_data_dict_and_get_package_name_and_group_name(
                a_data_dict):
            # Pop the groups so that CKAN's validations further on do not fail
            some_group_names = [
                group['name'] for group in (
                    a_data_dict['groups'] if 'groups' in a_data_dict else [])
            ]
            a_data_dict['groups'] = []
            # The 'name' field uniquely identifies a dataset
            a_package_name = a_data_dict['name']
            return a_package_name, some_group_names

        def update_package_group_relation(a_package_name, group_names_to_add):
            # Look up the package by a_package_name to get its id
            package = model.Package.get(a_package_name)

            # We need to delete *all* `Member` objects relating `Group`s to this
            # `Package`, because those relations are rewritten below from the
            # `group_names_to_add` parameter
            for group in model.Session.query(model.Group):
                # Using the package id, query and delete the Members whose
                # table_id matches it
                members_to_delete = model.Session.query(model.Member).filter(
                    model.Member.group_id == group.id,
                    model.Member.table_name == 'package',
                    model.Member.table_id == package.id)
                for member in members_to_delete:
                    model.Session.delete(member)
            model.Session.commit()  # Is this commit actually necessary?

            # Re-link the dataset to the groups that were submitted
            for group_name in group_names_to_add:
                group = model.Group.get(group_name)

                group.add_package_by_name(a_package_name)
                group.save()

        # Defaults so the group restore below does not fail when this is not a
        # multi-phase (_ckan_phase) request.
        package_name = None
        group_names = []

        try:
            data_dict = clean_dict(
                dict_fns.unflatten(tuplize_dict(parse_params(request.POST))))
            if ckan_phase:
                # prevent clearing of groups etc
                context['allow_partial_update'] = True
                # sort the tags
                if 'tag_string' in data_dict:
                    data_dict['tags'] = self._tag_string_to_list(
                        data_dict['tag_string'])

                self._validate_dataset(data_dict)

                # Strip the groups out of data_dict so the dataset can be saved
                # even by a collaborator who is not a member of the group
                package_name, group_names = pop_groups_from_data_dict_and_get_package_name_and_group_name(
                    data_dict)

                if data_dict.get('pkg_name'):
                    is_an_update = True
                    # This is actually an update not a save
                    data_dict['id'] = data_dict['pkg_name']
                    del data_dict['pkg_name']
                    # don't change the dataset state
                    data_dict['state'] = 'draft'
                    # this is actually an edit not a save
                    pkg_dict = get_action('package_update')(context, data_dict)

                    # Restore the groups assigned to the dataset (update case)
                    update_package_group_relation(package_name, group_names)

                    if request.params['save'] == 'go-metadata':
                        # redirect to add metadata
                        url = h.url_for(controller='package',
                                        action='new_metadata',
                                        id=pkg_dict['name'])
                    elif request.params['save'] == 'save-draft':
                        url = h.url_for(controller='package',
                                        action='read',
                                        id=pkg_dict['name'])
                    else:
                        # redirect to add dataset resources
                        url = h.url_for(controller='package',
                                        action='new_resource',
                                        id=pkg_dict['name'])
                    redirect(url)
                # Make sure we don't index this dataset
                if request.params['save'] not in [
                        'go-resource', 'go-metadata'
                ]:
                    data_dict['state'] = 'draft'
                # allow the state to be changed
                context['allow_state_change'] = True

            data_dict['type'] = package_type
            context['message'] = data_dict.get('log_message', '')

            self.__generate_spatial_extra_field(data_dict)

            pkg_dict = get_action('package_create')(context, data_dict)

            # Restore the groups assigned to the dataset (insert case); only
            # relevant in the multi-phase flow, where the groups were popped above
            if package_name is not None:
                update_package_group_relation(package_name, group_names)

            if ckan_phase and request.params['save'] != 'save-draft':
                url = h.url_for(controller='package',
                                action='new_resource',
                                id=pkg_dict['name'])
                redirect(url)
            elif request.params['save'] == 'save-draft':
                url = h.url_for(controller='package',
                                action='read',
                                id=pkg_dict['name'])
                redirect(url)
            self._form_save_redirect(pkg_dict['name'],
                                     'new',
                                     package_type=package_type)
        except NotAuthorized:
            abort(401, _('Unauthorized to read package %s') % '')
        except NotFound, e:
            abort(404, _('Dataset not found'))
        except dict_fns.DataError:
            abort(400, _(u'Integrity Error'))
        except SearchIndexError, e:
            try:
                exc_str = unicode(repr(e.args))
            except Exception:  # We don't like bare excepts
                exc_str = unicode(str(e))
            abort(500, _(u'Unable to add package to search index.') + exc_str)
        except ValidationError, e:
            errors = e.error_dict
            error_summary = e.error_summary
            if is_an_update:
                # we need to get the state of the dataset to show the stage we
                # are on.
                pkg_dict = get_action('package_show')(context, data_dict)
                data_dict['state'] = pkg_dict['state']
                return self.edit(data_dict['id'], data_dict, errors,
                                 error_summary)
            data_dict['state'] = 'none'
            return self.new(data_dict, errors, error_summary)
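
    # Summary of the multi-phase flow above: when '_ckan_phase' is set, the
    # groups are popped from the submitted data (so validation does not reject
    # a collaborator who is not a member of the group), the package is created
    # or updated as a draft, and the Group<->Package Member rows are rewritten
    # afterwards by update_package_group_relation(). The 'save' parameter then
    # picks the redirect: 'go-metadata' -> new_metadata, 'save-draft' -> read,
    # anything else -> new_resource. A minimal sketch of re-linking a single
    # group by hand with the same model calls (names are illustrative):
    #
    #   group = model.Group.get('some-group')
    #   group.add_package_by_name('some-dataset')
    #   group.save()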
