Пример #1
0
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild of search index,
    to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}

    user = model.User.get(context['user'])
    if not user.name == "harvest":
        _remove_extras_from_data_dict(data_dict)

    package_data = package_show(package_context, data_dict)

    if not 'resources' in data_dict:
        # When this is reached, we are updating a dataset, not creating a new resource
        old_resources = package_data.get('resources', [])
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)
    else:
        data_dict['accept-terms'] = 'yes'  # This is not needed when adding a resource

    _handle_pids(data_dict)

    _add_ida_download_url(data_dict)

    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1
Пример #2
0
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly instead of
    during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])
    if data_dict.get('type') == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    if not user.name == "harvest":
        _remove_extras_from_data_dict(data_dict)

    data_dict = utils.dataset_to_resource(data_dict)

    if not user.name == 'harvest':
        _handle_package_id_on_create(data_dict)
    _handle_pids(data_dict)

    _add_ida_download_url(data_dict)
    
    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)
    return pkg_dict1
Пример #3
0
def package_show(context, data_dict):
    '''
    Return the metadata of a dataset (package) and its resources.

    Called before showing the dataset in some interface (browser, API),
    or when adding package to Solr index (no validation / conversions then).

    :param id: the id or name of the dataset
    :type id: string

    :rtype: dictionary
    '''

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_show_package_schema()

    if not data_dict.get('id') and not data_dict.get('name'):
        # Get package by data PIDs
        data_dict['id'] = utils.get_package_id_by_data_pids(data_dict)

    pkg_dict1 = ckan.logic.action.get.package_show(context, data_dict)
    pkg_dict1 = utils.resource_to_dataset(pkg_dict1)

    # Remove empty agents that come from padding the agent list in converters
    if 'agent' in pkg_dict1:
        agents = filter(None, pkg_dict1.get('agent', []))
        pkg_dict1['agent'] = agents or []

    # Normally logic function should not catch the raised errors
    # but here it is needed so action package_show won't catch it instead
    # Hiding information from API calls
    try:
        check_access('package_update', context)
    except NotAuthorized:
        pkg_dict1 = utils.hide_sensitive_fields(pkg_dict1)

    pkg = Package.get(pkg_dict1['id'])
    if 'erelated' in pkg.extras:
        erelated = pkg.extras['erelated']
        if len(erelated):
            for value in erelated.split(';'):
                if len(Session.query(Related).filter(Related.title == value).all()) == 0:
                    data_dict = {'title': value,
                                 'type': _("Paper"),
                                 'dataset_id': pkg.id}
                    related_create(context, data_dict)

    # Update package.title to match package.extras.title_0
    extras_title = pkg.extras.get(u'title_0')
    if extras_title and extras_title != pkg.title:
        repo.new_revision()
        pkg.title = pkg.extras[u'title_0']
        pkg.save()
        rebuild(pkg.id)  # Rebuild solr-index for this dataset

    return pkg_dict1
Пример #4
0
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly instead of
    during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])
    if data_dict.get('type') == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)
    
    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)
    return pkg_dict1
Пример #5
0
def package_show(context, data_dict):
    '''
    Return the metadata of a dataset (package) and its resources.

    Called before showing the dataset in some interface (browser, API),
    or when adding package to Solr index (no validation / conversions then).

    :param id: the id or name of the dataset
    :type id: string

    :rtype: dictionary
    '''

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_show_package_schema()

    context['use_cache'] = False  # Disable package retrieval directly from Solr as contact.email is not there.

    if not data_dict.get('id') and not data_dict.get('name'):
        # Get package by data PIDs
        data_dict['id'] = utils.get_package_id_by_primary_pid(data_dict)

    pkg_dict1 = ckan.logic.action.get.package_show(context, data_dict)
    pkg_dict1 = utils.resource_to_dataset(pkg_dict1)

    # Remove empty agents that come from padding the agent list in converters
    if 'agent' in pkg_dict1:
        agents = filter(None, pkg_dict1.get('agent', []))
        pkg_dict1['agent'] = agents or []

    # Normally logic function should not catch the raised errors
    # but here it is needed so action package_show won't catch it instead
    # Hiding information from API calls
    try:
        check_access('package_update', context)
    except NotAuthorized:
        pkg_dict1 = utils.hide_sensitive_fields(pkg_dict1)

    pkg = Package.get(pkg_dict1['id'])
    if 'erelated' in pkg.extras:
        erelated = pkg.extras['erelated']
        if len(erelated):
            for value in erelated.split(';'):
                if len(Session.query(Related).filter(Related.title == value).all()) == 0:
                    data_dict = {'title': value,
                                 'type': _("Paper"),
                                 'dataset_id': pkg.id}
                    related_create(context, data_dict)

    return pkg_dict1
Пример #6
0
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index, 
    so that this newly added package emerges in search results instantly instead of 
    during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])
    try:
        if data_dict['type'] == 'harvest' and not user.sysadmin:
            ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    except KeyError:
        log.debug("Tried to check the package type, but it wasn't present!")
        # TODO: JUHO: Dubious to let pass without checking user.sysadmin
        pass

    data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)
    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)
    return pkg_dict1
Пример #7
0
    def index(self):
        '''
        Simplified index function compared to CKAN's original one.

        :return: render home/index.html
        '''

        try:
            # package search
            context = {'model': model, 'session': model.Session,
                       'user': c.user or c.author, 'schema': kata_schemas.package_search_schema()}
            data_dict = {
                'q': '*:*',
                'facet.field': g.facets,
                'facet.limit': -1,
                'rows': 0,
                'start': 0,
                'sort': 'title_string desc',
                'fq': 'capacity:"public" +dataset_type:dataset'
            }
            query = logic.get_action('package_search')(
                context, data_dict)
            c.package_count = query['count']

            c.facets = query['facets']
            maintain.deprecate_context_item(
                'facets',
                'Use `c.search_facets` instead.')

            c.search_facets = query['search_facets']

            c.num_tags = len(c.facets.get('tags'))
            c.num_discipline = len(c.facets.get('extras_discipline'))

        except search.SearchError:
            c.package_count = 0
            c.groups = []
            c.num_tags = 0
            c.num_discipline = 0

        return render("home/index.html", cache_force=True)
Пример #8
0
    def index(self):
        '''
        Simplified index function compared to CKAN's original one.

        :return: render home/index.html
        '''

        try:
            # package search
            context = {'model': model, 'session': model.Session,
                       'user': c.user or c.author, 'schema': kata_schemas.package_search_schema()}
            data_dict = {
                'q': '*:*',
                'facet.field': g.facets,
                'facet.limit': -1,
                'rows': 0,
                'start': 0,
                'sort': 'title_string desc',
                'fq': 'capacity:"public" +dataset_type:dataset'
            }
            query = logic.get_action('package_search')(
                context, data_dict)
            c.package_count = query['count']

            c.facets = query['facets']
            maintain.deprecate_context_item(
                'facets',
                'Use `c.search_facets` instead.')

            c.search_facets = query['search_facets']

            c.num_tags = len(c.facets.get('tags'))
            c.num_discipline = len(c.facets.get('extras_discipline'))

        except search.SearchError:
            c.package_count = 0
            c.groups = []
            c.num_tags = 0
            c.num_discipline = 0

        return render("home/index.html", cache_force=True)
Пример #9
0
def get_package_ratings_for_data_dict(data_dict):
    '''
    Create a metadata rating (1-5) for given data_dict.

    This is the same as :meth:`get_package_ratings` but can be used
    for getting metadata ratings e.g. for search results where
    only raw data_dicts are available rather than already-converted
    package dicts.

    :param data_dict: A CKAN data_dict
    '''
    from ckanext.kata.schemas import Schemas         # Importing here prevents circular import

    context = {
        'model': model,
        'schema': Schemas.show_package_schema()
    }
    try:
        pkg_dict = get_action('package_show')(context, data_dict)
    except ValidationError:
        return (0, u'○○○○○')

    return get_package_ratings(pkg_dict)
Пример #10
0
def get_package_ratings_for_data_dict(data_dict):
    '''
    Create a metadata rating (1-5) for given data_dict.

    This is the same as :meth:`get_package_ratings` but can be used
    for getting metadata ratings e.g. for search results where
    only raw data_dicts are available rather than already-converted
    package dicts.

    :param data_dict: A CKAN data_dict
    '''
    from ckanext.kata.schemas import Schemas         # Importing here prevents circular import

    context = {
        'model': model,
        'schema': Schemas.show_package_schema()
    }
    try:
        pkg_dict = get_action('package_show')(context, data_dict)
    except ValidationError:
        return (0, u'○○○○○')

    return get_package_ratings(pkg_dict)
Пример #11
0
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild of search index,
    to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}
    package_data = package_show(package_context, data_dict)
    # package_data = ckan.logic.action.get.package_show(package_context, data_dict)

    old_resources = package_data.get('resources', [])

    if not 'resources' in data_dict:
        # When this is reached, we are updating a dataset, not creating a new resource
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)
    else:
        data_dict['accept-terms'] = 'yes'  # This is not needed when adding a resource

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)

    # # Check if data version has changed and if so, generate a new version_PID
    # if not data_dict['version'] == temp_pkg_dict['version']:
    #     data_dict['pids'].append(
    #         {
    #             u'provider': u'kata',
    #             u'id': utils.generate_pid(),
    #             u'type': u'version',
    #         })

    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1
Пример #12
0
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild of search index,
    to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                    'extras_as_string': True}
    package_data = package_show(package_context, data_dict)
    # package_data = ckan.logic.action.get.package_show(package_context, data_dict)

    old_resources = package_data.get('resources', [])

    if not 'resources' in data_dict:
        # When this is reached, we are updating a dataset, not creating a new resource
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)

    # # Check if data version has changed and if so, generate a new version_PID
    # if not data_dict['version'] == temp_pkg_dict['version']:
    #     data_dict['pids'].append(
    #         {
    #             u'provider': u'kata',
    #             u'id': utils.generate_pid(),
    #             u'type': u'version',
    #         })

    # This fixes extras fields being cleared when adding a resource. This is be because the extras are not properly
    # cleared in show_package_schema conversions. Some fields stay in extras and they cause all other fields to be
    # dropped in package_update(). When updating a dataset via UI or API, the conversion to extras occur in
    # package_update() and popping extras here should have no effect.

    data_dict.pop('extras', None)
    # TODO: MIKKO: Get rid of popping extras here and rather pop the additional extras in converters so we could remove the
    # popping and the above "context['allow_partial_update'] = True" which causes the extras to be processed in a way
    # that nothing gets added to extras from the converters and everything not initially present in extras gets removed.

    # TODO: JUHO: Apply correct schema depending on dataset
    # This is quick resolution. More robust way would be to check through
    # model.Package to which harvest source the dataset belongs and then get the
    # type of the harvester (eg. DDI)
    # if data_dict['name'].startswith('FSD'):
    #     context['schema'] = schemas.update_package_schema_ddi()

    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)
    return pkg_dict1