Example #1
def migrate_package_to_virtuoso(config_file_path, package,
                                controlled_vocabulary):
    import ckanext.ecportal.migration.database_to_ontology_converter as database_to_ontology_converter
    try:
        dataset = database_to_ontology_converter.convert_package_to_dataset(
            package, controlled_vocabulary,
            config_file_path)  # type: DatasetDcatApOp
    except BaseException:
        import traceback
        # format_exc() returns the traceback as a string; print_exc() returns None
        log.error(traceback.format_exc())
        raise MigrationError(
            message="error migrating dataset [{0}]".format(package.name))

    if dataset:
        #is_saved = dataset.save_to_ts()
        val_dataset, error = validation.validate_dacat_dataset(dataset)

        if error.get('fatal', None) or error.get('error', None):
            directory = '/home/ecodp/migration_errors'
            if not os.path.exists(directory):
                os.makedirs(directory)
            error_file = os.path.join(directory,
                                      dataset.dataset_uri.split('/')[-1])
            with open(error_file, 'w') as f:
                f.write(json.dumps(error))

        if dataset.privacy_state == 'public':
            write_to_public_graph(dataset)
        else:
            write_to_private_graph(dataset)
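
The function above migrates one package at a time and raises MigrationError on failure. A driver that walks every package and keeps going past individual failures might look like the following minimal sketch; the model.Session query and the failure reporting are assumptions, not part of the original code:

def migrate_all_packages(config_file_path, controlled_vocabulary):
    # hypothetical driver: iterate over all active packages and collect
    # the names of those that failed to migrate
    failed = []
    packages = model.Session.query(model.Package).filter_by(state='active')
    for package in packages:
        try:
            migrate_package_to_virtuoso(config_file_path, package,
                                        controlled_vocabulary)
        except MigrationError as e:
            failed.append(package.name)  # keep going, report at the end
            log.error(e.message)
    log.info('migration finished with {0} failures'.format(len(failed)))
    return failed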
Example #2
    def __validate_package(self, context):
        data = ecportal_logic.transform_to_data_dict(request.POST)
        ui_datasets = []

        split_id = data['manifest'][0]['publisher_uri'].split('/')[-1]
        org = get_action('organization_show')(
            context, {'id': split_id.lower(), 'include_datasets': 'false'})
        data['organization'] = org

        for package in data.get('dataset', []):
            uri = package.get('uri')
            if not uri or uri == '__temporal/uri':
                uri, name = uri_util.new_cataloge_uri_from_title(
                    package.get('title', 'default'))
                package['name'] = name
            if not package.get('name'):
                package['name'] = uri_util.create_name_from_title(
                    package.get('title', 'default'))
            dataset = DatasetDcatApOp(uri)
            try:
                publisher_uri = data['manifest'][0]['publisher_uri']
                package['creator'] = publisher_uri
                dataset.create_dataset_schema_for_package_dict(package, {}, context)
                dataset.schema.publisher_dcterms['0'] = AgentSchemaDcatApOp(publisher_uri)

            except Exception:
                import traceback
                log.error(traceback.format_exc())

            dataset, errors = validation.validate_dacat_dataset(dataset, context)
            ui_ds = ui_util.transform_dcat_schema_to_form_schema(dataset)
            if errors:
                ui_ds['errors'] = errors
                data['errors'] = True
            ui_datasets.append(ui_ds)

        data['dataset'] = ui_datasets
        # data['manifest'][0]['publisher_uri'] = 'http://publications.europa.eu/resource/authority/corporate-body/{0}'.format(
        #             data.get('organization', {}).get('name', '').upper())

        return data
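
A hypothetical caller of __validate_package would branch on the aggregated errors flag that the method sets on the returned dict. A minimal sketch, assuming a controller method named save, a render helper and a follow-up _save_manifest step (all three are illustrative, not from the original code):

    def save(self):
        # hypothetical controller action: re-render the form when any
        # dataset in the manifest failed validation
        context = {'user': c.user}
        data = self.__validate_package(context)
        if data.get('errors'):
            return render('package/manifest_form.html',
                          extra_vars={'data': data})
        return self._save_manifest(data, context)  # hypothetical next step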
Example #3
def package_create(context, data_dict):
    '''
    This overrides core package_create to deal with DCAT-AP datasets and old
    CKAN model datasets.
    :param context:
    :param data_dict:
    :return:
    '''
    user = context['user']
    package_type = data_dict.get('type')
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()

    _check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    ex_url = data_dict.get('url')
    publisher = data_dict.get('owner_org')

    dataset = None
    mapper = None
    if context.get('model', '') != 'DCATAP':
        package_show_action = 'legacy_package_show'
        if config.get('ckan.ecodp.backward_compatibility',
                      'true') in ('false', 'False'):
            raise logic.NotFound('Function not available')

        #if not validation.is_ckanName_unique(data_dict.get('name', '')):
        #    raise ValidationError(_('That CKAN name is already in use.'))

        try:
            dataset = dataset_transition_util.create_dataset_schema_for_package_dict(
                data_dict)
        except ValidationError as e:
            import traceback
            log.error('{0}'.format(e))
            log.error(traceback.format_exc())
            raise e
        except BaseException as e:
            import traceback
            log.error('{0}'.format(e))
            log.error(traceback.format_exc())
            raise ValidationError(
                'Could not transform {0} to the new model'.format(
                    data_dict.get('name')))

        context['package'] = dataset

        if not ex_url:
            raise ValidationError(_('The URL is mandatory.'))

        int_id = dataset.dataset_uri.split('/')[-1]
        mapper = DatasetIdMapping(ex_url, int_id, publisher)
        if mapper.is_mapping_exists():
            raise ValidationError(
                _('That URL already exists [{0}] for publisher [{1}].'.format(
                    ex_url, publisher)))

    else:
        package_show_action = 'package_show'
        uri, ds_name = uri_util.new_dataset_uri_from_title(
            data_dict.get('title'))
        dataset = DatasetDcatApOp(uri)
        context['package'] = dataset
        # put the correct ckanName aligned with the URI
        data_dict['name'] = ds_name
        # add the default public access right
        data_dict['accessRights'] = \
            'http://publications.europa.eu/resource/authority/access-right/PUBLIC'

        dataset.create_dataset_schema_for_package_dict(data_dict, {}, context)

    start = time.time()
    dataset, errors = validation.validate_dacat_dataset(dataset, context)
    context['errors'] = errors
    log.info('validation took {0} sec'.format(time.time() - start))

    # TODO check the business rule of save
    if errors.get('fatal'):
        # dataset.privacy_state = DCATAPOP_PRIVATE_DATASET
        # dataset.add_draft_to_title()
        raise ValidationError(errors)
    elif errors.get(
            'error') and dataset.privacy_state == DCATAPOP_PUBLIC_DATASET:
        # dataset.privacy_state = DCATAPOP_PRIVATE_DATASET
        # dataset.add_draft_to_title()
        raise ValidationError(errors)
    # elif errors.get('error') and dataset.privacy_state == DCATAPOP_PRIVATE_DATASET:
    #     pass

    # if dataset.privacy_state ==DCATAPOP_PRIVATE_DATASET:
    #    dataset.add_draft_to_title()

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % dataset.dataset_uri

    try:
        state = dataset.save_to_ts(rev.id)
        if state and mapper:
            mapper.save_to_db()
    except BaseException as e:
        import traceback
        log.error('{0}'.format(e))
        log.error(traceback.format_exc())
        log.error("Error while saving the package to Virtuoso.")
        model.repo.rollback()
        raise ActionError(
            'Error while saving the package {0} to Virtuoso.'.format(
                dataset.dataset_uri))

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update, {
        'id': dataset.schema.uri,
        'organization_id':
            data_dict.get('owner_org') or data_dict.get('organization')
    })

    # for item in plugins.PluginImplementations(plugins.IPackageController):
    #     item.create(pkg)
    #
    #     item.after_create(context, data)

    # this is added so that the REST controller can make a new location
    context["id"] = dataset.schema.uri
    log.debug('Created object %s' % dataset.schema.uri)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    return_id_only = context.get('return_id_only', False)
    if return_id_only:
        output = dataset.dataset_uri
    elif package_show_action == 'legacy_package_show':
        output = _get_action(package_show_action)(
            context, {'uri': dataset.dataset_uri})
    else:
        _get_action(package_show_action)(context, {'uri': dataset.dataset_uri})
        output = context.get('package')

    indexer = solar_package_index()

    try:
        indexer.update_dict(dataset)
        if not context.get('defer_commit'):
            model.repo.commit()
    except Exception:
        dataset.delete_from_ts()
        model.repo.rollback()
        raise ActionError(
            'Error while indexing the package {0} in Solr.'.format(
                dataset.dataset_uri))

    return output
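
Which branch of package_create runs is selected entirely by context['model']. A minimal usage sketch via the action API; the payload values and organization name are illustrative assumptions:

from ckan.logic import get_action

# DCAT-AP branch: a fresh URI is minted from the title and the ckanName
# is aligned with it; with return_id_only the dataset URI comes back
context = {'model': 'DCATAP', 'user': 'admin', 'return_id_only': True}
uri = get_action('package_create')(context, {
    'title': 'Example dataset',
    'owner_org': 'estat',  # hypothetical publisher
})

# legacy branch (no model key): 'url' is mandatory and the
# (url, publisher) pair must not already exist in DatasetIdMapping
result = get_action('package_create')({'user': 'admin'}, {
    'name': 'example-dataset',
    'title': 'Example dataset',
    'url': 'http://example.org/dataset/1',
    'owner_org': 'estat',
})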
Example #4
def package_update(context, data_dict):
    '''
    This overrides core package_update to deal with DCAT-AP datasets.
    This method handles old-style CKAN property keys used by the API.
    :param context:
    :param data_dict:
    :return:
    '''
    user = context['user']
    dataset = None  # type: DatasetDcatApOp
    active_cache = config.get('ckan.cache.active', 'false')
    _check_access('package_update', context, data_dict)
    old_dataset = None
    rdft = True
    if context.get('model', '') == 'DCATAP':
        package_show_action = 'package_show'
        pkg_dict = logic.get_action('package_show')(context, {
            'id': data_dict.get('id')
        })
        dataset = context['package']
        dataset.update_dataset_for_package_dict(data_dict, {}, context)
        old_dataset = pickle.dumps(dataset)
        context['package'] = dataset

    else:  # old model, use migration. This can also be the new model coming from the UI.
        # prepare the dataset object with migration function
        package_show_action = 'legacy_package_show'
        rdft = False
        if config.get('ckan.ecodp.backward_compatibility',
                      'true') in ('false', 'False'):
            raise logic.NotFound('Function not available')

        pkg_dict = logic.get_action('package_show')(context, {
            'id': data_dict.get('name')
        })
        dataset = context['package']
        old_dataset = pickle.dumps(dataset)
        try:
            dataset = dataset_transition_util.update_dataset_for_package_dict(
                dataset, data_dict)
        except ValidationError as e:
            import traceback
            log.error('{0}'.format(e))
            log.error(traceback.format_exc())
            raise e
        except BaseException:
            import traceback
            log.error(traceback.format_exc())
            raise ValidationError(
                'Could not transform {0} to the new model'.format(
                    dataset.dataset_uri))
            # old_data_dict = logic.get_action('package_show')(context, {'id': data_dict.get('id')})
            # old_dataset = context['package']  # type: DatasetDcatApOp
    start = time.time()
    dataset, errors = validation.validate_dacat_dataset(dataset, context)
    context['errors'] = errors
    log.info('validation took {0} sec'.format(time.time() - start))
    # TODO check the business rule of save
    if errors.get('fatal'):
        raise ValidationError(errors)
    elif errors.get(
            'error') and dataset.privacy_state == DCATAPOP_PUBLIC_DATASET:
        raise ValidationError(errors)
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(
            u'REST API: Update object %s') % dataset.dataset_uri.split('/')[-1]

    try:
        save_to_ts_status = dataset.save_to_ts(rev.id)
    except BaseException:
        log.error('Error while saving the package {0} to Virtuoso.'.format(
            dataset.dataset_uri))
        model.repo.rollback()
        raise ActionError(
            'Error while saving the package {0} to Virtuoso.'.format(
                dataset.dataset_uri))

    if save_to_ts_status:
        context_org_update = context.copy()
        context_org_update['ignore_auth'] = True
        context_org_update['defer_commit'] = True
        if not rdft:
            ext_id = data_dict.get('url')
            publisher = data_dict.get('owner_org')
            int_id = dataset.dataset_uri.split('/')[-1]
            mapping = DatasetIdMapping.by_internal_id(int_id)
            if not mapping:
                mapping = DatasetIdMapping(ext_id, int_id, publisher)
                mapping.save_to_db()
            else:
                mapping.publisher = publisher
                mapping.external_id = ext_id
                mapping.update_db()

        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.edit(dataset)
            item.after_update(context, dataset)

        log.debug('Updated object %s' % dataset.dataset_uri)

        return_id_only = context.get('return_id_only', False)

        # Make sure that a user provided schema is not used on package_show
        context.pop('schema', None)

        if dataset.privacy_state == 'public' and active_cache == 'true':
            redis_cache.set_value_no_ttl_in_cache(dataset.dataset_uri,
                                                  pickle.dumps(dataset))
        else:
            redis_cache.delete_value_from_cache(dataset.dataset_uri)

        try:
            redis_cache.flush_all_from_db(redis_cache.MISC_POOL)
            search.rebuild(dataset.dataset_uri.split('/')[-1])
        except BaseException:
            log.error("Error while indexing the package {0} in Solr".format(
                dataset.dataset_uri))
            old_dataset = pickle.loads(old_dataset)
            dataset.schema = old_dataset.schema
            dataset.schema_catalog_record = old_dataset.schema_catalog_record
            dataset.privacy_state = old_dataset.privacy_state
            dataset.save_to_ts()
            search.rebuild(dataset.dataset_uri.split('/')[-1])
            model.repo.rollback()
            raise ActionError(
                'Error while indexing the package {0} in Solr.'.format(
                    dataset.dataset_uri))

        if not context.get('defer_commit'):
            model.repo.commit()

        for item in plugins.PluginImplementations(plugins.IResourceUrlChange):
            if item.name != 'qa':
                item.notify(dataset,
                            model.domain_object.DomainObjectOperation.changed)

        # we could update the dataset, so we should still be able to read it.
        context['ignore_auth'] = True
        return_id_only = context.get('return_id_only', False)
        if return_id_only:
            output = dataset.dataset_uri
        elif package_show_action == 'legacy_package_show':
            output = _get_action(package_show_action)(
                context, {'uri': dataset.dataset_uri})
        else:
            _get_action(package_show_action)(context, {
                'uri': dataset.dataset_uri
            })
            output = context.get('package')

        return output
    else:
        log.error('[Action] [Update] [Failed] [Dataset:<{0}>]'.format(
            dataset.dataset_uri))
        raise ActionError(
            'Error while saving the package {0} to Virtuoso.'.format(
                dataset.dataset_uri))
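
Before mutating the dataset, package_update takes a pickle snapshot so that a failed Solr re-index can restore and re-save the previous state. The recovery pattern in isolation, as a minimal sketch; apply_changes and reindex are hypothetical callables standing in for the update and indexing steps:

import pickle

def update_with_rollback(dataset, apply_changes, reindex):
    # snapshot the dataset before any mutation
    snapshot = pickle.dumps(dataset)
    try:
        apply_changes(dataset)
        reindex(dataset)
    except Exception:
        restored = pickle.loads(snapshot)
        # copy the mutable parts back and persist the old state again
        dataset.schema = restored.schema
        dataset.schema_catalog_record = restored.schema_catalog_record
        dataset.privacy_state = restored.privacy_state
        dataset.save_to_ts()
        raise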
Example #5
def resource_create(context, data_dict):
    '''
    This overrides core resource_create to add the resource to its parent
    DCAT-AP dataset, save it to Virtuoso and re-index it in Solr.
    :param context:
    :param data_dict:
    :return:
    '''
    user = context['user']
    dataset = None  # type: DatasetDcatApOp
    active_cache = config.get('ckan.cache.active', 'false')
    _check_access('package_update', context, data_dict)

    pkg_dict = logic.get_action('package_show')(
        context, {'id': data_dict.pop('package_id', '')})
    dataset = context['package']

    old_dataset = pickle.dumps(dataset)
    try:
        dataset = dataset_transition_util.update_dataset_for_package_dict(
            dataset, data_dict)
        dataset = dataset_transition_util.update_resources_for_dataset(
            [data_dict], dataset, dataset)
    except ValidationError as e:
        import traceback
        log.error('{0}'.format(e))
        log.error(traceback.format_exc())
        raise e
    except Exception:
        import traceback
        log.error(traceback.format_exc())
        raise ValidationError(
            'Could not transform {0} to the new model'.format(
                dataset.dataset_uri))
        # old_data_dict = logic.get_action('package_show')(context, {'id': data_dict.get('id')})
        # old_dataset = context['package']  # type: DatasetDcatApOp
    start = time.time()
    dataset, errors = validation.validate_dacat_dataset(dataset, context)
    context['errors'] = errors
    log.info('validation took {0} sec'.format(time.time() - start))
    # TODO check the business rule of save
    if errors.get('fatal'):
        # dataset.privacy_state = DCATAPOP_PRIVATE_DATASET
        # dataset.add_draft_to_title()
        raise ValidationError(errors)
    elif errors.get(
            'error') and dataset.privacy_state == DCATAPOP_PUBLIC_DATASET:
        # dataset.privacy_state = DCATAPOP_PRIVATE_DATASET
        # dataset.add_draft_to_title()
        raise ValidationError(errors)
    elif errors.get(
            'error') and dataset.privacy_state == DCATAPOP_PRIVATE_DATASET:
        # dataset.add_draft_to_title()
        pass

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(
            u'REST API: Update object %s') % dataset.dataset_uri.split('/')[-1]

    try:
        result = dataset.save_to_ts(rev.id)
    except BaseException:
        log.error('Error while saving the package {0} to Virtuoso.'.format(
            dataset.dataset_uri))
        model.repo.rollback()
        raise ActionError(
            'Error while saving the package {0} to Virtuoso.'.format(
                dataset.dataset_uri))

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True

    for item in lib_plugins.PluginImplementations(
            lib_plugins.IPackageController):
        item.edit(dataset)
        item.after_update(context, dataset)

    log.debug('Updated object %s' % dataset.dataset_uri)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    if dataset.privacy_state == 'public' and active_cache == 'true':
        redis_cache.set_value_no_ttl_in_cache(dataset.dataset_uri,
                                              pickle.dumps(dataset))
    else:
        redis_cache.delete_value_from_cache(dataset.dataset_uri)

    try:
        redis_cache.flush_all_from_db(redis_cache.MISC_POOL)
        search.rebuild(dataset.dataset_uri.split('/')[-1])
    except Exception:
        log.error("Error while indexing the package {0} in Solr".format(
            dataset.dataset_uri))
        old_dataset = pickle.loads(old_dataset)
        dataset.schema = old_dataset.schema
        dataset.schema_catalog_record = old_dataset.schema_catalog_record
        dataset.privacy_state = old_dataset.privacy_state
        dataset.save_to_ts()
        search.rebuild(dataset.dataset_uri.split('/')[-1])
        model.repo.rollback()
        raise ActionError(
            'Error while indexing the package {0} in Solr.'.format(
                dataset.dataset_uri))

    if not context.get('defer_commit'):
        model.repo.commit()

    for item in lib_plugins.PluginImplementations(
            lib_plugins.IResourceUrlChange):
        if item.name != 'qa':
            item.notify(dataset,
                        model.domain_object.DomainObjectOperation.changed)

    # we could update the dataset, so we should still be able to read it.
    context['ignore_auth'] = True
    return_id_only = context.get('return_id_only', False)

    output = _get_action('legacy_package_show')(context, {
        'uri': dataset.dataset_uri
    })

    return output
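
A hypothetical call that adds a distribution to an existing dataset through this action; the field names follow the legacy CKAN resource dictionary, and the dataset id and values are illustrative:

from ckan.logic import get_action

context = {'user': 'admin'}
resource = get_action('resource_create')(context, {
    'package_id': 'example-dataset',  # hypothetical parent dataset
    'url': 'http://example.org/files/data.csv',
    'format': 'CSV',
    'description': 'CSV distribution of the dataset',
})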