Пример #1
0
def package_update_rest_minimal(context, data_dict):
    """Update a package and keep its ``full_text_search`` extra in sync.

    When ``data_dict['extras']`` contains a ``full_text_search`` entry, the
    entry is removed from the payload with ``_del_extra_field_from_list``
    before the package is updated, and the text is then passed to
    ``fulltext_dict_save`` together with the existing ``PackageFulltext``
    row for the package (or ``None`` when there is none).

    :param context: passed unchanged to the update functions,
        ``fulltext_dict_save`` and ``check_logged_in``.
    :param data_dict: package data; may contain an ``extras`` mapping.
    :returns: if ``check_logged_in(context)`` is truthy, the updated package
        with any stored full text written back to
        ``extras['full_text_search']``; otherwise the package after
        ``_del_extra_field_from_list`` and ``_del_main_field_from_dict``.
    """
    setup()

    # ``in`` replaces dict.has_key(), which no longer exists on Python 3,
    # and avoids building a throwaway key list with .keys().
    if 'extras' in data_dict and 'full_text_search' in data_dict['extras']:
        fulltext = data_dict['extras']['full_text_search']
        # The full text is stored separately, so strip it from the payload
        # before the regular package update runs.
        data_dict = _del_extra_field_from_list(data_dict,
                                               'full_text_search')
        package = update.package_update_rest(context, data_dict)

        old_fulltext = None
        if 'id' in package:
            old_fulltext = Session.query(PackageFulltext) \
                                  .filter(PackageFulltext.package_id == package['id']) \
                                  .first()
        fulltext_dict_save(fulltext, old_fulltext, package, context)
    elif 'extras' in data_dict:
        # NOTE(review): this branch calls package_update, not
        # package_update_rest like the other two -- confirm this is intended.
        package = update.package_update(context, data_dict)
    else:
        package = update.package_update_rest(context, data_dict)

    if check_logged_in(context):
        fulltext = _get_fulltext(package['id'])
        if fulltext:
            package['extras']['full_text_search'] = fulltext.text
        return package

    minimal_package = _del_extra_field_from_list(package)
    minimal_package = _del_main_field_from_dict(minimal_package)
    return minimal_package
Пример #2
0
def package_update_rest_minimal(context, data_dict):
    """Run a REST package update, handling the ``full_text_search`` extra.

    A ``full_text_search`` value found in ``data_dict['extras']`` is taken
    out of the payload, the package is updated, and the value is saved via
    ``fulltext_dict_save`` alongside the package's current
    ``PackageFulltext`` row (``None`` if the query finds nothing or the
    updated package has no ``id``).

    :param context: forwarded to the update functions, ``fulltext_dict_save``
        and ``check_logged_in``.
    :param data_dict: package data, optionally with an ``extras`` mapping.
    :returns: the full package (with stored full text re-attached under
        ``extras``) when ``check_logged_in(context)`` is truthy, else the
        package reduced by ``_del_extra_field_from_list`` and
        ``_del_main_field_from_dict``.
    """
    setup()

    # Membership tests use ``in`` rather than dict.has_key(), which was
    # removed in Python 3; the locals formerly pre-set to '' were never read.
    if 'extras' in data_dict:
        if 'full_text_search' in data_dict['extras']:
            fulltext = data_dict['extras']['full_text_search']
            data_dict = _del_extra_field_from_list(data_dict, 'full_text_search')
            package = update.package_update_rest(context, data_dict)

            # Look up the previously stored text so it can be replaced.
            old_fulltext = None
            if 'id' in package:
                old_fulltext = Session.query(PackageFulltext) \
                                      .filter(PackageFulltext.package_id == package['id']) \
                                      .first()
            fulltext_dict_save(fulltext, old_fulltext, package, context)
        else:
            # NOTE(review): package_update is used here while the sibling
            # branches use package_update_rest -- verify this is deliberate.
            package = update.package_update(context, data_dict)
    else:
        package = update.package_update_rest(context, data_dict)

    if check_logged_in(context):
        fulltext = _get_fulltext(package['id'])
        if fulltext:
            package['extras']['full_text_search'] = fulltext.text
        return package

    minimal_package = _del_extra_field_from_list(package)
    minimal_package = _del_main_field_from_dict(minimal_package)
    return minimal_package
Пример #3
0
    def import_stage(self, harvest_object):
        """Import an open dataset and tag it with harvest provenance extras.

        The method returns early, importing nothing, when the harvested
        package's ``isopen`` flag is falsy or its ``license_id`` is listed in
        ``EXCLUDE_OPEN_LICENSES``. Otherwise the parent ``import_stage`` runs
        and, if ``harvest_object.package_id`` is set afterwards,
        ``package_update_rest`` is called with the catalogue extras.
        """
        remote = json.loads(harvest_object.content)

        # Skip anything not flagged open, or open under an excluded licence.
        if not remote['isopen'] or remote['license_id'] in EXCLUDE_OPEN_LICENSES:
            return

        super(OpenCKANHarvester, self).import_stage(harvest_object)

        if not harvest_object.package_id:
            return

        # Extras describing where the package was harvested from.
        catalogue_name = self.config.get('harvest_catalogue_name', '')
        catalogue_url = harvest_object.job.source.url
        dataset_url = (harvest_object.job.source.url.strip('/')
                       + '/package/'
                       + harvest_object.package_id)
        provenance = {
            'harvest_catalogue_name': catalogue_name,
            'harvest_catalogue_url': catalogue_url,
            'harvest_dataset_url': dataset_url,
        }

        update_context = {
            'model': model,
            'session': Session,
            'user': u'harvest',
            'id': harvest_object.package_id,
        }

        package_update_rest({'extras': provenance}, update_context)
Пример #4
0
    def import_stage(self, harvest_object):
        """Run the parent import, then attach EU and harvest extras.

        After the parent ``import_stage`` has run, nothing more happens unless
        ``harvest_object.package_id`` is set. In that case the package is
        updated through ``package_update_rest`` with the configured
        ``eu_country``, the catalogue name and URL, the dataset URL, and any
        of ``eu_nuts1``/``eu_nuts2``/``eu_nuts3`` that have a truthy value in
        ``self.config``.
        """
        super(PDEUCKANHarvester, self).import_stage(harvest_object)

        if not harvest_object.package_id:
            return

        # Extras added to the newly created package.
        extras = {}
        extras['eu_country'] = self.config.get('eu_country', '')
        extras['harvest_catalogue_name'] = self.config.get('harvest_catalogue_name', '')
        extras['harvest_catalogue_url'] = harvest_object.job.source.url
        extras['harvest_dataset_url'] = (
            harvest_object.job.source.url.strip('/') + '/package/' +
            harvest_object.package_id)

        # NUTS codes are optional: copy only the ones configured.
        for nuts_key in ('eu_nuts1', 'eu_nuts2', 'eu_nuts3'):
            if not self.config.get(nuts_key, ''):
                continue
            extras[nuts_key] = self.config[nuts_key]

        update_context = dict(
            model=model,
            session=Session,
            user=u'harvest',
            id=harvest_object.package_id,
        )

        package_update_rest({'extras': extras}, update_context)
Пример #5
0
    def import_stage(self, harvest_object):
        """Import the harvested package only if it is open, then add extras.

        Returns without importing when the package's ``isopen`` value is
        falsy or its ``license_id`` appears in ``EXCLUDE_OPEN_LICENSES``.
        Otherwise the parent ``import_stage`` is invoked; when it leaves a
        ``package_id`` on ``harvest_object``, the package is updated via
        ``package_update_rest`` with the harvest catalogue extras.
        """
        harvested = json.loads(harvest_object.content)

        # Do not import packages that are not defined as open.
        declared_open = harvested["isopen"]
        if not declared_open:
            return

        licence = harvested["license_id"]
        if licence in EXCLUDE_OPEN_LICENSES:
            return

        super(OpenCKANHarvester, self).import_stage(harvest_object)

        if harvest_object.package_id:
            # Build the extras for the newly created package one at a time.
            harvest_extras = {}
            harvest_extras["harvest_catalogue_name"] = self.config.get("harvest_catalogue_name", "")
            harvest_extras["harvest_catalogue_url"] = harvest_object.job.source.url
            base_url = harvest_object.job.source.url.strip("/")
            harvest_extras["harvest_dataset_url"] = base_url + "/package/" + harvest_object.package_id

            ctx = {
                "model": model,
                "session": Session,
                "user": u"harvest",
                "id": harvest_object.package_id,
            }

            package_update_rest({"extras": harvest_extras}, ctx)
Пример #6
0
    def _create_or_update_package(self, package_dict, harvest_object):
        '''
            Creates a new package or updates an existing one according to the
            package dictionary provided. The package dictionary should look like
            the REST API response for a package:

            http://ckan.net/api/rest/package/statistics-catalunya

            Note that the package_dict must contain an id, which will be used to
            check if the package needs to be created or updated (use the remote
            dataset id).

            If the remote server provides the modification date of the remote
            package, add it to package_dict['metadata_modified'].

            Returns True when the package was created, updated or skipped
            because it is already up to date. If a ValidationError is raised
            it is logged and recorded through _save_object_error, and the
            method returns None.

        '''
        try:
            # Change the default schema so an explicit id is accepted
            schema = default_package_schema()
            schema["id"] = [ignore_missing, unicode]

            context = {
                'model': model,
                'session': Session,
                'user': u'harvest',
                'api_version': '2',
                'schema': schema,
            }

            # Check if package exists
            context.update({'id': package_dict['id']})
            try:
                existing_package_dict = package_show(context)
                # Update when no remote modification date is available, or
                # when the remote copy is newer than the local one.
                if 'metadata_modified' not in package_dict or \
                   package_dict['metadata_modified'] > existing_package_dict['metadata_modified']:
                    log.info('Package with GUID %s exists and needs to be updated', harvest_object.guid)
                    # Update package
                    updated_package = package_update_rest(package_dict, context)

                    harvest_object.package_id = updated_package['id']
                    harvest_object.save()
                else:
                    log.info('Package with GUID %s not updated, skipping...', harvest_object.guid)

            except NotFound:
                # Package needs to be created; the id used for the lookup
                # is removed from the context before the create call.
                del context['id']
                log.info('Package with GUID %s does not exist, let\'s create it', harvest_object.guid)
                new_package = package_create_rest(package_dict, context)
                harvest_object.package_id = new_package['id']
                harvest_object.save()

            return True

        # ``except X as e`` is valid on Python 2.6+ and Python 3, whereas the
        # old ``except X, e`` form is a SyntaxError on Python 3.
        except ValidationError as e:
            log.exception(e)
            self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict), harvest_object, 'Import')
Пример #7
0
    def import_stage(self, harvest_object):
        """Run the parent import, then add EU, harvest and category extras.

        After the parent ``import_stage`` has run, nothing more happens unless
        ``harvest_object.package_id`` is set. In that case the package is
        updated through ``package_update_rest`` with these extras:

        * ``eu_country`` and ``harvest_catalogue_name`` from ``self.config``,
          plus the catalogue and dataset URLs;
        * any of ``eu_nuts1``/``eu_nuts2``/``eu_nuts3`` with a truthy value in
          ``self.config``;
        * ``categories``: the name of the first group of the harvested
          package, fetched from the remote REST API and kept in
          ``self._groups_cache``.

        If the group request fails, the error is recorded with
        ``_save_object_error`` and the ``categories`` extra is left out.
        """
        super(BerlinCKANHarvester, self).import_stage(harvest_object)

        if harvest_object.package_id:

            original_package = json.loads(harvest_object.content)

            # Add some extras to the newly created package
            new_extras = {
                'eu_country': self.config.get('eu_country', ''),
                'harvest_catalogue_name': self.config.get('harvest_catalogue_name', ''),
                'harvest_catalogue_url': harvest_object.job.source.url,
                'harvest_dataset_url': harvest_object.job.source.url.strip('/') + '/package/' + harvest_object.package_id
            }

            for extra in ['eu_nuts1', 'eu_nuts2', 'eu_nuts3']:
                if self.config.get(extra, ''):
                    new_extras[extra] = self.config[extra]

            groups = original_package.get('groups')
            if groups:
                group_id = groups[0]
                if group_id not in self._groups_cache:
                    log.debug('Requesting group details: %s' % group_id)

                    url = harvest_object.source.url.rstrip('/')
                    url = url + self._get_rest_api_offset() + '/group/' + group_id
                    # Get contents
                    try:
                        content = self._get_content(url)
                        group = json.loads(content)
                        self._groups_cache[group_id] = group['name']
                    except Exception as e:
                        self._save_object_error('Unable to get content for group: %s: %r' % \
                                                    (url, e), harvest_object)

                # A failed lookup leaves the cache without this group; the
                # error is already recorded, so skip the extra rather than
                # raise KeyError here.
                if group_id in self._groups_cache:
                    new_extras['categories'] = self._groups_cache[group_id]

            context = {
                'model': model,
                'session': Session,
                'user': u'harvest',
                'id': harvest_object.package_id
            }

            data_dict = {'extras': new_extras}
            package_update_rest(data_dict, context)