def import_stage(self, harvest_object):
        """Create or update a CKAN package from a harvested data.json entry.

        Reads the dataset JSON stored in ``harvest_object.content``, builds a
        package dict, then updates the existing package whose id equals
        ``harvest_object.guid`` or creates a new one.  Finally marks this
        HarvestObject as the current one for the package.

        Returns True on success; re-raises any error from package creation.
        """
        log.debug('In datajson import_stage')

        # Get the metadata that we stored in the HarvestObject's content field.
        dataset = json.loads(harvest_object.content)

        # Assemble basic information about the dataset.
        pkg = {
            "name": self.make_package_name(dataset["title"], harvest_object.guid),
            "extras": [{
                "key": "source_datajson_url",
                "value": harvest_object.source.url,
                },
                {
                "key": "source_datajson_identifier",
                "value": dataset["identifier"],
                }]
        }
        from parse_datajson import parse_datajson_entry
        parse_datajson_entry(dataset, pkg)

        # Try to update an existing package with the ID set in harvest_object.guid. If that GUID
        # corresponds with an existing package, get its current metadata.
        try:
            existing_pkg = get_action('package_show')(self.context(), { "id": harvest_object.guid })
        except NotFound:
            existing_pkg = None

        if existing_pkg:
            # Update the existing metadata with the new information.

            # But before doing that, try to avoid replacing existing resources with new
            # resources by assigning resource IDs where the URLs match up.  Build the
            # URL -> id map once instead of the original O(n*m) nested scan; like the
            # nested loop, a duplicate URL keeps the last existing resource's id.
            existing_ids_by_url = {
                existing_res["url"]: existing_res["id"]
                for existing_res in existing_pkg.get("resources", [])
            }
            for res in pkg.get("resources", []):
                if res["url"] in existing_ids_by_url:
                    res["id"] = existing_ids_by_url[res["url"]]

            existing_pkg.update(pkg) # preserve other fields that we're not setting, but clobber extras
            pkg = existing_pkg

            # log.warn is a deprecated alias of log.warning; use lazy %-args so the
            # message is only formatted when the record is actually emitted.
            log.warning('updating package %s (%s) from %s', pkg["name"], pkg["id"], harvest_object.source.url)
            pkg = get_action('package_update')(self.context(), pkg)
        else:
            # It doesn't exist yet. Create a new one.
            try:
                pkg = get_action('package_create')(self.context(), pkg)
                log.warning('created package %s (%s) from %s', pkg["name"], pkg["id"], harvest_object.source.url)
            except Exception:
                # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt are
                # not intercepted; the failure is logged and then re-raised.
                log.error('failed to create package %s from %s', pkg["name"], harvest_object.source.url)
                raise

        # Flag the other HarvestObjects linking to this package as not current anymore
        for ob in model.Session.query(HarvestObject).filter_by(package_id=pkg["id"]):
            ob.current = False
            ob.save()

        # Flag this HarvestObject as the current harvest object
        harvest_object.package_id = pkg['id']
        harvest_object.current = True
        harvest_object.save()

        return True
    def import_stage(self, harvest_object):
        """Create or update a CKAN package from a harvested data.json entry.

        Parses the JSON stashed in ``harvest_object.content``, assembles a
        package dict, then either updates the package identified by
        ``harvest_object.guid`` or creates a fresh one, and records this
        HarvestObject as current for the package.

        Returns True on success; re-raises any error from package creation.
        """
        log.debug('In datajson import_stage')

        # Get the metadata that we stored in the HarvestObject's content field.
        dataset = json.loads(harvest_object.content)

        # Assemble basic information about the dataset.
        pkg = {
            "name":
            self.make_package_name(dataset["title"], harvest_object.guid),
            "extras": [{
                "key": "source_datajson_url",
                "value": harvest_object.source.url,
            }, {
                "key": "source_datajson_identifier",
                "value": dataset["identifier"],
            }]
        }
        from parse_datajson import parse_datajson_entry
        parse_datajson_entry(dataset, pkg)

        # Try to update an existing package with the ID set in harvest_object.guid. If that GUID
        # corresponds with an existing package, get its current metadata.
        try:
            existing_pkg = get_action('package_show')(self.context(), {
                "id": harvest_object.guid
            })
        except NotFound:
            existing_pkg = None

        if existing_pkg:
            # Update the existing metadata with the new information.

            # But before doing that, try to avoid replacing existing resources
            # with new resources by assigning resource IDs where the URLs match.
            # A one-pass URL -> id map replaces the original O(n*m) nested loop;
            # as in that loop, a duplicate URL keeps the last resource's id.
            existing_ids_by_url = {
                existing_res["url"]: existing_res["id"]
                for existing_res in existing_pkg.get("resources", [])
            }
            for res in pkg.get("resources", []):
                if res["url"] in existing_ids_by_url:
                    res["id"] = existing_ids_by_url[res["url"]]

            existing_pkg.update(
                pkg
            )  # preserve other fields that we're not setting, but clobber extras
            pkg = existing_pkg

            # log.warn is a deprecated alias of log.warning; lazy %-args defer
            # formatting until the record is actually emitted.
            log.warning('updating package %s (%s) from %s',
                        pkg["name"], pkg["id"], harvest_object.source.url)
            pkg = get_action('package_update')(self.context(), pkg)
        else:
            # It doesn't exist yet. Create a new one.
            try:
                pkg = get_action('package_create')(self.context(), pkg)
                log.warning('created package %s (%s) from %s',
                            pkg["name"], pkg["id"], harvest_object.source.url)
            except Exception:
                # Narrowed from a bare "except:" so SystemExit and
                # KeyboardInterrupt pass through; log, then re-raise.
                log.error('failed to create package %s from %s',
                          pkg["name"], harvest_object.source.url)
                raise

        # Flag the other HarvestObjects linking to this package as not current anymore
        for ob in model.Session.query(HarvestObject).filter_by(
                package_id=pkg["id"]):
            ob.current = False
            ob.save()

        # Flag this HarvestObject as the current harvest object
        harvest_object.package_id = pkg['id']
        harvest_object.current = True
        harvest_object.save()

        return True
# Example #3
 def set_dataset_info(self, pkg, dataset, dataset_defaults):
     """Populate *pkg* from the data.json entry *dataset*, applying *dataset_defaults*.

     Thin delegation to parse_datajson.parse_datajson_entry, which mutates
     *pkg* in place.
     """
     from parse_datajson import parse_datajson_entry as fill_pkg_from_entry
     fill_pkg_from_entry(dataset, pkg, dataset_defaults)
# Example #4
 def set_dataset_info(self, pkg, dataset, dataset_defaults, schema_version):
     """Populate *pkg* from the data.json entry *dataset* (schema-aware variant).

     Delegates to parse_datajson.parse_datajson_entry, which mutates *pkg*
     in place using *dataset_defaults* and *schema_version*.
     """
     # Import locally for consistency with the sibling set_dataset_info
     # definitions; the original body used parse_datajson_entry without any
     # visible import, risking a NameError if no module-level import exists
     # — TODO confirm against the full file.
     from parse_datajson import parse_datajson_entry
     parse_datajson_entry(dataset, pkg, dataset_defaults, schema_version)
 def set_dataset_info(self, pkg, dataset, dataset_defaults):
     """Fill *pkg* in place from the data.json entry *dataset* using *dataset_defaults*.

     All real work happens in parse_datajson.parse_datajson_entry; this
     method only forwards its arguments.
     """
     from parse_datajson import parse_datajson_entry as entry_parser
     entry_parser(dataset, pkg, dataset_defaults)