Example #1
    def _cited_responsible_party_to_responsible_organizations(self, parties, force_responsible_organization):
        if force_responsible_organization:
            if isinstance(force_responsible_organization, list):
                resp_orgs = force_responsible_organization
            else:
                resp_orgs = [force_responsible_organization]
        else:
            resp_org_roles = cioos_helpers.load_json(toolkit.config.get('ckan.responsible_organization_roles', '["owner", "originator", "custodian", "author", "principalInvestigator"]'))
            resp_orgs = [x.get('organisation-name', '').strip() for x in cioos_helpers.load_json(parties) if not set(cioos_helpers.load_json(x.get('role'))).isdisjoint(resp_org_roles)]
            resp_orgs = list(dict.fromkeys(resp_orgs))  # remove duplicates
            resp_orgs = list(filter(None, resp_orgs))  # remove empty elements (in a python 2 and 3 friendly way)

        return resp_orgs
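
A minimal usage sketch for the method above, assuming a hypothetical `plugin` instance of the class that defines it; the party list is illustrative and the default role list comes from the `ckan.responsible_organization_roles` config option:

import json

parties = json.dumps([
    {"organisation-name": "Example Ocean Institute", "role": json.dumps(["owner"])},
    {"organisation-name": "Example Ocean Institute ", "role": json.dumps(["custodian"])},
    {"organisation-name": "", "role": json.dumps(["distributor"])},
])
# with no forced organization, roles are matched against the configured list,
# then duplicates and empty names are dropped -> ['Example Ocean Institute']
resp_orgs = plugin._cited_responsible_party_to_responsible_organizations(parties, None)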
Example #2
    def infer_publisher(self, values):
        name = ''
        uri = ''
        email = ''
        url = ''
        parties = []

        if isinstance(load_json(values['metadata-point-of-contact']), dict):
            parties = load_json(values['cited-responsible-party']) + [(load_json(values['metadata-point-of-contact']))]
        else:
            parties = load_json(values['cited-responsible-party']) + load_json(values['metadata-point-of-contact'])

        for responsible_party in parties:
            if 'publisher' in responsible_party['role']:
                name = responsible_party.get('organisation-name') or responsible_party.get('individual-name')
                email = responsible_party.get('contact-info_email')
                url = responsible_party.get('contact-info_online-resource_url')
                identifier = responsible_party.get('organisation-uri') or responsible_party.get('individual-uri', {})
                if isinstance(identifier, str):
                    uri = identifier
                else:
                    code = identifier.get('code')
                    codeSpace = identifier.get('code-space')
                    authority = identifier.get('authority')
                    version = identifier.get('version')
                    if code:
                        id_list = [authority, codeSpace, code, version]
                        uri = '/'.join(x.strip() for x in id_list if x.strip())
                    else:
                        uri = ''
            if name:
                break
        if(not name):
            org_details = values.get('organization')
            org_id = org_details.get('id')
            url = org_details.get('external_home_url')

            name = toolkit.h.scheming_language_text(load_json(org_details.get('title_translated', {})))
            uri_details = org_details.get('organization-uri', {})
            if uri_details:
                code = uri_details.get('code')
                codeSpace = uri_details.get('code-space')
                authority = uri_details.get('authority')
                version = uri_details.get('version')
                id_list = [authority, codeSpace, code, version]
                uri = '/'.join(x.strip() for x in id_list if x.strip())
            else:
                uri = '{0}/organization/{1}'.format(toolkit.config.get('ckan.site_url').rstrip('/'), org_id)

        values['publisher_name'] = name
        values['publisher_uri'] = uri
        values['publisher_email'] = email
        values['publisher_url'] = url
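
A short sketch of how infer_publisher might be called, assuming a hypothetical `profile` instance of this class; the party values are illustrative, and the dict form of `organisation-uri` shows how authority, code-space, code and version are joined into `publisher_uri`:

import json

values = {
    'metadata-point-of-contact': '[]',
    'cited-responsible-party': json.dumps([{
        'role': ['publisher'],
        'organisation-name': 'Example Ocean Data Centre',
        'contact-info_email': 'publisher@example.org',
        'contact-info_online-resource_url': 'https://example.org',
        # hypothetical identifier parts
        'organisation-uri': {'authority': 'https://ror.org', 'code-space': '', 'code': '04exmpl12', 'version': ''},
    }]),
}
profile.infer_publisher(values)
# values['publisher_name'] -> 'Example Ocean Data Centre'
# values['publisher_uri']  -> 'https://ror.org/04exmpl12'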
Example #3
    def _catalog_graph(self, dataset_ref, dataset_dict):
        # remove all previous catalog nodes set by the base profile; they are rebuilt below.
        for s, p, o in self.g.triples((None, RDF.type, SCHEMA.DataCatalog)):
            self.g.remove((s, None, None))
        self.g.remove((dataset_ref, SCHEMA.includedInDataCatalog, None))

        data_catalog = BNode()
        self.g.add((dataset_ref, SCHEMA.includedInDataCatalog, data_catalog))
        self.g.add((data_catalog, RDF.type, SCHEMA.DataCatalog))
        self.g.add((data_catalog, SCHEMA.name, Literal(toolkit.h.scheming_language_text(load_json(toolkit.config.get('ckan.site_title'))))))
        self.g.add((data_catalog, SCHEMA.description, Literal(toolkit.h.scheming_language_text(load_json(toolkit.config.get('ckan.site_description'))))))
        self.g.add((data_catalog, SCHEMA.url, Literal(toolkit.config.get('ckan.site_url'))))
Example #4
    def _basic_fields_graph(self, dataset_ref, dataset_dict):
        notes = dataset_dict.get('notes_translated', dataset_dict.get('notes'))

        # remove previous notes and replace with translated version
        for s, p, o in self.g.triples((None, RDF.type, SCHEMA.Dataset)):
            self.g.remove((s, SCHEMA.description, None))
            self.g.add((s, SCHEMA.description, Literal(toolkit.h.scheming_language_text(load_json(notes)))))
Example #5
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        g = self.g

        # Creators
        for responsible_party in load_json(dataset_dict['cited-responsible-party']):
            if 'publisher' in responsible_party['role']:
                continue

            name = responsible_party.get('individual-name')
            org = responsible_party.get('organisation-name')
            email = responsible_party.get('contact-info_email')
            url = responsible_party.get('contact-info_online-resource_url')
            ind_identifier = responsible_party.get('individual-uri', {})
            if isinstance(ind_identifier, str):
                ind_uri = ind_identifier
            else:
                code = ind_identifier.get('code')
                codeSpace = ind_identifier.get('code-space')
                authority = ind_identifier.get('authority')
                version = ind_identifier.get('version')
                if code:
                    id_list = [authority, codeSpace, code, version]
                    ind_uri = '/'.join(x.strip() for x in id_list if x.strip())
                else:
                    ind_uri = ''
            org_identifier = responsible_party.get('organisation-uri', {})
            if isinstance(org_identifier, str):
                org_uri = org_identifier
            else:
                code = org_identifier.get('code')
                codeSpace = org_identifier.get('code-space')
                authority = org_identifier.get('authority')
                version = org_identifier.get('version')
                if code:
                    id_list = [authority, codeSpace, code, version]
                    org_uri = '/'.join(x.strip() for x in id_list if x.strip())
                else:
                    org_uri = ''
            if ind_uri:
                creator_details = CleanedURIRef(ind_uri)
            elif org_uri:
                creator_details = CleanedURIRef(org_uri)
            else:
                creator_details = BNode()
            if name:
                ind_names = name.split(' ')
                self.g.add((creator_details, RDF.type, SCHEMA.Person))
                self.g.add((creator_details, SCHEMA.name, Literal(name)))
                self.g.add((creator_details, SCHEMA.sameAs, Literal(ind_uri)))
                self.g.add((creator_details, SCHEMA.givenName, Literal(ind_names[0])))
                self.g.add((creator_details, SCHEMA.additionalName, Literal(','.join(ind_names[1:-1]))))
                self.g.add((creator_details, SCHEMA.familyName, Literal(ind_names[-1])))
                self.g.add((creator_details, SCHEMA.affiliation, Literal(org)))
            elif org:
                self.g.add((creator_details, RDF.type, SCHEMA.Organization))
                self.g.add((creator_details, SCHEMA.name, Literal(org)))
                self.g.add((creator_details, SCHEMA.sameAs, Literal(org_uri)))

            self.g.add((dataset_ref, SCHEMA.creator, creator_details))

        # change license over to "use-limitations"
        use_limitations_str = dataset_dict.get('use-limitations', '[]')
        dataset_name = dataset_dict.get('name')
        try:
            use_limitations = json.loads(use_limitations_str)
            if use_limitations:
                for use_limitation in use_limitations:
                    creative_work = BNode()
                    g.add((creative_work, RDF.type, SCHEMA.CreativeWork))
                    license_str = "License text for {}".format(dataset_name)
                    g.add((creative_work, SCHEMA.text, Literal(use_limitation)))
                    g.add((creative_work, SCHEMA.name, Literal(license_str)))
                    g.add((dataset_ref, SCHEMA.license, creative_work))
        # NB: json.loads raises ValueError on malformed input in Python 2; in
        #     Python 3 it raises json.JSONDecodeError, which is a subclass of
        #     ValueError, so this handler covers both.
        except ValueError:
            pass

        try:
            std_names = dataset_dict.get('cf_standard_names')
        except Exception:
            # TODO: add logging, etc
            std_names = None

        if (std_names is not None and
           hasattr(std_names, '__iter__')):
            for standard_name in sorted(std_names):
                g.add((dataset_ref, SCHEMA.variableMeasured,
                      Literal(standard_name)))

        spatial_uri = dataset_dict.get('spatial_uri')
        spatial_text = dataset_dict.get('spatial_text')

        if spatial_uri:
            spatial_ref = URIRef(spatial_uri)
        else:
            spatial_ref = BNode()

        if spatial_text:
            g.add((dataset_ref, DCT.spatial, spatial_ref))
            g.add((spatial_ref, RDF.type, DCT.Location))
            g.add((spatial_ref, RDFS.label, Literal(spatial_text)))

        spatial_geom = dataset_dict.get('spatial')

        if spatial_uri or spatial_text or spatial_geom:
            if spatial_uri:
                spatial_ref = CleanedURIRef(spatial_uri)
            else:
                spatial_ref = BNode()

            g.add((spatial_ref, RDF.type, SCHEMA.Place))
            g.add((dataset_ref, SCHEMA.spatialCoverage, spatial_ref))

        if spatial_text:
            g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text)))

        if spatial_geom:
            try:
                gj = load_json(spatial_geom)
                bounds = shape(gj).bounds
                bbox = [str(bound) for bound in bounds[1::-1] + bounds[:1:-1]]
            except Exception:
                pass
            else:
                bbox_str = ' '.join(bbox)
                geo_shape = BNode()
                g.add((geo_shape, RDF.type, SCHEMA.GeoShape))
                g.add((geo_shape, SCHEMA.box, Literal(bbox_str)))
                # Add bounding box element
                g.add((spatial_ref, SCHEMA.geo, geo_shape))

        # Basic fields
        self._basic_fields_graph(dataset_ref, dataset_dict)

        # Catalog
        self._catalog_graph(dataset_ref, dataset_dict)

        # Publisher
        self.infer_publisher(dataset_dict)
        self._publisher_graph(dataset_ref, dataset_dict)

        # Add contentUrl to Distribution
        for s, p, o in self.g.triples((None, RDF.type, SCHEMA.DataDownload)):
            url = self.g.value(s, SCHEMA.url, None)
            if url:
                g.add((s, SCHEMA.contentUrl, Literal(url)))

        # Identifier
        unique_identifiers = dataset_dict.get('unique-resource-identifier-full', {})
        if unique_identifiers:
            self.g.remove((dataset_ref, SCHEMA.identifier, None))
            for unique_identifier in unique_identifiers:
                if 'doi.org' in unique_identifier.get('authority', '') or not unique_identifier.get('authority'):
                    doi = re.sub(r'^http.*doi\.org/', '', unique_identifier['code'], flags=re.IGNORECASE)  # strip https://doi.org/ and the like
                    if doi and re.match(r'^10.\d{4,9}\/[-._;()/:A-Z0-9]+$', doi, re.IGNORECASE):
                        identifier = BNode()
                        g.add((dataset_ref, SCHEMA.identifier, identifier))
                        self.g.add((identifier, RDF.type, SCHEMA.PropertyValue))
                        self.g.add((identifier, SCHEMA.propertyID, Literal("https://registry.identifiers.org/registry/doi")))
                        self.g.add((identifier, SCHEMA.name, Literal("DOI: %s" % doi)))
                        self.g.add((identifier, SCHEMA.value, Literal("doi:%s" % doi)))
                        self.g.add((identifier, SCHEMA.url, Literal("https://doi.org/%s" % doi)))

        # Temporal
        temporal_extent = load_json(dataset_dict.get('temporal-extent', {}))
        if isinstance(temporal_extent, list):
            temporal_extent = temporal_extent[0] if temporal_extent else {}
        start = temporal_extent.get('begin')
        end = temporal_extent.get('end')
        if start or end:
            if start and end:
                self.g.add((dataset_ref, SCHEMA.temporalCoverage, Literal('%s/%s' % (start, end))))
            elif start:
                self._add_date_triple(dataset_ref, SCHEMA.temporalCoverage, start)
            elif end:
                self._add_date_triple(dataset_ref, SCHEMA.temporalCoverage, end)
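
The identifier block above strips any doi.org prefix and validates the remainder against a standard DOI pattern before emitting the PropertyValue node. A standalone sketch of just that step, using an illustrative identifier:

import re

code = 'https://doi.org/10.21238/example.123'  # illustrative unique-resource-identifier code
doi = re.sub(r'^http.*doi\.org/', '', code, flags=re.IGNORECASE)  # -> '10.21238/example.123'
if re.match(r'^10.\d{4,9}\/[-._;()/:A-Z0-9]+$', doi, re.IGNORECASE):
    print('doi:%s' % doi)                # value literal
    print('https://doi.org/%s' % doi)    # url literal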
Example #6
def update_package_relationships(context, package_dict, is_create):
    to_delete = []
    to_add = []
    to_index = []
    relationships_errors = []

    if context['auth_user_obj'] is None:
        # updating package relationships during harvest creates all kinds of issues;
        # a separate command is available to run on a cron job to handle this
        # situation, so skip for now.
        return

    rels_from_schema = get_relationships_from_schema(load_json(package_dict.get('aggregation-info', [])), package_dict['name'])
    existing_rels = []
    # get existing package relationships where this package is the subject (from)
    existing_rels = toolkit.get_action('package_relationships_list')(
        data_dict={
            'id': package_dict['id']
        }
    )

    # existing_rels - rels_from_schema
    # do not delete inbound relationships, ie where this dataset is the object/target
    to_delete = to_delete + [x for x in existing_rels
                             if x not in rels_from_schema and
                             x['type'] not in ['linked_from', 'parent_of', 'has_derivation', 'dependency_of']]

    # rels_from_schema - existing_rels
    to_add = to_add + [x for x in rels_from_schema if x not in existing_rels]

    # delete relationships
    for d in to_delete:
        try:
            toolkit.get_action('package_relationship_delete')(data_dict=d)
            to_index.append(d['object'])
            log.debug('Deleted package relationship %s %s %s', d['subject'], d['type'], d['object'])
        except Exception as e:
            log.debug('%s' % str(e))
            relationships_errors.append('%r' % e)

    if to_delete:
        # we have to purge relationships flagged as deleted otherwise we
        # will get a detachedinstanceerror when trying to re-add the
        # relationship later
        for r in model.Session.query(model.PackageRelationship).filter(
                model.PackageRelationship.state == 'deleted').all():
            r.purge()
        model.repo.commit_and_remove()

    # create relationships
    for a in to_add:
        try:
            toolkit.get_action('package_relationship_create')(context, data_dict=a)
            to_index.append(a['object'])
            log.debug('Created package relationship %s %s %s', a['subject'], a['type'], a['object'])
        except toolkit.ObjectNotFound as e:
            log.debug('Package relationship Not Found for %s %s %s', a['subject'], a['type'], a['object'])
            relationships_errors.append('Failed to create package relationship for dataset %s: %r' % (package_dict['id'], e))

    # trigger indexing of datasets we are linking to
    for package_id in to_index:
        rebuild(package_id)

    if relationships_errors:
        raise toolkit.ValidationError(relationships_errors)

    return
Example #7
def rebuild(dataset_id_or_name=None, clear=False):
    '''create package relationships for a dataset_name if given, if not then
    rebuild package relationships for all datasets'''
    from ckan.lib.search import rebuild, commit

    # cron job
    # ckan --config=/etc/ckan/production.ini package_relationships rebuild [dataset name]

    dataset_id_arg = dataset_id_or_name

    context = {'model': model, 'session': model.Session, "ignore_auth": True}

    query_str = 'aggregation-info:[* TO *]'
    if dataset_id_arg:
        query_str = query_str + ' AND name:%s' % dataset_id_arg

    # TODO: add paging in case we have more than 1000 records
    query = get_action('package_search')(context,
                                         data_dict={
                                             "q": query_str,
                                             "fl":
                                             "id,name,extras_aggregation-info",
                                             "rows": 1000
                                         })

    to_index = []
    for package_dict in query['results']:
        to_delete = []
        to_add = []
        existing_rels = []

        rels_from_schema = get_relationships_from_schema(
            load_json(package_dict.get('aggregation-info', [])),
            package_dict['name'])

        # get existing package relationships where this package is the
        # subject (from)
        try:
            existing_rels = get_action('package_relationships_list')(
                data_dict={
                    'id': package_dict['id']
                })
        except Exception as e:
            click.echo(
                'No package relationship found for dataset %s: %r' %
                (package_dict['id'], e))
            existing_rels = []

        if clear:
            to_delete = existing_rels
            to_add = []
        else:
            # existing_rels - rels_from_schema
            # do not delete inbound relationships, ie where this dataset is the object/target
            to_delete = to_delete + [
                x for x in existing_rels
                if x not in rels_from_schema and x['type'] not in [
                    'linked_from', 'parent_of', 'has_derivation',
                    'dependency_of'
                ]
            ]
            # rels_from_schema - existing_rels
            to_add = to_add + [
                x for x in rels_from_schema if x not in existing_rels
            ]

        # delete relationships
        for d in to_delete:
            try:
                get_action('package_relationship_delete')(context, data_dict=d)
                to_index.append(d['object'])
                click.echo('Deleted package relationship %s %s %s' %
                           (d['subject'], d['type'], d['object']))
            except Exception as e:
                click.echo(
                    'Failed to delete package relationship for dataset %s: %r'
                    % (package_dict['id'], e))

        if to_delete:
            # we have to purge relationships flagged as deleted otherwise we
            # will get a detachedinstanceerror when trying to re-add the
            # relationship later
            for r in model.Session.query(model.PackageRelationship).filter(
                    model.PackageRelationship.state == 'deleted').all():
                r.purge()
            model.repo.commit_and_remove()

        # create relationships
        for a in to_add:
            try:
                get_action('package_relationship_create')(context, data_dict=a)
                to_index.append(a['object'])
                click.echo('Created package relationship %s %s %s' %
                           (a['subject'], a['type'], a['object']))
            except Exception as e:
                click.echo(
                    'Failed to create package relationship for dataset %s: %r'
                    % (package_dict['id'], e))

        to_index.append(package_dict['id'])

    click.echo('Indexing datasets: %r' % to_index)
    # remove duplicates
    to_index = list(dict.fromkeys(to_index))
    # trigger indexing of datasets involved in relationships
    for target_package_id in to_index:
        ckan.lib.search.rebuild(target_package_id)
Example #8
    def after_show(self, context, package_dict):
        org_id = package_dict.get('owner_org')
        data_type = package_dict.get('type')

        if org_id and data_type == 'dataset':
            # need to turn off dataset_count, users and groups here as they cause a recursive loop
            org_details = toolkit.get_action('organization_show')(
                data_dict={
                    'id': org_id,
                    'include_datasets': False,
                    'include_dataset_count': False,
                    'include_extras': True,
                    'include_users': False,
                    'include_groups': False,
                    'include_tags': False,
                    'include_followers': False,
                }
            )

            org_title = org_details.get('title_translated', {})
            if org_title:
                package_dict['organization']['title_translated'] = org_title

            org_description = org_details.get('description_translated', {})
            if org_description:
                package_dict['organization']['description_translated'] = org_description

            org_image_url = org_details.get('image_url_translated', {})
            if org_image_url:
                package_dict['organization']['image_url_translated'] = org_image_url

        force_resp_org = cioos_helpers.load_json(self._get_extra_value('force_responsible_organization', package_dict))
        cited_responsible_party = package_dict.get('cited-responsible-party')
        if((cited_responsible_party or force_resp_org) and not package_dict.get('responsible_organizations')):
            package_dict['responsible_organizations'] = self._cited_responsible_party_to_responsible_organizations(cited_responsible_party, force_resp_org)

        if package_dict.get('cited-responsible-party'):
            package_dict['cited-responsible-party'] = self.group_by_ind_or_org(package_dict.get('cited-responsible-party'))
        if package_dict.get('metadata-point-of-contact'):
            package_dict['metadata-point-of-contact'] = self.group_by_ind_or_org(package_dict.get('metadata-point-of-contact'))

        result = package_dict
        title = result.get('title_translated')
        if(title):
            result['title_translated'] = cioos_helpers.load_json(title)
        notes = result.get('notes_translated')
        if(notes):
            result['notes_translated'] = cioos_helpers.load_json(notes)

        # convert the rest of the strings to json
        for field in [
                "keywords",
                # "spatial", removed as making the field json brakes the dataset_map template
                "temporal-extent",
                "unique-resource-identifier-full",
                "notes",
                "vertical-extent",
                "dataset-reference-date",
                "metadata-reference-date",
                "metadata-point-of-contact",
                "cited-responsible-party"]:
            tmp = result.get(field)
            if tmp:
                result[field] = cioos_helpers.load_json(tmp)
        package_dict = result


        # title and notes must be a string or the index process errors
        if isinstance(package_dict.get('title'), dict):
            package_dict['title'] = json.dumps(package_dict.get('title'))
        if isinstance(package_dict.get('notes'), dict):
            package_dict['notes'] = json.dumps(package_dict.get('notes'))

        # if(package_dict.get('title') and re.search(r'\\\\u[0-9a-fA-F]{4}', package_dict.get('title'))):
        #     if isinstance(package_dict.get('title'), str):
        #         package_dict['title'] = package_dict.get('title').encode().decode('unicode-escape')
        #     else:  # we have bytes
        #         package_dict['title'] = package_dict.get('title').decode('unicode-escape')
        #
        # if(package_dict.get('notes') and re.search(r'\\\\u[0-9a-fA-F]{4}', package_dict.get('notes'))):
        #     if isinstance(package_dict.get('notes'), str):
        #         package_dict['notes'] = package_dict.get('notes').encode().decode('unicode-escape')
        #     else:  # we have bytes
        #         package_dict['notes'] = package_dict.get('notes').decode('unicode-escape')

        # Update package relationships with package name
        ras = package_dict['relationships_as_subject']
        for rel in ras:
            if rel.get('__extras'):
                id = rel['__extras']['object_package_id']
                result = toolkit.get_action('package_search')(context, data_dict={'q': 'id:%s' % id, 'fl': 'name'})
                if result['results']:
                    rel['__extras']['object_package_name'] = result['results'][0]['name']
                rel['__extras']['subject_package_name'] = package_dict['name']
            else:
                id = rel['object_package_id']
                result = toolkit.get_action('package_search')(context, data_dict={'q': 'id:%s' % id, 'fl': 'name'})
                if result['results']:
                    rel['object_package_name'] = result['results'][0]['name']
                rel['subject_package_name'] = package_dict['name']

        rao = package_dict['relationships_as_object']
        for rel in rao:
            if rel.get('__extras'):
                rel['__extras']['object_package_name'] = package_dict['name']
                id = rel['__extras']['subject_package_id']
                result = toolkit.get_action('package_search')(context, data_dict={'q': 'id:%s' % id, 'fl': 'name'})
                if result['results']:
                    rel['__extras']['subject_package_name'] = result['results'][0]['name']
            else:
                rel['object_package_name'] = package_dict['name']
                id = rel['subject_package_id']
                result = toolkit.get_action('package_search')(context, data_dict={'q': 'id:%s' % id, 'fl': 'name'})
                if result['results']:
                    rel['subject_package_name'] = result['results'][0]['name']

        return package_dict
Example #9
    def after_search(self, search_results, search_params):
        # no need to do all this if not returning data anyway
        if search_params.get('rows') == 0:
            return search_results

        search_facets = search_results.get('search_facets', {})
        eov = search_facets.get('eov', {})
        items = eov.get('items', [])
        if items:
            schema = toolkit.h.scheming_get_dataset_schema('dataset')
            fields = schema['dataset_fields']
            field = toolkit.h.scheming_field_by_name(fields, 'eov')
            choices = toolkit.h.scheming_field_choices(field)
            new_eovs = []
            for item in items:
                for ch in choices:
                    if ch['value'] == item['name']:
                        item['display_name'] = toolkit.h.scheming_language_text(ch.get('label', item['name']))
                        item['category'] = ch.get('catagory', u'')
                new_eovs.append(item)
            search_results['search_facets']['eov']['items'] = new_eovs

        # need to turn off dataset_count here as it causes a recursive loop with package_search
        org_list = toolkit.get_action('organization_list')(
            data_dict={
                'all_fields': True,
                'include_dataset_count': False,
                'include_extras': True,
                'include_users': False,
                'include_groups': False,
                'include_tags': False,
            }
        )
        org_dict = {x['id']: x for x in org_list}
        # convert string-encoded json to json objects for translated fields.
        # package_search with filters uses solr index values, which are strings;
        # this is inconsistent with package data, which is returned as json objects
        # by the package_show and package_search endpoints without filters applied
        for result in search_results.get('results', []):

            force_resp_org = cioos_helpers.load_json(self._get_extra_value('force_responsible_organization', result))
            cited_responsible_party = result.get('cited-responsible-party')
            if((cited_responsible_party or force_resp_org) and not result.get('responsible_organizations')):
                result['responsible_organizations'] = self._cited_responsible_party_to_responsible_organizations(cited_responsible_party, force_resp_org)

            if result.get('cited-responsible-party'):
                result['cited-responsible-party'] = self.group_by_ind_or_org(result.get('cited-responsible-party'))
            if result.get('metadata-point-of-contact'):
                result['metadata-point-of-contact'] = self.group_by_ind_or_org(result.get('metadata-point-of-contact'))

            title = result.get('title_translated')
            if(title):
                result['title_translated'] = cioos_helpers.load_json(title)
            notes = result.get('notes_translated')
            if(notes):
                result['notes_translated'] = cioos_helpers.load_json(notes)

            # convert the rest of the strings to json
            for field in [
                    "keywords",
                    # "spatial", removed as making the field json brakes the dataset_map template
                    "temporal-extent",
                    "unique-resource-identifier-full",
                    "notes",
                    "vertical-extent",
                    "dataset-reference-date",
                    "metadata-reference-date",
                    "metadata-point-of-contact",
                    "cited-responsible-party"]:
                tmp = result.get(field)
                if tmp:
                    result[field] = cioos_helpers.load_json(tmp)


            # update organization object while we are at it
            org_id = result.get('owner_org')
            if org_id:
                org_details = org_dict.get(org_id)
                if org_details:
                    org_title = org_details.get('title_translated', {})
                    organization = result.get('organization', {})
                    if not organization:
                        organization = {}
                    if org_title:
                        organization['title_translated'] = org_title
                    org_description = org_details.get('description_translated', {})
                    if org_description:
                        organization['description_translated'] = org_description
                    org_image_url = org_details.get('image_url_translated', {})
                    if org_image_url:
                        organization['image_url_translated'] = org_image_url
                    if organization:
                        result['organization'] = organization
                else:
                    log.warning('No org details for owner_org %s', org_id)
            # else:
            #    log.warn('No owner_org for dataset %s: %s: %s', result.get('id'), result.get('name'), result.get('title'))

        return search_results
Example #10
    def before_index(self, data_dict):
        data_type = data_dict.get('type')
        if data_type != 'dataset':
            return data_dict

        try:
            tags_dict = cioos_helpers.load_json(data_dict.get('keywords', '{}'))
        except Exception as err:
            log.error(data_dict.get('id', 'NO ID'))
            log.error(type(err))
            log.error("error:%s, keywords:%r", err, data_dict.get('keywords', '{}'))
            tags_dict = {"en": [], "fr": []}

        force_resp_org = cioos_helpers.load_json(data_dict.get('force_responsible_organization', '[]'))
        data_dict['responsible_organizations'] = self._cited_responsible_party_to_responsible_organizations(data_dict.get('cited-responsible-party', '{}'), force_resp_org)

        # update tag list by language
        data_dict['tags_en'] = tags_dict.get('en', [])
        data_dict['tags_fr'] = tags_dict.get('fr', [])
        data_dict['tags'] = data_dict['tags_en'] + data_dict['tags_fr']

        # update organization list by language
        org_id = data_dict.get('owner_org')
        if org_id:
            org_details = toolkit.get_action('organization_show')(
                data_dict={
                    'id': org_id,
                    'include_datasets': False,
                    'include_dataset_count': False,
                    'include_extras': True,
                    'include_users': False,
                    'include_groups': False,
                    'include_tags': False,
                    'include_followers': False,
                }
            )
            org_title = org_details.get('title_translated', {})
            data_dict['organization_en'] = org_title.get('en', '')
            data_dict['organization_fr'] = org_title.get('fr', '')

        try:
            title = cioos_helpers.load_json(data_dict.get('title_translated', '{}'))
            data_dict['title_en'] = title.get('en', [])
            data_dict['title_fr'] = title.get('fr', [])
        except Exception as err:
            log.error(err)

        # create temporal extent index.
        te = data_dict.get('temporal-extent', '{}')
        if te:
            temporal_extent = cioos_helpers.load_json(te)
            temporal_extent_begin = temporal_extent.get('begin')
            temporal_extent_end = temporal_extent.get('end')
            if(temporal_extent_begin):
                data_dict['temporal-extent-begin'] = temporal_extent_begin
            if(temporal_extent_end):
                data_dict['temporal-extent-end'] = temporal_extent_end
            # if end is not set, still include these datasets in temporal searches by giving them an open-ended end of '*'
            if(temporal_extent_begin):
                data_dict['temporal-extent-range'] = '[' + temporal_extent_begin + ' TO ' + (temporal_extent_end or '*') + ']'

        # create vertical extent index
        ve = data_dict.get('vertical-extent', '{}')
        if ve:
            vertical_extent = cioos_helpers.load_json(ve)
            vertical_extent_min = vertical_extent.get('min')
            vertical_extent_max = vertical_extent.get('max')
            if(vertical_extent_min):
                data_dict['vertical-extent-min'] = vertical_extent_min
            if(vertical_extent_max):
                data_dict['vertical-extent-max'] = vertical_extent_max

        # eov is multi-select, so it is stored as a json-encoded list rather than a python list
        if(data_dict.get('eov')):
            data_dict['eov'] = cioos_helpers.load_json(data_dict['eov'])

        return data_dict
Example #11
def validator(value, context):
    range = cioos_helpers.load_json(value)
    if (not range.get('begin') and range.get('end')) or (range.get('end') and range['end'] < range['begin']):
        raise Invalid(_('Invalid value "%r". Valid ranges must contain begin <= end values') % (value))
    return value
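
A quick behavioural sketch, assuming `cioos_helpers.load_json` parses the JSON string and `Invalid`/`_` come from CKAN's validation helpers; the inputs are illustrative:

validator('{"begin": "2010-01-01", "end": "2020-01-01"}', {})  # returned unchanged
validator('{"end": "2020-01-01"}', {})                         # raises Invalid: end without begin
validator('{"begin": "2020-01-01", "end": "2010-01-01"}', {})  # raises Invalid: end < begin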