Exemplo n.º 1
0
 def test_save_as_public(self):
     """Re-save the V4 translation-memory dataset with a public privacy state.

     The dataset object is created against the private graph, switched to
     the public privacy state, refreshed through
     ``get_description_from_ts`` and then written back with ``save_to_ts``.
     The test makes no assertion; it only passes if none of the calls raise.
     """
     creation_options = {
         'privacy_state': PRIVACY_STATE_PRIVATE,
         'graph_name': DCATAPOP_PRIVATE_GRAPH_NAME,
     }
     dataset = DatasetDcatApOp(TRANSLATION_MEMORY_V_4, **creation_options)

     # The privacy state is changed *before* the description is loaded;
     # that ordering is kept exactly as it was.
     dataset.privacy_state = PRIVACY_STATE_PUBLIC
     dataset.get_description_from_ts()
     dataset.save_to_ts()
Exemplo n.º 2
0
def edit_save_to_ts():
    """Manually exercise an edit/save round trip on the V1-2 dataset.

    Loads the dataset description, switches it to the public privacy state,
    changes the CKAN name, adds a French and a Greek keyword, changes the
    telephone of the first contact point, saves, and finally reads the
    dataset back into a fresh object.

    Nothing happens if the description cannot be loaded.

    :return: None
    """
    dataset_uri = "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2"

    ds1 = DatasetDcatApOp(dataset_uri)
    if not ds1.get_description_from_ts():
        return

    ds1.privacy_state = "public"
    ds1.schema.ckanName_dcatapop['0'].value_or_uri = "NEW CKAN NAME"
    ds1.schema.keyword_dcat['fr'] = ResourceValue(u'la réussite', lang="fr")
    # Greek is "el" in ISO 639-1 ("gr" is not a language code). The key now
    # mirrors the language tag, as the French entry above does.
    ds1.schema.keyword_dcat['el'] = ResourceValue(u'επιτυχία', lang="el")
    ds1.schema.contactPoint_dcat['0'].hasTelephone_vcard['0'].hasValue_vcard['0'].uri = "TEL:213232323"

    if ds1.save_to_ts():
        # Parenthesised form prints the same text with or without
        # ``from __future__ import print_function``.
        print(" Save done")

    # Read the dataset back so the stored state can be inspected (for
    # instance from a debugger); the reloaded object is not used further.
    ds1after = DatasetDcatApOp(dataset_uri)
    ds1after.get_description_from_ts()
Exemplo n.º 3
0
 def test_edit_save_to_ts(self):
     """Edit several fields of the V1-2 dataset, save it and read it back.

     Runs ``test_get_description_from_ts`` first, then (only when the
     description can be loaded) changes the privacy state, the CKAN name,
     two keywords and the contact telephone before saving. The test makes
     no assertion; it only passes if none of the calls raise.
     """
     self.test_get_description_from_ts()

     dataset = DatasetDcatApOp(TRANSLATION_MEMORY_V_1_2)
     if not dataset.get_description_from_ts():
         return

     dataset.privacy_state = PRIVACY_STATE_PUBLIC

     schema = dataset.schema
     schema.ckanName_dcatapop['0'].value_or_uri = "NEW CKAN NAME"

     # One keyword per language, stored under the language code itself.
     french = LanguagesConstants.LANGUAGE_CODE_FR
     schema.keyword_dcat[french] = ResourceValue(u'la réussite', lang=french)
     greek = LanguagesConstants.LANGUAGE_CODE_EL
     schema.keyword_dcat[greek] = ResourceValue(u'επιτυχία', lang=greek)

     telephone = schema.contactPoint_dcat['0'].hasTelephone_vcard['0']
     telephone.hasValue_vcard['0'].uri = "TEL:213232323"

     if dataset.save_to_ts():
         print(" Save done")

     # Reload into a fresh object; the result is not checked.
     reloaded = DatasetDcatApOp(TRANSLATION_MEMORY_V_1_2)
     reloaded.get_description_from_ts()
Exemplo n.º 4
0
    def test_save_to_ts(self):
        """Check that edits to an existing dataset and a new dataset are saved.

        Part one (skipped when the existing description cannot be loaded):
        rename the first CKAN name, add a second one, change the contact
        telephone, save, reload and verify name, count and generated uris.

        Part two (always run): save a brand new dataset with a single CKAN
        name and verify the reloaded value equals the one that was set.
        """
        existing_uri = "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2"
        expected_name = "NEW CKAN NAME"
        expected_count = 2

        original = DatasetDcatApOp(existing_uri)
        if original.get_description_from_ts():
            original.privacy_state = "public"

            ckan_names = original.schema.ckanName_dcatapop
            ckan_names['0'].value_or_uri = expected_name
            ckan_names['1'] = ResourceValue("Second CKAN NAME")

            telephone = original.schema.contactPoint_dcat['0'].hasTelephone_vcard['0']
            telephone.hasValue_vcard['0'].uri = "TEL:213232323"
            original.save_to_ts()

            reloaded = DatasetDcatApOp(existing_uri)
            reloaded.get_description_from_ts()

            stored_name = reloaded.schema.ckanName_dcatapop['0'].value_or_uri
            stored_count = len(reloaded.schema.ckanName_dcatapop)
            failure_template = "Expected name {0}, New value {1}. Expected length {2}, Get {3}"
            name_and_count_ok = (stored_name == expected_name
                                 and stored_count == expected_count)
            self.assertTrue(
                name_and_count_ok,
                failure_template.format(expected_name, stored_name,
                                        expected_count, stored_count))

            # Check that generating uris from member names works when the
            # name contains a dash or a dot.
            uri_was_generated = "organisation-name" in reloaded.ttl_as_in_ts
            self.assertTrue(uri_was_generated,
                            "generation of uri from member failed")

        new_uri = "http://newdcatap.com"
        created = DatasetDcatApOp(new_uri)
        created.schema.ckanName_dcatapop['0'] = ResourceValue("ckan Name new")
        created.save_to_ts()

        created_reloaded = DatasetDcatApOp(new_uri)
        created_reloaded.get_description_from_ts()

        name_in_store = created_reloaded.schema.ckanName_dcatapop['0'].value_or_uri
        name_in_memory = created.schema.ckanName_dcatapop['0'].value_or_uri
        self.assertTrue(name_in_store == name_in_memory,
                        "New dataset is not saved")
Exemplo n.º 5
0
def save_as_public():
    """Re-save the V4 translation-memory dataset with a public privacy state.

    The dataset object is created against the private graph, switched to
    'public', refreshed through ``get_description_from_ts`` and written back
    with ``save_to_ts``.

    :return: None
    """
    dataset_uri = "http://data.europa.eu/999/dataset/dgt-translation-memory-V4"
    dataset = DatasetDcatApOp(dataset_uri,
                              privacy_state="private",
                              graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)

    # The privacy state is changed before the description is loaded; that
    # ordering is kept exactly as it was.
    dataset.privacy_state = 'public'
    dataset.get_description_from_ts()
    dataset.save_to_ts()
    def build_embargo_datasets_from_string_content(self,
                                                   rdf_string_content,
                                                   dataset_description_map,
                                                   format_input="xml",
                                                   doi_flag=True):
        """
        Build a dict of datasets in embargo mode. The key of the dict is the
        uri, the value is the dataset object.

        The content is loaded once into a graph whose name is derived from
        the MD5 digest of the content; that graph is removed and re-created
        on every call. Each uri of ``dataset_description_map`` is then read
        back from that graph.

        :param unicode rdf_string_content: RDF document to ingest.
        :param map dataset_description_map: dataset uri -> description object;
            only its ``generate_doi`` attribute is read here.
        :param str format_input: forwarded to ``ingest_graph_from_string``.
        :param bool doi_flag: when false, no DOI is generated even if a
            description asks for one.
        :rtype: dict[str, DatasetDcatApOp]
        :return: the datasets by uri, or None as soon as any step fails.
            Failures are logged and never raised to the caller.
        """
        import traceback

        def create_name_of_graph(content):
            """
            :param content: string that represents a rdf.
            :return: None if failure, a graph name otherwise.
            """
            try:
                if isinstance(content, unicode):
                    encoded = content.encode('utf8')
                else:
                    # Byte strings are round-tripped through unicode, so
                    # content that is not valid UTF-8 fails here.
                    encoded = content.decode('utf8').encode('utf8')
                return DCATAPOP_EMBARGO_NAMESPACE + hashlib.md5(encoded).hexdigest()
            except BaseException:
                # format_exc() returns the traceback text; print_exc()
                # returns None, which is what used to end up in the log.
                log.error(traceback.format_exc())
                log.error("Create name of graph failed")
                return None

        try:
            name_ingestion_graph = create_name_of_graph(rdf_string_content)
            if not name_ingestion_graph:
                log.error(
                    "Ingest dataset from string failed. Can not create a the name of the embargo graph. Content "
                    "[{0}]".format(rdf_string_content.encode('utf-8')))
                return None

            # Create the embargo graph for the current job of ingestion,
            # dropping whatever a previous run left under the same name.
            triple_store_helpers = TripleStoreCRUDHelpers()
            triple_store_helpers.graph_remove(name_ingestion_graph)
            triple_store_helpers.graph_create(name_ingestion_graph)

            # Load the rdf content into the graph only once.
            if not self.ingest_graph_from_string(name_ingestion_graph,
                                                 rdf_string_content,
                                                 format_input):
                log.error(
                    "Ingest dataset from string failed. The ingestion to the embargo graph failed. graph "
                    "name [{0}]. content: [{1}]".format(
                        name_ingestion_graph, rdf_string_content))
                return None

            list_embargo_datasets = {}
            for dataset_uri, dataset_description in dataset_description_map.items():
                embargo_dataset = DatasetDcatApOp(dataset_uri,
                                                  DCATAPOP_INGESTION_DATASET,
                                                  name_ingestion_graph)
                embargo_dataset.privacy_state = DCATAPOP_INGESTION_DATASET

                if not embargo_dataset.get_description_from_ts():
                    # A single unreadable dataset aborts the whole batch.
                    log.error(
                        "Ingest dataset from string error. Can not extract embargo dataset from graph. graph name"
                        " [{0}]. dataset uri [{1}]".format(
                            name_ingestion_graph, dataset_uri))
                    return None

                list_embargo_datasets[dataset_uri] = embargo_dataset

                # Generate DOI if requested
                if dataset_description.generate_doi and doi_flag:
                    doi = generate_doi_for_dataset(
                        embargo_dataset, dataset_description.generate_doi)
                    if doi:
                        embargo_dataset.set_doi(doi)

            return list_embargo_datasets

        except BaseException as e:
            log.error(traceback.format_exc())
            log.error(
                u"Ingest dataset from string failed. Exception {0}".format(
                    str(e)))
            log.error(
                u"Ingest dataset from string failed. file content: [{0}]".
                format(rdf_string_content))
            return None
Exemplo n.º 7
0
def convert_package_to_dataset(package=Package(), controlled_vocabulary=ControlledVocabulary(),
                               configuration_file=CONFIGURATION_FILE_PATH):
    """
    Build a DatasetDcatApOp from a package and its extras, tags, groups and
    resources.

    The dataset uri is ``DATASET_URI_PREFIX + package.name``. A private
    package is placed in the private graph with the private privacy state,
    any other package in the public graph.

    :param Package package: the package to convert.
    :param ControlledVocabulary controlled_vocabulary: controlled lists used
        to resolve publishers, frequencies, languages, countries, status,
        file types, documentation types and distribution types.
    :param configuration_file: forwarded to the retrieve_* and set_* helpers.
    :rtype: DatasetDcatApOp
    :return: the dataset, with ``schema`` and ``schema_catalog_record`` set.
    """
    # NOTE(review): both object defaults above are created once, when the
    # function is defined, and are shared by every call that omits them.
    package_extra_list = \
        retrieve_package_extra_list_from_postgres(configuration_file, package)  # type: list[PackageExtra]

    tag_list = retrieve_tag_list_from_postgres(configuration_file, package)

    resource_list = retrieve_resource_list(configuration_file, package)

    dataset_uri = DATASET_URI_PREFIX + package.name
    dataset = DatasetDcatApOp(dataset_uri)

    dataset.graph_name = DCATAPOP_PUBLIC_GRAPH_NAME
    if package.private:
        dataset.graph_name = DCATAPOP_PRIVATE_GRAPH_NAME
        dataset.privacy_state = PRIVACY_STATE_PRIVATE

    dataset_schema = DatasetSchemaDcatApOp(dataset_uri,
                                           graph_name=dataset.graph_name)  # 1...1
    dataset.schema_catalog_record = set_catalog_record(package, package_extra_list, dataset_schema)

    dataset_schema.versionInfo_owl['0'] = ResourceValue(package.version)

    set_landing_page(dataset_schema, package)

    set_package_titles(configuration_file, dataset_schema, package)  # 0...n
    set_package_descriptions(configuration_file, dataset_schema, package)  # 0...n

    dataset_schema.ckanName_dcatapop['0'] = ResourceValue(package.name)  # 1...1

    dataset_schema.modified_dcterms['0'] = ResourceValue(str(package.metadata_modified))

    groups = retrieve_groups(configuration_file, package)
    # To process only once the groups, multiple set are done once.
    set_publisher_and_theme_and_group(dataset_schema, groups, controlled_vocabulary.controlled_publishers)  # 0...1
    if not dataset_schema.publisher_dcterms.get('0', None):
        # No publisher came out of the groups: fall back to the owner
        # organisation of the package.
        owner = model.Group.get(package.owner_org)
        if owner:
            publisher_uri = 'http://publications.europa.eu/resource/authority/corporate-body/{0}'.format(
                owner.name.upper())
            dataset_schema.publisher_dcterms['0'] = AgentSchemaDcatApOp(publisher_uri,
                                                                        graph_name=dataset_schema.graph_name)
        else:
            # A missing publisher is reported but does not stop the conversion.
            log.warn('Dataset {0} has no publisher'.format(dataset_schema.uri))

    # Extras that are recognised but deliberately not converted. They are
    # tested before 'version_description', in the original order.
    ignored_extra_keys = (
        METADATA_LANGUAGE,
        CITATION,
        RELEASE_DATE,
        EVALUATION_DATE,
        SOURCE,
        ANALYST_IN_EXTRA_FIELD,
        THIS_IS_EXTRA_FIELD,
        MODIFIED_DATE,
        KIC,
        CLC,
        DATA_SOURCE,
        EIT,
    )

    for package_extra in package_extra_list:
        if not package_extra.value:
            continue
        extra_key = package_extra.key
        if extra_key == ACCRUAL_PERIODICITY:
            set_accrual_periodicity(dataset_schema, package_extra,
                                    controlled_vocabulary.controlled_frequencies)  # 0...1
        elif extra_key == TEMPORAL_COVERAGE_FROM:
            set_temporal(dataset_schema, package_extra)  # 0...1
        elif extra_key == TEMPORAL_COVERAGE_TO:
            set_temporal_to(dataset_schema, package_extra)  # 0...1
        elif extra_key == ALTERNATIVE_TITLE:
            set_alternative_titles(configuration_file, dataset_schema, package_extra)  # 0...n
        elif extra_key == IDENTIFIER:
            set_identifier(dataset_schema, package_extra)  # 0...n
        elif extra_key in ignored_extra_keys:
            pass
        elif extra_key == 'version_description':
            set_version_note(dataset_schema, package_extra)

    controlled_status = ""
    for tag in tag_list:  # type: Tag
        if not tag.name:
            continue
        if not tag.vocabulary_id:  # where voc = /
            set_keyword(dataset_schema, tag, configuration_file)  # 0...n
        elif tag.vocabulary_id == VOC_LANGUAGE_ID:  # where voc = language
            set_language(dataset_schema, tag, controlled_vocabulary.controlled_languages)  # 0...n
        elif tag.vocabulary_id == VOC_GEO_COVERAGE:  # where voc = geographical_coverage
            set_spatial(dataset_schema, tag, controlled_vocabulary.controlled_country)  # 0...n
        elif tag.vocabulary_id == VOC_DATASET_TYPE:  # where voc = dataset_type
            set_dataset_type(dataset_schema, tag)  # 0...1
        elif tag.vocabulary_id == VOC_CONCEPTS_EUROVOC:  # where voc = concepts_eurovoc
            set_subject(dataset_schema, tag)  # 0...1
        elif tag.vocabulary_id == VOC_STATUS:  # where voc = status
            # The status code is the last path segment of the tag name.
            status_code = tag.name.split('/')[-1].upper()
            if status_code == 'UNDERDEVELOPMENT':
                status_code = 'DEVELOP'
            # next() gets a default so that an unknown status does not leak
            # a bare StopIteration out of this function.
            status_uri = next(
                (uri for uri, value in controlled_vocabulary.controlled_status.iteritems()
                 if value == status_code),
                None)
            if status_uri is None:
                log.warn('Dataset %s has an unknown status %s', dataset_schema.uri, tag.name)
            else:
                controlled_status = status_uri
        # TODO no property for that in new ontology
        # (vocabulary '0311e5a2-c6a0-49c7-84cc-1ceec129fd7c', interoperability_level)

    # TODO verify this field
    dataset_schema.issued_dcterms['0'] = ResourceValue(str(get_metadata_created_timestamp(package.id)),
                                                       datatype=NAMESPACE_DCATAPOP.xsd + DATE_TIME)  # 0...1

    for resource in resource_list:
        # Named resource_kind so the builtin ``type`` is not shadowed.
        resource_kind = resource.resource_type or resource.extras
        is_documentation = (MAIN_DOCUMENTATION in resource_kind
                            or RELATED_DOCUMENTATION in resource_kind
                            or WEB_RELATED_DOCUMENTATION in resource_kind)
        if is_documentation:
            set_document(configuration_file,
                         dataset_schema,
                         resource,
                         controlled_vocabulary.controlled_file_types,
                         controlled_vocabulary.controlled_documentation_types)  # 0...n
        else:
            set_distribution(configuration_file,
                             dataset_schema,
                             resource,
                             controlled_status,
                             controlled_vocabulary.controlled_file_types,
                             controlled_vocabulary.controlled_distribution_types)

    set_contact_point(dataset_schema, package_extra_list)

    dataset.schema = dataset_schema

    return dataset