def run(self):
    """Import ODC "laws" WordPress-export XML into CKAN, one pass per language.

    For each language in ``config['supported_languages']``:
      * load the exported XML via ``data.load_odc_laws(lang)``,
      * stream-parse it with ``etree.iterparse`` and process each published
        ``<item>`` element into a CKAN dataset dict,
      * update the dataset if it already exists in CKAN, otherwise create it,
      * attach any extra URLs found on the item as CKAN resources.

    Honors ``config['skip_n_datasets']`` (skip the first N published items),
    ``config['skip_existing']`` (skip datasets already imported for this
    language) and ``config['dry']`` (no writes to CKAN).

    Returns:
        str: the literal status string "COMPLETED import_odc_laws".
    """
    print("S4_import_odc_laws run")
    for lang in config['supported_languages']:
        try:
            ontology_xml = data.load_odc_laws(lang)
            if ontology_xml:
                try:
                    context = etree.iterparse(io.BytesIO(ontology_xml), events=('end',))
                    context = iter(context)
                    # BUGFIX: `context.next()` is the Python 2 iterator API and
                    # raises AttributeError on Python 3; the builtin next()
                    # works on both.
                    root = next(context)[1]
                    counter = 0
                    for event, elem in context:
                        if event == "end" and elem.tag == "item":
                            if elem.find('wp:status', root.nsmap) is not None:
                                status = elem.find('wp:status', root.nsmap).text
                                if status == 'publish':
                                    # Optionally skip the first N published items (resume support).
                                    if (int(config['skip_n_datasets']) > 0) and \
                                            (counter < int(config['skip_n_datasets'])):
                                        counter += 1
                                        continue
                                    dataset_metadata = _map_xml_item_to_ckan_dataset_dict(root, elem)
                                    dataset_metadata = _set_extras(dataset_metadata, root, elem)
                                    dataset_metadata = _set_taxonomy_from_category(dataset_metadata, elem)
                                    dataset_metadata = _set_document_type_from_category(dataset_metadata, elem)
                                    dataset_metadata = _set_mandatory_metadata_fields_laws(dataset_metadata)
                                    dataset_metadata = script_utils._set_mandatory_metadata_fields(
                                        dataset_metadata, config['trace'])
                                    try:
                                        # Dataset already in CKAN: merge and update.
                                        existing_metadata = ckanapiutils.get_package_contents(
                                            dataset_metadata['name'])
                                        dataset_metadata = _copy_resources_info(
                                            existing_metadata, dataset_metadata)
                                        if config['skip_existing']:
                                            if lang in existing_metadata['odm_language']:
                                                print("Dataset skipped ",
                                                      dataset_metadata['name'], lang,
                                                      existing_metadata['odm_language'])
                                                continue
                                        dataset_metadata = _set_title_translated(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_notes_translated(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_odm_laws_number(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_odm_language(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_odm_spatial_range(
                                            existing_metadata, dataset_metadata)
                                        if not config['dry']:
                                            dataset_metadata = ckanapiutils.update_package(dataset_metadata)
                                            print("Dataset modified ",
                                                  dataset_metadata['id'],
                                                  dataset_metadata['title'], lang,
                                                  existing_metadata['odm_language'])
                                    except (ckanapi.SearchError, ckanapi.NotFound):
                                        # Dataset not in CKAN yet: create it from scratch.
                                        try:
                                            dataset_metadata = _set_title_translated(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_notes_translated(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_odm_laws_number(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_odm_language(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_odm_spatial_range(
                                                None, dataset_metadata)
                                            if not config['dry']:
                                                dataset_metadata = ckanapiutils.create_package(dataset_metadata)
                                                print("Dataset created ",
                                                      dataset_metadata['id'],
                                                      dataset_metadata['title'])
                                        except TypeError as e:
                                            if config['debug']:
                                                print(e)
                                        except ckan.logic.ValidationError as e:
                                            if config['debug']:
                                                print(e)
                                    if 'id' in dataset_metadata:
                                        _add_extras_urls_as_resources(
                                            dataset_metadata,
                                            config['field_prefixes'][lang],
                                            ckanapiutils, elem, root, lang)
                            # Free the processed element to keep iterparse memory flat.
                            # NOTE(review): original indentation was lost; clear()
                            # placement reconstructed from the standard iterparse
                            # idiom — confirm against upstream history.
                            elem.clear()
                    root.clear()
                except TypeError:
                    if config['debug']:
                        traceback.print_exc()
                except etree.XMLSyntaxError:
                    if config['debug']:
                        traceback.print_exc()
        except KeyError:
            # BUGFIX: the original passed the format string and the value as two
            # separate print arguments; apply the %-format so the message reads
            # correctly.
            print("Source file not available for lang %s" % lang)
            if config['debug']:
                traceback.print_exc()
        except IOError:
            if config['debug']:
                traceback.print_exc()
    return "COMPLETED import_odc_laws"
# NOTE(review): this line is a whitespace-mangled fragment of a *different*
# import script (MARC library records via pymarc's MARCReader), fused into
# this file between two copies of run(). It references names defined outside
# this chunk (`records`, `orga`, upper-case `config` attributes) and its
# trailing `try:` block is cut off before the matching `except` clause, so it
# is syntactically incomplete as seen here. Left byte-identical on purpose —
# recover the original formatting and the missing exception handler from the
# source script before attempting any rewrite.
reader = MARCReader(records) counter = 0 for record in reader: if ((int(config.SKIP_N_DATASETS) > 0) and (counter < int(config.SKIP_N_DATASETS))): counter += 1 continue dataset_metadata = library_utils._map_record_to_ckan_dataset_dict(record,config) if (dataset_metadata is None) or (dataset_metadata["name"] == ''): print("Dataset does not have any title or ISBN, unique name cannot be generated") continue dataset_metadata = library_utils._set_extras_from_record_to_ckan_dataset_dict(dataset_metadata,record,config) dataset_metadata = script_utils._set_mandatory_metadata_fields(dataset_metadata,config.trace) dataset_metadata['owner_org'] = orga['id'] try: response = ckanapiutils.get_package_contents(dataset_metadata['name']) if config.SKIP_EXISTING: print("Dataset skipped ",dataset_metadata['name']) continue modified_dataset = ckanapiutils.update_package(dataset_metadata) dataset_metadata['id'] = modified_dataset['id'] print("Dataset modified ",modified_dataset['id'],modified_dataset['title'])
# NOTE(review): this is a byte-for-byte duplicate definition of run() within
# the same file; at import time it silently shadows the earlier copy.
# Deduplicate at the file level when the chunk boundaries are confirmed.
def run(self):
    """Import ODC "laws" WordPress-export XML into CKAN, one pass per language.

    For each language in ``config['supported_languages']``:
      * load the exported XML via ``data.load_odc_laws(lang)``,
      * stream-parse it with ``etree.iterparse`` and process each published
        ``<item>`` element into a CKAN dataset dict,
      * update the dataset if it already exists in CKAN, otherwise create it,
      * attach any extra URLs found on the item as CKAN resources.

    Honors ``config['skip_n_datasets']`` (skip the first N published items),
    ``config['skip_existing']`` (skip datasets already imported for this
    language) and ``config['dry']`` (no writes to CKAN).

    Returns:
        str: the literal status string "COMPLETED import_odc_laws".
    """
    print("S4_import_odc_laws run")
    for lang in config['supported_languages']:
        try:
            ontology_xml = data.load_odc_laws(lang)
            if ontology_xml:
                try:
                    context = etree.iterparse(io.BytesIO(ontology_xml), events=('end',))
                    context = iter(context)
                    # BUGFIX: `context.next()` is the Python 2 iterator API and
                    # raises AttributeError on Python 3; the builtin next()
                    # works on both.
                    root = next(context)[1]
                    counter = 0
                    for event, elem in context:
                        if event == "end" and elem.tag == "item":
                            if elem.find('wp:status', root.nsmap) is not None:
                                status = elem.find('wp:status', root.nsmap).text
                                if status == 'publish':
                                    # Optionally skip the first N published items (resume support).
                                    if (int(config['skip_n_datasets']) > 0) and \
                                            (counter < int(config['skip_n_datasets'])):
                                        counter += 1
                                        continue
                                    dataset_metadata = _map_xml_item_to_ckan_dataset_dict(root, elem)
                                    dataset_metadata = _set_extras(dataset_metadata, root, elem)
                                    dataset_metadata = _set_taxonomy_from_category(dataset_metadata, elem)
                                    dataset_metadata = _set_document_type_from_category(dataset_metadata, elem)
                                    dataset_metadata = _set_mandatory_metadata_fields_laws(dataset_metadata)
                                    dataset_metadata = script_utils._set_mandatory_metadata_fields(
                                        dataset_metadata, config['trace'])
                                    try:
                                        # Dataset already in CKAN: merge and update.
                                        existing_metadata = ckanapiutils.get_package_contents(
                                            dataset_metadata['name'])
                                        dataset_metadata = _copy_resources_info(
                                            existing_metadata, dataset_metadata)
                                        if config['skip_existing']:
                                            if lang in existing_metadata['odm_language']:
                                                print("Dataset skipped ",
                                                      dataset_metadata['name'], lang,
                                                      existing_metadata['odm_language'])
                                                continue
                                        dataset_metadata = _set_title_translated(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_notes_translated(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_odm_laws_number(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_odm_language(
                                            existing_metadata, dataset_metadata, lang)
                                        dataset_metadata = _set_odm_spatial_range(
                                            existing_metadata, dataset_metadata)
                                        if not config['dry']:
                                            dataset_metadata = ckanapiutils.update_package(dataset_metadata)
                                            print("Dataset modified ",
                                                  dataset_metadata['id'],
                                                  dataset_metadata['title'], lang,
                                                  existing_metadata['odm_language'])
                                    except (ckanapi.SearchError, ckanapi.NotFound):
                                        # Dataset not in CKAN yet: create it from scratch.
                                        try:
                                            dataset_metadata = _set_title_translated(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_notes_translated(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_odm_laws_number(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_odm_language(
                                                None, dataset_metadata, lang)
                                            dataset_metadata = _set_odm_spatial_range(
                                                None, dataset_metadata)
                                            if not config['dry']:
                                                dataset_metadata = ckanapiutils.create_package(dataset_metadata)
                                                print("Dataset created ",
                                                      dataset_metadata['id'],
                                                      dataset_metadata['title'])
                                        except TypeError as e:
                                            if config['debug']:
                                                print(e)
                                        except ckan.logic.ValidationError as e:
                                            if config['debug']:
                                                print(e)
                                    if 'id' in dataset_metadata:
                                        _add_extras_urls_as_resources(
                                            dataset_metadata,
                                            config['field_prefixes'][lang],
                                            ckanapiutils, elem, root, lang)
                            # Free the processed element to keep iterparse memory flat.
                            # NOTE(review): original indentation was lost; clear()
                            # placement reconstructed from the standard iterparse
                            # idiom — confirm against upstream history.
                            elem.clear()
                    root.clear()
                except TypeError:
                    if config['debug']:
                        traceback.print_exc()
                except etree.XMLSyntaxError:
                    if config['debug']:
                        traceback.print_exc()
        except KeyError:
            # BUGFIX: the original passed the format string and the value as two
            # separate print arguments; apply the %-format so the message reads
            # correctly.
            print("Source file not available for lang %s" % lang)
            if config['debug']:
                traceback.print_exc()
        except IOError:
            if config['debug']:
                traceback.print_exc()
    return "COMPLETED import_odc_laws"