def setUp(self):
    """Executed before each test."""
    # tests stuff
    self.discriminator = "{}_{}".format(
        hostname, strftime("%Y-%m-%d_%H%M%S", gmtime())
    )

    # target
    # fixture metadata - destination
    md_target = Metadata(
        format="shp",
        name="network_analisis.shp",
        path="//datagis/path_to_vectors/network_analisis.shp",
        title="{} - {}".format(
            get_test_marker(),
            "{}_{}".format(hostname, strftime("%Y-%m-%d_%H%M%S", gmtime())),
        ),
        type="vectorDataset",
    )
    # create it online
    self.fixture_metadata_target = self.isogeo.metadata.create(
        workgroup_id=self.fixture_metadata.groupId,
        metadata=md_target,
        return_basic_or_complete=True,
    )
def get_matching_share(
    self, metadata: Metadata, shares: list, mode: str = "simple"
) -> Share:
    """Get the first share containing the metadata, matching on the workgroup UUID.

    :param Metadata metadata: metadata object to use for the matching
    :param list shares: list of shares to use for the matching
    :param str mode: 'simple' mode matches only on the workgroup UUID
    """
    if mode != "simple":
        raise NotImplementedError

    matching_share = [
        share
        for share in shares
        if share.get("_creator").get("_id") == metadata.groupId
    ]
    if len(matching_share):
        matching_share = Share(**matching_share[0])
    else:
        logger.warning(
            "No matching share found for {} ({}). "
            "The OpenCatalog URL will not be built.".format(
                metadata.title_or_name(), metadata._id
            )
        )
        matching_share = None

    return matching_share
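# A minimal usage sketch (assumed names): `mngr` is whatever object exposes
# get_matching_share, `li_shares` a list of share dicts from a prior listing,
# and `md` a Metadata from a search result; all three are hypothetical here.
matching = mngr.get_matching_share(metadata=md, shares=li_shares, mode="simple")
if matching is None:
    logger.warning("Skipping OpenCatalog link for {}".format(md.title_or_name()))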
def test_search_replace_basic(self):
    """Basic search and replace, in safe then unsafe mode."""
    # create fixture metadata
    local_obj = Metadata(
        title="Parcelles cadastrales du Grand Dijon",
        abstract="La communauté urbaine du Grand Dijon est heureuse de vous présenter"
        " la politique foncière au Grand Dijon.\n"
        "C'est dans le Grand Dijon qu'on trouve le vin de Bourgogne le plus cher.\n"
        "Bien cordialement, Grand Dijon",
        type="vectorDataset",
    )
    md = self.isogeo.metadata.create(
        workgroup_id=environ.get("ISOGEO_WORKGROUP_TEST_UUID"), metadata=local_obj
    )

    # prepare search and replace
    replace_patterns = {
        "title": ("Grand Dijon", "Dijon Métropole"),
        "abstract": ("Grand Dijon", "Dijon Métropole"),
    }
    dict_prepositions = {
        "la Communauté Urbaine du ": "",
        "au ": "à ",
        "du ": "de ",
        "le ": "",
    }
    searchrpl_mngr = SearchReplaceManager(
        api_client=self.isogeo,
        attributes_patterns=replace_patterns,
        prepositions=dict_prepositions,
    )

    # build search parameters, for example to filter on the test workgroup
    search_parameters = {"group": environ.get("ISOGEO_WORKGROUP_TEST_UUID")}
    results = searchrpl_mngr.search_replace(search_params=search_parameters, safe=1)

    # checks
    self.assertGreaterEqual(len(results), 1)
    for i in results:
        self.assertNotIn("Grand Dijon", i.title)
        self.assertNotIn("Grand Dijon", i.abstract)

    # remove safe mode, filtering on the specific fixture metadata
    search_parameters = {
        "group": environ.get("ISOGEO_WORKGROUP_TEST_UUID"),
        "specific_md": (md._id,),
    }
    searchrpl_mngr.search_replace(search_params=search_parameters, safe=0)

    # delete metadata
    self.isogeo.metadata.delete(metadata_id=md._id)
async def update(self, metadata: Metadata):
    logger.debug("Updating metadata: " + metadata.title_or_name())
    md_updated = self.isogeo.metadata.update(metadata)
    # await asyncio.sleep(2)
    if isinstance(md_updated, Metadata):
        logger.debug(f"{metadata._id} has been updated")
    elif isinstance(md_updated, tuple):
        logger.error(f"{metadata._id} can't be updated: {md_updated[1]}")
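# A minimal sketch (assumed names) of how the coroutine above can be fanned out
# with asyncio: `updater` stands for the object exposing `update` and
# `metadatas_to_update` for an iterable of Metadata objects. Note the SDK call
# inside is synchronous, so this mostly interleaves logging, not network I/O.
import asyncio

async def update_all(updater, metadatas_to_update):
    # schedule one update coroutine per metadata and wait for all of them
    await asyncio.gather(*(updater.update(md) for md in metadatas_to_update))

# asyncio.run(update_all(updater, metadatas_to_update))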
def test_cgus(self):
    """CGU formatter."""
    # get conditions reformatted
    for result in self.search.results:
        # load result
        md = Metadata.clean_attributes(result)
        # empty or not, it should work
        cgus_out = self.fmt.conditions(md.conditions)
        # test
        self.assertIsInstance(cgus_out, list)
        self.assertEqual(len(result.get("conditions")), len(cgus_out))
def test_conditions(self):
    """Conditions formatter."""
    # filtered search
    for md in self.search.results:
        metadata = Metadata.clean_attributes(md)
        if metadata.conditions:
            # get conditions reformatted
            conditions_out = self.fmt.conditions(metadata.conditions)
            self.assertIsInstance(conditions_out, tuple)

    # fixtures
    conditions_out = self.fmt.conditions(fixture_conditions)
    self.assertIsInstance(conditions_out, tuple)
    self.assertEqual(len(conditions_out), 6)
    for i in conditions_out:
        self.assertIsInstance(i, dict)
        self.assertIn("description", i)
def test_limitations(self):
    """Limitations formatter."""
    # filtered search
    for md in self.search.results:
        metadata = Metadata.clean_attributes(md)
        if metadata.limitations:
            # get limitations reformatted
            limitations_out = self.fmt.limitations(metadata.limitations)
            self.assertIsInstance(limitations_out, tuple)

    # fixtures
    limitations_out = self.fmt.limitations(fixture_limitations)
    self.assertIsInstance(limitations_out, tuple)
    self.assertEqual(len(limitations_out), 10)
    for i in limitations_out:
        self.assertIsInstance(i, dict)
        self.assertIn("description", i)
def test_metadata_export(self):
    """Test search results export."""
    # temp output file
    out_xlsx = mkstemp(prefix="i2o_test_xlsx_")

    # load tags fixtures
    with open(self.search_all_includes, "r") as f:
        search = json.loads(f.read())

    # add worksheets
    self.out_wb.set_worksheets(auto=search.get("tags").keys())

    # run
    for md in search.get("results"):
        # clean invalid attributes
        md["coordinateSystem"] = md.pop("coordinate-system", list)
        md["featureAttributes"] = md.pop("feature-attributes", list)
        # load metadata
        metadata = Metadata(**md)
        self.out_wb.store_metadatas(metadata)

    # save
    self.out_wb.save(out_xlsx[1] + ".xlsx")
def test_metadata_export(self):
    """Test search results export."""
    # load tags fixtures
    with open(self.search_all_includes, "r") as f:
        search = json.loads(f.read())

    # run
    for md in search.get("results")[:20]:
        metadata = Metadata.clean_attributes(md)
        # output path
        out_docx = mkstemp(prefix="i2o_test_docx_")
        out_docx_path = out_docx[1] + ".docx"
        # templating - reload the template for each metadata to start from a clean context
        tpl = DocxTemplate(self.word_template)
        self.to_docx.md2docx(tpl, metadata)
        # save
        tpl.save(out_docx_path)
        del tpl
def test_specifications(self):
    """Specifications formatter."""
    # filtered search
    for md in self.search.results:
        metadata = Metadata.clean_attributes(md)
        if metadata.specifications:
            # get specifications reformatted
            specs_out = self.fmt.specifications(metadata.specifications)
            self.assertIsInstance(specs_out, tuple)
        else:
            specs_no = self.fmt.specifications([])
            self.assertIsInstance(specs_no, tuple)

    # fixtures
    specs_out = self.fmt.specifications(fixture_specifications)
    self.assertIsInstance(specs_out, tuple)
    self.assertEqual(len(specs_out), 2)
    for i in specs_out:
        self.assertIsInstance(i, dict)
        self.assertIn("conformant", i)
        self.assertIn("link", i)
        self.assertIn("name", i)
        self.assertIn("published", i)
new_condition = Condition()
new_condition._license = lic_etalab2

search_migrated = isogeo.search(
    group=origin_wg_uuid,
    query="catalog:{}".format(migrated_cat_uuid),
    whole_results=True,
    include=("conditions",),
)

for md in search_migrated.results:
    # retrieve the license UUIDs of the metadata's conditions
    md_lic_uuid = [
        condition.get("license").get("_id") for condition in md.get("conditions")
    ]
    # check whether Etalab v1 is among the conditions' licenses
    if len(md_lic_uuid) and lic_etalab1_uuid in md_lic_uuid:
        # retrieve the dict of the condition to delete
        md_condition = [
            condition
            for condition in md.get("conditions")
            if condition.get("license").get("_id") == lic_etalab1_uuid
        ][0]
        # build metadata object
        isogeo_md = Metadata(**md)
        # copy the description of the condition to delete into the new condition,
        # then add the new condition
        new_condition._description = md_condition.get("description")
        isogeo.metadata.conditions.create(metadata=isogeo_md, condition=new_condition)
        # build the object of the condition to delete, then delete it
        isogeo_condition = Condition(**md_condition)
        isogeo.metadata.conditions.delete(metadata=isogeo_md, condition=isogeo_condition)
    else:
        pass
def store_md_generic(self, md: Metadata, ws: Worksheet, idx: int):
    """Exports generic metadata attributes into the Excel worksheet, with some
    dynamic adaptations based on the metadata type.

    :param Metadata md: metadata object to export
    :param Worksheet ws: Excel worksheet to store the exported info
    :param int idx: row index in the worksheet
    """
    # pick the columns referential table depending on the metadata type
    if md.type == "rasterDataset":
        col = self.columns_raster
    elif md.type == "resource":
        col = self.columns_resource
    elif md.type == "service":
        col = self.columns_service
    elif md.type == "vectorDataset":
        col = self.columns_vector
    else:
        raise TypeError("Unknown metadata type: {}".format(md.type))

    logger.debug(
        "Start storing metadata {} ({}) using the column referential matching "
        "its type ({})...".format(md.title_or_name(slugged=1), md._id, md.type)
    )

    # -- IDENTIFICATION ------------------------------------------------------------
    if md.title:
        ws["{}{}".format(col.get("title").letter, idx)] = md.title
    if md.name:
        ws["{}{}".format(col.get("name").letter, idx)] = md.name
    if md.abstract:
        ws["{}{}".format(col.get("abstract").letter, idx)] = md.abstract

    # path to the source
    src_path = None
    if md.path:
        try:
            src_path = Path(str(md.path))
        except OSError as e:
            logger.debug(
                "Metadata.path value is not a valid system path. Maybe an URL? "
                "Original error: {}".format(e)
            )

    if isinstance(src_path, Path) and md.type != "service":
        if src_path.is_file():
            link_path = r'=HYPERLINK("{0}","{1}")'.format(
                src_path.parent, src_path.resolve()
            )
            ws["{}{}".format(col.get("path").letter, idx)] = link_path
            logger.debug("Path reachable: {}".format(src_path))
        else:
            ws["{}{}".format(col.get("path").letter, idx)] = str(src_path.resolve())
            logger.debug("Path unreachable: {}".format(str(src_path)))
    elif md.path and md.type == "service":
        link_path = r'=HYPERLINK("{0}","{1}")'.format(md.path, md.path)
        ws["{}{}".format(col.get("path").letter, idx)] = link_path
    elif md.path:
        ws["{}{}".format(col.get("path").letter, idx)] = md.path
        logger.debug("Path not recognized: {}".format(md.path))
    else:
        pass

    # -- TAGS ----------------------------------------------------------------------
    keywords = []
    inspire = []
    if md.keywords:
        for k in md.keywords:
            if k.get("_tag").startswith("keyword:is"):
                keywords.append(k.get("text"))
            elif k.get("_tag").startswith("keyword:in"):
                inspire.append(k.get("text"))
            else:
                logger.info("Unknown keyword type: " + k.get("_tag"))
                continue
        if keywords:
            ws["{}{}".format(col.get("keywords").letter, idx)] = " ;\n".join(
                sorted(keywords)
            )
        if inspire:
            ws["{}{}".format(col.get("inspireThemes").letter, idx)] = " ;\n".join(
                sorted(inspire)
            )
    else:
        self.stats.md_empty_fields[md._id].append("keywords")
        logger.info("Vector dataset without any keyword or INSPIRE theme")

    # INSPIRE conformity
    if col.get("inspireConformance").letter is not None:
        ws["{}{}".format(col.get("inspireConformance").letter, idx)] = (
            "conformity:inspire" in md.tags
        )

    # owner
    ws["{}{}".format(col.get("_creator").letter, idx)] = next(
        v for k, v in md.tags.items() if "owner:" in k
    )

    # -- HISTORY -------------------------------------------------------------------
    if md.collectionContext:
        ws["{}{}".format(col.get("collectionContext").letter, idx)] = md.collectionContext
    if md.collectionMethod:
        ws["{}{}".format(col.get("collectionMethod").letter, idx)] = md.collectionMethod

    # validity
    if md.validFrom:
        ws["{}{}".format(col.get("validFrom").letter, idx)] = utils.hlpr_datetimes(
            md.validFrom
        )
    if md.validTo:
        ws["{}{}".format(col.get("validTo").letter, idx)] = utils.hlpr_datetimes(
            md.validTo
        )
    if md.updateFrequency:
        ws["{}{}".format(col.get("updateFrequency").letter, idx)] = (
            self.fmt.frequency_as_explicit_str(md.updateFrequency)
        )
    if md.validityComment:
        ws["{}{}".format(col.get("validityComment").letter, idx)] = md.validityComment

    # -- EVENTS --------------------------------------------------------------------
    # data creation date
    if md.created:
        ws["{}{}".format(col.get("created").letter, idx)] = utils.hlpr_datetimes(
            md.created
        )
    # events count
    if md.events:
        ws["{}{}".format(col.get("events").letter, idx)] = len(md.events)
    # data last update
    if md.modified:
        ws["{}{}".format(col.get("modified").letter, idx)] = utils.hlpr_datetimes(
            md.modified
        )

    # -- TECHNICAL -----------------------------------------------------------------
    # format
    if md.format and md.type in ("rasterDataset", "vectorDataset"):
        format_lbl = next(v for k, v in md.tags.items() if "format:" in k)
        ws["{}{}".format(col.get("format").letter, idx)] = "{0} ({1} - {2})".format(
            format_lbl, md.formatVersion, md.encoding
        )
        self.stats.li_data_formats.append(format_lbl)
    elif md.format:
        ws["{}{}".format(col.get("format").letter, idx)] = "{0} {1}".format(
            md.format, md.formatVersion
        )
        self.stats.li_data_formats.append(md.format)
    else:
        pass

    # SRS
    if isinstance(md.coordinateSystem, dict):
        ws["{}{}".format(col.get("coordinateSystem").letter, idx)] = "{0} ({1})".format(
            md.coordinateSystem.get("name"), md.coordinateSystem.get("code")
        )

    # bounding box (envelope)
    if md.type != "resource" and md.envelope and md.envelope.get("bbox"):
        coords = md.envelope.get("coordinates")
        if md.envelope.get("type") == "Polygon":
            bbox = ",\n".join(
                format(coord, ".4f") for coord in md.envelope.get("bbox")
            )
        elif md.envelope.get("type") == "Point":
            bbox = "Centroïde : {}{}".format(coords[0], coords[1])
        else:
            bbox = ",\n".join(
                format(coord, ".4f") for coord in md.envelope.get("bbox")
            )
        ws["{}{}".format(col.get("envelope").letter, idx)] = bbox

    # geometry
    if md.geometry:
        ws["{}{}".format(col.get("geometry").letter, idx)] = md.geometry
    # resolution
    if md.distance:
        ws["{}{}".format(col.get("distance").letter, idx)] = md.distance
    # scale
    if md.scale:
        ws["{}{}".format(col.get("scale").letter, idx)] = md.scale
    # features objects
    if md.features:
        ws["{}{}".format(col.get("features").letter, idx)] = md.features

    # -- QUALITY -------------------------------------------------------------------
    if md.specifications:
        ws["{}{}".format(col.get("specifications").letter, idx)] = " ;\n".join(
            self.fmt.specifications(md.specifications)
        )
    # topology
    if md.topologicalConsistency:
        ws["AC{}".format(idx)] = md.topologicalConsistency

    # -- FEATURE ATTRIBUTES ----------------------------------------------------------
    if md.type == "vectorDataset" and isinstance(md.featureAttributes, list):
        fields = md.featureAttributes
        # count
        ws["{}{}".format(col.get("featureAttributesCount").letter, idx)] = len(fields)
        # alphabetic list
        fields_cct = sorted(
            [
                "{} ({}) - Type : {} - Description : {:.20} [...]".format(
                    field.get("name"),
                    field.get("alias"),
                    field.get("dataType"),
                    # field.get("language"),
                    field.get("description", ""),
                )
                for field in fields
            ]
        )
        ws["{}{}".format(col.get("featureAttributes").letter, idx)] = " ;\n".join(
            fields_cct
        )
        # if attributes analysis is activated, append the fields dict
        if hasattr(self, "ws_fa"):
            self.fa_all.append(fields)
        else:
            pass

    # -- CGUs ------------------------------------------------------------------------
    if md.conditions:
        ws["{}{}".format(col.get("conditions").letter, idx)] = " ;\n".join(
            self.fmt.conditions(md.conditions)
        )

    # -- LIMITATIONS ---------------------------------------------------------------
    if md.limitations:
        ws["{}{}".format(col.get("limitations").letter, idx)] = " ;\n".join(
            self.fmt.limitations(md.limitations)
        )

    # -- CONTACTS ------------------------------------------------------------------
    if md.contacts:
        contacts = [
            "{0} ({1})".format(
                contact.get("contact").get("name"),
                contact.get("contact").get("email"),
            )
            for contact in md.contacts
        ]
        ws["{}{}".format(col.get("contacts").letter, idx)] = " ;\n".join(contacts)

    # -- ACTIONS -------------------------------------------------------------------
    ws["{}{}".format(col.get("hasLinkDownload").letter, idx)] = (
        "action:download" in md.tags
    )
    ws["{}{}".format(col.get("hasLinkView").letter, idx)] = "action:view" in md.tags
    ws["{}{}".format(col.get("hasLinkOther").letter, idx)] = "action:other" in md.tags

    # -- METADATA ------------------------------------------------------------------
    # id
    ws["{}{}".format(col.get("_id").letter, idx)] = md._id

    # creation
    if md._created:
        ws["{}{}".format(col.get("_created").letter, idx)] = utils.hlpr_datetimes(
            md._created
        )
        # add creation date (not datetime) for later stats
        self.stats.li_dates_md_created.append(utils.hlpr_datetimes(md._created).date())

    # last update
    if md._modified:
        ws["{}{}".format(col.get("_modified").letter, idx)] = utils.hlpr_datetimes(
            md._modified
        )
        # add modification date (not datetime) for later stats,
        # only if different from the creation date
        if md._modified != md._created:
            self.stats.li_dates_md_modified.append(
                utils.hlpr_datetimes(md._modified).date()
            )

    # edit
    ws["{}{}".format(col.get("linkEdit").letter, idx)] = utils.get_edit_url(md)
    if self.share is not None:
        link_visu = utils.get_view_url(
            md_id=md._id, share_id=self.share._id, share_token=self.share.urlToken
        )
        ws["{}{}".format(col.get("linkView").letter, idx)] = link_visu

    # lang
    ws["{}{}".format(col.get("language").letter, idx)] = md.language

    # log
    logger.info(
        "Metadata stored: {} ({})".format(md.title_or_name(slugged=1), md._id)
    )
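# A minimal usage sketch (assumed names): dumping cleaned search results into a
# worksheet with the method above. `exporter` is the object exposing
# store_md_generic and `ws` an openpyxl worksheet set up with the matching columns.
row_index = 2  # assuming row 1 holds the headers
for result in search.results:
    md = Metadata.clean_attributes(result)
    exporter.store_md_generic(md, ws, row_index)
    row_index += 1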
logger.info(
    "\n------------- Update isogeo md from {} xlsx one ({}/{}) ---------------".format(
        xlsx_md._id,
        xlsx_reader.md_read.index(record) + 1,
        len(xlsx_reader.md_read),
    )
)

logger.info("Retrieving xlsx infos")
xlsx_contacts = record.get("contacts")
xlsx_kws = record.get("keywords")
xlsx_inspire = record.get("inspireThemes")
xlsx_events = record.get("events")

try:
    logger.info("Retrieving isogeo infos")
    # retrieve isogeo md
    isogeo_md = Metadata().clean_attributes(
        [md for md in isogeo_mds if md.get("_id") == xlsx_md._id][0]
    )
    origin_md = isogeo_md
    isogeo_contacts = [
        v for k, v in isogeo_md.tags.items() if k.startswith("contact:")
    ]
    isogeo_kws = [
        v for k, v in isogeo_md.tags.items() if k.startswith("keyword:is")
    ]
    isogeo_inspireTh = [
        v.strip() for k, v in isogeo_md.tags.items() if k.startswith("keyword:in")
    ]
    isogeo_catalogs_uuid = [
        k.split(":")[1] for k in isogeo_md.tags if k.startswith("catalog")
    ]
elif match_count == 1:
    # refresh token if needed
    if default_timer() - auth_timer >= 6000:
        logger.info("Manually refreshing token")
        isogeo.connect(
            username=environ.get("ISOGEO_USER_NAME"),
            password=environ.get("ISOGEO_USER_PASSWORD"),
        )
        auth_timer = default_timer()
    else:
        pass
    # retrieve the metadata feature attributes listing only when needed
    matching_md = li_matching_md[0]
    if matching_md.get("_id") != current_md_uuid:
        current_md_uuid = matching_md.get("_id")
        metadata = Metadata().clean_attributes(matching_md)
        li_fAttrib = isogeo.metadata.attributes.listing(metadata)
        md_uuid = matching_md.get("_id")
    else:
        md_uuid = current_md_uuid

    app_link = app_base_url + md_uuid + "/attributes"
    # pprint(matching_md)
    matching_fAttrib = [
        fAttrib for fAttrib in li_fAttrib if fAttrib.get("name") == name
    ]
    if len(matching_fAttrib) == 0:
        fAttrib_uuid = "no_match"
    elif len(matching_fAttrib) == 1:
        fAttrib_uuid = matching_fAttrib[0].get("_id")
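# The refresh-if-stale pattern above recurs in several of these scripts. A minimal
# helper sketch (hypothetical, not part of the SDK) that factors it out:
from os import environ
from timeit import default_timer

def refresh_token_if_needed(isogeo, auth_timer, max_age=6000):
    """Reconnect when the token is older than max_age seconds; return the timer."""
    if default_timer() - auth_timer >= max_age:
        logger.info("Manually refreshing token")
        isogeo.connect(
            username=environ.get("ISOGEO_USER_NAME"),
            password=environ.get("ISOGEO_USER_PASSWORD"),
        )
        return default_timer()
    return auth_timer

# usage inside a long-running loop:
# auth_timer = refresh_token_if_needed(isogeo, auth_timer)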
def filter_matching_metadatas(self, isogeo_search_results: list) -> tuple:
    """Filter search results basing on matching patterns.

    :param MetadataSearch isogeo_search_results: Isogeo search results (`MetadataSearch.results`)

    :returns: a tuple of objects with the updated attributes
    :rtype: tuple
    """
    # out dict of updated objects, indexed by metadata UUID
    di_out_objects = {}

    # parse attributes to replace
    for attribute, pattern in self.attributes_patterns.items():
        logger.info("Searching into '{}' values...".format(attribute))
        # counters
        empty = 0
        ignored = 0
        matched = 0
        # parse metadatas
        for md in isogeo_search_results:
            # load metadata as object
            metadata = Metadata.clean_attributes(md)
            # get attribute value
            in_value = getattr(metadata, attribute)

            # check if attribute has a value
            if not isinstance(in_value, str):
                empty += 1
                continue

            # special case: ignore a title equal to the technical name
            if attribute == "title" and in_value == metadata.name:
                empty += 1
                continue

            # check if the value matches the search
            if pattern[0] in str(in_value):
                logger.debug(
                    "Value of '{}' to change spotted in {}: '{}'".format(
                        attribute, metadata._id, in_value
                    )
                )
                matched += 1
                if metadata._id in di_out_objects:
                    # object has already been updated for a previous attribute
                    updated_obj = di_out_objects.get(metadata._id)
                    # apply replacement
                    setattr(updated_obj, attribute, self.replacer(in_value, pattern))
                    di_out_objects[metadata._id] = updated_obj
                else:
                    setattr(metadata, attribute, self.replacer(in_value, pattern))
                    di_out_objects[metadata._id] = metadata
            else:
                ignored += 1

        # log for this attribute
        logger.info(
            "{} metadatas do not contain a valid {}".format(empty, attribute)
        )
        logger.info(
            "{} metadatas.{} DO NOT MATCH the pattern: {}".format(
                ignored, attribute, pattern[0]
            )
        )
        logger.info(
            "{} metadatas.{} MATCH the pattern: {}".format(
                matched, attribute, pattern[0]
            )
        )

    # return tuple of metadata to be updated
    return tuple(di_out_objects.values())
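# A minimal usage sketch (assumed names): previewing which metadatas would be
# rewritten, without updating anything online. `searchrpl_mngr` is a
# SearchReplaceManager instantiated as in the test further above.
preview_search = isogeo.search(
    group=environ.get("ISOGEO_WORKGROUP_TEST_UUID"), whole_results=True
)
to_update = searchrpl_mngr.filter_matching_metadatas(preview_search.results)
for md in to_update:
    print(md._id, md.title)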
# API client instantiation
isogeo = Isogeo(
    client_id=environ.get("ISOGEO_API_USER_LEGACY_CLIENT_ID"),
    client_secret=environ.get("ISOGEO_API_USER_LEGACY_CLIENT_SECRET"),
    auth_mode="user_legacy",
    auto_refresh_url="{}/oauth/token".format(environ.get("ISOGEO_ID_URL")),
    platform=environ.get("ISOGEO_PLATFORM", "qa"),
)
isogeo.connect(
    username=environ.get("ISOGEO_USER_NAME"),
    password=environ.get("ISOGEO_USER_PASSWORD"),
)
auth_time = default_timer()

# create the new metadata
new_md = Metadata()
new_md.type = type_code
new_md.format = format_code
new_md.title = title
new_md.name = name
new_md.path = path
metadata = isogeo.metadata.create(workgroup_id=trg_wg_uuid, metadata=new_md)

# associate it to the target catalogs
for cat_uuid in li_trg_cat_uuid:
    cat = isogeo.catalog.get(workgroup_id=trg_wg_uuid, catalog_id=cat_uuid)
    isogeo.catalog.associate_metadata(metadata=metadata, catalog=cat)

search = isogeo.search()
isogeo.close()
else:
    logger.error("Invalid metadata UUID spotted: " + metadata_uuid)
    continue

# compare title and technical names
if row[dct_i2o_struct.get("title")].value == row[dct_i2o_struct.get("name")].value:
    logger.warning("Row has been ignored because the title has not been changed.")
    continue
else:
    pass

# retrieve the metadata from Isogeo
md_dict = isogeo.resource(resource_id=metadata_uuid)
target_md = Metadata(**md_dict)
# print(md_dict.get("name"), target_md.name)
# print(target_md.name == row[dct_i2o_struct.get("name")].value)

# check if technical names are matching
if target_md.name == row[dct_i2o_struct.get("name")].value:
    logger.info("This is a technical match! The show can go on!")
else:
    logger.error("Hmmm, there is no match between technical names")
    continue

# contacts
# print(row[dct_i2o_struct.get("contacts")].value)
contacts_uuids = (
    "f628a23c260b46cea83c98f6c1655119",
    "baf146d7befa474b94f19f25b92915ea",
)
cat_tag = "catalog:{}".format(cat_uuid)
cat_md = [md for md in wg_search.results if cat_tag in md.get("tags")]
# cat_md = [md for md in wg_search.results if md.get("_id") == "e3f98e8b65f14ff2ab33a782a3717272"]

for md in cat_md:
    # refresh token if needed
    if default_timer() - auth_timer >= 6900:
        isogeo.connect(
            username=environ.get("ISOGEO_USER_NAME"),
            password=environ.get("ISOGEO_USER_PASSWORD"),
        )
        auth_timer = default_timer()
    else:
        pass

    metadata = Metadata(**md)
    # retrieve the old condition
    li_cond_to_delete = [
        cond
        for cond in md.get("conditions")
        if cond.get("license").get("_id") == license_uuid_src
    ]
    if len(li_cond_to_delete):
        # build the old condition
        condition_to_delete = Condition(**li_cond_to_delete[0])
        # add the new condition
        isogeo.metadata.conditions.create(metadata=metadata, condition=new_condition)
        # delete the old condition
        isogeo.metadata.conditions.delete(
            metadata=metadata, condition=condition_to_delete
        )
for md in bd_carto_search.results:
    # refresh token if needed
    if default_timer() - auth_timer >= 6900:
        logger.info("Manually refreshing token")
        isogeo.connect(
            username=environ.get("ISOGEO_USER_NAME"),
            password=environ.get("ISOGEO_USER_PASSWORD"),
        )
        auth_timer = default_timer()
    else:
        pass
    # check if the metadata appears in the matching table
    if md.get("_id") in li_md_uuid:
        md_info = [info for info in li_infos if info[0] == md.get("_id")][0]
        # check if infos retrieved from the matching table are consistent
        if md_info[1] == md.get("title"):
            isogeo_md = Metadata(**md)
            isogeo_md.title = md_info[2]
            # let's update the metadata title
            isogeo.metadata.update(metadata=isogeo_md)
            md_parsed_count += 1
        else:
            logger.warning(
                "Infos retrieved from the matching table about '{}' metadata are "
                "not consistent with the API response".format(md.get("_id"))
            )
    else:
        pass

isogeo.close()
logger.info("{}/{} metadatas parsed".format(md_parsed_count, len(li_infos)))
    username=environ.get("ISOGEO_USER_NAME"),
    password=environ.get("ISOGEO_USER_PASSWORD"),
)
auth_timer = default_timer()

src_md = isogeo.search(
    group=environ.get("ISOGEO_ORIGIN_WORKGROUP"),
    whole_results=True,
    query="catalog:{}".format(src_cat),
    include="all",
)

# listing
li_md_to_delete = []
for md in src_md.results:
    metadata = Metadata.clean_attributes(md)
    md_cat = [
        metadata.tags.get(tag) for tag in metadata.tags if tag.startswith("catalog:")
    ]
    if trg_cat not in md_cat:
        li_md_to_delete.append(metadata._id)
    else:
        pass
logger.info(
    "------- {} source metadatas listed are going to be backed up, then deleted "
    "-------".format(len(li_md_to_delete))
)

# ################# BACKUP MDs THAT ARE GOING TO BE DELETED #######################
# instantiate backup manager
backup_path = Path(r"./scripts/dijon/migration/_output/_backup_deleted")
backup_mng = BackupManager(api_client=isogeo, output_folder=backup_path)
# launching backup
amplitude = 50
bound_range = int(len(li_md_to_delete) / amplitude)
li_bound = []
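# A minimal sketch of the chunking these bounds prepare (assumed continuation):
# slice li_md_to_delete into groups of `amplitude` UUIDs so each backup request
# stays small; what is done with each chunk depends on the BackupManager API.
for bound in range(0, len(li_md_to_delete), amplitude):
    chunk = li_md_to_delete[bound : bound + amplitude]
    logger.info("Backing up a chunk of {} metadatas".format(len(chunk)))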
# retrieve xlsx infos
xlsx_md = record.get("md")
logger.info(
    "\n------------- Update isogeo md from {} xlsx one ---------------".format(
        xlsx_md._id
    )
)
logger.info("Retrieving xlsx infos")
xlsx_contacts = record.get("contacts")
xlsx_kws = record.get("keywords")
xlsx_inspire = record.get("inspireThemes")
xlsx_events = record.get("events")

try:
    logger.info("Retrieving isogeo infos")
    # retrieve isogeo md
    isogeo_md = Metadata().clean_attributes(
        [md for md in search.results if md.get("_id") == xlsx_md._id][0]
    )
    isogeo_contacts = [
        v for k, v in isogeo_md.tags.items() if k.startswith("contact:")
    ]
    isogeo_kws = [
        v for k, v in isogeo_md.tags.items() if k.startswith("keyword:is")
    ]
    isogeo_inspireTh = [
        v.strip() for k, v in isogeo_md.tags.items() if k.startswith("keyword:in")
    ]
except Exception as e:
    logger.error("Failed to retrieve isogeo_md: {}".format(e))
def md2docx(self, docx_template: DocxTemplate, md: Metadata, share: Share = None):
    """Dump Isogeo metadata into a docx template.

    :param DocxTemplate docx_template: Word template to fill
    :param Metadata md: metadata to dump into the template
    :param Share share: share in which the metadata is. Used to build the view URL.
    """
    logger.debug(
        "Starting the export into Word .docx of {} ({})".format(
            md.title_or_name(slugged=1), md._id
        )
    )

    # template context starting with metadata attributes which do not require
    # any special formatting
    context = {
        # IDENTIFICATION
        "varType": self.isogeo_tr("formatTypes", md.type),
        "varTitle": self.fmt.clean_xml(md.title),
        "varAbstract": self.fmt.clean_xml(md.abstract),
        "varNameTech": self.fmt.clean_xml(md.name),
        "varOwner": md.groupName,
        "varPath": self.fmt.clean_xml(md.path),
        # QUALITY
        "varTopologyInfo": self.fmt.clean_xml(md.topologicalConsistency),
        # HISTORY
        "varCollectContext": self.fmt.clean_xml(md.collectionContext),
        "varCollectMethod": self.fmt.clean_xml(md.collectionMethod),
        "varValidityComment": self.fmt.clean_xml(md.validityComment),
        # GEOGRAPHY
        "varEncoding": self.fmt.clean_xml(md.encoding),
        "varScale": self.fmt.clean_xml(md.scale),
        "varGeometry": self.fmt.clean_xml(md.geometry),
        "varObjectsCount": self.fmt.clean_xml(md.features),
        # METADATA
        "varMdDtCrea": utils.hlpr_datetimes(md._created).strftime(self.datetimes_fmt),
        "varMdDtUpda": utils.hlpr_datetimes(md._modified).strftime(self.datetimes_fmt),
        "varMdDtExp": datetime.now().strftime(self.datetimes_fmt),
    }

    # -- TAGS ------------------------------------------------------------
    # extracting & parsing tags
    li_motscles = []
    li_theminspire = []

    # default values
    context["varInspireConformity"] = self.isogeo_tr("quality", "isNotConform")

    # looping on tags
    for tag in md.tags.keys():
        # free keywords
        if tag.startswith("keyword:isogeo"):
            li_motscles.append(md.tags.get(tag))
            continue
        # INSPIRE themes
        if tag.startswith("keyword:inspire-theme"):
            li_theminspire.append(md.tags.get(tag))
            continue
        # coordinate system
        if tag.startswith("coordinate-system"):
            context["varSRS"] = md.tags.get(tag)
            continue
        # format
        if tag.startswith("format"):
            context["varFormat"] = md.tags.get(tag)
            if md.formatVersion:
                context["varFormat"] += " " + md.formatVersion
            continue
        # INSPIRE conformity
        if tag.startswith("conformity:inspire"):
            context["varInspireConformity"] = self.isogeo_tr("quality", "isConform")
            continue

    # add tags to the template context
    context["varKeywords"] = " ; ".join(li_motscles)
    context["varKeywordsCount"] = len(li_motscles)
    context["varInspireTheme"] = " ; ".join(li_theminspire)

    # formatting links to visualize on OpenCatalog and edit on APP
    if share is not None:
        context["varViewOC"] = utils.get_view_url(
            md_id=md._id, share_id=share._id, share_token=share.urlToken
        )
    else:
        logger.debug(
            "No OpenCatalog URL for metadata: {} ({})".format(
                md.title_or_name(), md._id
            )
        )
    # link to APP
    context["varEditAPP"] = utils.get_edit_url(md)

    # -- CONTACTS ----------------------------------------------------------
    contacts_out = []
    if md.contacts:
        # formatting contacts
        for ct_in in md.contacts:
            ct = {}
            # translate contact role
            ct["role"] = self.isogeo_tr("roles", ct_in.get("role"))
            # ensure other contacts fields
            ct["name"] = ct_in.get("contact").get("name", "NR")
            ct["organization"] = ct_in.get("contact").get("organization", "")
            ct["email"] = ct_in.get("contact").get("email", "")
            ct["phone"] = ct_in.get("contact").get("phone", "")
            ct["fax"] = ct_in.get("contact").get("fax", "")
            ct["addressLine1"] = ct_in.get("contact").get("addressLine1", "")
            ct["addressLine2"] = ct_in.get("contact").get("addressLine2", "")
            ct["zipCode"] = ct_in.get("contact").get("zipCode", "")
            ct["city"] = ct_in.get("contact").get("city", "")
            ct["countryCode"] = ct_in.get("contact").get("countryCode", "")
            # store into the final list
            contacts_out.append(ct)
    # add it to final context
    context["varContactsCount"] = len(contacts_out)
    context["varContactsDetails"] = contacts_out

    # -- ATTRIBUTES ----------------------------------------------------------
    fields_out = []
    if md.type == "vectorDataset" and isinstance(md.featureAttributes, list):
        for f_in in md.featureAttributes:
            field = {}
            # ensure other fields
            field["name"] = self.fmt.clean_xml(f_in.get("name", ""))
            field["alias"] = self.fmt.clean_xml(f_in.get("alias", ""))
            field["description"] = self.fmt.clean_xml(f_in.get("description", ""))
            field["dataType"] = f_in.get("dataType", "")
            field["language"] = f_in.get("language", "")
            # store into the final list
            fields_out.append(field)
    # add to the final context
    context["varFieldsCount"] = len(fields_out)
    context["varFields"] = fields_out

    # -- EVENTS ------------------------------------------------------------
    events_out = []
    if md.events:
        for e in md.events:
            evt = Event(**e)
            # pop creation events (already in the export document)
            if evt.kind == "creation":
                continue
            # prevent invalid characters for XML formatting in description
            evt.description = self.fmt.clean_xml(evt.description)
            # make date human readable
            evt.date = utils.hlpr_datetimes(evt.date).strftime(self.dates_fmt)
            # translate event kind
            # evt.kind = self.isogeo_tr("events", evt.kind)
            # append
            events_out.append(evt.to_dict())
    # add to the final context
    context["varEventsCount"] = len(events_out)
    context["varEvents"] = events_out

    # -- HISTORY -------------------------------------------------------------
    # data events
    if md.created:
        context["varDataDtCrea"] = utils.hlpr_datetimes(md.created).strftime(
            self.dates_fmt
        )
    if md.modified:
        context["varDataDtUpda"] = utils.hlpr_datetimes(md.modified).strftime(
            self.dates_fmt
        )
    if md.published:
        context["varDataDtPubl"] = utils.hlpr_datetimes(md.published).strftime(
            self.dates_fmt
        )
    # validity
    if md.validFrom:
        context["varValidityStart"] = utils.hlpr_datetimes(md.validFrom).strftime(
            self.dates_fmt
        )
    # end validity date
    if md.validTo:
        context["varValidityEnd"] = utils.hlpr_datetimes(md.validTo).strftime(
            self.dates_fmt
        )

    # -- SPECIFICATIONS -------------------------------------------------------
    if md.specifications:
        context["varSpecifications"] = self.fmt.specifications(
            md_specifications=md.specifications
        )

    # -- CGUs -------------------------------------------------------------------
    if md.conditions:
        context["varConditions"] = self.fmt.conditions(md_conditions=md.conditions)

    # -- LIMITATIONS -------------------------------------------------------------
    if md.limitations:
        context["varLimitations"] = self.fmt.limitations(md_limitations=md.limitations)

    # -- THUMBNAIL -----------------------------------------------------------------
    if md._id in self.thumbnails and Path(self.thumbnails.get(md._id)).is_file():
        thumbnail = str(Path(self.thumbnails.get(md._id)).resolve())
        context["varThumbnail"] = InlineImage(docx_template, thumbnail)
        logger.info(
            "Thumbnail found for {}: {}".format(md.title_or_name(1), thumbnail)
        )

    # fill the template file
    try:
        docx_template.render(context, autoescape=True)
        logger.info(
            "Vector metadata stored: {} ({})".format(
                md.title_or_name(slugged=1), md._id
            )
        )
    except etree.XMLSyntaxError as e:
        logger.error(
            "Invalid character in XML: {}. "
            "Any special character (<, >, &...)? Check: {}".format(
                e, context.get("varEditAPP")
            )
        )
    except (UnicodeEncodeError, UnicodeDecodeError) as e:
        logger.error(
            "Encoding error: {}. "
            "Any special character (<, >, &...)? Check: {}".format(
                e, context.get("varEditAPP")
            )
        )
    except Exception as e:
        logger.error(
            "Unexpected error: {}. Check: {}".format(e, context.get("varEditAPP"))
        )

    # end of function
    return
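# A minimal usage sketch (assumed names), mirroring the export test further above:
# `exporter` is the object exposing md2docx, `metadata` a cleaned Metadata, and
# `word_template_path` a hypothetical path to the .docx template.
tpl = DocxTemplate(word_template_path)
exporter.md2docx(tpl, metadata)
tpl.save("md_export.docx")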