def logical_delete(self, user, delete_res_files=True):
    """Delete this logical file, its metadata, and (optionally) its resource files.

    This is primarily used by the system as part of deleting a resource file
    object: whenever a request is made to delete a specific resource file, if
    that file belongs to a logical file, every file in the same logical file
    group is deleted. Custom logic that needs to delete a logical file object
    (LFO) must call LFO.logical_delete() rather than LFO.delete() so that the
    associated metadata object is cleaned up as well.

    :param user: user requesting the delete (forwarded to
        delete_resource_file for permission checking)
    :param delete_res_files: if True, every resource file belonging to this
        logical file is deleted too
    """
    # imported here (not at module level) to avoid a circular import
    from hs_core.hydroshare.resource import delete_resource_file

    # remove every resource file that belongs to this logical file;
    # delete_logical_file=False stops delete_resource_file from recursing
    # back into this logical file
    if delete_res_files:
        for res_file in self.files.all():
            delete_resource_file(res_file.resource.short_id, res_file.id, user,
                                 delete_logical_file=False)

    # grab a reference to the metadata object first: deleting the logical
    # file row does NOT cascade to the metadata object, so it must be
    # deleted explicitly afterwards
    meta = self.metadata if self.has_metadata else None
    super(AbstractLogicalFile, self).delete()
    if meta is not None:
        # this also removes all metadata elements that have generic
        # relations with the metadata object
        meta.delete()
def set_file_type(cls, resource, file_id, user):
    """
    Sets a json resource file to RefTimeseriesFile type

    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as RefTimeSeriesFile type
    :param user: user who is setting the file type
    :return: None
    :raises ValidationError: if the file is missing, is not a .refts file,
        fails JSON validation, is not part of a generic logical file, or any
        step of the conversion fails
    """
    log = logging.getLogger()

    # get the the selected resource file object
    res_file = utils.get_resource_file_by_id(resource, file_id)
    if res_file is None:
        raise ValidationError("File not found.")

    if res_file.extension != '.refts':
        raise ValidationError("Not a Ref Time Series file.")

    files_to_add_to_resource = []
    if res_file.has_generic_logical_file:
        # validate the json content before making any changes to the resource
        try:
            json_file_content = _validate_json_file(res_file)
        except Exception as ex:
            # NOTE(review): ex.message is Python-2-only; file-wide convention here
            raise ValidationError(ex.message)

        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)
        files_to_add_to_resource.append(temp_file)
        file_folder = res_file.file_folder
        # all DB changes below roll back together if an exception propagates
        with transaction.atomic():
            # first delete the json file that we retrieved from irods
            # for setting it to reftimeseries file type
            delete_resource_file(resource.short_id, res_file.id, user)

            # create a reftimeseries logical file object to be associated with
            # resource files
            logical_file = cls.create()
            logical_file.metadata.json_file_content = json_file_content
            logical_file.metadata.save()

            try:
                # add the json file back to the resource
                uploaded_file = UploadedFile(file=open(temp_file, 'rb'),
                                             name=os.path.basename(temp_file))
                # the added resource file will be part of a new generic logical file by default
                new_res_file = utils.add_file_to_resource(
                    resource, uploaded_file, folder=file_folder)

                # delete the generic logical file object
                if new_res_file.logical_file is not None:
                    # deleting the file level metadata object will delete the associated
                    # logical file object
                    new_res_file.logical_file.metadata.delete()

                # make the resource file we added as part of the logical file
                logical_file.add_resource_file(new_res_file)
                logical_file.metadata.save()
                # title comes from the validated json content
                logical_file.dataset_name = logical_file.metadata.get_title_from_json()
                logical_file.save()
                # extract metadata
                _extract_metadata(resource, logical_file)
                log.info("RefTimeseries file type - json file was added to the resource.")
            except Exception as ex:
                msg = "RefTimeseries file type. Error when setting file type. Error:{}"
                msg = msg.format(ex.message)
                log.exception(msg)
                # re-raise so the enclosing transaction rolls back
                raise ValidationError(msg)
            finally:
                # remove temp dir regardless of success or failure
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)

        log.info("RefTimeseries file type was created.")
    else:
        err_msg = "Selected file is not part of a GenericLogical file."
        log.error(err_msg)
        raise ValidationError(err_msg)
def check_irods_files(resource, stop_on_error=False, log_errors=True,
                      echo_errors=False, return_errors=False,
                      sync_ispublic=False, clean_irods=False, clean_django=False):
    """Check whether files in resource.files and on iRODS agree.

    :param resource: resource to check
    :param stop_on_error: whether to raise a ValidationError exception on first error
    :param log_errors: whether to log errors to Django log
    :param echo_errors: whether to print errors on stdout
    :param return_errors: whether to collect errors in an array and return them.
    :param sync_ispublic: whether to repair deviations between ResourceAccess.public
           and AVU isPublic
    :param clean_irods: whether to delete files in iRODs that are not in Django
    :param clean_django: whether to delete files in Django that are not in iRODs
    :return: tuple (errors, ecount) -- list of error messages (empty unless
           return_errors=True) and the number of errors found
    """
    # imported here, presumably to avoid a circular import -- TODO confirm
    from hs_core.hydroshare.resource import delete_resource_file
    logger = logging.getLogger(__name__)
    istorage = resource.get_irods_storage()
    errors = []
    ecount = 0

    # skip federated resources if not configured to handle these
    if resource.is_federated and not settings.REMOTE_USE_IRODS:
        msg = "check_irods_files: skipping check of federated resource {} in unfederated mode"\
            .format(resource.short_id)
        if echo_errors:
            print(msg)
        if log_errors:
            logger.info(msg)

    # skip resources that do not exist in iRODS
    elif not istorage.exists(resource.root_path):
        msg = "root path {} does not exist in iRODS".format(resource.root_path)
        ecount += 1
        if echo_errors:
            print(msg)
        if log_errors:
            logger.error(msg)
        if return_errors:
            errors.append(msg)

    else:
        # Step 2: does every file in Django refer to an existing file in iRODS?
        # NOTE(review): there is no "Step 1" in this variant of the routine
        for f in resource.files.all():
            if not istorage.exists(f.storage_path):
                ecount += 1
                msg = "check_irods_files: django file {} does not exist in iRODS"\
                    .format(f.storage_path)
                if clean_django:
                    # drop the dangling Django record; keep the logical file
                    delete_resource_file(resource.short_id, f.short_path, resource.creator,
                                         delete_logical_file=False)
                    msg += " (DELETED FROM DJANGO)"
                if echo_errors:
                    print(msg)
                if log_errors:
                    logger.error(msg)
                if return_errors:
                    errors.append(msg)
                if stop_on_error:
                    raise ValidationError(msg)

        # Step 3: for composite resources, does every composite metadata file exist?
        from hs_composite_resource.models import CompositeResource as CR
        if isinstance(resource, CR):
            for lf in resource.logical_files:
                if not istorage.exists(lf.metadata_file_path):
                    ecount += 1
                    msg = "check_irods_files: logical metadata file {} does not exist in iRODS"\
                        .format(lf.metadata_file_path)
                    if echo_errors:
                        print(msg)
                    if log_errors:
                        logger.error(msg)
                    if return_errors:
                        errors.append(msg)
                    if stop_on_error:
                        raise ValidationError(msg)
                if not istorage.exists(lf.map_file_path):
                    ecount += 1
                    msg = "check_irods_files: logical map file {} does not exist in iRODS"\
                        .format(lf.map_file_path)
                    if echo_errors:
                        print(msg)
                    if log_errors:
                        logger.error(msg)
                    if return_errors:
                        errors.append(msg)
                    if stop_on_error:
                        raise ValidationError(msg)

        # Step 4: does every iRODS file correspond to a record in files?
        error2, ecount2 = __check_irods_directory(resource, resource.file_path, logger,
                                                  stop_on_error=stop_on_error,
                                                  log_errors=log_errors,
                                                  echo_errors=echo_errors,
                                                  return_errors=return_errors,
                                                  clean=clean_irods)
        errors.extend(error2)
        ecount += ecount2

        # Step 5: check whether the iRODS public flag agrees with Django
        django_public = resource.raccess.public
        irods_public = None
        try:
            irods_public = resource.getAVU('isPublic')
        except SessionException as ex:
            msg = "cannot read isPublic attribute of {}: {}"\
                .format(resource.short_id, ex.stderr)
            ecount += 1
            if log_errors:
                logger.error(msg)
            if echo_errors:
                print(msg)
            if return_errors:
                errors.append(msg)
            if stop_on_error:
                raise ValidationError(msg)

        if irods_public is not None:
            # convert to boolean (AVU values are strings)
            irods_public = str(irods_public).lower() == 'true'

        if irods_public is None or irods_public != django_public:
            ecount += 1
            if not django_public:  # and irods_public
                msg = "check_irods_files: resource {} public in irods, private in Django"\
                    .format(resource.short_id)
                if sync_ispublic:
                    try:
                        resource.setAVU('isPublic', 'false')
                        msg += " (REPAIRED IN IRODS)"
                    except SessionException as ex:
                        msg += ": (CANNOT REPAIR: {})"\
                            .format(ex.stderr)
            else:  # django_public and not irods_public
                msg = "check_irods_files: resource {} private in irods, public in Django"\
                    .format(resource.short_id)
                if sync_ispublic:
                    try:
                        resource.setAVU('isPublic', 'true')
                        msg += " (REPAIRED IN IRODS)"
                    except SessionException as ex:
                        msg += ": (CANNOT REPAIR: {})"\
                            .format(ex.stderr)

            if msg != '':
                if echo_errors:
                    print(msg)
                if log_errors:
                    logger.error(msg)
                if return_errors:
                    errors.append(msg)
                if stop_on_error:
                    raise ValidationError(msg)

    if ecount > 0:
        # print information about the affected resource (not really an error)
        msg = "check_irods_files: affected resource {} type is {}, title is '{}'"\
            .format(resource.short_id, resource.resource_type, resource.title)
        if log_errors:
            logger.error(msg)
        if echo_errors:
            print(msg)
        if return_errors:
            errors.append(msg)

    return errors, ecount  # empty unless return_errors=True
def set_file_type(cls, resource, file_id, user):
    """
    Sets a .nc resource file to NetCDF file type

    (The original docstring described the raster file type; the code below
    validates a .nc file and builds a NetCDF logical file.)

    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as NetCDF file type
    :param user: user who is setting the file type
    :return: None
    :raises ValidationError: if the file is missing, is not a .nc file, fails
        NetCDF validation, or any step of the conversion fails
    """
    # had to import it here to avoid import loop
    from hs_core.views.utils import create_folder
    log = logging.getLogger()

    # get the file from irods
    res_file = utils.get_resource_file_by_id(resource, file_id)
    if res_file is None:
        raise ValidationError("File not found.")

    if res_file.extension != '.nc':
        raise ValidationError("Not a NetCDF file.")

    # base file name (no path included)
    file_name = res_file.file_name
    # file name without the extension
    nc_file_name = file_name.split(".")[0]

    resource_metadata = []
    file_type_metadata = []
    files_to_add_to_resource = []
    if res_file.has_generic_logical_file:
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)
        files_to_add_to_resource.append(temp_file)
        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
            # populate resource_metadata and file_type_metadata lists with extracted metadata
            add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                 res_type_specific_meta, file_type_metadata, resource)
            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            files_to_add_to_resource.append(dump_file)
            file_folder = res_file.file_folder
            # all DB changes below roll back together if an exception propagates
            with transaction.atomic():
                # first delete the netcdf file that we retrieved from irods
                # for setting it to netcdf file type
                delete_resource_file(resource.short_id, res_file.id, user)

                # create a netcdf logical file object to be associated with
                # resource files
                logical_file = cls.create()

                # by default set the dataset_name attribute of the logical file to the
                # name of the file selected to set file type unless the extracted metadata
                # has a value for title
                dataset_title = res_dublin_core_meta.get('title', None)
                if dataset_title is not None:
                    logical_file.dataset_name = dataset_title
                else:
                    logical_file.dataset_name = nc_file_name
                logical_file.save()

                try:
                    # create a folder for the netcdf file type using the base file
                    # name as the name for the new folder
                    new_folder_path = cls.compute_file_type_folder(resource, file_folder,
                                                                   nc_file_name)
                    fed_file_full_path = ''
                    if resource.resource_federation_path:
                        fed_file_full_path = os.path.join(resource.root_path,
                                                          new_folder_path)

                    create_folder(resource.short_id, new_folder_path)
                    log.info("Folder created:{}".format(new_folder_path))

                    new_folder_name = new_folder_path.split('/')[-1]
                    if file_folder is None:
                        upload_folder = new_folder_name
                    else:
                        upload_folder = os.path.join(file_folder, new_folder_name)
                    # add all new files (the .nc file and the ncdump file) to the resource
                    for f in files_to_add_to_resource:
                        uploaded_file = UploadedFile(file=open(f, 'rb'),
                                                     name=os.path.basename(f))
                        new_res_file = utils.add_file_to_resource(
                            resource, uploaded_file, folder=upload_folder,
                            fed_res_file_name_or_path=fed_file_full_path)

                        # make each resource file we added as part of the logical file
                        logical_file.add_resource_file(new_res_file)

                    log.info("NetCDF file type - new files were added to the resource.")
                except Exception as ex:
                    msg = "NetCDF file type. Error when setting file type. Error:{}"
                    msg = msg.format(ex.message)
                    log.exception(msg)
                    # TODO: in case of any error put the original file back and
                    # delete the folder that was created
                    raise ValidationError(msg)
                finally:
                    # remove temp dir regardless of success or failure
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

                log.info("NetCDF file type was created.")

                # use the extracted metadata to populate resource metadata
                for element in resource_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    # (items()[0] assumes single-key dicts; Python-2-only indexing)
                    k, v = element.items()[0]
                    if k == 'title':
                        # update title element
                        title_element = resource.metadata.title
                        resource.metadata.update_element('title', title_element.id, **v)
                    else:
                        resource.metadata.create_element(k, **v)

                log.info("Resource - metadata was saved to DB")

                # use the extracted metadata to populate file metadata
                for element in file_type_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = element.items()[0]
                    if k == 'subject':
                        logical_file.metadata.keywords = v
                        logical_file.metadata.save()
                    else:
                        logical_file.metadata.create_element(k, **v)
                log.info("NetCDF file type - metadata was saved to DB")
        else:
            err_msg = "Not a valid NetCDF file. File type file validation failed."
            log.error(err_msg)
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError(err_msg)
def check_irods_files(resource, stop_on_error=False, log_errors=True,
                      echo_errors=False, return_errors=False,
                      sync_ispublic=False, clean_irods=False, clean_django=False):
    """Check whether files in resource.files and on iRODS agree.

    :param resource: resource to check
    :param stop_on_error: whether to raise a ValidationError exception on first error
    :param log_errors: whether to log errors to Django log
    :param echo_errors: whether to print errors on stdout
    :param return_errors: whether to collect errors in an array and return them.
    :param sync_ispublic: whether to repair deviations between ResourceAccess.public
           and AVU isPublic
    :param clean_irods: whether to delete files in iRODs that are not in Django
    :param clean_django: whether to delete files in Django that are not in iRODs
    :return: tuple (errors, ecount) -- list of error messages (empty unless
           return_errors=True) and the number of errors found
    """
    # imported here, presumably to avoid a circular import -- TODO confirm
    from hs_core.hydroshare.resource import delete_resource_file
    logger = logging.getLogger(__name__)
    istorage = resource.get_irods_storage()
    errors = []
    ecount = 0

    # fallback path is used when the settings variable is not defined
    defaultpath = getattr(settings, 'HS_USER_ZONE_PRODUCTION_PATH',
                          '/hydroshareuserZone/home/localHydroProxy')

    # skip federated resources if not configured to handle these
    if resource.is_federated and not settings.REMOTE_USE_IRODS:
        msg = "check_irods_files: skipping check of federated resource {} in unfederated mode"\
            .format(resource.short_id)
        if echo_errors:
            print(msg)
        if log_errors:
            logger.info(msg)

    # skip resources that do not exist in iRODS
    elif not istorage.exists(resource.root_path):
        msg = "root path {} does not exist in iRODS".format(resource.root_path)
        ecount += 1
        if echo_errors:
            print(msg)
        if log_errors:
            logger.error(msg)
        if return_errors:
            errors.append(msg)

    else:
        # Step 1: repair irods user file paths if necessary
        if clean_irods or clean_django:
            # fix user paths before check (required). This is an idempotent step.
            if resource.resource_federation_path == defaultpath:
                error2, ecount2 = fix_irods_user_paths(resource,
                                                       log_actions=log_errors,
                                                       echo_actions=echo_errors,
                                                       return_actions=False)
                errors.extend(error2)
                ecount += ecount2

        # Step 2: does every file in Django refer to an existing file in iRODS?
        for f in resource.files.all():
            if not istorage.exists(f.storage_path):
                ecount += 1
                msg = "check_irods_files: django file {} does not exist in iRODS"\
                    .format(f.storage_path)
                if clean_django:
                    # drop the dangling Django record; keep the logical file
                    delete_resource_file(resource.short_id, f.short_path, resource.creator,
                                         delete_logical_file=False)
                    msg += " (DELETED FROM DJANGO)"
                if echo_errors:
                    print(msg)
                if log_errors:
                    logger.error(msg)
                if return_errors:
                    errors.append(msg)
                if stop_on_error:
                    raise ValidationError(msg)

        # Step 3: for composite resources, does every composite metadata file exist?
        from hs_composite_resource.models import CompositeResource as CR
        if isinstance(resource, CR):
            for lf in resource.logical_files:
                if not istorage.exists(lf.metadata_file_path):
                    ecount += 1
                    msg = "check_irods_files: logical metadata file {} does not exist in iRODS"\
                        .format(lf.metadata_file_path)
                    if echo_errors:
                        print(msg)
                    if log_errors:
                        logger.error(msg)
                    if return_errors:
                        errors.append(msg)
                    if stop_on_error:
                        raise ValidationError(msg)
                if not istorage.exists(lf.map_file_path):
                    ecount += 1
                    msg = "check_irods_files: logical map file {} does not exist in iRODS"\
                        .format(lf.map_file_path)
                    if echo_errors:
                        print(msg)
                    if log_errors:
                        logger.error(msg)
                    if return_errors:
                        errors.append(msg)
                    if stop_on_error:
                        raise ValidationError(msg)

        # Step 4: does every iRODS file correspond to a record in files?
        error2, ecount2 = __check_irods_directory(resource, resource.file_path, logger,
                                                  stop_on_error=stop_on_error,
                                                  log_errors=log_errors,
                                                  echo_errors=echo_errors,
                                                  return_errors=return_errors,
                                                  clean=clean_irods)
        errors.extend(error2)
        ecount += ecount2

        # Step 5: check whether the iRODS public flag agrees with Django
        django_public = resource.raccess.public
        irods_public = None
        try:
            irods_public = resource.getAVU('isPublic')
        except SessionException as ex:
            msg = "cannot read isPublic attribute of {}: {}"\
                .format(resource.short_id, ex.stderr)
            ecount += 1
            if log_errors:
                logger.error(msg)
            if echo_errors:
                print(msg)
            if return_errors:
                errors.append(msg)
            if stop_on_error:
                raise ValidationError(msg)

        if irods_public is not None:
            # convert to boolean (AVU values are strings)
            irods_public = str(irods_public).lower() == 'true'

        if irods_public is None or irods_public != django_public:
            ecount += 1
            if not django_public:  # and irods_public
                msg = "check_irods_files: resource {} public in irods, private in Django"\
                    .format(resource.short_id)
                if sync_ispublic:
                    try:
                        resource.setAVU('isPublic', 'false')
                        msg += " (REPAIRED IN IRODS)"
                    except SessionException as ex:
                        msg += ": (CANNOT REPAIR: {})"\
                            .format(ex.stderr)
            else:  # django_public and not irods_public
                msg = "check_irods_files: resource {} private in irods, public in Django"\
                    .format(resource.short_id)
                if sync_ispublic:
                    try:
                        resource.setAVU('isPublic', 'true')
                        msg += " (REPAIRED IN IRODS)"
                    except SessionException as ex:
                        msg += ": (CANNOT REPAIR: {})"\
                            .format(ex.stderr)

            if msg != '':
                if echo_errors:
                    print(msg)
                if log_errors:
                    logger.error(msg)
                if return_errors:
                    errors.append(msg)
                if stop_on_error:
                    raise ValidationError(msg)

    if ecount > 0:
        # print information about the affected resource (not really an error);
        # title may contain non-ascii characters, hence the replace-encoding
        msg = "check_irods_files: affected resource {} type is {}, title is '{}'"\
            .format(resource.short_id, resource.resource_type,
                    resource.title.encode('ascii', 'replace'))
        if log_errors:
            logger.error(msg)
        if echo_errors:
            print(msg)
        if return_errors:
            errors.append(msg)

    return errors, ecount  # empty unless return_errors=True
def set_file_type(cls, resource, file_id, user):
    """
    Sets a tif or zip raster resource file to GeoRasterFile type

    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as GeoRasterFile type
    :param user: user who is setting the file type
    :return: None
    :raises ValidationError: if the file is missing, fails raster validation,
        is not part of a generic logical file, or any step of the conversion
        fails
    """
    # had to import it here to avoid import loop
    from hs_core.views.utils import create_folder, remove_folder

    log = logging.getLogger()

    # get the file from irods
    res_file = utils.get_resource_file_by_id(resource, file_id)

    # FIX: check for a missing file *before* dereferencing res_file. The
    # original code read res_file.extension/file_folder first, which raised
    # AttributeError for an unknown file_id and made the intended
    # ValidationError branch below unreachable.
    if res_file is None:
        err_msg = "Failed to set Geo raster file type. " \
                  "Resource doesn't have the specified file."
        log.error(err_msg)
        raise ValidationError(err_msg)

    # base file name (no path included)
    file_name = utils.get_resource_file_name_and_extension(res_file)[1]
    # file name without the extension
    file_name = file_name[:-len(res_file.extension)]
    file_folder = res_file.file_folder
    upload_folder = ''
    if res_file.has_generic_logical_file:
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        # validate the file
        error_info, files_to_add_to_resource = raster_file_validation(raster_file=temp_file)
        if not error_info:
            log.info("Geo raster file type file validation successful.")
            # extract metadata from the .vrt file produced by validation
            temp_dir = os.path.dirname(temp_file)
            temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                                  if '.vrt' == os.path.splitext(f)[1]].pop()
            metadata = extract_metadata(temp_vrt_file_path)
            log.info("Geo raster file type metadata extraction was successful.")
            # all DB changes below roll back together if an exception propagates
            with transaction.atomic():
                # create a geo raster logical file object to be associated with resource files
                logical_file = cls.create()
                # by default set the dataset_name attribute of the logical file to the
                # name of the file selected to set file type
                logical_file.dataset_name = file_name
                logical_file.save()
                try:
                    # create a folder for the raster file type using the base file name as the
                    # name for the new folder
                    new_folder_path = cls.compute_file_type_folder(resource, file_folder,
                                                                   file_name)
                    log.info("Folder created:{}".format(new_folder_path))
                    create_folder(resource.short_id, new_folder_path)

                    new_folder_name = new_folder_path.split('/')[-1]
                    if file_folder is None:
                        upload_folder = new_folder_name
                    else:
                        upload_folder = os.path.join(file_folder, new_folder_name)

                    # add all new files to the resource
                    for f in files_to_add_to_resource:
                        uploaded_file = UploadedFile(file=open(f, 'rb'),
                                                     name=os.path.basename(f))
                        # the added resource file will be part of a new generic logical file
                        # by default
                        new_res_file = utils.add_file_to_resource(resource, uploaded_file,
                                                                  folder=upload_folder)

                        # delete the generic logical file object
                        if new_res_file.logical_file is not None:
                            # deleting the file level metadata object will delete the
                            # associated logical file object
                            new_res_file.logical_file.metadata.delete()

                        # make each resource file we added as part of the logical file
                        logical_file.add_resource_file(new_res_file)

                    log.info("Geo raster file type - new files were added to the resource.")

                    # use the extracted metadata to populate file metadata
                    for element in metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        logical_file.metadata.create_element(k, **v)
                    log.info("Geo raster file type - metadata was saved to DB")

                    # set resource to private if logical file is missing required metadata
                    resource.update_public_and_discoverable()

                    # delete the original resource file
                    delete_resource_file(resource.short_id, res_file.id, user)
                    log.info("Deleted original resource file.")
                except Exception as ex:
                    msg = "Geo raster file type. Error when setting file type. Error:{}"
                    msg = msg.format(ex.message)
                    log.exception(msg)
                    if upload_folder:
                        # delete any new files uploaded as part of setting file type
                        folder_to_remove = os.path.join('data', 'contents', upload_folder)
                        remove_folder(user, resource.short_id, folder_to_remove)
                        log.info("Deleted newly created file type folder")
                    raise ValidationError(msg)
                finally:
                    # remove temp dir regardless of success or failure
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)
        else:
            err_msg = "Geo raster file type file validation failed.{}".format(
                ' '.join(error_info))
            log.info(err_msg)
            raise ValidationError(err_msg)
    else:
        err_msg = "Failed to set Geo raster file type." \
                  "The specified file doesn't have a generic logical file type."
        log.error(err_msg)
        raise ValidationError(err_msg)
def set_file_type(cls, resource, file_id, user):
    """
    Sets a .shp or .zip resource file to GeoFeatureFile type

    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as GeoFeatureFile type
    :param user: user who is setting the file type
    :return: None
    :raises ValidationError: if the file is missing, has the wrong extension,
        is not part of a generic file type, fails shapefile metadata
        extraction, or the conversion fails (but see NOTE below)
    """
    # had to import it here to avoid import loop
    from hs_core.views.utils import create_folder, remove_folder
    log = logging.getLogger()

    # get the file from irods
    res_file = utils.get_resource_file_by_id(resource, file_id)
    if res_file is None or not res_file.exists:
        raise ValidationError("File not found.")

    if res_file.extension.lower() not in ('.zip', '.shp'):
        raise ValidationError("Not a valid geographic feature file.")

    if not res_file.has_generic_logical_file:
        raise ValidationError("Selected file must be part of a generic file type.")

    # validate and pull the shapefile set + metadata before changing anything
    try:
        meta_dict, shape_files, shp_res_files = extract_metadata_and_files(resource, res_file)
    except ValidationError as ex:
        log.exception(ex.message)
        raise ex

    # hold on to temp dir for final clean up
    temp_dir = os.path.dirname(shape_files[0])
    file_name = res_file.file_name
    # file name without the extension
    base_file_name = file_name[:-len(res_file.extension)]
    # locate the optional .shp.xml metadata file among the shapefile set
    xml_file = ''
    for f in shape_files:
        if f.lower().endswith('.shp.xml'):
            xml_file = f
            break

    file_folder = res_file.file_folder
    file_type_success = False
    upload_folder = ''
    msg = "GeoFeature file type. Error when setting file type. Error:{}"
    # all DB changes below roll back together if an exception propagates
    with transaction.atomic():
        # create a GeoFeature logical file object to be associated with
        # resource files
        logical_file = cls.create()
        # by default set the dataset_name attribute of the logical file to the
        # name of the file selected to set file type
        logical_file.dataset_name = base_file_name
        logical_file.save()
        try:
            # create a folder for the geofeature file type using the base file
            # name as the name for the new folder
            new_folder_path = cls.compute_file_type_folder(resource, file_folder,
                                                           base_file_name)
            create_folder(resource.short_id, new_folder_path)
            log.info("Folder created:{}".format(new_folder_path))

            new_folder_name = new_folder_path.split('/')[-1]
            if file_folder is None:
                upload_folder = new_folder_name
            else:
                upload_folder = os.path.join(file_folder, new_folder_name)

            # add all new files to the resource
            files_to_add_to_resource = shape_files
            for fl in files_to_add_to_resource:
                uploaded_file = UploadedFile(file=open(fl, 'rb'),
                                             name=os.path.basename(fl))
                new_res_file = utils.add_file_to_resource(resource, uploaded_file,
                                                          folder=upload_folder)

                # make each resource file we added part of the logical file
                logical_file.add_resource_file(new_res_file)

            log.info("GeoFeature file type - files were added to the file type.")
            add_metadata(resource, meta_dict, xml_file, logical_file)
            log.info("GeoFeature file type and resource level metadata updated.")
            # delete the original resource files used as part of setting file type
            for fl in shp_res_files:
                delete_resource_file(resource.short_id, fl.id, user)
            log.info("Deleted original resource files.")
            file_type_success = True
        except Exception as ex:
            # deliberately swallowed here; failure is surfaced after cleanup below
            msg = msg.format(ex.message)
            log.exception(msg)
        finally:
            # remove temp dir regardless of success or failure
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)

    # NOTE(review): if the conversion failed but upload_folder is still ''
    # (i.e. the failure happened before folder creation), this function
    # returns silently without raising -- confirm whether that is intended
    if not file_type_success and upload_folder:
        # delete any new files uploaded as part of setting file type
        folder_to_remove = os.path.join('data', 'contents', upload_folder)
        remove_folder(user, resource.short_id, folder_to_remove)
        log.info("Deleted newly created file type folder")
        raise ValidationError(msg)