def extract_metadata(temp_vrt_file_path):
    """Collect metadata elements for the raster described by a VRT file.

    Calls ``raster_meta_extract.get_raster_meta_dict`` on the given path and
    repackages the result as a list of single-key dicts, in this order:

    * ``{'coverage': {'type': 'box', 'value': ...}}`` -- only when the WGS84
      coverage info is truthy;
    * ``{'OriginalCoverage': {'value': ...}}`` -- only when the original
      coverage info has a non-None ``'northlimit'``;
    * ``{'CellInformation': ...}`` -- always, with ``'name'`` set to the base
      name of ``temp_vrt_file_path``;
    * one ``{'BandInformation': ...}`` per value of ``band_info``.

    :param temp_vrt_file_path: path of the VRT file to extract metadata from
    :return: list of metadata element dicts as described above
    :raises KeyError: if the extracted mapping lacks one of the keys read
        here (assuming it is a plain dict)
    """
    metadata = []
    res_md_dict = raster_meta_extract.get_raster_meta_dict(temp_vrt_file_path)

    # add core metadata coverage - box
    wgs_cov_info = res_md_dict['spatial_coverage_info']['wgs84_coverage_info']
    if wgs_cov_info:
        metadata.append({'coverage': {'type': 'box', 'value': wgs_cov_info}})

    # Save extended meta spatial reference.
    # The assumption is that if there is no value for 'northlimit' then there
    # is no value for the bounding box at all.
    orig_cov_info = res_md_dict['spatial_coverage_info']['original_coverage_info']
    if orig_cov_info['northlimit'] is not None:
        metadata.append({'OriginalCoverage': {'value': orig_cov_info}})

    # Save extended meta cell info; the cell info dict is tagged in place with
    # the VRT file name before being handed out.
    res_md_dict['cell_info']['name'] = os.path.basename(temp_vrt_file_path)
    metadata.append({'CellInformation': res_md_dict['cell_info']})

    # Save extended meta band info. The values are only iterated, so no
    # intermediate list copy is needed.
    for band_info in res_md_dict['band_info'].values():
        metadata.append({'BandInformation': band_info})

    return metadata
def extract_metadata(temp_vrt_file_path):
    """Return the metadata elements extracted from a raster VRT file.

    The raw dict produced by ``raster_meta_extract.get_raster_meta_dict`` is
    turned into a flat list of ``{element_name: element_data}`` dicts:
    an optional ``'coverage'`` box, an optional ``'OriginalCoverage'``, one
    ``'CellInformation'`` and one ``'BandInformation'`` per extracted band.

    :param temp_vrt_file_path: path of the VRT file to extract metadata from
    :return: list of single-key metadata dicts
    """
    raster_md = raster_meta_extract.get_raster_meta_dict(temp_vrt_file_path)
    elements = []

    # core metadata: WGS84 bounding box, skipped when nothing was extracted
    wgs84_box = raster_md['spatial_coverage_info']['wgs84_coverage_info']
    if wgs84_box:
        elements.append({'coverage': {'type': 'box', 'value': wgs84_box}})

    # extended metadata: coverage in the original spatial reference; a missing
    # 'northlimit' value is taken to mean there is no bounding box at all
    native_box = raster_md['spatial_coverage_info']['original_coverage_info']
    if native_box['northlimit'] is not None:
        elements.append({'OriginalCoverage': {'value': native_box}})

    # extended metadata: cell info, labelled with the VRT file name
    cell_info = raster_md['cell_info']
    cell_info['name'] = os.path.basename(temp_vrt_file_path)
    elements.append({'CellInformation': cell_info})

    # extended metadata: one element per band
    elements.extend(
        {'BandInformation': band} for band in raster_md['band_info'].values())

    return elements
shutil.rmtree(os.path.dirname(res_file_tmp_path)) copy_res_fail.append('{}:{}'.format(res.short_id, res.metadata.title.value)) continue # update the metadata for the original coverage information of all the raster resources try: if temp_dir and vrt_file_path: meta_updated = False # extract meta. # the reason to change current working directory to temp_dir is to make sure # the raster files can be found by Gdal for metadata extraction # when "relativeToVRT" parameter is set as "0" ori_dir = os.getcwd() os.chdir(temp_dir) res_md_dict = {} res_md_dict = raster_meta_extract.get_raster_meta_dict( vrt_file_path) os.chdir(ori_dir) shutil.rmtree(temp_dir) # update original coverage information for datum and coordinate string in django if res_md_dict['spatial_coverage_info']['original_coverage_info'].\ get('datum', None): res.metadata.originalCoverage.delete() v = { 'value': res_md_dict['spatial_coverage_info'] ['original_coverage_info'] } res.metadata.create_element('OriginalCoverage', **v) meta_updated = True # update the bag if meta is updated if meta_updated:
def migrate_tif_file(apps, schema_editor):
    """Regenerate VRT files and refresh band metadata of raster resources.

    For every ``RasterResource`` that has files, the resource files are copied
    into a fresh temporary directory and then:

    1. If that directory holds exactly two files, the ``.vrt`` is rebuilt
       from the ``.tif`` with ``gdal_translate``, every ``SourceFilename``
       element gets ``relativeToVRT="1"``, and the VRT file stored on the
       resource is replaced by the rebuilt one.
    2. Metadata is re-extracted from the VRT and the maximum, minimum and
       no-data values of each existing ``Band_<key>`` element are updated.

    Whenever a step changes the resource, its bag is deleted from storage (if
    present) and ``resource_modified`` is called. A failure in any step is
    logged and recorded per resource without stopping the run; a summary of
    all outcomes is printed at the end.

    :param apps: not used here (presumably supplied by Django's RunPython)
    :param schema_editor: not used here (presumably supplied by RunPython)
    """
    log = logging.getLogger()
    istorage = IrodsStorage()

    copy_res_fail = []
    vrt_update_fail = []
    vrt_update_success = []
    meta_update_fail = []
    meta_update_success = []

    # start migration for each raster resource that has raster files
    for res in RasterResource.objects.all():
        if not res.files.all():
            continue

        # copy all the resource files to temp dir
        temp_dir = None
        try:
            temp_dir = tempfile.mkdtemp()
            for res_file in res.files.all():
                shutil.copy(
                    res_file.resource_file.file.name,
                    os.path.join(temp_dir,
                                 os.path.basename(res_file.resource_file.name)))

            # pop() raises IndexError when no .vrt file was copied, which
            # sends the resource down the copy-failure path below
            vrt_file_path = [os.path.join(temp_dir, f)
                             for f in os.listdir(temp_dir)
                             if f.endswith('.vrt')].pop()
        except Exception as e:
            # log the exception object itself: ``e.message`` is deprecated and
            # empty for multi-argument exceptions such as IOError
            log.exception(e)
            # do not leave a half-populated temp dir behind
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)
            copy_res_fail.append('{}:{}'.format(res.short_id,
                                                res.metadata.title.value))
            continue

        # update vrt file if the raster resource has a single tif file
        try:
            if len(os.listdir(temp_dir)) == 2:
                # create new vrt file
                tif_file_path = [os.path.join(temp_dir, f)
                                 for f in os.listdir(temp_dir)
                                 if f.endswith('.tif')].pop()
                with open(os.devnull, 'w') as fp:
                    # wait() so the VRT is completely written before parsing
                    subprocess.Popen(
                        ['gdal_translate', '-of', 'VRT',
                         tif_file_path, vrt_file_path],
                        stdout=fp, stderr=fp).wait()

                # modify the vrt file contents
                tree = ET.parse(vrt_file_path)
                for element in tree.getroot().iter('SourceFilename'):
                    element.attrib['relativeToVRT'] = '1'
                tree.write(vrt_file_path)

                # delete vrt res file
                for f in res.files.all():
                    if f.resource_file.name.endswith('vrt'):
                        f.resource_file.delete()
                        f.delete()

                # add new vrt file to resource; the handle is closed as soon
                # as the upload call returns instead of being leaked
                with open(vrt_file_path, 'r') as vrt_fp:
                    new_file = UploadedFile(
                        file=vrt_fp, name=os.path.basename(vrt_file_path))
                    hydroshare.add_resource_files(res.short_id, new_file)

                # update the bag
                bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                if istorage.exists(bag_name):
                    # delete the resource bag as the old bag is not valid
                    istorage.delete(bag_name)
                resource_modified(res, res.creator)
                vrt_update_success.append('{}:{}'.format(
                    res.short_id, res.metadata.title.value))
        except Exception as e:
            log.exception(e)
            vrt_update_fail.append('{}:{}'.format(res.short_id,
                                                  res.metadata.title.value))

        # update the metadata for the band information of all the raster resources
        try:
            meta_updated = False

            # extract meta from inside temp_dir; always return to the previous
            # working directory and drop the temp dir, even when the
            # extraction raises
            ori_dir = os.getcwd()
            try:
                os.chdir(temp_dir)
                res_md_dict = raster_meta_extract.get_raster_meta_dict(
                    vrt_file_path)
            finally:
                os.chdir(ori_dir)
                shutil.rmtree(temp_dir, ignore_errors=True)

            # update band information metadata in django
            if res_md_dict['band_info']:
                for i, band_meta in res_md_dict['band_info'].items():
                    band_obj = res.metadata.bandInformation.filter(
                        name='Band_{}'.format(i)).first()
                    if band_obj:
                        res.metadata.update_element(
                            'bandInformation',
                            band_obj.id,
                            maximumValue=band_meta['maximumValue'],
                            minimumValue=band_meta['minimumValue'],
                            noDataValue=band_meta['noDataValue'],
                        )
                        meta_updated = True

            # update the bag if meta is updated
            if meta_updated:
                bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                if istorage.exists(bag_name):
                    # delete the resource bag as the old bag is not valid
                    istorage.delete(bag_name)
                resource_modified(res, res.creator)
                meta_update_success.append('{}:{}'.format(
                    res.short_id, res.metadata.title.value))
        except Exception as e:
            log.exception(e)
            meta_update_fail.append('{}:{}'.format(res.short_id,
                                                   res.metadata.title.value))

    # Print migration results. Each print takes one parenthesised string, so
    # the output is identical under the print statement and print function.
    print('Copy resource to temp folder failure: Number: {} List: {}'.format(
        len(copy_res_fail), copy_res_fail))
    print('VRT file update success: Number: {} List{}'.format(
        len(vrt_update_success), vrt_update_success))
    print('VRT file update fail: Number: {} List{}'.format(
        len(vrt_update_fail), vrt_update_fail))
    print('Meta update success: Number: {} List {}'.format(
        len(meta_update_success), meta_update_success))
    print('Meta update fail: Number: {} List {}'.format(
        len(meta_update_fail), meta_update_fail))
def migrate_tif_file(apps, schema_editor):
    """Regenerate VRT files and refresh band metadata of raster resources.

    For every ``RasterResource`` that has files, the resource files are copied
    into a fresh temporary directory and then:

    1. If that directory holds exactly two files, the ``.vrt`` is rebuilt
       from the ``.tif`` with ``gdal_translate``, every ``SourceFilename``
       element gets ``relativeToVRT="1"``, and the VRT file stored on the
       resource is replaced by the rebuilt one.
    2. Metadata is re-extracted from the VRT and the maximum, minimum and
       no-data values of each existing ``Band_<key>`` element are updated.

    Whenever a step changes the resource, its bag is deleted from storage (if
    present) and ``resource_modified`` is called. A failure in any step is
    logged and recorded per resource without stopping the run; a summary of
    all outcomes is printed at the end.

    :param apps: not used here (presumably supplied by Django's RunPython)
    :param schema_editor: not used here (presumably supplied by RunPython)
    """
    log = logging.getLogger()
    istorage = IrodsStorage()

    copy_res_fail = []
    vrt_update_fail = []
    vrt_update_success = []
    meta_update_fail = []
    meta_update_success = []

    # start migration for each raster resource that has raster files
    for res in RasterResource.objects.all():
        if not res.files.all():
            continue

        # copy all the resource files to temp dir
        temp_dir = None
        try:
            temp_dir = tempfile.mkdtemp()
            for res_file in res.files.all():
                shutil.copy(
                    res_file.resource_file.file.name,
                    os.path.join(temp_dir,
                                 os.path.basename(res_file.resource_file.name)))

            # pop() raises IndexError when no .vrt file was copied, which
            # sends the resource down the copy-failure path below
            vrt_file_path = [os.path.join(temp_dir, f)
                             for f in os.listdir(temp_dir)
                             if f.endswith('.vrt')].pop()
        except Exception as e:
            # log the exception object itself: ``e.message`` is deprecated and
            # empty for multi-argument exceptions such as IOError
            log.exception(e)
            # do not leave a half-populated temp dir behind
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)
            copy_res_fail.append('{}:{}'.format(res.short_id,
                                                res.metadata.title.value))
            continue

        # update vrt file if the raster resource has a single tif file
        try:
            if len(os.listdir(temp_dir)) == 2:
                # create new vrt file
                tif_file_path = [os.path.join(temp_dir, f)
                                 for f in os.listdir(temp_dir)
                                 if f.endswith('.tif')].pop()
                with open(os.devnull, 'w') as fp:
                    # wait() so the VRT is completely written before parsing
                    subprocess.Popen(
                        ['gdal_translate', '-of', 'VRT',
                         tif_file_path, vrt_file_path],
                        stdout=fp, stderr=fp).wait()

                # modify the vrt file contents
                tree = ET.parse(vrt_file_path)
                for element in tree.getroot().iter('SourceFilename'):
                    element.attrib['relativeToVRT'] = '1'
                tree.write(vrt_file_path)

                # delete vrt res file
                for f in res.files.all():
                    if f.resource_file.name.endswith('vrt'):
                        f.resource_file.delete()
                        f.delete()

                # add new vrt file to resource; the handle is closed as soon
                # as the upload call returns instead of being leaked
                with open(vrt_file_path, 'r') as vrt_fp:
                    new_file = UploadedFile(
                        file=vrt_fp, name=os.path.basename(vrt_file_path))
                    hydroshare.add_resource_files(res.short_id, new_file)

                # update the bag
                bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                if istorage.exists(bag_name):
                    # delete the resource bag as the old bag is not valid
                    istorage.delete(bag_name)
                resource_modified(res, res.creator)
                vrt_update_success.append('{}:{}'.format(
                    res.short_id, res.metadata.title.value))
        except Exception as e:
            log.exception(e)
            vrt_update_fail.append('{}:{}'.format(res.short_id,
                                                  res.metadata.title.value))

        # update the metadata for the band information of all the raster resources
        try:
            meta_updated = False

            # extract meta from inside temp_dir; always return to the previous
            # working directory and drop the temp dir, even when the
            # extraction raises
            ori_dir = os.getcwd()
            try:
                os.chdir(temp_dir)
                res_md_dict = raster_meta_extract.get_raster_meta_dict(
                    vrt_file_path)
            finally:
                os.chdir(ori_dir)
                shutil.rmtree(temp_dir, ignore_errors=True)

            # update band information metadata in django
            if res_md_dict['band_info']:
                for i, band_meta in res_md_dict['band_info'].items():
                    band_obj = res.metadata.bandInformation.filter(
                        name='Band_{}'.format(i)).first()
                    if band_obj:
                        res.metadata.update_element(
                            'bandInformation',
                            band_obj.id,
                            maximumValue=band_meta['maximumValue'],
                            minimumValue=band_meta['minimumValue'],
                            noDataValue=band_meta['noDataValue'],
                        )
                        meta_updated = True

            # update the bag if meta is updated
            if meta_updated:
                bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                if istorage.exists(bag_name):
                    # delete the resource bag as the old bag is not valid
                    istorage.delete(bag_name)
                resource_modified(res, res.creator)
                meta_update_success.append('{}:{}'.format(
                    res.short_id, res.metadata.title.value))
        except Exception as e:
            log.exception(e)
            meta_update_fail.append('{}:{}'.format(res.short_id,
                                                   res.metadata.title.value))

    # Print migration results. Each print takes one parenthesised string, so
    # the output is identical under the print statement and print function.
    print('Copy resource to temp folder failure: Number: {} List: {}'.format(
        len(copy_res_fail), copy_res_fail))
    print('VRT file update success: Number: {} List{}'.format(
        len(vrt_update_success), vrt_update_success))
    print('VRT file update fail: Number: {} List{}'.format(
        len(vrt_update_fail), vrt_update_fail))
    print('Meta update success: Number: {} List {}'.format(
        len(meta_update_success), meta_update_success))
    print('Meta update fail: Number: {} List {}'.format(
        len(meta_update_fail), meta_update_fail))
if os.path.isfile(res_file_tmp_path): shutil.rmtree(os.path.dirname(res_file_tmp_path)) copy_res_fail.append('{}:{}'.format(res.short_id, res.metadata.title.value)) continue # update the metadata for the original coverage information of all the raster resources try: if temp_dir and vrt_file_path: meta_updated = False # extract meta. # the reason to change current working directory to temp_dir is to make sure # the raster files can be found by Gdal for metadata extraction # when "relativeToVRT" parameter is set as "0" ori_dir = os.getcwd() os.chdir(temp_dir) res_md_dict = {} res_md_dict = raster_meta_extract.get_raster_meta_dict(vrt_file_path) os.chdir(ori_dir) shutil.rmtree(temp_dir) # update original coverage information for datum and coordinate string in django if res_md_dict['spatial_coverage_info']['original_coverage_info'].\ get('datum', None): res.metadata.originalCoverage.delete() v = {'value': res_md_dict['spatial_coverage_info']['original_coverage_info']} res.metadata.create_element('OriginalCoverage', **v) meta_updated = True # update the bag if meta is updated if meta_updated: resource_modified(res, res.creator) meta_update_success.append('{}:{}'.format(res.short_id, res.metadata.title.value)) except Exception as e: