Example #1
0
    def create_netcdf(self, storage_indices, data_descriptor):
        '''
        Function to create netCDF-CF file for specified storage indices

        :param storage_indices: tuple of indices identifying the storage unit
        :param data_descriptor: list of dicts (one per timeslice), each with at
            least 'end_datetime' and 'tile_pathname' keys
        :return: storage_path when dry-running, otherwise None (existing units
            are skipped when self.force is not set)
        '''
        temp_storage_path = self.get_temp_storage_path(storage_indices)
        storage_path = self.get_storage_path(self.storage_type, storage_indices)
        make_dir(os.path.dirname(storage_path))

        if self.dryrun:
            return storage_path

        if os.path.isfile(storage_path) and not self.force:
            logger.warning('Skipping existing storage unit %s' % storage_path)
            return

        # Seconds-since-epoch values for the T (time) dimension, one per timeslice
        t_indices = np.array([dt2secs(record_dict['end_datetime']) for record_dict in data_descriptor])

        gdfnetcdf = GDFNetCDF(storage_config=self.storage_config[self.storage_type])

        logger.debug('Creating temporary storage unit %s with %d timeslices', temp_storage_path, len(data_descriptor))
        gdfnetcdf.create(netcdf_filename=temp_storage_path,
                         index_tuple=storage_indices,
                         dimension_index_dict={'T': t_indices}, netcdf_format=None)
        del t_indices

        # Set georeferencing from first or second tile for fault tolerance.
        # Catch Exception (not a bare except) so SystemExit/KeyboardInterrupt propagate.
        try:
            gdfnetcdf.georeference_from_file(data_descriptor[0]['tile_pathname'])
        except Exception:
            gdfnetcdf.georeference_from_file(data_descriptor[1]['tile_pathname'])

        variable_dict = self.storage_config[self.storage_type]['measurement_types']
        # Materialise keys as a list so indexing works on Python 3 as well as Python 2
        variable_names = list(variable_dict.keys())

        # Expected tile array shape: (bands, <regular dimension sizes>)
        array_shape = tuple([len(variable_dict)] +
                            [dim['dimension_elements']
                             for dim in self.storage_config[self.storage_type]['dimensions'].values()
                             if dim['indexing_type'] == 'regular']
                            )

        # All data types and no-data values should be the same - just use first one
        array_dtype = variable_dict[variable_names[0]]['numpy_datatype_name']

        nodata_value = variable_dict[variable_names[0]]['nodata_value']
        if nodata_value is None:
            nodata_value = np.nan

        for slice_index, record_dict in enumerate(data_descriptor):
            try:
                tile_dataset = gdal.Open(record_dict['tile_pathname'])
                assert tile_dataset, 'Failed to open tile file %s' % record_dict['tile_pathname']

                logger.debug('Reading array data from tile file %s (%d/%d)', record_dict['tile_pathname'], slice_index + 1, len(data_descriptor))
                data_array = tile_dataset.ReadAsArray()

                assert data_array.shape == array_shape, 'Tile array shape is not %s' % array_shape
            except Exception as e:
                # Can't read data_array from GeoTIFF - create empty data_array instead.
                # Log the exception object itself - the .message attribute was
                # deprecated in Python 2.6 and removed in Python 3.
                logger.warning('WARNING: Unable to read array from tile - empty array created: %s', e)

                data_array = np.ones(array_shape, array_dtype) * nodata_value

            logger.debug('data_array.shape = %s', data_array.shape)

            #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
            for variable_index, variable_name in enumerate(variable_names):
                logger.debug('Writing array to variable %s', variable_name)
                if len(data_array.shape) == 3:
                    gdfnetcdf.write_slice(variable_name, data_array[variable_index], {'T': slice_index})
                elif len(data_array.shape) == 2:
                    gdfnetcdf.write_slice(variable_name, data_array, {'T': slice_index})

            gdfnetcdf.sync() # Write cached data to disk
Example #2
0
    def create_netcdf(self, storage_indices, data_descriptor):
        '''
        Function to create netCDF-CF file for specified storage indices

        :param storage_indices: tuple of indices identifying the storage unit
        :param data_descriptor: list of dicts (one per timeslice), each with at
            least 'end_datetime' and 'tile_pathname' keys
        :return: storage_path when dry-running, otherwise None (existing units
            are skipped when self.force is not set)
        '''
        temp_storage_path = self.get_temp_storage_path(storage_indices)
        storage_path = self.get_storage_path(self.storage_type,
                                             storage_indices)
        make_dir(os.path.dirname(storage_path))

        if self.dryrun:
            return storage_path

        if os.path.isfile(storage_path) and not self.force:
            logger.warning('Skipping existing storage unit %s' % storage_path)
            return

        # Seconds-since-epoch values for the T (time) dimension
        t_indices = np.array([
            dt2secs(record_dict['end_datetime'])
            for record_dict in data_descriptor
        ])

        gdfnetcdf = GDFNetCDF(
            storage_config=self.storage_config[self.storage_type])

        logger.debug('Creating temporary storage unit %s with %d timeslices',
                     temp_storage_path, len(data_descriptor))
        gdfnetcdf.create(netcdf_filename=temp_storage_path,
                         index_tuple=storage_indices,
                         dimension_index_dict={'T': t_indices},
                         netcdf_format=None)
        del t_indices

        # Set georeferencing from first or second tile for fault tolerance.
        # Catch Exception (not a bare except) so SystemExit/KeyboardInterrupt
        # propagate.
        try:
            gdfnetcdf.georeference_from_file(
                data_descriptor[0]['tile_pathname'])
        except Exception:
            gdfnetcdf.georeference_from_file(
                data_descriptor[1]['tile_pathname'])

        variable_dict = self.storage_config[
            self.storage_type]['measurement_types']
        # Materialise keys as a list so indexing works on Python 3 as well
        variable_names = list(variable_dict.keys())

        # Expected tile array shape: (bands, <regular dimension sizes>)
        array_shape = tuple([len(variable_dict)] + [
            dim['dimension_elements']
            for dim in self.storage_config[self.storage_type]
            ['dimensions'].values() if dim['indexing_type'] == 'regular'
        ])

        # All data types and no-data values should be the same - just use first one
        array_dtype = variable_dict[variable_names[0]]['numpy_datatype_name']

        nodata_value = variable_dict[variable_names[0]]['nodata_value']
        if nodata_value is None:
            nodata_value = np.nan

        for slice_index, record_dict in enumerate(data_descriptor):
            try:
                tile_dataset = gdal.Open(record_dict['tile_pathname'])
                assert tile_dataset, 'Failed to open tile file %s' % record_dict[
                    'tile_pathname']

                logger.debug('Reading array data from tile file %s (%d/%d)',
                             record_dict['tile_pathname'], slice_index + 1,
                             len(data_descriptor))
                data_array = tile_dataset.ReadAsArray()

                assert data_array.shape == array_shape, 'Tile array shape is not %s' % array_shape
            except Exception as e:
                # Can't read data_array from GeoTIFF - create empty data_array
                # instead. Log the exception object itself - the .message
                # attribute was removed in Python 3.
                logger.warning(
                    'WARNING: Unable to read array from tile - empty array created: %s',
                    e)

                data_array = np.ones(array_shape, array_dtype) * nodata_value

            logger.debug('data_array.shape = %s', data_array.shape)

            #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
            for variable_index, variable_name in enumerate(variable_names):
                logger.debug('Writing array to variable %s', variable_name)
                if len(data_array.shape) == 3:
                    gdfnetcdf.write_slice(variable_name,
                                          data_array[variable_index],
                                          {'T': slice_index})
                elif len(data_array.shape) == 2:
                    gdfnetcdf.write_slice(variable_name, data_array,
                                          {'T': slice_index})

            gdfnetcdf.sync()  # Write cached data to disk
Example #3
0
    def create_netcdf(self, storage_indices, data_descriptor):
        '''
        Function to create netCDF-CF file for specified storage indices

        :param storage_indices: tuple of indices identifying the storage unit
        :param data_descriptor: list of dicts (one per timeslice), each with at
            least 'end_datetime' and 'tile_pathname' keys
        :return: path of the completed storage unit (also returned for a dry
            run), or None when an existing unit is skipped
        '''
        temp_storage_path = self.get_temp_storage_path(storage_indices)
        storage_path = self.get_storage_path(self.storage_type, storage_indices)
        make_dir(os.path.dirname(storage_path))

        if self.dryrun:
            return storage_path

        if os.path.isfile(storage_path) and not self.force:
            logger.warning('Skipping existing storage unit %s' % storage_path)
            return

        # Seconds-since-epoch values for the T (time) dimension, one per timeslice
        t_indices = np.array([dt2secs(record_dict['end_datetime']) for record_dict in data_descriptor])

        gdfnetcdf = GDFNetCDF(storage_config=self.storage_config[self.storage_type])

        logger.debug('Creating temporary storage unit %s with %d timeslices', temp_storage_path, len(data_descriptor))
        gdfnetcdf.create(netcdf_filename=temp_storage_path,
                         index_tuple=storage_indices,
                         dimension_index_dict={'T': t_indices}, netcdf_format=None)
        del t_indices

        # Set georeferencing from first tile
        gdfnetcdf.georeference_from_file(data_descriptor[0]['tile_pathname'])

        variable_dict = self.storage_config[self.storage_type]['measurement_types']
        # Materialise keys as a list so indexing works on Python 3 as well as Python 2
        variable_names = list(variable_dict.keys())

        for slice_index, record_dict in enumerate(data_descriptor):
            tile_dataset = gdal.Open(record_dict['tile_pathname'])
            assert tile_dataset, 'Failed to open tile file %s' % record_dict['tile_pathname']

            logger.debug('Reading array data from tile file %s (%d/%d)', record_dict['tile_pathname'], slice_index + 1, len(data_descriptor))
            data_array = tile_dataset.ReadAsArray()
            logger.debug('data_array.shape = %s', data_array.shape)

            #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
            for variable_index, variable_name in enumerate(variable_names):
                logger.debug('Writing array to variable %s', variable_name)
                if len(data_array.shape) == 3:
                    # Multi-band tile: write one 2D band per variable
                    gdfnetcdf.write_slice(variable_name, data_array[variable_index], {'T': slice_index})
                elif len(data_array.shape) == 2:
                    # Single-band tile: write the 2D array as-is
                    gdfnetcdf.write_slice(variable_name, data_array, {'T': slice_index})

            gdfnetcdf.sync() # Write cached data to disk

        del gdfnetcdf # Close the netCDF

        # Atomically publish the finished unit: build in temp, then move into place
        logger.debug('Moving temporary storage unit %s to %s', temp_storage_path, storage_path)
        if os.path.isfile(storage_path):
            logger.debug('Removing existing storage unit %s' % storage_path)
            os.remove(storage_path)
        shutil.move(temp_storage_path, storage_path)

        return storage_path
Example #4
0
    def create_netcdf(self, storage_indices, data_descriptor):
        '''
        Function to create netCDF-CF file for specified storage indices

        :param storage_indices: tuple of indices identifying the storage unit
        :param data_descriptor: list of dicts (one per timeslice), each with at
            least 'end_datetime' and 'tile_pathname' keys
        :return: path of the completed storage unit (also returned for a dry
            run), or None when an existing unit is skipped
        '''
        temp_storage_path = self.get_temp_storage_path(storage_indices)
        storage_path = self.get_storage_path(self.storage_type,
                                             storage_indices)
        make_dir(os.path.dirname(storage_path))

        if self.dryrun:
            return storage_path

        if os.path.isfile(storage_path) and not self.force:
            logger.warning('Skipping existing storage unit %s' % storage_path)
            return

        # Seconds-since-epoch values for the T (time) dimension
        t_indices = np.array([
            dt2secs(record_dict['end_datetime'])
            for record_dict in data_descriptor
        ])

        gdfnetcdf = GDFNetCDF(
            storage_config=self.storage_config[self.storage_type])

        logger.debug('Creating temporary storage unit %s with %d timeslices',
                     temp_storage_path, len(data_descriptor))
        gdfnetcdf.create(netcdf_filename=temp_storage_path,
                         index_tuple=storage_indices,
                         dimension_index_dict={'T': t_indices},
                         netcdf_format=None)
        del t_indices

        # Set georeferencing from first tile
        gdfnetcdf.georeference_from_file(data_descriptor[0]['tile_pathname'])

        variable_dict = self.storage_config[
            self.storage_type]['measurement_types']
        # Materialise keys as a list so indexing works on Python 3 as well
        variable_names = list(variable_dict.keys())

        for slice_index, record_dict in enumerate(data_descriptor):
            tile_dataset = gdal.Open(record_dict['tile_pathname'])
            assert tile_dataset, 'Failed to open tile file %s' % record_dict[
                'tile_pathname']

            logger.debug('Reading array data from tile file %s (%d/%d)',
                         record_dict['tile_pathname'], slice_index + 1,
                         len(data_descriptor))
            data_array = tile_dataset.ReadAsArray()
            logger.debug('data_array.shape = %s', data_array.shape)

            #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
            for variable_index, variable_name in enumerate(variable_names):
                logger.debug('Writing array to variable %s', variable_name)
                if len(data_array.shape) == 3:
                    # Multi-band tile: write one 2D band per variable
                    gdfnetcdf.write_slice(variable_name,
                                          data_array[variable_index],
                                          {'T': slice_index})
                elif len(data_array.shape) == 2:
                    # Single-band tile: write the 2D array as-is
                    gdfnetcdf.write_slice(variable_name, data_array,
                                          {'T': slice_index})

            gdfnetcdf.sync()  # Write cached data to disk

        del gdfnetcdf  # Close the netCDF

        # Atomically publish the finished unit: build in temp, then move into place
        logger.debug('Moving temporary storage unit %s to %s',
                     temp_storage_path, storage_path)
        if os.path.isfile(storage_path):
            logger.debug('Removing existing storage unit %s' % storage_path)
            os.remove(storage_path)
        shutil.move(temp_storage_path, storage_path)

        return storage_path