def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.

    :param vds: VDS handle carrying ``filename`` and ``variable`` attributes
    :param first_record: If True, read the requested fields from the 'metadata' vdata rather than attaching to the
        variable's own vdata
    :param missing_values: Optional list of values to mask; when None the vdata's 'missing' attribute is used
    :return: A flattened, masked numpy array of the data
    :raises IOError: If the HDF file cannot be opened
    """
    import numpy as np
    from pyhdf.HDF import HDF, HDF4Error
    from cis.utils import create_masked_array_for_missing_values

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()
    vd = None
    try:
        if first_record:
            # FIXME - This is the only bit that is actually different to the baseline
            vd = vs.attach('metadata')
            vd.setfields(variable)
            data = vd.read()
        else:
            # get data for that variable
            vd = vs.attach(variable)
            data = vd.read(nRec=vd.inquire()[0])

        # create numpy array from data
        data = np.array(data).flatten()

        # dealing with missing data
        if missing_values is None:
            v = _get_attribute_value(vd, 'missing')
            v = float(v) if v is not None else None
            missing_values = [v]
        data = create_masked_array_for_missing_values(data, missing_values)
    finally:
        # Always release the vdata interface and close the file, even if
        # attaching or reading fails part-way through.
        if vd is not None:
            vd.detach()
        vs.end()
        datafile.close()

    return data
def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.

    :param vds: VDS handle carrying ``filename`` and ``variable`` attributes
    :param first_record: If True, attach to the first vdata in the file and read the requested fields from it
    :param missing_values: Optional list of values to mask; when None the vdata's 'missing' attribute is used
    :return: A flattened, masked numpy array of the data
    :raises IOError: If the HDF file cannot be opened
    """
    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()
    vd = None
    try:
        if first_record:
            vd = vs.attach(vs.next(-1))
            vd.setfields(variable)
            data = vd.read()
        else:
            # get data for that variable
            vd = vs.attach(variable)
            data = vd.read(nRec=vd.inquire()[0])

        # create numpy array from data
        data = np.array(data).flatten()

        # dealing with missing data
        if missing_values is None:
            missing_values = [_get_attribute_value(vd, 'missing')]
        data = create_masked_array_for_missing_values(data, missing_values)
    finally:
        # Always release the vdata interface and close the file, even if
        # attaching or reading fails part-way through.
        if vd is not None:
            vd.detach()
        vs.end()
        datafile.close()

    return data
def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.

    :param vds: VDS handle carrying ``filename`` and ``variable`` attributes
    :param first_record: If True, attach to the first vdata in the file and read the requested fields from it
    :param missing_values: Optional list of values to mask; when None the vdata's 'missing' attribute is used
    :return: A flattened, masked numpy array of the data
    :raises IOError: If the HDF file cannot be opened
    """
    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()
    vd = None
    try:
        if first_record:
            vd = vs.attach(vs.next(-1))
            vd.setfields(variable)
            data = vd.read()
        else:
            # get data for that variable
            vd = vs.attach(variable)
            data = vd.read(nRec=vd.inquire()[0])

        # create numpy array from data
        data = np.array(data).flatten()

        # dealing with missing data
        if missing_values is None:
            missing_values = [__get_attribute_value(vd, 'missing')]
        data = create_masked_array_for_missing_values(data, missing_values)
    finally:
        # Always release the vdata interface and close the file, even if
        # attaching or reading fails part-way through.
        if vd is not None:
            vd.detach()
        vs.end()
        datafile.close()

    return data
def get_data(sds, missing_values=None):
    """
    Read raw data from an SD instance, masking missing values and applying the
    scale factor and offset attributes often found in NASA HDF-EOS data (e.g. MODIS).

    :param sds: The specific sds instance to read
    :param missing_values: Optional list of values to mask; defaults to the
        dataset's ``_FillValue`` attribute (or None if absent)
    :return: A numpy array containing the raw data with missing data is replaced by NaN.
    """
    raw = sds.get()
    attrs = sds.attributes()

    # Mask missing data, falling back to the dataset's fill value when the
    # caller didn't supply explicit missing values.
    fill_values = missing_values if missing_values is not None else [attrs.get('_FillValue', None)]
    masked = create_masked_array_for_missing_values(raw, fill_values)

    # Apply the MODIS-style scaling (scale factor then offset).
    add_offset = attrs.get('add_offset', 0)
    scale = attrs.get('scale_factor', 1)
    return __apply_scaling_factor_MODIS(masked, scale, add_offset)