示例#1
0
文件: dataset.py 项目: tovrstra/h5py
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.

        ``args`` is the indexing expression: a single index or a tuple of
        indices.  String entries are treated as compound field names and
        are split off before the remaining entries are used for the
        dataspace selection.  ``val`` is the data to write; it is converted
        to a NumPy array before being handed to the low-level write.

        Raises ValueError if a requested field name is not part of the
        dataset dtype.  Raises TypeError if field names are used on a
        non-compound dataset, if the trailing dimensions of ``val`` do not
        match an array dtype, or if a scalar would have to be broadcast
        into an array dtype.
        """
        args = args if isinstance(args, tuple) else (args, )

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        vlen = h5t.check_dtype(vlen=self.dtype)
        if vlen not in (bytes, unicode, None):
            # Variable-length (non-string) data: coerce to the vlen base type
            try:
                val = numpy.asarray(val, dtype=vlen)
            except ValueError:
                # Ragged input: convert each element on its own
                try:
                    val = numpy.array(
                        [numpy.array(x, dtype=vlen) for x in val],
                        dtype=self.dtype)
                except ValueError:
                    # Best effort only; val is left as it was passed in
                    pass
            if vlen == val.dtype:
                # A regular array of the base type: repackage it as an
                # object array whose elements are the innermost 1-D rows.
                if val.ndim > 1:
                    tmp = numpy.empty(shape=val.shape[:-1], dtype=object)
                    tmp.ravel()[:] = [
                        i for i in val.reshape((numpy.prod(val.shape[:-1]),
                                                val.shape[-1]))
                    ]
                else:
                    tmp = numpy.array([None], dtype=object)
                    tmp[0] = val
                val = tmp
        elif self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype is None)):
            if len(names) == 1 and self.dtype.fields is not None:
                # Single field selected for write, from a non-array source
                if names[0] not in self.dtype.fields:
                    raise ValueError("No such field for indexing: %s" %
                                     names[0])
                dtype = self.dtype.fields[names[0]][0]
                cast_compound = True
            else:
                dtype = self.dtype
                cast_compound = False

            val = numpy.asarray(val, dtype=dtype, order='C')
            if cast_compound:
                # Wrap the plain values in a one-field compound dtype
                val = val.astype(numpy.dtype([(names[0], dtype)]))
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError(
                    "When writing to array types, last N dimensions have to match (got %s, but should be %s)"
                    % (
                        valshp,
                        shp,
                    ))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape) - len(shp)]

        # Make a compound memory type if field-name slicing is required
        elif len(names) != 0:

            mshape = val.shape

            # Catch common errors
            if self.dtype.fields is None:
                raise TypeError(
                    "Illegal slicing argument (not a compound dataset)")
            mismatch = [x for x in names if x not in self.dtype.fields]
            if len(mismatch) != 0:
                mismatch = ", ".join('"%s"' % x for x in mismatch)
                raise ValueError(
                    "Illegal slicing argument (fields %s not in dataset type)"
                    % mismatch)

            # Write non-compound source into a single dataset field
            if len(names) == 1 and val.dtype.fields is None:
                # The original referenced an undefined name "h5y" here,
                # which raised NameError; the h5t module is what is meant.
                subtype = h5t.py_create(val.dtype)
                mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
                mtype.insert(self._e(names[0]), 0, subtype)

            # Make a new source type keeping only the requested fields
            else:
                fieldnames = [x for x in val.dtype.names
                              if x in names]  # Keep source order
                mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
                for fieldname in fieldnames:
                    subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                    offset = val.dtype.fields[fieldname][1]
                    mtype.insert(self._e(fieldname), offset, subtype)

        # Use mtype derived from array (let DatasetID.write figure it out)
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise TypeError(
                    "Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if (len(mshape) < len(self.shape)):
            mshape_pad = (1, ) * (len(self.shape) - len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad,
                                   (h5s.UNLIMITED, ) * len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
示例#2
0
def getTypeElement(dt):
    """Build an HDF5 type description dict for a non-compound numpy dtype.

    ``dt`` is a numpy dtype with at most one field.  The returned dict
    carries a 'class' key plus class-specific keys (for example 'length',
    'charSet', 'base', 'dims' or 'mapping'), depending on ``dt.kind``.

    Raises Exception if ``dt`` has more than one field, and TypeError for
    object dtypes that are neither a recognised vlen nor a reference type,
    for void dtypes that are neither arrays nor opaque, and for dtype kinds
    not handled here.
    """
    if len(dt) > 1:
        raise Exception("unexpected numpy type passed to getTypeElement")

    type_info = {}

    if dt.kind == 'O':
        # numpy object type - assume this is a h5py variable length extension
        h5t_check = check_dtype(vlen=dt)
        if h5t_check is not None:

            if h5t_check == six.binary_type:
                type_info['class'] = 'H5T_STRING'
                type_info['length'] = 'H5T_VARIABLE'
                type_info['charSet'] = 'H5T_CSET_ASCII'
                type_info['strPad'] = 'H5T_STR_NULLTERM'
            elif h5t_check == six.text_type:
                type_info['class'] = 'H5T_STRING'
                type_info['length'] = 'H5T_VARIABLE'
                type_info['charSet'] = 'H5T_CSET_UTF8'
                type_info['strPad'] = 'H5T_STR_NULLTERM'
            elif isinstance(h5t_check, np.dtype):
                # vlen data.  isinstance is required: concrete dtype
                # objects may be instances of np.dtype subclasses, so an
                # exact type() comparison can fail.
                type_info['class'] = 'H5T_VLEN'
                type_info['size'] = 'H5T_VARIABLE'
                type_info['base'] = getBaseType(h5t_check)
            else:
                # unknown vlen type; str() keeps the message well-formed
                # whatever kind of object was returned
                raise TypeError("Unknown h5py vlen type: " + str(h5t_check))
        else:
            # check for reference type
            h5t_check = check_dtype(ref=dt)
            if h5t_check is not None:
                type_info['class'] = 'H5T_REFERENCE'

                if h5t_check is Reference:
                    type_info['base'] = 'H5T_STD_REF_OBJ'  # objref
                elif h5t_check is RegionReference:
                    type_info['base'] = 'H5T_STD_REF_DSETREG'  # region ref
                else:
                    raise TypeError("unexpected reference type")
            else:
                raise TypeError("unknown object type")
    elif dt.kind == 'V':
        baseType = getBaseType(dt)
        if dt.shape:
            # array type
            type_info['dims'] = dt.shape
            type_info['class'] = 'H5T_ARRAY'
            type_info['base'] = baseType
        elif baseType['class'] == 'H5T_OPAQUE':
            # expecting this to be an opaque type
            type_info = baseType  # just promote the base type
        else:
            raise TypeError("unexpected Void type")
    elif dt.kind in ('S', 'U', 'f'):
        # byte string, unicode string and floating point types are
        # described entirely by their base type
        type_info = getBaseType(dt)
    elif dt.kind == 'i' or dt.kind == 'u':
        # integer type
        baseType = getBaseType(dt)
        # numpy integer type - but check to see if this is the h5py
        # enum extension
        mapping = check_dtype(enum=dt)

        if mapping:
            # yes, this is an enum!
            type_info['class'] = 'H5T_ENUM'
            type_info['mapping'] = mapping
            type_info['base'] = baseType
        else:
            type_info = baseType  # just use base type
    else:
        # unexpected kind
        raise TypeError("unexpected dtype kind: " + dt.kind)

    return type_info
示例#3
0
def getBaseType(dt):
    """Return the HDF5 description dict for the base type of ``dt``.

    The decision is made on ``dt.base.kind``: fixed-length byte strings,
    void (opaque), integer, float and object (reference) kinds are
    handled.  Integer and float results only get a 'base' entry when the
    base type name is one of the predefined HDF5 names listed below.

    Raises TypeError if ``dt`` has more than one field, if an object dtype
    is not a known reference type, or if the base kind is not handled.
    """
    if len(dt) > 1:
        raise TypeError("unexpected numpy type passed to getTypeElement")

    base = dt.base
    kind = base.kind

    if kind == 'S':
        # Fixed length string type
        return {
            'class': 'H5T_STRING',
            'charSet': 'H5T_CSET_ASCII',
            'length': base.itemsize,
            'strPad': 'H5T_STR_NULLPAD',
        }

    if kind == 'V':
        # todo - determine tag
        return {'class': 'H5T_OPAQUE', 'size': dt.itemsize, 'tag': ''}

    if kind in ('i', 'u', 'f'):
        if kind == 'f':
            h5_class = 'H5T_FLOAT'
            predefined = {
                'float32': 'H5T_IEEE_F32',
                'float64': 'H5T_IEEE_F64',
            }
        else:
            h5_class = 'H5T_INTEGER'
            predefined = {
                'int8': 'H5T_STD_I8',
                'uint8': 'H5T_STD_U8',
                'int16': 'H5T_STD_I16',
                'uint16': 'H5T_STD_U16',
                'int32': 'H5T_STD_I32',
                'uint32': 'H5T_STD_U32',
                'int64': 'H5T_STD_I64',
                'uint64': 'H5T_STD_U64',
            }
        type_info = {'class': h5_class}
        if base.name in predefined:
            # maps to one of the HDF5 predefined types; anything other
            # than an explicit '>' byte order is labelled little-endian
            suffix = 'BE' if base.byteorder == '>' else 'LE'
            type_info['base'] = predefined[base.name] + suffix
        return type_info

    if kind == 'O':
        # check for reference type
        ref_type = check_dtype(ref=dt)
        if ref_type is None:
            raise TypeError("unknown object type")
        type_info = {'class': 'H5T_REFERENCE'}
        if ref_type is Reference:
            type_info['base'] = 'H5T_STD_REF_OBJ'  # objref
        elif ref_type is RegionReference:
            type_info['base'] = 'H5T_STD_REF_DSETREG'  # region ref
        else:
            raise TypeError("unexpected reference type")
        return type_info

    # unexpected kind
    raise TypeError("unexpected dtype base kind: " + kind)
示例#4
0
def getTypeElement(dt):
    """Build an HDF5 type description dict for a non-compound numpy dtype.

    ``dt`` is a numpy dtype with at most one field.  The returned dict
    carries a 'class' key plus class-specific keys (for example 'length',
    'charSet', 'base', 'dims' or 'mapping'), depending on ``dt.kind``.

    Raises Exception if ``dt`` has more than one field, and TypeError for
    object dtypes that are neither a recognised vlen nor a reference type,
    for void dtypes that are neither arrays nor opaque, and for dtype kinds
    not handled here.
    """
    if len(dt) > 1:
        raise Exception("unexpected numpy type passed to getTypeElement")

    type_info = {}

    if dt.kind == 'O':
        # numpy object type - assume this is a h5py variable length extension
        h5t_check = check_dtype(vlen=dt)
        if h5t_check is not None:

            # NOTE(review): comparing against ``str``/``unicode`` looks like
            # a Python 2 convention - confirm before running on Python 3.
            if h5t_check == str:
                type_info['class'] = 'H5T_STRING'
                type_info['length'] = 'H5T_VARIABLE'
                type_info['charSet'] = 'H5T_CSET_ASCII'
                type_info['strPad'] = 'H5T_STR_NULLTERM'
            elif h5t_check == unicode:
                type_info['class'] = 'H5T_STRING'
                type_info['length'] = 'H5T_VARIABLE'
                type_info['charSet'] = 'H5T_CSET_UTF8'
                type_info['strPad'] = 'H5T_STR_NULLTERM'
            elif isinstance(h5t_check, np.dtype):
                # vlen data.  isinstance is required: concrete dtype
                # objects may be instances of np.dtype subclasses, so an
                # exact type() comparison can fail.
                type_info['class'] = 'H5T_VLEN'
                type_info['size'] = 'H5T_VARIABLE'
                type_info['base'] = getBaseType(h5t_check)
            else:
                # unknown vlen type; str() keeps the message well-formed
                # whatever kind of object was returned
                raise TypeError("Unknown h5py vlen type: " + str(h5t_check))
        else:
            # check for reference type
            h5t_check = check_dtype(ref=dt)
            if h5t_check is not None:
                type_info['class'] = 'H5T_REFERENCE'

                if h5t_check is Reference:
                    type_info['base'] = 'H5T_STD_REF_OBJ'  # objref
                elif h5t_check is RegionReference:
                    type_info['base'] = 'H5T_STD_REF_DSETREG'  # region ref
                else:
                    raise TypeError("unexpected reference type")
            else:
                raise TypeError("unknown object type")
    elif dt.kind == 'V':
        baseType = getBaseType(dt)
        if dt.shape:
            # array type
            type_info['dims'] = dt.shape
            type_info['class'] = 'H5T_ARRAY'
            type_info['base'] = baseType
        elif baseType['class'] == 'H5T_OPAQUE':
            # expecting this to be an opaque type
            type_info = baseType  # just promote the base type
        else:
            raise TypeError("unexpected Void type")
    elif dt.kind in ('S', 'U', 'f'):
        # byte string, unicode string and floating point types are
        # described entirely by their base type
        type_info = getBaseType(dt)
    elif dt.kind == 'i' or dt.kind == 'u':
        # integer type
        baseType = getBaseType(dt)
        # numpy integer type - but check to see if this is the h5py
        # enum extension
        mapping = check_dtype(enum=dt)

        if mapping:
            # yes, this is an enum!
            type_info['class'] = 'H5T_ENUM'
            type_info['mapping'] = mapping
            type_info['base'] = baseType
        else:
            type_info = baseType  # just use base type
    else:
        # unexpected kind
        raise TypeError("unexpected dtype kind: " + dt.kind)

    return type_info
示例#5
0
def getBaseType(dt):
    """Describe the base type of numpy dtype ``dt`` as an HDF5 type dict.

    Dispatches on ``dt.base.kind``.  Byte strings, void (opaque), integer,
    float and object (reference) kinds are supported.  For integers and
    floats the 'base' key is only present when the base type name appears
    in the predefined tables below.

    Raises TypeError for dtypes with more than one field, for object
    dtypes that are not a known reference type, and for any other kind.
    """
    if len(dt) > 1:
        raise TypeError("unexpected numpy type passed to getTypeElement")

    int_names = {
        'int8': 'H5T_STD_I8',
        'uint8': 'H5T_STD_U8',
        'int16': 'H5T_STD_I16',
        'uint16': 'H5T_STD_U16',
        'int32': 'H5T_STD_I32',
        'uint32': 'H5T_STD_U32',
        'int64': 'H5T_STD_I64',
        'uint64': 'H5T_STD_U64',
    }
    float_names = {
        'float32': 'H5T_IEEE_F32',
        'float64': 'H5T_IEEE_F64',
    }
    # numeric kind -> (HDF5 class name, table of predefined type names)
    numeric_kinds = {
        'i': ('H5T_INTEGER', int_names),
        'u': ('H5T_INTEGER', int_names),
        'f': ('H5T_FLOAT', float_names),
    }

    base_dt = dt.base
    base_kind = base_dt.kind
    type_info = {}

    if base_kind == 'S':
        # Fixed length string type
        type_info['class'] = 'H5T_STRING'
        type_info['charSet'] = 'H5T_CSET_ASCII'
        type_info['length'] = base_dt.itemsize
        type_info['strPad'] = 'H5T_STR_NULLPAD'
    elif base_kind == 'V':
        type_info['class'] = 'H5T_OPAQUE'
        type_info['size'] = dt.itemsize
        type_info['tag'] = ''  # todo - determine tag
    elif base_kind in numeric_kinds:
        h5_class, known_names = numeric_kinds[base_kind]
        type_info['class'] = h5_class
        # only an explicit '>' byte order is reported as big-endian
        endian = 'LE'
        if base_dt.byteorder == '>':
            endian = 'BE'
        hdf5_name = known_names.get(base_dt.name)
        if hdf5_name is not None:
            # maps to one of the HDF5 predefined types
            type_info['base'] = hdf5_name + endian
    elif base_kind == 'O':
        # check for reference type
        ref_kind = check_dtype(ref=dt)
        if ref_kind is None:
            raise TypeError("unknown object type")
        type_info['class'] = 'H5T_REFERENCE'
        if ref_kind is Reference:
            type_info['base'] = 'H5T_STD_REF_OBJ'  # objref
        elif ref_kind is RegionReference:
            type_info['base'] = 'H5T_STD_REF_DSETREG'  # region ref
        else:
            raise TypeError("unexpected reference type")
    else:
        # unexpected kind
        raise TypeError("unexpected dtype base kind: " + base_kind)

    return type_info
示例#6
0
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.

        ``args`` is the indexing expression: a single index or a tuple of
        indices.  String entries are treated as compound field names and
        are split off before the remaining entries are used for the
        dataspace selection.  ``val`` is the data to write; it is converted
        to a NumPy array before being handed to the low-level write.

        Raises ValueError if a requested field name is not part of the
        dataset dtype.  Raises TypeError if field names are used on a
        non-compound dataset, if the trailing dimensions of ``val`` do not
        match an array dtype, or if a scalar would have to be broadcast
        into an array dtype.
        """
        args = args if isinstance(args, tuple) else (args,)

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, basestring))
        args = tuple(x for x in args if not isinstance(x, basestring))
        if not py3:
            # On Python 2, unicode field names are encoded to UTF-8 bytes
            names = tuple(x.encode('utf-8') if isinstance(x, unicode) else x for x in names)

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        vlen = h5t.check_dtype(vlen=self.dtype)
        if vlen not in (bytes, unicode, None):
            # Variable-length (non-string) data: coerce to the vlen base type
            try:
                val = numpy.asarray(val, dtype=vlen)
            except ValueError:
                # Ragged input: convert each element on its own
                try:
                    val = numpy.array([numpy.array(x, dtype=vlen)
                                       for x in val], dtype=self.dtype)
                except ValueError:
                    # Best effort only; val is left as it was passed in
                    pass
            if vlen == val.dtype:
                # A regular array of the base type: repackage it as an
                # object array whose elements are the innermost 1-D rows.
                if val.ndim > 1:
                    tmp = numpy.empty(shape=val.shape[:-1], dtype=object)
                    tmp.ravel()[:] = [i for i in val.reshape(
                        (numpy.prod(val.shape[:-1]), val.shape[-1]))]
                else:
                    tmp = numpy.array([None], dtype=object)
                    tmp[0] = val
                val = tmp
        elif self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype is None)):
            if len(names) == 1 and self.dtype.fields is not None:
                # Single field selected for write, from a non-array source
                if names[0] not in self.dtype.fields:
                    raise ValueError("No such field for indexing: %s" % names[0])
                dtype = self.dtype.fields[names[0]][0]
                cast_compound = True
            else:
                dtype = self.dtype
                cast_compound = False

            val = numpy.asarray(val, dtype=dtype, order='C')
            if cast_compound:
                # Wrap the plain values in a one-field compound dtype
                val = val.astype(numpy.dtype([(names[0], dtype)]))
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError("When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape)-len(shp)]

        # Make a compound memory type if field-name slicing is required
        elif len(names) != 0:

            mshape = val.shape

            # Catch common errors
            if self.dtype.fields is None:
                raise TypeError("Illegal slicing argument (not a compound dataset)")
            mismatch = [x for x in names if x not in self.dtype.fields]
            if len(mismatch) != 0:
                mismatch = ", ".join('"%s"'%x for x in mismatch)
                raise ValueError("Illegal slicing argument (fields %s not in dataset type)" % mismatch)

            # Write non-compound source into a single dataset field
            if len(names) == 1 and val.dtype.fields is None:
                # The original referenced an undefined name "h5y" here,
                # which raised NameError; the h5t module is what is meant.
                subtype = h5t.py_create(val.dtype)
                mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
                mtype.insert(self._e(names[0]), 0, subtype)

            # Make a new source type keeping only the requested fields
            else:
                fieldnames = [x for x in val.dtype.names if x in names] # Keep source order
                mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
                for fieldname in fieldnames:
                    subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                    offset = val.dtype.fields[fieldname][1]
                    mtype.insert(self._e(fieldname), offset, subtype)

        # Use mtype derived from array (let DatasetID.write figure it out)
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise TypeError("Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if(len(mshape) < len(self.shape)):
            mshape_pad = (1,)*(len(self.shape)-len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad, (h5s.UNLIMITED,)*len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
示例#7
0
def getTypeItem(dt):
    """Return an HDF5 type description dict for the numpy dtype ``dt``.

    Compound dtypes produce a 'fields' list with one entry per field, and
    array dtypes a 'base' entry; both are built by calling this function
    recursively.  Object dtypes are checked for the h5py variable-length
    and reference extensions; booleans and enum-tagged integers are
    described as H5T_ENUM.

    Raises TypeError for fixed-length unicode dtypes, for integer or float
    types without a predefined HDF5 name, for unrecognised object dtypes,
    and for dtype kinds not handled here.
    """
    predefined_int_types = {
        'int8':    'H5T_STD_I8',
        'uint8':   'H5T_STD_U8',
        'int16':   'H5T_STD_I16',
        'uint16':  'H5T_STD_U16',
        'int32':   'H5T_STD_I32',
        'uint32':  'H5T_STD_U32',
        'int64':   'H5T_STD_I64',
        'uint64':  'H5T_STD_U64'
    }
    predefined_float_types = {
        'float32': 'H5T_IEEE_F32',
        'float64': 'H5T_IEEE_F64'
    }

    type_info = {}
    if dt.names:
        # compound type.  Testing dt.names rather than len(dt) > 1 keeps
        # single-field compounds from falling through to the opaque
        # ('V') branch below.
        type_info['class'] = 'H5T_COMPOUND'
        type_info['fields'] = [
            {'name': name, 'type': getTypeItem(dt[name])}
            for name in dt.names
        ]
    elif dt.shape:
        # array type
        if dt.base == dt:
            raise TypeError("Expected base type to be different than parent")
        type_info['dims'] = dt.shape
        type_info['class'] = 'H5T_ARRAY'
        type_info['base'] = getTypeItem(dt.base)
    elif dt.kind == 'O':
        # vlen string or data
        #
        # check for h5py variable length extension
        vlen_check = check_dtype(vlen=dt.base)
        if vlen_check is not None and not isinstance(vlen_check, np.dtype):
            vlen_check = np.dtype(vlen_check)
        ref_check = check_dtype(ref=dt.base)
        if vlen_check == six.binary_type:
            type_info['class'] = 'H5T_STRING'
            type_info['length'] = 'H5T_VARIABLE'
            type_info['charSet'] = 'H5T_CSET_ASCII'
            type_info['strPad'] = 'H5T_STR_NULLTERM'
        elif vlen_check == six.text_type:
            type_info['class'] = 'H5T_STRING'
            type_info['length'] = 'H5T_VARIABLE'
            type_info['charSet'] = 'H5T_CSET_UTF8'
            type_info['strPad'] = 'H5T_STR_NULLTERM'
        elif isinstance(vlen_check, np.dtype):
            # vlen data.  isinstance is required: concrete dtype objects
            # may be instances of np.dtype subclasses, so an exact type()
            # comparison can fail.
            type_info['class'] = 'H5T_VLEN'
            type_info['size'] = 'H5T_VARIABLE'
            type_info['base'] = getTypeItem(vlen_check)
        elif vlen_check is not None:
            # unknown vlen type
            raise TypeError("Unknown h5py vlen type: " + str(vlen_check))
        elif ref_check is not None:
            # a reference type
            type_info['class'] = 'H5T_REFERENCE'

            if ref_check is Reference:
                type_info['base'] = 'H5T_STD_REF_OBJ'  # objref
            elif ref_check is RegionReference:
                type_info['base'] = 'H5T_STD_REF_DSETREG'  # region ref
            else:
                raise TypeError("unexpected reference type")
        else:
            raise TypeError("unknown object type")
    elif dt.kind == 'V':
        # void type
        type_info['class'] = 'H5T_OPAQUE'
        type_info['size'] = dt.itemsize
        type_info['tag'] = ''  # todo - determine tag
    elif dt.base.kind == 'S':
        # Fixed length string type
        type_info['class'] = 'H5T_STRING'
        type_info['charSet'] = 'H5T_CSET_ASCII'
        type_info['length'] = dt.itemsize
        type_info['strPad'] = 'H5T_STR_NULLPAD'
    elif dt.base.kind == 'U':
        # Fixed length unicode type
        raise TypeError("Fixed length unicode type is not supported")

    elif dt.kind == 'b':
        # boolean type - h5py stores as enum
        # assume LE unless the numpy byteorder is '>'
        byteorder = 'LE'
        if dt.base.byteorder == '>':
            byteorder = 'BE'
        # this mapping is an h5py convention for boolean support
        mapping = {
            "FALSE": 0,
            "TRUE": 1
        }
        type_info['class'] = 'H5T_ENUM'
        type_info['mapping'] = mapping
        base_info = {"class": "H5T_INTEGER"}
        base_info['base'] = "H5T_STD_I8" + byteorder
        type_info["base"] = base_info

    elif dt.kind == 'f':
        # floating point type
        type_info['class'] = 'H5T_FLOAT'
        byteorder = 'LE'
        if dt.byteorder == '>':
            byteorder = 'BE'
        if dt.name in predefined_float_types:
            # maps to one of the HDF5 predefined types
            type_info['base'] = predefined_float_types[dt.name] + byteorder
        else:
            raise TypeError("Unexpected floating point type: " + dt.name)
    elif dt.kind == 'i' or dt.kind == 'u':
        # integer type

        # assume LE unless the numpy byteorder is '>'
        byteorder = 'LE'
        if dt.base.byteorder == '>':
            byteorder = 'BE'

        # numpy integer type - but check to see if this is the h5py
        # enum extension
        mapping = check_dtype(enum=dt)

        if mapping:
            # yes, this is an enum!
            type_info['class'] = 'H5T_ENUM'
            type_info['mapping'] = mapping
            if dt.name not in predefined_int_types:
                raise TypeError("Unexpected integer type: " + dt.name)
            # maps to one of the HDF5 predefined types
            base_info = {"class": "H5T_INTEGER"}
            base_info['base'] = predefined_int_types[dt.name] + byteorder
            type_info["base"] = base_info
        else:
            type_info['class'] = 'H5T_INTEGER'

            if dt.name not in predefined_int_types:
                raise TypeError("Unexpected integer type: " + dt.name)

            type_info['base'] = predefined_int_types[dt.name] + byteorder

    else:
        # unexpected kind
        raise TypeError("unexpected dtype kind: " + dt.kind)

    return type_info
示例#8
0
def getTypeItem(dt):
    """Return an HDF5 type description dict for the numpy dtype ``dt``.

    Compound dtypes produce a 'fields' list with one entry per field, and
    array dtypes a 'base' entry; both are built by calling this function
    recursively.  Object dtypes are checked for the h5py variable-length
    and reference extensions; booleans and enum-tagged integers are
    described as H5T_ENUM.

    Raises TypeError for fixed-length unicode dtypes, for integer or float
    types without a predefined HDF5 name, for unrecognised object dtypes,
    and for dtype kinds not handled here.
    """

    predefined_int_types = {
        'int8': 'H5T_STD_I8',
        'uint8': 'H5T_STD_U8',
        'int16': 'H5T_STD_I16',
        'uint16': 'H5T_STD_U16',
        'int32': 'H5T_STD_I32',
        'uint32': 'H5T_STD_U32',
        'int64': 'H5T_STD_I64',
        'uint64': 'H5T_STD_U64'
    }
    predefined_float_types = {
        'float32': 'H5T_IEEE_F32',
        'float64': 'H5T_IEEE_F64'
    }

    type_info = {}
    if dt.names:
        # compound type.  Testing dt.names rather than len(dt) > 1 keeps
        # single-field compounds from falling through to the opaque
        # ('V') branch below.
        type_info['class'] = 'H5T_COMPOUND'
        type_info['fields'] = [
            {'name': name, 'type': getTypeItem(dt[name])}
            for name in dt.names
        ]
    elif dt.shape:
        # array type
        if dt.base == dt:
            raise TypeError("Expected base type to be different than parent")
        type_info['dims'] = dt.shape
        type_info['class'] = 'H5T_ARRAY'
        type_info['base'] = getTypeItem(dt.base)
    elif dt.kind == 'O':
        # vlen string or data
        #
        # check for h5py variable length extension
        vlen_check = check_dtype(vlen=dt.base)
        if vlen_check is not None and not isinstance(vlen_check, np.dtype):
            vlen_check = np.dtype(vlen_check)
        ref_check = check_dtype(ref=dt.base)
        if vlen_check == six.binary_type:
            type_info['class'] = 'H5T_STRING'
            type_info['length'] = 'H5T_VARIABLE'
            type_info['charSet'] = 'H5T_CSET_ASCII'
            type_info['strPad'] = 'H5T_STR_NULLTERM'
        elif vlen_check == six.text_type:
            type_info['class'] = 'H5T_STRING'
            type_info['length'] = 'H5T_VARIABLE'
            type_info['charSet'] = 'H5T_CSET_UTF8'
            type_info['strPad'] = 'H5T_STR_NULLTERM'
        elif isinstance(vlen_check, np.dtype):
            # vlen data.  isinstance is required: concrete dtype objects
            # may be instances of np.dtype subclasses, so an exact type()
            # comparison can fail.
            type_info['class'] = 'H5T_VLEN'
            type_info['size'] = 'H5T_VARIABLE'
            type_info['base'] = getTypeItem(vlen_check)
        elif vlen_check is not None:
            # unknown vlen type
            raise TypeError("Unknown h5py vlen type: " + str(vlen_check))
        elif ref_check is not None:
            # a reference type
            type_info['class'] = 'H5T_REFERENCE'

            if ref_check is Reference:
                type_info['base'] = 'H5T_STD_REF_OBJ'  # objref
            elif ref_check is RegionReference:
                type_info['base'] = 'H5T_STD_REF_DSETREG'  # region ref
            else:
                raise TypeError("unexpected reference type")
        else:
            raise TypeError("unknown object type")
    elif dt.kind == 'V':
        # void type
        type_info['class'] = 'H5T_OPAQUE'
        type_info['size'] = dt.itemsize
        type_info['tag'] = ''  # todo - determine tag
    elif dt.base.kind == 'S':
        # Fixed length string type
        type_info['class'] = 'H5T_STRING'
        type_info['charSet'] = 'H5T_CSET_ASCII'
        type_info['length'] = dt.itemsize
        type_info['strPad'] = 'H5T_STR_NULLPAD'
    elif dt.base.kind == 'U':
        # Fixed length unicode type
        raise TypeError("Fixed length unicode type is not supported")

    elif dt.kind == 'b':
        # boolean type - described as an enum over an 8-bit integer.
        # The previous code compared dt.base to dt and raised whenever
        # they were equal; that is always the case once the array branch
        # above has been skipped, so booleans could never be described.
        # assume LE unless the numpy byteorder is '>'
        byteorder = 'LE'
        if dt.base.byteorder == '>':
            byteorder = 'BE'
        type_info['class'] = 'H5T_ENUM'
        # NOTE(review): member names are kept in the lower-case spelling
        # this block already used - confirm the casing consumers expect.
        type_info['mapping'] = {"false": 0, "true": 1}
        base_info = {"class": "H5T_INTEGER"}
        base_info['base'] = "H5T_STD_I8" + byteorder
        type_info['base'] = base_info
    elif dt.kind == 'f':
        # floating point type
        type_info['class'] = 'H5T_FLOAT'
        byteorder = 'LE'
        if dt.byteorder == '>':
            byteorder = 'BE'
        if dt.name in predefined_float_types:
            # maps to one of the HDF5 predefined types
            type_info['base'] = predefined_float_types[dt.name] + byteorder
        else:
            raise TypeError("Unexpected floating point type: " + dt.name)
    elif dt.kind == 'i' or dt.kind == 'u':
        # integer type

        # assume LE unless the numpy byteorder is '>'
        byteorder = 'LE'
        if dt.base.byteorder == '>':
            byteorder = 'BE'

        # numpy integer type - but check to see if this is the h5py
        # enum extension
        mapping = check_dtype(enum=dt)

        if mapping:
            # yes, this is an enum!
            type_info['class'] = 'H5T_ENUM'
            type_info['mapping'] = mapping
            if dt.name not in predefined_int_types:
                raise TypeError("Unexpected integer type: " + dt.name)
            # maps to one of the HDF5 predefined types
            base_info = {"class": "H5T_INTEGER"}
            base_info['base'] = predefined_int_types[dt.name] + byteorder
            type_info["base"] = base_info
        else:
            type_info['class'] = 'H5T_INTEGER'

            if dt.name not in predefined_int_types:
                raise TypeError("Unexpected integer type: " + dt.name)

            type_info['base'] = predefined_int_types[dt.name] + byteorder

    else:
        # unexpected kind
        raise TypeError("unexpected dtype kind: " + dt.kind)

    return type_info