Exemplo n.º 1
0
    def __init__(self, h5json, fname, ext='h5'):
        """Initialize a PyCode instance.

        :arg dict h5json: HDF5/JSON content.
        :arg str fname: Name of the HDF5 file the generated code will create.
        :arg str ext: Generated HDF5 file's extension (no leading dot).
        :raises ValueError: If ``fname`` is empty.
        :raises KeyError: If ``h5json`` lacks a "root" entry.
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in h5json:
            raise KeyError('"root" key not found.')
        self._h5j = h5json
        self._dimensions = []  # dimension bookkeeping
        self._dimscales = {}  # dimension-scale bookkeeping
        self._fname = '.'.join((fname, ext))
        self._file_var = 'f'  # variable name for the file handle
        self._p = StringStore()  # accumulates generated source code
Exemplo n.º 2
0
    def __init__(self, tinfo, fname, ext='h5'):
        """Initialize an MCode instance.

        :arg dict tinfo: Template content information.
        :arg str fname: Name of the file to use when generating the source
            code.
        :arg str ext: HDF5 file extension (no leading dot).
        :raises ValueError: If ``fname`` is empty.
        :raises KeyError: If ``tinfo`` lacks a "root" entry.
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in tinfo:
            raise KeyError('"root" key not found.')

        # Helper variables...
        self._d = tinfo
        self._root = tinfo['root']
        self._fname = '{}.{}'.format(fname, ext)
        self._dset_path = {}  # map dataset ID to HDF5 paths
        self._dimlist = []  # dataset IDs that have dimscales attached

        # Variable for the generated source code...
        self._m = StringStore()
Exemplo n.º 3
0
    def __init__(self, tinfo, fname, ext='h5'):
        """Initialize an IdlCode instance.

        :arg dict tinfo: Template content information.
        :arg str fname: Name of the file to use when generating the source
            code.
        :arg str ext: HDF5 file extension (no leading dot).
        :raises ValueError: If ``fname`` is empty.
        :raises KeyError: If ``tinfo`` lacks a "root" entry.
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in tinfo:
            raise KeyError('"root" key not found.')

        # Helper variables...
        self._d = tinfo
        self._root = tinfo['root']
        self._fname = '{}.{}'.format(fname, ext)
        self._dset_path = {}  # map dataset ID to HDF5 paths
        self._dimlist = []  # dataset IDs that have dimscales attached

        # Variable for the generated source code...
        self._c = StringStore()
Exemplo n.º 4
0
class IdlCode(object):
    """Produce IDL source code (as an .m file) to produce a template HDF5
    file."""

    def __init__(self, tinfo, fname, ext='h5'):
        """Initialize an IdlCode instance.

        :arg dict tinfo: Template content information.
        :arg str fname: Name of the file to use when generating the source
            code.
        :arg str ext: HDF5 file extension (no leading dot).
        :raises ValueError: If ``fname`` is empty.
        :raises KeyError: If ``tinfo`` lacks a "root" entry.
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in tinfo:
            raise KeyError('"root" key not found.')

        # Helper variables...
        self._d = tinfo
        self._root = tinfo['root']
        self._fname = '{}.{}'.format(fname, ext)
        self._dset_path = {}  # map dataset ID to HDF5 paths
        self._dimlist = []  # dataset IDs that have dimscales attached

        # Variable for the generated source code...
        self._c = StringStore()

    def _get_hard_links(self, gid, collection):
        links = list()
        for l in self._d['groups'][gid].get('links', []):
            if l['class'] == 'H5L_TYPE_HARD' and l['collection'] == collection:
                links.append(l)
        return links

    def _order_groups(self):
        pid = [{'id': self._root, 'path': '/'}]

        def tree_walker(ginfo):
            glinks = self._get_hard_links(ginfo['id'], 'groups')
            chld_grps = list()
            for gl in glinks:
                chld_grps.append({'id': gl['id'],
                                  'path': pp.join(ginfo['path'], gl['title'])})
            pid.extend(chld_grps)
            for cg in chld_grps:
                tree_walker(cg)

        tree_walker(pid[0])

        return pid

    def _create_file(self):
        """Code to create an HDF5 file."""
        tmplt = Template(
            "; Create the HDF5 file. It will overwrite any file with same "
            "name.\n"
            "fname = '$filename';\n"
            "fid = H5F_CREATE(fname)\n"
        )
        vars = {'filename': self._fname}
        self._c.append(tmplt.substitute(vars))

    def _close_file(self):
        """Code to close an HDF5 file."""
        tmplt = (
            "\n\n"
            "; Close the HDF5 file.\n"
            "H5F_CLOSE, fid\n"
            "; Template file is ready!\n"
        )
        self._c.append(tmplt)

    def _dims2str(self, dims):
        """Stringify dimension list with support for the unlimited size.

        :arg list dims: Dimension size list.
        """
        dim_str = []
        for d in dims:
            if d == 'H5S_UNLIMITED':
                # Unlimited dimension...
                dim_str.append("-1")
            else:
                dim_str.append('{:d}ULL'.format(d))
        return '[{}]'.format(', '.join(dim_str))

    def _dspace(self, shape):
        """Generate dataspace code.

        :arg dict shape: HDF5/JSON shape information.
        """
        if shape['class'] == 'H5S_SCALAR':
            return "sid = H5S_CREATE_SCALAR()\n"
        elif shape['class'] == 'H5S_SIMPLE':
            rank = len(shape['dims'])
            if rank == 1:
                tmplt = Template(
                    "sid = H5S_CREATE_SIMPLE($dims, MAX_DIMENSIONS=$maxdims)\n"
                )
            else:
                tmplt = Template(
                    "sid = H5S_CREATE_SIMPLE(REVERSE($dims), "
                    "MAX_DIMENSIONS=REVERSE($maxdims))\n"
                )
            vars = {'dims': self._dims2str(shape['dims']),
                    'maxdims': self._dims2str(shape.get('maxdims', []))}
            return tmplt.substitute(vars)
        else:
            raise NotImplementedError('%s: Not supported' % shape['class'])

    def _dtype(self, t, var='tid'):
        """Generate datatype code.

        :arg dict t: HDF5/JSON datatype information.
        :arg str tid: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_COMPOUND':
            tmplt = ''

            # Go over each compound field...
            field_cnt = 0
            field_tid_fmt = 'tid%d'
            for f in t['fields']:
                field_cnt += 1
                field_tid = field_tid_fmt % field_cnt
                tmplt += self._dtype(f['type'], var=field_tid)

            # Create the compound datatype...
            array = [field_tid_fmt % i for i in range(1, field_cnt + 1)]
            array = ', '.join(array)
            names = [f['name'].encode('ascii') for f in t['fields']]
            tmplt += "%s = H5T_COMPOUND_CREATE([%s], %s)\n" % (var, array,
                                                               names)

            # Close field datatypes...
            for i in range(1, field_cnt + 1):
                tmplt += "H5T_CLOSE, tid%d\n" % i

            return tmplt

        elif tcls == 'H5T_VLEN':
            return "%s = H5T_VLEN_CREATE(%s)\n" \
                % (var, self._atomic_dtype(t['base']))

        elif tcls == 'H5T_ARRAY':
            tmplt = Template(
                "${base}"
                "$var = H5T_ARRAY_CREATE(base_tid, $dims)\n"
                "H5T_CLOSE, base_tid\n"
            )
            if len(t['dims']) > 1:
                dims = 'REVERSE(%s)' % t['dims']
            else:
                dims = t['dims']
            return tmplt.substitute(
                {'base': self._dtype(t['base'], var='base_tid'),
                 'dims': dims,
                 'var': var})

        else:
            return var + " = " + self._atomic_dtype(t) + "\n"

    def _atomic_dtype(self, t, var='tid'):
        """Handle HDF5 atomic datatypes.

        :arg dict t: HDF5/JSON datatype information.
        """
        tcls = t['class']
        if tcls == 'H5T_STRING':
            if isinstance(t['length'], six.string_types):
                raise NotImplementedError('Variable length string datatype '
                                          'not supported yet.')
            else:
                tmplt = Template(
                    "H5T_IDL_CREATE(STRING('a', FORMAT='(A${n})'))"
                )
                return tmplt.substitute({'n': t['length']})

        elif tcls in ('H5T_FLOAT', 'H5T_INTEGER'):
            type_map = {'H5T_STD_U8': 'BYTE',
                        'H5T_STD_U16': 'UINT',
                        'H5T_STD_U32': 'ULONG',
                        'H5T_STD_U64': 'ULONG64',
                        'H5T_STD_I16': 'FIX',
                        'H5T_STD_I32': 'LONG',
                        'H5T_STD_I64': 'LONG64',
                        'H5T_IEEE_F32': 'FLOAT',
                        'H5T_IEEE_F64': 'DOUBLE'}
            base = t['base'][:-2]
            try:
                return "H5T_IDL_CREATE(%s(0))" % type_map[base]
            except KeyError:
                raise NotImplementedError('IDL does not support datatype: %s'
                                          % t['base'])

        elif tcls == 'H5T_REFERENCE':
            if t['base'] == 'H5T_STD_REF_OBJ':
                region = ''
            else:
                region = '/REGION'
            return "H5T_REFERENCE_CREATE(%s)" % region

        else:
            raise NotImplementedError('%s: Datatype class not supported yet'
                                      % t['class'])

    def _create_attr(self, attr, locid, dimscale=False):
        """Generate code for one attribute of the ``locid`` object.

        :arg dict attr: Attribute information.
        :arg str locid: Attribute's parent variable name.
        :arg bool dimscale: Parent object type: ``group`` or ``dataset``.
        """
        dataspace = self._dspace(attr['shape'])
        datatype = self._dtype(attr['type'])

        tmplt = Template(
            "\n; Attribute: $name\n"
            "${datatype}"
            "${dataspace}"
            "aid = H5A_CREATE($locid, '$name', tid, sid)\n"
            "${value}"
            "H5T_CLOSE, tid\n"
            "H5S_CLOSE, sid\n"
            "H5A_CLOSE, aid\n"
        )
        if dimscale and attr['name'] in ('REFERENCE_LIST', 'DIMENSION_LIST'):
            val = ("; This is a dimension scale attribute. It's value is "
                   "written later in the code.\n")
        else:
            if attr['type']['class'] == 'H5T_STRING':
                def prep_vals(vals):
                    values = list()
                    for v in vals:
                        temp = v.encode('ascii')
                        temp = temp.replace("'", "''")
                        if '\n' in temp:
                            # Replace "\n" with "STRING(10B)"
                            temp = "'+STRING(10B)+'".join(temp.split('\n'))
                        values.append(temp)
                    return values

                if attr['shape']['class'] == 'H5S_SCALAR':
                    value = "%s" % prep_vals([attr['value']])[0]
                else:
                    if len(attr['shape']['dims']) > 1:
                        raise NotImplementedError(
                            'Rank > 1 for string data not supported')
                    value = prep_vals(attr['value'])

                if attr['type']['length'] == 'H5T_VARIABLE':
                    val_str = \
                        '[' + ', '.join(["'%s'" % v for v in value]) + ']'
                    value = "H5T_VLEN_TO_STR(%s)" % val_str
                else:
                    # Left-justified, fixed-length string format...
                    # fmt = '{{:<{0:d}.{0:d}}}'.format(attr['type']['length'])

                    if attr['shape']['class'] == 'H5S_SCALAR':
                        # value = "'%s'" % fmt.format(value)
                        value = "'%s'" % value
                    else:
                        # for i in xrange(len(value)):
                        #     value[i] = fmt.format(value[i])
                        value = ('['
                                 + ', '.join(["'%s'" % v for v in value])
                                 + "]")
            else:
                value = attr['value']
            val = Template(
                "H5A_WRITE, aid, $value\n"
            ).substitute({'value': value})

        vars = {'locid': locid,
                'name': attr['name'],
                'datatype': datatype,
                'dataspace': dataspace,
                'value': val}
        self._c.append(tmplt.substitute(vars))

    def _is_dimscale(self, attrs):
        """Check if the dataset is a dimension scale.

        :arg list attrs: All dataset's attributes.
        """
        # Check if REFERENCE_LIST attribute is present...
        ref_list = any(a['name'] == 'REFERENCE_LIST'
                       and a['type']['class'] == 'H5T_COMPOUND'
                       for a in attrs)

        # Check if CLASS attribute is present...
        cls = any(a['name'] == 'CLASS'and a['value'] == 'DIMENSION_SCALE'
                  for a in attrs)

        if ref_list and cls:
            return True
        else:
            return False

    def _is_dimlist(self, attrs):
        """Check if the dataset has dimension scales atteched.

        :arg list attrs: All dataset's attributes.
        """
        # Check if DIMENSION_LIST attribute is present...
        dim_list = any(a['name'] == 'DIMENSION_LIST'
                       and a['type']['class'] == 'H5T_VLEN'
                       for a in attrs)

        return True if dim_list else False

    def _is_dimscale_related(self, attrs):
        """Check if the attributes of a dataset indicate it is related to
        dimension scales.

        :arg list attrs: All dataset's attributes.
        """
        dimscale = self._is_dimscale(attrs)
        dimlist = self._is_dimlist(attrs)

        if dimscale or dimlist:
            return True
        else:
            return False

    def _create_attrs(self, attrs, locid, dimscale=False):
        """Generate code for all the attributes of the ``locid`` object.

        :arg dict attrs: HDF5/JSON information about the attributes of
            the parent ``locid`` object.
        :arg str locid: MATLAB variable name of the attributes' parent object.
        :arg bool dimscale: Boolean indicating whether the attributes belong to
            a dimension scale.
        """
        for a in attrs:
            self._create_attr(a, locid, dimscale=dimscale)

    def _create_dset(self, id, name, ds, locid):
        """Generate code for one dataset of the ``locid`` group.

        :arg str id: Dataset's identifier.
        :arg str name: Dataset's name.
        :arg dict ds: Dataset information.
        :arg str locid: Variable name of the dataset's parent group.
        :raises ValueError: For an invalid layout class.
        :raises NotImplementedError: For an unsupported filter.
        """
        dataspace = self._dspace(ds['shape'])
        datatype = self._dtype(ds['type'])

        tmplt = Template(
            "\n"
            "; Dataset: $name\n"
            "${datatype}"
            "${dataspace}"
            "dsid = H5D_CREATE($locid, '$name', tid, sid${layout}${filter})\n"
            "H5S_CLOSE, sid\n"
            "H5T_CLOSE, tid\n"
        )

        # Layout...
        layout = ds.get('creationProperties', {}).get('layout', {})
        lyt = ''
        if layout.get('class', 'H5D_CONTIGUOUS') == 'H5D_CONTIGUOUS':
            # Contiguous is H5D_CREATE's default; no keyword needed.
            pass
        elif layout['class'] == 'H5D_COMPACT':
            # NOTE(review): compact layout is silently treated like the
            # default; no IDL keyword is emitted for it.
            pass
        elif layout['class'] == 'H5D_CHUNKED':
            # IDL is column-major, so chunk dims are reversed for rank > 1.
            if len(layout['dims']) == 1:
                chunks = '$chunks'
            else:
                chunks = 'REVERSE($chunks)'
            lyt = Template(
                ", CHUNK_DIMENSIONS=" + chunks
            ).substitute({'chunks': self._dims2str(layout['dims'])})
        else:
            raise ValueError('%s: Invalid layout class'
                             % layout['class'])

        # Filters...
        filters = ds.get('creationProperties', {}).get('filters', [])
        fltr = ''
        for f in filters:
            if f['class'] == 'H5Z_FILTER_DEFLATE':
                fltr += ", GZIP=%s" % f['level']
            elif f['class'] == 'H5Z_FILTER_SHUFFLE':
                fltr += ", /SHUFFLE"
            else:
                raise NotImplementedError('%s: Filter not supported yet'
                                          % f['class'])

        vars = {'locid': locid,
                'name': name,
                'datatype': datatype,
                'dataspace': dataspace,
                'layout': lyt,
                'filter': fltr}
        self._c.append(tmplt.substitute(vars))

        attrs = ds.get('attributes', [])
        # Dimscale-related attributes get special handling in _create_attr.
        dimscale = self._is_dimscale_related(attrs)
        if self._is_dimlist(attrs):
            # Remember datasets with attached dimension scales; their
            # DIMENSION_LIST values are written later by _dimscales().
            self._dimlist.append(id)
        self._create_attrs(attrs, 'dsid', dimscale=dimscale)

        self._c.append("H5D_CLOSE, dsid\n")

    def _create_dsets(self, dsets, locid, path):
        """Generate code for all the datasets of the ``locid`` object.

        :arg dict dsets: HDF5/JSON information about the datasets of
            the parent ``locid`` object.
        :arg str locid: IDL variable name of the datasets' parent object.
        :arg str path: HDF5 path of the datasets' parent object.
        """
        for d in dsets:
            # Record the full HDF5 path to the dataset...
            self._dset_path[d['id']] = pp.join(path, d['title'])
            # Generate dataset code...
            self._create_dset(d['id'], d['title'],
                              self._d['datasets'][d['id']], locid)

    def _create_group(self, g):
        """Code for all group content.

        :arg dict g: Group id and full name.
        """
        grpid = g['id']
        path = g['path']

        if grpid == self._root:
            locid = 'fid'
        else:
            tmplt = Template(
                "\n\n"
                ";\n"
                "; Group: $path\n"
                ";\n"
                "gid = H5G_CREATE(fid, '$path')\n"
            )
            vars = {'path': path}
            self._c.append(tmplt.substitute(vars))
            locid = 'gid'

        self._create_attrs(self._d['groups'][grpid].get('attributes', []),
                           locid)
        self._create_dsets(self._get_hard_links(grpid, 'datasets'), locid,
                           path)

        if grpid != self._root:
            self._c.append("H5G_CLOSE, gid\n")

    def _dimscales(self):
        """Generate code connecting dimension scales and their datasets.

        Runs after all datasets exist so object references can be created.
        Writes each ``self._dimlist`` dataset's DIMENSION_LIST attribute,
        then the REFERENCE_LIST attribute of every dimension scale
        encountered along the way.
        """
        # List for storing dimension scales IDs...
        dscales = set()

        self._c.append(
            "\n\n"
            ";\n"
            "; Datasets and their dimension scales\n"
            ";\n"
        )
        for dset_id in self._dimlist:
            dset = self._d['datasets'][dset_id]

            # Get DIMENSION_LIST attribute value...
            dims = next(a['value'] for a in dset['attributes']
                        if a['name'] == 'DIMENSION_LIST')

            # One vlen element per dataset dimension...
            tmp = Template(
                "\n; Dataset with dimension scales: $name\n"
                "dset_id = H5D_OPEN(fid, '$name')\n"
                "aid = H5A_OPEN_NAME(dset_id, 'DIMENSION_LIST')\n"
                "dims = REPLICATE({IDL_H5_VLEN}, $n)\n"
            ).substitute({'name': self._dset_path[dset_id],
                          'n': len(dims)})
            self._c.append(tmp)

            # Iterate over dataset's dimension scales...
            for index, dimscales in enumerate(dims):
                # Build an array with one object reference per scale...
                self._c.append("ref = INDGEN(%d)\n" % len(dimscales))
                for n, ds in enumerate(dimscales):
                    # Scale entries are paths whose last component is the
                    # scale dataset's ID.
                    ds_id = pp.basename(ds)
                    dscales.add(ds_id)
                    self._c.append("ref[%d] = H5R_CREATE(fid, '%s')\n"
                                   % (n, self._dset_path[ds_id]))
                self._c.append("dims[%d].pdata = PTR_NEW(ref);\n" % index)

            self._c.append(
                "H5A_WRITE, aid, dims\n"
                "H5A_CLOSE, aid\n"
                "H5D_CLOSE, dset_id\n"
            )

        # Now write the back-references stored on each dimension scale...
        for dset_id in dscales:
            tmp = Template(
                "\n; Dimension scale: $name\n"
                "dset_id = H5D_OPEN(fid, '$name')\n"
                "aid = H5A_OPEN_NAME(dset_id, 'REFERENCE_LIST')\n"
            ).substitute({'name': self._dset_path[dset_id]})
            self._c.append(tmp)

            dset = self._d['datasets'][dset_id]

            # Get REFERENCE_LIST attribute value and fields...
            refs, fields = next((a['value'], a['type']['fields'])
                                for a in dset['attributes']
                                if a['name'] == 'REFERENCE_LIST')

            # Field names...
            f_names = [f['name'] for f in fields]

            # Compound array: one {dataset ref, dimension index} per entry...
            self._c.append("ref = REPLICATE({%s:1, %s:1}, %d)\n"
                           % tuple(f_names + [len(refs)]))
            for n, r in enumerate(refs):
                # r is a (dataset path, dimension index) pair...
                ds_id = pp.basename(r[0])
                tmp = Template(
                    "ref[$n].${d} = H5R_CREATE(fid, '$path')\n"
                    "ref[$n].${i} = $index;\n"
                ).substitute({'n': n,
                              'path': self._dset_path[ds_id],
                              'd': f_names[0],
                              'i': f_names[1],
                              'index': r[1]})
                self._c.append(tmp)
            self._c.append(
                "H5A_WRITE, aid, ref\n"
                "H5A_CLOSE, aid\n"
                "H5D_CLOSE, dset_id\n"
            )

    def get_code(self):
        """Generate and return the complete IDL source code."""
        self._create_file()

        # Emit groups parent-first so each parent exists before its children.
        for grp in self._order_groups():
            self._create_group(grp)

        # Connect dimension scales only when some dataset has them attached.
        if self._dimlist:
            self._dimscales()

        self._close_file()

        return self._c.dump()
Exemplo n.º 5
0
class MCode(object):
    """Generate MATLAB source code (as an .m file) that produces a template
    HDF5 file."""

    def __init__(self, tinfo, fname, ext='h5'):
        """Initialize an MCode instance.

        :arg dict tinfo: Template content information.
        :arg str fname: Name of the file to use when generating the source
            code.
        :arg str ext: HDF5 file extension (no leading dot).
        :raises ValueError: If ``fname`` is empty.
        :raises KeyError: If ``tinfo`` lacks a "root" entry.
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in tinfo:
            raise KeyError('"root" key not found.')

        # Helper variables...
        self._d = tinfo
        self._root = tinfo['root']
        self._fname = '{}.{}'.format(fname, ext)
        self._dset_path = {}  # map dataset ID to HDF5 paths
        self._dimlist = []  # dataset IDs that have dimscales attached

        # Variable for the generated source code...
        self._m = StringStore()

    def _get_hard_links(self, gid, collection):
        links = list()
        for l in self._d['groups'][gid].get('links', []):
            if l['class'] == 'H5L_TYPE_HARD' and l['collection'] == collection:
                links.append(l)
        return links

    def _order_groups(self):
        pid = [{'id': self._root, 'path': '/'}]

        def tree_walker(ginfo):
            glinks = self._get_hard_links(ginfo['id'], 'groups')
            chld_grps = list()
            for gl in glinks:
                chld_grps.append({'id': gl['id'],
                                  'path': pp.join(ginfo['path'], gl['title'])})
            pid.extend(chld_grps)
            for cg in chld_grps:
                tree_walker(cg)

        tree_walker(pid[0])

        return pid

    def matlab_dtype(self, h5type):
        """Find appropriate MATLAB numerical data type for HDF5 predefined
        datatype.

        :arg str h5type: HDF5 predefined datatype.
        :return: MATLAB data type.
        :rtype: str
        """
        conv_map = {
            'H5T_STD_I8': 'int8',
            'H5T_STD_U8': 'uint8',
            'H5T_STD_I16': 'int16',
            'H5T_STD_U16': 'uint16',
            'H5T_STD_I32': 'int32',
            'H5T_STD_U32': 'uint32',
            'H5T_STD_I64': 'int64',
            'H5T_STD_U64': 'uint64',
            'H5T_IEEE_F32': 'single',
            'H5T_IEEE_F64': 'double'
        }
        try:
            return conv_map[h5type[:-2]]
        except KeyError:
            raise ValueError('%s: Invalid predefined datatype' % h5type)

    def _create_file(self):
        """Code to create an HDF5 file."""
        tmplt = Template(
            "% Create the HDF5 file. It will overwrite any file with same "
            "name.\n"
            "fname = '$filename';\n"
            "fcpl = H5P.create('H5P_FILE_CREATE');\n"
            "fapl = H5P.create('H5P_FILE_ACCESS');\n"
            "fid = H5F.create(fname, 'H5F_ACC_TRUNC', fcpl, fapl);\n"
        )
        vars = {'filename': self._fname}
        self._m.append(tmplt.substitute(vars))

    def _close_file(self):
        """Code to close an HDF5 file."""
        tmplt = (
            "\n\n"
            "% Close the HDF5 file.\n"
            "H5F.close(fid);\n"
            "% Template file is ready!\n"
        )
        self._m.append(tmplt)

    def _dims2str(self, dims):
        """Stringify dimension list with support for the unlimited size.

        :arg list dims: Dimension size list.
        """
        dim_str = []
        for d in dims:
            if d == 'H5S_UNLIMITED':
                # Unlimited dimension...
                dim_str.append("H5ML.get_constant_value('H5S_UNLIMITED')")
            else:
                dim_str.append('{:d}'.format(d))
        return '[{}]'.format(', '.join(dim_str))

    def _dspace(self, shape):
        """Generate dataspace code.

        :arg dict shape: HDF5/JSON shape information.
        """
        if shape['class'] == 'H5S_SCALAR':
            return "sid = H5S.create('H5S_SCALAR');\n"
        elif shape['class'] == 'H5S_SIMPLE':
            nelems_limit = 2**48 - 1
            nelems = 1
            for d in shape['dims']:
                if d != 'H5S_UNLIMITED':
                    nelems *= d
            if nelems > nelems_limit:
                raise ValueError(
                    'Number of elements too large (max. 2^48 - 1): {:d}'
                    .format(nelems))

            rank = len(shape['dims'])
            if rank == 1:
                tmplt = Template(
                    "sid = H5S.create_simple($rank, $dims, $maxdims);\n"
                )
            else:
                tmplt = Template(
                    "sid = H5S.create_simple($rank, fliplr($dims), "
                    "fliplr($maxdims));\n"
                )
            vars = {'dims': self._dims2str(shape['dims']),
                    'maxdims': self._dims2str(shape.get('maxdims', [])),
                    'rank': rank}
            return tmplt.substitute(vars)
        elif shape['class'] == 'H5S_NULL':
            return "sid = H5S.create('H5S_NULL');\n"
        else:
            raise NotImplementedError('%s: Not supported' % shape['class'])

    def _dtype(self, t, var='tid'):
        """Generate datatype code.

        :arg dict t: HDF5/JSON datatype information.
        :arg str var: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_COMPOUND':
            tmplt = ''

            # Go over each compound field...
            field_cnt = 0
            for f in t['fields']:
                field_cnt += 1
                dt = "field_tid(%d)" % field_cnt
                tmplt += self._dtype(f['type'], var=dt)
                tmplt += "field_size(%d) = H5T.get_size(%s);\n" % (field_cnt,
                                                                   dt)

            # Compute field byte offsets...
            num_fields = len(t['fields'])
            tmplt += (
                "field_offset = [0 cumsum(field_size(1:%d))];\n"
            ) % (num_fields - 1)

            # Create the compound datatype...
            tmplt += "tid = H5T.create('H5T_COMPOUND', sum(field_size));\n"
            for n in range(num_fields):
                tmplt += ("H5T.insert(tid, '%s', field_offset(%d), "
                          "field_tid(%d));\n") % (t['fields'][n]['name'],
                                                  n + 1, n + 1)

            # Close field datatypes...
            for n in range(num_fields):
                tmplt += "H5T.close(field_tid(%d));\n" % (n + 1)

            return tmplt

        elif tcls == 'H5T_VLEN':
            if t['base']['class'] == 'H5T_STRING':
                base_type = 'H5T_C_S1'
                if t['base']['length'] not in (1, 'H5T_VARIABLE'):
                    raise NotImplementedError(
                        'MATLAB only allows vlen strings of variable or '
                        'fixed length of 1.')
            elif t['base']['class'] == 'H5T_REFERENCE':
                raise NotImplementedError(
                    'MATLAB does not support H5T_REFERENCE for vlen datatype')
            else:
                base_type = t['base']['base']
            return "tid = H5T.vlen_create('%s');\n" % base_type

        elif tcls == 'H5T_ARRAY':
            tmplt = Template(
                "${base}"
                "$var = H5T.array_create(base_tid, fliplr($dims));\n"
                "H5T.close(base_tid);\n"
            )
            return tmplt.substitute(
                {'base': self._dtype(t['base'], var='base_tid'),
                 'dims': t['dims'],
                 'var': var})

        else:
            return var + " = " + self._atomic_dtype(t, var=var)

    def _atomic_dtype(self, t, var='tid'):
        """Handle HDF5 atomic datatypes.

        :arg dict t: HDF5/JSON datatype information.
        "arg str tid: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_STRING':
            tmplt = Template(
                "H5T.copy('H5T_C_S1');\n"
                "H5T.set_size($var, $length);\n"
                "H5T.set_strpad($var,'$strpad');\n"
                "H5T.set_cset($var, H5ML.get_constant_value('$cset'));\n"
            )
            if isinstance(t['length'], six.string_types):
                length = "'%s'" % t['length']
            else:
                length = t['length']
            return tmplt.substitute({'length': length,
                                     'strpad': t['strPad'],
                                     'cset': t['charSet'],
                                     'var': var})

        elif tcls in ('H5T_FLOAT', 'H5T_INTEGER'):
            return "H5T.copy('%s');\n" % t['base']

        elif tcls == 'H5T_REFERENCE':
            return "H5T.copy('%s');\n" % t['base']

        else:
            raise NotImplementedError('%s: Datatype not supported'
                                      % t['class'])

    def _create_attr(self, attr, locid, dimscale=False):
        """Generate code for one attribute of the ``locid`` object.

        :arg dict attr: Attribute information.
        :arg str locid: Attribute's parent variable name.
        :arg bool dimscale: Indicates attribute's parent is a dimension scale.
        """
        # Dimension scale bookkeeping attributes are created by the HDF5
        # dimension scale API itself, so never emit them explicitly...
        if dimscale and attr['name'] in ('REFERENCE_LIST', 'DIMENSION_LIST',
                                         'NAME', 'CLASS'):
            return

        dataspace = self._dspace(attr['shape'])
        datatype = self._dtype(attr['type'])

        tmplt = Template(
            "\n% Attribute: $name\n"
            "${datatype}"
            "${dataspace}"
            "acpl = H5P.create('H5P_ATTRIBUTE_CREATE');\n"
            "aid = H5A.create($lid, '$name', tid, sid, acpl, 'H5P_DEFAULT');\n"
            "${value}"
            "H5T.close(tid);\n"
            "H5S.close(sid);\n"
            "H5A.close(aid);\n"
        )
        if attr['type']['class'] == 'H5T_STRING':
            def prep_vals(vals):
                """Quote string values for MATLAB, routing values that need
                escaping through sprintf()."""
                values = list()
                for v in vals:
                    # Validate the value is ASCII. Encoding alone would
                    # produce ``bytes`` under Python 3 and break the str
                    # operations below, so decode back to text...
                    temp = v.encode('ascii').decode('ascii')
                    # Reset per value so clean values are not needlessly
                    # wrapped in sprintf() after an escaped one...
                    with_sprintf = False
                    if "'" in temp:
                        temp = temp.replace("'", "''")
                        with_sprintf = True
                    if '%' in temp:
                        temp = temp.replace('%', '%%')
                        with_sprintf = True
                    if '\n' in temp:
                        temp = temp.replace('\n', '\\n')
                        with_sprintf = True
                    if with_sprintf:
                        values.append("sprintf('{}')".format(temp))
                    else:
                        values.append("'{}'".format(temp))
                return values

            if attr['shape']['class'] == 'H5S_SCALAR':
                value = prep_vals([attr['value']])[0]
            else:
                if len(attr['shape']['dims']) > 1:
                    raise NotImplementedError(
                        'Rank > 1 for string data not supported')
                value = prep_vals(attr['value'])

            if attr['type']['length'] == 'H5T_VARIABLE':
                # Variable-length strings become a MATLAB cell array...
                val_str = ('{'
                           + ', '.join(value)
                           + '}')
            else:
                # Left-justified, fixed-length string format...
                fmt = "'%-{0:d}.{0:d}s'".format(attr['type']['length'])

                if attr['shape']['class'] == 'H5S_SCALAR':
                    val_str = "sprintf({}, {})".format(fmt, value)
                else:
                    # Fixed-length 1-D string data: a char matrix, one row
                    # per value (transposed for MATLAB's column order)...
                    for i in range(len(value)):
                        value[i] = "sprintf({}, {})".format(fmt, value[i])
                    val_str = (
                        '[' + '; '.join(value)
                        + "]'")
        else:
            val_str = attr['value']
        val = Template(
            "H5A.write(aid, 'H5ML_DEFAULT', $value);\n"
        ).substitute({'value': val_str})

        # Avoid shadowing the ``vars`` builtin...
        subs = {'lid': locid,
                'name': attr['name'],
                'datatype': datatype,
                'dataspace': dataspace,
                'value': val}
        self._m.append(tmplt.substitute(subs))

    def _is_dimscale(self, attrs):
        """Check if the dataset is a dimension scale.

        :arg list attrs: All dataset's attributes.
        """
        # Check if REFERENCE_LIST attribute is present...
        ref_list = any(a['name'] == 'REFERENCE_LIST'
                       and a['type']['class'] == 'H5T_COMPOUND'
                       for a in attrs)

        # Check if CLASS attribute is present...
        cls_ = any(a['name'] == 'CLASS'and a['value'] == 'DIMENSION_SCALE'
                   for a in attrs)

        if ref_list and cls_:
            return True
        else:
            return False

    def _dimscale(self, attrs, locid):
        """Generate the code that sets a dimension scale.

        :arg list attrs: All dataset's attributes.
        :arg str locid: MATLAB variable name of the attributes' parent object.
        """
        # Check if NAME attribute is present...
        for a in attrs:
            if a['name'] == 'NAME'and a['type']['class'] == 'H5T_STRING':
                scale_name = "'%s'" % a['value']
                break
        else:
            scale_name = '[]'

        return "H5DS.set_scale(%s, %s);\n" % (locid, scale_name)

    def _is_dimlist(self, attrs):
        """Check if the dataset has dimension scales attached.

        :arg list attrs: All dataset's attributes.
        """
        # Check if DIMENSION_LIST attribute is present...
        dim_list = any(a['name'] == 'DIMENSION_LIST'
                       and a['type']['class'] == 'H5T_VLEN'
                       for a in attrs)

        return True if dim_list else False

    def _is_dimscale_related(self, attrs):
        """Check if the attributes of a dataset indicate it is related to
        dimension scales.

        :arg list attrs: All dataset's attributes.
        """
        # Related means: the dataset either is a dimension scale itself or
        # has dimension scales attached to it.
        return self._is_dimscale(attrs) or self._is_dimlist(attrs)

    def _create_attrs(self, attrs, locid, dimscale=False):
        """Generate code for all the attributes of the ``locid`` object.

        :arg dict attrs: HDF5/JSON information about the attributes of
            the parent ``locid`` object.
        :arg str locid: MATLAB variable name of the attributes' parent object.
        :arg bool dimscale: Indicates whether the attributes belong to a
            dimension scale.
        """
        # Each attribute is emitted individually by _create_attr()...
        for attribute in attrs:
            self._create_attr(attribute, locid, dimscale=dimscale)

    def _create_dset(self, id, name, ds, locid):
        """Generate code for one dataset of the ``locid`` group.

        :arg str id: Dataset's identifier.
        :arg str name: Dataset's name.
        :arg dict ds: Dataset information.
        :arg str locid: Variable name of the dataset's parent group.
        """
        try:
            dataspace = self._dspace(ds['shape'])
        except ValueError as e:
            # Prefix the error with the dataset's alias (or name, when no
            # alias is given) to ease troubleshooting...
            raise ValueError('{}: {}'.format(ds.get('alias', [name])[0],
                                             str(e)))
        datatype = self._dtype(ds['type'])

        tmplt = Template(
            "\n"
            "% Dataset: $name\n"
            "${datatype}"
            "${dataspace}"
            "${dcpl}"
            "dsid = H5D.create($locid, '$name', tid, sid, '$plist', dcpl, "
            "'$plist');\n"
            "H5S.close(sid);\n"
            "H5T.close(tid);\n"
            "${dimscale}"
        )

        # Dataset creation property list...
        dcpl = "dcpl = H5P.create('H5P_DATASET_CREATE');\n"

        # Layout...
        layout = ds.get('creationProperties', {}).get('layout', {})
        if layout.get('class', 'H5D_CONTIGUOUS') == 'H5D_CONTIGUOUS':
            # Contiguous is the HDF5 default; nothing extra to emit...
            pass
        elif layout['class'] == 'H5D_COMPACT':
            dcpl = ("dcpl = H5P.create('H5P_DATASET_CREATE');\n"
                    "H5P.set_layout(dcpl, 'H5D_COMPACT');\n")
        elif layout['class'] == 'H5D_CHUNKED':
            # fliplr() compensates for MATLAB's reversed (column-major)
            # dimension ordering for rank > 1 chunk shapes...
            if len(layout['dims']) == 1:
                chunks = layout['dims']
            else:
                chunks = 'fliplr(%s)' % layout['dims']
            dcpl = Template(
                "dcpl = H5P.create('H5P_DATASET_CREATE');\n"
                "H5P.set_layout(dcpl, H5ML.get_constant_value('H5D_CHUNKED'));"
                "\n"
                "H5P.set_chunk(dcpl, $chunks);\n"
            ).substitute({'chunks': chunks})
        else:
            raise ValueError('%s: Invalid layout class'
                             % layout['class'])

        # Filters...
        filters = ds.get('creationProperties', {}).get('filters', [])
        for f in filters:
            if f['class'] == 'H5Z_FILTER_DEFLATE':
                dcpl += "H5P.set_deflate(dcpl, %s);\n" % f['level']
            elif f['class'] == 'H5Z_FILTER_FLETCHER32':
                dcpl += "H5P.set_fletcher32(dcpl);\n"
            elif f['class'] == 'H5Z_FILTER_SHUFFLE':
                dcpl += "H5P.set_shuffle(dcpl);\n"
            elif f['class'] == 'H5Z_FILTER_SCALEOFFSET':
                dcpl += ("H5P.set_scaleoffset(dcpl, '%s', %d);\n"
                         % (f['scaleType'], f['scaleOffset']))
            elif f['class'] == 'H5Z_FILTER_NBIT':
                dcpl += "H5P.set_nbit(dcpl);\n"
            else:
                raise NotImplementedError('%s: Filter not supported yet'
                                          % f['class'])

        # Fill value...
        fv = ds.get('creationProperties', {}).get('fillValue', None)
        if fv:
            if type(fv) is list:
                raise NotImplementedError(
                    'Non-scalar fill value not supported yet')
            else:
                # Use dataset's datatype for fill value...
                if ds['type']['class'] == 'H5T_STRING':
                    fv = "'%s'" % fv
                else:
                    fv = str(fv)
                    # Remove an "L" suffix if present...
                    # (Python 2 long literals stringify with a trailing "L".)
                    if fv[-1] == 'L':
                        fv = fv[:-1]
                    fv = '{}({})'.format(self.matlab_dtype(ds['type']['base']),
                                         fv)
                dcpl += "H5P.set_fill_value(dcpl, tid, %s);\n" % fv

        attrs = ds.get('attributes', [])
        is_dimscale = self._is_dimscale(attrs)
        if is_dimscale:
            dimscale = self._dimscale(attrs, 'dsid')
        else:
            dimscale = str()

        vars = {'locid': locid,
                'name': name,
                'datatype': datatype,
                'dataspace': dataspace,
                'plist': 'H5P_DEFAULT',
                'dcpl': dcpl,
                'dimscale': dimscale}
        self._m.append(tmplt.substitute(vars))

        is_dimlist = self._is_dimlist(attrs)
        if is_dimlist:
            # Remember this dataset so scales can be attached at the end...
            self._dimlist.append(id)
        self._create_attrs(attrs, 'dsid',
                           dimscale=(is_dimscale or is_dimlist))

        self._m.append("H5D.close(dsid);\n")

    def _create_dsets(self, dsets, locid, path):
        """Generate code for all the datasets of the ``locid`` object.

        :arg dict dsets: HDF5/JSON information about the datasets of
            the parent ``locid`` object.
        :arg str locid: MATLAB variable name of the datasets' parent object.
        :arg str path: HDF5 path of the datasets' parent object.
        """
        for link in dsets:
            dset_id = link['id']
            # Record the full HDF5 path to the dataset...
            self._dset_path[dset_id] = pp.join(path, link['title'])
            # Generate dataset code...
            self._create_dset(dset_id, link['title'],
                              self._d['datasets'][dset_id], locid)

    def _create_group(self, g):
        """Code for all group content.

        :arg dict g: Group id and full name.
        """
        grpid = g['id']
        path = g['path']

        is_root = grpid == self._root
        if is_root:
            # The root group exists as soon as the file is created...
            locid = 'fid'
        else:
            locid = 'gid'
            grp_code = Template(
                "\n\n"
                "%\n"
                "% Group: $path\n"
                "%\n"
                "gid = H5G.create(fid, '$path', '$plist', '$plist', '$plist');"
                "\n"
            ).substitute({'path': path, 'plist': 'H5P_DEFAULT'})
            self._m.append(grp_code)

        self._create_attrs(self._d['groups'][grpid].get('attributes', []),
                           locid)
        self._create_dsets(self._get_hard_links(grpid, 'datasets'), locid,
                           path)

        if not is_root:
            self._m.append("H5G.close(gid);\n")

    def _dimensions(self):
        """Generate code connecting dimension scales and their datasets.

        Iterates over the datasets recorded in ``self._dimlist`` and emits
        H5DS.attach_scale() calls for each of their dimension scales.
        """
        # (A previously kept list of dimension scale IDs was never read,
        # so it has been removed as dead code.)
        self._m.append(
            "\n\n"
            "%\n"
            "% Datasets and their dimension scales\n"
            "%\n"
        )
        for dset_id in self._dimlist:
            tmp = Template(
                "\n% Dataset with dimension scales: $name\n"
                "dset_id = H5D.open(fid, '$name', 'H5P_DEFAULT');\n"
            ).substitute({'name': self._dset_path[dset_id]})
            self._m.append(tmp)

            dset = self._d['datasets'][dset_id]

            # Get DIMENSION_LIST attribute value...
            dims = next(a['value'] for a in dset['attributes']
                        if a['name'] == 'DIMENSION_LIST')

            # Iterate over dataset's dimension scales in reversed order
            # (because of using fliplr() when defining dataset's shape)...
            for index, dimscales in enumerate(reversed(dims)):
                for ds in dimscales:
                    ds_id = pp.basename(ds)
                    tmp = Template(
                        "\n% Dimension scale: $name\n"
                        "dscl_id = H5D.open(fid, '$name', 'H5P_DEFAULT');\n"
                        "H5DS.attach_scale(dset_id, dscl_id, $idx);\n"
                        "H5D.close(dscl_id);\n"
                    ).substitute({'name': self._dset_path[ds_id],
                                  'idx': index})
                    self._m.append(tmp)

            self._m.append(
                "\nH5D.close(dset_id);\n"
            )

    def get_code(self):
        """Generate MATLAB source code.

        :return: The complete generated MATLAB source.
        :rtype: str
        """
        self._create_file()

        # Order groups by hierarchy...
        groups = self._order_groups()

        for g in groups:
            self._create_group(g)

        if self._dimlist:
            # Handle dimension scales...
            self._dimensions()

        self._close_file()

        return self._m.dump()
# Exemplo n.º 6
# 0
class MCode(object):
    """Generate MATLAB source code (as an .m file) that produces a template
    HDF5 file."""
    def __init__(self, tinfo, fname, ext='h5'):
        """Initialize an MCode instance.

        :arg dict tinfo: Template content information.
        :arg str fname: Name of the file to use when generating the source
            code.
        :arg str ext: HDF5 file extension. **Without the leading dot!**
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in tinfo:
            raise KeyError('"root" key not found.')

        # Helper variables...
        self._d = tinfo
        self._root = self._d['root']
        self._fname = fname + '.' + ext
        self._dset_path = dict()  # map dataset ID to HDF5 paths
        self._dimlist = []  # List of dataset IDs that have dimscales attached

        # Variable for the generated source code...
        self._m = StringStore()

    def _get_hard_links(self, gid, collection):
        # Return the group's hard links into the given collection
        # ('groups' or 'datasets')...
        links = list()
        for l in self._d['groups'][gid].get('links', []):
            if l['class'] == 'H5L_TYPE_HARD' and l['collection'] == collection:
                links.append(l)
        return links

    def _order_groups(self):
        # Return all groups as {'id', 'path'} dicts ordered so that parents
        # always precede their children (root group first)...
        pid = [{'id': self._root, 'path': '/'}]

        def tree_walker(ginfo):
            glinks = self._get_hard_links(ginfo['id'], 'groups')
            chld_grps = list()
            for gl in glinks:
                chld_grps.append({
                    'id': gl['id'],
                    'path': pp.join(ginfo['path'], gl['title'])
                })
            pid.extend(chld_grps)
            for cg in chld_grps:
                tree_walker(cg)

        tree_walker(pid[0])

        return pid

    def matlab_dtype(self, h5type):
        """Find appropriate MATLAB numerical data type for HDF5 predefined
        datatype.

        :arg str h5type: HDF5 predefined datatype.
        :return: MATLAB data type.
        :rtype: str
        """
        conv_map = {
            'H5T_STD_I8': 'int8',
            'H5T_STD_U8': 'uint8',
            'H5T_STD_I16': 'int16',
            'H5T_STD_U16': 'uint16',
            'H5T_STD_I32': 'int32',
            'H5T_STD_U32': 'uint32',
            'H5T_STD_I64': 'int64',
            'H5T_STD_U64': 'uint64',
            'H5T_IEEE_F32': 'single',
            'H5T_IEEE_F64': 'double'
        }
        try:
            # Strip the two-character byte-order suffix (e.g. "LE"/"BE")
            # before the lookup...
            return conv_map[h5type[:-2]]
        except KeyError:
            raise ValueError('%s: Invalid predefined datatype' % h5type)

    def _create_file(self):
        """Code to create an HDF5 file."""
        tmplt = Template(
            "% Create the HDF5 file. It will overwrite any file with same "
            "name.\n"
            "fname = '$filename';\n"
            "fcpl = H5P.create('H5P_FILE_CREATE');\n"
            "fapl = H5P.create('H5P_FILE_ACCESS');\n"
            "fid = H5F.create(fname, 'H5F_ACC_TRUNC', fcpl, fapl);\n")
        vars = {'filename': self._fname}
        self._m.append(tmplt.substitute(vars))

    def _close_file(self):
        """Code to close an HDF5 file."""
        tmplt = ("\n\n"
                 "% Close the HDF5 file.\n"
                 "H5F.close(fid);\n"
                 "% Template file is ready!\n")
        self._m.append(tmplt)

    def _dims2str(self, dims):
        """Stringify dimension list with support for the unlimited size.

        :arg list dims: Dimension size list.
        """
        dim_str = []
        for d in dims:
            if d == 'H5S_UNLIMITED':
                # Unlimited dimension...
                dim_str.append("H5ML.get_constant_value('H5S_UNLIMITED')")
            else:
                dim_str.append('{:d}'.format(d))
        return '[{}]'.format(', '.join(dim_str))

    def _dspace(self, shape):
        """Generate dataspace code.

        :arg dict shape: HDF5/JSON shape information.

        :raises ValueError: For too-large H5S_SIMPLE dataspaces.
        :raises NotImplementedError: For unsupported dataspace classes.
        """
        if shape['class'] == 'H5S_SCALAR':
            return "sid = H5S.create('H5S_SCALAR');\n"
        elif shape['class'] == 'H5S_SIMPLE':
            # NOTE(review): the 2^48 - 1 element cap looks like a MATLAB
            # array size limitation -- confirm against MATLAB docs.
            nelems_limit = 2**48 - 1
            nelems = 1
            for d in shape['dims']:
                if d != 'H5S_UNLIMITED':
                    nelems *= d
            if nelems > nelems_limit:
                raise ValueError(
                    'Number of elements too large (max. 2^48 - 1): {:d}'.
                    format(nelems))

            rank = len(shape['dims'])
            if rank == 1:
                tmplt = Template(
                    "sid = H5S.create_simple($rank, $dims, $maxdims);\n")
            else:
                # fliplr() compensates for MATLAB's reversed dimension
                # ordering relative to HDF5 (column- vs row-major)...
                tmplt = Template(
                    "sid = H5S.create_simple($rank, fliplr($dims), "
                    "fliplr($maxdims));\n")
            vars = {
                'dims': self._dims2str(shape['dims']),
                'maxdims': self._dims2str(shape.get('maxdims', [])),
                'rank': rank
            }
            return tmplt.substitute(vars)
        elif shape['class'] == 'H5S_NULL':
            return "sid = H5S.create('H5S_NULL');\n"
        else:
            raise NotImplementedError('%s: Not supported' % shape['class'])

    def _dtype(self, t, var='tid'):
        """Generate datatype code.

        :arg dict t: HDF5/JSON datatype information.
        :arg str var: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_COMPOUND':
            tmplt = ''

            # Go over each compound field...
            field_cnt = 0
            for f in t['fields']:
                field_cnt += 1
                dt = "field_tid(%d)" % field_cnt
                tmplt += self._dtype(f['type'], var=dt)
                tmplt += "field_size(%d) = H5T.get_size(%s);\n" % (field_cnt,
                                                                   dt)

            # Compute field byte offsets...
            num_fields = len(t['fields'])
            tmplt += ("field_offset = [0 cumsum(field_size(1:%d))];\n") % (
                num_fields - 1)

            # Create the compound datatype...
            tmplt += "tid = H5T.create('H5T_COMPOUND', sum(field_size));\n"
            for n in range(num_fields):
                tmplt += ("H5T.insert(tid, '%s', field_offset(%d), "
                          "field_tid(%d));\n") % (t['fields'][n]['name'],
                                                  n + 1, n + 1)

            # Close field datatypes...
            for n in range(num_fields):
                tmplt += "H5T.close(field_tid(%d));\n" % (n + 1)

            return tmplt

        elif tcls == 'H5T_VLEN':
            if t['base']['class'] == 'H5T_STRING':
                base_type = 'H5T_C_S1'
                if t['base']['length'] not in (1, 'H5T_VARIABLE'):
                    raise NotImplementedError(
                        'MATLAB only allows vlen strings of variable or '
                        'fixed length of 1.')
            elif t['base']['class'] == 'H5T_REFERENCE':
                raise NotImplementedError(
                    'MATLAB does not support H5T_REFERENCE for vlen datatype')
            else:
                base_type = t['base']['base']
            return "tid = H5T.vlen_create('%s');\n" % base_type

        elif tcls == 'H5T_ARRAY':
            tmplt = Template(
                "${base}"
                "$var = H5T.array_create(base_tid, fliplr($dims));\n"
                "H5T.close(base_tid);\n")
            return tmplt.substitute({
                'base':
                self._dtype(t['base'], var='base_tid'),
                'dims':
                t['dims'],
                'var':
                var
            })

        else:
            # Atomic datatypes (string, float, integer, reference)...
            return var + " = " + self._atomic_dtype(t, var=var)

    def _atomic_dtype(self, t, var='tid'):
        """Handle HDF5 atomic datatypes.

        :arg dict t: HDF5/JSON datatype information.
        :arg str var: Default name of the datatype variable.
        """
        tcls = t['class']
        if tcls == 'H5T_STRING':
            tmplt = Template(
                "H5T.copy('H5T_C_S1');\n"
                "H5T.set_size($var, $length);\n"
                "H5T.set_strpad($var,'$strpad');\n"
                "H5T.set_cset($var, H5ML.get_constant_value('$cset'));\n")
            if isinstance(t['length'], six.string_types):
                # E.g. 'H5T_VARIABLE' must be quoted for MATLAB...
                length = "'%s'" % t['length']
            else:
                length = t['length']
            return tmplt.substitute({
                'length': length,
                'strpad': t['strPad'],
                'cset': t['charSet'],
                'var': var
            })

        elif tcls in ('H5T_FLOAT', 'H5T_INTEGER'):
            return "H5T.copy('%s');\n" % t['base']

        elif tcls == 'H5T_REFERENCE':
            return "H5T.copy('%s');\n" % t['base']

        else:
            raise NotImplementedError('%s: Datatype not supported' %
                                      t['class'])

    def _create_attr(self, attr, locid, dimscale=False):
        """Generate code for one attribute of the ``locid`` object.

        :arg dict attr: Attribute information.
        :arg str locid: Attribute's parent variable name.
        :arg bool dimscale: Indicates attribute's parent is a dimension scale.
        """
        # Dimension scale bookkeeping attributes are created by the HDF5
        # dimension scale API itself, so never emit them explicitly...
        if dimscale and attr['name'] in ('REFERENCE_LIST', 'DIMENSION_LIST',
                                         'NAME', 'CLASS'):
            return

        dataspace = self._dspace(attr['shape'])
        datatype = self._dtype(attr['type'])

        tmplt = Template(
            "\n% Attribute: $name\n"
            "${datatype}"
            "${dataspace}"
            "acpl = H5P.create('H5P_ATTRIBUTE_CREATE');\n"
            "aid = H5A.create($lid, '$name', tid, sid, acpl, 'H5P_DEFAULT');\n"
            "${value}"
            "H5T.close(tid);\n"
            "H5S.close(sid);\n"
            "H5A.close(aid);\n")
        if attr['type']['class'] == 'H5T_STRING':

            # Quote string values for MATLAB, escaping via sprintf() when
            # the value contains special characters.
            # NOTE(review): under Python 3 ``v.encode('ascii')`` returns
            # ``bytes`` which breaks the str operations below -- verify this
            # code path runs under Python 2 only.
            # NOTE(review): ``with_sprintf`` is never reset inside the loop,
            # so once set all later (clean) values also get sprintf-wrapped.
            def prep_vals(vals):
                values = list()
                with_sprintf = False
                for v in vals:
                    temp = v.encode('ascii')
                    if "'" in temp:
                        temp = temp.replace("'", "''")
                        with_sprintf = True
                    if '%' in temp:
                        temp = temp.replace('%', '%%')
                        with_sprintf = True
                    if '\n' in temp:
                        temp = temp.replace('\n', '\\n')
                        with_sprintf = True
                    if with_sprintf:
                        values.append("sprintf('{}')".format(temp))
                    else:
                        values.append("'{}'".format(temp))
                return values

            if attr['shape']['class'] == 'H5S_SCALAR':
                value = prep_vals([attr['value']])[0]
            else:
                if len(attr['shape']['dims']) > 1:
                    raise NotImplementedError(
                        'Rank > 1 for string data not supported')
                value = prep_vals(attr['value'])

            if attr['type']['length'] == 'H5T_VARIABLE':
                # Variable-length strings become a MATLAB cell array...
                val_str = ('{' + ', '.join(value) + '}')
            else:
                # Left-justified, fixed-length string format...
                fmt = "'%-{0:d}.{0:d}s'".format(attr['type']['length'])

                if attr['shape']['class'] == 'H5S_SCALAR':
                    val_str = "sprintf({}, {})".format(fmt, value)
                else:
                    # Fixed-length 1-D string data: a transposed char
                    # matrix, one row per value...
                    for i in range(len(value)):
                        value[i] = "sprintf({}, {})".format(fmt, value[i])
                    val_str = ('[' + '; '.join(value) + "]'")
        else:
            val_str = attr['value']
        val = Template("H5A.write(aid, 'H5ML_DEFAULT', $value);\n").substitute(
            {'value': val_str})

        vars = {
            'lid': locid,
            'name': attr['name'],
            'datatype': datatype,
            'dataspace': dataspace,
            'value': val
        }
        self._m.append(tmplt.substitute(vars))

    def _is_dimscale(self, attrs):
        """Check if the dataset is a dimension scale.

        :arg list attrs: All dataset's attributes.
        """
        # Check if REFERENCE_LIST attribute is present...
        ref_list = any(a['name'] == 'REFERENCE_LIST'
                       and a['type']['class'] == 'H5T_COMPOUND' for a in attrs)

        # Check if CLASS attribute is present...
        cls_ = any(a['name'] == 'CLASS' and a['value'] == 'DIMENSION_SCALE'
                   for a in attrs)

        if ref_list and cls_:
            return True
        else:
            return False

    def _dimscale(self, attrs, locid):
        """Generate the code that sets a dimension scale.

        :arg list attrs: All dataset's attributes.
        :arg str locid: MATLAB variable name of the attributes' parent object.
        """
        # Check if NAME attribute is present...
        for a in attrs:
            if a['name'] == 'NAME' and a['type']['class'] == 'H5T_STRING':
                scale_name = "'%s'" % a['value']
                break
        else:
            # No NAME attribute: emit an empty MATLAB array...
            scale_name = '[]'

        return "H5DS.set_scale(%s, %s);\n" % (locid, scale_name)

    def _is_dimlist(self, attrs):
        """Check if the dataset has dimension scales attached.

        :arg list attrs: All dataset's attributes.
        """
        # Check if DIMENSION_LIST attribute is present...
        dim_list = any(
            a['name'] == 'DIMENSION_LIST' and a['type']['class'] == 'H5T_VLEN'
            for a in attrs)

        return True if dim_list else False

    def _is_dimscale_related(self, attrs):
        """Check if the attributes of a dataset indicate it is related to
        dimension scales.

        :arg list attrs: All dataset's attributes.
        """
        dimscale = self._is_dimscale(attrs)
        dimlist = self._is_dimlist(attrs)

        if dimscale or dimlist:
            return True
        else:
            return False

    def _create_attrs(self, attrs, locid, dimscale=False):
        """Generate code for all the attributes of the ``locid`` object.

        :arg dict attrs: HDF5/JSON information about the attributes of
            the parent ``locid`` object.
        :arg str locid: MATLAB variable name of the attributes' parent object.
        :arg bool dimscale: Indicates whether the attributes belong to a
            dimension scale.
        """
        for a in attrs:
            self._create_attr(a, locid, dimscale=dimscale)

    def _create_dset(self, id, name, ds, locid):
        """Generate code for one dataset of the ``locid`` group.

        :arg str id: Dataset's identifier.
        :arg str name: Dataset's name.
        :arg dict ds: Dataset information.
        :arg str locid: Variable name of the dataset's parent group.
        """
        try:
            dataspace = self._dspace(ds['shape'])
        except ValueError as e:
            # Prefix the error with the dataset's alias (or name)...
            raise ValueError('{}: {}'.format(
                ds.get('alias', [name])[0], str(e)))
        datatype = self._dtype(ds['type'])

        tmplt = Template(
            "\n"
            "% Dataset: $name\n"
            "${datatype}"
            "${dataspace}"
            "${dcpl}"
            "dsid = H5D.create($locid, '$name', tid, sid, '$plist', dcpl, "
            "'$plist');\n"
            "H5S.close(sid);\n"
            "H5T.close(tid);\n"
            "${dimscale}")

        # Dataset creation property list...
        dcpl = "dcpl = H5P.create('H5P_DATASET_CREATE');\n"

        # Layout...
        layout = ds.get('creationProperties', {}).get('layout', {})
        if layout.get('class', 'H5D_CONTIGUOUS') == 'H5D_CONTIGUOUS':
            # Contiguous is the HDF5 default; nothing extra to emit...
            pass
        elif layout['class'] == 'H5D_COMPACT':
            dcpl = ("dcpl = H5P.create('H5P_DATASET_CREATE');\n"
                    "H5P.set_layout(dcpl, 'H5D_COMPACT');\n")
        elif layout['class'] == 'H5D_CHUNKED':
            # fliplr() compensates for MATLAB's reversed dimension ordering
            # for rank > 1 chunk shapes...
            if len(layout['dims']) == 1:
                chunks = layout['dims']
            else:
                chunks = 'fliplr(%s)' % layout['dims']
            dcpl = Template(
                "dcpl = H5P.create('H5P_DATASET_CREATE');\n"
                "H5P.set_layout(dcpl, H5ML.get_constant_value('H5D_CHUNKED'));"
                "\n"
                "H5P.set_chunk(dcpl, $chunks);\n").substitute(
                    {'chunks': chunks})
        else:
            raise ValueError('%s: Invalid layout class' % layout['class'])

        # Filters...
        filters = ds.get('creationProperties', {}).get('filters', [])
        for f in filters:
            if f['class'] == 'H5Z_FILTER_DEFLATE':
                dcpl += "H5P.set_deflate(dcpl, %s);\n" % f['level']
            elif f['class'] == 'H5Z_FILTER_FLETCHER32':
                dcpl += "H5P.set_fletcher32(dcpl);\n"
            elif f['class'] == 'H5Z_FILTER_SHUFFLE':
                dcpl += "H5P.set_shuffle(dcpl);\n"
            elif f['class'] == 'H5Z_FILTER_SCALEOFFSET':
                dcpl += ("H5P.set_scaleoffset(dcpl, '%s', %d);\n" %
                         (f['scaleType'], f['scaleOffset']))
            elif f['class'] == 'H5Z_FILTER_NBIT':
                dcpl += "H5P.set_nbit(dcpl);\n"
            else:
                raise NotImplementedError('%s: Filter not supported yet' %
                                          f['class'])

        # Fill value...
        fv = ds.get('creationProperties', {}).get('fillValue', None)
        if fv:
            if type(fv) is list:
                raise NotImplementedError(
                    'Non-scalar fill value not supported yet')
            else:
                # Use dataset's datatype for fill value...
                if ds['type']['class'] == 'H5T_STRING':
                    fv = "'%s'" % fv
                else:
                    fv = str(fv)
                    # Remove an "L" suffix if present...
                    # (Python 2 long literals stringify with a trailing "L".)
                    if fv[-1] == 'L':
                        fv = fv[:-1]
                    fv = '{}({})'.format(self.matlab_dtype(ds['type']['base']),
                                         fv)
                dcpl += "H5P.set_fill_value(dcpl, tid, %s);\n" % fv

        attrs = ds.get('attributes', [])
        is_dimscale = self._is_dimscale(attrs)
        if is_dimscale:
            dimscale = self._dimscale(attrs, 'dsid')
        else:
            dimscale = str()

        vars = {
            'locid': locid,
            'name': name,
            'datatype': datatype,
            'dataspace': dataspace,
            'plist': 'H5P_DEFAULT',
            'dcpl': dcpl,
            'dimscale': dimscale
        }
        self._m.append(tmplt.substitute(vars))

        is_dimlist = self._is_dimlist(attrs)
        if is_dimlist:
            # Remember this dataset so scales can be attached at the end...
            self._dimlist.append(id)
        self._create_attrs(attrs, 'dsid', dimscale=(is_dimscale or is_dimlist))

        self._m.append("H5D.close(dsid);\n")

    def _create_dsets(self, dsets, locid, path):
        """Generate code for all the datasets of the ``locid`` object.

        :arg dict dsets: HDF5/JSON information about the datasets of
            the parent ``locid`` object.
        :arg str locid: MATLAB variable name of the datasets' parent object.
        :arg str path: HDF5 path of the datasets' parent object.
        """
        for d in dsets:
            # Record the full HDF5 path to the dataset...
            self._dset_path[d['id']] = pp.join(path, d['title'])
            # Generate dataset code...
            self._create_dset(d['id'], d['title'],
                              self._d['datasets'][d['id']], locid)

    def _create_group(self, g):
        """Code for all group content.

        :arg dict g: Group id and full name.
        """
        grpid = g['id']
        path = g['path']

        if grpid == self._root:
            # The root group exists as soon as the file is created...
            locid = 'fid'
        else:
            tmplt = Template(
                "\n\n"
                "%\n"
                "% Group: $path\n"
                "%\n"
                "gid = H5G.create(fid, '$path', '$plist', '$plist', '$plist');"
                "\n")
            vars = {'path': path, 'plist': 'H5P_DEFAULT'}
            self._m.append(tmplt.substitute(vars))
            locid = 'gid'

        self._create_attrs(self._d['groups'][grpid].get('attributes', []),
                           locid)
        self._create_dsets(self._get_hard_links(grpid, 'datasets'), locid,
                           path)

        if grpid != self._root:
            self._m.append("H5G.close(gid);\n")

    def _dimensions(self):
        """Generate code connecting dimension scales and their datasets."""
        # List for storing dimension scale's IDs...
        # NOTE(review): ``dscales`` is populated but never read -- dead code?
        dscales = list()

        self._m.append("\n\n"
                       "%\n"
                       "% Datasets and their dimension scales\n"
                       "%\n")
        for dset_id in self._dimlist:
            tmp = Template("\n% Dataset with dimension scales: $name\n"
                           "dset_id = H5D.open(fid, '$name', 'H5P_DEFAULT');\n"
                           ).substitute({'name': self._dset_path[dset_id]})
            self._m.append(tmp)

            dset = self._d['datasets'][dset_id]

            # Get DIMENSION_LIST attribute value...
            dims = next(a['value'] for a in dset['attributes']
                        if a['name'] == 'DIMENSION_LIST')

            # Iterate over dataset's dimension scales in reversed order
            # (because of using fliplr() when defining dataset's shape)...
            for index, dimscales in enumerate(reversed(dims)):
                for n, ds in enumerate(dimscales):
                    ds_id = pp.basename(ds)
                    dscales.append(ds_id)
                    tmp = Template(
                        "\n% Dimension scale: $name\n"
                        "dscl_id = H5D.open(fid, '$name', 'H5P_DEFAULT');\n"
                        "H5DS.attach_scale(dset_id, dscl_id, $idx);\n"
                        "H5D.close(dscl_id);\n").substitute({
                            'name':
                            self._dset_path[ds_id],
                            'idx':
                            index
                        })
                    self._m.append(tmp)

            self._m.append("\nH5D.close(dset_id);\n")

    def get_code(self):
        """Generate MATLAB source code.

        :return: The complete generated MATLAB source.
        :rtype: str
        """
        self._create_file()

        # Order groups by hierarchy...
        groups = self._order_groups()

        for g in groups:
            self._create_group(g)

        if self._dimlist:
            # Handle dimension scales...
            self._dimensions()

        self._close_file()

        return self._m.dump()
# Exemplo n.º 7
# 0
class PyCode(object):
    """
    Produce Python code that generates HDF5 file based on given JSON input.
    """

    def __init__(self, h5json, fname, ext='h5'):
        """Initialize a PyCode instance.

        :arg dict h5json: HDF5/JSON content.
        :arg str fname: Name of the HDF5 file the generated code will create.
        :arg str ext: Generated HDF5 file's extension. **Without the comma!**
        """
        if len(fname) == 0:
            raise ValueError('Missing file name.')
        if 'root' not in h5json:
            raise KeyError('"root" key not found.')
        self._h5j = h5json
        self._dimensions = list()
        self._dimscales = dict()
        self._fname = '{}.{}'.format(fname, ext)
        self._file_var = 'f'
        self._p = StringStore()

    def get_code(self):
        """Generate Python code for supplied HDF5/JSON."""
        root_uuid = self._h5j["root"]

        self._p.append(
            "import h5py\n"
            "import numpy as np\n\n"
            "# creating file: {1}\n"
            "{0} = h5py.File('{1}', 'w')\n\n"
            .format(self._file_var, self._fname)
        )

        group_json = self._h5j["groups"]
        root_json = group_json[root_uuid]
        self.doAttributes(root_json, '/', self._file_var)
        self.doLinks(self._h5j, root_json, 0)
        self.doDimensions(self._h5j, self._dimensions, self._dimscales,
                          self._file_var)

        return self._p.dump()

    def getNumpyTypename(self, hdf5TypeName, typeClass=None):
        """Map a predefined HDF5 datatype name to a numpy typename string.

        :arg str hdf5TypeName: Predefined HDF5 type, e.g. ``H5T_STD_I32LE``.
        :arg str typeClass: Optional restriction to ``H5T_INTEGER`` or
            ``H5T_FLOAT``.
        :raises TypeError: If the name is not a supported predefined type.
        """
        int_map = {
            'H5T_STD_I8':  'i1',
            'H5T_STD_U8':  'u1',
            'H5T_STD_I16': 'i2',
            'H5T_STD_U16': 'u2',
            'H5T_STD_I32': 'i4',
            'H5T_STD_U32': 'u4',
            'H5T_STD_I64': 'i8',
            'H5T_STD_U64': 'u8'
        }
        float_map = {
            'H5T_IEEE_F32': 'f4',
            'H5T_IEEE_F64': 'f8'
        }
        if len(hdf5TypeName) < 3:
            raise TypeError("%s: invalid type" % hdf5TypeName)

        # Split off the endianness suffix; little-endian is the default...
        endian = '<'
        key = hdf5TypeName
        if hdf5TypeName.endswith(('LE', 'BE')):
            key = hdf5TypeName[:-2]
            if hdf5TypeName.endswith('BE'):
                endian = '>'

        if typeClass in (None, 'H5T_INTEGER') and key in int_map:
            return endian + int_map[key]
        if typeClass in (None, 'H5T_FLOAT') and key in float_map:
            return endian + float_map[key]
        raise TypeError("%s: invalid type" % hdf5TypeName)

    def getBaseDataType(self, typeItem):
        code = "dt = "
        if type(typeItem) == str or type(typeItem) == unicode:
            # should be one of the predefined types
            dtName = self.getNumpyTypename(typeItem)
            code += "np.dtype('{}')\n".format(dtName)
            return code

        if type(typeItem) != dict:
            raise TypeError("{}: invalid type".format(typeItem))

        code += 'np.dtype({})\n'.format(self._dtype(typeItem))

        return code

    def _dtype(self, typeItem, compound=False):
        """Helper function for generating numpy.dtype() code.

        :arg dict typeItem: HDF5/JSON datatype description.
        :arg bool compound: Flag indicating the datatype is part of a compound
            datatype.
        """
        typeClass = typeItem['class']
        shape = ''
        if 'dims' in typeItem:
            shp_key = 'dims'
        else:
            shp_key = 'shape'
        if shp_key in typeItem:
            dims = None
            if type(typeItem[shp_key]) == int:
                dims = (typeItem[shp_key],)  # make into a tuple
            elif type(typeItem[shp_key]) not in (list, tuple):
                raise TypeError("expected list or integer for %s" % shp_key)
            else:
                dims = typeItem[shp_key]
            shape = str(tuple(dims))

        code = ''
        if typeClass == 'H5T_INTEGER':
            if 'base' not in typeItem:
                raise KeyError("'base' not provided")
            baseType = self.getNumpyTypename(typeItem['base'],
                                             typeClass='H5T_INTEGER')
            code += "'{}{}'".format(shape, baseType)
        elif typeClass == 'H5T_FLOAT':
            if 'base' not in typeItem:
                raise KeyError("'base' not provided")
            baseType = self.getNumpyTypename(typeItem['base'],
                                             typeClass='H5T_FLOAT')
            code += "'{}{}'".format(shape, baseType)
        elif typeClass == 'H5T_STRING':
            if 'length' not in typeItem:
                raise KeyError("'length' not provided")
            if 'charSet' not in typeItem:
                raise KeyError("'charSet' not provided")

            if typeItem['length'] == 'H5T_VARIABLE':
                if shape:
                    raise TypeError(
                        "ArrayType is not supported for variable len types")
                if typeItem['charSet'] == 'H5T_CSET_ASCII':
                    code += "h5py.special_dtype(vlen=str)"
                elif typeItem['charSet'] == 'H5T_CSET_UTF8':
                    code += "h5py.special_dtype(vlen=unicode)"
                else:
                    raise TypeError("unexpected 'charSet' value")
            else:
                # fixed size ascii string
                nStrSize = typeItem['length']
                if type(nStrSize) != int:
                    raise TypeError("expecting integer value for 'length'")
                code += "'{}S{}'".format(shape, nStrSize)
        elif typeClass == 'H5T_VLEN':
            if shape:
                raise TypeError(
                    "ArrayType is not supported for variable len types")
            if 'base' not in typeItem:
                raise KeyError("'base' not provided")
            vlenBaseType = typeItem['base']
            baseType = self.getNumpyTypename(vlenBaseType['base'],
                                             typeClass=vlenBaseType['class'])
            code += "h5py.special_dtype(vlen=np.dtype('" + baseType + "'))"
        elif typeClass == 'H5T_OPAQUE':
            if shape:
                raise TypeError(
                    "Opaque Type is not supported for variable len types")
            if 'size' not in typeItem:
                raise KeyError("'size' not provided")
            nSize = int(typeItem['size'])
            if nSize <= 0:
                raise TypeError("'size' must be non-negative")
            code += "'V{}'".format(nSize)
        elif typeClass == 'H5T_ARRAY':
            if not shape:
                raise KeyError("'shape' must be provided for array types")
            if 'base' not in typeItem:
                raise KeyError("'base' not provided")
            baseType = self._dtype(typeItem['base'])
            if type(baseType) not in (str, unicode):
                raise TypeError(
                    "Array type is only supported for predefined base types")
            # should be one of the predefined types
            code += "{1}, {0}".format(shape, baseType)
            if not compound:
                code = "({})".format(code)
        elif typeClass == 'H5T_COMPOUND':
            if 'fields' not in typeItem:
                raise KeyError("'fields' must be provided for compound types")
            if type(typeItem['fields']) is not list:
                raise TypeError("compound 'fields' value must be a list")
            dt_arg = list()
            for fld in typeItem['fields']:
                dt_arg.append(
                    "('{}', {})".format(
                        fld['name'], self._dtype(fld['type'], compound=True)))
            code = '[' + ', '.join(dt_arg) + ']'
        else:
            raise TypeError("%s: Invalid type class" % typeClass)

        return code

    def valueToString(self, attr_json):
        """Return the attribute's value serialized as JSON text."""
        return json.dumps(attr_json["value"])

    def doAttribute(self, attr_json, parent_var):
        if attr_json['type']['class'] == 'H5T_STRING':
            self._p.append(parent_var + ".attrs['" + attr_json['name']
                           + "'] = " + self.valueToString(attr_json) + '\n')
        else:
            dt = 'np.dtype({})'.format(self._dtype(attr_json["type"]))
            shape_json = attr_json["shape"]
            if shape_json['class'] == 'H5S_SIMPLE':
                shape = self._dims2str(shape_json['dims'])
            elif shape_json['class'] == 'H5S_SCALAR':
                shape = ', ()'
            else:
                raise NotImplementedError('{}: Dataspace not supported yet'
                                          .format(shape_json['class']))
            self._p.append("{}.attrs.create('{}', {}{}, dtype={})\n"
                           .format(parent_var, attr_json['name'],
                                   self.valueToString(attr_json), shape, dt))

    def getObjectName(self, obj_json, obj_title):
        """Return the object's display name.

        Prefers the first entry of the object's ``alias`` list; falls back
        to the alias value itself when it is not indexable (or empty), and
        to *obj_title* when there is no alias at all.
        """
        if 'alias' not in obj_json:
            return obj_title
        alias = obj_json['alias']
        try:
            return alias[0]
        except (TypeError, IndexError):
            # Non-indexable or empty alias: use it as-is...
            return alias

    def group_var_name(self, level, next=False):
        """Determine the name of the group variable based on its HDF5 tree
        depth.

        :arg int level: HDF5 tree depth level (root = 0).
        :arg bool next: Provide group variable for the next level, i.e.
            subgroup.
        """
        grp_fmt = 'grp_{:d}'
        if next:
            return grp_fmt.format(level+1)
        if level == 0:
            pvar = self._file_var
        else:
            pvar = grp_fmt.format(level)
        return pvar

    def doAttributes(self, obj_json, obj_name, parent_var, is_dimscale=False,
                     is_dimension=False):
        if len(obj_json.get('attributes', [])) == 0:
            return
        attrs_json = obj_json["attributes"]

        first_time = True
        for attr_json in attrs_json:
            if is_dimscale and attr_json['name'] in ('CLASS', 'REFERENCE_LIST',
                                                     'NAME'):
                    continue
            if is_dimension and attr_json['name'] == 'DIMENSION_LIST':
                continue
            if first_time:
                self._p.append("# Creating attributes for {}\n"
                               .format(obj_name))
                first_time = False
            self.doAttribute(attr_json, parent_var)

    def doGroup(self, h5json, group_id, group_name, level):
        parent_var = self.group_var_name(level)
        groups = h5json["groups"]
        group_json = groups[group_id]
        group_path = self.getObjectName(group_json, group_name)
        self._p.append("\n\n# Group: {}\n".format(group_path))
        group_var = self.group_var_name(level, next=True)
        self._p.append("{0} = {1}.create_group('{2}')\n"
                       .format(group_var, parent_var, group_name))
        self.doAttributes(group_json, group_path, group_var)
        self.doLinks(h5json, group_json, level+1)

    def _dims2str(self, dims, kwd=''):
        """Convert a list of integers to a string representing a dimension tuple.

        :arg list dims: A dimension list.
        :arg str kwd: Optional keyword string.
        """
        dims = [str(d) if d != 'H5S_UNLIMITED' else 'None' for d in dims]
        if len(dims) == 1:
            # Produce correct tuple when dim rank is 1...
            dims.append('')
        if kwd:
            kwd += '='
        return ', {}({})'.format(kwd, ','.join(dims))

    def doDataset(self, h5json, dset_id, dset_name, parent_var):
        """Generate code that creates one dataset and its attributes.

        Builds a single ``create_dataset`` call by accumulating optional
        argument snippets (shape, maxshape, chunks, fill value, filters),
        each already carrying its leading ``", "``.  Also records dimension
        scale bookkeeping in ``self._dimscales`` / ``self._dimensions`` for
        later processing by :meth:`doDimensions`.

        :arg dict h5json: Complete HDF5/JSON content.
        :arg str dset_id: Dataset identifier (key into ``h5json['datasets']``).
        :arg str dset_name: Link title to create the dataset under.
        :arg str parent_var: Generated-code variable of the parent object.
        """
        datasets = h5json["datasets"]
        dset_json = datasets[dset_id]
        # Display path: first alias when present, else the link title...
        dset_path = dset_json.get('alias', [dset_name])[0]
        self._p.append("\n# Dataset: {}\n".format(dset_path))
        dset_var = 'dset'
        try:
            dtLine = self.getBaseDataType(dset_json["type"])  # "dt = ..."
            self._p.append(dtLine)

            # Optional create_dataset() argument snippets; empty string
            # means "not applicable"...
            shape = ''
            maxshape = ''
            chunks = ''
            flt = ''
            cp = dset_json.get('creationProperties', {})
            if "shape" in dset_json:
                shape_json = dset_json["shape"]
                if shape_json["class"] == "H5S_SIMPLE":
                    shape = self._dims2str(shape_json["dims"])

                    if 'maxdims' in shape_json:
                        maxshape = self._dims2str(shape_json['maxdims'],
                                                  kwd='maxshape')

                    # Layout defaults to contiguous when unspecified...
                    layout = cp.get('layout', {}).get('class',
                                                      'H5D_CONTIGUOUS')
                    if layout == 'H5D_CHUNKED':
                        chunks = self._dims2str(cp['layout']['dims'],
                                                kwd='chunks')

                        # Filters are only emitted for chunked layout; each
                        # maps to the corresponding h5py keyword argument...
                        filters = cp.get('filters', [])
                        for f in filters:
                            if f['class'] == 'H5Z_FILTER_DEFLATE':
                                flt += (
                                    ", compression='gzip', "
                                    "compression_opts={:d}").format(f['level'])
                            elif f['class'] == 'H5Z_FILTER_FLETCHER32':
                                flt += ', fletcher32=True'
                            elif f['class'] == 'H5Z_FILTER_SHUFFLE':
                                flt += ', shuffle=True'
                            elif f['class'] == 'H5Z_FILTER_SCALEOFFSET':
                                flt += (', scaleoffset={:d}'
                                        .format(f['scaleOffset']))
                            else:
                                raise NotImplementedError(
                                    '{}: Filter not supported yet'
                                    .format(f['class']))

                elif shape_json['class'] == 'H5S_SCALAR':
                    shape = ', ()'

            # String fill values need quoting in the generated code...
            if 'fillValue' in cp:
                if dset_json['type']['class'] == 'H5T_STRING':
                    fv = ", fillvalue='{}'".format(cp['fillValue'])
                else:
                    fv = ', fillvalue={}'.format(cp['fillValue'])
            else:
                fv = ''

            code_line = ("{} = {}.create_dataset('{}'{}{}{}{}{}"
                         ", dtype=dt)\n").format(dset_var, parent_var,
                                                 dset_name, shape, maxshape,
                                                 chunks, fv, flt)
            self._p.append(code_line)
            self._p.append("# initialize dataset values here\n")

            # Record dimension-scale information for doDimensions()...
            dscale = self._is_dimscale(dset_json.get('attributes', []))
            if dscale:
                # Find out dimension scale's name, if available...
                for a in dset_json.get('attributes', []):
                    if a['name'] == 'NAME':
                        ds_name = a['value']
                        break
                else:
                    ds_name = None
                self._dimscales.update({dset_id: ds_name})
            dim = self._is_dimlist(dset_json.get('attributes', []))
            if dim:
                self._dimensions.append(dset_id)

            self.doAttributes(dset_json, dset_path, dset_var,
                              is_dimscale=dscale, is_dimension=dim)
        except Exception as e:
            # NOTE(review): re-raising via type(e) assumes the exception type
            # accepts a single message argument and it drops the original
            # traceback — confirm this is intentional.
            raise type(e)('{}: {}'.format(dset_path, str(e)))

    def doLink(self, h5json, link_json, level):
        parent_var = self.group_var_name(level)

        if link_json["class"] == "H5L_TYPE_EXTERNAL":
            self._p.append("{0}['{1}'] = h5py.ExternalLink('{2}', '{3}')\n"
                           .format(parent_var, link_json["title"],
                                   link_json["file"], link_json["h5path"])
                           )

        elif link_json["class"] == "H5L_TYPE_SOFT":
            self._p.append("{0}['{1}'] = h5py.SoftLink('{2}')\n"
                           .format(parent_var, link_json["title"],
                                   link_json["h5path"])
                           )

        elif link_json["class"] == "H5L_TYPE_HARD":
            if link_json["collection"] == "groups":
                self.doGroup(h5json, link_json["id"], link_json["title"],
                             level)
            elif link_json["collection"] == "datasets":
                self.doDataset(h5json, link_json["id"], link_json["title"],
                               parent_var)
            elif link_json["collection"] == "datatypes":
                raise NotImplementedError(
                    'committed datatypes not supported yet')
            else:
                raise Exception(
                    "unexpected collection name: " + link_json["collection"])

        elif link_json["class"] == "H5L_TYPE_UDLINK":
            self._p.append("# ignoring user defined link: '{0}'\n"
                           .format(link_json["title"]))

        else:
            raise Exception("unexpected link type: " + link_json["class"])

    def doLinks(self, h5json, group_json, level):
        links = group_json.get("links", [])
        for link in links:
            self.doLink(h5json, link, level)

    def _is_dimscale(self, attrs):
        """Check if the dataset is a dimension scale.

        :arg list attrs: All dataset's attributes.
        """
        # Check if REFERENCE_LIST attribute is present...
        ref_list = any(a['name'] == 'REFERENCE_LIST'
                       and a['type']['class'] == 'H5T_COMPOUND'
                       for a in attrs)

        # Check if CLASS attribute is present...
        cls_ = any(a['name'] == 'CLASS'and a['value'] == 'DIMENSION_SCALE'
                   for a in attrs)

        if ref_list and cls_:
            return True
        else:
            return False

    def _is_dimlist(self, attrs):
        """Check if the dataset has dimension scales attached.

        :arg list attrs: All dataset's attributes.
        """
        # Check if DIMENSION_LIST attribute is present...
        dim_list = any(a['name'] == 'DIMENSION_LIST'
                       and a['type']['class'] == 'H5T_VLEN'
                       for a in attrs)

        return True if dim_list else False

    def _dset_paths(self, h5json):
        grps = [{'id': h5json['root'], 'path': '/'}]
        dsets = {}

        def get_hard_links(grpjson, collection):
            links = list()
            for l in grpjson.get('links', []):
                if (l['class'] == 'H5L_TYPE_HARD'
                        and l['collection'] == collection):
                    links.append(l)
            return links

        def tree_walker(ginfo):
            dlinks = get_hard_links(h5json['groups'][ginfo['id']], 'datasets')
            for dl in dlinks:
                dsets.update({dl['id']: pp.join(ginfo['path'], dl['title'])})

            glinks = get_hard_links(h5json['groups'][ginfo['id']], 'groups')
            chld_grps = list()
            for gl in glinks:
                chld_grps.append({'id': gl['id'],
                                  'path': pp.join(ginfo['path'], gl['title'])})
            grps.extend(chld_grps)
            for cg in chld_grps:
                tree_walker(cg)

        tree_walker(grps[0])

        return dsets

    def doDimensions(self, h5json, dimensions, dimscales, parent_var):
        if len(dimensions) == 0:
            return

        # Generate HDF5 paths for all datasets...
        dset_path = self._dset_paths(h5json)

        self._p.append('\n\n'
                       '#\n'
                       '# Adding dimensions\n'
                       '#\n'
                       '\n')

        self._p.append('# Creating dimension scales\n')
        for dsid, name in dimscales.iteritems():
            if dimscales[dsid]:
                name = ", '{}'". format(dimscales[dsid])
            else:
                name = ''
            self._p.append("h5py.h5ds.set_scale({}['{}'].id{})\n"
                           .format(parent_var, dset_path[dsid], name))

        for dsid in dimensions:
            dsid_path = dset_path[dsid]
            self._p.append("\n# Attaching dimension scales to dataset: {}\n"
                           .format(dsid_path))
            for attr in h5json['datasets'][dsid].get('attributes', []):
                if attr['name'] == 'DIMENSION_LIST':
                    dim_list = attr['value']
                    break
            else:
                raise ValueError('%s: DIMENSION_LIST attribute not found'
                                 % dsid_path)

            for idx, ds in enumerate(dim_list):
                for d in ds:
                    did = pp.split(d)[-1]
                    did_path = dset_path[did]
                    self._p.append(
                        "{}['{}'].dims[{:d}].attach_scale({}['{}'])\n"
                        .format(parent_var, dsid_path, idx, parent_var,
                                did_path)
                    )