Exemplo n.º 1
0
def write_simple_attrs(h5_obj, attrs, verbose=False):
    """
    Writes attributes to a h5py object

    Parameters
    ----------
    h5_obj : :class:`h5py.File`, :class:`h5py.Group`, or h5py.Dataset object
        h5py object to which the attributes will be written to
    attrs : dict
        Dictionary containing the attributes as key-value pairs
    verbose : bool, optional. Default=False
        Whether or not to print debugging statements

    Raises
    ------
    TypeError
        If ``attrs`` is not a dictionary or if ``h5_obj`` is not a h5py File,
        Group or Dataset object
    ValueError
        If any value in ``attrs`` is itself a dictionary (nested attributes)

    Notes
    -----
    * Entries whose key is not a string are skipped with a warning.
    * Entries whose value is None are skipped silently.
    * Enum values are replaced by their ``name`` before being written.
    * Every value is passed through ``clean_string_att`` before being written.
    """
    if not isinstance(attrs, dict):
        raise TypeError('attrs should be a dictionary but is instead of type '
                        '{}'.format(type(attrs)))
    if not isinstance(h5_obj, (h5py.File, h5py.Group, h5py.Dataset)):
        # Fixed: the format string used to carry a stray trailing 't'
        raise TypeError('h5_obj should be a h5py File, Group or Dataset object'
                        ' but is instead of type '
                        '{}'.format(type(h5_obj)))

    for key, val in attrs.items():
        # NOTE(review): ``unicode`` is not a Python 3 builtin - presumably the
        # module aliases it to ``str`` for Python 3; confirm at the file top
        if not isinstance(key, (str, unicode)):
            warn('Skipping attribute with key: {}. Expected str, got {}'
                 ''.format(key, type(key)))
            continue

        # Remove leading and trailing whitespace from the key
        # (whitespace inside the key is left as is)
        key = key.strip()

        if val is None:
            # Nothing to write for this key
            continue
        if isinstance(val, Enum):
            if verbose:
                print('taking the name: {} of Enum: {}'.format(val.name, val))
            val = val.name
        if isinstance(val, dict):
            raise ValueError('provided dictionary was nested, not flat. '
                             'Flatten dictionary using sidpy.base.dict_utils.'
                             'flatten_dict before calling sidpy.hdf.hdf_utils.'
                             'write_simple_attrs')
        if verbose:
            print('Writing attribute: {} with value: {}'.format(key, val))
        clean_val = clean_string_att(val)
        if verbose:
            print('Attribute cleaned into: {}'.format(clean_val))
        h5_obj.attrs[key] = clean_val
    if verbose:
        # Only the last component of the HDF5 path is reported
        print('Wrote all (simple) attributes to {}: {}\n'
              ''.format(type(h5_obj),
                        h5_obj.name.split('/')[-1]))
Exemplo n.º 2
0
def clean_string_att(att_val):
    """
    Deprecated wrapper that forwards to
    ``sidpy.base.string_utils.clean_string_att`` (referenced here as
    ``sut.clean_string_att``) after emitting a FutureWarning.

    The wrapped function is described as replacing any unicode objects within
    lists with their string counterparts to ensure compatibility with
    python 3, making the changes in-place when the attribute is a list of
    unicodes. NOTE(review): that behavior lives in sidpy and is not visible
    here - confirm against the sidpy implementation.

    Parameters
    ----------
    att_val : object
        Attribute object

    Returns
    -------
    att_val : object
        Attribute object, as returned by ``sut.clean_string_att``
    """
    # Fixed: a space was missing between 'will' and 'be' because adjacent
    # string literals are concatenated without any separator
    warn(
        'pyUSID.io.write_utils.clean_string_att has been moved to '
        'sidpy.base.string_utils.clean_string_att. This copy in pyUSID will '
        'be removed in future release. Please update your import statements',
        FutureWarning)
    return sut.clean_string_att(att_val)
Exemplo n.º 3
0
def copy_main_attributes(h5_main, h5_new):
    """
    Transfers the 'quantity' and 'units' attributes from one dataset to
    another

    Parameters
    ----------
    h5_main : h5py.Dataset
        Dataset that holds the attributes to be read
    h5_new : h5py.Dataset
        Dataset that receives the attributes

    Raises
    ------
    TypeError
        If either argument is not a h5py.Dataset
    KeyError
        If 'quantity' or 'units' is absent from the attributes of h5_main

    """
    # Validate in the same order as the signature: h5_main first, then h5_new
    labelled_dsets = (('h5_main', h5_main), ('h5_new', h5_new))
    for label, dset in labelled_dsets:
        if isinstance(dset, h5py.Dataset):
            continue
        raise TypeError(label + ' should be a h5py.Dataset object')

    # Each attribute is checked and written in turn, so 'quantity' has
    # already been copied by the time a missing 'units' is detected
    for attr_key in ('quantity', 'units'):
        if attr_key in h5_main.attrs:
            attr_val = get_attr(h5_main, attr_key)
            h5_new.attrs[attr_key] = clean_string_att(attr_val)
        else:
            raise KeyError('Attribute: {} does not exist in {}'.format(attr_key, h5_main))
Exemplo n.º 4
0
def write_region_references(h5_dset,
                            reg_ref_dict,
                            add_labels_attr=True,
                            verbose=False):
    """
    Creates attributes of a h5py.Dataset that refer to regions in the dataset

    Parameters
    ----------
    h5_dset : h5.Dataset instance
        Dataset to which region references will be added as attributes
    reg_ref_dict : dict
        Region reference names mapped to their slicing information. The
        slicing information must be formatted using tuples of slice objects.
        For example {'region_1':(slice(None, None), slice (0,1))}.
        This dictionary is not modified.
    add_labels_attr : bool, optional, default = True
        Whether or not to write an attribute named 'labels' with the names of
        the region references, positioned according to the start index of
        each reference's slice along the first sliced dimension
    verbose : Boolean (Optional. Default = False)
        Whether or not to print status messages

    Raises
    ------
    TypeError
        If ``reg_ref_dict`` is not a dictionary or ``h5_dset`` is not a
        h5py.Dataset object
    """
    if not isinstance(reg_ref_dict, dict):
        raise TypeError('slices should be a dictionary but is instead of type '
                        '{}'.format(type(reg_ref_dict)))
    if not isinstance(h5_dset, h5py.Dataset):
        raise TypeError('h5_dset should be a h5py.Dataset object but is '
                        'instead of type {}'.format(type(h5_dset)))

    if verbose:
        print('Starting to write Region References to Dataset', h5_dset.name,
              'of shape:', h5_dset.shape)
    for reg_ref_name, reg_ref_tuple in reg_ref_dict.items():
        if verbose:
            print('About to write region reference:', reg_ref_name, ':',
                  reg_ref_tuple)

        reg_ref_tuple = clean_reg_ref(h5_dset, reg_ref_tuple, verbose=verbose)

        h5_dset.attrs[reg_ref_name] = h5_dset.regionref[reg_ref_tuple]

        if verbose:
            print('Wrote Region Reference:%s' % reg_ref_name)

    if not add_labels_attr:
        return

    # Next, write the names of the region references as an attribute called
    # 'labels' that is a list of strings.
    # Wrap lone slicing objects into a list using a local copy, so that the
    # dictionary supplied by the caller is left untouched.
    slicing = {name: (val if isinstance(val, (list, tuple)) else [val])
               for name, val in reg_ref_dict.items()}

    # First ascertain the dimension of the slicing, using the first entry.
    # An empty dictionary yields no slices and therefore no dimension.
    found_dim = False
    dimen_index = None
    first_slices = next(iter(slicing.values()), ())
    for dimen_index, slice_obj in enumerate(first_slices):
        # We make the assumption that checking the start is sufficient
        if slice_obj.start is not None:
            found_dim = True
            break

    if found_dim:
        # The list that will hold all the names
        headers = [None] * len(slicing)
        for col_name, slices in slicing.items():
            # Each name is placed at the start index of its own slice
            headers[slices[dimen_index].start] = col_name
        if verbose:
            print('Writing header attributes: {}'.format('labels'))
        # Now write the list of col / row names as an attribute:
        h5_dset.attrs['labels'] = clean_string_att(headers)
    else:
        warn('Unable to write region references for {}'
             ''.format(h5_dset.name.split('/')[-1]))

    if verbose:
        print('Wrote Region References of Dataset {}'
              ''.format(h5_dset.name.split('/')[-1]))
Exemplo n.º 5
0
def copy_attributes(source, dest, skip_refs=True, verbose=False):
    """
    Transfers the attributes of one h5py object onto another

    Parameters
    ----------
    source : h5py.Dataset, :class:`h5py.Group`, or :class:`h5py.File`
        Object whose attributes are read
    dest : h5py.Dataset, :class:`h5py.Group`, or :class:`h5py.File`
        Object that receives the attributes
    skip_refs : bool, optional. default = True
        Whether or not the references (dataset and region) should be skipped
    verbose : bool, optional. Default = False
        Whether or not to print logs for debugging

    Returns
    -------
    dest : h5py.Dataset, :class:`h5py.Group`, or :class:`h5py.File`
        The same object that was passed in as ``dest``

    Raises
    ------
    TypeError
        If ``source`` or ``dest`` is not a h5py Dataset, Group or File

    Notes
    -----
    The 'DIMENSION_LIST' attribute is never copied. Region references are
    never copied by the attribute loop, and the dedicated region reference
    copy is currently disabled (see the TODO below).
    """
    type_err_suffix = 'should be a h5py.Dataset, h5py.Group,or h5py.File object'
    if not isinstance(source, (h5py.Dataset, h5py.Group, h5py.File)):
        raise TypeError('source ' + type_err_suffix)
    if not isinstance(dest, (h5py.Dataset, h5py.Group, h5py.File)):
        raise TypeError('dest ' + type_err_suffix)

    # Dataset references are only copied when they were requested AND both
    # objects passed the same-file validation
    try:
        validate_h5_objs_in_same_h5_file(source, dest)
    except ValueError:
        in_same_file = False
        if not skip_refs:
            warn('Dataset references will not be copied since {} and {} are '
                 'in different files'.format(source, dest))
    else:
        in_same_file = True
    copy_dset_refs = in_same_file and not skip_refs

    for attr_key in source.attrs.keys():
        if attr_key == 'DIMENSION_LIST':
            continue

        attr_val = get_attr(source, attr_key)

        if isinstance(attr_val, h5py.RegionReference):
            # Region references are handled by the dedicated section below
            continue

        if isinstance(attr_val, h5py.Reference):
            # Don't copy (dataset) references unless asked
            if copy_dset_refs:
                if verbose:
                    print('dset ref copying ' + attr_key)
                dest.attrs[attr_key] = attr_val
            continue

        # Everything else is a simple attribute
        if verbose:
            print('simple copying ' + attr_key)
        dest.attrs[attr_key] = clean_string_att(attr_val)

    if not skip_refs:
        # This can be copied across files without problems
        reg_ref_warning = 'Could not copy region references to {}.'.format(dest.name)
        if not isinstance(dest, h5py.Dataset):
            warn('Cannot copy region references to {}'.format(type(dest)))
        else:
            try:
                if verbose:
                    print('requested reg ref copy')
                # TODO: activate again - copy_region_refs(source, dest)
            except TypeError:
                warn(reg_ref_warning)

    return dest