Python ChunkIterator示例，chunkiter.ChunkIterator Python示例

示例#1

0

显示文件

def write_dataset(src, tgt, ctx):
    """ write values from src dataset to target dataset.
    """
    msg = "write_dataset src: {} to tgt: {}, shape: {}, type: {}".format(
        src.name, tgt.name, src.shape, src.dtype)
    logging.info(msg)
    if ctx["verbose"]:
        print(msg)

    if src.shape is None:
        # null space dataset
        msg = "no data for null space dataset: {}".format(src.name)
        logging.info(msg)
        if ctx["verbose"]:
            print(msg)
        return  # no data

    if len(src.shape) == 0:
        # scalar dataset
        x = src[()]
        msg = "writing: {} for scalar dataset: {}".format(x, src.name)
        logging.info(msg)
        if ctx["verbose"]:
            print(msg)
        tgt[()] = x
        return

    msg = "iterating over chunks for {}".format(src.name)
    logging.info(msg)
    if ctx["verbose"]:
        print(msg)
    try:
        it = ChunkIterator(tgt)

        logging.debug("src dtype: {}".format(src.dtype))
        logging.debug("des dtype: {}".format(tgt.dtype))

        for s in it:
            msg = "writing dataset data for slice: {}".format(s)
            logging.info(msg)
            if ctx["verbose"]:
                print(msg)

            arr = src[s]
            tgt[s] = arr
    except (IOError, TypeError) as e:
        msg = "ERROR : failed to copy dataset data : {}".format(str(e))
        logging.error(msg)
        print(msg)
    msg = "done with dataload for {}".format(src.name)
    logging.info(msg)
    if ctx["verbose"]:
        print(msg)

示例#2

0

显示文件

def write_dataset(src, tgt, ctx):
    """ write values from src dataset to target dataset.
    """
    msg = "write_dataset src: {} to tgt: {}, shape: {}, type: {}".format(
        src.name, tgt.name, src.shape, src.dtype)
    logging.info(msg)
    if ctx["verbose"]:
        print(msg)

    if src.shape is None:
        # null space dataset
        msg = "no data for null space dataset: {}".format(src.name)
        logging.info(msg)
        if ctx["verbose"]:
            print(msg)
        return  # no data

    if len(src.shape) == 0:
        # scalar dataset
        x = src[()]
        msg = "writing: {} for scalar dataset: {}".format(x, src.name)
        logging.info(msg)
        if ctx["verbose"]:
            print(msg)
        tgt[()] = x
        return

    fillvalue = None
    try:
        # can trigger a runtime error if fillvalue is undefined
        fillvalue = src.fillvalue
    except RuntimeError:
        pass  # ignore

    msg = "iterating over chunks for {}".format(src.name)
    logging.info(msg)
    if ctx["verbose"]:
        print(msg)
    try:
        it = ChunkIterator(tgt)

        logging.debug("src dtype: {}".format(src.dtype))
        logging.debug("des dtype: {}".format(tgt.dtype))

        for s in it:
            arr = src[s]
            # don't write arr if it's all zeros (or the fillvalue if defined)
            empty_arr = np.zeros(arr.shape, dtype=arr.dtype)
            if fillvalue:
                empty_arr.fill(fillvalue)
            if np.array_equal(arr, empty_arr):
                msg = "skipping chunk for slice: {}".format(str(s))
            else:
                msg = "writing dataset data for slice: {}".format(s)
                tgt[s] = arr
            logging.info(msg)
            if ctx["verbose"]:
                print(msg)
    except (IOError, TypeError) as e:
        msg = "ERROR : failed to copy dataset data : {}".format(str(e))
        logging.error(msg)
        print(msg)
    msg = "done with dataload for {}".format(src.name)
    logging.info(msg)
    if ctx["verbose"]:
        print(msg)

示例#3

0

显示文件

def diff_dataset(src, ctx):
    """ compare dataset in src and tgt
    """
    msg = "checking dataset <{}>".format(src.name)
    logging.info(msg)
    if ctx["verbose"]:
        print(msg)

    fout = ctx["fout"]

    if src.name not in fout:
        msg = "<{}> not found in target".format(src.name)
        logging.info(msg)
        if not ctx["quiet"]:
            print(msg)
        ctx["differences"] += 1
        return False
    tgt = fout[src.name]

    try:
        tgt_shape = tgt.shape
    except AttributeError:
        msg = "<{}> in target not a dataset".format(src.name)
        logging.info(msg)
        if not ctx["quiet"]:
            print(msg)
        ctx["differences"] += 1
        return False

    # printed when there is a difference
    output = "dataset: <{}> and <{}>".format(src.name, tgt.name)
    if tgt_shape != src.shape:
        msg = "Shape of <{}> is different".format(src.name)
        logging.info(msg)
        if not ctx["quiet"]:
            print(output)
        ctx["differences"] += 1

        return False

    if tgt.dtype != src.dtype:
        msg = "Type of <{}> is different".format(src.name)
        logging.info(msg)
        if not ctx["quiet"]:
            print(output)
        ctx["differences"] += 1
        return False

    # TBD - check fillvalue

    if ctx["nodata"]:
        # skip data compare
        return True

    try:
        it = ChunkIterator(src)

        for s in it:
            msg = "checking dataset data for slice: {}".format(s)
            logging.debug(msg)

            arr_src = src[s]
            msg = "got src array {}".format(arr_src.shape)
            logging.debug(msg)
            arr_tgt = tgt[s]
            msg = "got tgt array {}".format(arr_tgt.shape)
            logging.debug(msg)

            if hash(arr_src.tostring()) != hash(arr_tgt.tostring()):
                msg = "values for dataset {} differ for slice: {}".format(
                    src.name, s)
                logging.info(msg)
                if not ctx["quiet"]:
                    print(output)
                ctx["differences"] += 1
                return False

    except (IOError, TypeError) as e:
        msg = "ERROR : failed to copy dataset data : {}".format(str(e))
        logging.error(msg)
        print(msg)

    if not ctx["noattr"]:
        result = diff_attrs(src, tgt, ctx)
    else:
        result = True
    return result

示例#4

0

显示文件

文件： utillib.py 项目： rsignell-usgs/h5pyd

def create_dataset(fd, dobj, verbose=False, nodata=False):
    """ create a dataset using the properties of the passed in h5py dataset.
        If successful, proceed to copy attributes and data.
    """
    msg = "creating dataset {}, shape: {}, type: {}".format(
        dobj.name, dobj.shape, dobj.dtype)
    logging.info(msg)
    if verbose:
        print(msg)

    fillvalue = None
    try:
        # can trigger a runtime error if fillvalue is undefined
        fillvalue = dobj.fillvalue
    except RuntimeError:
        pass  # ignore
    chunks = None
    if dobj.chunks:
        chunks = tuple(dobj.chunks)
    try:
        dset = fd.create_dataset( dobj.name, shape=dobj.shape, dtype=dobj.dtype, chunks=chunks, \
                compression=dobj.compression, shuffle=dobj.shuffle, \
                fletcher32=dobj.fletcher32, maxshape=dobj.maxshape, \
                compression_opts=dobj.compression_opts, fillvalue=fillvalue, \
                scaleoffset=dobj.scaleoffset)
        msg = "dataset created, uuid: {}, chunk_size: {}".format(
            dset.id.id, str(dset.chunks))
        logging.info(msg)
        if verbose:
            print(msg)
    except (IOError, TypeError, KeyError) as e:
        msg = "ERROR: failed to create dataset: {}".format(str(e))
        logging.error(msg)
        print(msg)
        return
    # create attributes
    for da in dobj.attrs:
        copy_attribute(dset, da, dobj)

    if nodata:
        msg = "skipping data load"
        logging.info(msg)
        if verbose:
            print(msg)
        return

    if dset.shape is None:
        # null space dataset
        msg = "no data for null space dataset: {}".format(dobj.name)
        logging.info(msg)
        if verbose:
            print(msg)
        return  # no data

    if len(dset.shape) == 0:
        # scalar dataset
        x = dobj[()]
        msg = "writing: {} for scalar dataset: {}".format(x, dobj.name)
        logging.info(msg)
        if verbose:
            print(msg)
        dset[()] = x
        return

    msg = "iterating over chunks for {}".format(dobj.name)
    logging.info(msg)
    if verbose:
        print(msg)
    try:
        it = ChunkIterator(dset)

        logging.debug("src dtype: {}".format(dobj.dtype))
        logging.debug("des dtype: {}".format(dset.dtype))

        for s in it:
            msg = "writing dataset data for slice: {}".format(s)
            logging.info(msg)
            if verbose:
                print(msg)
            arr = dobj[s]
            dset[s] = arr
    except (IOError, TypeError) as e:
        msg = "ERROR : failed to copy dataset data : {}".format(str(e))
        logging.error(msg)
        print(msg)
    msg = "done with dataload for {}".format(dobj.name)
    logging.info(msg)
    if verbose:
        print(msg)