Example #1
0
def skim_dict(data_dir, settings):
    """
    Load (or reuse) skim buffers and build a SkimDict from the omx skims file.

    Parameters
    ----------
    data_dir : str
        unused here; retained for interface compatibility with callers
    settings : dict
        must provide 'skims_file' (omx file name) and
        'skim_time_periods'['labels'] (skim tags to load)

    Returns
    -------
    skim.SkimDict
    """

    omx_file_path = config.data_file_path(settings['skims_file'])
    tags_to_load = settings['skim_time_periods']['labels']

    logger.info("loading skim_dict from %s" % (omx_file_path, ))

    # select the skims to load
    skim_info = get_skim_info(omx_file_path, tags_to_load)

    logger.debug("omx_shape %s skim_dtype %s" %
                 (skim_info['omx_shape'], skim_info['dtype']))

    # reuse shared buffers if a multiprocessing parent already injected them;
    # otherwise allocate local buffers and load the skims from disk
    skim_buffers = inject.get_injectable('data_buffers', None)
    if skim_buffers:
        logger.info('Using existing skim_buffers for skims')
    else:
        skim_buffers = buffers_for_skims(skim_info, shared=False)
        load_skims(omx_file_path, skim_info, skim_buffers)

    skim_data = skim_data_from_buffers(skim_buffers, skim_info)

    # pair each data block with its name instead of indexing by position
    # (skim_data is built in the same order as skim_info['blocks'])
    for block_name, block_data in zip(skim_info['blocks'], skim_data):
        logger.info(
            "block_name %s bytes %s (%s)" %
            (block_name, block_data.nbytes, util.GB(block_data.nbytes)))

    # create skim dict
    skim_dict = skim.SkimDict(skim_data, skim_info)
    # NOTE(review): -1 offset presumably maps 1-based zone ids to 0-based
    # array indices - confirm against the skims file's zone numbering
    skim_dict.offset_mapper.set_offset_int(-1)

    return skim_dict
Example #2
0
def buffers_for_skims(skim_info, shared=False):
    """
    Allocate one buffer per skim block described by skim_info.

    With shared=True, each buffer is a multiprocessing.RawArray (typecode
    chosen from the skim dtype) suitable for sharing across processes;
    otherwise each buffer is a zero-filled numpy array.

    Parameters
    ----------
    skim_info : dict
        provides 'dtype', 'omx_shape', and 'blocks' ({block_name: block_size})
    shared : bool

    Returns
    -------
    dict {block_name: buffer}
    """

    dtype = skim_info['dtype']
    shape = skim_info['omx_shape']
    # itemsize is loop-invariant, so compute it once
    itemsize = np.dtype(dtype).itemsize

    buffers = {}
    for name, n_matrices in skim_info['blocks'].items():

        # buffer_size must be int, not np.int64
        n_elements = int(multiply_large_numbers(shape) * n_matrices)

        n_bytes = n_elements * itemsize
        logger.info(
            "allocating shared buffer %s for %s skims (skim size: %s * %s bytes = %s) total size: %s (%s)"
            % (name, n_matrices, shape, itemsize, n_elements, n_bytes,
               util.GB(n_bytes)))

        if not shared:
            buffers[name] = np.zeros(n_elements, dtype=dtype)
            continue

        # shared buffers only support float dtypes
        if np.issubdtype(dtype, np.float64):
            typecode = 'd'
        elif np.issubdtype(dtype, np.float32):
            typecode = 'f'
        else:
            raise RuntimeError("buffers_for_skims unrecognized dtype %s" %
                               dtype)

        buffers[name] = multiprocessing.RawArray(typecode, n_elements)

    return buffers
Example #3
0
def buffers_for_skims(skim_info, shared=False):
    """
    Allocate one buffer per skim block described by skim_info.

    With shared=True, each buffer is a multiprocessing.RawArray (typecode
    chosen from the skim dtype) suitable for sharing across processes;
    otherwise each buffer is a zero-filled numpy array.

    Parameters
    ----------
    skim_info : dict
        provides 'dtype', 'omx_shape', and 'blocks' ({block_name: block_size})
    shared : bool

    Returns
    -------
    dict {block_name: buffer}
    """

    skim_dtype = skim_info['dtype']
    omx_shape = skim_info['omx_shape']
    blocks = skim_info['blocks']

    skim_buffers = {}
    for block_name, block_size in blocks.items():

        # exact pure-python integer product: avoids both the np.int32
        # overflow that motivated the old float64-cast workaround and the
        # precision loss that workaround itself introduced for huge sizes.
        # buffer_size must be int (or p2.7 long), not np.int64
        buffer_size = int(block_size)
        for dim in omx_shape:
            buffer_size *= int(dim)

        csz = buffer_size * np.dtype(skim_dtype).itemsize
        logger.info("allocating shared buffer %s for %s (%s) matrices (%s)" %
                    (block_name, buffer_size, omx_shape, util.GB(csz)))

        if shared:
            # shared buffers only support float dtypes
            if np.issubdtype(skim_dtype, np.float64):
                typecode = 'd'
            elif np.issubdtype(skim_dtype, np.float32):
                typecode = 'f'
            else:
                raise RuntimeError("buffers_for_skims unrecognized dtype %s" %
                                   skim_dtype)

            buffer = multiprocessing.RawArray(typecode, buffer_size)
        else:
            buffer = np.zeros(buffer_size, dtype=skim_dtype)

        skim_buffers[block_name] = buffer

    return skim_buffers
Example #4
0
def buffers_for_shadow_pricing(shadow_pricing_info):
    """
    Allocate shared_data buffers for multiprocess shadow pricing

    Allocates one buffer per model_selector.
    Buffer datatype and shape specified by shadow_pricing_info

    buffers are multiprocessing.Array (RawArray protected by a multiprocessing.Lock wrapper)
    We don't actually use the wrapped version as it slows access down and doesn't provide
    protection for numpy-wrapped arrays, but it does provide a convenient way to bundle
    RawArray and an associated lock. (ShadowPriceCalculator uses the lock to coordinate access to
    the numpy-wrapped RawArray.)

    Parameters
    ----------
    shadow_pricing_info : dict

    Returns
    -------
        data_buffers : dict {<model_selector> : <shared_data_buffer>}
        dict of multiprocessing.Array keyed by model_selector
    """

    dtype = shadow_pricing_info['dtype']
    block_shapes = shadow_pricing_info['block_shapes']

    data_buffers = {}
    for block_key, block_shape in block_shapes.items():

        # buffer_size must be int, not np.int64
        buffer_size = util.iprod(block_shape)

        csz = buffer_size * np.dtype(dtype).itemsize
        # bug fix: block_shape and buffer_size were passed in swapped order,
        # so the "buffer_size" label in the message described the shape
        logger.info(
            "allocating shared shadow pricing buffer %s %s buffer_size %s bytes %s (%s)"
            % (block_key, block_shape, buffer_size, csz, util.GB(csz)))

        # only int64 shadow pricing data is supported
        if np.issubdtype(dtype, np.int64):
            typecode = ctypes.c_int64
        else:
            raise RuntimeError(
                "buffers_for_shadow_pricing unrecognized dtype %s" % dtype)

        shared_data_buffer = multiprocessing.Array(typecode, buffer_size)

        logger.info("buffers_for_shadow_pricing added block %s" % block_key)

        data_buffers[block_key] = shared_data_buffer

    return data_buffers