Example #1
def clone(self):
    for unit, attrs in self.reals.items():
        for attr in attrs:
            value = getattr(unit, attr)
            if self.is_immutable(value):
                setattr(self, attr, value)
                continue
            if not isinstance(value, Array):
                cloned = getattr(self, attr, None)
                if cloned is None:
                    setattr(self, attr, deepcopy(value))
                    continue
                if isinstance(value, list):
                    del cloned[:]
                    cloned.extend(value)
                elif isinstance(value, (dict, set)):
                    cloned.clear()
                    cloned.update(value)
                elif isinstance(value, Bool):
                    cloned <<= value
                elif isinstance(value, numpy.ndarray):
                    cloned[:] = value
                else:
                    setattr(self, attr, deepcopy(value))
                continue
            vec = getattr(self, attr, None)
            if vec is None:
                vec = Array()
                self.vectors[value] = vec
                setattr(self, attr, vec)
            else:
                assert isinstance(vec, Array)
            if not vec and value:
                vec.reset(value.mem.copy())
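
The dispatch-by-type pattern above (refresh an existing container in place, fall back to a deep copy) also works outside the framework. A minimal sketch with plain containers only, no Array or Bool; clone_attr is a hypothetical helper name:

from copy import deepcopy

def clone_attr(dst, attr, value):
    # Refresh dst.<attr> in place when the existing container allows it,
    # otherwise fall back to a deep copy.
    cloned = getattr(dst, attr, None)
    if cloned is None:
        setattr(dst, attr, deepcopy(value))
    elif isinstance(value, list):
        cloned[:] = value
    elif isinstance(value, (dict, set)):
        cloned.clear()
        cloned.update(value)
    else:
        setattr(dst, attr, deepcopy(value))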
Example #2
class Summator(AcceleratedUnit):
    """Multiplies two vectors pointwise.
    """
    def __init__(self, workflow, **kwargs):
        super(Summator, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.demand("x", "y")

    def initialize(self, device, **kwargs):
        super(Summator, self).initialize(device, **kwargs)
        if not self.output:
            self.output.reset(numpy.zeros_like(self.x.mem))
        else:
            assert self.output.shape == self.x.shape
        self.init_vectors(self.x, self.y, self.output)

    def init_unpickled(self):
        super(Summator, self).init_unpickled()
        self.sources_["summator"] = {}

    def _gpu_init(self):
        self.build_program({"OUTPUT_SIZE": self.output.size},
                           "%s_%d" %
                           (self.__class__.__name__, self.output.size),
                           dtype=self.x.dtype)
        self.assign_kernel("add_forward")
        self.set_args(self.x, self.y, self.output)

    def cuda_init(self):
        self._gpu_init()
        block_size = self.device.suggest_block_size(self._kernel_)
        self._global_size = (int(numpy.ceil(self.output.size / block_size)), 1,
                             1)
        self._local_size = (block_size, 1, 1)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = (self.output.size, 1, 1)
        self._local_size = None

    def numpy_init(self):
        pass  # nothing to init

    def _gpu_run(self):
        self.unmap_vectors(self.x, self.y, self.output)
        self.execute_kernel(self._global_size, self._local_size)

    def cuda_run(self):
        self._gpu_run()

    def ocl_run(self):
        self._gpu_run()

    def numpy_run(self):
        self.x.map_read()
        self.y.map_read()
        self.output.map_invalidate()
        numpy.add(self.x.mem, self.y.mem, self.output.mem)
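
Stripped of device handling, Summator's numpy_run reduces to element-wise addition. A quick standalone check with plain numpy (shapes are arbitrary assumptions):

import numpy

x = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)
y = numpy.ones_like(x)
output = numpy.zeros_like(x)
numpy.add(x, y, output)  # the same call numpy_run performs on .mem buffers
assert (output == x + 1).all()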
Example #3
class GDSummator(AcceleratedUnit):
    """Gradient descent for Multiplier.
    """
    def __init__(self, workflow, **kwargs):
        super(GDSummator, self).__init__(workflow, **kwargs)
        self.err_x = Array()
        self.err_y = Array()
        self.demand("err_output")

    def initialize(self, device, **kwargs):
        super(GDSummator, self).initialize(device, **kwargs)
        if not self.err_x:
            self.err_x.reset(numpy.zeros_like(self.err_output.mem))
        else:
            assert self.err_x.shape == self.err_output.shape
        if not self.err_y:
            self.err_y.reset(numpy.zeros_like(self.err_output.mem))
        else:
            assert self.err_y.shape == self.err_output.shape
        self.init_vectors(self.err_x, self.err_y, self.err_output)

    def cuda_init(self):
        pass  # nothing to init

    def ocl_init(self):
        pass  # nothing to init

    def numpy_init(self):
        pass  # nothing to init

    def cuda_run(self):
        self.unmap_vectors(self.err_output, self.err_x, self.err_y)
        self.err_x.devmem.from_device_async(self.err_output.devmem)
        self.err_y.devmem.from_device_async(self.err_output.devmem)

    def ocl_run(self):
        self.unmap_vectors(self.err_output, self.err_x, self.err_y)
        self.device.queue_.copy_buffer(self.err_output.devmem,
                                       self.err_x.devmem,
                                       0,
                                       0,
                                       self.err_output.nbytes,
                                       need_event=False)
        self.device.queue_.copy_buffer(self.err_output.devmem,
                                       self.err_y.devmem,
                                       0,
                                       0,
                                       self.err_output.nbytes,
                                       need_event=False)

    def numpy_run(self):
        self.err_output.map_read()
        self.err_x.map_invalidate()
        self.err_y.map_invalidate()
        self.err_x.mem[:] = self.err_output.mem[:]
        self.err_y.mem[:] = self.err_output.mem[:]
Example #4
class GDSummator(AcceleratedUnit):
    """Gradient descent for Summator.
    """
    def __init__(self, workflow, **kwargs):
        super(GDSummator, self).__init__(workflow, **kwargs)
        self.err_x = Array()
        self.err_y = Array()
        self.demand("err_output")

    def initialize(self, device, **kwargs):
        super(GDSummator, self).initialize(device, **kwargs)

        if self.err_x:
            assert self.err_x.shape[1:] == self.err_output.shape[1:]
        if not self.err_x or self.err_x.shape[0] != self.err_output.shape[0]:
            self.err_x.reset(numpy.zeros_like(self.err_output.mem))
        if self.err_y:
            assert self.err_y.shape[1:] == self.err_output.shape[1:]
        if not self.err_y or self.err_y.shape[0] != self.err_output.shape[0]:
            self.err_y.reset(numpy.zeros_like(self.err_output.mem))
        self.init_vectors(self.err_x, self.err_y, self.err_output)

    def cuda_init(self):
        pass  # nothing to init

    def ocl_init(self):
        pass  # nothing to init

    def numpy_init(self):
        pass  # nothing to init

    def cuda_run(self):
        self.unmap_vectors(self.err_output, self.err_x, self.err_y)
        self.err_x.devmem.from_device_async(self.err_output.devmem)
        self.err_y.devmem.from_device_async(self.err_output.devmem)

    def ocl_run(self):
        self.unmap_vectors(self.err_output, self.err_x, self.err_y)
        self.device.queue_.copy_buffer(
            self.err_output.devmem, self.err_x.devmem, 0, 0,
            self.err_output.nbytes, need_event=False)
        self.device.queue_.copy_buffer(
            self.err_output.devmem, self.err_y.devmem, 0, 0,
            self.err_output.nbytes, need_event=False)

    def numpy_run(self):
        self.err_output.map_read()
        self.err_x.map_invalidate()
        self.err_y.map_invalidate()
        self.err_x.mem[:] = self.err_output.mem[:]
        self.err_y.mem[:] = self.err_output.mem[:]
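
Since the forward unit computes x + y, the gradient of the sum with respect to each input is 1, so the output error fans out to err_x and err_y unchanged; that is why every backend above is a plain copy. The same relationship in bare numpy:

import numpy

err_output = numpy.random.rand(4, 3).astype(numpy.float32)
err_x = err_output.copy()  # d(x + y)/dx == 1, the error passes through
err_y = err_output.copy()  # d(x + y)/dy == 1
assert (err_x == err_output).all() and (err_y == err_output).all()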
Example #5
class MemCpy(AcceleratedUnit):
    def __init__(self, workflow, **kwargs):
        super(MemCpy, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.demand("input")

    def initialize(self, device, **kwargs):
        super(MemCpy, self).initialize(device, **kwargs)
        if (self.output.mem is None or
                self.output.mem.size != self.input.mem.size):
            self.output.reset()
            self.output.mem = numpy.zeros(self.input.mem.shape,
                                          dtype=self.input.mem.dtype)
        self.input.initialize(self.device)
        self.output.initialize(self.device)

    def cuda_init(self):
        pass

    def ocl_init(self):
        pass

    def _gpu_run(self):
        self.input.unmap()
        self.output.unmap()

    def ocl_run(self):
        self._gpu_run()
        self.device.queue_.copy_buffer(self.input.devmem, self.output.devmem,
                                       0, 0, self.input.nbytes)

    def cuda_run(self):
        self._gpu_run()
        self.output.devmem.from_device_async(self.input.devmem)

    def numpy_run(self):
        self.input.map_read()
        self.output.map_invalidate()
        numpy.copyto(self.output.mem, self.input.mem)
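
The map_read / map_invalidate / unmap calls recurring in these units form a small host-device coherence protocol: map before touching host memory, prefer map_invalidate when the host copy will be fully overwritten (it skips the device-to-host transfer), and unmap before any kernel or device copy runs. A toy model of that discipline, not the real Array API:

class ToyBuffer(object):
    """Hypothetical stand-in illustrating Array's mapping discipline."""
    def __init__(self, mem):
        self.mem = mem
        self.mapped = False

    def map_read(self):
        # Make sure the host copy is current before reading it.
        self.mapped = True

    def map_invalidate(self):
        # The host will overwrite everything; skip the device->host copy.
        self.mapped = True

    def unmap(self):
        # Flush host changes so the device sees them.
        self.mapped = False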
Example #6
class Cutter1D(AcceleratedUnit):
    """Cuts the specified interval from each 1D sample of input batch
    into output.

    y = alpha * x + beta * y
    """
    def __init__(self, workflow, **kwargs):
        super(Cutter1D, self).__init__(workflow, **kwargs)
        self.alpha = kwargs.get("alpha")
        self.beta = kwargs.get("beta")
        self.output_offset = kwargs.get("output_offset", 0)
        self.output = Array()
        self.demand("alpha", "beta", "input")
        # TODO: add input_offset and length to demand so as not to crash lstm
        # TODO: unit test

    def init_unpickled(self):
        super(Cutter1D, self).init_unpickled()
        self.sources_["cutter"] = {}

    def initialize(self, device, **kwargs):
        super(Cutter1D, self).initialize(device, **kwargs)

        if not self.output or self.output.shape[0] != self.input.shape[0]:
            self.output.reset(
                numpy.zeros(
                    (self.input.shape[0], self.output_offset + self.length),
                    dtype=self.input.dtype))
        else:
            assert self.output.sample_size >= self.output_offset + self.length

        self.init_vectors(self.input, self.output)

    def cuda_init(self):
        dtype = self.input.dtype
        itemsize = self.input.itemsize
        limit = self.input.shape[0] * self.length

        self.build_program({}, "%s" % self.__class__.__name__, dtype=dtype)
        self.assign_kernel("cutter_1d_forward")

        self.set_args(
            int(self.input.devmem) + self.input_offset * itemsize,
            numpy.array([self.alpha], dtype=dtype),
            numpy.array([self.input.sample_size], dtype=numpy.int32),
            int(self.output.devmem) + self.output_offset * itemsize,
            numpy.array([self.beta], dtype=dtype),
            numpy.array([self.output.sample_size], dtype=numpy.int32),
            numpy.array([self.length], dtype=numpy.int32),
            numpy.array([limit], dtype=numpy.int32))

        block_size = self.device.suggest_block_size(self._kernel_)
        self._global_size = (int(numpy.ceil(limit / block_size)), 1, 1)
        self._local_size = (block_size, 1, 1)

    def ocl_init(self):
        dtype = self.input.dtype

        self.build_program({}, "%s" % self.__class__.__name__, dtype=dtype)
        self.assign_kernel("cutter_1d_forward")

        self.set_args(
            self.input.devmem,
            numpy.array([self.input_offset], dtype=numpy.int32),
            numpy.array([self.alpha], dtype=dtype),
            numpy.array([self.input.sample_size], dtype=numpy.int32),
            self.output.devmem,
            numpy.array([self.output_offset], dtype=numpy.int32),
            numpy.array([self.beta], dtype=dtype),
            numpy.array([self.output.sample_size], dtype=numpy.int32))

        self._global_size = (self.input.shape[0], self.length)
        self._local_size = None

    def _gpu_run(self):
        self.unmap_vectors(self.input, self.output)
        self.execute_kernel(self._global_size, self._local_size)

    def cuda_run(self):
        return self._gpu_run()

    def ocl_run(self):
        return self._gpu_run()

    def numpy_run(self):
        self.input.map_read()
        self.output.map_write()
        out = self.output.matrix[
            :, self.output_offset:self.output_offset + self.length]
        if self.beta:
            out *= self.beta
        else:
            out[:] = 0
        out += (
            self.input.matrix[
                :, self.input_offset:self.input_offset + self.length] *
            self.alpha)
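
numpy_run above implements the documented update y = alpha * x + beta * y on a column slice. The indexing is easier to follow in isolation; the offsets and sizes below are made-up values:

import numpy

alpha, beta = 2.0, 0.5
input_offset, output_offset, length = 1, 0, 2  # hypothetical offsets

x = numpy.arange(12, dtype=numpy.float32).reshape(3, 4)
y = numpy.ones((3, output_offset + length), dtype=numpy.float32)

out = y[:, output_offset:output_offset + length]
out *= beta                                               # beta * y
out += x[:, input_offset:input_offset + length] * alpha  # + alpha * x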
Example #7
class ImageLoader(LoaderWithValidationRatio):
    """Base class for all image loaders. It is generally used for loading large
    datasets.

    Attributes:
        color_space: the color space to which to convert images. Can be any of
                     the values supported by OpenCV, e.g., GRAY or HSV.
        source_dtype: dtype to work with during various image operations.
        shape: image shape (tuple) - set after initialize().

    Must be overridden in child classes:
        get_image_label()
        get_image_info()
        get_image_data()
        get_keys()
    """
    def __init__(self, workflow, **kwargs):
        super(ImageLoader, self).__init__(workflow, **kwargs)
        self.color_space = kwargs.get("color_space", "RGB")
        self._source_dtype = numpy.float32
        self._original_shape = tuple()
        self.class_keys = [[], [], []]
        self.verify_interface(IImageLoader)
        self.path_to_mean = kwargs.get("path_to_mean", None)
        self.add_sobel = kwargs.get("add_sobel", False)
        self.mirror = kwargs.get("mirror", False)  # True, False, "random"
        self.scale = kwargs.get("scale", 1.0)
        self.scale_maintain_aspect_ratio = kwargs.get(
            "scale_maintain_aspect_ratio", True)
        self.rotations = kwargs.get("rotations", (0.0, ))  # radians
        self.crop = kwargs.get("crop", None)
        self.crop_number = kwargs.get("crop_number", 1)
        self._background = None
        self.background_image = kwargs.get("background_image", None)
        self.background_color = kwargs.get("background_color",
                                           (0xff, 0x14, 0x93))
        self.smart_crop = kwargs.get("smart_crop", True)
        self.minibatch_label_values = Array()

    @property
    def source_dtype(self):
        return self._source_dtype

    @property
    def color_space(self):
        return self._color_space

    @color_space.setter
    def color_space(self, value):
        self._validate_color_space(value)
        self._color_space = value

    @Loader.shape.getter
    def shape(self):
        """
        :return: Final cropped image shape.
        """
        if self.crop is not None:
            shape = self.crop
        else:
            shape = self.uncropped_shape
        if self.channels_number > 1:
            shape += (self.channels_number, )
        return shape

    @property
    def uncropped_shape(self):
        """
        :return: Uncropped (but scaled) image shape.
        """
        if not isinstance(self.scale, tuple):
            if self._original_shape == tuple():
                return tuple()
            return self._scale_shape(self._original_shape)[:2]
        else:
            return self.scale

    @property
    def original_shape(self):
        return self._original_shape

    @original_shape.setter
    def original_shape(self, value):
        if value is None:
            raise ValueError("shape must not be None")
        if not isinstance(value, tuple):
            raise TypeError("shape must be a tuple (got %s)" % (value, ))
        if len(value) not in (2, 3):
            raise ValueError("len(shape) must be equal to 2 or 3 (got %s)" %
                             (value, ))
        for i, d in enumerate(value):
            if not isinstance(d, int):
                raise TypeError("shape[%d] is not an integer (= %s)" % (i, d))
            if d < 1:
                raise ValueError("shape[%d] < 1 (= %s)" % (i, d))
        self._original_shape = value

    @property
    def scale(self):
        return self._scale

    @scale.setter
    def scale(self, value):
        if not isinstance(value, (float, tuple)):
            raise TypeError("scale must be either float or tuple of two ints"
                            " (got %s of type %s)" % (value, value.__class__))
        if isinstance(value, tuple):
            if len(value) != 2:
                raise ValueError("scale must have length 2 (not %d in %s)" %
                                 (len(value), value))
            if not isinstance(value[0], int) or not isinstance(value[1], int):
                raise ValueError("scale must consist of integers (got %s)" %
                                 value)
        self._scale = value

    @property
    def crop(self):
        return self._crop

    @crop.setter
    def crop(self, value):
        if value is None:
            self._crop = None
            return
        if not isinstance(value, tuple):
            raise TypeError(
                "crop must be a tuple of 2 integers or floats (got %s)" %
                value)
        if len(value) != 2:
            raise ValueError("invalid crop length (got %d for %s), must be 2" %
                             (len(value), value))
        for i, val in enumerate(value):
            if not isinstance(val, (int, float)):
                raise TypeError(
                    "crop[%d] = %s is neither an integer nor a float" %
                    (i, val))
            if isinstance(val, int) and val < 1:
                raise ValueError("crop[%d] = %s is out of range" % (i, val))
            if isinstance(val, float):
                if val <= 0 or val > 1:
                    raise ValueError("Out of range crop %s: %s" %
                                     (("height", "width")[i], val))
        self._crop = value

    @property
    def crop_number(self):
        return self._crop_number

    @crop_number.setter
    def crop_number(self, value):
        if not isinstance(value, int):
            raise TypeError("crop_number must be an integer (got %s)" % value)
        if value < 1:
            raise ValueError("crop_number must be greater than zero (got %d)" %
                             value)
        if value > 1 and self.crop is None:
            raise ValueError(
                "crop parameter is None, refusing to set crop_number")
        self._crop_number = value

    @property
    def smart_crop(self):
        """
        :return: Value indicating whether to crop only around bboxes.
        """
        return self._smart_crop

    @smart_crop.setter
    def smart_crop(self, value):
        if not isinstance(value, bool):
            raise TypeError("smart_crop must be a boolean value")
        self._smart_crop = value

    @property
    def mirror(self):
        return self._mirror

    @mirror.setter
    def mirror(self, value):
        if value not in (False, True, "random"):
            raise ValueError(
                "mirror must be any of the following: False, True, \"random\"")
        self._mirror = value

    @property
    def rotations(self):
        return self._rotations

    @rotations.setter
    def rotations(self, value):
        if not isinstance(value, tuple):
            raise TypeError("rotations must be a tuple (got %s)" % value)
        for i, rot in enumerate(value):
            if not isinstance(rot, float):
                raise TypeError("rotations[%d] = %s is not a float" % (i, rot))
            if rot >= numpy.pi * 2:
                raise ValueError("rotations[%d] = %s is greater than 2π" %
                                 (i, rot))
        self._rotations = tuple(sorted(value))

    @property
    def samples_inflation(self):
        return (1 if self.mirror is not True else 2) * len(self.rotations) * \
            self.crop_number

    @property
    def background_image(self):
        return self._background_image

    @background_image.setter
    def background_image(self, value):
        if isinstance(value, str):
            with open(value, "rb") as fin:
                self.background_image = fin
        elif hasattr(value, "read") and hasattr(value, "seek"):
            self.background_image = numpy.array(Image.open(value))
        elif isinstance(value, numpy.ndarray):
            if value.shape != self.shape:
                raise error.BadFormatError(
                    "background_image's shape %s != sample's shape "
                    "%s" % (value.shape, self.shape))
            self._background_image = value
            if getattr(self, "background_color", None) is not None:
                self.warning(
                    "background_color = %s is ignored in favor of "
                    "background_image", self.background_color)
        elif value is None:
            self._background_image = None
        else:
            raise ValueError("background_image must be any of the following: "
                             "file name, file object, numpy array or None")

    @property
    def background_color(self):
        return self._background_color

    @background_color.setter
    def background_color(self, value):
        if value is None:
            self._background_color = None
            return
        if not isinstance(value, tuple):
            raise TypeError("background_color must be a tuple (got %s)" %
                            value)
        if len(value) != self.channels_number:
            raise ValueError(
                "background_color must have the same length as the number of "
                "channels = %d (got length %d for %s)" %
                (self.channels_number, len(value), value))
        for i, col in enumerate(value):
            if not isinstance(col, int):
                raise TypeError("background_color[%d] = %s is not an integer" %
                                (i, col))
        if getattr(self, "background_image", None) is not None:
            self.warning(
                "background_color = %s is ignored in favor of "
                "background_image", value)
        self._background_color = value

    @property
    def background(self):
        if self._background is None:
            if self.background_image is not None:
                self._background = self.background_image
            else:
                self._background = numpy.zeros(self.shape)
                self._background[:] = self.background_color
        return self._background.copy()

    @property
    def channels_number(self):
        channels = COLOR_CHANNELS_MAP[self.color_space]
        if self.add_sobel:
            channels += 1
        return channels

    def get_effective_image_info(self, key):
        info = self.get_image_info(key)
        if self.scale == 1.0:
            return info
        if isinstance(self.scale, tuple):
            return self.scale, info[1]
        else:
            return self._scale_shape(info[0]), info[1]

    def get_image_bbox(self, key, size):
        """
        Override this method for custom label <-> bbox mapping.
        :param key: The image key.
        :param size: The image size (for optimization purposes).
        :return: (ymin, ymax, xmin, xmax).
        """
        return 0, size[0], 0, size[1]

    def preprocess_image(self, data, color, crop, bbox):
        """
        Transforms images before serving.
        :param data: the loaded image data.
        :param color: The loaded image color space.
        :param crop: True if must crop the scaled image; otherwise, False.
        :param bbox: The bounding box of the labeled object. Tuple
        (ymin, ymax, xmin, xmax).
        :return: The transformed image data, the label value (from 0 to 1).
        """
        if color != self.color_space:
            method = getattr(cv2, "COLOR_%s2%s" % (color, self.color_space),
                             None)
            if method is None:
                aux_method = getattr(cv2, "COLOR_%s2BGR" % color)
                try:
                    data = cv2.cvtColor(data, aux_method)
                except cv2.error as e:
                    self.error("Failed to perform '%s' conversion", aux_method)
                    raise from_none(e)
                method = getattr(cv2, "COLOR_BGR2%s" % self.color_space)
            try:
                data = cv2.cvtColor(data, method)
            except cv2.error as e:
                self.error("Failed to perform '%s' conversion", method)
                raise from_none(e)

        if self.add_sobel:
            data = self.add_sobel_channel(data)
        if self.scale != 1.0:
            data, bbox = self.scale_image(data, bbox)
        if crop and self.crop is not None:
            data, label_value = self.crop_image(data, bbox)
        else:
            label_value = 1

        return data, label_value, bbox

    def scale_image(self, data, bbox):
        bbox = numpy.array(bbox, float)
        if self.scale_maintain_aspect_ratio:
            if data.shape[1] >= data.shape[0]:
                dst_width = self.uncropped_shape[:2][1]
                dst_height = int(
                    numpy.round(
                        float(dst_width) * data.shape[0] / data.shape[1]))
            else:
                dst_height = self.uncropped_shape[:2][0]
                dst_width = int(
                    numpy.round(
                        float(dst_height) * data.shape[1] / data.shape[0]))
            dst_x_min = int(
                numpy.round(0.5 * (self.uncropped_shape[:2][1] - dst_width)))
            dst_y_min = int(
                numpy.round(0.5 * (self.uncropped_shape[:2][0] - dst_height)))
            data = cv2.resize(data, (dst_width, dst_height),
                              interpolation=cv2.INTER_CUBIC)
            dst_x_max = dst_x_min + data.shape[1]
            dst_y_max = dst_y_min + data.shape[0]
            sample = self.background
            sample[dst_y_min:dst_y_max, dst_x_min:dst_x_max] = data
            data = sample.copy()
            bbox[:2] *= (dst_y_max - dst_y_min) / (bbox[1] - bbox[0])
            bbox[:2] += dst_y_min
            bbox[2:] *= (dst_x_max - dst_x_min) / (bbox[3] - bbox[2])
            bbox[2:] += dst_x_min
        else:
            data = cv2.resize(data,
                              tuple(reversed(self.uncropped_shape[:2])),
                              interpolation=cv2.INTER_CUBIC)
            bbox[:2] *= self.uncropped_shape[0] / (bbox[1] - bbox[0])
            bbox[2:] *= self.uncropped_shape[1] / (bbox[3] - bbox[2])
        return data, tuple(bbox.astype(numpy.int32))

    def add_sobel_channel(self, data):
        original_data = data
        if self.channels_number == 1 + 1:
            original_data = original_data.reshape(original_data.shape[:2] +
                                                  (1, ))
        elif self.color_space in ("RGB", "BGR", "RGBA", "BGRA"):
            data = cv2.cvtColor(
                data, getattr(cv2, "COLOR_%s2GRAY" % self.color_space))
        elif self.color_space == "HSV":
            data = data[:, :, 2]
        elif self.color_space == "YCR_CB":
            data = data[:, :, 0]
        else:
            raise NotImplementedError(
                "Conversion from %s to GRAY is not ready" % self.color_space)
        sobel_xy = tuple(
            cv2.Sobel(data, cv2.CV_32F, *d, ksize=3) for d in ((1, 0), (0, 1)))
        sobel_data = numpy.zeros(shape=data.shape +
                                 (original_data.shape[2] + 1, ),
                                 dtype=original_data.dtype)
        # Per-pixel gradient magnitude (norm over the x/y Sobel pair)
        sobel_data[:, :, -1] = numpy.linalg.norm(sobel_xy, axis=0)
        sobel_data[:, :, :-1] = original_data
        return sobel_data

    def crop_image(self, data, bbox):
        """
        Cuts a rectangular part of an image.
        :param data: The source image to crop.
        :param bbox: (ymin, ymax, xmin, xmax)
        :return: tuple (image part randomly cropped around the bbox,\
        intersection ratio)
        """
        crop_hw_yx = [[0, 0], [0, 0]]
        for i in 0, 1:
            crop_hw_yx[0][i] = self.crop[i] if isinstance(self.crop[i], int) \
                else int(self.crop[i] * data.shape[i])
            crop_size = crop_hw_yx[0][i]
            crop_hw_yx[1][i] = self.prng.randint(
                max(bbox[i * 2] - crop_size, 0),
                min(data.shape[i] - crop_size + 1,
                    bbox[i * 2 + 1] + crop_size))
        crop_first = crop_hw_yx[1]
        crop_last = tuple(crop_hw_yx[1][i] + crop_hw_yx[0][i] for i in (0, 1))
        crop_bbox = crop_first[0], crop_last[0], crop_first[1], crop_last[1]
        return data[crop_bbox[0]:crop_bbox[1], crop_bbox[2]:crop_bbox[3]], \
            self._intersection(bbox, crop_bbox)

    def distort(self, data, mirror, rot):
        if mirror:
            data = cv2.flip(data, 1)
        data = numpy.resize(data, data.shape[:2] + (data.shape[-1] + 1, ))
        data[:, :, -1] = 1
        center = tuple(reversed(tuple(data.shape[i] // 2 for i in (0, 1))))
        rot_matrix = cv2.getRotationMatrix2D(center, rot * 180 / numpy.pi, 1.0)
        data = cv2.warpAffine(data, rot_matrix,
                              tuple(reversed(data.shape[:2])))
        real = data[:, :, :-1]
        imag = data[:, :, -1]
        real *= imag[..., None]
        real += self.background * (1 - imag)[..., None]
        return real

    def get_distortion_by_index(self, index):
        index //= self.crop_number
        if self.mirror is True:
            return index % 2 == 1, self.rotations[index // 2]
        elif self.mirror == "random":
            mirror = bool(self.prng.randint(2))
        else:
            mirror = False
        return mirror, self.rotations[index]

    def load_keys(self, keys, pbar, data, labels, label_values, crop=True):
        """Loads data from the specified keys.
        """
        index = 0
        has_labels = False
        for key in keys:
            obj, label_value, _ = self._load_image(key)
            label, has_labels = self._load_label(key, has_labels)
            if (self.crop is None or not crop) and \
                    obj.shape[:2] != self.uncropped_shape:
                self.warning("Ignored %s (label %s): shape %s", key, label,
                             obj.shape[:2])
                continue
            if data is not None:
                data[index] = obj
            if labels is not None:
                labels[index] = label
            if label_values is not None:
                label_values[index] = label_value
            index += 1
            if pbar is not None:
                pbar.inc()
        return has_labels

    def load_labels(self):
        if not self.has_labels:
            return
        self.info("Reading labels...")
        different_labels = defaultdict(int), defaultdict(int), defaultdict(int)
        label_key_map = defaultdict(list), defaultdict(list), defaultdict(list)
        pb = ProgressBar(maxval=self.total_samples, term_width=40)
        pb.start()
        for class_index in range(3):
            for key in self.class_keys[class_index]:
                label, has_labels = self._load_label(key, True)
                assert has_labels
                different_labels[class_index][label] += 1
                label_key_map[class_index][label].append(key)
                self._samples_mapping[label].add(key)
                pb.inc()
        pb.finish()

        return different_labels, label_key_map

    def initialize(self, **kwargs):
        self._restored_from_pickle_ = kwargs["snapshot"]
        super(ImageLoader, self).initialize(**kwargs)
        del self._restored_from_pickle_

    def load_data(self):
        try:
            super(ImageLoader, self).load_data()
        except AttributeError:
            pass
        if self._restored_from_pickle_:
            self.info("Scanning for changes...")
            progress = ProgressBar(maxval=self.total_samples, term_width=40)
            progress.start()
            for keys in self.class_keys:
                for key in keys:
                    progress.inc()
                    size, _ = self.get_effective_image_info(key)
                    if size != self.uncropped_shape:
                        raise error.BadFormatError(
                            "%s changed the effective size (now %s, was %s)" %
                            (key, size, self.uncropped_shape))
            progress.finish()
            return
        for keys in self.class_keys:
            del keys[:]
        for index, class_name in enumerate(CLASS_NAME):
            keys = set(self.get_keys(index))
            self.class_keys[index].extend(keys)
            self.class_lengths[index] = len(keys) * self.samples_inflation
            self.class_keys[index].sort()

        if self.uncropped_shape == tuple():
            raise error.BadFormatError(
                "original_shape was not initialized in get_keys()")
        self.info(
            "Found %d samples of shape %s (%d TEST, %d VALIDATION, %d TRAIN)",
            self.total_samples, self.shape, *self.class_lengths)

        # Perform a quick (unreliable) test to determine if we have labels
        keys = next(k for k in self.class_keys if len(k) > 0)
        self._has_labels = self.load_keys(
            (keys[RandomGenerator(None).randint(len(keys))], ), None, None,
            None, None)
        self._resize_validation_keys(self.load_labels())

    def create_minibatch_data(self):
        self.minibatch_data.reset(
            numpy.zeros((self.max_minibatch_size, ) + self.shape,
                        dtype=self.dtype))

        self.minibatch_label_values.reset(
            numpy.zeros(self.max_minibatch_size, numpy.float32))

    def keys_from_indices(self, indices):
        for index in indices:
            class_index, origin_index, _ = \
                self._get_class_origin_distortion_from_index(index)
            yield self.class_keys[class_index][origin_index]

    def fill_minibatch(self):
        indices = self.minibatch_indices.mem[:self.minibatch_size]
        assert self.has_labels == self.load_keys(
            self.keys_from_indices(indices), None, self.minibatch_data.mem,
            self.raw_minibatch_labels, self.minibatch_label_values)
        if self.samples_inflation == 1:
            return
        for pos, index in enumerate(indices):
            _, _, dist_index = \
                self._get_class_origin_distortion_from_index(index)
            self.minibatch_data[pos] = self.distort(
                self.minibatch_data[pos],
                *self.get_distortion_by_index(dist_index))

    def _resize_validation_keys(self, label_analysis):
        if label_analysis is None:
            return
        different_labels, label_key_map = label_analysis
        if self.validation_ratio is None:
            self._setup_labels_mapping(different_labels)
            return
        if self.validation_ratio < 0:
            self.class_keys[TRAIN] += self.class_keys[VALID]
            self.class_lengths[TRAIN] += self.class_lengths[VALID]
            del self.class_keys[VALID][:]
            self.class_lengths[VALID] = 0
            merged = {
                k: different_labels[VALID][k] + different_labels[TRAIN][k]
                for k in label_key_map[TRAIN]
            }
            self._setup_labels_mapping((different_labels[TEST], {}, merged))
            return

        overall = sum(len(ck) for ck in self.class_keys[VALID:])
        target_validation_length = int(overall * self.validation_ratio)

        if not self.has_labels:
            keys = list(chain.from_iterable(self.class_keys[VALID:]))
            keys.sort()
            self.prng.shuffle(keys)
            del self.class_keys[VALID][:]
            self.class_keys[VALID].extend(keys[:target_validation_length])
            del self.class_keys[TRAIN][:]
            self.class_keys[TRAIN].extend(keys[target_validation_length:])
            self._finalize_resizing_validation(different_labels, label_key_map)
            return

        # We must ensure that each set has the same labels
        # The first step is to pick two keys for each label and distribute them
        # into VALID and TRAIN evenly
        if len(label_key_map[TRAIN]) > target_validation_length:
            raise LoaderError(
                "Unable to set the new size of the validation set to %d (%.3f)"
                " since the number of labels is %d" %
                (target_validation_length * self.samples_inflation,
                 self.validation_ratio, len(label_key_map[TRAIN])))
        if overall - target_validation_length < len(label_key_map[TRAIN]):
            raise LoaderError(
                "Unable to set the new size of the training set to %d (%.3f) "
                "since the number of labels is %d" %
                ((overall - target_validation_length) * self.samples_inflation,
                 1.0 - self.validation_ratio, len(label_key_map[TRAIN])))
        vt_label_key_map = {
            l:
            (label_key_map[VALID].get(l, []) + label_key_map[TRAIN].get(l, []))
            for l in label_key_map[TRAIN]
        }
        for i in VALID, TRAIN:
            del self.class_keys[i][:]
        for label, keys in sorted(vt_label_key_map.items()):
            if len(keys) < 2:
                raise LoaderError("Label %s has less than 2 keys" % label)
            choice = self.prng.choice(len(keys), 2, replace=False)
            assert choice[0] != choice[1]
            for i in VALID, TRAIN:
                self.class_keys[i].append(keys[choice[i - 1]])
            for c in sorted(choice, reverse=True):
                del keys[c]

        # Distribute the left keys randomly
        left_keys = list(sorted(chain.from_iterable(
            vt_label_key_map.values())))
        self.prng.shuffle(left_keys)
        offset_val_length = \
            target_validation_length - len(vt_label_key_map)
        self.class_keys[VALID].extend(left_keys[:offset_val_length])
        self.class_keys[TRAIN].extend(left_keys[offset_val_length:])
        self._finalize_resizing_validation(different_labels, label_key_map)

    def _finalize_resizing_validation(self, different_labels, label_key_map):
        for ck in self.class_keys[VALID:]:
            ck.sort()
        for i in VALID, TRAIN:
            self.class_lengths[i] = len(self.class_keys[i]) * \
                self.samples_inflation
        new_diff = defaultdict(int), defaultdict(int)
        key_label_map = {}
        for ci in VALID, TRAIN:
            key_label_map.update(
                {k: l
                 for l, keys in label_key_map[ci].items() for k in keys})
        for ci in VALID, TRAIN:
            for key in self.class_keys[ci]:
                new_diff[ci - 1][key_label_map[key]] += 1
        self._setup_labels_mapping((different_labels[TEST], ) + new_diff)

    def _get_class_origin_distortion_from_index(self, index):
        class_index, key_remainder = self.class_index_by_sample_index(index)
        key_index = self.class_lengths[class_index] - key_remainder
        return (class_index, ) + divmod(key_index, self.samples_inflation)

    def _load_image(self, key, crop=True):
        """Returns the data to serve corresponding to the given image key and
        the label value (from 0 to 1).
        """
        data = self.get_image_data(key)
        size, color = self.get_image_info(key)
        bbox = self.get_image_bbox(key, size)
        return self.preprocess_image(data, color, crop, bbox)

    def _load_label(self, key, has_labels):
        label = self.get_image_label(key)
        if label is not None:
            has_labels = True
        if has_labels and label is None:
            raise error.BadFormatError(
                "%s does not have a label, but others do" % key)
        return label, has_labels

    def _intersection(self, bbox_a, bbox_b):
        ymin_a, ymax_a, xmin_a, xmax_a = bbox_a
        ymin_b, ymax_b, xmin_b, xmax_b = bbox_b

        x_intersection = min(xmax_a, xmax_b) - max(xmin_a, xmin_b)
        y_intersection = min(ymax_a, ymax_b) - max(ymin_a, ymin_b)

        if x_intersection <= 0 or y_intersection <= 0:
            return 0
        else:
            return x_intersection * y_intersection

    def _scale_shape(self, shape):
        return tuple(int(shape[i] * self.scale) for i in (0, 1)) + shape[2:]

    def _validate_color_space(self, value):
        if not isinstance(value, str):
            raise TypeError("db_colorpsace must be a string (got %s)" %
                            type(value))
        if value != "RGB" and not hasattr(cv2, "COLOR_%s2RGB" % value):
            raise ValueError("Unsupported color space: %s" % value)
Example #8
class EvaluatorBase(AcceleratedUnit, TriviallyDistributable):
    """Base class for evaluators.
    """
    hide_from_registry = True

    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "EVALUATOR")
        super(EvaluatorBase, self).__init__(workflow, **kwargs)
        self.mean = kwargs.get("mean", True)
        self.err_output = Array()
        self._merged_output = Array()
        self.krn_constants_i_ = None
        self.krn_constants_f_ = None
        self.demand("output", "batch_size")
        if self.testing:
            self.demand("class_lengths", "offset")

    @property
    def mean(self):
        """
        :return: True if the error function averages values. Default is True.
        """
        return self._mean

    @mean.setter
    def mean(self, value):
        if not isinstance(value, bool):
            raise TypeError("mean must be boolean (got %s)" % type(value))
        self._mean = value

    @property
    def merged_output(self):
        assert self.testing
        return self._merged_output.mem

    def initialize(self, device, **kwargs):
        super(EvaluatorBase, self).initialize(device, **kwargs)
        dtype = self.output.dtype
        if self.testing:
            self._merged_output.reset(numpy.zeros(
                (self.class_lengths[TEST],) + self.output.shape[1:], dtype))
            return

        self.krn_constants_i_ = numpy.zeros(1, numpy.int32)
        self.krn_constants_f_ = numpy.zeros(1, dtype)
        self.err_output.reset(numpy.zeros_like(self.output.mem, dtype))

        for vec in self.output, self.err_output:
            vec.initialize(self.device)

    def run(self):
        if self.testing:
            self.output.map_read()
            self.merge_output()
            return
        return super(EvaluatorBase, self).run()

    def merge_output(self):
        self.merged_output[self.offset - self.batch_size:self.offset] = \
            self.output[:self.batch_size]

    def get_metric_names(self):
        if self.testing:
            return {"Output"}
        return set()

    def get_metric_values(self):
        if self.testing:
            return {"Output": self.merged_output}
        return {}
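
merge_output copies each test minibatch into its slot of the preallocated merged_output buffer using the loader's running offset. The slice arithmetic in isolation, with assumed sizes:

import numpy

merged = numpy.zeros((10, 3))
offset, batch_size = 4, 4          # hypothetical loader state
output = numpy.ones((4, 3))
merged[offset - batch_size:offset] = output[:batch_size]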
Example #9
class EvaluatorSoftmax(EvaluatorBase):
    """Evaluator for nn softmax output from the batch labels.

    Must be assigned before initialize():
        output
        labels
        batch_size
        max_idx

    Updates after run():
        err_output
        n_err
        confusion_matrix
        max_err_output_sum

    Creates within initialize():
        err_output
        n_err
        confusion_matrix
        max_err_output_sum

    Attributes:
        labels: labels for Batch.
        output: output of the network_common as Batch.
        err_output: backpropagation errors based on labels.
        batch_size: number of elements in output to evaluate.
        confusion_matrix: confusion matrix for the output.
        compute_confusion_matrix: compute confusion matrix or not.
        max_idx: indexes of element with maximum real value for each sample.
        max_err_output_sum: maximum of backpropagated error sum by sample.
    """
    def __init__(self, workflow, **kwargs):
        super(EvaluatorSoftmax, self).__init__(workflow, **kwargs)
        self.compute_confusion_matrix = kwargs.get(
            "compute_confusion_matrix", True)
        self.confusion_matrix = Array()
        self.n_err = Array()
        self.max_err_output_sum = Array()
        self.class_keys = None
        self.demand("labels", "max_idx")
        if self.testing:
            self.demand("labels_mapping")

    def initialize(self, device, **kwargs):
        super(EvaluatorSoftmax, self).initialize(device=device, **kwargs)
        if self.testing:
            return
        self.sources_["evaluator"] = {}

        dtype = self.output.dtype

        if not self.n_err:
            self.n_err.reset(numpy.zeros(2, dtype=numpy.int32))
        else:
            assert self.n_err.size == 2

        out_size = self.output.sample_size
        if self.compute_confusion_matrix:
            if not self.confusion_matrix:
                self.confusion_matrix.reset(
                    numpy.zeros([out_size, out_size], numpy.int32))
            else:
                assert self.confusion_matrix.size == out_size * out_size
        else:
            self.confusion_matrix.reset()

        if not self.max_err_output_sum:
            self.max_err_output_sum.reset(numpy.zeros(1, dtype))
        else:
            assert self.max_err_output_sum.size == 1

        self.init_vectors(self.confusion_matrix, self.n_err, self.max_idx,
                          self.labels, self.max_err_output_sum)

    def _gpu_init(self):
        dtype = self.output.dtype
        block_size = min(self.err_output.shape[0], 256)
        self.build_program(
            cache_file_name="%s_%d_%d" % (self.__class__.__name__,
                                          self.output.shape[0],
                                          self.output.sample_size),
            dtype=dtype, block_size=block_size,
            max_batch_size=self.err_output.shape[0],
            output_size=self.err_output.sample_size)
        self.assign_kernel("evaluate_softmax")
        self.set_args(self.output, self.max_idx, self.labels,
                      self.skip_args(2), self.n_err, self.confusion_matrix,
                      self.max_err_output_sum, self.err_output)
        return block_size

    def ocl_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._global_size = [block_size]
        self._local_size = [block_size]

    def cuda_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._global_size = (1, 1, 1)
        self._local_size = (block_size, 1, 1)

    def _gpu_run(self):
        self.unmap_vectors(
            self.err_output, self.output, self.max_idx, self.labels,
            self.n_err, self.confusion_matrix, self.max_err_output_sum)

        self.krn_constants_i_[0] = self.batch_size
        self.set_arg(3, self.krn_constants_i_[0:1])
        self.krn_constants_f_[0] = 1.0 / self.batch_size if self.mean else 1.0
        self.set_arg(4, self.krn_constants_f_[0:1])

        self.execute_kernel(self._global_size, self._local_size)

    def ocl_run(self):
        return self._gpu_run()

    def cuda_run(self):
        return self._gpu_run()

    def numpy_run(self):
        self.err_output.map_invalidate()
        for vec in self.output, self.max_idx, self.labels:
            vec.map_read()
        for vec in self.n_err, self.confusion_matrix, self.max_err_output_sum:
            vec.map_write()

        batch_size = self.batch_size
        labels = self.labels.mem
        confusion_matrix = self.confusion_matrix.mem

        n_ok = 0
        n_total = 0
        multiplier = 1.0 / batch_size if self.mean else 1.0
        for i in range(batch_size):  # loop by batch
            if labels[i] < 0:
                self.err_output.mem[i] = 0.0
                continue
            output = ravel(self.output[i])
            err_output = ravel(self.err_output[i])

            max_idx = self.max_idx[i]
            confusion_matrix[max_idx, labels[i]] += 1
            if max_idx == labels[i]:
                n_ok += 1
            n_total += 1

            # Compute softmax output error gradient
            err_output[:] = output[:]
            err_output[labels[i]] -= 1.0
            err_output *= multiplier
            if err_output.dtype in (numpy.complex64, numpy.complex128):
                self.max_err_output_sum[0] = max(
                    self.max_err_output_sum[0], numpy.linalg.norm(err_output))
            else:
                self.max_err_output_sum[0] = max(
                    self.max_err_output_sum[0], (numpy.fabs(err_output)).sum())
        # Set errors for excessive samples to zero
        if batch_size < self.err_output.mem.shape[0]:
            self.err_output.mem[batch_size:] = 0.0
        self.n_err[0] += batch_size - n_ok
        self.n_err[1] += n_total

    def get_metric_values(self):
        if self.testing:
            output_labels = {}
            class_keys = getattr(self, "class_keys", None)
            for index, labels in enumerate(self.merged_output[:]):
                max_value = 0
                for label_index, value in enumerate(labels):
                    if value >= max_value:
                        max_value = value
                        max_index = label_index
                if class_keys is not None:
                    output_labels[self.class_keys[TEST][
                        index]] = self.labels_mapping[max_index]
                else:
                    output_labels[index] = self.labels_mapping[max_index]
            return {"Output": output_labels}
        return {}
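
The inner loop of numpy_run applies the standard softmax cross-entropy gradient: the error is the softmax output with 1 subtracted at the true label, optionally scaled by 1/batch_size when mean is True. A minimal numpy sketch with made-up numbers:

import numpy

softmax_out = numpy.array([0.7, 0.2, 0.1], dtype=numpy.float32)
label = 0
err = softmax_out.copy()
err[label] -= 1.0   # gradient of cross-entropy w.r.t. the softmax input
err *= 1.0 / 4      # mean=True with a hypothetical batch size of 4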
Example #10
class Forward(ForwardBase):
    """Class for forward propagation units.

    Attributes:
        input: input layer values.
        output: output layer values.
        weights: weights.
        bias: bias.
        weights_stddev: magnitude of the random distribution for weights.
        bias_stddev: magnitude of the random distribution for bias.
        rand: prng.Rand() object for initial weights generation.
    """
    hide_from_registry = True
    MAPPING = set()

    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "WORKER")
        super(Forward, self).__init__(workflow, **kwargs)
        self.weights_stddev = kwargs.get("weights_stddev")
        self.bias_stddev = kwargs.get("bias_stddev", self.weights_stddev)
        self.weights_filling = kwargs.get("weights_filling", "uniform")
        self.bias_filling = kwargs.get("bias_filling", "uniform")
        self.rand = kwargs.get("rand", prng.get())
        self.weights_transposed = kwargs.get("weights_transposed", False)
        self.include_bias = kwargs.get("include_bias", True)
        self.demand("input")
        self.output = Array(shallow_pickle=True)
        self.weights = Array()
        self.bias = Array()
        self.forward_mode = False
        self.exports = [
            "weights", "bias", "include_bias", "weights_transposed"
        ]

    def package_export(self):
        data = {}
        for attr in self.exports:
            value = getattr(self, attr)
            if value is not None:
                if isinstance(value, Array):
                    value.map_read()
                    value = value.mem
                data[attr] = value
        return data

    @property
    def forward_mode(self):
        return self._forward_mode

    @forward_mode.setter
    def forward_mode(self, value):
        if not isinstance(value, bool):
            raise TypeError("forward_mode must be boolean (got %s)" %
                            type(value))
        self._forward_mode = value

    def initialize(self, device, **kwargs):
        self.forward_mode = kwargs.get("forward_mode", False)
        super(Forward, self).initialize(device=device, **kwargs)

    def generate_data_for_slave(self, slave):
        if self.forward_mode:
            return None
        data = [None, None]
        if self.weights:
            self.weights.map_read()
            data[0] = self.weights.mem
        if self.bias:
            self.bias.map_read()
            data[1] = self.bias.mem
        return data

    def generate_data_for_master(self):
        return None

    def apply_data_from_master(self, data):
        if self.forward_mode:
            return
        if self.weights:
            self.weights.map_invalidate()
            numpy.copyto(self.weights.mem, data[0])
        else:
            self.weights.reset(data[0])
        if self.bias:
            self.bias.map_invalidate()
            numpy.copyto(self.bias.mem, data[1])
        else:
            self.bias.reset(data[1])

    def apply_data_from_slave(self, data, slave):
        pass

    def drop_slave(self, slave):
        pass
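
The generate_data_for_slave() / apply_data_from_master() pair above is a
one-way parameter broadcast: the master ships its current weights and bias,
and each slave copies them into existing buffers or adopts them outright.
A minimal sketch of that handshake with plain numpy arrays (Master and
Slave are hypothetical stand-ins for the unit infrastructure):

import numpy

class Master:
    def __init__(self, weights, bias):
        self.weights, self.bias = weights, bias

    def generate_data_for_slave(self):
        # Ship snapshots of the current parameters.
        return [self.weights, self.bias]

class Slave:
    def __init__(self):
        self.weights = None
        self.bias = None

    def apply_data_from_master(self, data):
        # Copy into existing buffers when present, otherwise adopt copies.
        if self.weights is not None:
            numpy.copyto(self.weights, data[0])
        else:
            self.weights = data[0].copy()
        if self.bias is not None:
            numpy.copyto(self.bias, data[1])
        else:
            self.bias = data[1].copy()

master = Master(numpy.ones((4, 3)), numpy.zeros(4))
slave = Slave()
slave.apply_data_from_master(master.generate_data_for_slave())
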
Example #14
0
class Binarization(AcceleratedUnit, EmptyDeviceMethodsMixin):
    """
    Input Binarization. Input and output are 2D arrays of the same size.
    Each element A(i,j) (in row i and column j) of the input is a float
    number between 0 and 1. Each element B(i,j) of the output equals 1
    with probability A(i,j) and 0 with probability 1 - A(i,j).
    Must be assigned before initialize():
    * input

    Updates after run():
    * output

    Creates within initialize():
    * output

    Attributes:
        input: input as batch of samples.
        output: output as batch of samples.
    """
    def __init__(self, workflow, **kwargs):
        super(Binarization, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.rand = kwargs.get("rand", prng.get())
        self.demand("input", "batch_size")

    def run(self):
        """Batch binarization on CPU only.
        """
        self.output.map_invalidate()
        self.input.map_read()
        self.output.mem[:] = self.input.mem[:]
        self.output.mem[:self.batch_size, :] = self.matlab_binornd(
            1, self.input.mem[:self.batch_size, :])

    def initialize(self, device, **kwargs):
        super(Binarization, self).initialize(device=device, **kwargs)
        if not self.output or self.output.size != self.input.size:
            self.output.reset()
            self.output.mem = numpy.zeros_like(self.input.mem)
        self.output.initialize(self.device)

    def matlab_binornd(self, n, p_in):
        """
        Analogue of binornd in Matlab, but n must be a scalar.

        The function generates a matrix of random variables,
        where the element at (i,j) position is generated from binomial
        distribution with the number of trials n and the probability of
        success p_in(i,j).

        Args:
            n (int): number of trials
            p_in (2-dimensional numpy.array): success probability matrix
        Returns:
            res (2-dimensional numpy.array): matrix of random variables
            generated from the binomial distribution
        """
        p = numpy.copy(p_in)
        if len(p.shape) == 2:
            nrow = p.shape[0]
            ncol = p.shape[1]
            p = numpy.transpose(p)
            p = p.flatten()
            dim = p.shape[0]
            p = matlib.repmat(p, n, 1)
            f = self.rand.rand(n, dim)
            res = f < p
            res = numpy.sum(res, axis=0)
            res = numpy.transpose(res.reshape(ncol, nrow)).reshape(nrow, ncol)
        elif len(p.shape) == 1:
            dim = p.shape[0]
            p = matlib.repmat(p, n, 1)
            f = self.rand.rand(n, dim)
            res = f < p
            res = numpy.sum(res, axis=0)
        else:  # unsupported number of dimensions
            raise ValueError("input of Binarization class "
                             "must have 1 or 2 dimensions")
        return res
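
A minimal self-contained sketch of the same sampling idea, assuming plain
numpy (numpy.random stands in for the unit's prng.Rand object) and skipping
the transpose/reshape dance matlab_binornd performs to match Matlab's
column-major layout:

import numpy

def binornd(n, p, rng=numpy.random):
    # For every element of p, draw n uniform samples and count how many
    # fall below p: a Binomial(n, p[i, j]) variate per element.
    p = numpy.asarray(p)
    f = rng.rand(n, *p.shape)
    return (f < p).sum(axis=0)

# With n=1 every output element is a Bernoulli sample, which is exactly
# what Binarization.run() draws for each input value.
probs = numpy.array([[0.1, 0.9], [0.5, 0.5]])
sample = binornd(1, probs)
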
Example #15
0
class GradientsCalculator(AcceleratedUnit, EmptyDeviceMethodsMixin):
    """
    Makes gradients for weights, hbias and vbias, using hbias0, vbias0,
    hbias1 and vbias1, which are calculated with the help of BatchWeights.
    Must be assigned before initialize():
    * hbias0
    * vbias0
    * hbias1
    * vbias1
    * weights1
    * weights0

    Updates after run():
    * hbias_grad
    * vbias_grad
    * weights_grad

    Creates within initialize():
    * hbias_grad
    * vbias_grad
    * weights_grad

    Attributes:
        vbias0: calculated with the help of BatchWeights from v0
        hbias0: calculated with the help of BatchWeights from h0
        vbias1: calculated with the help of BatchWeights from v1
        hbias1: calculated with the help of BatchWeights from h1
        weights1: calculated with the help of BatchWeights from v1.
        weights0: calculated with the help of BatchWeights from h1.
        hbias_grad: gradient for hbias
        vbias_grad: gradient for vbias
        weights_grad: gradient for weights
    """
    def __init__(self, workflow, **kwargs):
        super(GradientsCalculator, self).__init__(workflow, **kwargs)
        self.vbias_grad = Array()
        self.hbias_grad = Array()
        self.weights_grad = Array()
        self.demand("hbias1", "vbias1", "hbias0", "vbias0", "weights0",
                    "weights1")

    def initialize(self, device, **kwargs):
        super(GradientsCalculator, self).initialize(device=device, **kwargs)
        if not self.hbias_grad:
            self.hbias_grad.reset(
                numpy.zeros(self.hbias0.shape, dtype=self.hbias0.dtype))
        else:
            assert self.hbias_grad.shape == self.hbias0.shape
        if not self.vbias_grad:
            self.vbias_grad.reset(
                numpy.zeros(self.vbias0.shape, dtype=self.vbias0.dtype))
        else:
            assert self.vbias_grad.shape == self.vbias0.shape
        if not self.weights_grad:
            self.weights_grad.reset(
                numpy.zeros(self.weights0.shape, dtype=self.weights0.dtype))
        else:
            assert self.weights_grad.shape == self.weights0.shape
        for v in (self.weights_grad, self.hbias_grad, self.vbias_grad,
                  self.hbias0, self.vbias0, self.weights0, self.hbias1,
                  self.vbias1, self.weights1):
            v.initialize(self.device)

    def run(self):
        for v in (self.hbias0, self.vbias0, self.weights0, self.hbias1,
                  self.vbias1, self.weights1):
            v.map_read()

        for v in (self.weights_grad, self.vbias_grad, self.hbias_grad):
            v.map_invalidate()

        self.vbias_grad.mem[:] = self.vbias0.mem - self.vbias1.mem
        self.hbias_grad.mem[:] = self.hbias0.mem - self.hbias1.mem
        self.weights_grad.mem[:] = self.weights0.mem - self.weights1.mem
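
run() reduces to elementwise differences between the phase-0 statistics
(data) and the phase-1 statistics (reconstruction), as in contrastive
divergence for an RBM. The same computation standalone (shapes are
illustrative):

import numpy

weights0 = numpy.random.rand(6, 4)   # data-phase statistics
weights1 = numpy.random.rand(6, 4)   # reconstruction-phase statistics

# The gradient is simply the difference of the two phases' statistics.
weights_grad = weights0 - weights1
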
Example #16
0
class InputJoiner(AcceleratedUnit):
    """Joins several minibatch inputs into one continuous minibatch output.

    Attributes:
        input_0, input_1, ...: inputs of type Array(), created via link_inputs
        offset_0, offset_1, ...: offsets of each input in elements,
                                 have valid values after initialize().
        length_0, length_1, ...: lengths of each input in elements,
                                 have valid values after initialize().
        output: Array()
        minibatch_size: size of the minibatch (will be set to the smallest
                        first dimension among the inputs
                        if not provided prior to initialize())
    """
    def __init__(self, workflow, **kwargs):
        super(InputJoiner, self).__init__(workflow, **kwargs)
        self.output = Array()
        self._num_inputs = 0
        self.inputs = kwargs.get("inputs")

    def init_unpickled(self):
        super(InputJoiner, self).init_unpickled()
        self.sources_["join"] = {}

    @property
    def num_inputs(self):
        return self._num_inputs

    @num_inputs.setter
    def num_inputs(self, value):
        try:
            value = int(value)
        except (ValueError, TypeError):
            raise ValueError("num_inputs must be copnvertible to int")
        for x in range(value, self._num_inputs):
            try:
                delattr(self, "input_%d" % x)
                delattr(self, "offset_%d" % x)
                delattr(self, "length_%d" % x)
            except AttributeError:
                pass
        for x in range(self._num_inputs, value):
            setattr(self, "input_%d" % x, None)
            setattr(self, "offset_%d" % x, None)
            setattr(self, "length_%d" % x, None)
        self._num_inputs = value

    @property
    def inputs(self):
        return list(getattr(self, "input_%d" % x)
                    for x in range(self._num_inputs))

    @property
    def offsets(self):
        return list(getattr(self, "offset_%d" % x)
                    for x in range(self._num_inputs))

    @property
    def lengths(self):
        return list(getattr(self, "length_%d" % x)
                    for x in range(self._num_inputs))

    @inputs.setter
    def inputs(self, value):
        if value is None:
            self.num_inputs = 0
            return
        if not hasattr(value, "__iter__"):
            raise TypeError("inputs must be iterable")
        self.num_inputs = len(value)
        for i, inp in enumerate(value):
            setattr(self, "input_%d" % i, inp)

    def link_inputs(self, other, *args):
        """Adds more inputs and links them.

        It will link args to attributes named
        "input_0", "input_1", etc.

        Parameters:
            other: unit from which to link attributes.
            args: attribute names to link.
        """
        if not len(args):
            raise ValueError("args may not be empty")
        num_inputs = self.num_inputs
        self.num_inputs = num_inputs + len(args)
        for arg in args:
            self.link_attrs(other, ("input_%d" % num_inputs, arg))
            num_inputs += 1

    def _init_offset_length_attributes(self):
        """Initializes offset_0, offset_1, ...
                       length_0, length_1, ...
        """
        offset = 0
        for i in range(self.num_inputs):
            inp = getattr(self, "input_%d" % i)
            setattr(self, "offset_%d" % i, offset)
            setattr(self, "length_%d" % i, inp.sample_size)
            offset += inp.sample_size

    def initialize(self, device, **kwargs):
        if any(i.mem is None for i in self.inputs):
            # Not yet ready to initialize
            return True

        self._init_offset_length_attributes()

        super(InputJoiner, self).initialize(device=device, **kwargs)

        minibatch_size = min(i.shape[0] for i in self.inputs)
        if any(i.shape[0] > minibatch_size for i in self.inputs):
            self.warning("Detected inputs of different sizes. Sizes will be "
                         "cut to the lowest value (%d)", minibatch_size)

        output_shape = (minibatch_size,
                        sum(i.size // i.shape[0] for i in self.inputs))
        if not self.output:
            self.output.reset(numpy.zeros(output_shape, self.inputs[0].dtype))
        else:
            assert self.output.shape == output_shape

        self.init_vectors(self.output, *self.inputs)

    def _gpu_init(self):
        defines = {
            'etype': opencl_types.numpy_dtype_to_opencl(self.output.dtype),
        }
        self.build_program(
            defines, "%s_%d_%s" %
            (type(self).__name__, self.output.shape[0],
             "_".join(map(str, self.output.shape[1:]))), inputs=self.inputs)
        self.assign_kernel("join")
        self.set_args(self.output, *self.inputs)

    def ocl_init(self):
        self._gpu_init()

    def cuda_init(self):
        self._gpu_init()

    def numpy_run(self):
        self.output.map_invalidate()  # we will update output on CPU
        minibatch_size = self.output.shape[0]
        low = 0
        for inp in self.inputs:
            inp.map_read()
            high = low + inp.size // inp.shape[0]
            if low >= high:
                break
            self.output.mem[:, low:high] = inp[:minibatch_size]
            low = high

    def ocl_run(self):
        for inp in self.inputs:
            inp.unmap()
        self.execute_kernel(*((self.output.shape[0],),) * 2)

    def cuda_run(self):
        for inp in self.inputs:
            inp.unmap()
        # TODO(a.kazantsev): rewrite CUDA kernel for proper grid size
        self.execute_kernel((1, 1, 1), (self.output.shape[0], 1, 1))
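
Semantically, InputJoiner is a per-sample horizontal concatenation with the
batch cut to the smallest input; numpy_run() above is equivalent to this
numpy sketch:

import numpy

a = numpy.arange(12.0).reshape(4, 3)   # batch of 4, sample size 3
b = numpy.arange(10.0).reshape(5, 2)   # batch of 5, sample size 2

minibatch_size = min(inp.shape[0] for inp in (a, b))
# Flatten each sample and concatenate along the feature axis.
output = numpy.hstack([inp.reshape(inp.shape[0], -1)[:minibatch_size]
                       for inp in (a, b)])
assert output.shape == (4, 5)
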
Example #17
0
class All2AllSoftmax(All2All):
    """All2All with linear activation and softmax normalization.

    Must be assigned before initialize():

    Updates after run():
        max_idx

    Creates within initialize():
        max_idx

    Attributes:
        krn_sm_: kernel for softmax activation calculation.
        max_idx: indexes of element with maximum value for each sample.
    """
    __id__ = "420219fc-3e1a-45b1-87f8-aaa0c1540de4"

    MAPPING = {"softmax"}

    def __init__(self, workflow, **kwargs):
        super(All2AllSoftmax, self).__init__(workflow, **kwargs)
        self.max_idx = Array()
        self.reduce_size = 256

    def init_unpickled(self):
        super(All2AllSoftmax, self).init_unpickled()
        self.krn_sm_ = None
        self._force_gpu_apply_exp = False

    def initialize(self, device, **kwargs):
        self.reduce_size = min(self.reduce_size,
                               int(numpy.prod(self.output_sample_shape)))
        self.sources_["all2all/softmax"] = {"REDUCE_SIZE": self.reduce_size}
        retval = super(All2AllSoftmax, self).initialize(device=device,
                                                        **kwargs)
        if retval:
            return retval
        if self.output.mem.size // self.output.mem.shape[0] <= 1:
            raise error.BadFormatError(
                "Output sample size should be greater than 1 for SoftMax.")

        if not self.max_idx:
            self.max_idx.reset(
                numpy.zeros(self.output.shape[0], dtype=numpy.int32))
        self.max_idx.initialize(self.device)
        return retval

    def numpy_apply_exp(self):
        self.output.map_write()
        self.max_idx.map_invalidate()
        out = self.output.mem
        out = reshape(out, (out.shape[0], out.size // out.shape[0]))
        for i, sample in enumerate(out):
            im = sample.argmax()
            self.max_idx[i] = im
            m = sample[im]
            sample -= m
            numpy.exp(sample, sample)
            smm = sample.sum()
            sample /= smm

    def ocl_apply_exp(self):
        self.unmap_vectors(self.output, self.max_idx)
        global_size = (self.output.shape[0] * self.reduce_size, )
        local_size = (self.reduce_size, )
        self.execute_kernel(global_size, local_size, self.krn_sm_)

    def cuda_apply_exp(self):
        self.unmap_vectors(self.output, self.max_idx)
        global_size = (self.output.shape[0], 1, 1)
        local_size = (self.reduce_size, 1, 1)
        self.execute_kernel(global_size, local_size, self.krn_sm_)

    def numpy_run(self):
        """Forward propagation from batch on CPU only.
        """
        super(All2AllSoftmax, self).numpy_run()
        if not self._force_gpu_apply_exp:
            self.numpy_apply_exp()

    def ocl_run(self):
        """Forward propagation from batch on GPU.
        """
        self._force_gpu_apply_exp = True
        super(All2AllSoftmax, self).ocl_run()
        self.ocl_apply_exp()

    def cuda_run(self):
        """Forward propagation from batch on GPU.
        """
        self._force_gpu_apply_exp = True
        super(All2AllSoftmax, self).cuda_run()
        self.cuda_apply_exp()

    def ocl_init(self):
        super(All2AllSoftmax, self).ocl_init()
        self.krn_sm_ = self.get_kernel("apply_exp")
        self.krn_sm_.set_args(self.output.devmem, self.max_idx.devmem)

    def cuda_init(self):
        super(All2AllSoftmax, self).cuda_init()
        self.krn_sm_ = self.get_kernel("apply_exp")
        self.krn_sm_.set_args(self.output.devmem, self.max_idx.devmem)
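
numpy_apply_exp() above is the numerically stable softmax: subtracting the
per-sample maximum before exponentiation avoids overflow without changing
the result, since softmax(x - m) == softmax(x). A standalone sketch:

import numpy

def stable_softmax(out):
    # out: (batch, features), modified in place; per-sample argmax returned.
    max_idx = out.argmax(axis=1)
    out -= out.max(axis=1, keepdims=True)
    numpy.exp(out, out)
    out /= out.sum(axis=1, keepdims=True)
    return max_idx

x = numpy.array([[1.0, 2.0, 3.0], [1000.0, 1001.0, 1002.0]])
idx = stable_softmax(x)
# Both rows yield the same distribution; no overflow on the second row.
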
Example #18
0
class OffsetPooling(Pooling):
    """Pooling by offset forward propagation.

    Must be assigned before initialize():

    Updates after run():
        input_offset

    Creates within initialize():
        input_offset

    Attributes:
        input_offset: offsets in the input where elements are passed through.
    """

    MAPPING = set()
    hide_from_registry = True

    def __init__(self, workflow, **kwargs):
        super(OffsetPooling, self).__init__(workflow, **kwargs)
        self.input_offset = Array()
        self.demand("input")

    def initialize(self, device, **kwargs):
        super(OffsetPooling, self).initialize(device=device, **kwargs)

        if self._no_output:
            return
        if not self.input_offset:
            self.input_offset.reset(numpy.zeros(self.output.shape,
                                                dtype=numpy.int32))
        else:
            assert self.input_offset.shape == self.output.shape
        self.input_offset.initialize(self.device)

    def set_args(self, *args):
        super(OffsetPooling, self).set_args(self.input, self.output,
                                            self.input_offset, *args)

    def ocl_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).ocl_run()

    def cuda_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).cuda_run()

    def numpy_run(self):
        self.input_offset.map_invalidate()
        super(OffsetPooling, self).numpy_run()

    def numpy_run_cut(self, cut, coords):
        batch, y1, x1, ch, out_y, out_x = coords
        cut_index = self.numpy_run_cut_offset(
            cut, numpy.ravel_multi_index((batch, out_y, out_x, ch),
                                         self.output.shape))
        i, j = numpy.unravel_index(cut_index, cut.shape)
        idx = numpy.ravel_multi_index((batch, y1 + i, x1 + j, ch),
                                      self.input.shape)
        val = numpy.ravel(self.input.mem)[idx]
        self.input_offset.mem[batch, out_y, out_x, ch] = idx
        return val
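
The offsets stored in input_offset are flat (raveled) indices into the
input, produced with numpy.ravel_multi_index and mapped back with
numpy.unravel_index. A small round-trip sketch (shapes are illustrative):

import numpy

input_shape = (2, 4, 4, 3)          # batch, y, x, channels
batch, y, x, ch = 1, 2, 3, 0

# Flat offset of the element that won the pooling window ...
idx = numpy.ravel_multi_index((batch, y, x, ch), input_shape)
# ... and back to coordinates, as numpy_run_cut does within a window.
assert numpy.unravel_index(idx, input_shape) == (batch, y, x, ch)
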
Example #19
0
class KohonenForward(KohonenBase, AcceleratedUnit):
    """Kohonen forward layer.

    Must be assigned before initialize():
        input
        weights
        minibatch_offset (if total == True)
        minibatch_size (if total == True)
        batch_size (if total == True)
        argmins speeds up run() if linked from KohonenTrainer

    Updates after run():
        output

    Creates within initialize():
        output

    Attributes:
        input: input as batch of samples.
        weights: the weights of the neurons in Kohonen layer.
        output: the list of winners.
        total: the overall winners table (created only if total=True
               is passed to __init__())
    """
    def __init__(self, workflow, **kwargs):
        super(KohonenForward, self).__init__(workflow, **kwargs)
        self.demand("input", "weights")
        self.argmins = None
        self._distances = Array()
        self.output = Array()
        self._chunk_size_ = 0
        self.weights_transposed = False
        self.total = Array() if kwargs.get("total", False) else None
        if self.total is not None:
            self.minibatch_offset = None
            self.minibatch_size = None
            self.batch_size = None

    def init_unpickled(self):
        super(KohonenForward, self).init_unpickled()
        self.sources_["kohonen"] = {"FORWARD": 1}

    @property
    def neurons_number(self):
        return self.weights.mem.shape[0]

    @property
    def sample_length(self):
        return self.weights.mem.shape[1]

    @property
    def chunk_size(self):
        return self._chunk_size_

    def initialize(self, device, **kwargs):
        super(KohonenForward, self).initialize(device=device, **kwargs)

        assert self.input.mem.shape[1] == self.sample_length
        batch_size = self.input.mem.shape[0]

        self.output.reset(numpy.zeros(batch_size, dtype=numpy.int32))
        if self.argmins is None:
            self._distances.reset(numpy.zeros(
                [batch_size, self.neurons_number],
                dtype=self.weights.mem.dtype))

        if self.total is not None:
            self.total.reset(numpy.zeros(self.batch_size, dtype=numpy.int32))
            self._minibatch_offset_ = numpy.zeros(1, dtype=numpy.int32)

    def ocl_init(self):
        batch_size = self.input.mem.shape[0]
        self.output.initialize(self.device)
        if self.argmins is None:
            self.input.initialize(self.device)
            self.weights.initialize(self.device)
            self._distances.initialize(self.device)
        elif self.total is None:
            return
        if self.total is not None:
            self.total.initialize(self.device)

        copy_chunk_size = int(numpy.ceil(batch_size /
                                         self.device.max_group_size))
        chunk_size = self.neurons_number // self.device.max_group_size
        if chunk_size < 2:
            chunk_size = self.neurons_number // 2 + 1
        self.argmin_group_size = \
            int(numpy.ceil(self.neurons_number / chunk_size))

        block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        defines = {
            'BLOCK_SIZE': block_size,
            'VECTOR_OPT': int(bool(vector_opt)),
            'BATCH': batch_size,
            'SAMPLE_LENGTH': self.sample_length,
            'NEURONS_NUMBER': self.neurons_number,
            'CHUNK_SIZE': chunk_size,
            'COPY_CHUNK_SIZE': copy_chunk_size,
        }
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1
        self.build_program(defines, "%s_%d_%d_%d" %
                           (self.__class__.__name__,
                            batch_size, self.sample_length,
                            self.neurons_number),
                           dtype=self.weights.mem.dtype)

        if self.total is not None:
            self._set_total_global_size_ = \
                [int(numpy.ceil(batch_size / copy_chunk_size))]
            self._krn_set_total_ = self.get_kernel("set_total")
            self._krn_set_total_.set_args(self.output.devmem, cl.skip,
                                          self.total.devmem)
        if self.argmins is not None:
            return

        self._krn_distances_ = self.get_kernel("calculate_distances")
        self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                      self._distances.devmem)

        self._krn_argmin_ = self.get_kernel("calculate_argmin")
        self._krn_argmin_.set_args(self._distances.devmem, self.output.devmem,
                                   None)

        self._gs_distance = [
            roundup(self.neurons_number, block_size),
            roundup(batch_size, block_size)]
        self._ls_distance = [block_size, block_size]

    def ocl_run(self):
        self.output.unmap()
        if self.total is not None:
            self.total.unmap()

        if self.argmins is None:
            self.input.unmap()
            self.weights.unmap()
            self.execute_kernel(self._gs_distance, self._ls_distance,
                                self._krn_distances_)
            self.execute_kernel([self.argmin_group_size],
                                [self.argmin_group_size],
                                self._krn_argmin_)
        else:
            self.argmins.unmap()
            self.argmins.map_read()
            self.output.map_write()
            self.output.mem[:] = self.argmins.mem
            self.output.unmap()
            self.argmins.unmap()

        if self.total is not None:
            self._minibatch_offset_[0] = \
                self.minibatch_offset - self.minibatch_size
            self._krn_set_total_.set_arg(1, self._minibatch_offset_)
            self.execute_kernel(self._set_total_global_size_, None,
                                self._krn_set_total_)

    def numpy_run(self):
        self.output.map_invalidate()

        if self.argmins is not None:
            self.argmins.map_read()
            self.output.mem[:] = self.argmins.mem
        else:
            self.input.map_read()
            self.weights.map_read()

        if self.total is not None:
            self.total.map_invalidate()

        length = self.minibatch_size if self.total is not None \
            else self.input.mem.shape[0]
        for sindex in range(length):
            if self.argmins is None:
                dist = self.weights.mem - self.input[sindex]
                winner = numpy.argmin(self.numpy_linalg_norm(dist))
                self.output[sindex] = winner
            else:
                winner = self.argmins[sindex]
            if self.total is not None:
                index = sindex + self.minibatch_offset - self.minibatch_size
                self.total[index] = winner
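
The CPU path makes the winner rule explicit: for each sample, the output is
the index of the nearest weight vector. A condensed sketch using the
Euclidean norm (the unit's numpy_linalg_norm may weight distances
differently):

import numpy

weights = numpy.random.rand(16, 8)      # 16 neurons, sample length 8
batch = numpy.random.rand(5, 8)         # 5 samples

winners = numpy.array(
    [numpy.linalg.norm(weights - sample, axis=1).argmin()
     for sample in batch], dtype=numpy.int32)
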
Example #20
0
class Cutter1D(AcceleratedUnit):
    """Cuts the specified interval from each 1D sample of input batch
    into output.

    y = alpha * x + beta * y
    """
    def __init__(self, workflow, **kwargs):
        super(Cutter1D, self).__init__(workflow, **kwargs)
        self.alpha = kwargs.get("alpha")
        self.beta = kwargs.get("beta")
        self.output_offset = kwargs.get("output_offset", 0)
        self.output = Array()
        self.demand("alpha", "beta", "input")
        # TODO: add input_offset and length to demand and not to crash lstm
        # TODO: unit test

    def init_unpickled(self):
        super(Cutter1D, self).init_unpickled()
        self.sources_["cutter"] = {}

    def initialize(self, device, **kwargs):
        super(Cutter1D, self).initialize(device, **kwargs)

        if not self.output or self.output.shape[0] != self.input.shape[0]:
            self.output.reset(
                numpy.zeros(
                    (self.input.shape[0], self.output_offset + self.length),
                    dtype=self.input.dtype))
        else:
            assert self.output.sample_size >= self.output_offset + self.length

        self.init_vectors(self.input, self.output)

    def cuda_init(self):
        dtype = self.input.dtype
        itemsize = self.input.itemsize
        limit = self.input.shape[0] * self.length

        self.build_program({}, "%s" % self.__class__.__name__, dtype=dtype)
        self.assign_kernel("cutter_1d_forward")

        self.set_args(
            int(self.input.devmem) + self.input_offset * itemsize,
            numpy.array([self.alpha], dtype=dtype),
            numpy.array([self.input.sample_size], dtype=numpy.int32),
            int(self.output.devmem) + self.output_offset * itemsize,
            numpy.array([self.beta], dtype=dtype),
            numpy.array([self.output.sample_size], dtype=numpy.int32),
            numpy.array([self.length], dtype=numpy.int32),
            numpy.array([limit], dtype=numpy.int32))

        block_size = self.device.suggest_block_size(self._kernel_)
        self._global_size = (int(numpy.ceil(limit / block_size)), 1, 1)
        self._local_size = (block_size, 1, 1)

    def ocl_init(self):
        dtype = self.input.dtype

        self.build_program({}, "%s" % self.__class__.__name__, dtype=dtype)
        self.assign_kernel("cutter_1d_forward")

        self.set_args(
            self.input.devmem,
            numpy.array([self.input_offset], dtype=numpy.int32),
            numpy.array([self.alpha], dtype=dtype),
            numpy.array([self.input.sample_size], dtype=numpy.int32),
            self.output.devmem,
            numpy.array([self.output_offset], dtype=numpy.int32),
            numpy.array([self.beta], dtype=dtype),
            numpy.array([self.output.sample_size], dtype=numpy.int32))

        self._global_size = (self.input.shape[0], self.length)
        self._local_size = None

    def _gpu_run(self):
        self.unmap_vectors(self.input, self.output)
        self.execute_kernel(self._global_size, self._local_size)

    def cuda_run(self):
        return self._gpu_run()

    def ocl_run(self):
        return self._gpu_run()

    def numpy_run(self):
        self.input.map_read()
        self.output.map_write()
        out = self.output.matrix[:, self.output_offset:self.output_offset +
                                 self.length]
        if self.beta:
            out *= self.beta
        else:
            out[:] = 0
        out += (self.input.matrix[:, self.input_offset:self.input_offset +
                                  self.length] * self.alpha)
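
numpy_run() spells out the y = alpha * x + beta * y rule on slice views:
the output window is scaled by beta (or zeroed when beta is falsy), then
accumulates alpha times the input window. An equivalent standalone sketch
(offsets and sizes are illustrative):

import numpy

alpha, beta = 1.0, 0.5
input_offset, output_offset, length = 2, 1, 3

x = numpy.arange(20.0).reshape(4, 5)
y = numpy.ones((4, 6))

out = y[:, output_offset:output_offset + length]   # a view, not a copy
out *= beta
out += alpha * x[:, input_offset:input_offset + length]
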
Example #21
0
class InputJoiner(AcceleratedUnit):
    """Joins several minibatch inputs into one continuous minibatch output.

    Must be assigned before initialize():
        inputs

    Updates after run():
        output

    Creates within initialize():
        output

    Attributes:
        inputs: list of inputs of type memory.Array().
        output: memory.Array().
        minibatch_size: size of the minibatch (will be set to the smallest
                        first dimension among the inputs
                        if not provided prior to initialize())
    """
    def __init__(self, workflow, **kwargs):
        super(InputJoiner, self).__init__(workflow, **kwargs)
        self.inputs = kwargs["inputs"]
        self.output = Array()
        self.registered_inputs = {}

    def init_unpickled(self):
        super(InputJoiner, self).init_unpickled()
        self.sources_["join"] = {}

    @property
    def inputs(self):
        return self._inputs

    @inputs.setter
    def inputs(self, value):
        if not hasattr(value, "__iter__"):
            raise TypeError("inputs must be iterable")
        self._inputs = list(value)
        if len(self._inputs) == 0:
            raise ValueError("inputs may not be empty")

    def register_offset_length_attributes(self, inp):
        idx = len(self.registered_inputs)
        attrs = ("offset_%d" % idx, "length_%d" % idx)
        for attr in attrs:
            setattr(self, attr, -1)
        self.registered_inputs[inp] = attrs
        return attrs

    def _init_offset_length_attributes(self):
        offsets = []
        lengths = []
        offset = 0
        for inp in self.inputs:
            offsets.append(offset)
            lengths.append(inp.sample_size)
            offset += lengths[-1]
        for inp, attrs in self.registered_inputs.items():
            try:
                idx = self.inputs.index(inp)
                vals = (offsets[idx], lengths[idx])
            except ValueError:
                vals = (-1, -1)
            for i, attr in enumerate(attrs):
                setattr(self, attr, vals[i])

    def initialize(self, device, **kwargs):
        if any(i.mem is None for i in self.inputs):
            # Not yet ready to initialize
            return True

        self._init_offset_length_attributes()

        super(InputJoiner, self).initialize(device=device, **kwargs)

        minibatch_size = min(i.shape[0] for i in self.inputs)
        if any(i.shape[0] > minibatch_size for i in self.inputs):
            self.warning(
                "Detected inputs of different sizes. Sizes will be "
                "cut to the lowest value (%d)", minibatch_size)

        output_shape = (minibatch_size,
                        sum(i.size // i.shape[0] for i in self.inputs))
        if not self.output:
            self.output.reset(numpy.zeros(output_shape, self.inputs[0].dtype))
        else:
            assert self.output.shape == output_shape

        self.init_vectors(self.output, *self.inputs)

    def _gpu_init(self):
        defines = {
            'etype': opencl_types.numpy_dtype_to_opencl(self.output.dtype),
        }
        self.build_program(
            defines,
            "%s_%d_%s" % (type(self).__name__, self.output.shape[0], "_".join(
                map(str, self.output.shape[1:]))),
            inputs=self.inputs)
        self.assign_kernel("join")
        self.set_args(self.output, *self.inputs)

    def ocl_init(self):
        self._gpu_init()

    def cuda_init(self):
        self._gpu_init()

    def numpy_run(self):
        self.output.map_invalidate()  # we will update output on CPU
        minibatch_size = self.output.shape[0]
        low = 0
        for inp in self.inputs:
            inp.map_read()
            high = low + inp.size // inp.shape[0]
            if low >= high:
                break
            self.output.mem[:, low:high] = inp[:minibatch_size]
            low = high

    def ocl_run(self):
        for inp in self.inputs:
            inp.unmap()
        self.execute_kernel(*((self.output.shape[0], ), ) * 2)

    def cuda_run(self):
        for inp in self.inputs:
            inp.unmap()
        # TODO(a.kazantsev): rewrite CUDA kernel for proper grid size
        self.execute_kernel((1, 1, 1), (self.output.shape[0], 1, 1))
Example #22
0
class EvaluatorMSE(EvaluatorBase):

    MAPPING = "evaluator_mse"
    LOSS = "mse"
    """Evaluator for nn softmax output from the batch labels.

    Must be assigned before initialize():
        output
        target
        batch_size
        labels (may be None)
        class_targets (may be None)

    Updates after run():
        err_output
        confusion_matrix
        max_err_output_sum
        n_err (only if labels and class_targets are not None)

    Creates within initialize():
        err_output
        n_err (only if labels and class_targets are not None)
        max_err_output_sum

    Attributes:
        output: output of the network_common as Batch.
        target: target for the current Batch.
        err_output: backpropagation errors.
        batch_size: number of elements in output to evaluate.
        metrics: [0] - sum of samples' MSE, [1] - max of samples' MSE,
                 [2] - min of samples' MSE.
        mse: array of MSE values for each sample in the minibatch.
        krn_constants_i_: numpy array for constant arguments to kernel.
        labels: labels for a batch (may be None).
        class_targets: target for each class (may be None).
        n_err: number of wrongly recognized samples
            (if labels and class_targets are not None).
    """
    def __init__(self, workflow, **kwargs):
        super(EvaluatorMSE, self).__init__(workflow, **kwargs)
        self.metrics = Array()
        self.mse = Array()
        self.labels = None
        self.class_targets = None
        self.n_err = Array()
        self.root = kwargs.get("root", True)
        self.demand("target", "normalizer")

    @property
    def root(self):
        """
        :return: True if the error metric is RMSE, otherwise MSE (mean of
        squared errors). Defaults to True.
        """
        return self._root

    @root.setter
    def root(self, value):
        if not isinstance(value, bool):
            raise TypeError("root must be boolean (got %s)" % type(value))
        self._root = value

    def initialize(self, device, **kwargs):
        super(EvaluatorMSE, self).initialize(device=device, **kwargs)
        if self.testing:
            return

        if self.target.size != self.output.size:
            raise error.BadFormatError(
                "target.size != output.size (%s != %s)" %
                (self.target.size, self.output.size))

        self.sources_["evaluator_mse"] = {}
        self.sources_["denormalization"] = {}

        dtype = self.output.dtype

        self.metrics.reset(numpy.zeros(3, dtype=dtype))
        self.metrics[2] = 1.0e30  # mse_min
        self.mse.reset(numpy.zeros(self.err_output.mem.shape[0], dtype))
        self.n_err.reset(numpy.zeros(2, dtype=numpy.int32))
        self.init_vectors(self.n_err, self.target, self.metrics, self.mse)
        if self.class_targets:
            self.class_targets.initialize(self.device)

    def _gpu_init(self):
        dtype = self.output.dtype
        block_size = min(self.err_output.shape[0], 128)
        if self.class_targets:
            self.sources_["mse_find_closest"] = {
                "target_dtype": numpy_dtype_to_opencl(self.class_targets.dtype)
            }

        self.build_program(cache_file_name="%s_%d_%d" %
                           (self.__class__.__name__, self.output.shape[0],
                            self.output.sample_size),
                           dtype=dtype,
                           max_batch_size=self.err_output.shape[0],
                           block_size=block_size,
                           output_size=self.err_output.sample_size,
                           root=self.root,
                           normalization=self.normalizer.MAPPING,
                           targets_number=self.class_targets.shape[0]
                           if self.class_targets else None,
                           coeffs=self.normalizer.coefficients)

        self.assign_kernel("evaluate_mse")
        self.set_args(self.output, self.target, self.skip_args(2),
                      self.metrics, self.mse.devmem, self.err_output)

        if self.labels and self.class_targets:
            assert (self.labels.dtype == self.n_err.dtype == numpy.int32)
            self.krn_find_closest_ = self.get_kernel("mse_find_closest")
            self.krn_find_closest_.set_args(self.output.devmem,
                                            self.class_targets.devmem,
                                            self.labels.devmem,
                                            self.n_err.devmem)

        return block_size

    def ocl_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._local_size = [block_size]
        self._global_size = self._local_size
        self._global_size_find_closest_ = lambda: (self.batch_size, )
        self._local_size_find_closest = None

    def cuda_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._local_size = (block_size, 1, 1)
        self._global_size = (1, 1, 1)
        self._global_size_find_closest_ = lambda: (self.batch_size, 1, 1)
        self._local_size_find_closest = (1, 1, 1)

    def _gpu_run(self):
        self.unmap_vectors(self.err_output, self.output, self.target,
                           self.metrics, self.mse)

        batch_size = self.batch_size
        self.krn_constants_i_[0] = batch_size
        self.set_arg(2, self.krn_constants_i_[0:1])
        self.krn_constants_f_[0] = 1.0 / self.batch_size if self.mean else 1.0
        self.set_arg(3, self.krn_constants_f_[0:1])

        self.execute_kernel(self._global_size, self._local_size)

        if self.labels and self.class_targets:
            self.unmap_vectors(self.class_targets, self.labels, self.n_err)
            self.execute_kernel(self._global_size_find_closest_(),
                                self._local_size_find_closest,
                                self.krn_find_closest_)
            self.n_err.map_write()
            self.n_err.mem[1] += batch_size

    def ocl_run(self):
        return self._gpu_run()

    def cuda_run(self):
        return self._gpu_run()

    def numpy_run(self):
        self.output.map_read()
        self.target.map_read()
        self.metrics.map_write()
        self.err_output.map_invalidate()
        self.mse.map_invalidate()

        assert (self.output.size == self.target.size == self.err_output.size)
        batch_size = self.batch_size
        err_output = self.err_output.matrix[:batch_size]
        assert_addr(err_output, self.err_output.mem)
        output = self.output.matrix[:batch_size]
        assert_addr(output, self.output.mem)
        target = self.target.matrix[:batch_size]
        assert_addr(target, self.target.mem)
        mse = self.mse.mem[:batch_size]
        assert_addr(mse, self.mse.mem)

        err_output[:] = output - target
        if not isinstance(self.normalizer, NoneNormalizer):
            output_copy = output.copy()
            target_copy = target.copy()
            self.normalizer.denormalize(output_copy)
            self.normalizer.denormalize(target_copy)
            denormed_err_output = output_copy - target_copy
        else:
            denormed_err_output = err_output
        self.err_output.mem[batch_size:] = 0
        mse[:] = numpy.square(denormed_err_output).sum(axis=1) / \
            denormed_err_output.shape[1]
        if self.mean:
            err_output /= batch_size
        if self.root:
            numpy.sqrt(mse, mse)
        self.mse.mem[batch_size:] = 0

        self.metrics.mem[0] += mse.sum()
        self.metrics.mem[1] = max(self.metrics.mem[1], mse.max())
        self.metrics.mem[2] = min(self.metrics.mem[2], mse.min())

        if self.labels and self.class_targets:
            self.class_targets.map_read()
            self.labels.map_read()
            self.n_err.map_write()
            class_targets = self.class_targets.matrix
            labels = self.labels.mem
            for i, sample in enumerate(output):
                lbl = numpy.linalg.norm(class_targets - sample,
                                        axis=1).argmin()
                if lbl != labels[i]:
                    self.n_err.mem[0] += 1
                self.n_err.mem[1] += 1

    def merge_output(self):
        if not isinstance(self.normalizer, NoneNormalizer):
            output = self.output[:self.batch_size].copy()
            self.normalizer.denormalize(output)
        else:
            output = self.output.mem
        self.merged_output[self.offset - self.batch_size:self.offset] = output
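
The per-sample metric computed in numpy_run() is the mean of squared
(denormalized) errors over the sample, rooted when root=True, i.e. RMSE.
Condensed, without the normalizer:

import numpy

output = numpy.random.rand(8, 10)
target = numpy.random.rand(8, 10)
root = True

err = output - target
mse = numpy.square(err).sum(axis=1) / err.shape[1]   # per-sample MSE
if root:
    numpy.sqrt(mse, mse)                             # in-place RMSE
metrics = numpy.array([mse.sum(), mse.max(), mse.min()])
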
Example #23
0
class ZeroFiller(ForwardBase, TriviallyDistributable):
    """Fills weights of given unit with zero on every step"""

    MAPPING = {"zero_filter"}

    def __init__(self, workflow, **kwargs):
        super(ZeroFiller, self).__init__(workflow, **kwargs)

        self.mask = Array()
        self.grouping = kwargs.get("grouping", 1)
        self.demand("weights")

    def init_unpickled(self):
        super(ZeroFiller, self).init_unpickled()
        self.sources_["weights_zerofilling"] = {}

    @property
    def effective_shape(self):
        return (self.weights.shape[0],
                self.weights.size // self.weights.shape[0])

    @property
    def grouping(self):
        return self._grouping

    @grouping.setter
    def grouping(self, value):
        if not isinstance(value, int):
            raise TypeError(
                "grouping value must be an integer (got %s)" % type(value))
        if value < 2:
            raise ValueError("grouping value %d is invalid" % value)
        self._grouping = value

    def initialize(self, device=None, **kwargs):
        super(ZeroFiller, self).initialize(device, **kwargs)
        if not self.weights:
            return True

        if not self.mask:
            if self.effective_shape[1] % self.grouping != 0:
                raise ValueError(
                    "Non-multiple of grouping weights shape detected: "
                    "%s, grouping=%d" %
                    (self.weights.shape, self.grouping))
            self.mask.reset(numpy.zeros(self.effective_shape,
                                        dtype=self.weights.dtype))
            self.mask.map_invalidate()
            # TODO(a.kazantsev): add check for transposed weights.
            for kernel in range(self.effective_shape[0]):
                for chan in range(self.effective_shape[1]):
                    self.mask[kernel, chan] = not (
                        kernel % self.grouping == chan % self.grouping)
        else:
            assert self.mask.shape == self.effective_shape

        for vec in self.mask, self.weights:
            vec.initialize(device)

    def _gpu_init(self):
        self.build_program(cache_file_name="zero_filling_%d" % self.grouping,
                           dtype=self.weights.dtype)

        self.assign_kernel("multiply_by_mask")
        self.set_args(self.mask, self.weights)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = [self.weights.size]
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        self._global_size = (self.weights.size, 1, 1)
        self._local_size = (1, 1, 1)

    def numpy_run(self):
        self.mask.map_read()
        self.weights.map_write()

        self.weights.mem *= self.mask.mem

    def _gpu_run(self):
        self.weights.unmap()
        self.mask.unmap()
        self.execute_kernel(self._global_size, self._local_size)

    def ocl_run(self):
        self._gpu_run()

    def cuda_run(self):
        self._gpu_run()
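
The mask built in initialize() is 0 exactly where
kernel % grouping == chan % grouping and 1 elsewhere, so every same-group
weight is forced back to zero on each run. A vectorized sketch of the mask
construction (a loop-free equivalent of the code above):

import numpy

grouping = 2
shape = (4, 6)   # effective_shape: kernels x channels

rows = numpy.arange(shape[0])[:, None]
cols = numpy.arange(shape[1])[None, :]
# 0.0 where row and column fall into the same group (those weights are
# zeroed by "weights *= mask"), 1.0 elsewhere.
mask = (rows % grouping != cols % grouping).astype(numpy.float64)
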
Example #24
0
class ZeroFiller(ForwardBase, TriviallyDistributable):
    """Fills weights of given unit with zero on every step"""

    MAPPING = {"zero_filter"}

    def __init__(self, workflow, **kwargs):
        super(ZeroFiller, self).__init__(workflow, **kwargs)

        self.mask = Array()
        self.grouping = kwargs.get("grouping", 2)
        self.demand("weights")

    def init_unpickled(self):
        super(ZeroFiller, self).init_unpickled()
        self.sources_["weights_zerofilling"] = {}

    @property
    def effective_shape(self):
        return (self.weights.shape[0],
                self.weights.size // self.weights.shape[0])

    @property
    def grouping(self):
        return self._grouping

    @grouping.setter
    def grouping(self, value):
        if not isinstance(value, int):
            raise TypeError("grouping value must be an integer (got %s)" %
                            type(value))
        if value < 2:
            raise ValueError("grouping value %d is invalid" % value)
        self._grouping = value

    def initialize(self, device=None, **kwargs):
        super(ZeroFiller, self).initialize(device, **kwargs)
        if not self.weights:
            return True

        if not self.mask:
            if self.effective_shape[1] % self.grouping != 0:
                raise ValueError(
                    "Non-multiple of grouping weights shape detected: "
                    "%s, grouping=%d" % (self.weights.shape, self.grouping))
            self.mask.reset(
                numpy.zeros(self.effective_shape, dtype=self.weights.dtype))
            self.mask.map_invalidate()
            # TODO(a.kazantsev): add check for transposed weights.
            for kernel in range(self.effective_shape[0]):
                for chan in range(self.effective_shape[1]):
                    self.mask[kernel, chan] = not (kernel % self.grouping
                                                   == chan % self.grouping)
        else:
            assert self.mask.shape == self.effective_shape

        for vec in self.mask, self.weights:
            vec.initialize(device)

    def _gpu_init(self):
        self.build_program(cache_file_name="zero_filling_%d" % self.grouping,
                           dtype=self.weights.dtype)

        self.assign_kernel("multiply_by_mask")
        self.set_args(self.mask, self.weights)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = [self.weights.size]
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        self._global_size = (self.weights.size, 1, 1)
        self._local_size = (1, 1, 1)

    def numpy_run(self):
        self.mask.map_read()
        self.weights.map_write()

        self.weights.mem *= self.mask.mem

    def _gpu_run(self):
        self.weights.unmap()
        self.mask.unmap()
        self.execute_kernel(self._global_size, self._local_size)

    def ocl_run(self):
        self._gpu_run()

    def cuda_run(self):
        self._gpu_run()
Example #25
0
class BatchWeights(AcceleratedUnit, EmptyDeviceMethodsMixin):
    """Make weigths and biases from batch v and h.
    Must be assigned before initialize():
    * v
    * h
    * batch_size

    Updates after run():
    * hbias_batch
    * vbias_batch
    * weights_batch

    Creates within initialize():
    * hbias_batch
    * vbias_batch
    * weights_batch

    Attributes:
        v: input data batch
        h: hidden states of input batch
        batch_size: size of batch
        hbias_batch: bias calculated from h
        vbias_batch: bias calculated from v
        weights_batch: weights calculated from batch v and h
    """
    def __init__(self, workflow, **kwargs):
        super(BatchWeights, self).__init__(workflow, **kwargs)
        self.vbias_batch = Array()
        self.hbias_batch = Array()
        self.weights_batch = Array()
        self.demand("v", "h", "batch_size")

    def initialize(self, device, **kwargs):
        super(BatchWeights, self).initialize(device=device, **kwargs)
        vbias_size = self.v.size // self.v.shape[0]
        hbias_size = self.h.size // self.h.shape[0]
        W_size = vbias_size * hbias_size
        if not self.hbias_batch:
            self.hbias_batch.reset(numpy.zeros((1, hbias_size),
                                               dtype=self.h.mem.dtype))
        else:
            assert self.hbias_batch.size == hbias_size
        if not self.vbias_batch:
            self.vbias_batch.reset(numpy.zeros((1, vbias_size),
                                               dtype=self.h.mem.dtype))
        else:
            assert self.vbias_batch.size == vbias_size
        if not self.weights_batch:
            self.weights_batch.reset(numpy.zeros((vbias_size, hbias_size),
                                                 dtype=self.h.mem.dtype))
        else:
            assert self.weights_batch.size == W_size
        self.init_vectors(self.weights_batch, self.vbias_batch,
                          self.hbias_batch, self.v, self.h)

    def run(self):
        self.v.map_read()
        self.h.map_read()
        for v in self.weights_batch, self.hbias_batch, self.vbias_batch:
            v.map_invalidate()
        self.weights_batch.mem[:] = numpy.dot(
            numpy.transpose(self.v.mem[0: self.batch_size, :]),
            self.h.mem[0: self.batch_size, :]) / \
            self.batch_size
        for bv in (self.vbias_batch, self.v), (self.hbias_batch, self.h):
            bv[0].mem[:] = (numpy.sum(bv[1].mem[:self.batch_size, :], 0) /
                            self.batch_size)
            bv[0].shape = (1, bv[0].size)
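
run() estimates the weight statistics as the batch-averaged outer product
of visible and hidden states, and the biases as batch means, the usual
sufficient statistics in RBM training. Standalone:

import numpy

batch_size = 4
v = numpy.random.rand(batch_size, 6)    # visible states
h = numpy.random.rand(batch_size, 3)    # hidden states

weights_batch = v.T.dot(h) / batch_size       # shape (6, 3)
vbias_batch = v.mean(axis=0).reshape(1, -1)   # shape (1, 6)
hbias_batch = h.mean(axis=0).reshape(1, -1)   # shape (1, 3)
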
Example #26
0
class GradientsCalculator(AcceleratedUnit, EmptyDeviceMethodsMixin):
    """
    Makes gradients for weights, hbias and vbias, using hbias0, vbias0,
    hbias1 and vbias1, which are calculated with the help of BatchWeights.
    Must be assigned before initialize():
    * hbias0
    * vbias0
    * hbias1
    * vbias1
    * weights1
    * weights0

    Updates after run():
    * hbias_grad
    * vbias_grad
    * weights_grad

    Creates within initialize():
    * hbias_grad
    * vbias_grad
    * weights_grad

    Attributes:
        vbias0: calculated with the help of BatchWeights from v0
        hbias0: calculated with the help of BatchWeights from h0
        vbias1: calculated with the help of BatchWeights from v1
        hbias1: calculated with the help of BatchWeights from h1
        weights1: calculated with the help of BatchWeights from v1.
        weights0: calculated with the help of BatchWeights from h1.
        hbias_grad: gradient for hbias
        vbias_grad: gradient for vbias
        weights_grad: gradient for weights
    """
    def __init__(self, workflow, **kwargs):
        super(GradientsCalculator, self).__init__(workflow, **kwargs)
        self.vbias_grad = Array()
        self.hbias_grad = Array()
        self.weights_grad = Array()
        self.demand("hbias1", "vbias1", "hbias0", "vbias0", "weights0",
                    "weights1")

    def initialize(self, device, **kwargs):
        super(GradientsCalculator, self).initialize(device=device, **kwargs)
        if not self.hbias_grad:
            self.hbias_grad.reset(numpy.zeros(self.hbias0.shape,
                                              dtype=self.hbias0.dtype))
        else:
            assert self.hbias_grad.shape == self.hbias0.shape
        if not self.vbias_grad:
            self.vbias_grad.reset(numpy.zeros(self.vbias0.shape,
                                              dtype=self.vbias0.dtype))
        else:
            assert self.vbias_grad.shape == self.vbias0.shape
        if not self.weights_grad:
            self.weights_grad.reset(numpy.zeros(self.weights0.shape,
                                                dtype=self.weights0.dtype))
        else:
            assert self.weights_grad.shape == self.weights0.shape
        for v in (self.weights_grad, self.hbias_grad, self.vbias_grad,
                  self.hbias0, self.vbias0, self.weights0, self.hbias1,
                  self.vbias1, self.weights1):
            v.initialize(self.device)

    def run(self):
        for v in (self.hbias0, self.vbias0, self.weights0,
                  self.hbias1, self.vbias1, self.weights1):
            v.map_read()

        for v in (self.weights_grad, self.vbias_grad, self.hbias_grad):
            v.map_invalidate()

        self.vbias_grad.mem[:] = self.vbias0.mem - self.vbias1.mem
        self.hbias_grad.mem[:] = self.hbias0.mem - self.hbias1.mem
        self.weights_grad.mem[:] = self.weights0.mem - self.weights1.mem
Example #27
0
class MeanDispNormalizer(AcceleratedUnit, TriviallyDistributable):
    """Normalizes multichannel byte images according to
    dataset mean and dispersion.

    Attributes:
        input: minibatch of images (dtype=numpy.uint8,
                                    shape[0]=minibatch_size).
        mean: mean image over the dataset (dtype=numpy.uint8).
        rdisp: 1.0 / dispersion over the dataset (float datatype).
        output: normalized float images of the same dtype as rdisp.
    """
    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "WORKER")
        super(MeanDispNormalizer, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.global_size = None
        self.local_size = None
        self.demand("input", "mean", "rdisp")

    def init_unpickled(self):
        super(MeanDispNormalizer, self).init_unpickled()
        self.sources_["mean_disp_normalizer"] = {}

    def initialize(self, device, **kwargs):
        super(MeanDispNormalizer, self).initialize(device, **kwargs)

        for arr in self.input, self.mean, self.rdisp:
            if not isinstance(arr, Array):
                raise TypeError("veles.memory.Array type expected (got %s)" %
                                type(arr))
            if not arr:
                raise ValueError("Invalid Array state")
        if len(self.input.shape) < 2:
            raise ValueError("input should be at least 2D")
        sample_size = self.mean.size
        if (self.input.sample_size != sample_size
                or self.rdisp.size != sample_size):
            raise ValueError(
                "Sample size of input differs from mean-rdisp size")

        if not self.output:
            self.output.reset(numpy.zeros(self.input.shape, self.rdisp.dtype))
        else:
            assert self.output.shape == self.input.shape

        self.init_vectors(self.input, self.mean, self.rdisp, self.output)

    def _gpu_init(self):
        dtype = self.rdisp.dtype
        sample_size = self.mean.size

        defines = {
            "input_type": numpy_dtype_to_opencl(self.input.dtype),
            "mean_type": numpy_dtype_to_opencl(self.mean.dtype),
            "SAMPLE_SIZE": sample_size
        }
        self.build_program(defines, self.__class__.__name__, dtype=dtype)
        self.assign_kernel("normalize_mean_disp")
        self.set_args(self.input, self.mean, self.rdisp, self.output)

    def ocl_init(self):
        self._gpu_init()
        self.global_size = [self.mean.size, self.input.shape[0]]

    def cuda_init(self):
        self._gpu_init()
        self.local_size = 1, 1, 1
        self.global_size = self.mean.size, self.input.shape[0], 1

    def _gpu_run(self):
        self.unmap_vectors(self.input, self.mean, self.rdisp, self.output)
        self.execute_kernel(self.global_size, self.local_size)

    def ocl_run(self):
        self._gpu_run()

    def cuda_run(self):
        self._gpu_run()

    def numpy_run(self):
        self.input.map_read()
        self.mean.map_read()
        self.rdisp.map_read()
        self.output.map_invalidate()

        dtype = self.output.dtype
        self.output.matrix[:] = (
            self.input.matrix.astype(dtype)[:] -
            self.mean.plain.astype(dtype)) * self.rdisp.plain
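
The whole unit computes output = (input - mean) * rdisp elementwise,
broadcasting the per-pixel mean and reciprocal dispersion over the batch.
An equivalent sketch (shapes are illustrative):

import numpy

inputs = numpy.random.randint(0, 256, (5, 2, 2)).astype(numpy.uint8)
mean = numpy.random.randint(0, 256, (2, 2)).astype(numpy.uint8)
rdisp = numpy.random.rand(2, 2).astype(numpy.float32)

output = (inputs.astype(numpy.float32) -
          mean.astype(numpy.float32)) * rdisp
assert output.dtype == rdisp.dtype
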
Example #28
0
class KohonenForward(KohonenBase, AcceleratedUnit):
    """Kohonen forward layer.

    Must be assigned before initialize():
        input
        weights
        minibatch_offset (if total == True)
        minibatch_size (if total == True)
        batch_size (if total == True)
        argmins speeds up run() if linked from KohonenTrainer

    Updates after run():
        output

    Creates within initialize():
        output

    Attributes:
        input: input as batch of samples.
        weights: the weights of the neurons in Kohonen layer.
        output: the list of winners.
        total: the overall winners table (created only if total=True
               is passed to __init__())
    """
    def __init__(self, workflow, **kwargs):
        super(KohonenForward, self).__init__(workflow, **kwargs)
        self.demand("input", "weights")
        self.argmins = None
        self._distances = Array()
        self.output = Array()
        self._chunk_size_ = 0
        self.weights_transposed = False
        self.total = Array() if kwargs.get("total", False) else None
        if self.total is not None:
            self.minibatch_offset = None
            self.minibatch_size = None
            self.batch_size = None

    def init_unpickled(self):
        super(KohonenForward, self).init_unpickled()
        self.sources_["kohonen"] = {"FORWARD": 1}

    @property
    def neurons_number(self):
        return self.weights.mem.shape[0]

    @property
    def sample_length(self):
        return self.weights.mem.shape[1]

    @property
    def chunk_size(self):
        return self._chunk_size_

    def initialize(self, device, **kwargs):
        super(KohonenForward, self).initialize(device=device, **kwargs)

        assert self.input.mem.shape[1] == self.sample_length
        batch_size = self.input.mem.shape[0]

        self.output.reset(numpy.zeros(batch_size, dtype=numpy.int32))
        if self.argmins is None:
            self._distances.reset(
                numpy.zeros([batch_size, self.neurons_number],
                            dtype=self.weights.mem.dtype))

        if self.total is not None:
            self.total.reset(numpy.zeros(self.batch_size, dtype=numpy.int32))
            self._minibatch_offset_ = numpy.zeros(1, dtype=numpy.int32)

    def ocl_init(self):
        batch_size = self.input.mem.shape[0]
        self.output.initialize(self.device)
        if self.argmins is None:
            self.input.initialize(self.device)
            self.weights.initialize(self.device)
            self._distances.initialize(self.device)
        elif self.total is None:
            return
        if self.total is not None:
            self.total.initialize(self.device)

        copy_chunk_size = int(
            numpy.ceil(batch_size / self.device.max_group_size))
        chunk_size = self.neurons_number // self.device.max_group_size
        if chunk_size < 2:
            chunk_size = self.neurons_number // 2 + 1
        self.argmin_group_size = \
            int(numpy.ceil(self.neurons_number / chunk_size))

        block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        defines = {
            'BLOCK_SIZE': block_size,
            'VECTOR_OPT': int(bool(vector_opt)),
            'BATCH': batch_size,
            'SAMPLE_LENGTH': self.sample_length,
            'NEURONS_NUMBER': self.neurons_number,
            'CHUNK_SIZE': chunk_size,
            'COPY_CHUNK_SIZE': copy_chunk_size,
        }
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1
        self.build_program(defines,
                           "%s_%d_%d_%d" %
                           (self.__class__.__name__, batch_size,
                            self.sample_length, self.neurons_number),
                           dtype=self.weights.mem.dtype)

        if self.total is not None:
            self._set_total_global_size_ = \
                [int(numpy.ceil(batch_size / copy_chunk_size))]
            self._krn_set_total_ = self.get_kernel("set_total")
            self._krn_set_total_.set_args(self.output.devmem, cl.skip,
                                          self.total.devmem)
        if self.argmins is not None:
            return

        self._krn_distances_ = self.get_kernel("calculate_distances")
        self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                      self._distances.devmem)

        self._krn_argmin_ = self.get_kernel("calculate_argmin")
        self._krn_argmin_.set_args(self._distances.devmem, self.output.devmem,
                                   None)

        self._gs_distance = [
            roundup(self.neurons_number, block_size),
            roundup(batch_size, block_size)
        ]
        self._ls_distance = [block_size, block_size]

    def ocl_run(self):
        self.output.unmap()
        if self.total is not None:
            self.total.unmap()

        if self.argmins is None:
            self.input.unmap()
            self.weights.unmap()
            self.execute_kernel(self._gs_distance, self._ls_distance,
                                self._krn_distances_)
            self.execute_kernel([self.argmin_group_size],
                                [self.argmin_group_size], self._krn_argmin_)
        else:
            self.argmins.unmap()
            self.argmins.map_read()
            self.output.map_write()
            self.output.mem[:] = self.argmins.mem
            self.output.unmap()
            self.argmins.unmap()

        if self.total is not None:
            self._minibatch_offset_[0] = \
                self.minibatch_offset - self.minibatch_size
            self._krn_set_total_.set_arg(1, self._minibatch_offset_)
            self.execute_kernel(self._set_total_global_size_, None,
                                self._krn_set_total_)

    def numpy_run(self):
        self.output.map_invalidate()

        if self.argmins is not None:
            self.argmins.map_read()
            self.output.mem[:] = self.argmins.mem
        else:
            self.input.map_read()
            self.weights.map_read()

        if self.total is not None:
            self.total.map_invalidate()

        length = self.minibatch_size if self.total is not None \
            else self.input.mem.shape[0]
        for sindex in range(length):
            if self.argmins is None:
                dist = self.weights.mem - self.input[sindex]
                winner = numpy.argmin(self.numpy_linalg_norm(dist))
                self.output[sindex] = winner
            else:
                winner = self.argmins[sindex]
            if self.total is not None:
                index = sindex + self.minibatch_offset - self.minibatch_size
                self.total[index] = winner
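
A hedged sketch of the winner search performed in KohonenForward.numpy_run:
the winning neuron for a sample is the weights row nearest in Euclidean norm.
The names below are illustrative, not the unit's real fields.

import numpy

weights = numpy.random.rand(16, 8)  # 16 neurons, 8-dimensional samples
sample = numpy.random.rand(8)
winner = numpy.argmin(numpy.linalg.norm(weights - sample, axis=1))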
Example #29
class GradientDescentBase(AcceleratedUnit):
    """Base class for gradient descent units.

    Attributes:
        input: input layer values.
        output: output layer values.
        err_output: error to backpropagate.
        err_input: backpropagated error.
        weights: weights.
        bias: bias.
        batch_size: effective minibatch size (if None, taken from err_output).
        learning_rate: gradient descent speed (positive).
        learning_rate_bias
        weights_decay: regularization for weights (see l1_vs_l2).
        weights_decay_bias
        gradient_moment: moment coefficient for weights.
        gradient_moment_bias
        gradient_weights_with_moment: accumulated moment.
        gradient_bias_with_moment
        weights_transposed: assume weights matrix as a transposed one.
        apply_gradient: if True, the computed gradient is applied to weights.
        gradient_changed: when True, slave will send gradients to master
            (assigned to True just before the run call, so it can be set to
            False inside ocl_run, numpy_run if necessary).
        ocl_set_const_args: True when constant arguments for the kernel
                            have changed and need to be set again.
    """
    hide_from_registry = True
    MAPPING = set()

    REDUCE_SIZE = 64  # used for updating bias

    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "TRAINER")
        super(GradientDescentBase, self).__init__(workflow, **kwargs)
        self.err_input = Array(shallow_pickle=True)
        self.ocl_set_const_args = True
        self.weights = None
        self.bias = None
        self.demand("input", "err_output")
        self.learning_rate = kwargs.get("learning_rate", 0.01)
        self.learning_rate_bias = kwargs.get("learning_rate_bias",
                                             self.learning_rate)
        self.weights_decay = kwargs.get("weights_decay", 0.00005)
        self.weights_decay_bias = kwargs.get("weights_decay_bias", 0.0)
        self.l1_vs_l2 = kwargs.get("l1_vs_l2", 0)
        self.l1_vs_l2_bias = kwargs.get("l1_vs_l2_bias", self.l1_vs_l2)
        self.gradient_moment = kwargs.get("gradient_moment", 0)
        self.gradient_moment_bias = kwargs.get("gradient_moment_bias",
                                               self.gradient_moment)
        self.weights_transposed = kwargs.get("weights_transposed", False)
        self.need_err_input = kwargs.get("need_err_input", True)
        self.include_bias = kwargs.get("include_bias", True)
        self.factor_ortho = kwargs.get("factor_ortho", 0)
        self.col_sums = Array()  # for orthogonalization

        # Current gradient as it is without applying learning_rate etc.
        self.gradient_weights = Array()
        self.gradient_bias = Array()

        # Gradient with applied learning_rate etc.
        # optionally accumulated from the previous run
        self.accumulate_gradient = kwargs.get("accumulate_gradient", False)

        # When accumulate_gradient set to True:
        # 1. Calculate gd
        # 2. acc = acc_alpha * gd + acc_beta * acc
        # 3. gd = gd_alpha * acc + gd_beta * gd
        # 4. Apply moments to gd
        # 5. weights += gd if apply_gradient set to True
        self.acc_alpha = kwargs.get("acc_alpha", 0.0)
        self.acc_beta = kwargs.get("acc_beta", 0.0)
        self.gd_alpha = kwargs.get("gd_alpha", 0.0)
        self.gd_beta = kwargs.get("gd_beta", 1.0)

        self.accumulated_gradient_weights = Array()
        self.accumulated_gradient_bias = Array()

        # Gradient with accumulated moments
        self.gradient_weights_with_moment = Array()
        self.gradient_bias_with_moment = Array()

        # Set to True when the gradient changes
        self.gradient_changed = False

        # Gradient will be applied to the weights immediately after computing
        self.apply_gradient = kwargs.get("apply_gradient",
                                         not workflow.is_slave)

    @property
    def current_batch_size(self):
        batch_size = getattr(self, "batch_size", None)
        if batch_size is None:
            return self.err_output.mem.shape[0]
        return int(batch_size)

    def initialize(self, device, **kwargs):
        super(GradientDescentBase, self).initialize(device, **kwargs)

        if self.weights:
            assert len(self.weights.shape) == 2
            self.weights_shape = (tuple(reversed(self.weights.shape))
                                  if self.weights_transposed else
                                  self.weights.shape)
        else:
            self.weights_shape = None

        self.learning_rate = kwargs.get("learning_rate", self.learning_rate)
        self.weights_decay = kwargs.get("weights_decay", self.weights_decay)
        self.gradient_moment = kwargs.get("gradient_moment",
                                          self.gradient_moment)
        self.learning_rate_bias = kwargs.get("learning_rate_bias",
                                             self.learning_rate_bias)
        self.weights_decay_bias = kwargs.get("weights_decay_bias",
                                             self.weights_decay_bias)
        self.gradient_moment_bias = kwargs.get("gradient_moment_bias",
                                               self.gradient_moment_bias)

        if self.weights:
            if not self.gradient_weights:
                self.gradient_weights.reset(numpy.zeros_like(self.weights.mem))
            else:
                assert self.gradient_weights.size == self.weights.size

        if self.weights and self.accumulate_gradient:
            if not self.accumulated_gradient_weights:
                self.accumulated_gradient_weights.reset(
                    numpy.zeros_like(self.weights.mem))
            else:
                assert (self.accumulated_gradient_weights.size ==
                        self.weights.size)

        if self.weights and (self.gradient_moment or not self.is_standalone):
            if not self.gradient_weights_with_moment:
                self.gradient_weights_with_moment.reset(
                    numpy.zeros_like(self.weights.mem))
            else:
                assert self.gradient_weights_with_moment.size == \
                    self.weights.size

        if (self.include_bias and self.bias
                and (not self.gradient_bias
                     or self.gradient_bias.size != self.bias.size)):
            self.gradient_bias.reset(numpy.zeros_like(self.bias.mem))

        if (self.include_bias and self.bias and self.accumulate_gradient and
            (not self.accumulated_gradient_bias
             or self.accumulated_gradient_bias.size != self.bias.size)):
            self.accumulated_gradient_bias.reset(
                numpy.zeros_like(self.bias.mem))

        if (self.include_bias and self.bias
                and (self.gradient_moment_bias or not self.is_standalone)):
            if not self.gradient_bias_with_moment:
                self.gradient_bias_with_moment.reset(
                    numpy.zeros_like(self.bias.mem))
            else:
                assert self.gradient_bias_with_moment.size == self.bias.size

        dtype = self.err_output.dtype
        if self.need_err_input:
            if not self.err_input:
                self.err_input.reset(numpy.zeros(self.input.shape, dtype))
            else:
                assert self.err_input.shape == self.input.shape

        if self.weights:
            side = self.weights_shape[0]
            other = self.weights.size // side
            if self.factor_ortho:
                if not self.col_sums:
                    self.col_sums.reset(numpy.zeros(other, dtype=dtype))
                else:
                    assert self.col_sums.size == other
                self.col_sums.initialize(self.device)
            self.reduce_size = roundup(min(self.reduce_size, other), 32)
            self.weights.initialize(self.device)

        for vec in self.bias, self.input, self.err_input:
            if vec:
                vec.initialize(self.device)
        self.init_vectors(self.err_output, self.gradient_weights,
                          self.gradient_bias,
                          self.accumulated_gradient_weights,
                          self.accumulated_gradient_bias,
                          self.gradient_weights_with_moment,
                          self.gradient_bias_with_moment)

    def gpu_weights_update(self):
        self.unmap_vectors(self.input, self.err_output, self.weights,
                           self.gradient_weights,
                           self.accumulated_gradient_weights,
                           self.gradient_weights_with_moment)

        if self.factor_ortho:
            self.col_sums.unmap()
            self.execute_kernel(self._global_size_ortho,
                                self._local_size_ortho,
                                self.krn_compute_col_sums_)

            self._weights_const[12] = self.factor_ortho
            self.krn_weights_.set_arg(12, self._weights_const[12:13])

        self._weights_const[4:12] = (self.learning_rate, self.weights_decay,
                                     self.l1_vs_l2, self.gradient_moment,
                                     self.acc_alpha, self.acc_beta,
                                     self.gd_alpha, self.gd_beta)
        self.krn_weights_.set_args(
            self.device.skip(4), self._weights_const[4:5],
            self._weights_const[5:6], self._weights_const[6:7],
            self._weights_const[7:8], self._weights_const[8:9],
            self._weights_const[9:10], self._weights_const[10:11],
            self._weights_const[11:12])

        self.execute_kernel(self._global_size_weights,
                            self._local_size_weights, self.krn_weights_)

    def gpu_bias_update(self):
        if not self.include_bias:
            return

        self.unmap_vectors(self.err_output, self.bias, self.gradient_bias,
                           self.accumulated_gradient_bias,
                           self.gradient_bias_with_moment)

        self._bias_const[5:13] = (self.learning_rate_bias,
                                  self.weights_decay_bias, self.l1_vs_l2_bias,
                                  self.gradient_moment_bias, self.acc_alpha,
                                  self.acc_beta, self.gd_alpha, self.gd_beta)
        self.krn_bias_.set_args(self.device.skip(5), self._bias_const[5:6],
                                self._bias_const[6:7], self._bias_const[7:8],
                                self._bias_const[8:9], self._bias_const[9:10],
                                self._bias_const[10:11],
                                self._bias_const[11:12],
                                self._bias_const[12:13])

        self.execute_kernel(self._global_size_bias, self._local_size_bias,
                            self.krn_bias_)

    def gpu_err_output_update(self):
        """Multiply err_output by activation derivative by output.
        """
        if self.krn_err_output_ is None:
            return
        self.err_output.unmap()
        self.output.unmap()
        self.execute_kernel(self._global_size_err_output,
                            self._local_size_err_output, self.krn_err_output_)

    def numpy_err_output_update(self):
        """Multiply err_output by activation derivative by output.
        """
        pass

    def print_debug_data(self):
        """
        Show weights statistics
        """
        if not self.logger.isEnabledFor(logging.DEBUG):
            return
        self.weights.map_read()
        self.bias.map_read()
        self.gradient_bias.map_read()
        self.gradient_weights.map_read()
        weights = self.weights.mem
        bias = self.bias.mem
        grad_weights = self.gradient_weights.mem
        grad_bias = self.gradient_bias.mem

        weight_table = PrettyTable("TYPE", "Mean", "StdDev", "Min", "Max")
        weight_table.float_format = ".10"
        for (w_name, w_array) in [("Weight", weights), ("Bias", bias),
                                  ("Grad Weight", grad_weights),
                                  ("Grad Bias", grad_bias)]:
            w_mean = w_stddev = w_min = w_max = None
            if w_array is not None and w_array.size > 0:
                w_mean = numpy.mean(w_array)
                w_stddev = numpy.std(w_array)
                w_min = numpy.min(w_array)
                w_max = numpy.max(w_array)
            weight_table.add_row(w_name, w_mean, w_stddev, w_min, w_max)
        self.debug("\n" + weight_table.get_string())

    def generate_data_for_slave(self, slave):
        return (self.learning_rate, self.weights_decay, self.gradient_moment,
                self.learning_rate_bias, self.weights_decay_bias,
                self.gradient_moment_bias)

    @staticmethod
    def fill_zeros(vector):
        if not vector:
            return
        vector.map_invalidate()
        vector.mem[:] = 0

    def apply_data_from_master(self, data):
        self.learning_rate = data[0]
        self.weights_decay = data[1]
        self.gradient_moment = data[2]
        self.learning_rate_bias = data[3]
        self.weights_decay_bias = data[4]
        self.gradient_moment_bias = data[5]
        self.fill_zeros(self.gradient_weights_with_moment)
        self.fill_zeros(self.gradient_bias_with_moment)
        self.fill_zeros(self.gradient_weights)
        self.fill_zeros(self.gradient_bias)
        self.fill_zeros(self.accumulated_gradient_weights)
        self.fill_zeros(self.accumulated_gradient_bias)

    def generate_data_for_master(self):
        if not self.gradient_changed:
            return None
        self.gradient_changed = False
        self.gradient_weights_with_moment.map_read()
        self.gradient_bias_with_moment.map_read()
        return (self.gradient_weights_with_moment.mem,
                self.gradient_bias_with_moment.mem)

    def apply_data_from_slave(self, data, slave):
        if self.weights:
            self.weights.map_write()
            self.gradient_weights_with_moment.map_write()
            self.gradient_weights_with_moment.mem *= self.gradient_moment
            self.gradient_weights_with_moment.mem += data[0]
            self.weights.mem += self.gradient_weights_with_moment.mem
        if self.bias:
            self.bias.map_write()
            self.gradient_bias_with_moment.map_write()
            self.gradient_bias_with_moment.mem *= self.gradient_moment_bias
            self.gradient_bias_with_moment.mem += data[1]
            self.bias.mem += self.gradient_bias_with_moment.mem

    def drop_slave(self, slave):
        pass

    def accumulate_gradient_f(self, accumulated_gradient, gradient):
        if accumulated_gradient and self.accumulate_gradient:
            accumulated_gradient[:] = (
                gradient * self.acc_alpha +
                (self.acc_beta * accumulated_gradient if self.acc_beta else 0))

            gradient *= self.gd_beta
            gradient += self.gd_alpha * accumulated_gradient

        return gradient

    @staticmethod
    def numpy_gradient_step(weight,
                            gradient,
                            lr,
                            factor_l12,
                            l1_vs_l2,
                            factor_ortho=0,
                            weights_transposed=False):
        gradient = gradient.copy()
        gradient += factor_l12 * (
            (1.0 - l1_vs_l2) * weight + 0.5 * l1_vs_l2 * numpy.sign(weight))
        if factor_ortho:
            col_sums = (reshape_transposed(weight).sum(
                axis=1) if weights_transposed else weight.sum(axis=0))
            for i, row in enumerate(gradient):
                row += (col_sums - weight[i]) * factor_ortho / weight.shape[0]
        gradient *= lr
        return gradient

    def run(self):
        self.gradient_changed = True
        super(GradientDescentBase, self).run()
        self.ocl_set_const_args = False
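
A small numpy sketch of the accumulation rule documented in __init__ above
(steps 2-3 of the accumulate_gradient comment): acc = acc_alpha * gd +
acc_beta * acc, then gd = gd_alpha * acc + gd_beta * gd. Variable names and
coefficient values here are illustrative only.

import numpy

gd = numpy.array([0.5, -0.25])  # freshly computed gradient
acc = numpy.zeros_like(gd)      # accumulator carried between runs
acc_alpha, acc_beta = 0.1, 0.9
gd_alpha, gd_beta = 1.0, 0.0

acc = acc_alpha * gd + acc_beta * acc  # step 2: update the accumulator
gd = gd_alpha * acc + gd_beta * gd     # step 3: mix it back into the gradient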
Example #30
class Deconv(TriviallyDistributable, ConvolutionalBase, nn_units.Forward):
    # TriviallyDistributable overrides nn_units.Forward IDistributable
    """Deconvolutional layer for simple convolutional layer
    with linear activation and without bias.

    Must be assigned before initialize():
        input
        weights
        output_shape_source

    Updates after run():
        output

    Creates within initialize():
        output

    Attributes:
        input: input as batch of multichannel interleaved images.
        output: output as batch of multichannel interleaved images.
        weights: matrix of weights.
        output_shape_source: Array to get output shape from.
        n_kernels: number of convolutional kernels
                   in the corresponding convolutional layer.
        kx: kernel width.
        ky: kernel height.
        sliding: tuple of kernel sliding (by x-axis, by y-axis),
                 kx, ky MUST be multiples of sliding to avoid irregularities.
        padding: tuple of virtual sample padding (left, top, right, bottom),
                 will be computed automatically based on sliding.
        weights_transposed: assume weights matrix as a transposed one.
        unsafe_padding: flag to enable unsafe padding and/or sliding.
    """

    MAPPING = {"deconv"}

    @staticmethod
    def compute_padding(sx, sy, kx, ky, sliding):
        """Computes required padding.
        """
        return (kx - sliding[1], ky - sliding[0],
                kx - sx % sliding[1] if sx % sliding[1] != 0
                else kx - sliding[1],
                ky - sy % sliding[0] if sy % sliding[0] != 0
                else ky - sliding[0])

    @staticmethod
    def check_padding_is_safe(kx, ky, sliding):
        if sliding[0] > (ky >> 1) or sliding[1] > (kx >> 1):
            raise ValueError(
                "sliding should not be greater than half of the kernel size")
        if ky % sliding[0] != 0 or kx % sliding[1] != 0:
            raise ValueError(
                "Kernel size should be multiple of sliding")

    def __init__(self, workflow, **kwargs):
        super(Deconv, self).__init__(workflow, **kwargs)
        self.unsafe_padding = kwargs.get("unsafe_padding", False)
        self.hits = Array()
        self.krn_clear_output_ = None
        self._global_size = None
        self._local_size = None
        del self.bias
        self.demand("n_kernels", "kx", "ky", "padding", "sliding",
                    "input", "weights", "output_shape_source")

    def init_unpickled(self):
        super(Deconv, self).init_unpickled()
        self.sources_["deconv/forward"] = {}

    def initialize(self, device, **kwargs):
        super(Deconv, self).initialize(device, **kwargs)

        self._dtype = self.input.dtype

        self.weights_shape = (tuple(reversed(self.weights.shape))
                              if self.weights_transposed
                              else self.weights.shape)

        if hasattr(self, "bias"):
            raise ValueError("bias should not be set")
        if (len(self.input.shape) != 4 or
                self.input.shape[3] != self.n_kernels):
            raise ValueError("Incorrectly shaped input encountered")
        if (len(self.weights_shape) != 2 or
                self.weights_shape[0] != self.n_kernels or
                self.weights_shape[1] % (self.kx * self.ky) != 0):
            raise ValueError("Incorrectly shaped weights encountered")

        output_shape = tuple(self.output_shape_source.shape)
        if len(output_shape) != 4:
            raise ValueError("Incorrect output_shape_source shape")
        if output_shape[0] != self.input.shape[0]:
            raise ValueError(
                "output_shape_source.shape[0] != input.shape[0]")

        try:
            self.check_padding_is_safe(self.kx, self.ky, self.sliding)
        except ValueError as e:
            if not self.unsafe_padding:
                raise from_none(e)
            self.warning("The padding will be unsafe")
            self._create_hits(output_shape)

        padding = Deconv.compute_padding(
            output_shape[2], output_shape[1], self.kx, self.ky, self.sliding)
        if self.padding is None:  # pylint: disable=E0203
            self.padding = padding
        elif self.padding != padding:
            if not self.unsafe_padding:
                raise ValueError(
                    "Expected padding %s but got %s" % (padding, self.padding))
            self._create_hits(output_shape)

        if self.output:
            assert self.output.shape[1:] == output_shape[1:]
        if not self.output or self.output.shape[0] != output_shape[0]:
            self.output.reset(numpy.zeros(output_shape,
                                          dtype=self._dtype))

        self._output_shape = output_shape

        self._sy, self._sx, self._n_channels = self._output_shape[1:]
        self._kernel_size = self.kx * self.ky * self._n_channels

        self._kernel_app_per_image = self.input.sample_size // self.n_kernels
        self._kernel_app_total = (self._kernel_app_per_image *
                                  self.input.shape[0])

        self.init_vectors(self.input, self.weights, self.output, self.hits)

    def _create_hits(self, output_shape):
        if not self.hits:
            self.hits.reset(
                numpy.zeros(output_shape, dtype=numpy.int32))
        else:
            assert self.hits.size == int(numpy.prod(output_shape))

    def _gpu_init(self, blas_class):
        defines = {
            "USE_ATOMICS": 1,
            "WEIGHTS_TRANSPOSED": int(self.weights_transposed),
            "BATCH": self._output_shape[0],
            "SX": self._sx,
            "SY": self._sy,
            "N_CHANNELS": self._n_channels,
            "KX": self.kx,
            "KY": self.ky,
            "N_KERNELS": self.n_kernels,
            "PAD_LEFT": self.padding[0],
            "PAD_TOP": self.padding[1],
            "PAD_RIGHT": self.padding[2],
            "PAD_BOTTOM": self.padding[3],
            "SLIDE_X": self.sliding[0],
            "SLIDE_Y": self.sliding[1],
            "USE_HITS": int(bool(self.hits)),
            "DECONV_MODE": int(bool(self.hits)) + 1,
            "OUTPUT_SIZE": self.output.size
        }

        self.build_program(
            defines, "%s/%s_%d_%dx%dx%d_%dx%d_%d" % (
                root.common.dirs.cache, self.__class__.__name__,
                self.input.shape[0],
                self._output_shape[2], self._output_shape[1],
                self._output_shape[3],
                self.kx, self.ky, self.n_kernels), dtype=self._dtype)

        self.krn_pack_ = self.get_kernel("DirectPack")
        unpack_bytes = (self._kernel_app_per_image * self.unpack_size *
                        self._kernel_size * self.input.itemsize)
        self.device.request_temp_buffer(unpack_bytes)

        if self.hits:
            self.krn_pack_.set_arg(3, self.hits.devmem)

            self.krn_apply_hits_ = self.get_kernel("apply_hits")
            self.krn_apply_hits_.set_args(self.output.devmem, self.hits.devmem)

        self.gemm_ = blas_class.gemm(self._dtype)
        self.np_one = numpy.ones(1, dtype=self._dtype)
        self.np_zero = numpy.zeros(1, dtype=self._dtype)
        self._const_i = numpy.zeros(1, dtype=numpy.int64)

    def ocl_init(self):
        ocl_blas.OCLBLAS.attach_to_device(self.device)
        self._gpu_init(ocl_blas.OCLBLAS)

        self._global_size_pack = lambda size: (size,)
        self._local_size_pack = None

        if self.hits:
            self.krn_clear_hits_ = self.get_kernel("clear_hits")
            self.krn_clear_hits_.set_arg(0, self.hits.devmem)

            self._global_size_hits = (self.output.size,)
            self._local_size_hits = None

        self.krn_clear_output_ = self.get_kernel("clear_output")
        self.krn_clear_output_.set_arg(0, self.output.devmem)

        self._clear_output = lambda: (
            self.execute_kernel((self.output.size,), None,
                                self.krn_clear_output_))
        self._clear_hits = lambda: (
            self.execute_kernel((self.hits.size,), None, self.krn_clear_hits_))

        self._process_subblock = self._ocl_process_subblock

        self.krn_pack_.set_arg(1, self.output.devmem)

    def cuda_init(self):
        self._gpu_init(cublas.CUBLAS)

        block_size = self.device.suggest_block_size(self.krn_pack_)
        self._global_size_pack = (
            lambda size: (int(numpy.ceil(size / block_size)), 1, 1))
        self._local_size_pack = (block_size, 1, 1)

        if self.hits:
            block_size = self.device.suggest_block_size(self.krn_apply_hits_)
            self._global_size_hits = (
                int(numpy.ceil(self.output.size / block_size)), 1, 1)
            self._local_size_hits = (block_size, 1, 1)

        self._clear_output = lambda: self.output.devmem.memset32_async()
        self._clear_hits = lambda: self.hits.devmem.memset32_async()

        self._process_subblock = self._cuda_process_subblock

    def ocl_run(self):
        self.gpu_run()

    def cuda_run(self):
        self.gpu_run()

    def gpu_run(self):
        self.unmap_vectors(self.output, self.input, self.weights)
        unpack_data = self.device.get_temp_buffer()
        self._clear_output()
        if self.hits:
            self.hits.unmap()
            self._clear_hits()
        batch_size = self.output.shape[0]
        for i in range(0, batch_size, self.unpack_size):
            self._process_subblock(i, min(batch_size - i, self.unpack_size),
                                   unpack_data)
        if self.hits:
            self.execute_kernel(self._global_size_hits, self._local_size_hits,
                                self.krn_apply_hits_)

    def _cuda_process_subblock(self, start_image, image_count, unpack_data):
        output_offs = (start_image * self.input.sample_size *
                       self.input.itemsize)
        unpack_side = self._kernel_app_per_image * image_count

        self.gemm_(
            self.device.blas, cublas.CUBLAS_OP_T if self.weights_transposed
            else cublas.CUBLAS_OP_N, cublas.CUBLAS_OP_N,
            self._kernel_size, unpack_side, self.weights_shape[0],
            self.np_one, self.weights.devmem,
            int(self.input.devmem) + output_offs,
            self.np_zero, unpack_data)

        self.krn_pack_.set_arg(0, unpack_data)
        self.krn_pack_.set_arg(
            1, int(self.output.devmem) +
            start_image * self.output.sample_size * self.output.itemsize)
        limit = unpack_side * self._kernel_size
        self._const_i[0] = limit
        self.krn_pack_.set_arg(2, self._const_i)
        self.execute_kernel(self._global_size_pack(limit),
                            self._local_size_pack, self.krn_pack_)

    def _ocl_process_subblock(self, start_image, image_count, unpack_data):
        output_offs = start_image * self.input.sample_size
        unpack_side = self._kernel_app_per_image * image_count

        self.gemm_(
            self.device.blas, cublas.CUBLAS_OP_T if self.weights_transposed
            else cublas.CUBLAS_OP_N, cublas.CUBLAS_OP_N,
            self._kernel_size, unpack_side, self.weights_shape[0],
            self.np_one, self.weights.devmem,
            self.input.devmem,
            self.np_zero, unpack_data, offsetB=output_offs)

        self.krn_pack_.set_arg(0, unpack_data)
        self._const_i[0] = start_image * self.output.sample_size
        self.krn_pack_.set_arg(2, self._const_i)
        limit = unpack_side * self._kernel_size
        self.execute_kernel(self._global_size_pack(limit),
                            self._local_size_pack, self.krn_pack_)

    def numpy_run(self):
        raise NotImplementedError()
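
A worked example of Deconv.compute_padding under assumed sizes: for a 28x28
target with a 4x4 kernel and sliding (2, 2), 28 is divisible by the step, so
every side gets kx - sliding = 2 pixels of virtual padding.

padding = Deconv.compute_padding(sx=28, sy=28, kx=4, ky=4, sliding=(2, 2))
assert padding == (2, 2, 2, 2)  # (left, top, right, bottom)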
Example #31
class EvaluatorMSE(EvaluatorBase):
    """Evaluates the mean squared error between network output and target.

    Must be assigned before initialize():
        output
        target
        batch_size
        labels (may be None)
        class_targets (may be None)

    Updates after run():
        err_output
        metrics
        mse
        n_err (only if labels and class_targets are not None)

    Creates within initialize():
        err_output
        metrics
        mse
        n_err (only if labels and class_targets are not None)

    Attributes:
        output: output of the network_common as Batch.
        target: target for the current Batch.
        err_output: backpropagation errors.
        batch_size: number of elements in output to evaluate.
        metrics: [0] - sum of samples' mse, [1] - max of samples' mse,
                 [2] - min of samples' mse.
        mse: array of mse for each sample in minibatch.
        krn_constants_i_: numpy array for constant arguments to kernel.
        labels: labels for a batch (may be None).
        class_targets: target for each class (may be None).
        n_err: number of wrongly recognized samples
            (if labels and class_targets are not None).
    """

    MAPPING = "evaluator_mse"
    LOSS = "mse"
    def __init__(self, workflow, **kwargs):
        super(EvaluatorMSE, self).__init__(workflow, **kwargs)
        self.metrics = Array()
        self.mse = Array()
        self.labels = None
        self.class_targets = None
        self.n_err = Array()
        self.root = kwargs.get("root", True)
        self.demand("target", "normalizer")

    @property
    def root(self):
        """
        :return: True if the error metric is RMSE, otherwise MSE (mean
        squared error). Default is True.
        return self._root

    @root.setter
    def root(self, value):
        if not isinstance(value, bool):
            raise TypeError("root must be boolean (got %s)" % type(value))
        self._root = value

    def initialize(self, device, **kwargs):
        super(EvaluatorMSE, self).initialize(device=device, **kwargs)
        if self.testing:
            return

        if self.target.size != self.output.size:
            raise error.BadFormatError(
                "target.size != output.size (%s != %s)" %
                (self.target.size, self.output.size))

        self.sources_["evaluator_mse"] = {}
        self.sources_["denormalization"] = {}

        dtype = self.output.dtype

        self.metrics.reset(numpy.zeros(3, dtype=dtype))
        self.metrics[2] = 1.0e30  # mse_min
        self.mse.reset(numpy.zeros(self.err_output.mem.shape[0], dtype))
        self.n_err.reset(numpy.zeros(2, dtype=numpy.int32))
        self.init_vectors(self.n_err, self.target, self.metrics, self.mse)
        if self.class_targets:
            self.class_targets.initialize(self.device)

    def _gpu_init(self):
        dtype = self.output.dtype
        block_size = min(self.err_output.shape[0], 128)
        if self.class_targets:
            self.sources_["mse_find_closest"] = {
                "target_dtype": numpy_dtype_to_opencl(self.class_targets.dtype)
            }

        self.build_program(
            cache_file_name="%s_%d_%d" % (self.__class__.__name__,
                                          self.output.shape[0],
                                          self.output.sample_size),
            dtype=dtype, max_batch_size=self.err_output.shape[0],
            block_size=block_size, output_size=self.err_output.sample_size,
            root=self.root, normalization=self.normalizer.MAPPING,
            targets_number=self.class_targets.shape[0] if self.class_targets
            else None, coeffs=self.normalizer.coefficients)

        self.assign_kernel("evaluate_mse")
        self.set_args(self.output, self.target, self.skip_args(2),
                      self.metrics, self.mse.devmem, self.err_output)

        if self.labels and self.class_targets:
            assert self.labels.dtype == self.n_err.dtype == numpy.int32
            self.krn_find_closest_ = self.get_kernel("mse_find_closest")
            self.krn_find_closest_.set_args(
                self.output.devmem,
                self.class_targets.devmem,
                self.labels.devmem,
                self.n_err.devmem)

        return block_size

    def ocl_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._local_size = [block_size]
        self._global_size = self._local_size
        self._global_size_find_closest_ = lambda: (self.batch_size,)
        self._local_size_find_closest = None

    def cuda_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._local_size = (block_size, 1, 1)
        self._global_size = (1, 1, 1)
        self._global_size_find_closest_ = lambda: (self.batch_size, 1, 1)
        self._local_size_find_closest = (1, 1, 1)

    def _gpu_run(self):
        self.unmap_vectors(self.err_output, self.output, self.target,
                           self.metrics, self.mse)

        batch_size = self.batch_size
        self.krn_constants_i_[0] = batch_size
        self.set_arg(2, self.krn_constants_i_[0:1])
        self.krn_constants_f_[0] = 1.0 / self.batch_size if self.mean else 1.0
        self.set_arg(3, self.krn_constants_f_[0:1])

        self.execute_kernel(self._global_size, self._local_size)

        if self.labels and self.class_targets:
            self.unmap_vectors(self.class_targets, self.labels, self.n_err)
            self.execute_kernel(self._global_size_find_closest_(),
                                self._local_size_find_closest,
                                self.krn_find_closest_)
            self.n_err.map_write()
            self.n_err.mem[1] += batch_size

    def ocl_run(self):
        return self._gpu_run()

    def cuda_run(self):
        return self._gpu_run()

    def numpy_run(self):
        self.output.map_read()
        self.target.map_read()
        self.metrics.map_write()
        self.err_output.map_invalidate()
        self.mse.map_invalidate()

        assert self.output.size == self.target.size == self.err_output.size
        batch_size = self.batch_size
        err_output = self.err_output.matrix[:batch_size]
        assert_addr(err_output, self.err_output.mem)
        output = self.output.matrix[:batch_size]
        assert_addr(output, self.output.mem)
        target = self.target.matrix[:batch_size]
        assert_addr(target, self.target.mem)
        mse = self.mse.mem[:batch_size]
        assert_addr(mse, self.mse.mem)

        err_output[:] = output - target
        if not isinstance(self.normalizer, NoneNormalizer):
            output_copy = output.copy()
            target_copy = target.copy()
            self.normalizer.denormalize(output_copy)
            self.normalizer.denormalize(target_copy)
            denormed_err_output = output_copy - target_copy
        else:
            denormed_err_output = err_output
        self.err_output.mem[batch_size:] = 0
        mse[:] = numpy.square(denormed_err_output).sum(axis=1) / \
            denormed_err_output.shape[1]
        if self.mean:
            err_output /= batch_size
        if self.root:
            numpy.sqrt(mse, mse)
        self.mse.mem[batch_size:] = 0

        self.metrics.mem[0] += mse.sum()
        self.metrics.mem[1] = max(self.metrics.mem[1], mse.max())
        self.metrics.mem[2] = min(self.metrics.mem[2], mse.min())

        if self.labels and self.class_targets:
            self.class_targets.map_read()
            self.labels.map_read()
            self.n_err.map_write()
            class_targets = self.class_targets.matrix
            labels = self.labels.mem
            for i, sample in enumerate(output):
                lbl = numpy.linalg.norm(class_targets - sample,
                                        axis=1).argmin()
                if lbl != labels[i]:
                    self.n_err.mem[0] += 1
                self.n_err.mem[1] += 1

    def merge_output(self):
        if not isinstance(self.normalizer, NoneNormalizer):
            output = self.output[:self.batch_size].copy()
            self.normalizer.denormalize(output)
        else:
            output = self.output.mem
        self.merged_output[self.offset - self.batch_size:self.offset] = output
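
A minimal sketch of the per-sample error computed in EvaluatorMSE.numpy_run,
assuming the default NoneNormalizer: mse[i] is the mean of squared
differences over a sample, square-rooted when root=True. Names are
illustrative.

import numpy

output = numpy.random.rand(5, 10)  # 5 samples of 10 values
target = numpy.random.rand(5, 10)
mse = numpy.square(output - target).sum(axis=1) / output.shape[1]
rmse = numpy.sqrt(mse)  # reported when root=True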
Example #32
class KohonenTrainer(KohonenBase, AcceleratedUnit):
    """KohonenForward train pass.

    Must be assigned before initialize():
        input
        shape

    Creates within initialize():
        weights
        winners
        argmins
        _distances
        _coords

    Updates after run():
        weights

    Attributes:
        weights: weights of the current layer.
        input: input of the current layer as batch of 1D samples.
        krn_dist_: computes distances between input and neuron weights.
        _krn_argmin_: finds indexes of minimal computed distances.
        krn_gravity_: computes gravity to the winner neuron.
        krn_apply_gradients_: applies gradient to weights.
    """
    def __init__(self, workflow, **kwargs):
        super(KohonenTrainer, self).__init__(workflow, **kwargs)
        self._distances = Array()
        self.argmins = Array()
        self._coords = Array()
        self.weights = Array()
        self.winners = Array()
        self.weights_filling = kwargs.get("weights_filling", "uniform")
        self.weights_stddev = kwargs.get("weights_stddev", None)
        self.weights_transposed = kwargs.get("weights_transposed", False)
        self.time = 0
        self._sigma = 0
        self.gradient_decay = kwargs.get("gradient_decay",
                                         lambda t: 0.1 / (1.0 + t * 0.05))
        self.radius_decay = kwargs.get("radius_decay",
                                       lambda t: 1.0 / (1.0 + t * 0.05))
        self.demand("input", "shape")
        self._shape = kwargs.get("shape")

    def init_unpickled(self):
        super(KohonenTrainer, self).init_unpickled()
        self.sources_["kohonen"] = {"TRAIN": 1}
        self._krn_distances_ = None
        self._krn_argmin_ = None
        self._krn_gravity_ = None
        self._krn_compute_gradients_ = None
        self._krn_apply_gradients_ = None

    @property
    def gravity_radius(self):
        return self.radius_decay(self.time) * self._sigma

    @property
    def gradient_multiplier(self):
        return self.gradient_decay(self.time)

    @property
    def shape(self):
        return self._shape

    @shape.setter
    def shape(self, value):
        self._shape = value

    def initialize(self, device, **kwargs):
        super(KohonenTrainer, self).initialize(device=device, **kwargs)

        self._neurons_number = self.shape[0] * self.shape[1]
        self._sample_length = self.input.mem.size // self.input.mem.shape[0]

        # Initialize weights
        if self.weights_stddev is None:
            # Get weights magnitude and cap it to 0.05
            self.weights_stddev = min(self._get_weights_magnitude(), 0.05)
        weights_size = (self._sample_length * self._neurons_number)
        if not self.weights:
            self.weights.reset(numpy.zeros(weights_size,
                                           dtype=self.input.mem.dtype))
            filling = {
                "uniform": lambda rand: rand.fill(
                    self.weights.mem, -self.weights_stddev,
                    self.weights_stddev),
                "gaussian": lambda rand: rand.fill_normal_real(
                    self.weights.mem, 0, self.weights_stddev)
            }
            filling[self.weights_filling](prng.get())
            self.weights.mem = self.weights.mem.reshape((
                self._neurons_number, self._sample_length))
        else:
            assert self.weights.shape == (self._neurons_number,
                                          self._sample_length)
        if self.weights_transposed:
            # Reshape weights as a matrix:
            wtrncopy = self.weights.mem.transpose().copy()
            self.weights.mem.shape = wtrncopy.shape
            self.weights.mem[:] = wtrncopy[:]
        self._sample_length = \
            self.weights.mem.shape[0 if self.weights_transposed else 1]

        # Initialize winners
        self.winners.reset(numpy.zeros(self._neurons_number, numpy.int32))

        # Initialize distances
        batch_size = self.input.mem.shape[0]
        self._distances.reset(numpy.zeros(
            [batch_size, self._neurons_number],
            dtype=self.weights.mem.dtype))
        self.argmins.reset(numpy.zeros(batch_size, dtype=numpy.int32))
        self._coords.reset(numpy.zeros([self._neurons_number, 2],
                                       dtype=self.weights.mem.dtype))
        sz = self._neurons_number
        rows = int(numpy.round(numpy.sqrt(sz)))
        cols = sz // rows
        if sz % rows != 0:
            cols += 1
        x_min = -1.0
        x_max = 1.0
        y_min = -1.0
        y_max = 1.0
        x_step = (x_max - x_min) / (cols - 1) if cols > 1 else 0
        y = y_min
        y_step = (y_max - y_min) / (rows - 1) if rows > 1 else 0
        offs = 0
        mem = self._coords.mem
        for _row in range(rows):
            x = x_min + (x_step * 0.5 if _row & 1 else 0)
            for _col in range(cols):
                mem[offs, 0] = x
                mem[offs, 1] = y
                offs += 1
                x += x_step
            y += y_step

        self._sigma = (self._coords.mem.ravel().max() -
                       self._coords.mem.ravel().min()) * 1.42

    def ocl_init(self):
        self.input.initialize(self.device)
        self.weights.initialize(self.device)
        self.winners.initialize(self.device)
        self.argmins.initialize(self.device)
        self._distances.initialize(self.device)
        self._coords.initialize(self.device)

        batch_size = self.input.mem.shape[0]
        chunk_size = self._neurons_number // self.device.max_group_size
        if chunk_size < 2:
            chunk_size = self._neurons_number // 2 + 1
        self.argmin_group_size = int(numpy.ceil(float(self._neurons_number) /
                                                chunk_size))

        block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        defines = {
            'BLOCK_SIZE': block_size,
            'VECTOR_OPT': int(bool(vector_opt)),
            'BATCH': batch_size,
            'SAMPLE_LENGTH': self._sample_length,
            'NEURONS_NUMBER': self._neurons_number,
            'CHUNK_SIZE': chunk_size,
            'GRADIENT_CHUNK_SIZE': self.device.max_group_size,
            'coord_type':  "%s%d" %
            (opencl_types.numpy_dtype_to_opencl(self._coords.mem.dtype),
             self._coords.mem.shape[-1])
        }
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1
        self.build_program(defines, "%s_%d_%d_%d" %
                           (self.__class__.__name__,
                            batch_size, self._sample_length,
                            self._neurons_number),
                           dtype=self.weights.mem.dtype)

        self.ocl_consts_ = numpy.zeros(1, dtype=self.weights.mem.dtype)

        self._krn_distances_ = self.get_kernel("calculate_distances")
        self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                      self._distances.devmem)

        self._krn_argmin_ = self.get_kernel("calculate_argmin")
        self._krn_argmin_.set_args(self._distances.devmem, self.argmins.devmem,
                                   self.winners.devmem)

        self._krn_gravity_ = self.get_kernel("compute_gravity")
        self._krn_gravity_.set_args(self.argmins.devmem, self._coords.devmem)
        self._krn_gravity_.set_arg(3, self._distances.devmem)

        self._krn_apply_gradient_ = self.get_kernel("apply_gradient")
        self._krn_apply_gradient_.set_args(self.input.devmem,
                                           self._distances.devmem)
        self._krn_apply_gradient_.set_arg(3, self.weights.devmem)

        self._gs_distance = [
            roundup(self._neurons_number, block_size),
            roundup(batch_size, block_size)]
        self._ls_distance = [block_size, block_size]

    def iteration(fn):
        """Decorator which increments self.time after each call of fn."""
        def wrapped(self, *args, **kwargs):
            result = fn(self, *args, **kwargs)
            self.time += 1
            return result

        name = getattr(fn, '__name__', getattr(fn, 'func', wrapped).__name__)
        wrapped.__name__ = name + '_iteration'
        return wrapped

    @iteration
    def numpy_run(self):
        batch_size = self.input.mem.shape[0]
        neurons_number = self._neurons_number
        dists = numpy.empty(neurons_number)
        gradients = numpy.zeros(self.weights.mem.shape)
        sigma = self.gravity_radius
        gmult = self.gradient_multiplier
        self.input.map_read()
        self.weights.map_invalidate()
        self.winners.map_invalidate()

        for sindex in range(batch_size):
            dist = self.weights.mem - self.input[sindex]
            winner = numpy.argmin(self.numpy_linalg_norm(dist))
            self.winners[winner] += 1
            winner_coords = self._coords.mem[winner]
            for nindex in range(neurons_number):
                dist = self._coords.mem[nindex] - winner_coords
                dists[nindex] = numpy.sum(dist * dist)
            gravity = numpy.exp(dists / (-2 * sigma * sigma))
            gradients += gravity.reshape((1, neurons_number)).transpose() * \
                (self.input[sindex] - self.weights.mem) * gmult
        self.weights.mem += gradients

    @iteration
    def ocl_run(self):
        self.unmap_vectors(self.input, self.weights, self.winners,
                           self._distances, self.argmins, self._coords)

        batch_size = self.input.mem.shape[0]
        self.execute_kernel(self._gs_distance, self._ls_distance,
                            self._krn_distances_)
        self.execute_kernel([self.argmin_group_size],
                            [self.argmin_group_size],
                            self._krn_argmin_)
        self.ocl_consts_[0] = self.gravity_radius
        self._krn_gravity_.set_arg(2, self.ocl_consts_[0:1])
        self.execute_kernel([batch_size, self._neurons_number], None,
                            self._krn_gravity_)
        self.ocl_consts_[0] = self.gradient_multiplier
        self._krn_apply_gradient_.set_arg(2, self.ocl_consts_[0:1])
        self.execute_kernel(
            [int(numpy.ceil(self._sample_length / self.device.max_group_size)),
             self.device.max_group_size],
            None, self._krn_apply_gradient_)

    iteration = staticmethod(iteration)

    def _get_weights_magnitude(self):
        """
        Returns: weights magnitude for initial random distribution,
                 such that activation function will be near maximum
                 if all input values are at their supposed max value.

        Doesn't matter for classic Kohonen networks,
        get values as in All2AllTanh.
        """
        d = self.input.max_supposed * self._sample_length
        if self.input.mem.dtype in (numpy.complex64, numpy.complex128):
            return 1.0 / d
        return 9.0 / d
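
A hedged sketch of the neighborhood "gravity" used in KohonenTrainer's
numpy_run: each neuron is pulled toward the sample with a Gaussian weight
that decays with its grid distance from the winner. All names are
illustrative.

import numpy

coords = numpy.random.rand(16, 2)  # neuron grid coordinates
winner, sigma = 3, 1.0             # assumed winner index and gravity radius
d2 = numpy.sum((coords - coords[winner]) ** 2, axis=1)
gravity = numpy.exp(d2 / (-2 * sigma * sigma))  # same formula as numpy_run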
Example #33
class ImageLoader(LoaderWithValidationRatio):
    """Base class for all image loaders. It is generally used for loading large
    datasets.

    Attributes:
        color_space: the color space to which to convert images. Can be any of
                     the values supported by OpenCV, e.g., GRAY or HSV.
        source_dtype: dtype to work with during various image operations.
        shape: image shape (tuple) - set after initialize().

    Must be overridden in child classes:
        get_image_label()
        get_image_info()
        get_image_data()
        get_keys()
    """

    def __init__(self, workflow, **kwargs):
        super(ImageLoader, self).__init__(workflow, **kwargs)
        self.color_space = kwargs.get("color_space", "RGB")
        self._source_dtype = numpy.float32
        self._original_shape = tuple()
        self.class_keys = [[], [], []]
        self.verify_interface(IImageLoader)
        self.path_to_mean = kwargs.get("path_to_mean", None)
        self.add_sobel = kwargs.get("add_sobel", False)
        self.mirror = kwargs.get("mirror", False)  # True, False, "random"
        self.scale = kwargs.get("scale", 1.0)
        self.scale_maintain_aspect_ratio = kwargs.get(
            "scale_maintain_aspect_ratio", True)
        self.rotations = kwargs.get("rotations", (0.0,))  # radians
        self.crop = kwargs.get("crop", None)
        self.crop_number = kwargs.get("crop_number", 1)
        self._background = None
        self.background_image = kwargs.get("background_image", None)
        self.background_color = kwargs.get(
            "background_color", (0xff, 0x14, 0x93))
        self.smart_crop = kwargs.get("smart_crop", True)
        self.minibatch_label_values = Array()

    @property
    def source_dtype(self):
        return self._source_dtype

    @property
    def color_space(self):
        return self._color_space

    @color_space.setter
    def color_space(self, value):
        self._validate_color_space(value)
        self._color_space = value

    @Loader.shape.getter
    def shape(self):
        """
        :return: Final cropped image shape.
        """
        if self.crop is not None:
            shape = self.crop
        else:
            shape = self.uncropped_shape
        if self.channels_number > 1:
            shape += (self.channels_number,)
        return shape

    @property
    def uncropped_shape(self):
        """
        :return: Uncropped (but scaled) image shape.
        """
        if not isinstance(self.scale, tuple):
            if self._original_shape == tuple():
                return tuple()
            return self._scale_shape(self._original_shape)[:2]
        else:
            return self.scale

    @property
    def original_shape(self):
        return self._original_shape

    @original_shape.setter
    def original_shape(self, value):
        if value is None:
            raise ValueError("shape must not be None")
        if not isinstance(value, tuple):
            raise TypeError("shape must be a tuple (got %s)" % (value,))
        if len(value) not in (2, 3):
            raise ValueError("len(shape) must be equal to 2 or 3 (got %s)" %
                             (value,))
        for i, d in enumerate(value):
            if not isinstance(d, int):
                raise TypeError("shape[%d] is not an integer (= %s)" % (i, d))
            if d < 1:
                raise ValueError("shape[%d] < 1 (= %s)" % (i, d))
        self._original_shape = value

    @property
    def scale(self):
        return self._scale

    @scale.setter
    def scale(self, value):
        if not isinstance(value, (float, tuple)):
            raise TypeError("scale must be either float or tuple of two ints"
                            " (got %s of type %s)" % (value, value.__class__))
        if isinstance(value, tuple):
            if len(value) != 2:
                raise ValueError("scale must have length 2 (not %d in %s)" %
                                 (len(value), value))
            if not isinstance(value[0], int) or not isinstance(value[1], int):
                raise ValueError("scale must consist of integers (got %s)" %
                                 value)
        self._scale = value

    @property
    def crop(self):
        return self._crop

    @crop.setter
    def crop(self, value):
        if value is None:
            self._crop = None
            return
        if not isinstance(value, tuple):
            raise TypeError(
                "crop must be a tuple of 2 integers or floats (got %s)" %
                value)
        if len(value) != 2:
            raise ValueError("invalid crop length (got %d for %s), must be 2" %
                             (len(value), value))
        for i, val in enumerate(value):
            if not isinstance(val, (int, float)):
                raise TypeError(
                    "crop[%d] = %s is neither an integer nor a float" %
                    (i, val))
            if isinstance(val, int) and val < 1:
                raise ValueError(
                    "crop[%d] = %s is out of range" % (i, val))
            if isinstance(val, float):
                if val <= 0 or val > 1:
                    raise ValueError(
                        "Out of range crop %s: %s" %
                        (("height", "width")[i], val))
        self._crop = value

    @property
    def crop_number(self):
        return self._crop_number

    @crop_number.setter
    def crop_number(self, value):
        if not isinstance(value, int):
            raise TypeError("crop_number must be an integer (got %s)" % value)
        if value < 1:
            raise ValueError(
                "crop_number must be greater than zero (got %d)" % value)
        if value > 1 and self.crop is None:
            raise ValueError(
                "crop parameter is None, refusing to set crop_number")
        self._crop_number = value

    @property
    def smart_crop(self):
        """
        :return: Value indicating whether to crop only around bboxes.
        """
        return self._smart_crop

    @smart_crop.setter
    def smart_crop(self, value):
        if not isinstance(value, bool):
            raise TypeError("smart_crop must be a boolean value")
        self._smart_crop = value

    @property
    def mirror(self):
        return self._mirror

    @mirror.setter
    def mirror(self, value):
        if value not in (False, True, "random"):
            raise ValueError(
                "mirror must be any of the following: False, True, \"random\"")
        self._mirror = value

    @property
    def rotations(self):
        return self._rotations

    @rotations.setter
    def rotations(self, value):
        if not isinstance(value, tuple):
            raise TypeError("rotations must be a tuple (got %s)" % value)
        for i, rot in enumerate(value):
            if not isinstance(rot, float):
                raise TypeError(
                    "rotations[%d] = %s is not a float" % (i, rot))
            if rot >= numpy.pi * 2:
                raise ValueError(
                    "rotations[%d] = %s is greater than 2π" % (i, rot))
        self._rotations = tuple(sorted(value))

    @property
    def samples_inflation(self):
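        # One source image yields samples_inflation served samples: x2 when
        # mirror is True (original + flipped copy), x len(rotations) and
        # x crop_number.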
        return (1 if self.mirror is not True else 2) * len(self.rotations) * \
            self.crop_number

    @property
    def background_image(self):
        return self._background_image

    @background_image.setter
    def background_image(self, value):
        if isinstance(value, str):
            with open(value, "rb") as fin:
                self.background_image = fin
        elif hasattr(value, "read") and hasattr(value, "seek"):
            self.background_image = numpy.array(Image.open(value))
        elif isinstance(value, numpy.ndarray):
            if value.shape != self.shape:
                raise error.BadFormatError(
                    "background_image's shape %s != sample's shape "
                    "%s" % (value.shape, self.shape))
            self._background_image = value
            if getattr(self, "background_color", None) is not None:
                self.warning(
                    "background_color = %s is ignored in favor of "
                    "background_image", self.background_color)
        elif value is None:
            self._background_image = None
        else:
            raise ValueError(
                "background_image must be any of the following: "
                "file name, file object, numpy array or None")

    @property
    def background_color(self):
        return self._background_color

    @background_color.setter
    def background_color(self, value):
        if value is None:
            self._background_color = None
            return
        if not isinstance(value, tuple):
            raise TypeError(
                "background_color must be a tuple (got %s)" % value)
        if len(value) != self.channels_number:
            raise ValueError(
                "background_color must have the same length as the number of "
                "channels = %d (got length %d for %s)" %
                (self.channels_number, len(value), value))
        for i, col in enumerate(value):
            if not isinstance(col, int):
                raise TypeError(
                    "background_color[%d] = %s is not an integer" % (i, col))
        if getattr(self, "background_image", None) is not None:
            self.warning(
                "background_color = %s is ignored in favor of "
                "background_image", value)
        self._background_color = value

    @property
    def background(self):
        if self._background is None:
            if self.background_image is not None:
                self._background = self.background_image
            else:
                self._background = numpy.zeros(self.shape)
                self._background[:] = self.background_color
        return self._background.copy()

    @property
    def channels_number(self):
        channels = COLOR_CHANNELS_MAP[self.color_space]
        if self.add_sobel:
            channels += 1
        return channels

    def get_effective_image_info(self, key):
        info = self.get_image_info(key)
        if self.scale == 1.0:
            return info
        if isinstance(self.scale, tuple):
            return self.scale, info[1]
        else:
            return self._scale_shape(info[0]), info[1]

    def get_image_bbox(self, key, size):
        """
        Override this method for custom label <-> bbox mapping.
        :param key: The image key.
        :param size: The image size (for optimization purposes).
        :return: (ymin, ymax, xmin, xmax).
        """
        return 0, size[0], 0, size[1]

    def preprocess_image(self, data, color, crop, bbox):
        """
        Transforms images before serving.
        :param data: the loaded image data.
        :param color: The loaded image color space.
        :param crop: True if the scaled image must be cropped; otherwise,
        False.
        :param bbox: The bounding box of the labeled object. Tuple
        (ymin, ymax, xmin, xmax).
        :return: The transformed image data, the label value (from 0 to 1)
        and the updated bbox.
        """
        if color != self.color_space:
            method = getattr(
                cv2, "COLOR_%s2%s" % (color, self.color_space), None)
            if method is None:
                aux_method = getattr(cv2, "COLOR_%s2BGR" % color)
                try:
                    data = cv2.cvtColor(data, aux_method)
                except cv2.error as e:
                    self.error("Failed to perform '%s' conversion", aux_method)
                    raise from_none(e)
                method = getattr(cv2, "COLOR_BGR2%s" % self.color_space)
            try:
                data = cv2.cvtColor(data, method)
            except cv2.error as e:
                self.error("Failed to perform '%s' conversion", method)
                raise from_none(e)

        if self.add_sobel:
            data = self.add_sobel_channel(data)
        if self.scale != 1.0:
            data, bbox = self.scale_image(data, bbox)
        if crop and self.crop is not None:
            data, label_value = self.crop_image(data, bbox)
        else:
            label_value = 1

        return data, label_value, bbox

    def scale_image(self, data, bbox):
        bbox = numpy.array(bbox, float)
        if self.scale_maintain_aspect_ratio:
            if data.shape[1] >= data.shape[0]:
                dst_width = self.uncropped_shape[:2][1]
                dst_height = int(numpy.round(
                    float(dst_width) * data.shape[0] / data.shape[1]))
            else:
                dst_height = self.uncropped_shape[:2][0]
                dst_width = int(numpy.round(
                    float(dst_height) * data.shape[1] / data.shape[0]))
            dst_x_min = int(
                numpy.round(
                    0.5 * (self.uncropped_shape[:2][1] - dst_width)))
            dst_y_min = int(
                numpy.round(
                    0.5 * (self.uncropped_shape[:2][0] - dst_height)))
            data = cv2.resize(
                data, (dst_width, dst_height),
                interpolation=cv2.INTER_CUBIC)
            dst_x_max = dst_x_min + data.shape[1]
            dst_y_max = dst_y_min + data.shape[0]
            sample = self.background
            sample[dst_y_min:dst_y_max, dst_x_min:dst_x_max] = data
            data = sample.copy()
            bbox[:2] *= (dst_y_max - dst_y_min) / (bbox[1] - bbox[0])
            bbox[:2] += dst_y_min
            bbox[2:] *= (dst_x_max - dst_x_min) / (bbox[3] - bbox[2])
            bbox[2:] += dst_x_min
        else:
            data = cv2.resize(
                data, tuple(reversed(self.uncropped_shape[:2])),
                interpolation=cv2.INTER_CUBIC)
            bbox[:2] *= self.uncropped_shape[0] / (bbox[1] - bbox[0])
            bbox[2:] *= self.uncropped_shape[1] / (bbox[3] - bbox[2])
        return data, tuple(bbox.astype(numpy.int32))

    def add_sobel_channel(self, data):
        original_data = data
        if self.channels_number == 1 + 1:
            original_data = original_data.reshape(
                original_data.shape[:2] + (1,))
        elif self.color_space in ("RGB", "BGR", "RGBA", "BGRA"):
            data = cv2.cvtColor(
                data, getattr(cv2, "COLOR_%s2GRAY" % self.color_space))
        elif self.color_space == "HSV":
            data = data[:, :, 2]
        elif self.color_space == "YCR_CB":
            data = data[:, :, 0]
        else:
            raise NotImplementedError(
                "Conversion from %s to GRAY is not ready" % self.color_space)
        sobel_xy = tuple(cv2.Sobel(data, cv2.CV_32F, *d, ksize=3)
                         for d in ((1, 0), (0, 1)))
        sobel_data = numpy.zeros(
            shape=data.shape + (original_data.shape[2] + 1,),
            dtype=original_data.dtype)
        # Per-pixel gradient magnitude of the two Sobel responses.
        sobel_data[:, :, -1] = numpy.sqrt(
            sobel_xy[0] ** 2 + sobel_xy[1] ** 2)
        sobel_data[:, :, :-1] = original_data
        return sobel_data

    def crop_image(self, data, bbox):
        """
        Cuts a rectangular part of an image.
        :param data: The source image to crop.
        :param bbox: (ymin, ymax, xmin, xmax)
        :return: tuple (image part randomly cropped around the bbox,\
        intersection ratio)
        """
        crop_hw_yx = [[0, 0], [0, 0]]
        for i in 0, 1:
            crop_hw_yx[0][i] = self.crop[i] if isinstance(self.crop[i], int) \
                else int(self.crop[i] * data.shape[i])
            crop_size = crop_hw_yx[0][i]
            crop_hw_yx[1][i] = self.prng.randint(
                max(bbox[i * 2] - crop_size, 0),
                min(data.shape[i] - crop_size + 1,
                    bbox[i * 2 + 1] + crop_size))
        crop_first = crop_hw_yx[1]
        crop_last = tuple(crop_hw_yx[1][i] + crop_hw_yx[0][i]
                          for i in (0, 1))
        crop_bbox = crop_first[0], crop_last[0], crop_first[1], crop_last[1]
        return data[crop_bbox[0]:crop_bbox[1], crop_bbox[2]:crop_bbox[3]], \
            self._intersection(bbox, crop_bbox)

    def distort(self, data, mirror, rot):
        if mirror:
            data = cv2.flip(data, 1)
        # Append a mask channel of ones; after warpAffine it holds 1 where
        # real pixels landed and 0 where the rotation exposed the background,
        # so it serves as an alpha for the blending below (numpy.dstack keeps
        # the pixel layout intact, unlike numpy.resize).
        data = numpy.dstack(
            (data, numpy.ones(data.shape[:2], dtype=data.dtype)))
        center = tuple(reversed(tuple(data.shape[i] // 2 for i in (0, 1))))
        rot_matrix = cv2.getRotationMatrix2D(
            center, rot * 180 / numpy.pi, 1.0)
        data = cv2.warpAffine(data, rot_matrix,
                              tuple(reversed(data.shape[:2])))
        real = data[:, :, :-1]
        imag = data[:, :, -1]
        real *= imag[..., None]
        real += self.background * (1 - imag)[..., None]
        return real

    def get_distortion_by_index(self, index):
        index //= self.crop_number
        if self.mirror is True:
            return index % 2 == 1, self.rotations[index // 2]
        elif self.mirror == "random":
            mirror = bool(self.prng.randint(2))
        else:
            mirror = False
        return mirror, self.rotations[index]

    def load_keys(self, keys, pbar, data, labels, label_values, crop=True):
        """Loads data from the specified keys.
        """
        index = 0
        has_labels = False
        for key in keys:
            obj, label_value, _ = self._load_image(key)
            label, has_labels = self._load_label(key, has_labels)
            if (self.crop is None or not crop) and \
                    obj.shape[:2] != self.uncropped_shape:
                self.warning(
                    "Ignored %s (label %s): shape %s",
                    key, label, obj.shape[:2])
                continue
            if data is not None:
                data[index] = obj
            if labels is not None:
                labels[index] = label
            if label_values is not None:
                label_values[index] = label_value
            index += 1
            if pbar is not None:
                pbar.inc()
        return has_labels

    def load_labels(self):
        if not self.has_labels:
            return
        self.info("Reading labels...")
        different_labels = defaultdict(int), defaultdict(int), defaultdict(int)
        label_key_map = defaultdict(list), defaultdict(list), defaultdict(list)
        pb = ProgressBar(maxval=self.total_samples, term_width=40)
        pb.start()
        for class_index in range(3):
            for key in self.class_keys[class_index]:
                label, has_labels = self._load_label(key, True)
                assert has_labels
                different_labels[class_index][label] += 1
                label_key_map[class_index][label].append(key)
                self._samples_mapping[label].add(key)
                pb.inc()
        pb.finish()

        return different_labels, label_key_map

    def initialize(self, **kwargs):
        self._restored_from_pickle_ = kwargs["snapshot"]
        super(ImageLoader, self).initialize(**kwargs)
        del self._restored_from_pickle_

    def load_data(self):
        try:
            super(ImageLoader, self).load_data()
        except AttributeError:
            pass
        if self._restored_from_pickle_:
            self.info("Scanning for changes...")
            progress = ProgressBar(maxval=self.total_samples, term_width=40)
            progress.start()
            for keys in self.class_keys:
                for key in keys:
                    progress.inc()
                    size, _ = self.get_effective_image_info(key)
                    if size != self.uncropped_shape:
                        raise error.BadFormatError(
                            "%s changed the effective size (now %s, was %s)" %
                            (key, size, self.uncropped_shape))
            progress.finish()
            return
        for keys in self.class_keys:
            del keys[:]
        for index, class_name in enumerate(CLASS_NAME):
            keys = set(self.get_keys(index))
            self.class_keys[index].extend(keys)
            self.class_lengths[index] = len(keys) * self.samples_inflation
            self.class_keys[index].sort()

        if self.uncropped_shape == tuple():
            raise error.BadFormatError(
                "original_shape was not initialized in get_keys()")
        self.info(
            "Found %d samples of shape %s (%d TEST, %d VALIDATION, %d TRAIN)",
            self.total_samples, self.shape, *self.class_lengths)

        # Perform a quick (unreliable) test to determine if we have labels
        keys = next(k for k in self.class_keys if len(k) > 0)
        self._has_labels = self.load_keys(
            (keys[RandomGenerator(None).randint(len(keys))],),
            None, None, None, None)
        self._resize_validation_keys(self.load_labels())

    def create_minibatch_data(self):
        self.minibatch_data.reset(numpy.zeros(
            (self.max_minibatch_size,) + self.shape, dtype=self.dtype))

        self.minibatch_label_values.reset(numpy.zeros(
            self.max_minibatch_size, numpy.float32))

    def keys_from_indices(self, indices):
        for index in indices:
            class_index, origin_index, _ = \
                self._get_class_origin_distortion_from_index(index)
            yield self.class_keys[class_index][origin_index]

    def fill_minibatch(self):
        indices = self.minibatch_indices.mem[:self.minibatch_size]
        assert self.has_labels == self.load_keys(
            self.keys_from_indices(indices), None, self.minibatch_data.mem,
            self.raw_minibatch_labels, self.minibatch_label_values)
        if self.samples_inflation == 1:
            return
        for pos, index in enumerate(indices):
            _, _, dist_index = \
                self._get_class_origin_distortion_from_index(index)
            self.minibatch_data[pos] = self.distort(
                self.minibatch_data[pos],
                *self.get_distortion_by_index(dist_index))

    def _resize_validation_keys(self, label_analysis):
        if label_analysis is None:
            return
        different_labels, label_key_map = label_analysis
        if self.validation_ratio is None:
            self._setup_labels_mapping(different_labels)
            return
        if self.validation_ratio < 0:
            self.class_keys[TRAIN] += self.class_keys[VALID]
            self.class_lengths[TRAIN] += self.class_lengths[VALID]
            del self.class_keys[VALID][:]
            self.class_lengths[VALID] = 0
            merged = {k: (different_labels[VALID][k] +
                          different_labels[TRAIN][k])
                      for k in label_key_map[TRAIN]}
            self._setup_labels_mapping((different_labels[TEST], {}, merged))
            return

        overall = sum(len(ck) for ck in self.class_keys[VALID:])
        target_validation_length = int(overall * self.validation_ratio)

        if not self.has_labels:
            keys = list(chain.from_iterable(self.class_keys[VALID:]))
            keys.sort()
            self.prng.shuffle(keys)
            del self.class_keys[VALID][:]
            self.class_keys[VALID].extend(keys[:target_validation_length])
            del self.class_keys[TRAIN][:]
            self.class_keys[TRAIN].extend(keys[target_validation_length:])
            self._finalize_resizing_validation(different_labels, label_key_map)
            return

        # We must ensure that each set has the same labels
        # The first step is to pick two keys for each label and distribute them
        # into VALID and TRAIN evenly
        if len(label_key_map[TRAIN]) > target_validation_length:
            raise LoaderError(
                "Unable to set the new size of the validation set to %d (%.3f)"
                " since the number of labels is %d" %
                (target_validation_length * self.samples_inflation,
                 self.validation_ratio, len(label_key_map[TRAIN])))
        if overall - target_validation_length < len(label_key_map[TRAIN]):
            raise LoaderError(
                "Unable to set the new size of the training set to %d (%.3f) "
                "since the number of labels is %d" %
                ((overall - target_validation_length) * self.samples_inflation,
                 1.0 - self.validation_ratio, len(label_key_map[TRAIN])))
        vt_label_key_map = {l: (label_key_map[VALID].get(l, []) +
                                label_key_map[TRAIN].get(l, []))
                            for l in label_key_map[TRAIN]}
        for i in VALID, TRAIN:
            del self.class_keys[i][:]
        for label, keys in sorted(vt_label_key_map.items()):
            if len(keys) < 2:
                raise LoaderError("Label %s has less than 2 keys" % label)
            choice = self.prng.choice(len(keys), 2, replace=False)
            assert choice[0] != choice[1]
            for i in VALID, TRAIN:
                self.class_keys[i].append(keys[choice[i - 1]])
            for c in sorted(choice, reverse=True):
                del keys[c]

        # Distribute the left keys randomly
        left_keys = list(sorted(chain.from_iterable(
            vt_label_key_map.values())))
        self.prng.shuffle(left_keys)
        offset_val_length = \
            target_validation_length - len(vt_label_key_map)
        self.class_keys[VALID].extend(left_keys[:offset_val_length])
        self.class_keys[TRAIN].extend(left_keys[offset_val_length:])
        self._finalize_resizing_validation(different_labels, label_key_map)

    def _finalize_resizing_validation(self, different_labels, label_key_map):
        for ck in self.class_keys[VALID:]:
            ck.sort()
        for i in VALID, TRAIN:
            self.class_lengths[i] = len(self.class_keys[i]) * \
                self.samples_inflation
        new_diff = defaultdict(int), defaultdict(int)
        key_label_map = {}
        for ci in VALID, TRAIN:
            key_label_map.update({k: l
                                  for l, keys in label_key_map[ci].items()
                                  for k in keys})
        for ci in VALID, TRAIN:
            for key in self.class_keys[ci]:
                new_diff[ci - 1][key_label_map[key]] += 1
        self._setup_labels_mapping((different_labels[TEST],) + new_diff)

    def _get_class_origin_distortion_from_index(self, index):
        class_index, key_remainder = self.class_index_by_sample_index(index)
        key_index = self.class_lengths[class_index] - key_remainder
        return (class_index,) + divmod(key_index, self.samples_inflation)

    def _load_image(self, key, crop=True):
        """Returns the data to serve corresponding to the given image key and
        the label value (from 0 to 1).
        """
        data = self.get_image_data(key)
        size, color = self.get_image_info(key)
        bbox = self.get_image_bbox(key, size)
        return self.preprocess_image(data, color, crop, bbox)

    def _load_label(self, key, has_labels):
        label = self.get_image_label(key)
        if label is not None:
            has_labels = True
        if has_labels and label is None:
            raise error.BadFormatError(
                "%s does not have a label, but others do" % key)
        return label, has_labels

    def _intersection(self, bbox_a, bbox_b):
        ymin_a, ymax_a, xmin_a, xmax_a = bbox_a
        ymin_b, ymax_b, xmin_b, xmax_b = bbox_b

        x_intersection = min(xmax_a, xmax_b) - max(xmin_a, xmin_b)
        y_intersection = min(ymax_a, ymax_b) - max(ymin_a, ymin_b)

        if x_intersection <= 0 or y_intersection <= 0:
            return 0
        return x_intersection * y_intersection

    def _scale_shape(self, shape):
        return tuple(int(shape[i] * self.scale) for i in (0, 1)) + shape[2:]

    def _validate_color_space(self, value):
        if not isinstance(value, str):
            raise TypeError(
                "db_colorpsace must be a string (got %s)" % type(value))
        if value != "RGB" and not hasattr(cv2, "COLOR_%s2RGB" % value):
            raise ValueError("Unsupported color space: %s" % value)
Example #34
class All2AllSoftmax(All2All):
    """All2All with linear activation and softmax normalization.

    Must be assigned before initialize():

    Updates after run():
        max_idx

    Creates within initialize():
        max_idx

    Attributes:
        krn_sm_: kernel for softmax activation calculation.
        max_idx: indices of the element with the maximum value for each
                 sample.
    """
    __id__ = "420219fc-3e1a-45b1-87f8-aaa0c1540de4"

    MAPPING = {"softmax"}

    def __init__(self, workflow, **kwargs):
        super(All2AllSoftmax, self).__init__(workflow, **kwargs)
        self.max_idx = Array()
        self.reduce_size = 256

    def init_unpickled(self):
        super(All2AllSoftmax, self).init_unpickled()
        self.krn_sm_ = None
        self._force_gpu_apply_exp = False

    def initialize(self, device, **kwargs):
        self.reduce_size = min(self.reduce_size,
                               int(numpy.prod(self.output_sample_shape)))
        self.sources_["all2all/softmax"] = {
            "REDUCE_SIZE": self.reduce_size
        }
        retval = super(All2AllSoftmax, self).initialize(
            device=device, **kwargs)
        if retval:
            return retval
        if self.output.mem.size // self.output.mem.shape[0] <= 1:
            raise error.BadFormatError(
                "Output sample size should be greater than 1 for SoftMax.")

        if not self.max_idx:
            self.max_idx.reset(numpy.zeros(self.output.shape[0],
                                           dtype=numpy.int32))
        self.max_idx.initialize(self.device)
        return retval

    def numpy_apply_exp(self):
        self.output.map_write()
        self.max_idx.map_invalidate()
        out = self.output.mem
        out = reshape(out, (out.shape[0], out.size // out.shape[0]))
        for i, sample in enumerate(out):
            im = sample.argmax()
            self.max_idx[i] = im
            m = sample[im]
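            # Subtract the per-sample maximum before exponentiation: softmax
            # is shift-invariant and this prevents overflow in numpy.exp.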
            sample -= m
            numpy.exp(sample, sample)
            smm = sample.sum()
            sample /= smm

    def ocl_apply_exp(self):
        self.unmap_vectors(self.output, self.max_idx)
        global_size = (self.output.shape[0] * self.reduce_size,)
        local_size = (self.reduce_size,)
        self.execute_kernel(global_size, local_size, self.krn_sm_)

    def cuda_apply_exp(self):
        self.unmap_vectors(self.output, self.max_idx)
        global_size = (self.output.shape[0], 1, 1)
        local_size = (self.reduce_size, 1, 1)
        self.execute_kernel(global_size, local_size, self.krn_sm_)

    def numpy_run(self):
        """Forward propagation from batch on CPU only.
        """
        super(All2AllSoftmax, self).numpy_run()
        if not self._force_gpu_apply_exp:
            self.numpy_apply_exp()

    def ocl_run(self):
        """Forward propagation from batch on GPU.
        """
        self._force_gpu_apply_exp = True
        super(All2AllSoftmax, self).ocl_run()
        self.ocl_apply_exp()

    def cuda_run(self):
        """Forward propagation from batch on GPU.
        """
        self._force_gpu_apply_exp = True
        super(All2AllSoftmax, self).cuda_run()
        self.cuda_apply_exp()

    def ocl_init(self):
        super(All2AllSoftmax, self).ocl_init()
        self.krn_sm_ = self.get_kernel("apply_exp")
        self.krn_sm_.set_args(self.output.devmem, self.max_idx.devmem)

    def cuda_init(self):
        super(All2AllSoftmax, self).cuda_init()
        self.krn_sm_ = self.get_kernel("apply_exp")
        self.krn_sm_.set_args(self.output.devmem, self.max_idx.devmem)
Example #35
class KohonenTrainer(KohonenBase, AcceleratedUnit):
    """KohonenForward train pass.

    Must be assigned before initialize():
        input
        shape

    Creates within initialize():
        weights
        winners
        argmins
        _distances
        _coords

    Updates after run():
        weights

    Attributes:
        weights: weights of the current layer.
        input: input of the current layer as batch of 1D samples.
        krn_dist_: computes distances between input and neuron weights.
        _krn_argmin_: finds indexes of minimal computed distances.
        krn_gravity_: computes gravity to the winner neuron.
        krn_apply_gradients_: applies gradient to weights.
    """
    def __init__(self, workflow, **kwargs):
        super(KohonenTrainer, self).__init__(workflow, **kwargs)
        self._distances = Array()
        self.argmins = Array()
        self._coords = Array()
        self.weights = Array()
        self.winners = Array()
        self.weights_filling = kwargs.get("weights_filling", "uniform")
        self.weights_stddev = kwargs.get("weights_stddev", None)
        self.weights_transposed = kwargs.get("weights_transposed", False)
        self.time = 0
        self._sigma = 0
        self.gradient_decay = kwargs.get("gradient_decay", lambda t: 0.1 /
                                         (1.0 + t * 0.05))
        self.radius_decay = kwargs.get("radius_decay", lambda t: 1.0 /
                                       (1.0 + t * 0.05))
        self.demand("input", "shape")
        self._shape = kwargs.get("shape")

    def init_unpickled(self):
        super(KohonenTrainer, self).init_unpickled()
        self.sources_["kohonen"] = {"TRAIN": 1}
        self._krn_distances_ = None
        self._krn_argmin_ = None
        self._krn_gravity_ = None
        self._krn_compute_gradients_ = None
        self._krn_apply_gradients_ = None

    @property
    def gravity_radius(self):
        return self.radius_decay(self.time) * self._sigma

    @property
    def gradient_multiplier(self):
        return self.gradient_decay(self.time)

    @property
    def shape(self):
        return self._shape

    @shape.setter
    def shape(self, value):
        self._shape = value

    def initialize(self, device, **kwargs):
        super(KohonenTrainer, self).initialize(device=device, **kwargs)

        self._neurons_number = self.shape[0] * self.shape[1]
        self._sample_length = self.input.mem.size // self.input.mem.shape[0]

        # Initialize weights
        if self.weights_stddev is None:
            # Get weights magnitude and cap it to 0.05
            self.weights_stddev = min(self._get_weights_magnitude(), 0.05)
        weights_size = (self._sample_length * self._neurons_number)
        if not self.weights:
            self.weights.reset(
                numpy.zeros(weights_size, dtype=self.input.mem.dtype))
            filling = {
                "uniform": lambda rand: rand.fill(
                    self.weights.mem, -self.weights_stddev,
                    self.weights_stddev),
                "gaussian": lambda rand: rand.fill_normal_real(
                    self.weights.mem, 0, self.weights_stddev),
            }
            filling[self.weights_filling](prng.get())
            self.weights.mem = self.weights.mem.reshape(
                (self._neurons_number, self._sample_length))
        else:
            assert self.weights.shape == (self._neurons_number,
                                          self._sample_length)
        if self.weights_transposed:
            # Reshape weights as a matrix:
            wtrncopy = self.weights.mem.transpose().copy()
            self.weights.mem.shape = wtrncopy.shape
            self.weights.mem[:] = wtrncopy[:]
        self._sample_length = \
            self.weights.mem.shape[0 if self.weights_transposed else 1]

        # Initialize winners
        self.winners.reset(numpy.zeros(self._neurons_number, numpy.int32))

        # Initialize distances
        batch_size = self.input.mem.shape[0]
        self._distances.reset(
            numpy.zeros([batch_size, self._neurons_number],
                        dtype=self.weights.mem.dtype))
        self.argmins.reset(numpy.zeros(batch_size, dtype=numpy.int32))
        self._coords.reset(
            numpy.zeros([self._neurons_number, 2],
                        dtype=self.weights.mem.dtype))
        sz = self._neurons_number
        rows = int(numpy.round(numpy.sqrt(sz)))
        cols = sz // rows
        if sz % rows != 0:
            cols += 1
        x_min = -1.0
        x_max = 1.0
        y_min = -1.0
        y_max = 1.0
        x_step = (x_max - x_min) / (cols - 1) if cols > 1 else 0
        y = y_min
        y_step = (y_max - y_min) / (rows - 1) if rows > 1 else 0
        offs = 0
        mem = self._coords.mem
        for _row in range(rows):
            x = x_min + (x_step * 0.5 if _row & 1 else 0)
            for _col in range(cols):
                mem[offs, 0] = x
                mem[offs, 1] = y
                offs += 1
                x += x_step
            y += y_step

        self._sigma = (self._coords.mem.ravel().max() -
                       self._coords.mem.ravel().min()) * 1.42

    def ocl_init(self):
        self.input.initialize(self.device)
        self.weights.initialize(self.device)
        self.winners.initialize(self.device)
        self.argmins.initialize(self.device)
        self._distances.initialize(self.device)
        self._coords.initialize(self.device)

        batch_size = self.input.mem.shape[0]
        chunk_size = self._neurons_number // self.device.max_group_size
        if chunk_size < 2:
            chunk_size = self._neurons_number // 2 + 1
        self.argmin_group_size = int(
            numpy.ceil(float(self._neurons_number) / chunk_size))

        block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        defines = {
            'BLOCK_SIZE': block_size,
            'VECTOR_OPT': int(bool(vector_opt)),
            'BATCH': batch_size,
            'SAMPLE_LENGTH': self._sample_length,
            'NEURONS_NUMBER': self._neurons_number,
            'CHUNK_SIZE': chunk_size,
            'GRADIENT_CHUNK_SIZE': self.device.max_group_size,
            'coord_type': "%s%d" % (opencl_types.numpy_dtype_to_opencl(
                self._coords.mem.dtype), self._coords.mem.shape[-1])
        }
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1
        self.build_program(defines,
                           "%s_%d_%d_%d" %
                           (self.__class__.__name__, batch_size,
                            self._sample_length, self._neurons_number),
                           dtype=self.weights.mem.dtype)

        self.ocl_consts_ = numpy.zeros(1, dtype=self.weights.mem.dtype)

        self._krn_distances_ = self.get_kernel("calculate_distances")
        self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                      self._distances.devmem)

        self._krn_argmin_ = self.get_kernel("calculate_argmin")
        self._krn_argmin_.set_args(self._distances.devmem, self.argmins.devmem,
                                   self.winners.devmem)

        self._krn_gravity_ = self.get_kernel("compute_gravity")
        self._krn_gravity_.set_args(self.argmins.devmem, self._coords.devmem)
        self._krn_gravity_.set_arg(3, self._distances.devmem)

        self._krn_apply_gradient_ = self.get_kernel("apply_gradient")
        self._krn_apply_gradient_.set_args(self.input.devmem,
                                           self._distances.devmem)
        self._krn_apply_gradient_.set_arg(3, self.weights.devmem)

        self._gs_distance = [
            roundup(self._neurons_number, block_size),
            roundup(batch_size, block_size)
        ]
        self._ls_distance = [block_size, block_size]

    def iteration(fn):
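        # Wraps a run method so that self.time advances by one after each
        # call; the ``iteration = staticmethod(iteration)`` line below
        # restores it as a plain function attribute once the decorated
        # methods have been created.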
        def wrapped(self, *args, **kwargs):
            result = fn(self, *args, **kwargs)
            self.time += 1
            return result

        name = getattr(fn, '__name__', getattr(fn, 'func', wrapped).__name__)
        wrapped.__name__ = name + '_iteration'
        return wrapped

    @iteration
    def numpy_run(self):
        batch_size = self.input.mem.shape[0]
        neurons_number = self._neurons_number
        dists = numpy.empty(neurons_number)
        gradients = numpy.zeros(self.weights.mem.shape)
        sigma = self.gravity_radius
        gmult = self.gradient_multiplier
        self.input.map_read()
        self.weights.map_invalidate()
        self.winners.map_invalidate()

        for sindex in range(batch_size):
            dist = self.weights.mem - self.input[sindex]
            winner = numpy.argmin(self.numpy_linalg_norm(dist))
            self.winners[winner] += 1
            winner_coords = self._coords.mem[winner]
            for nindex in range(neurons_number):
                dist = self._coords.mem[nindex] - winner_coords
                dists[nindex] = numpy.sum(dist * dist)
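            # Gaussian neighborhood: neurons near the winner on the 2D map
            # get gravity close to 1, distant ones close to 0.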
            gravity = numpy.exp(dists / (-2 * sigma * sigma))
            gradients += gravity.reshape((1, neurons_number)).transpose() * \
                (self.input[sindex] - self.weights.mem) * gmult
        self.weights.mem += gradients

    @iteration
    def ocl_run(self):
        self.unmap_vectors(self.input, self.weights, self.winners,
                           self._distances, self.argmins, self._coords)

        batch_size = self.input.mem.shape[0]
        self.execute_kernel(self._gs_distance, self._ls_distance,
                            self._krn_distances_)
        self.execute_kernel([self.argmin_group_size], [self.argmin_group_size],
                            self._krn_argmin_)
        self.ocl_consts_[0] = self.gravity_radius
        self._krn_gravity_.set_arg(2, self.ocl_consts_[0:1])
        self.execute_kernel([batch_size, self._neurons_number], None,
                            self._krn_gravity_)
        self.ocl_consts_[0] = self.gradient_multiplier
        self._krn_apply_gradient_.set_arg(2, self.ocl_consts_[0:1])
        self.execute_kernel([
            int(numpy.ceil(self._sample_length / self.device.max_group_size)),
            self.device.max_group_size
        ], None, self._krn_apply_gradient_)

    iteration = staticmethod(iteration)

    def _get_weights_magnitude(self):
        """
        Returns: weights magnitude for initial random distribution,
                 such that activation function will be near maximum
                 if all input values are at their supposed max value.

        This doesn't matter for classic Kohonen networks; the values are
        taken as in All2AllTanh.
        """
        d = self.input.max_supposed * self._sample_length
        if self.input.mem.dtype in (numpy.complex64, numpy.complex128):
            return 1.0 / d
        return 9.0 / d
Example #36
class GDMultiplier(AcceleratedUnit):
    """Gradient descent for Multiplier.
    """
    def __init__(self, workflow, **kwargs):
        super(GDMultiplier, self).__init__(workflow, **kwargs)
        self.err_x = Array()
        self.err_y = Array()
        self.demand("x", "y", "err_output")

    def initialize(self, device, **kwargs):
        super(GDMultiplier, self).initialize(device, **kwargs)
        if not self.err_x:
            self.err_x.reset(numpy.zeros_like(self.x.mem))
        else:
            assert self.err_x.shape == self.x.shape
        if not self.err_y:
            self.err_y.reset(numpy.zeros_like(self.y.mem))
        else:
            assert self.err_y.shape == self.y.shape
        self.init_vectors(self.err_x, self.err_y,
                          self.x, self.y, self.err_output)

    def init_unpickled(self):
        super(GDMultiplier, self).init_unpickled()
        self.sources_["multiplier"] = {}

    def _gpu_init(self):
        self.build_program({"OUTPUT_SIZE": self.err_output.size},
                           "%s_%d" %
                           (self.__class__.__name__, self.err_output.size),
                           dtype=self.x.dtype)
        self.assign_kernel("multiply_backward")
        self.set_args(self.x, self.y, self.err_output, self.err_x, self.err_y)

    def cuda_init(self):
        self._gpu_init()
        block_size = self.device.suggest_block_size(self._kernel_)
        self._global_size = (
            int(numpy.ceil(self.err_output.size / block_size)), 1, 1)
        self._local_size = (block_size, 1, 1)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = (self.err_output.size, 1, 1)
        self._local_size = None

    def numpy_init(self):
        pass  # nothing to init

    def _gpu_run(self):
        self.unmap_vectors(self.x, self.y, self.err_output,
                           self.err_x, self.err_y)
        self.execute_kernel(self._global_size, self._local_size)

    def cuda_run(self):
        self._gpu_run()

    def ocl_run(self):
        self._gpu_run()

    def numpy_run(self):
        self.x.map_read()
        self.y.map_read()
        self.err_output.map_read()
        self.err_x.map_invalidate()
        self.err_y.map_invalidate()
        numpy.multiply(self.err_output.mem, self.y.mem, self.err_x.mem)
        numpy.multiply(self.err_output.mem, self.x.mem, self.err_y.mem)
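
GDMultiplier is the backward pass of a pointwise multiplier: for
out = x * y the product rule gives d(out)/dx = y and d(out)/dy = x, so the
backpropagated errors are err_x = err_output * y and err_y = err_output * x,
which is exactly what numpy_run() computes. A standalone NumPy check (the
values are arbitrary):

import numpy

x = numpy.array([2.0, -1.0, 0.5])
y = numpy.array([3.0, 4.0, -2.0])
err_output = numpy.array([1.0, 0.5, 2.0])
assert numpy.allclose(err_output * y, [3.0, 2.0, -4.0])  # err_x
assert numpy.allclose(err_output * x, [2.0, -0.5, 1.0])  # err_y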
Example #37
class Deconv(TriviallyDistributable, ConvolutionalBase, nn_units.Forward):
    # TriviallyDistributable overrides nn_units.Forward IDistributable
    """Deconvolutional layer for simple convolutional layer
    with linear activation and without bias.

    Must be assigned before initialize():
        input
        weights
        output_shape_source

    Updates after run():
        output

    Creates within initialize():
        output

    Attributes:
        input: input as batch of multichannel interleaved images.
        output: output as batch of multichannel interleaved images.
        weights: matrix of weights.
        output_shape_source: Array to get output shape from.
        n_kernels: number of convolutional kernels
                   in the corresponding convolutional layer.
        kx: kernel width.
        ky: kernel height.
        sliding: tuple of kernel sliding (by x-axis, by y-axis),
                 kx, ky MUST be a multiple of sliding to avoid irregularities.
        padding: tuple of virtual sample padding (left, top, right, bottom),
                 will be computed automatically based on sliding.
        weights_transposed: assume weights matrix as a transposed one.
        unsafe_padding: flag to enable unsafe padding and/or sliding.
    """

    MAPPING = {"deconv"}

    @staticmethod
    def compute_padding(sx, sy, kx, ky, sliding):
        """Computes required padding.
        """
        return (kx - sliding[1], ky - sliding[0],
                kx - sx % sliding[1] if sx % sliding[1] != 0
                else kx - sliding[1],
                ky - sy % sliding[0] if sy % sliding[0] != 0
                else ky - sliding[0])
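        # Worked example (made-up numbers): compute_padding(27, 28, 5, 5,
        # (2, 2)) -> (3, 3, 4, 3). Left/top get kx - sliding, while the
        # right edge gets one extra pixel because 27 % 2 != 0.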

    @staticmethod
    def check_padding_is_safe(kx, ky, sliding):
        if sliding[0] > (ky >> 1) or sliding[1] > (kx >> 1):
            raise ValueError(
                "sliding should not be greater than half of the kernel size")
        if ky % sliding[0] != 0 or kx % sliding[1] != 0:
            raise ValueError("Kernel size should be a multiple of sliding")

    def __init__(self, workflow, **kwargs):
        super(Deconv, self).__init__(workflow, **kwargs)
        self.unsafe_padding = kwargs.get("unsafe_padding", False)
        self.hits = Array()
        self.krn_clear_output_ = None
        self._global_size = None
        self._local_size = None
        del self.bias
        self.demand("n_kernels", "kx", "ky", "padding", "sliding", "input",
                    "weights", "output_shape_source")

    def init_unpickled(self):
        super(Deconv, self).init_unpickled()
        self.sources_["deconv/forward"] = {}

    def initialize(self, device, **kwargs):
        super(Deconv, self).initialize(device, **kwargs)

        self._dtype = self.input.dtype

        self.weights_shape = (tuple(reversed(self.weights.shape)) if
                              self.weights_transposed else self.weights.shape)

        if hasattr(self, "bias"):
            raise ValueError("bias should not be set")
        if (len(self.input.shape) != 4
                or self.input.shape[3] != self.n_kernels):
            raise ValueError("Incorrectly shaped input encountered")
        if (len(self.weights_shape) != 2
                or self.weights_shape[0] != self.n_kernels
                or self.weights_shape[1] % (self.kx * self.ky) != 0):
            raise ValueError("Incorrectly shaped weights encountered")

        output_shape = tuple(self.output_shape_source.shape)
        if len(output_shape) != 4:
            raise ValueError("Incorrect output_shape_source shape")
        if output_shape[0] != self.input.shape[0]:
            raise ValueError("output_shape_source.shape[0] != input.shape[0]")

        try:
            self.check_padding_is_safe(self.kx, self.ky, self.sliding)
        except ValueError as e:
            if not self.unsafe_padding:
                raise from_none(e)
            self.warning("The padding will be unsafe")
            self._create_hits(output_shape)

        padding = Deconv.compute_padding(output_shape[2], output_shape[1],
                                         self.kx, self.ky, self.sliding)
        if self.padding is None:  # pylint: disable=E0203
            self.padding = padding
        elif self.padding != padding:
            if not self.unsafe_padding:
                raise ValueError("Expected padding %s but got %s" %
                                 (padding, self.padding))
            self._create_hits(output_shape)

        if not self.output:
            self.output.reset(numpy.zeros(output_shape, dtype=self._dtype))
        else:
            assert self.output.shape == output_shape

        self._output_shape = output_shape

        self._sy, self._sx, self._n_channels = self._output_shape[1:]
        self._kernel_size = self.kx * self.ky * self._n_channels

        self._kernel_app_per_image = self.input.sample_size // self.n_kernels
        self._kernel_app_total = (self._kernel_app_per_image *
                                  self.input.shape[0])

        self.init_vectors(self.input, self.weights, self.output, self.hits)

    def _create_hits(self, output_shape):
        if not self.hits:
            self.hits.reset(numpy.zeros(output_shape, dtype=numpy.int32))
        else:
            assert self.hits.size == int(numpy.prod(output_shape))

    def _gpu_init(self, blas_class):
        defines = {
            "USE_ATOMICS": 1,
            "WEIGHTS_TRANSPOSED": int(self.weights_transposed),
            "BATCH": self._output_shape[0],
            "SX": self._sx,
            "SY": self._sy,
            "N_CHANNELS": self._n_channels,
            "KX": self.kx,
            "KY": self.ky,
            "N_KERNELS": self.n_kernels,
            "PAD_LEFT": self.padding[0],
            "PAD_TOP": self.padding[1],
            "PAD_RIGHT": self.padding[2],
            "PAD_BOTTOM": self.padding[3],
            "SLIDE_X": self.sliding[0],
            "SLIDE_Y": self.sliding[1],
            "USE_HITS": int(bool(self.hits)),
            "DECONV_MODE": int(bool(self.hits)) + 1,
            "OUTPUT_SIZE": self.output.size
        }

        self.build_program(
            defines,
            "%s/%s_%d_%dx%dx%d_%dx%d_%d" %
            (root.common.dirs.cache, self.__class__.__name__,
             self.input.shape[0], self._output_shape[2], self._output_shape[1],
             self._output_shape[3], self.kx, self.ky, self.n_kernels),
            dtype=self._dtype)

        self.krn_pack_ = self.get_kernel("DirectPack")
        unpack_bytes = (self._kernel_app_per_image * self.unpack_size *
                        self._kernel_size * self.input.itemsize)
        self.device.request_temp_buffer(unpack_bytes)

        if self.hits:
            self.krn_pack_.set_arg(3, self.hits.devmem)

            self.krn_apply_hits_ = self.get_kernel("apply_hits")
            self.krn_apply_hits_.set_args(self.output.devmem, self.hits.devmem)

        self.gemm_ = blas_class.gemm(self._dtype)
        self.np_one = numpy.ones(1, dtype=self._dtype)
        self.np_zero = numpy.zeros(1, dtype=self._dtype)
        self._const_i = numpy.zeros(1, dtype=numpy.int64)

    def ocl_init(self):
        ocl_blas.OCLBLAS.attach_to_device(self.device)
        self._gpu_init(ocl_blas.OCLBLAS)

        self._global_size_pack = lambda size: (size, )
        self._local_size_pack = None

        if self.hits:
            self.krn_clear_hits_ = self.get_kernel("clear_hits")
            self.krn_clear_hits_.set_arg(0, self.hits.devmem)

            self._global_size_hits = (self.output.size, )
            self._local_size_hits = None

        self.krn_clear_output_ = self.get_kernel("clear_output")
        self.krn_clear_output_.set_arg(0, self.output.devmem)

        self._clear_output = lambda: (self.execute_kernel(
            (self.output.size, ), None, self.krn_clear_output_))
        self._clear_hits = lambda: (self.execute_kernel(
            (self.hits.size, ), None, self.krn_clear_hits_))

        self._process_subblock = self._ocl_process_subblock

        self.krn_pack_.set_arg(1, self.output.devmem)

    def cuda_init(self):
        self._gpu_init(cublas.CUBLAS)

        block_size = self.device.suggest_block_size(self.krn_pack_)
        self._global_size_pack = (lambda size:
                                  (int(numpy.ceil(size / block_size)), 1, 1))
        self._local_size_pack = (block_size, 1, 1)

        if self.hits:
            block_size = self.device.suggest_block_size(self.krn_apply_hits_)
            self._global_size_hits = (int(
                numpy.ceil(self.output.size / block_size)), 1, 1)
            self._local_size_hits = (block_size, 1, 1)

        self._clear_output = lambda: self.output.devmem.memset32_async()
        self._clear_hits = lambda: self.hits.devmem.memset32_async()

        self._process_subblock = self._cuda_process_subblock

    def ocl_run(self):
        self.gpu_run()

    def cuda_run(self):
        self.gpu_run()

    def gpu_run(self):
        self.unmap_vectors(self.output, self.input, self.weights)
        unpack_data = self.device.get_temp_buffer()
        self._clear_output()
        if self.hits:
            self.hits.unmap()
            self._clear_hits()
        batch_size = self.output.shape[0]
        for i in range(0, batch_size, self.unpack_size):
            self._process_subblock(i, min(batch_size - i, self.unpack_size),
                                   unpack_data)
        if self.hits:
            self.execute_kernel(self._global_size_hits, self._local_size_hits,
                                self.krn_apply_hits_)

    def _cuda_process_subblock(self, start_image, image_count, unpack_data):
        output_offs = (start_image * self.input.sample_size *
                       self.input.itemsize)
        unpack_side = self._kernel_app_per_image * image_count
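        # One GEMM restores the per-application kernel patches for this
        # subblock of images; the DirectPack kernel below then scatters them
        # into their padded output positions (USE_ATOMICS handles overlaps).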

        self.gemm_(
            self.device.blas, cublas.CUBLAS_OP_T
            if self.weights_transposed else cublas.CUBLAS_OP_N,
            cublas.CUBLAS_OP_N, self._kernel_size, unpack_side,
            self.weights_shape[0], self.np_one, self.weights.devmem,
            int(self.input.devmem) + output_offs, self.np_zero, unpack_data)

        self.krn_pack_.set_arg(0, unpack_data)
        self.krn_pack_.set_arg(
            1,
            int(self.output.devmem) +
            start_image * self.output.sample_size * self.output.itemsize)
        limit = unpack_side * self._kernel_size
        self._const_i[0] = limit
        self.krn_pack_.set_arg(2, self._const_i)
        self.execute_kernel(self._global_size_pack(limit),
                            self._local_size_pack, self.krn_pack_)

    def _ocl_process_subblock(self, start_image, image_count, unpack_data):
        output_offs = start_image * self.input.sample_size
        unpack_side = self._kernel_app_per_image * image_count

        self.gemm_(self.device.blas,
                   cublas.CUBLAS_OP_T
                   if self.weights_transposed else cublas.CUBLAS_OP_N,
                   cublas.CUBLAS_OP_N,
                   self._kernel_size,
                   unpack_side,
                   self.weights_shape[0],
                   self.np_one,
                   self.weights.devmem,
                   self.input.devmem,
                   self.np_zero,
                   unpack_data,
                   offsetB=output_offs)

        self.krn_pack_.set_arg(0, unpack_data)
        self._const_i[0] = start_image * self.output.sample_size
        self.krn_pack_.set_arg(2, self._const_i)
        limit = unpack_side * self._kernel_size
        self.execute_kernel(self._global_size_pack(limit),
                            self._local_size_pack, self.krn_pack_)

    def numpy_run(self):
        raise NotImplementedError()
Example #38
class Binarization(AcceleratedUnit, EmptyDeviceMethodsMixin):
    """
    Input binarization. Input and output are 2D arrays of the same size.
    Each element A(i,j) (in row i and column j) of the input is a float
    number between 0 and 1. Each element B(i,j) of the output equals 1 with
    probability A(i,j) and 0 with probability 1 - A(i,j).
    Must be assigned before initialize():
    * input

    Updates after run():
    * output

    Creates within initialize():
    * output

    Attributes:
        input: input as batch of samples.
        output: output as batch of samples.
    """
    def __init__(self, workflow, **kwargs):
        super(Binarization, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.rand = kwargs.get("rand", prng.get())
        self.demand("input", "batch_size")

    def run(self):
        """Batch binarization on CPU only.
        """
        self.output.map_invalidate()
        self.input.map_read()
        self.output.mem[:] = self.input.mem[:]
        self.output.mem[:self.batch_size, :] = self.matlab_binornd(
            1, self.input.mem[:self.batch_size, :])

    def initialize(self, device, **kwargs):
        super(Binarization, self).initialize(device=device, **kwargs)
        if not self.output or self.output.size != self.input.size:
            self.output.reset()
            self.output.mem = numpy.zeros_like(self.input.mem)
        self.output.initialize(self.device)

    def matlab_binornd(self, n, p_in):
        """
        Analogue of binornd in Matlab, but n must be a scalar.

        The function generates a matrix of random variables,
        where the element at (i,j) position is generated from binomial
        distribution with the number of trials n and the probability of
        success p_in(i,j).

        Args:
            n (int): number of trials
            p_in (2 dimension numpy.array): success probability matrix
        Returns:
            res (2 dimension numpy.array): matrix of random variables
            generated from the binomial distribution
        """
        p = numpy.copy(p_in)
        if len(p.shape) == 2:
            nrow = p.shape[0]
            ncol = p.shape[1]
            p = numpy.transpose(p)
            p = p.flatten()
            dim = p.shape[0]
            p = matlib.repmat(p, n, 1)
            f = self.rand.rand(n, dim)
            res = f < p
            res = numpy.sum(res, axis=0)
            res = numpy.transpose(res.reshape(ncol, nrow)).reshape(nrow, ncol)
        elif len(p.shape) == 1:
            dim = p.shape[0]
            p = matlib.repmat(p, n, 1)
            f = self.rand.rand(n, dim)
            res = f < p
            res = numpy.sum(res, axis=0)
        else:
            raise ValueError("input of Binarization must be a 1- or "
                             "2-dimensional array")
        return res
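
For reference, stock NumPy can draw from the same distribution directly; a
quick, hedged sanity check of matlab_binornd against numpy.random.binomial
(the matrix size and tolerance here are arbitrary):

import numpy

p = numpy.full((400, 500), 0.3)
reference = numpy.random.binomial(1, p)  # Bernoulli(p) draw, same shape as p
assert abs(reference.mean() - 0.3) < 0.01
# Binarization.matlab_binornd(1, p) should match this mean equally closely,
# while drawing from the unit's own PRNG for reproducibility.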
Example #39
class MeanDispNormalizer(AcceleratedUnit, TriviallyDistributable):
    """Normalizes multichannel byte images according to
    dataset mean and dispersion.

    Attributes:
        input: minibatch of images (dtype=numpy.uint8,
                                    shape[0]=minibatch_size).
        mean: mean image over the dataset (dtype=numpy.uint8).
        rdisp: 1.0 / dispersion over the dataset (float datatype).
        output: normalized float images of the same dtype as rdisp.
    """
    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "WORKER")
        super(MeanDispNormalizer, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.global_size = None
        self.local_size = None
        self.demand("input", "mean", "rdisp")

    def init_unpickled(self):
        super(MeanDispNormalizer, self).init_unpickled()
        self.sources_["mean_disp_normalizer"] = {}

    def initialize(self, device, **kwargs):
        super(MeanDispNormalizer, self).initialize(device, **kwargs)

        for arr in self.input, self.mean, self.rdisp:
            if not isinstance(arr, Array):
                raise TypeError(
                    "veles.memory.Array type expected (got %s)" % type(arr))
            if not arr:
                raise ValueError("Invalid Array state")
        if len(self.input.shape) < 2:
            raise ValueError("input should be at least 2D")
        sample_size = self.mean.size
        if (self.input.sample_size != sample_size or
                self.rdisp.size != sample_size):
            raise ValueError(
                "Sample size of input differs from mean-rdisp size")

        if not self.output:
            self.output.reset(numpy.zeros(self.input.shape, self.rdisp.dtype))
        else:
            assert self.output.shape == self.input.shape

        self.init_vectors(self.input, self.mean, self.rdisp, self.output)

    def _gpu_init(self):
        dtype = self.rdisp.dtype
        sample_size = self.mean.size

        defines = {
            "input_type": numpy_dtype_to_opencl(self.input.dtype),
            "mean_type": numpy_dtype_to_opencl(self.mean.dtype),
            "SAMPLE_SIZE": sample_size
        }
        self.build_program(defines, self.__class__.__name__, dtype=dtype)
        self.assign_kernel("normalize_mean_disp")
        self.set_args(self.input, self.mean, self.rdisp, self.output)

    def ocl_init(self):
        self._gpu_init()
        self.global_size = [self.mean.size, self.input.shape[0]]

    def cuda_init(self):
        self._gpu_init()
        self.local_size = 1, 1, 1
        self.global_size = self.mean.size, self.input.shape[0], 1

    def _gpu_run(self):
        self.unmap_vectors(self.input, self.mean, self.rdisp, self.output)
        self.execute_kernel(self.global_size, self.local_size)

    def ocl_run(self):
        self._gpu_run()

    def cuda_run(self):
        self._gpu_run()

    def numpy_run(self):
        self.input.map_read()
        self.mean.map_read()
        self.rdisp.map_read()
        self.output.map_invalidate()

        dtype = self.output.dtype
        self.output.matrix[:] = (
            self.input.matrix.astype(dtype)[:] -
            self.mean.plain.astype(dtype)) * self.rdisp.plain
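
In NumPy terms, numpy_run() computes output = (input - mean) * rdisp, broadcast over the minibatch; a standalone restatement with illustrative shapes:

import numpy

# Normalize a minibatch of byte "images" with a dataset mean and
# reciprocal dispersion, as numpy_run() does above.
images = numpy.random.randint(0, 256, size=(4, 8)).astype(numpy.uint8)
mean = numpy.full(8, 128, dtype=numpy.uint8)
rdisp = numpy.full(8, 1.0 / 64.0, dtype=numpy.float32)

output = (images.astype(numpy.float32) - mean) * rdisp  # shape (4, 8)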
Example #40
class Forward(ForwardBase):
    """Class for forward propagation units.

    Attributes:
        input: input layer values.
        output: output layer values.
        weights: weights.
        bias: bias.
        weights_stddev: magnitude of the random distribution for weights.
        bias_stddev: magnitude of the random distribution for bias.
        rand: prng.Rand() object for initial weights generation.
    """

    hide_from_registry = True
    MAPPING = set()

    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "WORKER")
        super(Forward, self).__init__(workflow, **kwargs)
        self.weights_stddev = kwargs.get("weights_stddev")
        self.bias_stddev = kwargs.get("bias_stddev", self.weights_stddev)
        self.weights_filling = kwargs.get("weights_filling", "uniform")
        self.bias_filling = kwargs.get("bias_filling", "uniform")
        self.rand = kwargs.get("rand", prng.get())
        self.weights_transposed = kwargs.get("weights_transposed", False)
        self.include_bias = kwargs.get("include_bias", True)
        self.demand("input")
        self.output = Array(shallow_pickle=True)
        self.weights = Array()
        self.bias = Array()
        self.forward_mode = False
        self.exports = ["weights", "bias", "include_bias", "weights_transposed"]

    def package_export(self):
        data = {}
        for attr in self.exports:
            value = getattr(self, attr)
            if value is not None:
                if isinstance(value, Array):
                    value.map_read()
                    value = value.mem
                data[attr] = value
        return data

    @property
    def forward_mode(self):
        return self._forward_mode

    @forward_mode.setter
    def forward_mode(self, value):
        if not isinstance(value, bool):
            raise TypeError("forward_mode must be boolean (got %s)" % type(value))
        self._forward_mode = value

    def initialize(self, device, **kwargs):
        self.forward_mode = kwargs.get("forward_mode", False)
        super(Forward, self).initialize(device=device, **kwargs)

    def generate_data_for_slave(self, slave):
        if self.forward_mode:
            return None
        data = [None, None]
        if self.weights:
            self.weights.map_read()
            data[0] = self.weights.mem
        if self.bias:
            self.bias.map_read()
            data[1] = self.bias.mem
        return data

    def generate_data_for_master(self):
        return None

    def apply_data_from_master(self, data):
        if self.forward_mode:
            return
        if self.weights:
            self.weights.map_invalidate()
            numpy.copyto(self.weights.mem, data[0])
        else:
            self.weights.reset(data[0])
        if self.bias:
            self.bias.map_invalidate()
            numpy.copyto(self.bias.mem, data[1])
        else:
            self.bias.reset(data[1])

    def apply_data_from_slave(self, data, slave):
        pass

    def drop_slave(self, slave):
        pass
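
Together, generate_data_for_slave() and apply_data_from_master() form a one-way master-to-slave broadcast of the current weights and bias; a runnable NumPy analogue of the copy semantics (plain arrays stand in for the unit's Array members):

import numpy

# The master side ships the current arrays; the slave copies them in place
# when its buffers are already allocated, via numpy.copyto.
master_weights = numpy.random.rand(4, 3)
data = [master_weights, None]          # [weights, bias]; bias omitted here

slave_weights = numpy.zeros((4, 3))
numpy.copyto(slave_weights, data[0])   # in-place copy into the slave buffer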
Example #41
class OffsetPooling(Pooling):
    """Pooling by offset forward propagation.

    Must be assigned before initialize():
        input

    Updates after run():
        input_offset

    Creates within initialize():
        input_offset

    Attributes:
        input_offset: offsets in the input where elements are passed through.
    """

    MAPPING = set()
    hide_from_registry = True

    def __init__(self, workflow, **kwargs):
        super(OffsetPooling, self).__init__(workflow, **kwargs)
        self.input_offset = Array()
        self.demand("input")

    def initialize(self, device, **kwargs):
        super(OffsetPooling, self).initialize(device=device, **kwargs)

        if self._no_output:
            return
        if self.input_offset:
            assert self.input_offset.shape[1:] == self.output.shape[1:]
        if (not self.input_offset or
                self.input_offset.shape[0] != self.output.shape[0]):
            self.input_offset.reset(numpy.zeros(self.output.shape,
                                                dtype=numpy.int32))
        self.input_offset.initialize(self.device)

    def set_args(self, *args):
        super(OffsetPooling, self).set_args(self.input, self.output,
                                            self.input_offset, *args)

    def ocl_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).ocl_run()

    def cuda_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).cuda_run()

    def numpy_run(self):
        self.input_offset.map_invalidate()
        super(OffsetPooling, self).numpy_run()

    def numpy_run_cut(self, cut, coords):
        batch, y1, x1, ch, out_y, out_x = coords
        cut_index = self.numpy_run_cut_offset(
            cut, numpy.ravel_multi_index((batch, out_y, out_x, ch),
                                         self.output.shape))
        i, j = numpy.unravel_index(cut_index, cut.shape)
        idx = numpy.ravel_multi_index((batch, y1 + i, x1 + j, ch),
                                      self.input.shape)
        val = numpy.ravel(self.input.mem)[idx]
        self.input_offset.mem[batch, out_y, out_x, ch] = idx
        return val
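
numpy_run_cut() records, for each pooled output element, the flat offset of the input element that produced it; a standalone illustration of the same index bookkeeping on a single pooling window:

import numpy

# Take a 2x2 window of a (batch, y, x, channels) input, find its maximum,
# and store its position as a flat offset into the input (cf. numpy_run_cut).
inp = numpy.arange(16, dtype=numpy.float32).reshape(1, 4, 4, 1)
cut = inp[0, 0:2, 0:2, 0]                     # one pooling window
i, j = numpy.unravel_index(numpy.argmax(cut), cut.shape)
offset = numpy.ravel_multi_index((0, 0 + i, 0 + j, 0), inp.shape)
value = inp.ravel()[offset]                   # equals cut.max()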
Example #42
class GradientDescentBase(AcceleratedUnit):
    """Base class for gradient descent units.

    Attributes:
        input: input layer values.
        output: output layer values.
        err_output: error to backpropagate.
        err_input: backpropagated error.
        weights: weights.
        bias: bias.
        learning_rate: gradient descent speed (positive).
        learning_rate_bias
        weights_decay: regularization for weights (see l1_vs_l2).
        weights_decay_bias
        gradient_moment: moment coefficient for weights.
        gradient_moment_bias
        gradient_weights_with_moment: accumulated moment.
        gradient_bias_with_moment
        batch_size: effective batch size (if None, taken from err_output).
        weights_transposed: assume weights matrix as a transposed one.
        apply_gradient: will apply gradient.
        gradient_changed: when True, slave will send gradients to master
            (assigned to True just before the run call, so it can be set to
            False inside ocl_run, numpy_run if necessary).
        ocl_set_const_args: True when the kernel's constant arguments have
                            been changed and need to be set again.
    """

    hide_from_registry = True
    MAPPING = set()

    REDUCE_SIZE = 64  # used for updating bias

    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "TRAINER")
        super(GradientDescentBase, self).__init__(workflow, **kwargs)
        self.err_input = Array(shallow_pickle=True)
        self.ocl_set_const_args = True
        self.weights = None
        self.bias = None
        self.demand("input", "err_output")
        self.learning_rate = kwargs.get("learning_rate", 0.01)
        self.learning_rate_bias = kwargs.get("learning_rate_bias", self.learning_rate)
        self.weights_decay = kwargs.get("weights_decay", 0.00005)
        self.weights_decay_bias = kwargs.get("weights_decay_bias", 0.0)
        self.l1_vs_l2 = kwargs.get("l1_vs_l2", 0)
        self.l1_vs_l2_bias = kwargs.get("l1_vs_l2_bias", self.l1_vs_l2)
        self.gradient_moment = kwargs.get("gradient_moment", 0)
        self.gradient_moment_bias = kwargs.get("gradient_moment_bias", self.gradient_moment)
        self.weights_transposed = kwargs.get("weights_transposed", False)
        self.need_err_input = kwargs.get("need_err_input", True)
        self.include_bias = kwargs.get("include_bias", True)
        self.factor_ortho = kwargs.get("factor_ortho", 0)
        self.col_sums = Array()  # for orthogonalization

        # Current gradient as it is without applying learning_rate etc.
        self.gradient_weights = Array()
        self.gradient_bias = Array()

        # Gradient with applied learning_rate etc.
        # optionally accumulated from the previous run
        self.accumulate_gradient = kwargs.get("accumulate_gradient", False)

        # When accumulate_gradient is set to True:
        # 1. Calculate gd
        # 2. acc = acc_alpha * gd + acc_beta * acc
        # 3. gd = gd_alpha * acc + gd_beta * gd
        # 4. Apply moments to gd
        # 5. weights += gd if apply_gradient is set to True
        self.acc_alpha = kwargs.get("acc_alpha", 0.0)
        self.acc_beta = kwargs.get("acc_beta", 0.0)
        self.gd_alpha = kwargs.get("gd_alpha", 0.0)
        self.gd_beta = kwargs.get("gd_beta", 1.0)

        self.accumulated_gradient_weights = Array()
        self.accumulated_gradient_bias = Array()

        # Gradient with accumulated moments
        self.gradient_weights_with_moment = Array()
        self.gradient_bias_with_moment = Array()

        # Set to True when the gradient changes
        self.gradient_changed = False

        # Gradient is applied to the weights immediately after it is computed
        self.apply_gradient = kwargs.get("apply_gradient", not workflow.is_slave)

    @property
    def current_batch_size(self):
        batch_size = getattr(self, "batch_size", None)
        if batch_size is None:
            return self.err_output.mem.shape[0]
        return int(batch_size)

    def initialize(self, device, **kwargs):
        super(GradientDescentBase, self).initialize(device, **kwargs)

        if self.weights:
            assert len(self.weights.shape) == 2
            self.weights_shape = tuple(reversed(self.weights.shape)) if self.weights_transposed else self.weights.shape
        else:
            self.weights_shape = None

        self.learning_rate = kwargs.get("learning_rate", self.learning_rate)
        self.weights_decay = kwargs.get("weights_decay", self.weights_decay)
        self.gradient_moment = kwargs.get("gradient_moment", self.gradient_moment)
        self.learning_rate_bias = kwargs.get("learning_rate_bias", self.learning_rate_bias)
        self.weights_decay_bias = kwargs.get("weights_decay_bias", self.weights_decay_bias)
        self.gradient_moment_bias = kwargs.get("gradient_moment_bias", self.gradient_moment_bias)

        if self.weights:
            if not self.gradient_weights:
                self.gradient_weights.reset(numpy.zeros_like(self.weights.mem))
            else:
                assert self.gradient_weights.size == self.weights.size

        if self.weights and self.accumulate_gradient:
            if not self.accumulated_gradient_weights:
                self.accumulated_gradient_weights.reset(numpy.zeros_like(self.weights.mem))
            else:
                assert self.accumulated_gradient_weights.size == self.weights.size

        if self.weights and (self.gradient_moment or not self.is_standalone):
            if not self.gradient_weights_with_moment:
                self.gradient_weights_with_moment.reset(numpy.zeros_like(self.weights.mem))
            else:
                assert self.gradient_weights_with_moment.size == self.weights.size

        if self.include_bias and self.bias and (not self.gradient_bias or self.gradient_bias.size != self.bias.size):
            self.gradient_bias.reset(numpy.zeros_like(self.bias.mem))

        if (
            self.include_bias
            and self.bias
            and self.accumulate_gradient
            and (not self.accumulated_gradient_bias or self.accumulated_gradient_bias.size != self.bias.size)
        ):
            self.accumulated_gradient_bias.reset(numpy.zeros_like(self.bias.mem))

        if self.include_bias and self.bias and (self.gradient_moment_bias or not self.is_standalone):
            if not self.gradient_bias_with_moment:
                self.gradient_bias_with_moment.reset(numpy.zeros_like(self.bias.mem))
            else:
                assert self.gradient_bias_with_moment.size == self.bias.size

        dtype = self.err_output.dtype
        if self.need_err_input:
            if not self.err_input:
                self.err_input.reset(numpy.zeros(self.input.shape, dtype))
            else:
                assert self.err_input.shape == self.input.shape

        if self.weights:
            side = self.weights_shape[0]
            other = self.weights.size // side
            if self.factor_ortho:
                if not self.col_sums:
                    self.col_sums.reset(numpy.zeros(other, dtype=dtype))
                else:
                    assert self.col_sums.size == other
                self.col_sums.initialize(self.device)
            # fall back to the class-level REDUCE_SIZE until reduce_size is set
            self.reduce_size = roundup(min(
                getattr(self, "reduce_size", self.REDUCE_SIZE), other), 32)
            self.weights.initialize(self.device)

        for vec in self.bias, self.input, self.err_input:
            if vec:
                vec.initialize(self.device)
        self.init_vectors(
            self.err_output,
            self.gradient_weights,
            self.gradient_bias,
            self.accumulated_gradient_weights,
            self.accumulated_gradient_bias,
            self.gradient_weights_with_moment,
            self.gradient_bias_with_moment,
        )

    def gpu_weights_update(self):
        self.unmap_vectors(
            self.input,
            self.err_output,
            self.weights,
            self.gradient_weights,
            self.accumulated_gradient_weights,
            self.gradient_weights_with_moment,
        )

        if self.factor_ortho:
            self.col_sums.unmap()
            self.execute_kernel(self._global_size_ortho, self._local_size_ortho, self.krn_compute_col_sums_)

            self._weights_const[12] = self.factor_ortho
            self.krn_weights_.set_arg(12, self._weights_const[12:13])

        self._weights_const[4:12] = (
            self.learning_rate,
            self.weights_decay,
            self.l1_vs_l2,
            self.gradient_moment,
            self.acc_alpha,
            self.acc_beta,
            self.gd_alpha,
            self.gd_beta,
        )
        self.krn_weights_.set_args(
            self.device.skip(4),
            self._weights_const[4:5],
            self._weights_const[5:6],
            self._weights_const[6:7],
            self._weights_const[7:8],
            self._weights_const[8:9],
            self._weights_const[9:10],
            self._weights_const[10:11],
            self._weights_const[11:12],
        )

        self.execute_kernel(self._global_size_weights, self._local_size_weights, self.krn_weights_)

    def gpu_bias_update(self):
        if not self.include_bias:
            return

        self.unmap_vectors(
            self.err_output,
            self.bias,
            self.gradient_bias,
            self.accumulated_gradient_bias,
            self.gradient_bias_with_moment,
        )

        self._bias_const[5:13] = (
            self.learning_rate_bias,
            self.weights_decay_bias,
            self.l1_vs_l2_bias,
            self.gradient_moment_bias,
            self.acc_alpha,
            self.acc_beta,
            self.gd_alpha,
            self.gd_beta,
        )
        self.krn_bias_.set_args(
            self.device.skip(5),
            self._bias_const[5:6],
            self._bias_const[6:7],
            self._bias_const[7:8],
            self._bias_const[8:9],
            self._bias_const[9:10],
            self._bias_const[10:11],
            self._bias_const[11:12],
            self._bias_const[12:13],
        )

        self.execute_kernel(self._global_size_bias, self._local_size_bias, self.krn_bias_)

    def gpu_err_output_update(self):
        """Multiply err_output by activation derivative by output.
        """
        if self.krn_err_output_ is None:
            return
        self.err_output.unmap()
        self.output.unmap()
        self.execute_kernel(self._global_size_err_output, self._local_size_err_output, self.krn_err_output_)

    def numpy_err_output_update(self):
        """Multiply err_output by activation derivative by output.
        """
        pass

    def print_debug_data(self):
        """
        Show weights statistics
        """
        if not self.logger.isEnabledFor(logging.DEBUG):
            return
        self.weights.map_read()
        self.bias.map_read()
        self.gradient_bias.map_read()
        self.gradient_weights.map_read()
        weights = self.weights.mem
        bias = self.bias.mem
        grad_weights = self.gradient_weights.mem
        grad_bias = self.gradient_bias.mem

        weight_table = PrettyTable("TYPE", "Mean", "StdDev", "Min", "Max")
        weight_table.float_format = ".10"
        for (w_name, w_array) in [
            ("Weight", weights),
            ("Bias", bias),
            ("Grad Weight", grad_weights),
            ("Grad Bias", grad_bias),
        ]:
            w_mean = w_stddev = w_min = w_max = None
            if w_array is not None and w_array.size > 0:
                w_mean = numpy.mean(w_array)
                w_stddev = numpy.std(w_array)
                w_min = numpy.min(w_array)
                w_max = numpy.max(w_array)
            weight_table.add_row(w_name, w_mean, w_stddev, w_min, w_max)
        self.debug("\n" + weight_table.get_string())

    def generate_data_for_slave(self, slave):
        return (
            self.learning_rate,
            self.weights_decay,
            self.gradient_moment,
            self.learning_rate_bias,
            self.weights_decay_bias,
            self.gradient_moment_bias,
        )

    @staticmethod
    def fill_zeros(vector):
        if not vector:
            return
        vector.map_invalidate()
        vector.mem[:] = 0

    def apply_data_from_master(self, data):
        self.learning_rate = data[0]
        self.weights_decay = data[1]
        self.gradient_moment = data[2]
        self.learning_rate_bias = data[3]
        self.weights_decay_bias = data[4]
        self.gradient_moment_bias = data[5]
        self.fill_zeros(self.gradient_weights_with_moment)
        self.fill_zeros(self.gradient_bias_with_moment)
        self.fill_zeros(self.gradient_weights)
        self.fill_zeros(self.gradient_bias)
        self.fill_zeros(self.accumulated_gradient_weights)
        self.fill_zeros(self.accumulated_gradient_bias)

    def generate_data_for_master(self):
        if not self.gradient_changed:
            return None
        self.gradient_changed = False
        self.gradient_weights_with_moment.map_read()
        self.gradient_bias_with_moment.map_read()
        return (self.gradient_weights_with_moment.mem, self.gradient_bias_with_moment.mem)

    def apply_data_from_slave(self, data, slave):
        if self.weights:
            self.weights.map_write()
            self.gradient_weights_with_moment.map_write()
            self.gradient_weights_with_moment.mem *= self.gradient_moment
            self.gradient_weights_with_moment.mem += data[0]
            self.weights.mem += self.gradient_weights_with_moment.mem
        if self.bias:
            self.bias.map_write()
            self.gradient_bias_with_moment.map_write()
            self.gradient_bias_with_moment.mem *= self.gradient_moment_bias
            self.gradient_bias_with_moment.mem += data[1]
            self.bias.mem += self.gradient_bias_with_moment.mem

    def drop_slave(self, slave):
        pass

    def accumulate_gradient_f(self, accumulated_gradient, gradient):
        if accumulated_gradient and self.accumulate_gradient:
            accumulated_gradient[:] = gradient * self.acc_alpha + (
                self.acc_beta * accumulated_gradient if self.acc_beta else 0
            )

            gradient *= self.gd_beta
            gradient += self.gd_alpha * accumulated_gradient

        return gradient

    @staticmethod
    def numpy_gradient_step(weight, gradient, lr, factor_l12, l1_vs_l2, factor_ortho=0, weights_transposed=False):
        gradient = gradient.copy()
        gradient += factor_l12 * ((1.0 - l1_vs_l2) * weight + 0.5 * l1_vs_l2 * numpy.sign(weight))
        if factor_ortho:
            col_sums = reshape_transposed(weight).sum(axis=1) if weights_transposed else weight.sum(axis=0)
            for i, row in enumerate(gradient):
                row += (col_sums - weight[i]) * factor_ortho / weight.shape[0]
        gradient *= lr
        return gradient

    def run(self):
        self.gradient_changed = True
        super(GradientDescentBase, self).run()
        self.ocl_set_const_args = False
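
A standalone NumPy walkthrough of the update pipeline sketched in the accumulate_gradient comment (steps 2-3), followed by the L1/L2 blend and learning-rate scaling from numpy_gradient_step; all coefficient values are illustrative:

import numpy

weights = numpy.array([0.5, -0.2])
gd = numpy.array([0.1, -0.3])                       # 1. raw gradient
acc = numpy.zeros_like(gd)
acc_alpha, acc_beta, gd_alpha, gd_beta = 1.0, 0.9, 0.1, 1.0
lr, factor_l12, l1_vs_l2 = 0.01, 0.0005, 0.0

acc = acc_alpha * gd + acc_beta * acc               # 2. accumulate
gd = gd_alpha * acc + gd_beta * gd                  # 3. blend back
gd = gd + factor_l12 * ((1.0 - l1_vs_l2) * weights +
                        0.5 * l1_vs_l2 * numpy.sign(weights))
scaled = gd * lr    # numpy_gradient_step returns this; the caller applies it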
Example #43
class EvaluatorSoftmax(EvaluatorBase):

    """Evaluator for nn softmax output from the batch labels.

    Must be assigned before initialize():
        output
        labels
        batch_size
        max_idx

    Updates after run():
        err_output
        n_err
        confusion_matrix
        max_err_output_sum

    Creates within initialize():
        err_output
        n_err
        confusion_matrix
        max_err_output_sum

    Attributes:
        labels: labels for Batch.
        output: output of the network_common as Batch.
        err_output: backpropagation errors based on labels.
        batch_size: number of elements in output to evaluate.
        confusion_matrix: confusion matrix for the output.
        compute_confusion_matrix: compute confusion matrix or not.
        max_idx: indexes of element with maximum real value for each sample.
        max_err_output_sum: maximum of backpropagated error sum by sample.
    """
    def __init__(self, workflow, **kwargs):
        super(EvaluatorSoftmax, self).__init__(workflow, **kwargs)
        self.compute_confusion_matrix = kwargs.get("compute_confusion_matrix",
                                                   True)
        self.confusion_matrix = Array()
        self.n_err = Array()
        self.max_err_output_sum = Array()
        self.class_keys = None
        self.demand("labels", "max_idx")
        if self.testing:
            self.demand("labels_mapping")

    def initialize(self, device, **kwargs):
        super(EvaluatorSoftmax, self).initialize(device=device, **kwargs)
        if self.testing:
            return
        self.sources_["evaluator"] = {}

        dtype = self.output.dtype

        if not self.n_err:
            self.n_err.reset(numpy.zeros(2, dtype=numpy.int32))
        else:
            assert self.n_err.size == 2

        out_size = self.output.sample_size
        if self.compute_confusion_matrix:
            if not self.confusion_matrix:
                self.confusion_matrix.reset(
                    numpy.zeros([out_size, out_size], numpy.int32))
            else:
                assert self.confusion_matrix.size == out_size * out_size
        else:
            self.confusion_matrix.reset()

        if not self.max_err_output_sum:
            self.max_err_output_sum.reset(numpy.zeros(1, dtype))
        else:
            assert self.max_err_output_sum.size == 1

        self.init_vectors(self.confusion_matrix, self.n_err, self.max_idx,
                          self.labels, self.max_err_output_sum)

    def _gpu_init(self):
        dtype = self.output.dtype
        block_size = min(self.err_output.shape[0], 256)
        self.build_program(cache_file_name="%s_%d_%d" %
                           (self.__class__.__name__, self.output.shape[0],
                            self.output.sample_size),
                           dtype=dtype,
                           block_size=block_size,
                           max_batch_size=self.err_output.shape[0],
                           output_size=self.err_output.sample_size)
        self.assign_kernel("evaluate_softmax")
        self.set_args(self.output, self.max_idx, self.labels,
                      self.skip_args(2), self.n_err, self.confusion_matrix,
                      self.max_err_output_sum, self.err_output)
        return block_size

    def ocl_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._global_size = [block_size]
        self._local_size = [block_size]

    def cuda_init(self):
        if self.testing:
            return
        block_size = self._gpu_init()
        self._global_size = (1, 1, 1)
        self._local_size = (block_size, 1, 1)

    def _gpu_run(self):
        self.unmap_vectors(self.err_output, self.output, self.max_idx,
                           self.labels, self.n_err, self.confusion_matrix,
                           self.max_err_output_sum)

        self.krn_constants_i_[0] = self.batch_size
        self.set_arg(3, self.krn_constants_i_[0:1])
        self.krn_constants_f_[0] = 1.0 / self.batch_size if self.mean else 1.0
        self.set_arg(4, self.krn_constants_f_[0:1])

        self.execute_kernel(self._global_size, self._local_size)

    def ocl_run(self):
        return self._gpu_run()

    def cuda_run(self):
        return self._gpu_run()

    def numpy_run(self):
        self.err_output.map_invalidate()
        for vec in self.output, self.max_idx, self.labels:
            vec.map_read()
        for vec in self.n_err, self.confusion_matrix, self.max_err_output_sum:
            vec.map_write()

        batch_size = self.batch_size
        labels = self.labels.mem
        confusion_matrix = self.confusion_matrix.mem

        n_ok = 0
        n_total = 0
        multiplier = 1.0 / batch_size if self.mean else 1.0
        for i in range(batch_size):  # loop by batch
            if labels[i] < 0:
                self.err_output.mem[i] = 0.0
                continue
            output = ravel(self.output[i])
            err_output = ravel(self.err_output[i])

            max_idx = self.max_idx[i]
            confusion_matrix[max_idx, labels[i]] += 1
            if max_idx == labels[i]:
                n_ok += 1
            n_total += 1

            # Compute softmax output error gradient
            err_output[:] = output[:]
            err_output[labels[i]] -= 1.0
            err_output *= multiplier
            if err_output.dtype in (numpy.complex64, numpy.complex128):
                self.max_err_output_sum[0] = max(self.max_err_output_sum[0],
                                                 numpy.linalg.norm(err_output))
            else:
                self.max_err_output_sum[0] = max(
                    self.max_err_output_sum[0], (numpy.fabs(err_output)).sum())
        # Set errors for excessive samples to zero
        if batch_size < self.err_output.mem.shape[0]:
            self.err_output.mem[batch_size:] = 0.0
        self.n_err[0] += batch_size - n_ok
        self.n_err[1] += n_total

    def get_metric_values(self):
        if self.testing:
            output_labels = {}
            class_keys = getattr(self, "class_keys", None)
            for index, probabilities in enumerate(self.merged_output[:]):
                max_index = 0
                max_value = 0
                for label_index, value in enumerate(probabilities):
                    if value >= max_value:
                        max_value = value
                        max_index = label_index
                if class_keys is not None:
                    output_labels[class_keys[TEST][index]] = \
                        self.labels_mapping[max_index]
                else:
                    output_labels[index] = self.labels_mapping[max_index]
            return {"Output": output_labels}
        return {}
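
The per-sample gradient computed in numpy_run() is the standard cross-entropy-on-softmax rule: copy the softmax output and subtract one at the true label; a minimal standalone check:

import numpy

softmax_output = numpy.array([0.1, 0.7, 0.2])
label = 1
err = softmax_output.copy()
err[label] -= 1.0          # -> [0.1, -0.3, 0.2], optionally scaled by 1/batch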
Example #44
class BatchWeights(AcceleratedUnit, EmptyDeviceMethodsMixin):
    """Make weigths and biases from batch v and h.
    Must be assigned before initialize():
    * v
    * h
    * batch_size

    Updates after run():
    * hbias_batch
    * vbias_batch
    * W_batch

    Creates within initialize():
    * hbias_batch
    * vbias_batch
    * W_batch

    Attributes:
        v: input data  batch
        h: hidden states of input batch
        batch_size: size of batch
        hbias_batch: bias calculated from h
        vbias_batch: bias calculated from v
        W_batch: weigths calculated from batch v and h
    """
    def __init__(self, workflow, **kwargs):
        super(BatchWeights, self).__init__(workflow, **kwargs)
        self.vbias_batch = Array()
        self.hbias_batch = Array()
        self.weights_batch = Array()
        self.demand("v", "h", "batch_size")

    def initialize(self, device, **kwargs):
        super(BatchWeights, self).initialize(device=device, **kwargs)
        vbias_size = self.v.size // self.v.shape[0]
        hbias_size = self.h.size // self.h.shape[0]
        W_size = vbias_size * hbias_size
        if not self.hbias_batch:
            self.hbias_batch.reset(
                numpy.zeros((1, hbias_size), dtype=self.h.mem.dtype))
        else:
            assert self.hbias_batch.size == hbias_size
        if not self.vbias_batch:
            self.vbias_batch.reset(
                numpy.zeros((1, vbias_size), dtype=self.h.mem.dtype))
        else:
            assert self.vbias_batch.size == vbias_size
        if not self.weights_batch:
            self.weights_batch.reset(
                numpy.zeros((vbias_size, hbias_size), dtype=self.h.mem.dtype))
        else:
            assert self.weights_batch.size == W_size
        self.init_vectors(self.weights_batch, self.vbias_batch,
                          self.hbias_batch, self.v, self.h)

    def run(self):
        self.v.map_read()
        self.h.map_read()
        for v in self.weights_batch, self.hbias_batch, self.vbias_batch:
            v.map_invalidate()
        self.weights_batch.mem[:] = numpy.dot(
            numpy.transpose(self.v.mem[0: self.batch_size, :]),
            self.h.mem[0: self.batch_size, :]) / \
            self.batch_size
        for bv in (self.vbias_batch, self.v), (self.hbias_batch, self.h):
            bv[0].mem[:] = (numpy.sum(bv[1].mem[:self.batch_size, :], 0) /
                            self.batch_size)
            bv[0].shape = (1, bv[0].size)
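
In matrix form, run() estimates weights_batch = v^T . h / batch_size and the biases as per-column means; a standalone NumPy restatement with illustrative shapes:

import numpy

v = numpy.random.rand(5, 3)    # batch of visible states
h = numpy.random.rand(5, 2)    # batch of hidden states
batch_size = 5

weights_batch = v[:batch_size].T.dot(h[:batch_size]) / batch_size  # (3, 2)
vbias_batch = v[:batch_size].mean(axis=0).reshape(1, -1)           # (1, 3)
hbias_batch = h[:batch_size].mean(axis=0).reshape(1, -1)           # (1, 2)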
Example #45
class EvaluatorBase(AcceleratedUnit, TriviallyDistributable):
    """Base class for evaluators.
    """

    hide_from_registry = True
    def __init__(self, workflow, **kwargs):
        kwargs["view_group"] = kwargs.get("view_group", "EVALUATOR")
        super(EvaluatorBase, self).__init__(workflow, **kwargs)
        self.mean = kwargs.get("mean", True)
        self.err_output = Array()
        self._merged_output = Array()
        self.krn_constants_i_ = None
        self.krn_constants_f_ = None
        self.demand("output", "batch_size")
        if self.testing:
            self.demand("class_lengths", "offset")

    @property
    def mean(self):
        """
        :return: True if the error function averages values. Default is True.
        """
        return self._mean

    @mean.setter
    def mean(self, value):
        if not isinstance(value, bool):
            raise TypeError("mean must be boolean (got %s)" % type(value))
        self._mean = value

    @property
    def merged_output(self):
        assert self.testing
        return self._merged_output.mem

    def initialize(self, device, **kwargs):
        super(EvaluatorBase, self).initialize(device, **kwargs)
        dtype = self.output.dtype
        if self.testing:
            self._merged_output.reset(
                numpy.zeros(
                    (self.class_lengths[TEST], ) + self.output.shape[1:],
                    dtype))
            return

        self.krn_constants_i_ = numpy.zeros(1, numpy.int32)
        self.krn_constants_f_ = numpy.zeros(1, dtype)
        self.err_output.reset(numpy.zeros_like(self.output.mem, dtype))

        for vec in self.output, self.err_output:
            vec.initialize(self.device)

    def run(self):
        if self.testing:
            self.output.map_read()
            self.merge_output()
            return
        return super(EvaluatorBase, self).run()

    def merge_output(self):
        self.merged_output[self.offset - self.batch_size:self.offset] = \
            self.output[:self.batch_size]

    def get_metric_names(self):
        if self.testing:
            return {"Output"}
        return set()

    def get_metric_values(self):
        if self.testing:
            return {"Output": self.merged_output}
        return {}
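
merge_output() writes each test minibatch into the slice [offset - batch_size, offset) of the merged buffer; a minimal standalone restatement of that slicing:

import numpy

merged = numpy.zeros(10)
batch = numpy.ones(4)
offset, batch_size = 8, 4
merged[offset - batch_size:offset] = batch[:batch_size]  # fills merged[4:8]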
Example #46
class Uniform(AcceleratedUnit):
    """Generates random numbers from uniform distribution.

    Attributes:
        num_states: number of random states for parallel generation.
        states: Array of random states.
        prng: veles.prng.RandomGenerator for initial states generation.
        output_bytes: number of output bytes to generate.
    """

    backend_methods = AcceleratedUnit.backend_methods + ("fill",)

    def __init__(self, workflow, **kwargs):
        super(Uniform, self).__init__(workflow, **kwargs)
        self.num_states = kwargs.get("num_states", 256)
        self.states = Array()
        self.prng = kwargs.get("prng", get())
        self.output_bytes = kwargs.get("output_bytes", 0)
        self.output = Array()
        self.cl_const = numpy.zeros(1, dtype=numpy.int32)

    def init_unpickled(self):
        super(Uniform, self).init_unpickled()
        self.sources_["random"] = {}

    def initialize(self, device, **kwargs):
        super(Uniform, self).initialize(device, **kwargs)

        if not self.states or self.states.size != self.num_states * 16 * 2:
            self.states.reset(numpy.empty(self.num_states * 16 * 2,
                                          dtype=numpy.uint32))
            self.states.mem[:] = self.prng.randint(0, (1 << 32) + 1,
                                                   self.states.size)

        if not self.output or self.output.nbytes < self.output_bytes:
            self.output_bytes = roundup(self.output_bytes,
                                        self.num_states * 16 * 8)
            self.output.reset(numpy.zeros(self.output_bytes, numpy.uint8))
        else:
            self.output_bytes = self.output.nbytes

        self.init_vectors(self.states, self.output)

    def _gpu_init(self):
        self.build_program({}, "uniform_%d" % self.num_states)

        self.assign_kernel("random_xorshift1024star")
        self.set_args(self.states, self.cl_const, self.output)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = [self.num_states]
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        n = self.num_states
        l = 1
        while not (n & 1) and l < 32:
            n >>= 1
            l <<= 1
        self._global_size = (n, 1, 1)
        self._local_size = (l, 1, 1)

    def _gpu_fill(self, nbytes):
        bytes_per_round = self.num_states * 16 * 8
        nbytes = roundup(nbytes, bytes_per_round)
        if nbytes > self.output.nbytes:
            raise error.Bug("nbytes > self.output.nbytes")
        self.unmap_vectors(self.states, self.output)
        self.cl_const[0] = nbytes // bytes_per_round
        self.set_arg(1, self.cl_const)
        self.execute_kernel(self._global_size, self._local_size)

    def ocl_fill(self, nbytes):
        self._gpu_fill(nbytes)

    def cuda_fill(self, nbytes):
        self._gpu_fill(nbytes)

    def numpy_fill(self, nbytes):
        bytes_per_round = self.num_states * 16 * 8
        nbytes = roundup(nbytes, bytes_per_round)
        if nbytes > self.output.nbytes:
            raise error.Bug("nbytes > self.output.nbytes")
        self.states.map_write()
        self.output.map_invalidate()
        n_rounds = nbytes // bytes_per_round

        u64 = numpy.array([1181783497276652981], dtype=numpy.uint64)
        s0 = numpy.zeros(1, dtype=numpy.uint64)
        s1 = numpy.zeros(1, dtype=numpy.uint64)

        states = self.states.mem.view(dtype=numpy.uint64)
        states = states.reshape(states.size // 16, 16)
        output = self.output.mem.view(dtype=numpy.uint64)
        for i in range(self.num_states):
            offs = i
            s = states[i]
            self.p = 0
            for _round in range(n_rounds):
                for _iter in range(16):
                    output[offs] = self._next_rand(s, s0, s1, u64)
                    offs += self.num_states

    def _next_rand(self, s, s0, s1, u64):
        s0[0] = s[self.p]
        self.p = (self.p + 1) & 15
        s1[0] = s[self.p]
        s1 ^= s1 << 31
        s1 ^= s1 >> 11
        s0 ^= s0 >> 30
        s0 ^= s1
        s[self.p] = s0[0]
        return (s0 * u64)[0]

    def fill(self, nbytes):
        self._backend_fill_(nbytes)

    def ocl_run(self):
        self.ocl_fill(self.output.nbytes)

    def cuda_run(self):
        self.cuda_fill(self.output.nbytes)

    def numpy_run(self):
        self.numpy_fill(self.output.nbytes)
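
For reference, a standalone restatement of one xorshift1024* step as implemented by _next_rand above; one-element uint64 arrays keep all arithmetic wrapping modulo 2**64:

import numpy

MULT = numpy.array([1181783497276652981], dtype=numpy.uint64)
s = numpy.arange(1, 17, dtype=numpy.uint64)  # 16-word state; must not be all zero
p = 0                                        # ring pointer into the state

s0 = numpy.array([s[p]])
p = (p + 1) & 15
s1 = numpy.array([s[p]])
s1 ^= s1 << 31
s1 ^= s1 >> 11
s0 ^= s0 >> 30
s0 ^= s1
s[p] = s0[0]
value = (s0 * MULT)[0]                       # the 64-bit output word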