Exemplo n.º 1
0
 def __init__(self, backend, project, _id, internal_id):
     """Store the experiment's identity and create its helper objects.

     Args:
         backend: Backend object; kept on the instance and also handed
             to the execution context.
         project: Project object; kept on the instance.
         _id: Experiment id; kept on the instance.
         internal_id: Internal id; kept on the instance.
     """
     self._backend, self._project = backend, project
     self._id, self._internal_id = _id, internal_id
     # Both helpers receive this instance, so they are built after the
     # plain attributes above have been assigned.
     self._channels_values_sender = ChannelsValuesSender(self)
     self._execution_context = ExecutionContext(backend, self)
Exemplo n.º 2
0
    def test_send_values_from_multiple_channels(self):
        """Send values to three channels and check the last batch call.

        After ``join()``, the most recent ``_send_channels_values`` call
        must carry exactly one positional argument holding the values of
        the numeric, text and image channels (order-insensitive).
        """
        # given
        numeric_values = [
            ChannelValue(x=i, y=i, ts=self._TS + i) for i in range(0, 3)
        ]
        text_values = [
            ChannelValue(x=i, y="text", ts=self._TS + i) for i in range(0, 3)
        ]
        image_values = [
            ChannelValue(x=i,
                         y={'image_value': {'data': "base64Image=="}},
                         ts=self._TS + i)
            for i in range(0, 3)
        ]
        # and
        sender = ChannelsValuesSender(experiment=self._EXPERIMENT)

        # when: channels are fed one after another, in a fixed order
        per_channel = (
            (self._NUMERIC_CHANNEL, numeric_values),
            (self._TEXT_CHANNEL, text_values),
            (self._IMAGE_CHANNEL, image_values),
        )
        for channel, values in per_channel:
            for channel_value in values:
                sender.send(channel.name, channel.channelType, channel_value)

        # and
        sender.join()

        # then
        # pylint: disable=protected-access
        positional, _ = self._EXPERIMENT._send_channels_values.call_args
        self.assertEqual(len(positional), 1)
        expected = [
            ChannelIdWithValues(channel_id=self._NUMERIC_CHANNEL.id,
                                channel_values=numeric_values),
            ChannelIdWithValues(channel_id=self._TEXT_CHANNEL.id,
                                channel_values=text_values),
            ChannelIdWithValues(channel_id=self._IMAGE_CHANNEL.id,
                                channel_values=image_values),
        ]
        self.assertEqual(sorted(positional[0]), sorted(expected))
Exemplo n.º 3
0
 def __init__(self, client, _id, internal_id, project_full_id):
     """Store the experiment's identity and initialise its runtime slots.

     Args:
         client: Client object; kept on the instance.
         _id: Experiment id; kept on the instance.
         internal_id: Internal id; kept on the instance.
         project_full_id: Full project id; kept on the instance.
     """
     self._client = client
     self._id, self._internal_id = _id, internal_id
     self._project_full_id = project_full_id
     self._channels_values_sender = ChannelsValuesSender(self)
     # Thread and uploader slots start empty.
     self._ping_thread = self._hardware_metric_thread = self._aborting_thread = None
     self._stdout_uploader = self._stderr_uploader = None
     # Starts out as the interpreter's original excepthook.
     self._uncaught_exception_handler = sys.__excepthook__
Exemplo n.º 4
0
    def test_send_when_waiting_for_next_value_timed_out(self):
        """Check that queued values are sent without an explicit ``join()``.

        The mocked ``_send_channels_values`` releases a semaphore, so the
        test blocks until the first send has happened, verifies its
        argument, and then checks that ``join()`` triggers no further send.
        """
        # given
        numeric_values = [
            ChannelValue(x=i, y=i, ts=self._TS + i) for i in range(0, 3)
        ]

        # and: the mocked send signals this semaphore when invoked
        sent_signal = threading.Semaphore(0)

        def _notify(_):
            return sent_signal.release()

        # pylint: disable=protected-access
        self._EXPERIMENT._send_channels_values.side_effect = _notify

        # and
        sender = ChannelsValuesSender(experiment=self._EXPERIMENT)

        # when
        numeric_channel = self._NUMERIC_CHANNEL
        for channel_value in numeric_values:
            sender.send(numeric_channel.name, numeric_channel.channelType,
                        channel_value)

        # then: block until the background send has been performed
        # pylint: disable=protected-access
        sent_signal.acquire()
        expected_batch = ChannelIdWithValues(
            channel_id=self._NUMERIC_CHANNEL.id,
            channel_values=numeric_values)
        self._EXPERIMENT._send_channels_values.assert_called_with(
            [expected_batch])

        # and
        self._EXPERIMENT._send_channels_values.reset_mock()
        sender.join()
        # and: nothing was left to flush
        self._EXPERIMENT._send_channels_values.assert_not_called()
Exemplo n.º 5
0
    def test_send_values_on_join(self):
        """Queue one text value, join, and verify the resulting send call.

        After ``join()``, ``_send_channels_values`` must have been last
        called with a single ``ChannelIdWithValues`` describing the text
        channel in the ``USER`` namespace and holding the queued value.
        """
        # given
        channel_value = ChannelValue(x=1, y="value", ts=self._TS)
        # and
        sender = ChannelsValuesSender(experiment=self._EXPERIMENT)

        # when
        sender.send(self._TEXT_CHANNEL.name,
                    self._TEXT_CHANNEL.channelType,
                    channel_value)
        # and
        sender.join()

        # then
        # pylint: disable=protected-access
        expected_batch = ChannelIdWithValues(
            channel_id=self._TEXT_CHANNEL.id,
            channel_name=self._TEXT_CHANNEL.name,
            channel_type=self._TEXT_CHANNEL.channelType,
            channel_namespace=ChannelNamespace.USER,
            channel_values=[channel_value],
        )
        self._EXPERIMENT._send_channels_values.assert_called_with(
            [expected_batch])
Exemplo n.º 6
0
    def test_send_when_waiting_for_next_value_timed_out(self):
        """Check that queued values are sent without an explicit ``join()``.

        The test sleeps for twice ``self.__TIMEOUT`` after queueing the
        values, verifies the send call, and then checks that ``join()``
        triggers no further send.
        """
        # given
        numeric_values = [
            ChannelValue(x=i, y=i, ts=self._TS + i) for i in range(0, 3)
        ]

        # and
        sender = ChannelsValuesSender(experiment=self._EXPERIMENT)

        # when
        numeric_channel = self._NUMERIC_CHANNEL
        for channel_value in numeric_values:
            sender.send(numeric_channel.name, numeric_channel.channelType,
                        channel_value)

        # and: give the sender time to act on its own
        time.sleep(self.__TIMEOUT * 2)

        # then
        # pylint: disable=protected-access
        expected_batch = ChannelIdWithValues(
            channel_id=self._NUMERIC_CHANNEL.id,
            channel_values=numeric_values)
        self._EXPERIMENT._send_channels_values.assert_called_with(
            [expected_batch])

        # and
        self._EXPERIMENT._send_channels_values.reset_mock()
        sender.join()
        # and: nothing was left to flush
        self._EXPERIMENT._send_channels_values.assert_not_called()
Exemplo n.º 7
0
    def test_send_images_in_smaller_batches(self):
        """Send three batches' worth of large images and check the split.

        Each image payload is built to be longer than
        ``_IMAGES_BATCH_IMAGE_SIZE``; ``_IMAGES_BATCH_SIZE * 3`` of them are
        queued. The recorded calls must be exactly three, holding the
        first, second and remaining slices of the queued values.
        """
        # given
        value = "base64Image=="

        def _image_value(i):
            # One base chunk plus enough repetitions to exceed the
            # configured per-image size.
            data = value + value * int(self._IMAGES_BATCH_IMAGE_SIZE /
                                       (len(value)))
            return ChannelValue(x=i,
                                y={'image_value': {'data': data}},
                                ts=self._TS + i)

        channels_values = [
            _image_value(i) for i in range(0, self._IMAGES_BATCH_SIZE * 3)
        ]
        # and
        sender = ChannelsValuesSender(experiment=self._EXPERIMENT)

        # when
        for channel_value in channels_values:
            sender.send(self._IMAGE_CHANNEL.name,
                        self._IMAGE_CHANNEL.channelType,
                        channel_value)
        # and
        sender.join()

        # then
        # pylint: disable=protected-access
        batch = self._IMAGES_BATCH_SIZE
        expected_slices = (
            channels_values[0:batch],
            channels_values[batch:batch * 2],
            channels_values[batch * 2:],
        )
        expected_calls = [
            mock.call._send_channels_values([
                ChannelIdWithValues(channel_id=self._IMAGE_CHANNEL.id,
                                    channel_values=chunk)
            ]) for chunk in expected_slices
        ]
        self.assertEqual(self._EXPERIMENT._send_channels_values.mock_calls,
                         expected_calls)
Exemplo n.º 8
0
    def test_send_values_in_multiple_batches(self):
        """Send three batches' worth of text values and check the split.

        ``_BATCH_SIZE * 3`` values are queued on the text channel. The
        recorded calls must be exactly three, each holding one consecutive
        ``_BATCH_SIZE``-long slice of the queued values.
        """
        # given
        channels_values = [
            ChannelValue(x=i, y="value{}".format(i), ts=self._TS + i)
            for i in range(0, self._BATCH_SIZE * 3)
        ]
        # and
        sender = ChannelsValuesSender(experiment=self._EXPERIMENT)

        # when
        for channel_value in channels_values:
            sender.send(self._TEXT_CHANNEL.name,
                        self._TEXT_CHANNEL.channelType,
                        channel_value)
        # and
        sender.join()

        # then
        # pylint: disable=protected-access
        batch = self._BATCH_SIZE
        expected_slices = (
            channels_values[0:batch],
            channels_values[batch:batch * 2],
            channels_values[batch * 2:batch * 3],
        )
        expected_calls = [
            mock.call._send_channels_values([
                ChannelIdWithValues(channel_id=self._TEXT_CHANNEL.id,
                                    channel_values=chunk)
            ]) for chunk in expected_slices
        ]
        self.assertEqual(self._EXPERIMENT._send_channels_values.mock_calls,
                         expected_calls)
Exemplo n.º 9
0
class Experiment(object):
    """A class for managing Neptune experiment.

    Each time a user creates a new experiment, an instance of this class is created.
    It lets you manage experiment, :meth:`~neptune.experiments.Experiment.log_metric`,
    :meth:`~neptune.experiments.Experiment.log_text`,
    :meth:`~neptune.experiments.Experiment.log_image`,
    :meth:`~neptune.experiments.Experiment.set_property`,
    and much more.


    Args:
        backend (:obj:`neptune.Backend`): A Backend object
        project (:obj:`neptune.Project`): The project this experiment belongs to
        _id (:obj:`str`): Experiment id
        internal_id (:obj:`str`): internal UUID

    Example:
        Assuming that `project` is an instance of :class:`~neptune.projects.Project`.

        .. code:: python3

            experiment = project.create_experiment()

    Warning:
        User should never create instances of this class manually.
        Always use: :meth:`~neptune.projects.Project.create_experiment`.

    """

    IMAGE_SIZE_LIMIT = 2097152

    def __init__(self, backend, project, _id, internal_id):
        """Keep the given identity values and build the helper objects.

        Args:
            backend: Backend object used for all server calls made by this
                experiment; also passed to the execution context.
            project: Project object this experiment is attached to.
            _id: Experiment id, exposed through the ``id`` property.
            internal_id: Internal id, exposed through ``internal_id``.
        """
        # Plain state first ...
        self._backend, self._project = backend, project
        self._id, self._internal_id = _id, internal_id
        # ... then the collaborators that are handed this instance.
        self._channels_values_sender = ChannelsValuesSender(self)
        self._execution_context = ExecutionContext(backend, self)

    @property
    def id(self):
        """Experiment short id

        | Combination of project key and unique experiment number.
        | Format is ``<project_key>-<experiment_number>``, for example: ``MPI-142``.

        Returns:
            :obj:`str` - experiment short id

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                exp_id = experiment.id

        """
        return self._id

    @property
    def name(self):
        """Experiment name

        Returns:
            :obj:`str` experiment name

        Examples:
            Assuming that `project` is an instance of :class:`~neptune.projects.Project`.

            .. code:: python3

                experiment = project.create_experiment('exp_name')
                exp_name = experiment.name
        """
        return self._backend.get_experiment(self._internal_id).name

    @property
    def state(self):
        """Current experiment state

        Possible values: `'running'`, `'succeeded'`, `'failed'`, `'aborted'`.

        Returns:
            :obj:`str` - current experiment state

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                state_str = experiment.state
        """
        return self._backend.get_experiment(self._internal_id).state

    @property
    def internal_id(self):
        return self._internal_id

    @property
    def limits(self):
        return {'channels': {'numeric': 1000, 'text': 100, 'image': 100}}

    def get_system_properties(self):
        """Retrieve experiment properties.

        | Experiment properties are for example: `owner`, `created`, `name`, `hostname`.
        | List of experiment properties may change over time.

        Returns:
            :obj:`dict` - dictionary mapping a property name to value.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                sys_properties = experiment.get_system_properties
        """
        experiment = self._backend.get_experiment(self._internal_id)
        return {
            'id': experiment.shortId,
            'name': experiment.name,
            'created': experiment.timeOfCreation,
            'finished': experiment.timeOfCompletion,
            'running_time': experiment.runningTime,
            'owner': experiment.owner,
            'storage_size': experiment.storageSize,
            'channels_size': experiment.channelsSize,
            'size': experiment.storageSize + experiment.channelsSize,
            'tags': experiment.tags,
            'notes': experiment.description,
            'description': experiment.description,
            'hostname': experiment.hostname
        }

    def get_tags(self):
        """Get tags associated with experiment.

        Returns:
            :obj:`list` of :obj:`str` with all tags for this experiment.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                experiment.get_tags()
        """
        return self._backend.get_experiment(self._internal_id).tags

    def append_tag(self, tag, *tags):
        """Append tag(s) to the current experiment.

        Alias: :meth:`~neptune.experiments.Experiment.append_tags`.
        Only ``[a-zA-Z0-9]`` and ``-`` (dash) characters are allowed in tags.

        Args:
            tag (single :obj:`str` or multiple :obj:`str` or :obj:`list` of :obj:`str`):
                Tag(s) to add to the current experiment.

                    * If :obj:`str` is passed, singe tag is added.
                    * If multiple - comma separated - :obj:`str` are passed, all of them are added as tags.
                    * If :obj:`list` of :obj:`str` is passed, all elements of the :obj:`list` are added as tags.

        Examples:

            .. code:: python3

                neptune.append_tag('new-tag')  # single tag
                neptune.append_tag('first-tag', 'second-tag', 'third-tag')  # few str
                neptune.append_tag(['first-tag', 'second-tag', 'third-tag'])  # list of str
        """
        if isinstance(tag, list):
            tags_list = tag
        else:
            tags_list = [tag] + list(tags)
        self._backend.update_tags(experiment=self,
                                  tags_to_add=tags_list,
                                  tags_to_delete=[])

    def append_tags(self, tag, *tags):
        """Append tag(s) to the current experiment.

        Alias for: :meth:`~neptune.experiments.Experiment.append_tag`
        """
        self.append_tag(tag, *tags)

    def remove_tag(self, tag):
        """Removes single tag from the experiment.

        Args:
            tag (:obj:`str`): Tag to be removed

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                # assuming experiment has tags: `['tag-1', 'tag-2']`.
                experiment.remove_tag('tag-1')

        Note:
            Removing a tag that is not assigned to this experiment is silently ignored.
        """
        self._backend.update_tags(experiment=self,
                                  tags_to_add=[],
                                  tags_to_delete=[tag])

    def get_channels(self):
        """Alias for :meth:`~neptune.experiments.Experiment.get_logs`
        """
        return self.get_logs()

    def get_logs(self):
        """Retrieve all log names along with their last values for this experiment.

        Returns:
            :obj:`dict` - A dictionary mapping a log names to the log's last value.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                exp_logs = experiment.get_logs()
        """
        experiment = self._backend.get_experiment(self.internal_id)
        channels_last_values_by_name = dict(
            (ch.channelName, ch) for ch in experiment.channelsLastValues)
        channels = dict()
        for ch in experiment.channels:
            last_value = channels_last_values_by_name.get(ch.name, None)
            if last_value is not None:
                ch.x = last_value.x
                ch.y = last_value.y
            elif ch.lastX is not None:
                ch.x = ch.lastX
                ch.y = None
            else:
                ch.x = None
                ch.y = None
            channels[ch.name] = ch
        return channels

    def _get_system_channels(self):
        channels = self._backend.get_system_channels(self)
        return dict((ch.name, ch) for ch in channels)

    def send_metric(self, channel_name, x, y=None, timestamp=None):
        """Log metrics (numeric values) in Neptune.

        Alias for :meth:`~neptune.experiments.Experiment.log_metric`
        """
        return self.log_metric(channel_name, x, y, timestamp)

    def log_metric(self, log_name, x, y=None, timestamp=None):
        """Log metrics (numeric values) in Neptune

        | If a log with provided ``log_name`` does not exist, it is created automatically.
        | If log exists (determined by ``log_name``), then new value is appended to it.

        Args:
            log_name (:obj:`str`): The name of log, i.e. `mse`, `loss`, `accuracy`.
            x (:obj:`double`): Depending, whether ``y`` parameter is passed:

                * ``y`` not passed: The value of the log (data-point).
                * ``y`` passed: Index of log entry being appended. Must be strictly increasing.

            y (:obj:`double`, optional, default is ``None``): The value of the log (data-point).
            timestamp (:obj:`time`, optional, default is ``None``):
                Timestamp to be associated with log entry. Must be Unix time.
                If ``None`` is passed, `time.time() <https://docs.python.org/3.6/library/time.html#time.time>`_
                (Python 3.6 example) is invoked to obtain timestamp.

        Raises:
            `InvalidChannelValue`: When the resolved value is not a float.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment` and
            'accuracy' log does not exists:

            .. code:: python3

                # Both calls below have the same effect

                # Common invocation, providing log name and value
                experiment.log_metric('accuracy', 0.5)
                experiment.log_metric('accuracy', 0.65)
                experiment.log_metric('accuracy', 0.8)

                # Providing both x and y params
                experiment.log_metric('accuracy', 0, 0.5)
                experiment.log_metric('accuracy', 1, 0.65)
                experiment.log_metric('accuracy', 2, 0.8)

        Note:
            For efficiency, logs are uploaded in batches via a queue.
            Hence, if you log a lot of data, you may experience slight delays in Neptune web application.
        Note:
            Passing either ``x`` or ``y`` coordinate as NaN or +/-inf causes this log entry to be ignored.
            A warning is logged in that case.
        """
        x, y = self._get_valid_x_y(x, y)

        if not is_float(y):
            raise InvalidChannelValue(expected_type='float',
                                      actual_type=type(y).__name__)

        # Guard clauses: an unusable value or index is reported and dropped.
        if is_nan_or_inf(y):
            _logger.warning(
                'Invalid metric value: %s for channel %s. '
                'Metrics with nan or +/-inf values will not be sent to server',
                y, log_name)
            return

        if x is not None and is_nan_or_inf(x):
            _logger.warning(
                'Invalid metric x-coordinate: %s for channel %s. '
                'Metrics with nan or +/-inf x-coordinates will not be sent to server',
                x, log_name)
            return

        entry = ChannelValue(x, dict(numeric_value=y), timestamp)
        self._channels_values_sender.send(log_name,
                                          ChannelType.NUMERIC.value, entry)

    def send_text(self, channel_name, x, y=None, timestamp=None):
        """Log text data in Neptune.

        Alias for :meth:`~neptune.experiments.Experiment.log_text`
        """
        return self.log_text(channel_name, x, y, timestamp)

    def log_text(self, log_name, x, y=None, timestamp=None):
        """Log text data in Neptune

        | If a log with provided ``log_name`` does not exist, it is created automatically.
        | If log exists (determined by ``log_name``), then new value is appended to it.

        Args:
            log_name (:obj:`str`): The name of log, i.e. `mse`, `my_text_data`, `timing_info`.
            x (:obj:`double` or :obj:`str`): Depending, whether ``y`` parameter is passed:

                * ``y`` not passed: The value of the log (data-point). Must be ``str``.
                * ``y`` passed: Index of log entry being appended. Must be strictly increasing.

            y (:obj:`str`, optional, default is ``None``): The value of the log (data-point).
            timestamp (:obj:`time`, optional, default is ``None``):
                Timestamp to be associated with log entry. Must be Unix time.
                If ``None`` is passed, `time.time() <https://docs.python.org/3.6/library/time.html#time.time>`_
                (Python 3.6 example) is invoked to obtain timestamp.

        Raises:
            `InvalidChannelValue`: When the resolved value is not a string.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                # common case, where log name and data are passed
                experiment.log_text('my_text_data', str(data_item))

                # log_name, x and timestamp are passed
                experiment.log_text(log_name='logging_losses_as_text',
                                    x=str(val_loss),
                                    timestamp=1560430912)

        Note:
            For efficiency, logs are uploaded in batches via a queue.
            Hence, if you log a lot of data, you may experience slight delays in Neptune web application.
        Note:
            Passing ``x`` coordinate as NaN or +/-inf does not drop the entry:
            the coordinate is replaced with ``None`` and the text is still sent.
        """
        x, y = self._get_valid_x_y(x, y)

        # An unusable index is discarded instead of rejecting the whole
        # entry. After this point ``x`` can no longer be NaN or +/-inf, so
        # no second check (and no warning branch) is needed before sending.
        if x is not None and is_nan_or_inf(x):
            x = None

        if not isinstance(y, six.string_types):
            raise InvalidChannelValue(expected_type='str',
                                      actual_type=type(y).__name__)

        value = ChannelValue(x, dict(text_value=y), timestamp)
        self._channels_values_sender.send(log_name, ChannelType.TEXT.value,
                                          value)

    def send_image(self,
                   channel_name,
                   x,
                   y=None,
                   name=None,
                   description=None,
                   timestamp=None):
        """Log image data in Neptune.

        Alias for :meth:`~neptune.experiments.Experiment.log_image`
        """
        return self.log_image(channel_name, x, y, name, description, timestamp)

    def log_image(self,
                  log_name,
                  x,
                  y=None,
                  image_name=None,
                  description=None,
                  timestamp=None):
        """Log image data in Neptune

        | If a log with provided ``log_name`` does not exist, it is created automatically.
        | If log exists (determined by ``log_name``), then new value is appended to it.

        Args:
            log_name (:obj:`str`): The name of log, i.e. `bboxes`, `visualisations`, `sample_images`.
            x (:obj:`double`): Depending, whether ``y`` parameter is passed:

                * ``y`` not passed: The value of the log (data-point). See ``y`` parameter.
                * ``y`` passed: Index of log entry being appended. Must be strictly increasing.

            y (multiple types supported, optional, default is ``None``):

                The value of the log (data-point). Can be one of the following types:

                * :obj:`PIL image`
                  `Pillow docs <https://pillow.readthedocs.io/en/latest/reference/Image.html#image-module>`_
                * :obj:`matplotlib.figure.Figure`
                  `Matplotlib 3.1.1 docs <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.figure.Figure.html>`_
                * :obj:`str` - path to image file
                * 2-dimensional :obj:`numpy.array` - interpreted as grayscale image
                * 3-dimensional :obj:`numpy.array` - behavior depends on last dimension

                    * if last dimension is 1 - interpreted as grayscale image
                    * if last dimension is 3 - interpreted as RGB image
                    * if last dimension is 4 - interpreted as RGBA image

            image_name (:obj:`str`, optional, default is ``None``): Image name
            description (:obj:`str`, optional, default is ``None``): Image description
            timestamp (:obj:`time`, optional, default is ``None``):
                Timestamp to be associated with log entry. Must be Unix time.
                If ``None`` is passed, `time.time() <https://docs.python.org/3.6/library/time.html#time.time>`_
                (Python 3.6 example) is invoked to obtain timestamp.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                # path to image file
                experiment.log_image('bbox_images', 'pictures/image.png')
                experiment.log_image('bbox_images', x=5, y='pictures/image.png')
                experiment.log_image('bbox_images', 'pictures/image.png', image_name='difficult_case')

                # PIL image
                img = PIL.Image.new('RGB', (60, 30), color = 'red')
                experiment.log_image('fig', img)

                # 2d numpy array
                array = numpy.random.rand(300, 200)*255
                experiment.log_image('fig', array)

                # 3d grayscale array
                array = numpy.random.rand(300, 200, 1)*255
                experiment.log_image('fig', array)

                # 3d RGB array
                array = numpy.random.rand(300, 200, 3)*255
                experiment.log_image('fig', array)

                # 3d RGBA array
                array = numpy.random.rand(300, 200, 4)*255
                experiment.log_image('fig', array)

                # matplotlib figure example 1
                from matplotlib import pyplot
                pyplot.plot([1, 2, 3, 4])
                pyplot.ylabel('some numbers')
                experiment.log_image('plots', pyplot.gcf())

                # matplotlib figure example 2
                from matplotlib import pyplot
                import numpy

                numpy.random.seed(19680801)
                data = numpy.random.randn(2, 100)

                figure, axs = pyplot.subplots(2, 2, figsize=(5, 5))
                axs[0, 0].hist(data[0])
                axs[1, 0].scatter(data[0], data[1])
                axs[0, 1].plot(data[0], data[1])
                axs[1, 1].hist2d(data[0], data[1])

                experiment.log_image('diagrams', figure)

        Note:
            For efficiency, logs are uploaded in batches via a queue.
            Hence, if you log a lot of data, you may experience slight delays in Neptune web application.
        Note:
            Passing ``x`` coordinate as NaN or +/-inf does not drop the entry:
            the coordinate is replaced with ``None`` and the image is still sent.
        Warning:
            Only images up to 2MB are supported. For a larger image a warning is logged and
            the entry is sent with its name and description only, without image data.
        """
        x, y = self._get_valid_x_y(x, y)

        # An unusable index is discarded instead of rejecting the whole
        # entry. After this point ``x`` can no longer be NaN or +/-inf, so
        # no second check (and no warning branch) is needed before sending.
        if x is not None and is_nan_or_inf(x):
            x = None

        image_content = get_image_content(y)
        if len(image_content) > self.IMAGE_SIZE_LIMIT:
            _logger.warning(
                'Your image is larger than 2MB. Neptune supports logging images smaller than 2MB. '
                'Resize or increase compression of this image')
            # Oversized payloads are dropped; the entry itself is kept.
            image_content = None

        input_image = dict(name=image_name, description=description)
        if image_content:
            input_image['data'] = base64.b64encode(image_content).decode(
                'utf-8')

        value = ChannelValue(x, dict(image_value=input_image), timestamp)
        self._channels_values_sender.send(log_name,
                                          ChannelType.IMAGE.value, value)

    def send_artifact(self, artifact, destination=None):
        """Save an artifact (file) in experiment storage.

        Alias for :meth:`~neptune.experiments.Experiment.log_artifact`
        """
        return self.log_artifact(artifact, destination)

    def log_artifact(self, artifact, destination=None):
        """Save an artifact (file) in experiment storage.

        The local path is made absolute and uploaded under a normalized
        target name: ``destination`` when given, otherwise the base name of
        ``artifact``.

        Args:
            artifact (:obj:`str`): A path to the file in local filesystem.
            destination (:obj:`str`, optional, default is ``None``):
                A destination path.
                If ``None`` is passed, an artifact file name will be used.

        Raises:
            `FileNotFound`: When ``artifact`` file was not found.
            `StorageLimitReached`: When storage limit in the project has been reached.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                # simple use
                experiment.log_artifact('images/wrong_prediction_1.png')

                # save file in other directory
                experiment.log_artifact('images/wrong_prediction_1.png', 'validation/images/wrong_prediction_1.png')

                # save file under different name
                experiment.log_artifact('images/wrong_prediction_1.png', 'images/my_image_1.png')
        """
        if not os.path.exists(artifact):
            raise FileNotFound(artifact)

        if destination is None:
            target_name = os.path.basename(artifact)
        else:
            target_name = destination

        entry = UploadEntry(os.path.abspath(artifact),
                            normalize_file_name(target_name))
        upload_to_storage(
            upload_entries=[entry],
            upload_api_fun=self._backend.upload_experiment_output,
            upload_tar_api_fun=self._backend.extract_experiment_output,
            experiment=self)

    def download_artifact(self, path, destination_dir=None):
        """Fetch a single artifact (file) from the experiment storage.

        The file stored under ``path`` is saved in ``destination_dir`` under its base name.

        Args:
            path (:obj:`str`): Path of the artifact inside the experiment storage.
            destination_dir (:obj:`str`):
                Directory in which the file is saved; it is created when missing.
                When empty or ``None``, the current working directory is used.

        Raises:
            `NotADirectory`: When ``destination_dir`` exists but is not a directory.
            `FileNotFound`: When ``path`` does not exist among the experiment artifacts.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                experiment.download_artifact('forest_results.pkl', '/home/user/files/')
        """
        target_dir = destination_dir or os.getcwd()

        storage_path = "/{exp_id}/output/{file}".format(exp_id=self.id,
                                                        file=path)
        local_path = os.path.join(target_dir, os.path.basename(path))

        if os.path.exists(target_dir):
            if not os.path.isdir(target_dir):
                raise NotADirectory(target_dir)
        else:
            os.makedirs(target_dir)

        try:
            self._backend.download_data(self._project, storage_path,
                                        local_path)
        except PathInProjectNotFound:
            raise FileNotFound(path)

    def download_sources(self, path=None, destination_dir=None):
        """Download experiment sources (a directory or a single file) as a ZIP archive.

        The subdirectory or file ``path`` of the experiment sources is saved in ``destination_dir``
        as a ZIP archive named after the downloaded directory (or file) with a '.zip' extension.

        Args:
            path (:obj:`str`):
                Directory or file inside the experiment sources.
                When empty or ``None``, all source files are downloaded.

            destination_dir (:obj:`str`): Directory in which the archive is saved; created when missing.
                When empty or ``None``, the current working directory is used.

        Raises:
            `NotADirectory`: When ``destination_dir`` exists but is not a directory.
            `FileNotFound`: When ``path`` does not exist in the experiment sources.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                # Everything, into the current working directory
                experiment.download_sources()

                # One directory only
                experiment.download_sources('src/my-module')

                # Everything, into a chosen directory
                experiment.download_sources(destination_dir='/tmp/sources/')

                # One directory, into a chosen directory
                experiment.download_sources('src/my-module', 'sources/')
        """
        path = path or ""
        destination_dir = destination_dir or os.getcwd()

        if os.path.exists(destination_dir):
            if not os.path.isdir(destination_dir):
                raise NotADirectory(destination_dir)
        else:
            os.makedirs(destination_dir)

        request = self._backend.prepare_source_download_reuqest(self, path)
        self._download_from_request(request, destination_dir, path)

    def download_artifacts(self, path=None, destination_dir=None):
        """Download experiment artifacts (a directory or a single file) as a ZIP archive.

        The subdirectory or file ``path`` of the experiment artifacts is saved in ``destination_dir``
        as a ZIP archive named after the downloaded directory (or file) with a '.zip' extension.

        Args:
            path (:obj:`str`):
                Directory or file inside the experiment artifacts.
                When empty or ``None``, all artifacts are downloaded.

            destination_dir (:obj:`str`): Directory in which the archive is saved; created when missing.
                When empty or ``None``, the current working directory is used.

        Raises:
            `NotADirectory`: When ``destination_dir`` exists but is not a directory.
            `FileNotFound`: When ``path`` does not exist in the experiment artifacts.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                # Everything, into the current working directory
                experiment.download_artifacts()

                # One directory only
                experiment.download_artifacts('data/images')

                # Everything, into a chosen directory
                experiment.download_artifacts(destination_dir='/tmp/artifacts/')

                # One directory, into a chosen directory
                experiment.download_artifacts('data/images', 'artifacts/')
        """
        path = path or ""
        destination_dir = destination_dir or os.getcwd()

        if os.path.exists(destination_dir):
            if not os.path.isdir(destination_dir):
                raise NotADirectory(destination_dir)
        else:
            os.makedirs(destination_dir)

        request = self._backend.prepare_output_download_reuqest(self, path)
        self._download_from_request(request, destination_dir, path)

    def _download_from_request(self, download_request, destination_dir, path):
        """Wait until a download request is ready and save its ZIP archive locally.

        The backend is polled until the request exposes a ``downloadUrl``; the
        archive is then streamed into ``destination_dir``. The file name comes from
        the ``Content-Disposition`` response header when it provides a usable one,
        otherwise from the base name of ``path`` with a '.zip' extension.

        Args:
            download_request: Request object obtained from the backend. It is
                re-fetched by its ``id`` until it has a ``downloadUrl`` attribute.
            destination_dir (:obj:`str`): Directory in which the archive is written.
            path (:obj:`str`): Path that was requested; used for the fallback file name.

        Raises:
            `requests.HTTPError`: When the download URL answers with an error status.
        """
        sleep_time = 1
        max_sleep_time = 16
        # Poll with exponential back-off (1s, 2s, 4s, ... capped at 16s)
        # until the request carries a ``downloadUrl``.
        while not hasattr(download_request, "downloadUrl"):
            time.sleep(sleep_time)
            sleep_time = min(sleep_time * 2, max_sleep_time)
            download_request = self._backend.get_download_request(
                download_request.id)

        # We do not use Backend here cause `downloadUrl` can be any url (not only Neptune API endpoint)
        response = requests.get(url=download_request.downloadUrl,
                                headers={"Accept": "application/zip"},
                                stream=True)

        with response:
            # Fail loudly instead of saving an HTTP error page as a '.zip' file.
            response.raise_for_status()

            filename = None
            content_disposition = response.headers.get('content-disposition')
            if content_disposition:
                # Capture the bare name only: optional surrounding quotes and any
                # parameters that follow (e.g. '; filename*=...') are left out.
                match = re.search(r'filename="?([^";]+)"?', content_disposition)
                if match:
                    # The header is controlled by the remote server: keep just the
                    # last path component so nothing is written outside destination_dir.
                    filename = os.path.basename(match.group(1).strip())
                    if filename in ('', '.', '..'):
                        filename = None

            if not filename:
                filename = os.path.basename(path.rstrip("/")) + ".zip"

            destination_path = os.path.join(destination_dir, filename)
            with open(destination_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=10 * 1024 *
                                                   1024):
                    if chunk:
                        f.write(chunk)

    def reset_log(self, log_name):
        """Reset a log so that it can be reused from scratch.

        All data is removed from the log.

        Args:
            log_name (:obj:`str`): Name of the log to reset.

        Raises:
            `ChannelDoesNotExist`: When no log called ``log_name`` exists on the server.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                experiment.reset_log('my_metric')

        Note:
            Check Neptune web application to see that reset charts have no data.
        """
        found = self._find_channel(log_name, ChannelNamespace.USER)
        if found is not None:
            self._backend.reset_channel(found.id)
            return
        raise ChannelDoesNotExist(self.id, log_name)

    def get_parameters(self):
        """Fetch the parameters of this experiment.

        Each value is passed through ``_convert_parameter_value`` together with its
        parameter type.

        Returns:
            :obj:`dict` - dictionary mapping a parameter name to value.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                exp_params = experiment.get_parameters()
        """
        fetched = self._backend.get_experiment(self.internal_id)
        return {
            param.name: self._convert_parameter_value(param.value,
                                                      param.parameterType)
            for param in fetched.parameters
        }

    def get_properties(self):
        """Fetch the user-defined properties of this experiment.

        Returns:
            :obj:`dict` - dictionary mapping a property key to value.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`.

            .. code:: python3

                exp_properties = experiment.get_properties()
        """
        fetched = self._backend.get_experiment(self.internal_id)
        return {prop.key: prop.value for prop in fetched.properties}

    def set_property(self, key, value):
        """Store a `key-value` pair as an experiment property.

        The current properties are fetched, ``key`` is added or overwritten with
        ``str(value)``, and the whole set is sent back through the backend.

        Args:
            key (:obj:`str`): Property key.
            value (:obj:`obj`): New value of a property.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                experiment.set_property('model', 'LightGBM')
                experiment.set_property('magic-number', 7)
        """
        fetched = self._backend.get_experiment(self.internal_id)
        properties = dict((prop.key, prop.value) for prop in fetched.properties)
        properties[key] = str(value)
        return self._backend.update_experiment(experiment=self,
                                               properties=properties)

    def remove_property(self, key):
        """Remove the property stored under ``key``.

        The current properties are fetched, ``key`` is dropped, and the remaining
        set is sent back through the backend. A ``KeyError`` is raised when the
        experiment has no such property.

        Args:
            key (single :obj:`str`):
                Key of property to remove.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                experiment.remove_property('host')
        """
        fetched = self._backend.get_experiment(self.internal_id)
        properties = dict((prop.key, prop.value) for prop in fetched.properties)
        properties.pop(key)
        return self._backend.update_experiment(experiment=self,
                                               properties=properties)

    def get_hardware_utilization(self):
        """Fetch GPU, CPU and memory utilization data.

        Hardware utilization metrics for the entire experiment are returned as a single
        `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
        object with the following columns (assuming single GPU with 0 index):

            * `x_ram` - time (in milliseconds) from the experiment start,
            * `y_ram` - memory usage in GB,
            * `x_cpu` - time (in milliseconds) from the experiment start,
            * `y_cpu` - CPU utilization percentage (0-100),
            * `x_gpu_util_0` - time (in milliseconds) from the experiment start,
            * `y_gpu_util_0` - GPU utilization percentage (0-100),
            * `x_gpu_mem_0` - time (in milliseconds) from the experiment start,
            * `y_gpu_mem_0` - GPU memory usage in GB.

        | Additional GPUs get their own columns with appropriate indices (0, 1, 2, ...),
          for example: `x_gpu_util_1`, `y_gpu_util_1`.
        | The returned DataFrame may contain ``NaN`` s if one of the metrics has more values than others.

        Returns:
            :obj:`pandas.DataFrame` - DataFrame containing the hardware utilization metrics;
            an empty DataFrame when the backend returns no CSV data.

        Examples:
            The following values denote that after 3 seconds, the experiment used 16.7 GB of RAM

                * `x_ram` = 3000
                * `y_ram` = 16.7

            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                hardware_df = experiment.get_hardware_utilization()
        """
        raw_csv = self._backend.get_metrics_csv(self)
        try:
            utilization = pd.read_csv(raw_csv)
        except EmptyDataError:
            # Nothing to parse: hand back an empty frame instead of failing.
            utilization = pd.DataFrame()
        return utilization

    def get_numeric_channels_values(self, *channel_names):
        """Fetch the values of the given metrics (numeric logs).

        The returned
        `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
        contains 1 additional column `x` along with the requested metrics.

        Args:
            *channel_names (one or more :obj:`str`): comma-separated metric names.

        Returns:
            :obj:`pandas.DataFrame` - DataFrame containing values for the requested metrics.

            | The returned DataFrame may contain ``NaN`` s if one of the metrics has more values than others.

        Example:
            Invoking ``get_numeric_channels_values('loss', 'auc')`` returns DataFrame with columns
            `x`, `loss`, `auc`.

            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                batch_channels = experiment.get_numeric_channels_values('batch-1-loss', 'batch-2-metric')
                epoch_channels = experiment.get_numeric_channels_values('epoch-1-loss', 'epoch-2-metric')

        Note:
            It's good idea to get metrics with common temporal pattern (like iteration or batch/epoch number).
            Thanks to this each row of returned DataFrame has metrics from the same moment in experiment.
            For example, combine epoch metrics to one DataFrame and batch metrics to the other.
        """

        channels_by_name = self.get_channels()
        frames = {}
        for channel_name in channel_names:
            channel_id = channels_by_name[channel_name].id
            # Every channel contributes its own pair of x/y columns.
            columns = [
                'x_{}'.format(channel_name),
                'y_{}'.format(channel_name)
            ]
            try:
                frame = pd.read_csv(
                    self._backend.get_channel_points_csv(self, channel_id),
                    header=None,
                    names=columns,
                    dtype=float)
            except EmptyDataError:
                # A channel without points still gets its (empty) columns.
                frame = pd.DataFrame(columns=columns, dtype=float)
            frames[channel_name] = frame

        combined = pd.concat(frames.values(), axis=1, sort=False)
        return align_channels_on_x(combined)

    def _start(self,
               upload_source_entries=None,
               abort_callback=None,
               logger=None,
               upload_stdout=True,
               upload_stderr=True,
               send_hardware_metrics=True,
               run_monitoring_thread=True,
               handle_uncaught_exceptions=True):
        """Upload the source entries, then start the execution context.

        ``upload_source_entries`` goes to ``upload_to_storage``; every other
        argument is forwarded unchanged to ``self._execution_context.start``.
        """
        upload_to_storage(
            upload_entries=upload_source_entries,
            upload_api_fun=self._backend.upload_experiment_source,
            upload_tar_api_fun=self._backend.extract_experiment_source,
            experiment=self)

        context_options = dict(
            abort_callback=abort_callback,
            logger=logger,
            upload_stdout=upload_stdout,
            upload_stderr=upload_stderr,
            send_hardware_metrics=send_hardware_metrics,
            run_monitoring_thread=run_monitoring_thread,
            handle_uncaught_exceptions=handle_uncaught_exceptions)
        self._execution_context.start(**context_options)

    def stop(self, exc_tb=None):
        """Mark the experiment as finished (succeeded or failed).

        Pending channel values are flushed first; afterwards the execution context
        is stopped and the experiment is popped from its project. An experiment
        that is already finished is tolerated silently.

        Args:
            exc_tb (:obj:`str`, optional, default is ``None``): Additional traceback information
                to be stored in experiment details in case of failure (stacktrace, etc).
                With ``None`` the experiment is marked as succeeded,
                with anything else it is marked as failed.

        Examples:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                # Marks experiment as succeeded
                experiment.stop()

                # Assuming 'ex' is some exception,
                # it marks experiment as failed with exception info in experiment details.
                experiment.stop(str(ex))
        """

        self._channels_values_sender.join()

        try:
            if exc_tb is not None:
                self._backend.mark_failed(self, exc_tb)
            else:
                self._backend.mark_succeeded(self)
        except ExperimentAlreadyFinished:
            # The final state is already recorded; nothing left to mark.
            pass

        self._execution_context.stop()

        # pylint: disable=protected-access
        self._project._pop_stopped_experiment()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_tb is None:
            self.stop()
        else:
            self.stop("\n".join(traceback.format_tb(exc_tb)) + "\n" +
                      repr(exc_val))

    def __str__(self):
        return 'Experiment({})'.format(self.id)

    def __repr__(self):
        return str(self)

    def __eq__(self, o):
        # pylint: disable=protected-access
        return self._id == o._id and self._internal_id == o._internal_id and self._project == o._project

    def __ne__(self, o):
        return not self.__eq__(o)

    @staticmethod
    def _convert_parameter_value(value, parameter_type):
        if parameter_type == 'double':
            return float(value)
        else:
            return value

    @staticmethod
    def _get_valid_x_y(x, y):
        """
        The goal of this function is to allow user to call experiment.log_* with any of:
            - single parameter treated as y value
            - both parameters (named/unnamed)
            - single named y parameter
        If intended X-coordinate is provided, it is validated to be a float value
        """
        if x is None and y is None:
            raise NoChannelValue()

        if x is None and y is not None:
            return None, y

        if x is not None and y is None:
            return None, x

        if x is not None and y is not None:
            if not is_float(x):
                raise InvalidChannelValue(expected_type='float',
                                          actual_type=type(x).__name__)
            return x, y

    def _send_channels_values(self, channels_with_values):
        self._backend.send_channels_values(self, channels_with_values)

    def _get_channels(self, channels_names_with_types):
        existing_channels = self.get_channels()
        channels_by_name = {}
        for (channel_name, channel_type) in channels_names_with_types:
            channel = existing_channels.get(channel_name, None)
            if channel is None:
                channel = self._create_channel(channel_name, channel_type)
            channels_by_name[channel.name] = channel
        return channels_by_name

    def _get_channel(self,
                     channel_name,
                     channel_type,
                     channel_namespace=ChannelNamespace.USER):
        """Return the channel called ``channel_name``, creating it when it is not found."""
        existing = self._find_channel(channel_name, channel_namespace)
        if existing is not None:
            return existing
        return self._create_channel(channel_name, channel_type,
                                    channel_namespace)

    def _find_channel(self, channel_name, channel_namespace):
        """Look up a channel by name in the user or system namespace.

        Returns:
            The channel, or ``None`` when the namespace has no channel of that name.

        Raises:
            RuntimeError: When ``channel_namespace`` is neither USER nor SYSTEM.
        """
        if channel_namespace == ChannelNamespace.USER:
            candidates = self.get_channels()
        elif channel_namespace == ChannelNamespace.SYSTEM:
            candidates = self._get_system_channels()
        else:
            raise RuntimeError(
                "Unknown channel namespace {}".format(channel_namespace))
        return candidates.get(channel_name, None)

    def _create_channel(self,
                        channel_name,
                        channel_type,
                        channel_namespace=ChannelNamespace.USER):
        """Create a channel through the backend in the requested namespace.

        Raises:
            RuntimeError: When ``channel_namespace`` is neither USER nor SYSTEM.
        """
        if channel_namespace == ChannelNamespace.USER:
            create = self._backend.create_channel
        elif channel_namespace == ChannelNamespace.SYSTEM:
            create = self._backend.create_system_channel
        else:
            raise RuntimeError(
                "Unknown channel namespace {}".format(channel_namespace))
        return create(self, channel_name, channel_type)
Exemplo n.º 10
0
class Experiment(object):
    """It contains all the information about a Neptune Experiment

    This class lets you extract experiment by, short experiment id, names of all the channels,
    system properties and other properties, parameters, numerical channel values,
    information about the hardware utilization during the experiment

    Args:
        client(`neptune.Client'): Client object
        leaderboard_entry(`neptune.model.LeaderboardEntry`): LeaderboardEntry object

    Examples:
        Instantiate a session.

        >>> from neptune.sessions import Session
        >>> session = Session()

        Fetch a project and a list of experiments.

        >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
        >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)

        Get an experiment instance.

        >>> experiment = experiments[0]
        >>> experiment
        Experiment(SAL-1609)

    Todo:
        Column sorting
    """

    def __init__(self, client, _id, internal_id, project_full_id):
        """Bind the experiment to its client and identifiers.

        Args:
            client: Client object used for every server call made by this experiment.
            _id: Short experiment id (exposed as ``id``).
            internal_id: Internal experiment id (exposed as ``internal_id``).
            project_full_id: Full id of the project the experiment belongs to.
        """
        # Identity of the experiment and the client it talks through.
        self._client = client
        self._id = _id
        self._internal_id = internal_id
        self._project_full_id = project_full_id

        self._channels_values_sender = ChannelsValuesSender(self)

        # Thread and uploader slots start out empty.
        self._ping_thread = None
        self._hardware_metric_thread = None
        self._aborting_thread = None
        self._stdout_uploader = None
        self._stderr_uploader = None

        # Defaults to the interpreter's original exception hook.
        self._uncaught_exception_handler = sys.__excepthook__

    @property
    def id(self):
        """ Experiment short id

        Examples:
            Instantiate a session.

            >>> from neptune.sessions import Session
            >>> session = Session()

            Fetch a project and a list of experiments.

            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)

            Get an experiment instance.

            >>> experiment = experiments[0]

            Get experiment short id.

            >>> experiment.id
            'SAL-1609'

        """
        return self._id

    @property
    def name(self):
        return self._client.get_experiment(self._internal_id).name

    @property
    def state(self):
        return self._client.get_experiment(self._internal_id).state

    @property
    def internal_id(self):
        return self._internal_id

    def get_system_properties(self):
        """Fetch system properties such as the owner and the times of creation and completion.

        Returns:
            dict: A dictionary with the keys ``id``, ``name``, ``created``, ``finished``,
            ``running_time``, ``owner``, ``size``, ``tags`` and ``notes``.

        Examples:
            Get an experiment instance.

            >>> from neptune.sessions import Session
            >>> session = Session()
            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)
            >>> experiment = experiments[0]

            Get experiment system properties.

            >>> experiment.get_system_properties()

        Note:
            The list of supported system properties may change over time.

        """
        details = self._client.get_experiment(self._internal_id)
        system_properties = dict()
        system_properties['id'] = details.shortId
        system_properties['name'] = details.name
        system_properties['created'] = details.timeOfCreation
        system_properties['finished'] = details.timeOfCompletion
        system_properties['running_time'] = details.runningTime
        system_properties['owner'] = details.owner
        system_properties['size'] = details.storageSize
        system_properties['tags'] = details.tags
        system_properties['notes'] = details.description
        return system_properties

    def get_tags(self):
        """Return the tags of the experiment as reported by the client."""
        fetched = self._client.get_experiment(self._internal_id)
        return fetched.tags

    def append_tag(self, tag):
        """Ask the client to add a single ``tag`` to the experiment."""
        added, deleted = [tag], []
        self._client.update_tags(experiment=self,
                                 tags_to_add=added,
                                 tags_to_delete=deleted)

    def remove_tag(self, tag):
        """Ask the client to delete a single ``tag`` from the experiment."""
        added, deleted = [], [tag]
        self._client.update_tags(experiment=self,
                                 tags_to_add=added,
                                 tags_to_delete=deleted)

    def get_channels(self):
        """Fetch all channels of this experiment, keyed by channel name.

        Each returned channel gets ``x`` and ``y`` attributes holding its last
        value, or ``None`` for both when no last value is reported for it.

        Returns:
            dict: A dictionary mapping a channel name to channel.

        Examples:
            Get an experiment instance.

            >>> from neptune.sessions import Session
            >>> session = Session()
            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)
            >>> experiment = experiments[0]

            Get experiment channels.

            >>> experiment.get_channels()

        """
        fetched = self._client.get_experiment(self.internal_id)
        last_values = {
            value.channelName: value
            for value in fetched.channelsLastValues
        }
        channels = {}
        for channel in fetched.channels:
            latest = last_values.get(channel.name)
            if latest:
                channel.x, channel.y = latest.x, latest.y
            else:
                channel.x, channel.y = None, None
            channels[channel.name] = channel
        return channels

    def upload_source_files(self, source_files):
        """Upload the given source files to the experiment.

        Each file is stored under the path it was given as.

        Args:
            source_files: Iterable of paths to local files.

        Raises:
            `FileNotFound`: When one of the paths does not exist locally.
            `StorageLimitReached`: When storage limit in the project has been reached.
        """
        entries = []
        for source_file in source_files:
            if os.path.exists(source_file):
                entries.append((os.path.abspath(source_file), source_file))
            else:
                raise FileNotFound(source_file)

        upload_to_storage(files_list=entries,
                          upload_api_fun=self._client.upload_experiment_source,
                          upload_tar_api_fun=self._client.extract_experiment_source,
                          experiment=self)

    def send_metric(self, channel_name, x, y=None, timestamp=None):
        """Queue a numeric value for the channel ``channel_name``.

        A single positional value is treated as ``y``; see ``_get_valid_x_y``.

        Raises:
            `InvalidChannelValue`: When the value to send is not a float value.
        """
        x, y = self._get_valid_x_y(x, y)

        if is_float(y):
            point = ChannelValue(x, {'numeric_value': y}, timestamp)
            self._channels_values_sender.send(channel_name, 'numeric', point)
        else:
            raise InvalidChannelValue(expected_type='float', actual_type=type(y).__name__)

    def send_text(self, channel_name, x, y=None, timestamp=None):
        """Queue a text value for the channel ``channel_name``.

        A single positional value is treated as ``y``; see ``_get_valid_x_y``.

        Raises:
            `InvalidChannelValue`: When the value to send is not a string.
        """
        x, y = self._get_valid_x_y(x, y)

        if isinstance(y, six.string_types):
            point = ChannelValue(x, {'text_value': y}, timestamp)
            self._channels_values_sender.send(channel_name, 'text', point)
        else:
            raise InvalidChannelValue(expected_type='str', actual_type=type(y).__name__)

    def send_image(self, channel_name, x, y=None, name=None, description=None, timestamp=None):
        """Queue an image for the channel ``channel_name``.

        A single positional value is treated as ``y``; see ``_get_valid_x_y``.
        The content obtained from ``get_image_content(y)`` is sent base64-encoded,
        together with the optional ``name`` and ``description``.
        """
        x, y = self._get_valid_x_y(x, y)

        encoded = base64.b64encode(get_image_content(y)).decode('utf-8')
        image_payload = {
            'name': name,
            'description': description,
            'data': encoded,
        }

        point = ChannelValue(x, {'image_value': image_payload}, timestamp)
        self._channels_values_sender.send(channel_name, 'image', point)

    def send_artifact(self, artifact):
        """Upload a local file as an experiment output.

        The file is stored under the path it was given as.

        Args:
            artifact: Path to a local file.

        Raises:
            `FileNotFound`: When ``artifact`` does not exist locally.
            `StorageLimitReached`: When storage limit in the project has been reached.
        """
        if not os.path.exists(artifact):
            raise FileNotFound(artifact)

        entry = (os.path.abspath(artifact), artifact)
        upload_to_storage(files_list=[entry],
                          upload_api_fun=self._client.upload_experiment_output,
                          upload_tar_api_fun=self._client.extract_experiment_output,
                          experiment=self)

    def send_graph(self, graph_id, value):
        """Upload a tensorflow graph for this experiment.

        Args:
            graph_id: a string UUID identifying the graph (managed by user)
            value: a string representation of Tensorflow graph

        Examples:
            Get an experiment instance.

            >>> from neptune.sessions import Session
            >>> session = Session()
            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)
            >>> experiment = experiments[0]

            Send graph to experiment.

            >>> import uuid
            >>> experiment.send_graph(str(uuid.uuid4()), str("tf.GraphDef instance"))

        """
        client = self._client
        client.put_tensorflow_graph(self, graph_id, value)

    def get_parameters(self):
        """Fetch the parameters of this experiment.

        Returns:
            dict: A dictionary mapping a parameter name to value.

        Examples:
            Get an experiment instance.

            >>> from neptune.sessions import Session
            >>> session = Session()
            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)
            >>> experiment = experiments[0]

            Get experiment parameters.

            >>> experiment.get_parameters()

        """
        fetched = self._client.get_experiment(self.internal_id)
        return {param.name: param.value for param in fetched.parameters}

    def get_properties(self):
        """Fetch the user-defined properties of this experiment.

        Returns:
            dict: A dictionary mapping a property key to value.

        Examples:
            Get an experiment instance.

            >>> from neptune.sessions import Session
            >>> session = Session()
            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)
            >>> experiment = experiments[0]

            Get experiment properties.

            >>> experiment.get_properties()

        """
        fetched = self._client.get_experiment(self.internal_id)
        return {prop.key: prop.value for prop in fetched.properties}

    def set_property(self, key, value):
        """Set one property on this experiment, keeping the existing ones.

        The current properties are read through the client, ``key`` is added
        or overwritten with ``value``, and the complete mapping is sent back
        with ``update_experiment``.

        Args:
            key: Property key to add or overwrite.
            value: Value to store under ``key``.

        Returns:
            Whatever the client's ``update_experiment`` call returns.
        """
        current = self._client.get_experiment(self.internal_id).properties
        merged = {prop.key: prop.value for prop in current}
        merged.update({key: value})
        return self._client.update_experiment(experiment=self, properties=merged)

    def remove_property(self, key):
        """Drop one property from this experiment, keeping the others.

        The current properties are read through the client, ``key`` is
        removed, and the remaining mapping is sent back with
        ``update_experiment``.

        Args:
            key: Property key to remove.

        Returns:
            Whatever the client's ``update_experiment`` call returns.

        Raises:
            KeyError: If the experiment has no property named ``key``; the
                client is not asked to update anything in that case.
        """
        current = self._client.get_experiment(self.internal_id).properties
        remaining = {prop.key: prop.value for prop in current}
        # pop() without a default raises KeyError for an unknown key.
        remaining.pop(key)
        return self._client.update_experiment(experiment=self, properties=remaining)

    def get_hardware_utilization(self):
        """Return RAM, CPU and GPU utilization recorded during the experiment.

        The metrics CSV obtained from the client is parsed into a DataFrame.
        Each of RAM, CPU and every GPU contributes a pair of columns (x_*, y_*):
        the x_ column holds the time in milliseconds since the experiment
        started, the y_ column holds the metric value at that time.

        RAM and GPU memory usage is expressed in gigabytes.
        CPU and GPU utilization is expressed as a percentage (0-100).

        For an experiment that used a single GPU the columns are:

        x_ram, y_ram, x_cpu, y_cpu, x_gpu_util_0, y_gpu_util_0, x_gpu_mem_0, y_gpu_mem_0

        For instance, the pair below says that 3 seconds into the experiment
        16.7 GB of RAM were in use.
        x_ram, y_ram = 3000, 16.7

        NaNs may appear when one metric has more values than the others.

        Returns:
            `pandas.DataFrame`: The hardware utilization metrics; an empty
            DataFrame when the CSV has no content.

        Examples:
            Open a session and pick an experiment from a project.

            >>> from neptune.sessions import Session
            >>> session = Session()
            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)
            >>> experiment = experiments[0]

            Read the hardware utilization of that experiment.

            >>> experiment.get_hardware_utilization()

        """
        csv_source = self._client.get_metrics_csv(self)
        try:
            frame = pd.read_csv(csv_source)
        except EmptyDataError:
            # pandas raises EmptyDataError when there is nothing to parse.
            frame = pd.DataFrame()
        return frame

    def get_numeric_channels_values(self, *channel_names):
        """
        Fetch the points of the given numeric channels as one DataFrame.

        Every requested channel is downloaded as CSV and read into a frame
        with the float columns ``x_<name>`` and ``y_<name>``; a channel whose
        CSV is empty contributes an empty frame with the same columns. The
        per-channel frames are concatenated side by side and passed through
        ``align_channels_on_x``.

        The result has one extra column x next to the requested channels,
        e.g. get_numeric_channels_values('loss', 'auc') yields:
            x, loss, auc

        NaNs may appear when one channel has more values than the others.

        Args:
            *channel_names: names of the channels to retrieve values for.

        Returns:
            `pandas.DataFrame`: The values of the requested numeric channels.

        Raises:
            KeyError: If a requested name is not among ``self.get_channels()``.

        Examples:
            Open a session and pick an experiment from a project.

            >>> from neptune.sessions import Session
            >>> session = Session()
            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)
            >>> exp = experiments[0]

            Read the values of batch-level and epoch-level channels separately.

            >>> batch_channels = exp.get_numeric_channels_values('unet_0 batch sum loss', 'unet_1 batch sum loss')
            >>> epoch_channels = exp.get_numeric_channels_values('unet_0 epoch_val sum loss', 'Learning Rate')

        Note:
            Request together only channels that share the same temporal/iteration axis x,
            e.g. one DataFrame for epoch channels and another one for batch channels.
        """

        frames_by_channel = {}
        known_channels = self.get_channels()
        for name in channel_names:
            channel_id = known_channels[name].id
            column_names = ['x_{}'.format(name), 'y_{}'.format(name)]
            try:
                frame = pd.read_csv(
                    self._client.get_channel_points_csv(self, channel_id),
                    header=None,
                    names=column_names,
                    dtype=float
                )
            except EmptyDataError:
                # Channel exists but has no points yet: keep its columns, no rows.
                frame = pd.DataFrame(columns=column_names, dtype=float)
            frames_by_channel[name] = frame

        side_by_side = pd.concat(frames_by_channel.values(), axis=1, sort=False)
        return align_channels_on_x(side_by_side)

    def stop(self, exc_tb=None):
        """Finish the experiment and shut down its background helpers.

        Steps, in order: join the channel values sender, report the final
        state to the client, interrupt and clear the ping / hardware metric /
        aborting threads that are set, close the stdout and stderr uploaders
        that are set, and finally call ``pop_stopped_experiment``.

        Args:
            exc_tb: Optional traceback text. When ``None`` the experiment is
                marked as succeeded, otherwise it is marked as failed with
                this value.
        """

        self._channels_values_sender.join()

        try:
            if exc_tb is not None:
                self._client.mark_failed(self, exc_tb)
            else:
                self._client.mark_succeeded(self)
        except ExperimentAlreadyFinished:
            # Already finished on the server side: nothing left to report.
            pass

        # Attributes are read one at a time, in the same order as the helpers were listed.
        for thread_attr in ('_ping_thread', '_hardware_metric_thread', '_aborting_thread'):
            worker = getattr(self, thread_attr)
            if worker:
                worker.interrupt()
                setattr(self, thread_attr, None)

        for uploader_attr in ('_stdout_uploader', '_stderr_uploader'):
            uploader = getattr(self, uploader_attr)
            if uploader:
                uploader.close()

        pop_stopped_experiment()

    def __enter__(self):
        """Enter a ``with`` block; the object bound by ``as`` is this same instance."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the experiment when the ``with`` block is left.

        Without an exception ``stop()`` is called with no arguments. With an
        exception, ``stop`` receives the formatted traceback entries joined by
        newlines, followed by a newline and ``repr(exc_val)``.

        Nothing is returned, so an exception raised inside the block keeps
        propagating.
        """
        if exc_tb is not None:
            formatted_tb = "\n".join(traceback.format_tb(exc_tb))
            self.stop(formatted_tb + "\n" + repr(exc_val))
            return
        self.stop()

    def __str__(self):
        """Return ``'Experiment(<id>)'`` where ``<id>`` is formatted from ``self.id``."""
        return 'Experiment({})'.format(self.id)

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return str(self)

    def __eq__(self, o):
        """Compare two experiments by id, internal id and project full id.

        Args:
            o: Object to compare with.

        Returns:
            bool: ``True`` when all three identifiers match. ``False``
            otherwise, including when ``o`` does not carry those identifiers
            at all (for example ``None`` or a string).
        """
        # pylint: disable=protected-access
        try:
            other_key = (o._id, o._internal_id, o._project_full_id)
        except AttributeError:
            # A foreign object is simply "not equal"; comparing must not crash.
            return False
        return (self._id, self._internal_id, self._project_full_id) == other_key

    def __ne__(self, o):
        """Return the logical negation of ``self.__eq__(o)``."""
        return not self.__eq__(o)

    @staticmethod
    def _get_valid_x_y(x, y):
        """Normalize the ``(x, y)`` pair given for a channel value.

        When only one value was supplied (``y`` is ``None``) it is treated as
        the y value and x becomes ``None``. When both are supplied, ``x`` must
        pass ``is_float``.

        Args:
            x: The x coordinate, or the value itself when ``y`` is ``None``.
            y: The value, or ``None``.

        Returns:
            tuple: The normalized ``(x, y)`` pair.

        Raises:
            NoChannelValue: If ``x`` is ``None``.
            InvalidChannelValue: If ``y`` is given and ``x`` fails ``is_float``.
        """
        if x is None:
            raise NoChannelValue()

        if y is None:
            # Single-argument form: the lone argument is the value, not the x.
            return None, x

        if not is_float(x):
            raise InvalidChannelValue(expected_type='float', actual_type=type(x).__name__)

        return x, y

    def _send_channels_values(self, channels_with_values):
        """Pass ``channels_with_values`` to the client's ``send_channels_values`` for this experiment.

        The client's return value is not propagated.
        """
        self._client.send_channels_values(self, channels_with_values)

    def _get_channel(self, channel_name, channel_type):
        """Return the channel named ``channel_name``, creating it if needed.

        Args:
            channel_name: Name of the channel to look up.
            channel_type: Type used only when the channel has to be created.

        Returns:
            The channel found by ``_find_channel``, or the result of
            ``_create_channel`` when the lookup gave ``None``.
        """
        existing = self._find_channel(channel_name)
        if existing is not None:
            return existing
        return self._create_channel(channel_name, channel_type)

    def _find_channel(self, channel_name):
        """Look up ``channel_name`` in the result of ``self.get_channels()``; ``None`` when absent."""
        return self.get_channels().get(channel_name, None)

    def _create_channel(self, channel_name, channel_type):
        """Ask the client to create a channel of the given name and type for this experiment.

        Returns whatever the client's ``create_channel`` call returns.
        """
        return self._client.create_channel(self, channel_name, channel_type)
Exemplo n.º 11
0
    def create_experiment(self,
                          name=None,
                          description=None,
                          params=None,
                          properties=None,
                          tags=None,
                          upload_source_files=None,
                          abort_callback=None,
                          upload_stdout=True,
                          upload_stderr=True,
                          send_hardware_metrics=True,
                          run_monitoring_thread=True,
                          handle_uncaught_exceptions=True):
        """Create an experiment in this project and start its background helpers.

        Args:
            name: Experiment name; ``"Untitled"`` is used when ``None``.
            description: Experiment description; ``""`` is used when ``None``.
            params: Parameters sent to the client; ``{}`` when ``None``.
            properties: Properties sent to the client; ``{}`` when ``None``.
            tags: Tags sent to the client; ``[]`` when ``None``.
            upload_source_files: Value handed to ``experiment.upload_source_files``.
                When ``None``, the script named by ``sys.argv[0]`` is used if a
                file with that base name exists in the current working
                directory, otherwise an empty list.
            abort_callback: Optional callable wrapped in ``CustomAbortImpl``.
                Without it, ``DefaultAbortImpl`` is used for the current
                process, provided its requirements are installed.
            upload_stdout: Attach a ``StdOutWithUpload`` unless ``is_notebook()``.
            upload_stderr: Attach a ``StdErrWithUpload`` unless ``is_notebook()``.
            send_hardware_metrics: Start hardware metric reporting when the
                ``SystemMonitor`` requirements are installed.
            run_monitoring_thread: Start a ``PingThread``; the flag is also sent
                to the client as ``monitored``.
            handle_uncaught_exceptions: Install a ``sys.excepthook`` that stops
                the experiment with the formatted traceback and then delegates
                to ``sys.__excepthook__``.

        Returns:
            The experiment returned by the client, after it has been registered
            with ``push_new_experiment``.

        Raises:
            `ExperimentValidationError`: When provided arguments are invalid.
            `ExperimentLimitReached`: When experiment limit in the project has been reached.
        """

        # Only None triggers a default; other falsy values are passed through untouched.
        name = "Untitled" if name is None else name
        description = "" if description is None else description
        params = {} if params is None else params
        properties = {} if properties is None else properties
        tags = [] if tags is None else tags

        can_abort = (abort_callback is not None
                     or DefaultAbortImpl.requirements_installed())

        exp = self.client.create_experiment(
            project=self,
            name=name,
            description=description,
            params=params,
            properties=properties,
            tags=tags,
            abortable=can_abort,
            monitored=run_monitoring_thread)

        if upload_source_files is None:
            # Default to the entry script, but only if it sits in the working directory.
            script_path = os.path.join(os.getcwd(),
                                       os.path.basename(sys.argv[0]))
            if os.path.isfile(script_path):
                upload_source_files = [os.path.relpath(script_path, os.getcwd())]
            else:
                upload_source_files = []

        exp.upload_source_files(upload_source_files)

        def exception_handler(exc_type, exc_val, exc_tb):
            formatted_tb = "\n".join(traceback.format_tb(exc_tb))
            exp.stop(formatted_tb + "\n" + repr(exc_val))

            # Keep the interpreter's default reporting after the experiment is stopped.
            sys.__excepthook__(exc_type, exc_val, exc_tb)

        # pylint:disable=protected-access
        if handle_uncaught_exceptions:
            exp._uncaught_exception_handler = exception_handler
            sys.excepthook = exception_handler

        exp._channels_values_sender = ChannelsValuesSender(exp)

        if can_abort:
            abort_impl = (CustomAbortImpl(abort_callback) if abort_callback
                          else DefaultAbortImpl(pid=os.getpid()))
            aborting_thread = AbortingThread(
                websocket_factory=ReconnectingWebsocketFactory(
                    client=self.client, experiment_id=exp.internal_id),
                abort_impl=abort_impl,
                experiment_id=exp.internal_id)
            exp._aborting_thread = aborting_thread
            aborting_thread.start()

        if upload_stdout and not is_notebook():
            exp._stdout_uploader = StdOutWithUpload(exp)

        if upload_stderr and not is_notebook():
            exp._stderr_uploader = StdErrWithUpload(exp)

        if run_monitoring_thread:
            ping_thread = PingThread(client=self.client, experiment=exp)
            exp._ping_thread = ping_thread
            ping_thread.start()

        if send_hardware_metrics and SystemMonitor.requirements_installed():
            mode = GaugeMode.CGROUP if in_docker() else GaugeMode.SYSTEM
            service_factory = MetricServiceFactory(self.client, os.environ)
            service = service_factory.create(gauge_mode=mode,
                                             experiment=exp,
                                             reference_timestamp=time.time())

            metrics_thread = HardwareMetricReportingThread(
                metric_service=service,
                metric_sending_interval_seconds=3)
            exp._hardware_metric_thread = metrics_thread
            metrics_thread.start()

        push_new_experiment(exp)

        return exp