    def __init__(self, workspace, experiment, node_id, port_name, data_type_id):
        """
        INTERNAL USE ONLY. Initialize an intermediate dataset.

        Parameters
        ----------
        workspace : Workspace
            Parent workspace of the dataset.
        experiment : Experiment
            Parent experiment of the dataset.
        node_id : str
            Module node id from the experiment graph.
        port_name : str
            Output port of the module.
        data_type_id : str
            Serialization format of the raw data.
            See the azureml.DataTypeIds class for constants.
        """
        _not_none('workspace', workspace)
        _not_none('experiment', experiment)
        _not_none_or_empty('node_id', node_id)
        _not_none_or_empty('port_name', port_name)
        _not_none_or_empty('data_type_id', data_type_id)

        self.workspace = workspace
        self.experiment = experiment
        self.node_id = node_id
        self.port_name = port_name
        self.data_type_id = data_type_id

        if is_supported(self.data_type_id):
            self.to_dataframe = self._to_dataframe
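
# Usage sketch (hedged): to_dataframe is attached per-instance only when the
# serialization format is supported, so callers can feature-test for it.
# `dataset` is an assumed IntermediateDataset-like instance, and
# read_as_binary() is an assumed raw-bytes accessor used as a fallback.
if hasattr(dataset, 'to_dataframe'):
    frame = dataset.to_dataframe()
else:
    raw = dataset.read_as_binary()  # fall back to the raw serialized bytes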
    def __getitem__(self, index):
        '''Retrieve a dataset by index or by name (case-sensitive).'''
        _not_none('index', index)

        datasets = self._get_datasets()
        if isinstance(index, numbers.Integral):
            return self._create_dataset(list(datasets)[index])
        else:
            for dataset in datasets:
                if dataset['Name'] == index:
                    return self._create_dataset(dataset)

        raise IndexError('A data set named "{}" does not exist'.format(index))
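
# Usage sketch (hedged): a collection exposing the __getitem__ above supports
# positional and name-based lookup. `workspace` is an assumed, authenticated
# Workspace whose `datasets` attribute is such a collection.
first = workspace.datasets[0]             # by position
named = workspace.datasets['My Dataset']  # by name (case-sensitive)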
    def __getitem__(self, index):
        '''Retrieve an experiment by index or by id.'''
        _not_none('index', index)

        experiments = self._get_experiments()
        if isinstance(index, numbers.Integral):
            return self._create_experiment(list(experiments)[index])
        else:
            for experiment in experiments:
                if experiment['ExperimentId'] == index:
                    return self._create_experiment(experiment)

        raise IndexError('An experiment with id "{}" does not exist'.format(index))
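
# Usage sketch (hedged): experiments are addressed the same way, except that
# string lookup matches the 'ExperimentId' field rather than a display name.
latest = workspace.experiments[-1]                # by position
by_id = workspace.experiments['<experiment-id>']  # by experiment id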
    def __init__(self, workspace, metadata):
        """
        INTERNAL USE ONLY. Initialize an experiment.

        Parameters
        ----------
        workspace : Workspace
            Parent workspace of the experiment.
        metadata : dict
            Dictionary of experiment metadata as returned by the REST API.
        """
        _not_none('workspace', workspace)
        _not_none('metadata', metadata)

        self.workspace = workspace
        self._metadata = metadata
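
# Illustrative sketch (an assumption, not the library's confirmed layout):
# metadata-backed wrappers like the one above typically expose REST payload
# keys, such as the 'ExperimentId' field used by __getitem__, as read-only
# properties.
class _ExperimentView:
    def __init__(self, metadata):
        self._metadata = metadata

    @property
    def experiment_id(self):
        return self._metadata['ExperimentId']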
    def _update_from_dataframe(self,
                               dataframe,
                               data_type_id=None,
                               name=None,
                               description=None):
        """
        Serialize the specified DataFrame and replace the existing dataset.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Data to serialize.
        data_type_id : str, optional
            Format to serialize to.
            If None, the existing format is preserved.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
            See the azureml.DataTypeIds class for constants.
        name : str, optional
            Name for the dataset.
            If None, the name of the existing dataset is used.
        description : str, optional
            Description for the dataset.
            If None, the description of the existing dataset is used.
        """
        _not_none('dataframe', dataframe)

        if data_type_id is None:
            data_type_id = self.data_type_id
        if name is None:
            name = self.name
        if description is None:
            description = self.description

        with BytesIO() as output:
            serialize_dataframe(output, data_type_id, dataframe)
            raw_data = output.getvalue()

        self._upload_and_refresh(raw_data, data_type_id, name, description)
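
# Usage sketch (hedged): replace an existing dataset's contents in place,
# keeping its current name, description, and serialization format. The public
# name update_from_dataframe is bound only on non-example datasets (see the
# dataset __init__ further below).
import pandas as pd

frame = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
dataset.update_from_dataframe(frame)  # `dataset` is an assumed existing dataset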
    def __init__(self, workspace, example_filter=None):
        """
        INTERNAL USE ONLY. Initialize an experiment collection.

        Parameters
        ----------
        workspace : Workspace
            Parent workspace of the experiments.
        example_filter : bool, optional
            True to include only example experiments.
            False to include only user-created experiments.
            None (the default) to include all.
        """
        _not_none('workspace', workspace)

        self.workspace = workspace
        self._example_filter = example_filter
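
# Illustrative sketch (collection class name assumed from the docstring): the
# tri-state filter separates sample experiments from user-created ones.
everything = ExperimentCollection(workspace)                       # no filter
examples = ExperimentCollection(workspace, example_filter=True)    # samples only
user_made = ExperimentCollection(workspace, example_filter=False)  # user-created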
    def add_from_dataframe(self, dataframe, data_type_id, name, description):
        """
        Serialize the specified DataFrame and upload it as a new dataset.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Data to serialize.
        data_type_id : str
            Format to serialize to.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
            See the azureml.DataTypeIds class for constants.
        name : str
            Name for the new dataset.
        description : str
            Description for the new dataset.

        Returns
        -------
        SourceDataset
            Dataset that was just created.
            Use open(), read_as_binary(), read_as_text() or to_dataframe() on
            the dataset object to get its contents as a stream, bytes, str or
            pandas DataFrame.
        """
        _not_none('dataframe', dataframe)
        _not_none_or_empty('data_type_id', data_type_id)
        _not_none_or_empty('name', name)
        _not_none_or_empty('description', description)

        with BytesIO() as output:
            serialize_dataframe(output, data_type_id, dataframe)
            raw_data = output.getvalue()

        return self._upload(raw_data, data_type_id, name, description)
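
# Usage sketch (hedged), assuming an authenticated `workspace` whose `datasets`
# collection exposes the add_from_dataframe method above; the DataTypeIds
# constants mirror the format strings listed in the docstring.
import pandas as pd
from azureml import DataTypeIds

frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4.0, 5.0, 6.0]})
dataset = workspace.datasets.add_from_dataframe(
    frame,
    data_type_id=DataTypeIds.GenericCSV,
    name='my-new-dataset',
    description='Uploaded from a pandas DataFrame')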
    def _update_from_raw_data(self,
                              raw_data,
                              data_type_id=None,
                              name=None,
                              description=None):
        """
        Upload already serialized raw data and replace the existing dataset.

        Parameters
        ----------
        raw_data : bytes
            Dataset contents to upload.
        data_type_id : str, optional
            Serialization format of the raw data.
            If None, the format of the existing dataset is used.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
                'ARFF'
            See the azureml.DataTypeIds class for constants.
        name : str, optional
            Name for the dataset.
            If None, the name of the existing dataset is used.
        description : str, optional
            Description for the dataset.
            If None, the description of the existing dataset is used.
        """
        _not_none('raw_data', raw_data)

        if data_type_id is None:
            data_type_id = self.data_type_id
        if name is None:
            name = self.name
        if description is None:
            description = self.description

        self._upload_and_refresh(raw_data, data_type_id, name, description)
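
# Usage sketch (hedged): push bytes that are already serialized, overriding the
# format while keeping the existing name and description. The public name
# update_from_raw_data is bound only on non-example datasets.
csv_bytes = b'a,b\n1,2\n3,4\n'
dataset.update_from_raw_data(csv_bytes, data_type_id='GenericCSV')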
def serialize_dataframe(writer, data_type_id, dataframe):
    """
    Serialize a dataframe.

    Parameters
    ----------
    writer : file
        File-like object to write to. Must be opened in binary mode.
    data_type_id : str
        Serialization format to use.
        See the azureml.DataTypeIds class for constants.
    dataframe : pandas.DataFrame
        Dataframe to serialize.
    """
    _not_none('writer', writer)
    _not_none_or_empty('data_type_id', data_type_id)
    _not_none('dataframe', dataframe)

    serializer = _SERIALIZERS.get(data_type_id)
    if serializer is None:
        raise UnsupportedDatasetTypeError(data_type_id)
    serializer[0](writer=writer, dataframe=dataframe)
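
# Round-trip sketch (hedged) pairing serialize_dataframe with the
# deserialize_dataframe function defined later in this section.
from io import BytesIO
import pandas as pd

frame = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
buffer = BytesIO()
serialize_dataframe(buffer, 'GenericCSV', frame)
buffer.seek(0)  # rewind before handing the buffer to the deserializer
restored = deserialize_dataframe(buffer, 'GenericCSV')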
    def __init__(self, workspace=None, metadata=None):
        """
        INTERNAL USE ONLY. Initialize a dataset.

        Parameters
        ----------
        workspace : Workspace
            Parent workspace of the dataset.
        metadata : dict
            Dictionary of dataset metadata as returned by the REST API.
        """
        _not_none('metadata', metadata)
        _not_none('workspace', workspace)

        self.workspace = workspace
        self._metadata = metadata

        if is_supported(self.data_type_id):
            self.to_dataframe = self._to_dataframe

        if not self.is_example:
            self.update_from_raw_data = self._update_from_raw_data
            self.update_from_dataframe = self._update_from_dataframe
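
# Usage sketch (hedged): because the mutating helpers are bound per-instance,
# read-only example datasets can be detected with a simple feature test.
if not hasattr(dataset, 'update_from_raw_data'):
    print('example dataset: read-only')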
    def add_from_raw_data(self, raw_data, data_type_id, name, description):
        """
        Upload already serialized raw data as a new dataset.

        Parameters
        ----------
        raw_data : bytes
            Dataset contents to upload.
        data_type_id : str
            Serialization format of the raw data.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
                'ARFF'
            See the azureml.DataTypeIds class for constants.
        name : str
            Name for the new dataset.
        description : str
            Description for the new dataset.

        Returns
        -------
        SourceDataset
            Dataset that was just created.
            Use open(), read_as_binary(), read_as_text() or to_dataframe() on
            the dataset object to get its contents as a stream, bytes, str or
            pandas DataFrame.
        """
        _not_none('raw_data', raw_data)
        _not_none_or_empty('data_type_id', data_type_id)
        _not_none_or_empty('name', name)
        _not_none_or_empty('description', description)

        return self._upload(raw_data, data_type_id, name, description)
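
# Usage sketch (hedged): upload pre-serialized bytes as a new dataset through
# the collection method above; `workspace` is an assumed Workspace instance.
raw = b'label,value\nspam,1\nham,0\n'
new_dataset = workspace.datasets.add_from_raw_data(
    raw,
    data_type_id='GenericCSV',
    name='raw-upload',
    description='Pre-serialized CSV upload')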
def deserialize_dataframe(reader, data_type_id):
    """
    Deserialize a dataframe.

    Parameters
    ----------
    reader : file
        File-like object to read from. Must be opened in binary mode.
    data_type_id : str
        Serialization format of the raw data.
        See the azureml.DataTypeIds class for constants.

    Returns
    -------
    pandas.DataFrame
        Dataframe object.
    """
    _not_none('reader', reader)
    _not_none_or_empty('data_type_id', data_type_id)

    serializer = _SERIALIZERS.get(data_type_id)
    if serializer is None:
        raise UnsupportedDatasetTypeError(data_type_id)
    return serializer[1](reader=reader)
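
# Usage sketch (hedged): deserialize from any binary file-like object, for
# example a CSV file saved on disk (path is illustrative).
with open('dataset.csv', 'rb') as reader:
    frame = deserialize_dataframe(reader, 'GenericCSV')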