Example #1
def create(
    dataset,
    session_id,
    target,
    features=None,
    prediction_window=100,
    validation_set="auto",
    max_iterations=10,
    batch_size=32,
    verbose=True,
):
    """
    Create an :class:`ActivityClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data consisting of `sessions`, where each session is a sequence
        of observations. The data must be in `stacked` format, grouped by
        session, and sorted temporally within each session. Columns listed in
        `features` are used to train a model that predicts the labels in the
        `target` column.

    session_id : string
        Name of the column that contains a unique ID for each session.

    target : string
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    features : list[string], optional
        Names of the columns containing the input features used for
        classification. If set to `None`, all columns except `session_id`
        and `target` are used.

    prediction_window : int, optional
        Number of time units between predictions. For example, if your input
        data is sampled at 100Hz, and the `prediction_window` is set to 100,
        then this model will make a prediction every 1 second.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance to
        prevent the model from overfitting to the training data.

        For each row of the progress table, accuracy is measured over the
        provided training dataset and the `validation_set`. The format of this
        SFrame must be the same as the training set.

        When set to 'auto', a validation set is automatically sampled from the
        training data (if the training data has > 100 sessions). If
        validation_set is set to None, then all the data will be used for
        training.

    max_iterations : int, optional
        Maximum number of iterations/epochs made over the data during the
        training phase.

    batch_size : int, optional
        Number of sequence chunks used per training step. Must be greater than
        the number of GPUs in use.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ActivityClassifier
        A trained :class:`ActivityClassifier` model.

    Examples
    --------
    .. sourcecode:: python

        >>> import turicreate as tc

        # Training on dummy data
        >>> data = tc.SFrame({
        ...    'accelerometer_x': [0.1, 0.2, 0.3, 0.4, 0.5] * 10,
        ...    'accelerometer_y': [0.5, 0.4, 0.3, 0.2, 0.1] * 10,
        ...    'accelerometer_z': [0.01, 0.01, 0.02, 0.02, 0.01] * 10,
        ...    'session_id': [0, 0, 0] * 10 + [1, 1] * 10,
        ...    'activity': ['walk', 'run', 'run'] * 10 + ['swim', 'swim'] * 10
        ... })

        # Create an activity classifier
        >>> model = tc.activity_classifier.create(data,
        ...     session_id='session_id', target='activity',
        ...     features=['accelerometer_x', 'accelerometer_y', 'accelerometer_z'])

        # Make predictions (as probability vector, or class)
        >>> predictions = model.predict(data)
        >>> predictions = model.predict(data, output_type='probability_vector')

        # Get both predictions and classes together
        >>> predictions = model.classify(data)

        # Get top-k predictions (instead of only top-1) if your labels have
        # more than 2 classes
        >>> predictions = model.predict_topk(data, k=3)

        # Evaluate the model
        >>> results = model.evaluate(data)

    See Also
    --------
    ActivityClassifier, util.random_split_by_session
    """

    _tkutl._raise_error_if_not_sframe(dataset, "dataset")
    if not isinstance(target, str):
        raise _ToolkitError("target must be of type str")
    if not isinstance(session_id, str):
        raise _ToolkitError("session_id must be of type str")
    if not isinstance(batch_size, int):
        raise _ToolkitError("batch_size must be of type int")

    _tkutl._raise_error_if_sframe_empty(dataset, "dataset")
    _tkutl._numeric_param_check_range("prediction_window", prediction_window, 1, 400)
    _tkutl._numeric_param_check_range("max_iterations", max_iterations, 0, _six.MAXSIZE)

    if features is None:
        features = _fe_tkutl.get_column_names(
            dataset, interpret_as_excluded=True, column_names=[session_id, target]
        )
    if isinstance(features, str) or not hasattr(features, "__iter__"):
        raise TypeError("Input 'features' must be a list.")
    for x in features:
        if not isinstance(x, str):
            raise TypeError(
                "Invalid feature %s: Feature names must be of type str." % x
            )
    if len(features) == 0:
        raise TypeError("Input 'features' must contain at least one column name.")

    start_time = _time.time()
    dataset = _tkutl._toolkits_select_columns(dataset, features + [session_id, target])
    _tkutl._raise_error_if_sarray_not_expected_dtype(
        dataset[target], target, [str, int]
    )
    _tkutl._raise_error_if_sarray_not_expected_dtype(
        dataset[session_id], session_id, [str, int]
    )

    for feature in features:
        _tkutl._handle_missing_values(dataset, feature, "training_dataset")

    # Check for missing values in an SFrame validation set
    if isinstance(validation_set, _SFrame):
        _tkutl._raise_error_if_sframe_empty(validation_set, "validation_set")
        for feature in features:
            _tkutl._handle_missing_values(validation_set, feature, "validation_set")

    # C++ model
    name = "activity_classifier"

    import turicreate as _turicreate

    # Imports tensorflow
    import turicreate.toolkits.libtctensorflow

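    # Instantiate the native C++ model; its train() call runs the whole
    # training pipeline (chunking, optimization, progress output) inside
    # the extension.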
    model = _turicreate.extensions.activity_classifier()
    options = {}
    options["prediction_window"] = prediction_window
    options["batch_size"] = batch_size
    options["max_iterations"] = max_iterations
    options["verbose"] = verbose
    options["_show_loss"] = False

    model.train(dataset, target, session_id, validation_set, options)
    return ActivityClassifier(model_proxy=model, name=name)
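This first variant delegates all the work to the C++ extension. Before calling it, it is common to hold out whole sessions for evaluation with `util.random_split_by_session` (referenced in See Also), so that rows from one session never straddle the train/test boundary. A minimal usage sketch under assumed inputs (the file name, `fraction` value, and sampling-rate comment are illustrative, not from the source above):

import turicreate as tc

# Hypothetical saved SFrame of stacked sensor rows with 'session_id' and
# 'activity' columns; substitute your own data.
data = tc.SFrame('sensor_sessions.sframe')

# Keep each session entirely on one side of the split.
train, test = tc.activity_classifier.util.random_split_by_session(
    data, session_id='session_id', fraction=0.8)

model = tc.activity_classifier.create(
    train, session_id='session_id', target='activity',
    prediction_window=50)  # at 25Hz input, one prediction every 2 seconds

print(model.evaluate(test))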
Example #2
def create(dataset, session_id, target, features=None, prediction_window=100,
           validation_set='auto', max_iterations=10, batch_size=32, verbose=True):
    """
    Create an :class:`ActivityClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data consisting of `sessions`, where each session is a sequence
        of observations. The data must be in `stacked` format, grouped by
        session, and sorted temporally within each session. Columns listed in
        `features` are used to train a model that predicts the labels in the
        `target` column.

    session_id : string
        Name of the column that contains a unique ID for each session.

    target : string
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    features : list[string], optional
        Names of the columns containing the input features used for
        classification. If set to `None`, all columns except `session_id`
        and `target` are used.

    prediction_window : int, optional
        Number of time units between predictions. For example, if your input
        data is sampled at 100Hz, and the `prediction_window` is set to 100,
        then this model will make a prediction every 1 second.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance to
        prevent the model from overfitting to the training data.

        For each row of the progress table, accuracy is measured over the
        provided training dataset and the `validation_set`. The format of this
        SFrame must be the same as the training set.

        When set to 'auto', a validation set is automatically sampled from the
        training data (if the training data has > 100 sessions). If
        validation_set is set to None, then all the data will be used for
        training.

    max_iterations : int, optional
        Maximum number of iterations/epochs made over the data during the
        training phase.

    batch_size : int, optional
        Number of sequence chunks used per training step. Must be greater than
        the number of GPUs in use.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ActivityClassifier
        A trained :class:`ActivityClassifier` model.

    Examples
    --------
    .. sourcecode:: python

        >>> import turicreate as tc

        # Training on dummy data
        >>> data = tc.SFrame({
        ...    'accelerometer_x': [0.1, 0.2, 0.3, 0.4, 0.5] * 10,
        ...    'accelerometer_y': [0.5, 0.4, 0.3, 0.2, 0.1] * 10,
        ...    'accelerometer_z': [0.01, 0.01, 0.02, 0.02, 0.01] * 10,
        ...    'session_id': [0, 0, 0] * 10 + [1, 1] * 10,
        ...    'activity': ['walk', 'run', 'run'] * 10 + ['swim', 'swim'] * 10
        ... })

        # Create an activity classifier
        >>> model = tc.activity_classifier.create(data,
        ...     session_id='session_id', target='activity',
        ...     features=['accelerometer_x', 'accelerometer_y', 'accelerometer_z'])

        # Make predictions (as probability vector, or class)
        >>> predictions = model.predict(data)
        >>> predictions = model.predict(data, output_type='probability_vector')

        # Get both predictions and classes together
        >>> predictions = model.classify(data)

        # Get top-k predictions (instead of only top-1) if your labels have
        # more than 2 classes
        >>> predictions = model.predict_topk(data, k=3)

        # Evaluate the model
        >>> results = model.evaluate(data)

    See Also
    --------
    ActivityClassifier, util.random_split_by_session
    """
    _tkutl._raise_error_if_not_sframe(dataset, "dataset")
    from ._model_architecture import _net_params
    from ._model_architecture import _define_model, _fit_model
    from ._sframe_sequence_iterator import SFrameSequenceIter as _SFrameSequenceIter
    from ._sframe_sequence_iterator import prep_data as _prep_data

    if not isinstance(target, str):
        raise _ToolkitError('target must be of type str')
    if not isinstance(session_id, str):
        raise _ToolkitError('session_id must be of type str')
    _tkutl._raise_error_if_sframe_empty(dataset, 'dataset')
    _tkutl._numeric_param_check_range('prediction_window', prediction_window, 1, 400)
    _tkutl._numeric_param_check_range('max_iterations', max_iterations, 0, _six.MAXSIZE)

    if features is None:
        features = _fe_tkutl.get_column_names(dataset,
                                              interpret_as_excluded=True,
                                              column_names=[session_id, target])
    if isinstance(features, str) or not hasattr(features, '__iter__'):
        raise TypeError("Input 'features' must be a list.")
    for x in features:
        if not isinstance(x, str):
            raise TypeError("Invalid feature %s: Feature names must be of type str." % x)
    if len(features) == 0:
        raise TypeError("Input 'features' must contain at least one column name.")

    start_time = _time.time()
    dataset = _tkutl._toolkits_select_columns(dataset, features + [session_id, target])
    _tkutl._raise_error_if_sarray_not_expected_dtype(dataset[target], target, [str, int])
    _tkutl._raise_error_if_sarray_not_expected_dtype(dataset[session_id], session_id, [str, int])

    # Encode the target column to numerical values
    use_target = target is not None
    dataset, target_map = _encode_target(dataset, target)

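    # Each chunk covers up to `predictions_in_chunk` prediction windows of
    # rows; longer sessions are split across chunks (an assumption based on
    # how _prep_data is called here).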
    predictions_in_chunk = 20
    chunked_data, num_sessions = _prep_data(dataset, features, session_id, prediction_window,
                                            predictions_in_chunk, target=target, verbose=verbose)

    if isinstance(validation_set, str) and validation_set == 'auto':
        if num_sessions < 100:
            validation_set = None
        else:
            dataset, validation_set = _random_split_by_session(dataset, session_id)

    # Create data iterators
    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=num_sessions)
    user_provided_batch_size = batch_size
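    # Raise the effective batch size so each GPU in use receives at least one
    # sequence chunk per step (and it is never zero).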
    batch_size = max(batch_size, num_gpus, 1)
    data_iter = _SFrameSequenceIter(chunked_data, len(features),
                                    prediction_window, predictions_in_chunk,
                                    batch_size, use_target=use_target)

    if validation_set is not None:
        _tkutl._raise_error_if_not_sframe(validation_set, 'validation_set')
        _tkutl._raise_error_if_sframe_empty(validation_set, 'validation_set')
        validation_set = _tkutl._toolkits_select_columns(
            validation_set, features + [session_id, target])
        validation_set = validation_set.filter_by(list(target_map.keys()), target)
        validation_set, mapping = _encode_target(validation_set, target, target_map)
        chunked_validation_set, _ = _prep_data(validation_set, features, session_id,
                                               prediction_window, predictions_in_chunk,
                                               target=target, verbose=False)

        valid_iter = _SFrameSequenceIter(chunked_validation_set, len(features),
                                         prediction_window, predictions_in_chunk,
                                         batch_size, use_target=use_target)
    else:
        valid_iter = None

    # Define model architecture
    context = _mxnet_utils.get_mxnet_context(max_devices=num_sessions)
    loss_model, pred_model = _define_model(features, target_map, prediction_window,
                                           predictions_in_chunk, context)

    # Train the model
    log = _fit_model(loss_model, data_iter, valid_iter,
                     max_iterations, num_gpus, verbose)

    # Set up prediction model
    pred_model.bind(data_shapes=data_iter.provide_data, label_shapes=None,
                    for_training=False)
    arg_params, aux_params = loss_model.get_params()
    pred_model.init_params(arg_params=arg_params, aux_params=aux_params)

    # Save the model
    state = {
        '_pred_model': pred_model,
        'verbose': verbose,
        'training_time': _time.time() - start_time,
        'target': target,
        'classes': sorted(target_map.keys()),
        'features': features,
        'session_id': session_id,
        'prediction_window': prediction_window,
        'max_iterations': max_iterations,
        'num_examples': len(dataset),
        'num_sessions': num_sessions,
        'num_classes': len(target_map),
        'num_features': len(features),
        'training_accuracy': log['train_acc'],
        'training_log_loss': log['train_loss'],
        '_target_id_map': target_map,
        '_id_target_map': {v: k for k, v in target_map.items()},
        '_predictions_in_chunk': predictions_in_chunk,
        '_recalibrated_batch_size': data_iter.batch_size,
        'batch_size': user_provided_batch_size
    }

    if validation_set is not None:
        state['valid_accuracy'] = log['valid_acc']
        state['valid_log_loss'] = log['valid_loss']

    model = ActivityClassifier(state)
    return model
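Both the variant above and the one below call an `_encode_target` helper that is not part of the listing. From its call sites — `_encode_target(dataset, target)` returning `(dataset, target_map)` during training, and `_encode_target(validation_set, target, target_map)` reusing the training map — it evidently maps class labels to contiguous integer ids. A sketch consistent with those call sites (an assumption, not the toolkit's verbatim implementation):

def _encode_target(data, target, mapping=None):
    # Map string/int class labels to contiguous ids 0..K-1. Reusing the
    # training-time `mapping` keeps validation labels encoded consistently
    # with the training labels.
    if mapping is None:
        mapping = {t: i for i, t in enumerate(sorted(data[target].unique()))}
    data[target] = data[target].apply(lambda t: mapping[t])
    return data, mapping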
Example #3
def create(dataset,
           session_id,
           target,
           features=None,
           prediction_window=100,
           validation_set='auto',
           max_iterations=10,
           batch_size=32,
           verbose=True,
           **kwargs):
    """
    Create an :class:`ActivityClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data consisting of `sessions`, where each session is a sequence
        of observations. The data must be in `stacked` format, grouped by
        session, and sorted temporally within each session. Columns listed in
        `features` are used to train a model that predicts the labels in the
        `target` column.

    session_id : string
        Name of the column that contains a unique ID for each session.

    target : string
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    features : list[string], optional
        Names of the columns containing the input features used for
        classification. If set to `None`, all columns except `session_id`
        and `target` are used.

    prediction_window : int, optional
        Number of time units between predictions. For example, if your input
        data is sampled at 100Hz, and the `prediction_window` is set to 100,
        then this model will make a prediction every 1 second.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance to
        prevent the model from overfitting to the training data.

        For each row of the progress table, accuracy is measured over the
        provided training dataset and the `validation_set`. The format of this
        SFrame must be the same as the training set.

        When set to 'auto', a validation set is automatically sampled from the
        training data (if the training data has > 100 sessions). If
        validation_set is set to None, then all the data will be used for
        training.

    max_iterations : int, optional
        Maximum number of iterations/epochs made over the data during the
        training phase.

    batch_size : int, optional
        Number of sequence chunks used per training step. Must be greater than
        the number of GPUs in use.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ActivityClassifier
        A trained :class:`ActivityClassifier` model.

    Examples
    --------
    .. sourcecode:: python

        >>> import turicreate as tc

        # Training on dummy data
        >>> data = tc.SFrame({
        ...    'accelerometer_x': [0.1, 0.2, 0.3, 0.4, 0.5] * 10,
        ...    'accelerometer_y': [0.5, 0.4, 0.3, 0.2, 0.1] * 10,
        ...    'accelerometer_z': [0.01, 0.01, 0.02, 0.02, 0.01] * 10,
        ...    'session_id': [0, 0, 0] * 10 + [1, 1] * 10,
        ...    'activity': ['walk', 'run', 'run'] * 10 + ['swim', 'swim'] * 10
        ... })

        # Create an activity classifier
        >>> model = tc.activity_classifier.create(data,
        ...     session_id='session_id', target='activity',
        ...     features=['accelerometer_x', 'accelerometer_y', 'accelerometer_z'])

        # Make predictions (as probability vector, or class)
        >>> predictions = model.predict(data)
        >>> predictions = model.predict(data, output_type='probability_vector')

        # Get both predictions and classes together
        >>> predictions = model.classify(data)

        # Get top-k predictions (instead of only top-1) if your labels have
        # more than 2 classes
        >>> predictions = model.predict_topk(data, k=3)

        # Evaluate the model
        >>> results = model.evaluate(data)

    See Also
    --------
    ActivityClassifier, util.random_split_by_session
    """
    from .._mxnet import _mxnet_utils
    from ._mx_model_architecture import _net_params
    from ._sframe_sequence_iterator import SFrameSequenceIter as _SFrameSequenceIter
    from ._sframe_sequence_iterator import prep_data as _prep_data
    from ._mx_model_architecture import _define_model_mxnet, _fit_model_mxnet
    from ._mps_model_architecture import _define_model_mps, _fit_model_mps
    from .._mps_utils import (use_mps as _use_mps,
                              mps_device_name as _mps_device_name,
                              ac_weights_mps_to_mxnet as _ac_weights_mps_to_mxnet)

    _tkutl._raise_error_if_not_sframe(dataset, "dataset")
    if not isinstance(target, str):
        raise _ToolkitError('target must be of type str')
    if not isinstance(session_id, str):
        raise _ToolkitError('session_id must be of type str')
    _tkutl._raise_error_if_sframe_empty(dataset, 'dataset')
    _tkutl._numeric_param_check_range('prediction_window', prediction_window,
                                      1, 400)
    _tkutl._numeric_param_check_range('max_iterations', max_iterations, 0,
                                      _six.MAXSIZE)

    if features is None:
        features = _fe_tkutl.get_column_names(
            dataset,
            interpret_as_excluded=True,
            column_names=[session_id, target])
    if isinstance(features, str) or not hasattr(features, '__iter__'):
        raise TypeError("Input 'features' must be a list.")
    for x in features:
        if not isinstance(x, str):
            raise TypeError(
                "Invalid feature %s: Feature names must be of type str." % x)
    if len(features) == 0:
        raise TypeError(
            "Input 'features' must contain at least one column name.")

    start_time = _time.time()
    dataset = _tkutl._toolkits_select_columns(dataset,
                                              features + [session_id, target])
    _tkutl._raise_error_if_sarray_not_expected_dtype(dataset[target], target,
                                                     [str, int])
    _tkutl._raise_error_if_sarray_not_expected_dtype(dataset[session_id],
                                                     session_id, [str, int])

    params = {'use_tensorflow': False, 'show_deprecated_warnings': False}

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError(
                'Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    if params['use_tensorflow']:

        # Imports TensorFlow and the TF model definition. The import must not
        # depend on the warnings flag, since the TF path below always needs
        # ActivityTensorFlowModel and _fit_model_tf.
        import tensorflow as _tf
        from ._tf_model_architecture import ActivityTensorFlowModel, _fit_model_tf

        if not params['show_deprecated_warnings']:
            # Suppresses TensorFlow logging to errors only
            _tf.compat.v1.logging.set_verbosity(_tf.compat.v1.logging.ERROR)

    if isinstance(validation_set, str) and validation_set == 'auto':
        # Computing the number of unique sessions in this way is relatively
        # expensive. Ideally we'd incorporate this logic into the C++ code that
        # chunks the raw data by prediction window.
        # TODO: https://github.com/apple/turicreate/issues/991
        unique_sessions = _SFrame({'session': dataset[session_id].unique()})
        if len(unique_sessions) < _MIN_NUM_SESSIONS_FOR_SPLIT:
            print("The dataset has fewer than the minimum of",
                  _MIN_NUM_SESSIONS_FOR_SPLIT,
                  "sessions required for a train-validation split. "
                  "Continuing without a validation set.")
            validation_set = None
        else:
            dataset, validation_set = _random_split_by_session(
                dataset, session_id)

    for feature in features:
        _tkutl._handle_missing_values(dataset, feature, 'training_dataset')

    # Encode the target column to numerical values
    use_target = target is not None
    dataset, target_map = _encode_target(dataset, target)

    predictions_in_chunk = 20
    chunked_data, num_sessions = _prep_data(dataset,
                                            features,
                                            session_id,
                                            prediction_window,
                                            predictions_in_chunk,
                                            target=target,
                                            verbose=verbose)

    # Decide whether to use the MPS GPU, MXNet GPU, or CPU backend
    num_mxnet_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=num_sessions)
    use_mps = (_use_mps() and num_mxnet_gpus == 0
               and not params['use_tensorflow'])

    if verbose:
        if use_mps:
            print('Using GPU to create model ({})'.format(_mps_device_name()))
        elif num_mxnet_gpus == 1:
            print('Using GPU to create model (CUDA)')
        elif num_mxnet_gpus > 1:
            print(
                'Using {} GPUs to create model (CUDA)'.format(num_mxnet_gpus))
        elif params['use_tensorflow']:
            print('Using TensorFlow to create model')
        else:
            print('Using CPU to create model')

    # Create data iterators
    user_provided_batch_size = batch_size
    batch_size = max(batch_size, num_mxnet_gpus, 1)

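    # Emit MXNet-format batches only when the MXNet path itself trains; the
    # MPS and TensorFlow paths consume the iterator's non-MXNet output
    # (assumed from the mx_output flag below).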
    use_mx_data_batch = not (use_mps or params['use_tensorflow'])
    data_iter = _SFrameSequenceIter(chunked_data,
                                    len(features),
                                    prediction_window,
                                    predictions_in_chunk,
                                    batch_size,
                                    use_target=use_target,
                                    mx_output=use_mx_data_batch)

    if validation_set is not None:
        _tkutl._raise_error_if_not_sframe(validation_set, 'validation_set')
        _tkutl._raise_error_if_sframe_empty(validation_set, 'validation_set')
        validation_set = _tkutl._toolkits_select_columns(
            validation_set, features + [session_id, target])
        for feature in features:
            _tkutl._handle_missing_values(validation_set, feature, 'validation_set')
        validation_set = validation_set.filter_by(list(target_map.keys()),
                                                  target)
        validation_set, mapping = _encode_target(validation_set, target,
                                                 target_map)
        chunked_validation_set, _ = _prep_data(validation_set,
                                               features,
                                               session_id,
                                               prediction_window,
                                               predictions_in_chunk,
                                               target=target,
                                               verbose=False)

        valid_iter = _SFrameSequenceIter(chunked_validation_set,
                                         len(features),
                                         prediction_window,
                                         predictions_in_chunk,
                                         batch_size,
                                         use_target=use_target,
                                         mx_output=use_mx_data_batch)
    else:
        valid_iter = None

    # Define model architecture
    context = _mxnet_utils.get_mxnet_context(max_devices=num_sessions)

    # Always create MXNet models, since pred_model is later saved in the state.
    # If MPS is used, the loss_model will be overwritten.
    loss_model, pred_model = _define_model_mxnet(len(target_map),
                                                 prediction_window,
                                                 predictions_in_chunk, context)

    if use_mps:
        loss_model = _define_model_mps(batch_size,
                                       len(features),
                                       len(target_map),
                                       prediction_window,
                                       predictions_in_chunk,
                                       is_prediction_model=False)

        log = _fit_model_mps(loss_model, data_iter, valid_iter, max_iterations,
                             verbose)
    else:

        if params['use_tensorflow']:
            net_params = _initialize_with_mxnet_weights(
                loss_model, chunked_data, features, prediction_window,
                predictions_in_chunk, batch_size, use_target)
            ac_model = ActivityTensorFlowModel(net_params, batch_size,
                                               len(features), len(target_map),
                                               prediction_window,
                                               predictions_in_chunk)
            # Train the model using TensorFlow
            log = _fit_model_tf(ac_model, net_params, data_iter, valid_iter,
                                max_iterations, verbose, 1e-3)
        else:
            # Train the model using MXNet
            log = _fit_model_mxnet(loss_model, data_iter, valid_iter,
                                   max_iterations, num_mxnet_gpus, verbose)

    # Set up prediction model
    pred_model.bind(data_shapes=data_iter.provide_data,
                    label_shapes=None,
                    for_training=False)

    if use_mps:
        mps_params = loss_model.export()
        arg_params, aux_params = _ac_weights_mps_to_mxnet(
            mps_params, _net_params['lstm_h'])

    elif params['use_tensorflow']:
        # Copy the weights back in the MXNet format
        arg_params, aux_params = ac_model.get_weights()

    else:
        arg_params, aux_params = loss_model.get_params()

    pred_model.init_params(arg_params=arg_params, aux_params=aux_params)

    # Save the model
    state = {
        '_pred_model': pred_model,
        'verbose': verbose,
        'training_time': _time.time() - start_time,
        'target': target,
        'classes': sorted(target_map.keys()),
        'features': features,
        'session_id': session_id,
        'prediction_window': prediction_window,
        'max_iterations': max_iterations,
        'num_examples': len(dataset),
        'num_sessions': num_sessions,
        'num_classes': len(target_map),
        'num_features': len(features),
        'training_accuracy': log['train_acc'],
        'training_log_loss': log['train_loss'],
        '_target_id_map': target_map,
        '_id_target_map': {v: k for k, v in target_map.items()},
        '_predictions_in_chunk': predictions_in_chunk,
        '_recalibrated_batch_size': data_iter.batch_size,
        'batch_size': user_provided_batch_size
    }

    if validation_set is not None:
        state['valid_accuracy'] = log['valid_acc']
        state['valid_log_loss'] = log['valid_loss']

    model = ActivityClassifier(state)
    return model
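Unlike the first two variants, this one accepts `**kwargs` solely for an `_advanced_parameters` dict, validated against the `params` defaults above (`use_tensorflow`, `show_deprecated_warnings`); any unknown key raises a `_ToolkitError`. A sketch of opting into the TensorFlow training path, assuming the dummy `data` SFrame from the docstring example:

# Select the TensorFlow backend instead of the MPS/MXNet paths.
# Unknown keys in _advanced_parameters are rejected with a _ToolkitError.
model = create(
    data,
    session_id='session_id',
    target='activity',
    _advanced_parameters={'use_tensorflow': True},
)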