def __init__(self, data, buffer_size=8, mode='threaded', workers=None,
             on_batch_loaded=None):
    """Prepare the executor, the bounded queue and the caching state.

    Args:
        data: object kept as ``self._data`` for later consumption.
        buffer_size (int): capacity passed to ``PriorityQueue``. Must be
            at least 2.
        mode (str): ``'threaded'`` builds a ``C.ThreadPoolExecutor``;
            ``'multiprocessing'`` builds a ``C.ProcessPoolExecutor``.
        workers: forwarded as the only positional argument of the
            executor constructor (presumably the worker count -- confirm
            against the ``C`` module).
        on_batch_loaded (callable): hook kept as
            ``self._on_batch_loaded``. Falls back to ``_identity`` when
            ``None``.

    Raises:
        ValueError: if ``mode`` is not supported or ``buffer_size < 2``.
    """
    supported_modes = ('threaded', 'multiprocessing')
    mode_error = 'mode must be one of: ' + ', '.join(supported_modes)
    utils.assert_raise(mode in supported_modes, ValueError, mode_error)
    utils.assert_raise(buffer_size >= 2, ValueError,
                       'buffer_size must be greater or equal to 2')

    # Only the selected executor class is looked up and instantiated.
    executor_factory = (C.ThreadPoolExecutor if mode == 'threaded'
                        else C.ProcessPoolExecutor)
    self._executor = executor_factory(workers)

    self._queue = PriorityQueue(buffer_size)
    self._data = data
    self._thread = None
    self._on_batch_loaded = (_identity if on_batch_loaded is None
                             else on_batch_loaded)
    self._cache_buffer = []
    self._caching = False
def __init__(self, data, batch_size=1, shuffle=True, drop_last=False,
             total_samples=None):
    """Build the data set from a sequence of equally sized holders.

    Args:
        data (list or tuple): raw inputs handed to
            ``self._create_container``.
        batch_size (int): stored as ``self._batch_size``.
        shuffle (bool): stored as ``self._shuffle``.
        drop_last (bool): stored as ``self._drop_last``.
        total_samples: stored as ``self._total_samples``.

    Raises:
        ValueError: if ``data`` is neither a list nor a tuple, or if the
            created containers do not all have the same length.
    """
    utils.assert_raise(isinstance(data, (list, tuple)), ValueError,
                       '"data" must be a list or a tuple')

    self._batch_size = batch_size
    self._shuffle = shuffle
    self._drop_last = drop_last
    self._total_samples = total_samples
    self._container = self._create_container(data)

    reference = self._container[0]
    expected_len = len(reference)
    for holder in self._container[1:]:
        same_size = len(holder) == expected_len
        utils.assert_raise(same_size, ValueError,
                           'All data must have the same length!')

    # Every holder shares the index object of the first one.
    shared_indices = reference.indices
    for holder in self._container:
        holder._indices = shared_indices

    self.reset()
def split(self, ratio):
    """Check :meth:`cogitare.data.AbsDataHolder.split`

    Divide this :class:`~cogitare.data.DataSet` into two new
    :class:`~cogitare.data.DataSet` objects by splitting every container
    with the same ratio.

    Args:
        ratio (:obj:`float`): ratio of the split. Must be between 0 and 1.

    Returns:
        (data1, data2): two :class:`~cogitare.data.DataSet`, created with
        the batch size, shuffle and drop_last settings of this one.

    Raises:
        ValueError: if ``ratio`` is not strictly between 0 and 1.

    Example::

        >>> print(dataset)
        DataSet with:
            containers: [
                TensorHolder with 1094x64 samples
                TensorHolder with 1094x64 samples
            ],
            batch size: 64

        >>> ds1, ds2 = dataset.split(0.8)

        >>> print(ds1)
        DataSet with:
            containers: [
                TensorHolder with 875x64 samples
                TensorHolder with 875x64 samples
            ],
            batch size: 64

        >>> print(ds2)
        DataSet with:
            containers: [
                TensorHolder with 219x64 samples
                TensorHolder with 219x64 samples
            ],
            batch size: 64
    """
    utils.assert_raise(0 < ratio < 1, ValueError,
                       '"ratio" must be between 0 and 1')

    first_parts = []
    second_parts = []
    for holder in self.container:
        head, tail = holder.split(ratio)
        first_parts.append(head)
        second_parts.append(tail)

    # Both halves are built with the same settings as this data set.
    settings = {
        'batch_size': self._batch_size,
        'shuffle': self._shuffle,
        'drop_last': self._drop_last,
    }
    factory = self.__class__
    data1 = factory(first_parts, **settings)
    data2 = factory(second_parts, **settings)
    return data1, data2
def evaluate_with_metrics(self, dataset, metrics, *args, **kwargs):
    """
    Iterate over batches in the dataset using metrics defined in the
    ``metrics`` argument, and then return a dict mapping
    {metric_name -> list of results}.

    This method does not affect training variables and can be used to
    evaluate the model performance in a different data (such as
    validation and test sets).

    The ``metrics`` must be defined as:

        - key: a name for this metric. The metric name must follow
          variable naming convention.
        - value: a callable object, that accepts two parameters as input.
          The first parameter will be the model output, and the second
          parameter will be the batch data.

    Args:
        dataset: batch iterator
        metrics (dict): a dict mapping metric name to a callable.
        args/kwargs: :meth:`~cogitare.Model.forward` arguments. If
            provided, they are passed to :meth:`predict` after the batch.

    Returns:
        output (dict): a dict mapping the metric name with a list
        containing the metric output for each batch in the dataset. If
        the dataset yields no batches, the dict is empty.

    Raises:
        ValueError: if ``metrics`` is not a dict.

    Example::

        >>> metrics = {
        ...     'loss': model.metric_loss,
        ...     'precision': metrics.precision
        ... }
        >>> model.evaluate_with_metrics(validation_dataset, metrics)
        {'loss': [1.0, 0.8, 0.9], 'precision': [0.6, 0.55, 0.58]}
    """
    utils.assert_raise(
        isinstance(metrics, dict), ValueError,
        '"metrics" must be a dict with metric_name -> metric_function')

    result = dict()

    for sample in dataset:
        # Forward the extra arguments; previously they were accepted and
        # documented but silently dropped.
        # NOTE(review): assumes predict() accepts extra positional and
        # keyword arguments -- confirm against its definition.
        output = self.predict(sample, *args, **kwargs)

        for key, call in metrics.items():
            result.setdefault(key, []).append(call(output, sample))

    return result
def __init__(self, monitor='epoch', desc=None, freq=1):
    """Configure what the progress bar tracks.

    Args:
        monitor (str): either ``'epoch'`` or ``'batch'``.
        desc (str): label stored as ``self._desc``. Defaults to the value
            of ``monitor`` when ``None``.
        freq: forwarded to the parent class constructor.

    Raises:
        ValueError: if ``monitor`` is not one of the accepted values.
    """
    super(ProgressBar, self).__init__(freq)

    accepted = ('epoch', 'batch')
    utils.assert_raise(monitor in accepted, ValueError,
                       'Monitor must be one of: "epoch", "batch"')

    self._monitor = monitor
    self._desc = monitor if desc is None else desc
    # Start with no total, bar or tracked variable.
    self._total = None
    self._bar = None
    self._var = None
def total_samples(self, value):
    """Set the number of samples and flag the holder for a reset.

    Args:
        value (int): new number of samples. Must be at least 1 and, when
            the underlying data defines ``__len__``, no larger than its
            length.

    Raises:
        ValueError: if ``value`` exceeds the data length (checked first)
            or is lower than 1.
    """
    # The upper bound is only enforced when the data exposes a length.
    if hasattr(self._data, '__len__'):
        utils.assert_raise(
            value <= len(self._data), ValueError,
            # The trailing space keeps the concatenated literals from
            # running together as "thelength".
            'The value must be lesser or equal to the '
            'length of the input data')

    utils.assert_raise(value >= 1, ValueError,
                       'number of samples must be greater or equal to 1')

    self._total_samples = value
    self._remaining_samples = value
    self._requires_reset = True
def _apply_plugin(self, plugin, hook, override):
    """Register one or more plugins under the given hook.

    Args:
        plugin: a plugin, or several of them; normalised through
            ``utils._ntuple(plugin, 1)``. Objects that are not
            ``PluginInterface`` instances are wrapped with
            ``PluginInterface.from_function``.
        hook (str): hook name; must be listed in ``self.valid_hooks``.
        override (bool): when false, a name clash with an already
            registered plugin is an error; when true, the existing plugin
            is replaced.

    Raises:
        ValueError: if ``hook`` is invalid, or if a plugin name is
            already registered for this hook and ``override`` is false.
            Plugins processed before the clash stay registered.
    """
    utils.assert_raise(
        hook in self.valid_hooks, ValueError,
        'Invalid hook {}. Expected one of the following: {}'.format(
            hook, ', '.join(self.valid_hooks)))

    container = self._plugins[hook]

    for p in utils._ntuple(plugin, 1):
        if not isinstance(p, PluginInterface):
            p = PluginInterface.from_function(p)

        if p.name in container and not override:
            raise ValueError(
                'A plugin with name "{}" already exists'.format(p.name))

        container[p.name] = p
def __init__(self, data, batch_size=1, shuffle=True, drop_last=False,
             total_samples=None, mode='sequential', single=False,
             on_sample_loaded=None, on_batch_loaded=None):
    """Store the loading configuration and pick the batch fetcher.

    Args:
        data: stored as ``self._data``.
        batch_size (int): stored as ``self._batch_size``.
        shuffle (bool): stored as ``self._shuffle``.
        drop_last (bool): stored as ``self._drop_last``.
        total_samples: stored as ``self._total_samples``.
        mode (str): one of ``'threaded'``, ``'multiprocessing'`` or
            ``'sequential'``. Decides the value of ``self._get``.
        single (bool): stored as ``self._single``.
        on_sample_loaded (callable): per-sample hook. Defaults to
            ``_identity``.
        on_batch_loaded (callable): per-batch hook. Defaults to
            ``_identity``.

    Raises:
        ValueError: if ``mode`` is not one of the supported values.
    """
    supported = ['threaded', 'multiprocessing', 'sequential']
    utils.assert_raise(mode in supported, ValueError,
                       '"mode" must be one of: ' + ', '.join(supported))

    sample_hook = (_identity if on_sample_loaded is None
                   else on_sample_loaded)
    batch_hook = (_identity if on_batch_loaded is None
                  else on_batch_loaded)

    self._indices = None
    self._single = single
    self._mode = mode
    self._total_samples = total_samples
    self._remaining_samples = None
    self._on_sample_loaded = sample_hook
    self._on_batch_loaded = batch_hook
    self._data = data
    self._batch_size = batch_size
    self._current_batch = 0
    self._drop_last = drop_last
    self._shuffle = shuffle
    self._requires_reset = True

    # Sequential mode has no fetcher; the others take it from the
    # matching module.
    if mode == 'threaded':
        fetcher = threaded.get
    elif mode == 'multiprocessing':
        fetcher = multiprocessing.get
    else:
        fetcher = None
    self._get = fetcher
def __init__(self, input_size, num_classes=2, dropout=0.0, bias=True,
             use_cuda=False):
    """Create the linear layer of the logistic regression model.

    Args:
        input_size (int): number of input features; at least 1.
        num_classes (int): number of output classes; at least 2.
        dropout (float): value in ``[0, 1)``, recorded in
            ``self.arguments``.
        bias (bool): passed to ``nn.Linear``.
        use_cuda (bool): when truthy, ``self.cuda()`` is called after the
            layer is created.

    Raises:
        ValueError: if any numeric argument is out of range.
    """
    super(LogisticRegression, self).__init__()
    self.use_cuda = use_cuda

    checks = (
        (num_classes >= 2,
         '"num_classes" must be greater than or equal 2'),
        (0 <= dropout < 1,
         '"dropout" value must be between 0 and 1'),
        (input_size >= 1,
         '"input_size" value must be greater than or equal 1'),
    )
    for passed, message in checks:
        utils.assert_raise(passed, ValueError, message)

    self.arguments = dict(
        input_size=input_size,
        num_classes=num_classes,
        dropout=dropout,
        bias=bias,
    )
    self.linear = nn.Linear(input_size, num_classes, bias)

    if use_cuda:
        self.cuda()
def split(self, ratio):
    """Divide this data holder into two holders of the same type.

    The first holder gets ``floor(total_samples * ratio)`` samples and
    the second gets the remaining ones.

    Args:
        ratio (:obj:`float`): ratio of the split. Must be between 0 and 1.

    Returns:
        (data1, data2): two data holders produced by ``self._clone()``,
        each holding its own slice of the indices.

    Raises:
        ValueError: if ``ratio`` is not strictly between 0 and 1.

    Example::

        >>> print(data)
        TensorHolder with 875x64 samples

        >>> data1, data2 = data.split(0.8)
        >>> print(data1)
        TensorHolder with 700x64 samples
        >>> print(data2)
        TensorHolder with 175x64 samples
    """
    utils.assert_raise(0 < ratio < 1, ValueError,
                       '"ratio" must be between 0 and 1')

    cut = int(math.floor(self.total_samples * ratio))

    head = self._clone()
    tail = self._clone()

    # The first clone takes the indices before the cut, the second the
    # rest.
    head._indices = self.indices[:cut]
    tail._indices = self.indices[cut:]

    head._total_samples = cut
    tail._total_samples = self.total_samples - cut

    return head, tail
def test_assert_raise(self):
    """Check that ``utils.assert_raise`` raises only on a false condition."""
    failing_condition = 1 == 2
    passing_condition = 1 == 1

    for exc_type in (ValueError, IndexError, Exception):
        with pytest.raises(exc_type) as captured:
            utils.assert_raise(failing_condition, exc_type, 'test message')
        # The raised exception must carry the supplied message.
        self.assertIn('test message', str(captured.value))

    # A true condition must not raise.
    utils.assert_raise(passing_condition, ValueError, 'not raises')
def __init__(self, input_size, num_layers, hidden_size, activation=None,
             in_dropout=0.0, hidden_dropout=0.0, loss_function=None,
             bias=True, use_cuda=None):
    """Validate the configuration and build the feed-forward network.

    Args:
        input_size (int): number of input features; at least 1.
        num_layers (int): number of layers; at least 1.
        hidden_size: per-layer sizes, expanded with ``utils._ntuple``.
        activation: per-layer activations. When ``None``, ``nn.Tanh()``
            is used for every layer except the last, which uses
            ``nn.LogSoftmax(dim=1)``.
        in_dropout (float): stored as ``self.in_dropout``.
        hidden_dropout: per-layer values, expanded with ``utils._ntuple``.
        loss_function: defaults to ``nn.NLLLoss()`` when ``None``.
        bias: per-layer flags, expanded with ``utils._ntuple``.
        use_cuda: when truthy, ``self.cuda()`` is called at the end.

    Raises:
        ValueError: if ``input_size`` or ``num_layers`` is lower than 1,
            or if a per-layer setting does not have ``num_layers``
            entries.
    """
    super(FeedForward, self).__init__()

    if activation is None:
        # The one Tanh instance is shared by all but the last layer.
        activation = [nn.Tanh()] * (num_layers - 1)
        activation.append(nn.LogSoftmax(dim=1))

    if loss_function is None:
        loss_function = nn.NLLLoss()

    utils.assert_raise(input_size >= 1, ValueError,
                       '"input_size" must be greater or equal to 1')
    utils.assert_raise(num_layers >= 1, ValueError,
                       '"num_layers" must be greater or equal to 1')

    self.input_size = input_size
    self.num_layers = num_layers
    self.in_dropout = in_dropout
    self.loss_function = loss_function
    self.use_cuda = use_cuda

    # Expand every per-layer setting before any of them is checked.
    per_layer = (
        ('hidden_size', hidden_size),
        ('activation', activation),
        ('hidden_dropout', hidden_dropout),
        ('bias', bias),
    )
    for attr_name, raw_value in per_layer:
        setattr(self, attr_name, utils._ntuple(raw_value, num_layers))

    # make some assertions before continuing
    for attr_name, _ in per_layer:
        utils.assert_raise(
            len(getattr(self, attr_name)) == num_layers, ValueError,
            '"{}" must have the same length that "num_layers"'.format(
                attr_name))

    self._mlp = self._make_model()

    if use_cuda:
        self.cuda()