def test_as_values(self):
    # Value._as_values should wrap every raw entry in a Value that still
    # compares equal to the raw entry it was built from.
    # pylint: disable=protected-access
    def check_first(variable, raw_entries, expected_first):
        wrapped = Value._as_values(variable, raw_entries)
        self.assertIsInstance(wrapped[0], Value)
        self.assertEqual(wrapped[0], expected_first)

    # continuous column
    check_first(ContinuousVariable("x"), [0., 1., 2.], 0)
    # string column (the last entry is the empty string)
    check_first(StringVariable("s"), ["a", "b", ""], "a")
def test_hash(self):
    # A Value of a continuous, string or time variable must compare equal to
    # the raw value it wraps and must hash identically to it.
    def assert_hashes_like_raw(variable, raw):
        wrapped = Value(variable, raw)
        self.assertTrue(wrapped == raw and hash(wrapped) == hash(raw))

    assert_hashes_like_raw(ContinuousVariable("var"), 1234.5)
    assert_hashes_like_raw(StringVariable("var"), "test")
    assert_hashes_like_raw(TimeVariable("var"), 1234.5)

    # A Value of a discrete variable is expected to be unhashable.
    discrete_value = Value(
        DiscreteVariable("var", ["red", "green", "blue"]), 1)
    with self.assertRaises(TypeError):
        hash(discrete_value)
def extract_column(self, table: Table, var: Variable):
    """
    Return the column of `var` in `table` as a Python list.

    The representation depends on the variable type and on
    `self.use_values`:

    - string variables: each entry passed through `var.str_val`;
    - discrete variables when `self.use_values` is false: the entries of
      `var.values`, with `None` in place of non-finite entries;
    - time variables, and any other variable when `self.use_values` is
      true: the result of `Value._as_values`;
    - otherwise: the plain content of the column (`tolist()`).
    """
    column, _ = table.get_column_view(var)

    if var.is_string:
        to_text = var.str_val
        return [to_text(entry) for entry in column]

    if var.is_discrete and not self.use_values:
        # The extra trailing None is the label used for non-finite entries.
        labels = np.array([*var.values, None], dtype=object)
        codes = column.astype(int)
        codes[~np.isfinite(column)] = len(labels) - 1
        return labels[codes].tolist()

    # time always needs Values due to str(val) formatting
    if var.is_time or self.use_values:
        return Value._as_values(var, column.tolist())  # pylint: disable=protected-access

    return column.tolist()
def _update_predictions_model(self):
    """
    Rebuild the model shown in the predictions view.

    When `self.data` is set, one (values, probabilities) pair is collected
    per predictor returned by `self._valid_predictors()`; otherwise the
    proxy model is given `None` as its source.
    """
    model = None
    if self.data is not None:
        predictors = self._valid_predictors()
        target = self.class_var
        collected = []
        for slot in predictors:
            predicted, probabilities = slot.results
            if self.class_var.is_discrete:
                predicted = [Value(target, item) for item in predicted]
            collected.append((predicted, probabilities))
        # Regroup the per-predictor pairs row-wise
        transposed = list(zip(*(zip(*pair) for pair in collected)))
        titles = [slot.name for slot in predictors]
        model = PredictionsModel(transposed, titles)

    proxy = PredictionsSortProxyModel()
    proxy.setSourceModel(model)
    proxy.setDynamicSortFilter(True)
    view = self.predictionsview
    view.setItemDelegate(PredictionsItemDelegate())
    view.setModel(proxy)

    header = view.horizontalHeader()
    header.setSortIndicatorShown(False)
    # SortFilterProxyModel is slow due to large abstraction overhead
    # (every comparison triggers multiple `model.index(...)`,
    # model.rowCount(...), `model.parent`, ... calls)
    allow_sorting = proxy.rowCount() < 20000
    header.setSectionsClickable(allow_sorting)

    proxy.layoutChanged.connect(self._update_data_sort_order)
    self._update_data_sort_order()
    view.resizeColumnsToContents()
def _update_predictions_model(self):
    """
    Rebuild the model shown in the predictions view.

    One (values, probabilities) pair and one header are collected for each
    predictor returned by `self._non_errored_predictors()`. If nothing is
    collected, the proxy model is given `None` as its source.
    """
    columns = []
    titles = []
    for slot in self._non_errored_predictors():
        predicted = slot.results.unmapped_predicted
        target = slot.predictor.domain.class_var
        if not target.is_discrete:
            # No class probabilities: use an array with zero columns
            probabilities = numpy.zeros((len(predicted), 0))
        else:
            probabilities = slot.results.unmapped_probabilities
            predicted = [Value(target, item) for item in predicted]
        columns.append((predicted, probabilities))
        titles.append(slot.predictor.name)

    model = None
    if columns:
        # Regroup the per-predictor pairs row-wise
        transposed = list(zip(*(zip(*pair) for pair in columns)))
        model = PredictionsModel(transposed, titles)

    proxy = PredictionsSortProxyModel()
    proxy.setSourceModel(model)
    proxy.setDynamicSortFilter(True)
    view = self.predictionsview
    view.setModel(proxy)

    header = view.horizontalHeader()
    header.setSortIndicatorShown(False)
    # SortFilterProxyModel is slow due to large abstraction overhead
    # (every comparison triggers multiple `model.index(...)`,
    # model.rowCount(...), `model.parent`, ... calls)
    allow_sorting = proxy.rowCount() < 20000
    header.setSectionsClickable(allow_sorting)

    proxy.layoutChanged.connect(self._update_data_sort_order)
    self._update_data_sort_order()
    view.resizeColumnsToContents()
def get_class(self):
    """
    Return the class value wrapped in an :obj:`Orange.data.Value`.

    `self._check_single_class()` is called first; it is expected to raise
    an exception if there are multiple classes.
    """
    self._check_single_class()
    class_var = self._domain.class_var
    raw_value = self._y[0]
    return Value(class_var, raw_value)
def get_classes(self):
    """
    Return the class values as :obj:`Orange.data.Value` instances.

    The result is a generator (not a list) that pairs each class variable
    of the domain with the corresponding entry of `self._y`.
    """
    paired = zip(self._domain.class_vars, self._y)
    return (Value(class_var, raw) for class_var, raw in paired)
def __getitem__(self, key):
    """
    Return the value at `key` wrapped in a `Value`.

    `key` may be an integer position or anything that
    `self._domain.index` can resolve to one. Negative positions address
    `self._metas`, positions below the number of attributes address
    `self._x`, and the remaining positions address `self._y`.
    """
    if not isinstance(key, Integral):
        key = self._domain.index(key)

    if key < 0:
        # -1 is the first meta, -2 the second, ...
        raw = self._metas[-1 - key]
    elif key < len(self._domain.attributes):
        raw = self._x[key]
    else:
        raw = self._y[key - len(self.domain.attributes)]
    return Value(self._domain[key], raw)
def __getitem__(self, key):
    """
    Return the value at `key` wrapped in a `Value`.

    `key` may be an integer position or anything that
    `self._domain.index` can resolve to one. Negative positions address
    `self._metas`, positions below the number of attributes address
    `self._x`, and the remaining positions address `self._y`.

    When `key` is a `DiscreteVariable` that is not the very object stored
    in the domain at that position, the raw value is passed through
    `key.get_mapper_from(stored_variable)` and the returned `Value` is
    bound to `key` instead of the stored variable.
    """
    if isinstance(key, Integral):
        position = key
    else:
        position = self._domain.index(key)

    if position < 0:
        # -1 is the first meta, -2 the second, ...
        raw = self._metas[-1 - position]
    elif position < len(self._domain.attributes):
        raw = self._x[position]
    else:
        raw = self._y[position - len(self.domain.attributes)]

    stored_var = self._domain[position]
    if not isinstance(key, DiscreteVariable) or stored_var is key:
        return Value(stored_var, raw)

    # Translate the stored value into the value space of the requested
    # variable
    remap = key.get_mapper_from(stored_var)
    return Value(key, remap(raw))
def __getitem__(self, key):
    """
    Indexing of SqlTable is performed in the following way:

    If a single row is requested (`key` is an `int`), it is fetched from
    the database with `self._fetch_row` and returned (as a SqlRowInstance,
    according to the original documentation).

    If `key` is a `(row, column)` pair with an integer row and a column
    that `self.domain.index` can resolve, a single `Value` is queried and
    returned.

    Otherwise a copy of this table, with its domain restricted to the
    requested columns, is constructed and returned.

    Raises:
        IndexError: if `key` is a tuple whose length is not 2.
        NotImplementedError: if the row index is neither an `int`,
            `Ellipsis` nor `slice(None)`.
    """
    if isinstance(key, int):
        # one row
        return self._fetch_row(key)
    if not isinstance(key, tuple):
        # row filter
        key = (key, Ellipsis)

    if len(key) != 2:
        raise IndexError("Table indices must be one- or two-dimensional")

    row_idx, col_idx = key
    if isinstance(row_idx, int):
        try:
            col_idx = self.domain.index(col_idx)
            var = self.domain[col_idx]
            return Value(
                var,
                next(self._query([var], rows=[row_idx]))[0]
            )
        except TypeError:
            # Deliberate fall-through: a TypeError raised anywhere in the
            # block above is treated as "not a single column", and the
            # request is served by the table construction below.
            # NOTE(review): this also hides a TypeError coming from
            # `_query` or `Value` themselves - confirm that is intended.
            pass
    elif not (row_idx is Ellipsis or row_idx == slice(None)):
        # TODO if row_idx specify multiple rows, one of the following must
        # happen
        #  - the new table remembers which rows are selected (implement
        #     table.limit_rows and whatever else is necessary)
        #  - return an ordinary (non-SQL) Table
        #  - raise an exception
        raise NotImplementedError("Row indices must be integers.")

    # multiple rows OR single row but multiple columns:
    # construct a new table
    # NOTE(review): `row_idx` is not applied to the returned table (the
    # `limit_rows` call below is commented out), so only the columns are
    # restricted here.
    table = self.copy()
    table.domain = self.domain.select_columns(col_idx)
    # table.limit_rows(row_idx)
    return table
def _update_predictions_model(self):
    """
    Update the prediction view model.

    When both `self.data` and `self.class_var` are set, one
    (values, probabilities) pair is collected per predictor returned by
    `self._valid_predictors()`. Predictors whose `results` is a string are
    skipped. Headers are collected together with the results, so a skipped
    predictor contributes neither a column nor a header.

    Without data or class variable, the proxy model is given `None` as its
    source.
    """
    if self.data is not None and self.class_var is not None:
        class_var = self.class_var
        results = []
        headers = []
        for p in self._valid_predictors():
            if isinstance(p.results, str):
                # String results are skipped (presumably an error
                # message); the header is skipped along with them.
                continue
            values = p.results.predicted[0]
            if class_var.is_discrete:
                # if values were added to class_var between building the
                # model and predicting, add zeros for new class values,
                # which are always at the end
                prob = p.results.probabilities[0]
                n_missing = len(class_var.values) - prob.shape[1]
                prob = numpy.c_[
                    prob, numpy.zeros((prob.shape[0], n_missing))]
                values = [Value(class_var, v) for v in values]
            else:
                # No class probabilities: use an array with zero columns
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.name)
        # Regroup the per-predictor pairs row-wise
        results = list(zip(*(zip(*res) for res in results)))
        model = PredictionsModel(results, headers)
    else:
        model = None

    predmodel = PredictionsSortProxyModel()
    predmodel.setSourceModel(model)
    predmodel.setDynamicSortFilter(True)
    self.predictionsview.setItemDelegate(PredictionsItemDelegate())
    self.predictionsview.setModel(predmodel)
    hheader = self.predictionsview.horizontalHeader()
    hheader.setSortIndicatorShown(False)
    # SortFilterProxyModel is slow due to large abstraction overhead
    # (every comparison triggers multiple `model.index(...)`,
    # model.rowCount(...), `model.parent`, ... calls)
    hheader.setSectionsClickable(predmodel.rowCount() < 20000)

    predmodel.layoutChanged.connect(self._update_data_sort_order)
    self._update_data_sort_order()
    self.predictionsview.resizeColumnsToContents()
def test_repr_value(self):
    # Regression test for https://github.com/biolab/orange3/pull/1760:
    # repr_val of a TimeVariable is given a Value rather than a float.
    time_var = TimeVariable('time')
    wrapped = Value(time_var, 416.3)
    rendered = time_var.repr_val(wrapped)
    self.assertEqual(rendered, '416.3')
def test_val(self):
    # Conversions of a StringVariable: None converts to the empty string,
    # the empty string is shown as "?" and repr_val adds double quotes.
    text_var = StringVariable("a")

    converted = text_var.to_val(None)
    self.assertEqual(converted, "")

    shown_raw = text_var.str_val("")
    self.assertEqual(shown_raw, "?")

    shown_wrapped = text_var.str_val(Value(text_var, ""))
    self.assertEqual(shown_wrapped, "?")

    quoted = text_var.repr_val(Value(text_var, "foo"))
    self.assertEqual(quoted, '"foo"')
def __call__(self, data, ret=Value):
    """
    Predict values and/or probabilities for `data`.

    Args:
        data: a numpy array (passed through `np.atleast_2d`), a scipy CSR
            matrix, a `Table`, an `Instance`, or a list/tuple holding
            either one row or a sequence of rows.
        ret: selects what is returned; it must lie in the range 0..2 and
            is compared with `Model.Value`, `Model.Probs` and
            `Model.ValueProbs`.

    Returns:
        The predicted values, the probabilities, or a
        `(values, probabilities)` tuple, depending on `ret`.

    Raises:
        ValueError: if `ret` is outside 0..2, or if probabilities are
            requested while any class variable is continuous.
        TypeError: if `data` is of an unrecognized type, or if the
            prediction has an unexpected number of dimensions.
    """
    if not 0 <= ret <= 2:
        raise ValueError("invalid value of argument 'ret'")
    if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
        raise ValueError("cannot predict continuous distributions")

    # Call the predictor
    if isinstance(data, np.ndarray):
        prediction = self.predict(np.atleast_2d(data))
    elif isinstance(data, scipy.sparse.csr_matrix):
        prediction = self.predict(data)
    elif isinstance(data, (Table, Instance)):
        if isinstance(data, Instance):
            data = Table(data.domain, [data])
        if data.domain != self.domain:
            data = data.transform(self.domain)
        prediction = self.predict_storage(data)
    elif isinstance(data, (list, tuple)):
        if not isinstance(data[0], (list, tuple)):
            # a single row: wrap it so that it forms a one-row table
            data = [data]
        data = Table(self.original_domain, data)
        data = data.transform(self.domain)
        prediction = self.predict_storage(data)
    else:
        raise TypeError("Unrecognized argument (instance of '{}')".format(
            type(data).__name__))

    # Parse the result into value and probs
    multitarget = len(self.domain.class_vars) > 1
    if isinstance(prediction, tuple):
        value, probs = prediction
    elif prediction.ndim == 1 + multitarget:
        value, probs = prediction, None
    elif prediction.ndim == 2 + multitarget:
        value, probs = None, prediction
    else:
        # The number of dimensions is formatted into the message (it used
        # to be passed as a separate, never interpolated, argument).
        raise TypeError("model returned a {}-dimensional array".format(
            prediction.ndim))

    # Ensure that we have what we need to return
    if ret != Model.Probs and value is None:
        value = np.argmax(probs, axis=-1)
    if ret != Model.Value and probs is None:
        if multitarget:
            max_card = max(len(c.values) for c in self.domain.class_vars)
            probs = np.zeros(value.shape + (max_card, ), float)
            for i in range(len(self.domain.class_vars)):
                probs[:, i, :] = one_hot(value[:, i])
        else:
            probs = one_hot(value)
        if ret == Model.ValueProbs:
            return value, probs
        else:
            return probs

    # Return what we need to
    if ret == Model.Probs:
        return probs
    # NOTE(review): an Instance passed as `data` has been rebound to a
    # Table above, so this branch presumably never fires - confirm before
    # relying on a `Value` being returned here.
    if isinstance(data, Instance) and not multitarget:
        value = Value(self.domain.class_var, value[0])
    if ret == Model.Value:
        return value
    else:  # ret == Model.ValueProbs
        return value, probs
def values(self):
    """
    Return a generator of `Value` objects, pairing each variable in
    `self.domain.variables` with the corresponding item obtained by
    iterating over `self`.
    """
    paired = zip(self.domain.variables, self)
    return (Value(variable, raw) for variable, raw in paired)
def __call__(self, data, ret=Value):
    """
    Predict values and/or probabilities for `data`.

    Args:
        data: an `Instance`, a `Table`, a numpy array, a scipy sparse
            matrix (converted to CSR), or a list/tuple holding either one
            row or a sequence of rows. For one-dimensional input (an
            instance, a flat list/tuple or a 1d array) the leading
            dimension is stripped from the result again.
        ret: selects what is returned; it must lie in the range 0..2 and
            is compared with `Model.Value`, `Model.Probs` and
            `Model.ValueProbs`.

    Returns:
        The predicted values, the probabilities, or a
        `(values, probabilities)` tuple, depending on `ret`.

    Raises:
        ValueError: if `ret` is outside 0..2, or if probabilities are
            requested while any class variable is continuous.
        TypeError: if `data` is of an unrecognized type, or if the
            prediction has an unexpected number of dimensions.
    """
    multitarget = len(self.domain.class_vars) > 1

    def one_hot_probs(value):
        # Build 0/1 probabilities from predicted values.
        if not multitarget:
            return one_hot(
                value,
                dim=len(self.domain.class_var.values)
                if self.domain is not None else None
            )

        max_card = max(len(c.values) for c in self.domain.class_vars)
        probs = np.zeros(value.shape + (max_card, ), float)
        for i in range(len(self.domain.class_vars)):
            probs[:, i, :] = one_hot(value[:, i])
        return probs

    def extend_probabilities(probs):
        """
        Since SklModels and models implementing `fit` and not
        `fit_storage` do not guarantee correct prediction dimensionality,
        extend dimensionality of probabilities when it does not match the
        number of values in the domain.
        """
        class_vars = self.domain.class_vars
        max_values = max(len(cv.values) for cv in class_vars)
        if max_values == probs.shape[-1]:
            return probs

        if not self.supports_multiclass:
            # temporarily add the class axis so one loop serves both cases
            probs = probs[:, np.newaxis, :]

        probs_ext = np.zeros((len(probs), len(class_vars), max_values))
        for c, used_vals in enumerate(self.used_vals):
            for i, cv in enumerate(used_vals):
                probs_ext[:, c, cv] = probs[:, c, i]

        if not self.supports_multiclass:
            probs_ext = probs_ext[:, 0, :]
        return probs_ext

    def fix_dim(x):
        # Undo the 1d -> 2d conversion; `one_d` is assigned further below.
        return x[0] if one_d else x

    if not 0 <= ret <= 2:
        raise ValueError("invalid value of argument 'ret'")
    if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
        raise ValueError("cannot predict continuous distributions")

    # Convert 1d structures to 2d and remember doing it
    one_d = True
    if isinstance(data, Instance):
        data = Table.from_list(data.domain, [data])
    elif isinstance(data, (list, tuple)) \
            and not isinstance(data[0], (list, tuple)):
        data = [data]
    elif isinstance(data, np.ndarray) and data.ndim == 1:
        data = np.atleast_2d(data)
    else:
        one_d = False

    # if sparse convert to csr_matrix
    if scipy.sparse.issparse(data):
        data = data.tocsr()

    # Call the predictor
    backmappers = None
    n_values = []
    if isinstance(data, (np.ndarray, scipy.sparse.csr_matrix)):
        prediction = self.predict(data)
    elif isinstance(data, Table):
        backmappers, n_values = self.get_backmappers(data)
        data = self.data_to_model_domain(data)
        prediction = self.predict_storage(data)
    elif isinstance(data, (list, tuple)):
        data = Table.from_list(self.original_domain, data)
        data = data.transform(self.domain)
        prediction = self.predict_storage(data)
    else:
        raise TypeError("Unrecognized argument (instance of '{}')".format(
            type(data).__name__))

    # Parse the result into value and probs
    if isinstance(prediction, tuple):
        value, probs = prediction
    elif prediction.ndim == 1 + multitarget:
        value, probs = prediction, None
    elif prediction.ndim == 2 + multitarget:
        value, probs = None, prediction
    else:
        # The number of dimensions is formatted into the message (it used
        # to be passed as a separate, never interpolated, argument).
        raise TypeError("model returned a {}-dimensional array".format(
            prediction.ndim))

    # Ensure that we have what we need to return; backmap everything
    if probs is None and (ret != Model.Value or backmappers is not None):
        probs = one_hot_probs(value)
    if probs is not None:
        probs = extend_probabilities(probs)
        probs = self.backmap_probs(probs, n_values, backmappers)
    if ret != Model.Probs:
        if value is None:
            value = np.argmax(probs, axis=-1)
            # probs are already backmapped
        else:
            value = self.backmap_value(value, probs, n_values, backmappers)

    # Return what we need to
    if ret == Model.Probs:
        return fix_dim(probs)
    # NOTE(review): an Instance passed as `data` has been rebound to a
    # Table above, so this branch presumably never fires - confirm before
    # relying on a `Value` being returned here.
    if isinstance(data, Instance) and not multitarget:
        value = [Value(self.domain.class_var, value[0])]
    if ret == Model.Value:
        return fix_dim(value)
    else:  # ret == Model.ValueProbs
        return fix_dim(value), fix_dim(probs)
def __call__(self, data, ret=Value):
    """
    Predict values and/or probabilities for `data`.

    Args:
        data: an `Instance`, a `Table`, a numpy array, a scipy CSR matrix,
            or a list/tuple holding either one row or a sequence of rows.
            For one-dimensional input (an instance, a flat list/tuple or a
            1d array) the leading dimension is stripped from the result
            again.
        ret: selects what is returned; it must lie in the range 0..2 and
            is compared with `Model.Value`, `Model.Probs` and
            `Model.ValueProbs`.

    Returns:
        The predicted values, the probabilities, or a
        `(values, probabilities)` tuple, depending on `ret`.

    Raises:
        ValueError: if `ret` is outside 0..2, or if probabilities are
            requested while any class variable is continuous.
        TypeError: if `data` is of an unrecognized type, or if the
            prediction has an unexpected number of dimensions.
        DomainTransformationError: if transforming a table into the
            original domain leaves no defined values in `X`.
    """
    multitarget = len(self.domain.class_vars) > 1

    def one_hot_probs(value):
        # Build 0/1 probabilities from predicted values.
        if not multitarget:
            return one_hot(value)

        max_card = max(len(c.values) for c in self.domain.class_vars)
        probs = np.zeros(value.shape + (max_card,), float)
        for i in range(len(self.domain.class_vars)):
            probs[:, i, :] = one_hot(value[:, i])
        return probs

    def fix_dim(x):
        # Undo the 1d -> 2d conversion; `one_d` is assigned further below.
        return x[0] if one_d else x

    def data_to_model_domain():
        # Transform `data` (a Table at the time of the call) into the
        # domain of the model.
        if data.domain == self.domain:
            return data

        if self.original_domain.attributes != data.domain.attributes \
                and data.X.size \
                and not np.isnan(data.X).all():
            # Go through the original domain first, so that a transform
            # that loses every defined value can be detected.
            new_data = data.transform(self.original_domain)
            if np.isnan(new_data.X).all():
                raise DomainTransformationError(
                    "domain transformation produced no defined values")
            return new_data.transform(self.domain)

        return data.transform(self.domain)

    if not 0 <= ret <= 2:
        raise ValueError("invalid value of argument 'ret'")
    if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
        raise ValueError("cannot predict continuous distributions")

    # Convert 1d structures to 2d and remember doing it
    one_d = True
    if isinstance(data, Instance):
        data = Table(data.domain, [data])
    elif isinstance(data, (list, tuple)) \
            and not isinstance(data[0], (list, tuple)):
        data = [data]
    elif isinstance(data, np.ndarray) and data.ndim == 1:
        data = np.atleast_2d(data)
    else:
        one_d = False

    # Call the predictor
    backmappers = None
    n_values = []
    if isinstance(data, (np.ndarray, scipy.sparse.csr_matrix)):
        prediction = self.predict(data)
    elif isinstance(data, Table):
        backmappers, n_values = self.get_backmappers(data)
        data = data_to_model_domain()
        prediction = self.predict_storage(data)
    elif isinstance(data, (list, tuple)):
        data = Table.from_list(self.original_domain, data)
        data = data.transform(self.domain)
        prediction = self.predict_storage(data)
    else:
        raise TypeError("Unrecognized argument (instance of '{}')"
                        .format(type(data).__name__))

    # Parse the result into value and probs
    if isinstance(prediction, tuple):
        value, probs = prediction
    elif prediction.ndim == 1 + multitarget:
        value, probs = prediction, None
    elif prediction.ndim == 2 + multitarget:
        value, probs = None, prediction
    else:
        # The number of dimensions is formatted into the message (it used
        # to be passed as a separate, never interpolated, argument).
        raise TypeError("model returned a {}-dimensional array".format(
            prediction.ndim))

    # Ensure that we have what we need to return; backmap everything
    if probs is None and (ret != Model.Value or backmappers is not None):
        probs = one_hot_probs(value)
    if probs is not None:
        probs = self.backmap_probs(probs, n_values, backmappers)
    if ret != Model.Probs:
        if value is None:
            value = np.argmax(probs, axis=-1)
            # probs are already backmapped
        else:
            value = self.backmap_value(value, probs, n_values, backmappers)

    # Return what we need to
    if ret == Model.Probs:
        return fix_dim(probs)
    # NOTE(review): an Instance passed as `data` has been rebound to a
    # Table above, so this branch presumably never fires - confirm before
    # relying on a `Value` being returned here.
    if isinstance(data, Instance) and not multitarget:
        value = [Value(self.domain.class_var, value[0])]
    if ret == Model.Value:
        return fix_dim(value)
    else:  # ret == Model.ValueProbs
        return fix_dim(value), fix_dim(probs)