def seq_to_svm_node(x):
    """Convert a sequence or mapping to an SVMNode array.

    Parameters
    ----------
    x : np.ndarray or dict or sequence
      Data to convert. For a dict, keys become the node indices (sorted)
      and the matching values become the node values.

    Returns
    -------
    svm_node_array
      C-level node array with a terminating ``(-1, 0.0)`` sentinel.

    Raises
    ------
    TypeError
      If `x` is neither a mapping, an ndarray, nor a sequence.
    """
    length = len(x)
    # make two parallel containers, one of indices, one of values
    # YYY Use isinstance instead of type...is so we could
    #     easily use derived subclasses
    if isinstance(x, np.ndarray):
        iter_range = list(range(length))
        iter_values = x
    elif isinstance(x, dict):
        # BUG FIX: list.sort() sorts in place and returns None, so the old
        # `list(x).sort()` assigned None; also np.ndarray(...) is the raw
        # constructor (takes a shape), not an array factory.  Sort the keys
        # and keep the values aligned with that order.
        iter_range = sorted(x)
        iter_values = np.asarray([x[k] for k in iter_range])
    elif is_sequence_type(x):
        iter_range = list(range(length))
        iter_values = np.asarray(x)
    else:
        raise TypeError("data must be a mapping or an ndarray or a sequence")
    # allocate c struct
    data = svmc.svm_node_array(length + 1)
    # insert sentinel marker into the c struct
    svmc.svm_node_array_set(data, length, -1, 0.0)
    # pass the list and the ndarray to the c struct
    svmc.svm_node_array_set(data, iter_range, iter_values)
    return data
def __setitem__(self, key, value):
    """Add a new IndexedCollectable to the collection

    Parameters
    ----------
    item : IndexedCollectable or of derived class
      Must have 'name' assigned.
    """
    # local binding
    ulength = self._uniform_length

    # XXX should we check whether it is some other Collectable?
    if not isinstance(value, ArrayCollectable):
        # if it is only a single element iterable, attempt broadcasting
        if is_sequence_type(value) and len(value) == 1 \
               and ulength is not None:
            if ulength > 1:
                # cannot use np.repeat, because it destroys dimensionality
                value = [value[0]] * ulength
        value = ArrayCollectable(value)
    if ulength is None:
        # first collectable defines the collection's uniform length
        ulength = len(value)
    elif len(value.value) != ulength:
        raise ValueError("Collectable '%s' with length [%i] does not match "
                         "the required length [%i] of collection '%s'."
                         % (key, len(value.value), ulength, str(self)))
    # tell the attribute to maintain the desired length
    value.set_length_check(ulength)
    Collection.__setitem__(self, key, value)
def _action(self, key, func, missingok=False, **kwargs): """Run specific func either on a single item or on all of them Parameters ---------- key : str Name of the conditional attribute func Function (not bound) to call given an item, and **kwargs missingok : bool If True - do not complain about wrong key """ if isinstance(key, basestring): if key.lower() == 'all': for key_ in self: self._action(key_, func, missingok=missingok, **kwargs) else: try: func(self[key], **kwargs) except: if missingok: return raise elif is_sequence_type(key): for item in key: self._action(item, func, missingok=missingok, **kwargs) else: raise ValueError, \ "Don't know how to handle variable given by %s" % key
def __iadd__(self, item):
    """Register `item` -- or every element of a sequence -- with the warehouse."""
    if is_sequence_type(item):
        # recurse element-wise and bail out early
        for element in item:
            self.__iadd__(element)
        return self
    if not hasattr(item, '__tags__'):
        raise ValueError("Cannot register %s " % item + \
                         "which has no __tags__ defined")
    if item.descr in self.__descriptions:
        raise ValueError("Cannot register %s, " % item + \
                         "an item with descriptions '%s' already exists" \
                         % item.descr)
    if len(item.__tags__) == 0:
        raise ValueError("Cannot register %s " % item + \
                         "which has empty __tags__")
    internals = set(item.__tags__)
    if not internals.issubset(self._known_tags):
        raise ValueError('Unknown clf internal(s) %s' % \
                         internals.difference(self._known_tags))
    self.__items.append(item)
    self.__keys |= internals
    # access by descr
    self.__descriptions[item.descr] = item
    return self
def __setitem__(self, key, value):
    """Add a new IndexedCollectable to the collection

    Parameters
    ----------
    item : IndexedCollectable or of derived class
      Must have 'name' assigned.
    """
    # snapshot of the collection-wide length constraint
    required_len = self._uniform_length

    # XXX should we check whether it is some other Collectable?
    if not isinstance(value, ArrayCollectable):
        # single-element iterables get broadcast to the required length
        single = is_sequence_type(value) and len(value) == 1
        if single and required_len is not None and required_len > 1:
            # cannot use np.repeat, because it destroys dimensionality
            value = [value[0]] * required_len
        value = ArrayCollectable(value)

    if required_len is None:
        required_len = len(value)
    elif not len(value.value) == required_len:
        raise ValueError(
            "Collectable '%s' with length [%i] does not match "
            "the required length [%i] of collection '%s'."
            % (key, len(value.value), required_len, str(self)))

    # tell the attribute to maintain the desired length
    value.set_length_check(required_len)
    Collection.__setitem__(self, key, value)
def __iadd__(self, item):
    """Register an item (or each element of a sequence) with the warehouse.

    Raises
    ------
    ValueError
      If the item lacks ``__tags__``, duplicates an existing description,
      has empty ``__tags__``, or carries unknown tags.
    """
    if is_sequence_type(item):
        for item_ in item:
            self.__iadd__(item_)
    else:
        # Python-3 raise syntax (was `raise ValueError, ...`)
        if not hasattr(item, '__tags__'):
            raise ValueError("Cannot register %s " % item +
                             "which has no __tags__ defined")
        if item.descr in self.__descriptions:
            raise ValueError("Cannot register %s, " % item +
                             "an item with descriptions '%s' already exists"
                             % item.descr)
        if len(item.__tags__) == 0:
            raise ValueError("Cannot register %s " % item +
                             "which has empty __tags__")
        clf_internals = set(item.__tags__)
        if clf_internals.issubset(self._known_tags):
            self.__items.append(item)
            self.__keys |= clf_internals
        else:
            raise ValueError('Unknown clf internal(s) %s' %
                             clf_internals.difference(self._known_tags))
        # access by descr
        self.__descriptions[item.descr] = item
    return self
def seq_to_svm_node(x):
    """Convert a sequence or mapping to an SVMNode array.

    Parameters
    ----------
    x : np.ndarray or dict or sequence
      Data to convert. For a dict, keys become the node indices (sorted)
      and the matching values become the node values.

    Returns
    -------
    svm_node_array
      C-level node array with a terminating ``(-1, 0.0)`` sentinel.

    Raises
    ------
    TypeError
      If `x` is neither a mapping, an ndarray, nor a sequence.
    """
    length = len(x)
    # make two parallel containers, one of indices, one of values
    # YYY Use isinstance instead of type...is so we could
    #     easily use derived subclasses
    if isinstance(x, np.ndarray):
        iter_range = list(range(length))
        iter_values = x
    elif isinstance(x, dict):
        # BUG FIX: list.sort() returns None, so `list(x).sort()` assigned
        # None; np.ndarray(x.values()) misused the raw constructor (expects
        # a shape).  Sort keys and keep values aligned with them.
        iter_range = sorted(x)
        iter_values = np.asarray([x[k] for k in iter_range])
    elif is_sequence_type(x):
        iter_range = list(range(length))
        iter_values = np.asarray(x)
    else:
        # Python-3 raise syntax (was `raise TypeError, ...`)
        raise TypeError("data must be a mapping or an ndarray or a sequence")
    # allocate c struct
    data = svmc.svm_node_array(length + 1)
    # insert sentinel marker into the c struct
    svmc.svm_node_array_set(data, length, -1, 0.0)
    # pass the list and the ndarray to the c struct
    svmc.svm_node_array_set(data, iter_range, iter_values)
    return data
def _action(self, key, func, missingok=False, **kwargs): """Run specific func either on a single item or on all of them Parameters ---------- key : str Name of the conditional attribute func Function (not bound) to call given an item, and **kwargs missingok : bool If True - do not complain about wrong key """ if isinstance(key, basestring): if key.upper() == 'ALL': for key_ in self: self._action(key_, func, missingok=missingok, **kwargs) else: try: func(self[key], **kwargs) except: if missingok: return raise elif is_sequence_type(key): for item in key: self._action(item, func, missingok=missingok, **kwargs) else: raise ValueError, \ "Don't know how to handle variable given by %s" % key
def get_limit_filter(limit, collection):
    """Create a filter array from a limit definition.

    Parameters
    ----------
    limit : None or str or dict
      If ``None`` all elements will be included in the filter. If a single
      attribute name is given, its unique values will be used to define
      chunks of data that are marked in the filter as unique integers.
      Finally, if a dictionary is provided, its keys define attribute names
      and its values (single value or sequence thereof) attribute values,
      where all key-value combinations across all given items define a
      "selection" of elements to be included in the filter (OR combination).
    collection : Collection
      Dataset attribute collection instance that contains all attributes
      referenced in the limit specification, as well as defines the shape
      of the filter.

    Returns
    -------
    array
      This array is either boolean, where `True` elements represent
      inclusion in the filter, or the array is numerical, where its unique
      integer values define individual chunks of a filter.

    Raises
    ------
    RuntimeError
      If `limit` is of an unsupported type.
    """
    attr_length = collection.attr_length
    if limit is None:
        # no limits
        limit_filter = np.ones(attr_length, dtype='bool')
    elif isinstance(limit, str):
        # use the unique values of this attribute to permute each chunk
        # individually
        lattr = collection[limit]
        lattr_data = lattr.value
        limit_filter = np.zeros(attr_length, dtype='int')
        for i, uv in enumerate(lattr.unique):
            limit_filter[lattr_data == uv] = i
    elif isinstance(limit, dict):
        limit_filter = np.zeros(attr_length, dtype='bool')
        for a in limit:
            if is_sequence_type(limit[a]):
                for v in limit[a]:
                    # enable the samples matching the value 'v' of the
                    # current limit attribute 'a'
                    limit_filter[collection[a].value == v] = True
            else:
                limit_filter[collection[a].value == limit[a]] = True
    else:
        # fixed message typo: "Unhandle" -> "Unhandled"
        raise RuntimeError("Unhandled condition")
    return limit_filter
def get_limit_filter(limit, collection):
    """Create a filter array from a limit definition.

    Parameters
    ----------
    limit : None or str or dict
      If ``None`` all elements will be included in the filter. If a single
      attribute name is given, its unique values will be used to define
      chunks of data that are marked in the filter as unique integers.
      Finally, if a dictionary is provided, its keys define attribute names
      and its values (single value or sequence thereof) attribute values,
      where all key-value combinations across all given items define a
      "selection" of elements to be included in the filter (OR combination).
    collection : Collection
      Dataset attribute collection instance that contains all attributes
      referenced in the limit specification, as well as defines the shape
      of the filter.

    Returns
    -------
    array
      This array is either boolean, where `True` elements represent
      inclusion in the filter, or the array is numerical, where its unique
      integer values define individual chunks of a filter.

    Raises
    ------
    RuntimeError
      If `limit` is of an unsupported type.
    """
    attr_length = collection.attr_length
    if limit is None:
        # no limits
        limit_filter = np.ones(attr_length, dtype='bool')
    elif isinstance(limit, str):
        # use the unique values of this attribute to permute each chunk
        # individually
        lattr = collection[limit]
        lattr_data = lattr.value
        limit_filter = np.zeros(attr_length, dtype='int')
        for i, uv in enumerate(lattr.unique):
            limit_filter[lattr_data == uv] = i
    elif isinstance(limit, dict):
        limit_filter = np.zeros(attr_length, dtype='bool')
        for a in limit:
            if is_sequence_type(limit[a]):
                for v in limit[a]:
                    # enable the samples matching the value 'v' of the
                    # current limit attribute 'a'
                    limit_filter[collection[a].value == v] = True
            else:
                limit_filter[collection[a].value == limit[a]] = True
    else:
        # fixed message typo: "Unhandle" -> "Unhandled"
        raise RuntimeError("Unhandled condition")
    return limit_filter
def _set(self, val): if not hasattr(val, 'view'): if is_sequence_type(val): try: val = np.asanyarray(val) except ValueError, e: if "setting an array element with a sequence" in str(e): val = np.asanyarray(val, dtype=object) else: raise else: raise ValueError("%s only takes ndarrays (or array-likes " "providing view(), or sequence that can " "be converted into arrays (got '%s')." % (self.__class__.__name__, str(type(val))))
def _set(self, val): if not hasattr(val, 'view'): if is_sequence_type(val): try: val = np.asanyarray(val) except ValueError, e: if "setting an array element with a sequence" in str(e): val = np.asanyarray(val, dtype=object) else: raise else: raise ValueError("%s only takes ndarrays (or array-likes " "providing view(), or sequence that can " "be converted into arrays (got '%s')." % (self.__class__.__name__, str(type(val))))
def to_hashable(x):
    """Convert x to something which dict wouldn't mind

    Hashable inputs are returned unchanged; dicts become sorted tuples of
    ``(key, to_hashable(value))`` pairs; other sequences become tuples.
    """
    try:
        # cheapest check -- if it can serve as a dict key it is hashable
        # (replaces the old throwaway `d = {x: None}` probe)
        hash(x)
        return x
    except TypeError:
        pass
    if isinstance(x, dict):
        # keys are already hashable
        # and sort for deterministic order; iteritems() was Python-2-only
        return tuple((k, to_hashable(v)) for (k, v) in sorted(x.items()))
    elif is_sequence_type(x):
        return tuple(x)
    elif np.isscalar(x):
        return x
    return x  # and then wait for the report for it to be added
def __iadd__(self, item):
    """Register an item (or each element of a sequence) with the warehouse.

    Raises
    ------
    ValueError
      If the item lacks ``__tags__``, has empty ``__tags__``, or carries
      unknown tags.
    """
    if is_sequence_type(item):
        for item_ in item:
            self.__iadd__(item_)
    else:
        # Python-3 raise syntax (was `raise ValueError, ...`)
        if not hasattr(item, '__tags__'):
            raise ValueError("Cannot register %s " % item +
                             "which has no __tags__ defined")
        if len(item.__tags__) == 0:
            raise ValueError("Cannot register %s " % item +
                             "which has empty __tags__")
        clf_internals = set(item.__tags__)
        if clf_internals.issubset(self._known_tags):
            self.__items.append(item)
            self.__keys |= clf_internals
        else:
            raise ValueError('Unknown clf internal(s) %s' %
                             clf_internals.difference(self._known_tags))
    return self
def get_samples_by_attr(dataset, attr, values, sort=True):
    """Return indices of samples given a list of attributes

    Parameters
    ----------
    dataset : Dataset
      Dataset whose sample attributes are searched.
    attr : str
      Name of the sample attribute to match against.
    values : value or sequence of values
      A scalar (or string) is wrapped into a single-element list.
    sort : bool
      If True, returned indices are sorted ascending.

    Returns
    -------
    np.ndarray
      Indices of matching samples.
    """
    # basestring was Python-2-only; str covers it now
    if not is_sequence_type(values) or isinstance(values, str):
        values = [values]

    # TODO: compare to plain for loop through the targets
    #       on a real data example
    sel = np.array([], dtype=np.int16)
    sa = dataset.sa
    for value in values:
        sel = np.concatenate((sel, np.where(sa[attr].value == value)[0]))

    if sort:
        # place samples in the right order
        sel.sort()

    return sel
def __iadd__(self, item):
    """Register an item (or each element of a sequence) with the warehouse.

    Raises
    ------
    ValueError
      If the item lacks ``__tags__``, has empty ``__tags__``, or carries
      unknown tags.
    """
    if is_sequence_type(item):
        for item_ in item:
            self.__iadd__(item_)
    else:
        # Python-3 raise syntax (was `raise ValueError, ...`)
        if not hasattr(item, '__tags__'):
            raise ValueError("Cannot register %s " % item +
                             "which has no __tags__ defined")
        if len(item.__tags__) == 0:
            raise ValueError("Cannot register %s " % item +
                             "which has empty __tags__")
        clf_internals = set(item.__tags__)
        if clf_internals.issubset(self._known_tags):
            self.__items.append(item)
            self.__keys |= clf_internals
        else:
            raise ValueError('Unknown clf internal(s) %s' %
                             clf_internals.difference(self._known_tags))
    return self
def to_hashable(x):
    """Convert x to something which dict wouldn't mind

    Hashable inputs are returned unchanged; dicts become sorted tuples of
    ``(key, to_hashable(value))`` pairs; other sequences become tuples.
    """
    try:
        # cheapest check -- if it can serve as a dict key it is hashable
        # (replaces the old throwaway `d = {x: None}` probe)
        hash(x)
        return x
    except TypeError:
        pass
    if isinstance(x, dict):
        # keys are already hashable
        # and sort for deterministic order; iteritems() was Python-2-only
        return tuple(
            (k, to_hashable(v)) for (k, v) in sorted(x.items()))
    elif is_sequence_type(x):
        return tuple(x)
    elif np.isscalar(x):
        return x
    return x  # and then wait for the report for it to be added
def _get_cvec(self, data):
    """Estimate default and return scaled by it negative user's C values

    Negative user-provided C entries are replaced by ``|C| * default_C``
    estimated from `data.samples`.
    """
    # `has_key` and `xrange` were Python-2-only; the sibling implementation
    # already uses the `in` test
    if "C" not in self.params:  # svm_type in [_svm.svmc.C_SVC]:
        raise RuntimeError(
            "Requested estimation of default C whenever C was not set")

    C = self.params.C
    if not is_sequence_type(C):
        # we were not given a tuple for balancing between classes
        C = [C]

    Cs = list(C[:])  # copy
    for i in range(len(Cs)):
        if Cs[i] < 0:
            Cs[i] = self._get_default_c(data.samples) * abs(Cs[i])
            if __debug__:
                debug("SVM", "Default C for %s was computed to be %s"
                      % (C[i], Cs[i]))

    return Cs
def _get_cvec(self, data):
    """Estimate default and return scaled by it negative user's C values
    """
    # a default C can only be scaled if the user supplied one at all
    if 'C' not in self.params:  # svm_type in [_svm.svmc.C_SVC]:
        raise RuntimeError("Requested estimation of default C whenever C was not set")

    C = self.params.C
    if not is_sequence_type(C):
        # a single value -- no per-class balancing requested
        C = [C]

    # work on a copy so the user's values stay untouched
    scaled = list(C)
    for idx, c in enumerate(scaled):
        if c < 0:
            scaled[idx] = self._get_default_c(data.samples) * abs(c)
            if __debug__:
                debug("SVM", "Default C for %s was computed to be %s"
                      % (C[idx], scaled[idx]))

    return scaled
def get_samples_by_attr(dataset, attr, values, sort=True):
    """Return indices of samples given a list of attributes

    Parameters
    ----------
    dataset : Dataset
      Dataset whose sample attributes are searched.
    attr : str
      Name of the sample attribute to match against.
    values : value or sequence of values
      A scalar (or string) is wrapped into a single-element list.
    sort : bool
      If True, returned indices are sorted ascending.

    Returns
    -------
    np.ndarray
      Indices of matching samples.
    """
    # basestring was Python-2-only; str covers it now
    if not is_sequence_type(values) \
           or isinstance(values, str):
        values = [values]

    # TODO: compare to plain for loop through the targets
    #       on a real data example
    sel = np.array([], dtype=np.int16)
    sa = dataset.sa
    for value in values:
        sel = np.concatenate((sel, np.where(sa[attr].value == value)[0]))

    if sort:
        # place samples in the right order
        sel.sort()

    return sel
def _train(self, ds):
    """Build the detrending regressor matrix for dataset `ds`.

    Constructs Legendre-polynomial regressors -- either for the whole
    dataset or separately per chunk -- plus any optional regressors taken
    from sample attributes, and stores the combined (time x regressors)
    matrix in ``self._regs``.  Also maintains ``self._polycoords`` when an
    inspace is configured.
    """
    # local binding
    chunks_attr = self.params.chunks_attr
    polyord = self.params.polyord
    opt_reg = self.params.opt_regs
    inspace = self.get_space()

    self._polycoords = None

    # global detrending is desired
    if chunks_attr is None:
        # consider the entire dataset
        reg = []
        # create the timespan
        self._polycoords, polycoords_scaled = self._get_polycoords(ds, None)
        for n in range(polyord + 1):
            reg.append(legendre_(n, polycoords_scaled)[:, np.newaxis])
    # chunk-wise detrending is desired
    else:
        # get the unique chunks
        uchunks = ds.sa[chunks_attr].unique
        # Process the polyord to be a list with length of the number of
        # chunks
        if not is_sequence_type(polyord):
            # repeat to be proper length
            polyord = [polyord] * len(uchunks)
        elif not chunks_attr is None and len(polyord) != len(uchunks):
            raise ValueError("If you specify a sequence of polyord values "
                             "they sequence length must match the "
                             "number of unique chunks in the dataset.")
        # loop over each chunk
        reg = []
        update_polycoords = True
        # if the dataset know about the inspace we can store the
        # polycoords right away
        if not inspace is None and inspace in ds.sa:
            self._polycoords = ds.sa[inspace].value
            update_polycoords = False
        else:
            # otherwise we prepare and empty array that is going to be
            # filled below -- we know that those polycoords are going to
            # be ints
            self._polycoords = np.empty(len(ds), dtype='int')
        for n, chunk in enumerate(uchunks):
            # get the indices for that chunk
            cinds = ds.sa[chunks_attr].value == chunk
            # create the timespan
            polycoords, polycoords_scaled = self._get_polycoords(ds, cinds)
            if update_polycoords and not polycoords is None:
                self._polycoords[cinds] = polycoords
            # create each polyord with the value for that chunk
            # NOTE(review): the inner loop rebinds `n` (also the chunk
            # index above); `polyord[n]` is evaluated before the rebinding
            # takes effect so behavior is correct, but a distinct name
            # would be safer -- confirm before refactoring.
            for n in range(polyord[n] + 1):
                newreg = np.zeros((len(ds), 1))
                newreg[cinds, 0] = legendre_(n, polycoords_scaled)
                reg.append(newreg)

    # if we don't handle in inspace, there is no need to store polycoords
    if inspace is None:
        self._polycoords = None

    # see if add in optional regs
    if not opt_reg is None:
        # add in the optional regressors, too
        for oreg in opt_reg:
            reg.append(ds.sa[oreg].value[np.newaxis].T)

    # combine the regs (time x reg)
    self._regs = np.hstack(reg)
def to_literal(self, attr, recurse=False): """Map numerical value back to literal ones. Parameters ---------- attr : sequence Numerical values to be mapped. recurse : bool Either to recursively change items within the sequence if those are iterable as well Please see the class documentation for more information. """ # we need one or the other map if self._lmap is None and self._nmap is None: raise RuntimeError("AttributeMap has no mapping information. " "Ever called to_numeric()?") if self._lmap is None: self._lmap = self._get_lmap() lmap = self._lmap if is_sequence_type(attr) and not isinstance(attr, str): # Choose lookup function if recurse: lookupfx = lambda x: self.to_literal(x, recurse=True) else: # just dictionary lookup lookupfx = lambda x: lmap[x] # To assure the preserving the container type target_constr = attr.__class__ # ndarrays are special since array is just a factory, and # ndarray takes shape as the first argument isarray = issubclass(target_constr, np.ndarray) if isarray: if attr.dtype is np.dtype('object'): target_constr = lambda x: np.array(x, dtype=object) else: # Otherwise no special handling target_constr = np.array # Perform lookup and store to the list resl = [lookupfx(k) for k in attr] # If necessary assure derived ndarray class type if isarray: if attr.dtype is np.dtype('object'): # we need first to create empty one and then # assign items -- god bless numpy resa = np.empty(len(resl), dtype=attr.dtype) resa[:] = resl else: resa = target_constr(resl) if not (attr.__class__ is np.ndarray): # to accommodate subclasses of ndarray res = resa.view(attr.__class__) else: res = resa else: res = target_constr(resl) return res else: return lmap[attr]
def _train(self, ds):
    """Build the detrending regressor matrix for dataset `ds`.

    Constructs Legendre-polynomial regressors -- either for the whole
    dataset or separately per chunk -- plus any optional regressors taken
    from sample attributes, and stores the combined (time x regressors)
    matrix in ``self._regs``.  Also maintains ``self._polycoords`` when an
    inspace is configured.
    """
    # local binding
    chunks_attr = self.params.chunks_attr
    polyord = self.params.polyord
    opt_reg = self.params.opt_regs
    inspace = self.get_space()

    self._polycoords = None

    # global detrending is desired
    if chunks_attr is None:
        # consider the entire dataset
        reg = []
        # create the timespan
        self._polycoords, polycoords_scaled = self._get_polycoords(ds, None)
        for n in range(polyord + 1):
            reg.append(legendre_(n, polycoords_scaled)[:, np.newaxis])
    # chunk-wise detrending is desired
    else:
        # get the unique chunks
        uchunks = ds.sa[chunks_attr].unique
        # Process the polyord to be a list with length of the number of
        # chunks
        if not is_sequence_type(polyord):
            # repeat to be proper length
            polyord = [polyord] * len(uchunks)
        elif not chunks_attr is None and len(polyord) != len(uchunks):
            raise ValueError("If you specify a sequence of polyord values "
                             "they sequence length must match the "
                             "number of unique chunks in the dataset.")
        # loop over each chunk
        reg = []
        update_polycoords = True
        # if the dataset know about the inspace we can store the
        # polycoords right away
        if not inspace is None and inspace in ds.sa:
            self._polycoords = ds.sa[inspace].value
            update_polycoords = False
        else:
            # otherwise we prepare and empty array that is going to be
            # filled below -- we know that those polycoords are going to
            # be ints
            self._polycoords = np.empty(len(ds), dtype='int')
        for n, chunk in enumerate(uchunks):
            # get the indices for that chunk
            cinds = ds.sa[chunks_attr].value == chunk
            # create the timespan
            polycoords, polycoords_scaled = self._get_polycoords(ds, cinds)
            if update_polycoords and not polycoords is None:
                self._polycoords[cinds] = polycoords
            # create each polyord with the value for that chunk
            # NOTE(review): the inner loop rebinds `n` (also the chunk
            # index above); `polyord[n]` is evaluated before the rebinding
            # takes effect so behavior is correct, but a distinct name
            # would be safer -- confirm before refactoring.
            for n in range(polyord[n] + 1):
                newreg = np.zeros((len(ds), 1))
                newreg[cinds, 0] = legendre_(n, polycoords_scaled)
                reg.append(newreg)

    # if we don't handle in inspace, there is no need to store polycoords
    if inspace is None:
        self._polycoords = None

    # see if add in optional regs
    if not opt_reg is None:
        # add in the optional regressors, too
        for oreg in opt_reg:
            reg.append(ds.sa[oreg].value[np.newaxis].T)

    # combine the regs (time x reg)
    self._regs = np.hstack(reg)
def to_literal(self, attr, recurse=False): """Map numerical value back to literal ones. Parameters ---------- attr : sequence Numerical values to be mapped. recurse : bool Either to recursively change items within the sequence if those are iterable as well Please see the class documentation for more information. """ # we need one or the other map if self._lmap is None and self._nmap is None: raise RuntimeError("AttributeMap has no mapping information. " "Ever called to_numeric()?") if self._lmap is None: self._lmap = self._get_lmap() lmap = self._lmap if is_sequence_type(attr) and not isinstance(attr, str): # Choose lookup function if recurse: lookupfx = lambda x: self.to_literal(x, recurse=True) else: # just dictionary lookup lookupfx = lambda x:lmap[x] # To assure the preserving the container type target_constr = attr.__class__ # ndarrays are special since array is just a factory, and # ndarray takes shape as the first argument isarray = issubclass(target_constr, np.ndarray) if isarray: if attr.dtype is np.dtype('object'): target_constr = lambda x: np.array(x, dtype=object) else: # Otherwise no special handling target_constr = np.array # Perform lookup and store to the list resl = [lookupfx(k) for k in attr] # If necessary assure derived ndarray class type if isarray: if attr.dtype is np.dtype('object'): # we need first to create empty one and then # assign items -- god bless numpy resa = np.empty(len(resl), dtype=attr.dtype) resa[:] = resl else: resa = target_constr(resl) if not (attr.__class__ is np.ndarray): # to accommodate subclasses of ndarray res = resa.view(attr.__class__) else: res = resa else: res = target_constr(resl) return res else: return lmap[attr]