def __init__(self, val):
    """Build the column from existing values.

    val may be a bcolz carray, which is stored as-is, or a list / numpy
    array, which is copied into a new carray created with
    Column.DEFAULT_BLOCK_LEN as expected length.

    Raises DazzleError for any other kind of argument.
    """
    super().__init__()

    if isinstance(val, bcolz.carray):
        # Already a carray: keep a reference to it, no copy
        self._carray = val
        return

    if not isinstance(val, (list, np.ndarray)):
        raise DazzleError("Invalid argument in ResultColumn.%s()" % method_name())

    self._carray = bcolz.carray(val, expectedlen=Column.DEFAULT_BLOCK_LEN)
def replace_value(self, old, new):
    """
    Replace every value equal to old by new in my values

    Raises ValueError when new is not exactly an int, a float or a bool.
    """
    if type(new) not in (int, float, bool):
        raise ValueError("new must be int, float or bool: in %s()" % (method_name()))

    # NOTE: this local has to stay, and has to be called `ca`: it is only
    # referenced from inside the expression string handed to bcolz.eval,
    # which presumably looks the name up in this function's variables.
    ca = self.carray
    matches = bcolz.eval('ca == ' + str(old))
    self.carray[matches] = new
def rename(self, new_name):
    """Rename this column to new_name inside its table.

    The column is deleted from the table's ctable and added again under
    the new name, at the position it had before.

    Raises DazzleError if new_name is not a letter followed by letters,
    digits or underscores, or if the ctable already has a column with
    that name.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which used to let names such as "abc\n" through.
    if not re.fullmatch("[A-Za-z][_a-zA-Z0-9]*", new_name):
        raise DazzleError("Invalid column identifier: '%s' in %s" % (new_name, method_name()))

    ct = self.table.ctable
    if new_name in ct.names:
        raise DazzleError("LiteralColumn identifier already in use: '%s' in %s" % (new_name, method_name()))

    # Grab position and data while the column still exists under its old name
    col_pos = self.position
    carray = self.carray

    ct.delcol(self._name)  # TODO should use keep=True, but didn't succeed
    ct.addcol(carray, name=new_name, pos=col_pos)
    self._name = new_name
def replace_list(self, old, new):
    """
    Replace each ith value in old by each ith value in new in my values

    old and new are paired positionally. Nothing happens when old is
    empty or when no value of the column matches.

    Raises ValueError when a value in new is not exactly an int, a float
    or a bool.
    """
    # TODO rewrite this method: it takes very long when there are many replacements to perform
    # for instance, replacing CategoryID in userinfo in avito dataset is very sloooow

    # Materialise once: both sequences are walked more than once below,
    # which would silently exhaust a one-shot iterable.
    old_values = list(old)
    new_values = list(new)

    if any(type(x) not in (int, float, bool) for x in new_values):
        raise ValueError("Values in new must be int, float or bool: in %s()" % (method_name()))

    if not old_values:
        # Nothing to replace; without this guard the expression below
        # would be "()", which is not a valid comparison to evaluate.
        return

    # NOTE: the local must be named `ca`; it is only referenced from the
    # expression string, which bcolz.eval presumably resolves against
    # this function's variables.
    ca = self.carray
    cond = "(" + ") | (".join("ca == " + str(o) for o in old_values) + ")"
    mask = bcolz.eval(cond)

    rep = list(mask.wheretrue())
    if rep:
        pairs = dict(zip(old_values, new_values))
        self.carray[rep] = [pairs[o] for o in self.carray[rep]]
def __init__(self, name, table, data=None):
    """Create a column called name and register it in table.

    name must be a letter followed by letters, digits or underscores and
    must not already be used by a column of table. When data is given it
    is added to the table's ctable under that name.

    Raises DazzleError if the name is invalid or already used, if table
    is not an instance of a class named "Table", or if data does not have
    as many values as the table's first column.
    """
    # TODO add default value (when resize), and expected_length params
    super().__init__()

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which used to let names such as "abc\n" through.
    if not re.fullmatch("[A-Za-z][_a-zA-Z0-9]*", name):
        raise DazzleError("Invalid column identifier: '%s' in %s()" % (name, method_name()))

    # Checked by class name rather than isinstance
    # (presumably to avoid importing Table here - TODO confirm)
    if type(table).__name__ != "Table":
        # One-element tuple so that a tuple passed by mistake is reported
        # with this error instead of breaking the string formatting.
        raise DazzleError("Table parameter expected %s" % (table,))

    if table.get_column(name) is not None:
        raise DazzleError("there is already a column with name %s' in table %s" % (name, table.name))

    self._name = name
    self._ref_column = None  # defined here and not in RefColumn because this property is set *before* the Column is transformed into a RefColumn
    self._table = table

    # NOTE(review): the column is registered before the length check below,
    # so the "> 0" test is always true at this point and a failed check
    # leaves the column in table.columns - confirm whether that is intended.
    table.columns.append(self)

    if data is not None:
        if len(self._table.columns) > 0 and len(self._table.columns[0].carray) != len(data):
            raise DazzleError("Column %s should have same number of values as existing columns" % (name,))
        self.table.ctable.addcol(data, name, expectedlen=self.table.expected_length)
def mean(self, dtype=np.float64, skipna=True):
    """Always raise DazzleError: this method should not be called.

    The dtype and skipna parameters are accepted but never used.
    """
    # The placeholder used to be "%()", which the % operator parses as a
    # mapping-key conversion and rejects with a TypeError before the
    # DazzleError could be built.
    raise DazzleError("RefColumn.%s() should not be called" % method_name())
def table(self):
    """Return the table stored in self._table.

    Raises DazzleError when no table has been set (self._table is None).
    """
    owner = self._table
    if owner is not None:
        return owner
    raise DazzleError('Invalid call to %s: table is undefined' % method_name())