示例#1
0
文件: column.py 项目: mpage38/dazzle
    def __init__(self, val):
        """
        Store val as the bcolz carray backing this column.

        A bcolz.carray is kept as is. A list or a numpy array is used to build
        a new carray with Column.DEFAULT_BLOCK_LEN as its expected length.
        Any other kind of value raises DazzleError.
        """
        super().__init__()

        # Already a carray: nothing to convert
        if isinstance(val, bcolz.carray):
            self._carray = val
            return

        if not isinstance(val, (list, np.ndarray)):
            raise DazzleError("Invalid argument in ResultColumn.%s()" % method_name())

        self._carray = bcolz.carray(val, expectedlen=Column.DEFAULT_BLOCK_LEN)
示例#2
0
文件: column.py 项目: mpage38/dazzle
    def replace_value(self, old, new):
        """
        Overwrite, in place, every value equal to old with new.

        new must be exactly an int, a float or a bool; anything else raises
        ValueError.
        """
        if type(new) not in (int, float, bool):
            raise ValueError("new must be int, float or bool: in %s()" % (method_name()))

        # The expression below refers to the local name 'ca' (presumably resolved
        # by bcolz.eval from this frame), so that name has to stay as it is.
        ca = self.carray
        expression = 'ca == ' + str(old)
        matches = bcolz.eval(expression)
        self.carray[matches] = new
示例#3
0
文件: column.py 项目: mpage38/dazzle
    def rename(self, new_name):
        """
        Rename this column to new_name inside its table.

        The column is removed from the table's ctable and added back under the
        new name at the same position. Raises DazzleError when new_name is not
        a valid identifier or is already listed in the ctable names.
        """
        if re.match("[A-Za-z][_a-zA-Z0-9]*$", new_name) is None:
            raise DazzleError("Invalid column identifier: '%s' in %s" % (new_name, method_name()))

        ctable = self.table.ctable
        if new_name in ctable.names:
            raise DazzleError("LiteralColumn identifier already in use: '%s' in %s" % (new_name, method_name()))

        # Position and data must be captured before the column is deleted
        position, values = self.position, self.carray
        ctable.delcol(self._name)  # TODO should use keep=True, but didn't succeed
        ctable.addcol(values, name=new_name, pos=position)

        self._name = new_name
示例#4
0
文件: column.py 项目: mpage38/dazzle
    def replace_list(self, old, new):
        """
        Replace each ith value in old by each ith value in new in my values

        Every value in new must be exactly an int, a float or a bool, otherwise
        ValueError is raised. When old is empty there is nothing to replace and
        the method returns without touching the data.
        """
        # TODO rewrite this method: it takes very long when there are many replacements to perform
        # for instance, replacing CategoryID in userinfo in avito dataset is very sloooow

        if any(type(x) not in (int, float, bool) for x in new):
            raise ValueError("Values in new must be int, float or bool: in %s()" % (method_name()))

        clauses = ["ca == " + str(o) for o in old]
        if not clauses:
            # No value to look for: skip evaluation, the expression would be the invalid "()"
            return

        # The expression refers to the local name 'ca', so that name has to stay as it is
        ca = self.carray
        cond = "(" + ") | (".join(clauses) + ")"
        mask = bcolz.eval(cond)
        rep = list(mask.wheretrue())
        if rep:
            pairs = dict(zip(old, new))
            self.carray[rep] = [pairs[o] for o in self.carray[rep]]
示例#5
0
文件: column.py 项目: mpage38/dazzle
    def __init__(self, name, table, data=None):  # TODO add default value (when resize), and expected_length params
        """
        Create a column called name and register it in table.

        name must match the identifier pattern (a letter followed by letters,
        digits or underscores) and must not already be used in table. When data
        is given, it is added to the table's ctable under that name; if the
        table already has columns, data must have as many values as the first
        of them.

        Raises DazzleError when name is invalid or already used, when table is
        not a Table, or when data has the wrong number of values.
        """
        super().__init__()

        if not re.match("[A-Za-z][_a-zA-Z0-9]*$", name):
            raise DazzleError("Invalid column identifier: '%s' in %s()" % (name, method_name()))

        # Checked by class name rather than isinstance
        if type(table).__name__ != "Table":
            raise DazzleError("Table parameter expected %s" % (table,))

        if table.get_column(name) is not None:
            raise DazzleError("there is already a column with name %s' in table %s" % (name, table.name))

        # Validate data against the columns that exist *before* this one is
        # registered: a rejected column must not stay in table.columns, and the
        # new column must not be compared with itself when the table is empty.
        if data is not None and len(table.columns) > 0 and len(table.columns[0].carray) != len(data):
            raise DazzleError("Column %s should have same number of values as existing columns" % (name))

        self._name = name
        self._ref_column = None # defined here and not in RefColumn because this property is set *before* the Column is transformed into a RefColumn

        self._table = table
        table.columns.append(self)

        if data is not None:
            self.table.ctable.addcol(data, name, expectedlen=self.table.expected_length)
示例#6
0
文件: column.py 项目: mpage38/dazzle
 def mean(self, dtype=np.float64, skipna=True):
     """
     Always raise DazzleError: this method should not be called on a RefColumn.

     The dtype and skipna parameters are accepted but not used.
     """
     # "%s()" is required here: "%()" would be parsed as a mapping-key
     # conversion and fail with TypeError before DazzleError could be raised
     raise DazzleError("RefColumn.%s() should not be called" % method_name())
示例#7
0
文件: column.py 项目: mpage38/dazzle
    def table(self):
        """
        Return the table stored in self._table.

        Raises DazzleError when no table has been set (self._table is None).
        """
        owner = self._table
        if owner is not None:
            return owner

        raise DazzleError('Invalid call to %s: table is undefined' % method_name())