Exemplo n.º 1
0
def _check_header(data):
    """Validates the first few header bytes of a serialized DataFrame.

    If the serialized DataFrame is compressed, then this function performs
    decompression and returns the result.

    Args:
        data: A bytearray representing the serialized DataFrame

    Returns:
        The argument bytearray, possibly decompressed

    Raises:
        ValueError: If the argument is not a bytearray
        DataFrameException: If the validation failed
    """
    if not isinstance(data, bytearray):
        raise ValueError(
            ("Invalid argument 'data'. "
             "Expected bytearray but found {}").format(type(data)))

    # compressed payloads start with the bytes 0x64 0x66 ('df')
    if data.startswith(b"df"):
        data = _decompress(data)

    # validate the first bytes of the header and
    # the used format version must start with '{v:'
    # Input which is too short to hold the header is rejected here
    # instead of failing with an IndexError
    if (len(data) < 4 or not data.startswith(b"{v:")
            or data[3] not in (0x31, 0x32)):

        raise dataframe.DataFrameException("Unsupported encoding")

    if data[3] != 0x32:  # only encoding version 2 is accepted
        raise dataframe.DataFrameException(
            ("Unsupported encoding version (v:{})").format(data[3]))

    return data
Exemplo n.º 2
0
def _decompress(data):
    """Decompresses the given bytearray.

    The argument must start with the bytes 0x64 0x66 ('df'). For the
    duration of the call these two bytes are replaced by 0x78 0x9c so that
    the content can be passed to zlib.decompress(). The original bytes are
    restored afterwards, so the argument is left unchanged.

    Args:
        data: The bytearray to decompress

    Returns:
        The decompressed bytearray

    Raises:
        DataFrameException: If the argument is too short or does not
            start with the expected bytes
    """
    if len(data) <= 2:
        raise dataframe.DataFrameException("Invalid data format")

    if data[0] != 0x64 or data[1] != 0x66:
        # format the raw byte values directly; a signed single-byte
        # conversion would overflow for values above 0x7f
        raise dataframe.DataFrameException(
            ("Invalid data format. Is not a .df file. "
             "Starts with 0x{:02x} 0x{:02x}").format(data[0], data[1]))

    data[0] = 0x78
    data[1] = 0x9c
    try:
        return bytearray(zlib.decompress(data))
    finally:
        # do not leave the caller's buffer modified
        data[0] = 0x64
        data[1] = 0x66
Exemplo n.º 3
0
    def _check_type(self, value):
        """Checks that the given value is usable as an int32 entry.

        Plain ints must lie within the int32 range. None and numpy.int32
        values are accepted as they are.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is an int outside of the int32
                range or has an unsupported type
        """
        if isinstance(value, int):
            if not -2147483648 <= value <= 2147483647:
                raise dataframe.DataFrameException(
                    "Out of range int (int32): {}".format(value))

            return

        if value is None or isinstance(value, np.int32):
            return

        raise dataframe.DataFrameException(
            ("Invalid argument. Expected "
             "int (int32) but found {}".format(type(value))))
Exemplo n.º 4
0
    def _check_type(self, value):
        """Checks that the given value is usable as an int64 entry.

        Plain ints must lie within the int64 range. None and numpy.int64
        values are accepted as they are.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is an int outside of the int64
                range or has an unsupported type
        """
        lower = -9223372036854775808
        upper = 9223372036854775807
        if isinstance(value, int):
            if not lower <= value <= upper:
                raise dataframe.DataFrameException(
                    "Out of range long (int64): {}".format(value))

            return

        accepted = value is None or isinstance(value, np.int64)
        if not accepted:
            raise dataframe.DataFrameException(
                ("Invalid argument. Expected "
                 "long (int64) but found {}".format(type(value))))
Exemplo n.º 5
0
    def _check_type(self, value):
        """Checks that the given value is a bytearray.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is None or not a bytearray
        """
        if isinstance(value, bytearray):
            return

        if value is None:
            raise dataframe.DataFrameException(
                ("Invalid argument. "
                 "BinaryColumn cannot use None values"))

        raise dataframe.DataFrameException(
            ("Invalid argument. Expected "
             "bytearray but found {}".format(type(value))))
Exemplo n.º 6
0
    def _check_type(self, value):
        """Checks that the given value is usable as an int16 entry.

        Plain ints must lie within the int16 range. None and numpy.int16
        values are accepted as they are.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is an int outside of the int16
                range or has an unsupported type
        """
        if value is None:
            return

        if isinstance(value, int):
            if not -32768 <= value <= 32767:
                raise dataframe.DataFrameException(
                    "Out of range short (int16): {}".format(value))

        elif not isinstance(value, np.int16):
            raise dataframe.DataFrameException(
                ("Invalid argument. Expected "
                 "short (int16) but found {}".format(type(value))))
Exemplo n.º 7
0
    def _check_type(self, value):
        """Checks that the given value is a boolean.

        Both the builtin bool and numpy.bool_ are accepted.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is None or not a boolean
        """
        if value is None:
            raise dataframe.DataFrameException(
                ("Invalid argument. "
                 "BooleanColumn cannot use None values"))

        # the 'np.bool' alias is not available in every NumPy release,
        # the builtin bool and np.bool_ cover the same types
        if not isinstance(value, (bool, np.bool_)):
            raise dataframe.DataFrameException(
                ("Invalid argument. Expected "
                 "boolean (bool) but found {}").format(type(value)))
Exemplo n.º 8
0
    def _check_type(self, value):
        """Checks that the given value is a float or numpy.float64.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is None or has an
                unsupported type
        """
        if value is None:
            raise dataframe.DataFrameException(
                ("Invalid argument. "
                 "DoubleColumn cannot use None values"))

        if isinstance(value, (float, np.float64)):
            return

        raise dataframe.DataFrameException(
            ("Invalid argument. Expected "
             "double (float64) but found {}".format(type(value))))
Exemplo n.º 9
0
    def __init__(self, name=None, values=None):
        """Creates a BinaryColumn from a name and its content.

        The Column is unlabeled if the name is None or empty. The content
        may be given as a list of bytearrays, as a numpy array with dtype
        object, or as an int. In the latter case the array is created by
        self._create_array() with the given size. If no content is
        given, the array returned by self._create_array() is used.

        Args:
            name: The name of the BinaryColumn as a string
            values: The content of the BinaryColumn.
                Must be a list or numpy array with dtype object, or an int

        Raises:
            DataFrameException: If the content has an unsupported type or
                contains an invalid element
        """
        if values is None:
            values = self._create_array()

        if isinstance(values, list):
            # the object array is filled element by element because numpy
            # changes bytearray objects to ndarray types when all
            # bytearrays have equal length
            content = np.empty(len(values), dtype="object")
            for index, item in enumerate(values):
                if item is None:
                    raise dataframe.DataFrameException(
                        "BinaryColumn cannot use None values")
                if not isinstance(item, bytearray):
                    raise dataframe.DataFrameException(
                        "List must only contain bytearrays")

                content[index] = item

            values = content

        elif isinstance(values, np.ndarray):
            if values.dtype != "object":
                raise dataframe.DataFrameException(
                    ("Invalid argument array. Expected "
                     "bytearray array (object) but found {}".format(
                         values.dtype)))

            for item in values:
                self._check_type(item)

        elif isinstance(values, int):
            values = self._create_array(size=values)
        else:
            raise dataframe.DataFrameException(
                ("Invalid argument array. Expected "
                 "list or numpy array but found {}".format(type(values))))

        super().__init__(name, values)
Exemplo n.º 10
0
    def __init__(self, name=None, values=None):
        """Constructs a new StringColumn.

        The constructed Column will have the specified name or is unlabeled
        if the specified name is None or empty.
        The constructed Column has the content of the specified list
        or numpy array. If the argument specifying the Column values is
        an int, then the constructed Column is initialized with the given
        length and all Column entries are set to default values.

        Empty entries are replaced by StringColumn.DEFAULT_VALUE. A list
        argument is copied and left unmodified. A numpy array argument is
        used directly as the Column content, so the replacement of empty
        entries happens in place.

        Args:
            name: The name of the StringColumn as a string
            values: The content of the StringColumn.
                Must be a list or numpy array with dtype object, or an int

        Raises:
            DataFrameException: If the content has an unsupported type or
                contains an invalid element
        """
        # NOTE: the builtin 'object' is used as dtype because the
        # 'np.object' alias is not available in current NumPy releases
        if values is None:
            values = np.empty(0, dtype=object)

        if isinstance(values, list):
            # build a new list so the caller's list is not altered
            checked = []
            for value in values:
                self._check_type(value)
                checked.append(
                    value if value else StringColumn.DEFAULT_VALUE)

            values = np.array(checked, dtype=object)

        elif isinstance(values, np.ndarray):
            if values.dtype != "object":
                raise dataframe.DataFrameException(
                    ("Invalid argument array. Expected "
                     "string array (object) but found {}".format(values.dtype)))

            for i, value in enumerate(values):
                if not isinstance(value, str):
                    # report the type of the offending element, the
                    # dtype of the array is always 'object' at this point
                    raise dataframe.DataFrameException(
                        ("Invalid element in argument array. Expected "
                         "string (str) but found {}".format(type(value))))

                if not value:
                    values[i] = StringColumn.DEFAULT_VALUE

        elif isinstance(values, int):
            values = np.empty(values, dtype=object)
            for i in range(values.shape[0]):
                values[i] = StringColumn.DEFAULT_VALUE

        else:
            raise dataframe.DataFrameException(
                ("Invalid argument array. Expected "
                 "list or numpy array but found {}".format(type(values))))

        super().__init__(name, values)
Exemplo n.º 11
0
    def __init__(self, name=None, values=None):
        """Creates a CharColumn from a name and its content.

        The Column is unlabeled if the name is None or empty. The content
        may be given as a list of characters, as a numpy array with dtype
        uint8 holding the character codes, or as an int. In the latter
        case the Column has the given length and every entry is set to
        the code of CharColumn.DEFAULT_VALUE.

        Args:
            name: The name of the CharColumn as a string
            values: The content of the CharColumn.
                Must be a list or numpy array with dtype uint8, or an int

        Raises:
            DataFrameException: If the content has an unsupported type or
                contains an invalid element
        """
        if values is None:
            values = np.empty(0, dtype=np.uint8)

        if isinstance(values, list):
            codes = self._create_array(len(values))
            for position, char in enumerate(values):
                self._check_type(char)
                codes[position] = ord(char)

            values = codes

        elif isinstance(values, np.ndarray):
            if values.dtype != "uint8":
                raise dataframe.DataFrameException(
                    ("Invalid argument array. Expected "
                     "char array (uint8) but found {}".format(values.dtype)))

            # only codes from 32 to 126 (printable ASCII) are permitted
            for position, code in enumerate(values):
                if (code < 32) or (code > 126):
                    raise dataframe.DataFrameException(
                        ("Invalid character value for CharColumn at index {}. "
                         "Only printable ASCII is permitted").format(position))

        elif isinstance(values, int):
            values = np.full(
                values, ord(CharColumn.DEFAULT_VALUE), dtype=np.uint8)
        else:
            raise dataframe.DataFrameException(
                ("Invalid argument array. Expected "
                 "list or numpy array but found {}".format(type(values))))

        super().__init__(name, values)
Exemplo n.º 12
0
    def _check_type(self, value):
        """Checks that the given value is usable as an int8 entry.

        Plain ints must lie within the int8 range. numpy.int8 values are
        accepted as they are.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is None, is an int outside of
                the int8 range, or has an unsupported type
        """
        if value is None:
            raise dataframe.DataFrameException(
                ("Invalid argument. "
                 "ByteColumn cannot use None values"))

        if isinstance(value, np.int8):
            return

        if not isinstance(value, int):
            raise dataframe.DataFrameException(
                ("Invalid argument. Expected "
                 "byte (int8) but found {}".format(type(value))))

        if not -128 <= value <= 127:
            raise dataframe.DataFrameException(
                "Out of range byte (int8): {}".format(value))
Exemplo n.º 13
0
def serialize(df, compress=False):
    """Serializes the given DataFrame to a bytearray.

    The optional boolean flag controls whether the serialized form is
    passed through _compress() before it is returned.

    Args:
        df: The DataFrame to serialize. Must not be None
        compress: A boolean flag indicating whether the returned bytearray
            should be compressed. Must be a bool

    Returns:
        A bytearray representing the given DataFrame in a serialized form

    Raises:
        ValueError: If an argument has an invalid type
        DataFrameException: If the DataFrame is None or if any errors occur
            during serialization or compression
    """
    if df is None:
        raise dataframe.DataFrameException(
            "DataFrame argument must not be None")

    if not isinstance(df, dataframe.DataFrame):
        raise ValueError(
            ("Invalid argument 'df'. "
             "Expected raven.struct.dataframe.DataFrame but found {}").format(
                 type(df)))

    if not isinstance(compress, bool):
        raise ValueError(("Invalid argument 'compress'. "
                          "Expected bool but found {}").format(type(compress)))

    serialized = _serialize_v2(df)
    if compress:
        return _compress(serialized)

    return serialized
Exemplo n.º 14
0
    def _check_bounds(self, index):
        """Verifies that the given index refers to an existing row.

        Args:
            index: The row index to check

        Raises:
            DataFrameException: If the index is negative or not smaller
                than the length of the internal array
        """
        out_of_bounds = index < 0 or index >= self._values.shape[0]
        if out_of_bounds:
            message = "Invalid row index: {}".format(index)
            raise dataframe.DataFrameException(message)
Exemplo n.º 15
0
    def _check_type(self, value):
        """Checks that the given value is None or a printable ASCII char.

        A valid character is a string of length 1 with a code
        from 32 to 126.

        Args:
            value: The value to check

        Raises:
            DataFrameException: If the value is neither None nor a
                valid character
        """
        if value is None:
            return

        if not isinstance(value, str):
            raise dataframe.DataFrameException(
                ("Invalid argument. Expected "
                 "char (str) but found {}".format(type(value))))

        if len(value) != 1:
            raise dataframe.DataFrameException(
                ("Invalid character value. Expected string of "
                 "length 1 but found length {}".format(len(value))))

        if not 32 <= ord(value) <= 126:
            raise dataframe.DataFrameException(
                ("Invalid character value for NullableCharColumn. "
                 "Only printable ASCII is permitted"))
Exemplo n.º 16
0
def _cast_to_numeric_type(col, value):
    """Converts a float to the number type used by the given Column.

    For float and double Columns the value is returned as a float. For
    byte, short, int and long Columns it is converted with int(). A NaN
    value becomes None for the nullable variants of those integer Columns.

    Args:
        col: The Column which specifies the numeric type
        value: The float value to cast

    Returns:
        A number which has the concrete type used
        by the specified Column

    Raises:
        DataFrameException: If the type code of the Column
            is not a recognized numeric type
    """
    code = col.type_code()
    if col.is_nullable():
        float_codes = (
            doublecolumn.NullableDoubleColumn.TYPE_CODE,
            floatcolumn.NullableFloatColumn.TYPE_CODE,
        )
        int_codes = (
            bytecolumn.NullableByteColumn.TYPE_CODE,
            shortcolumn.NullableShortColumn.TYPE_CODE,
            intcolumn.NullableIntColumn.TYPE_CODE,
            longcolumn.NullableLongColumn.TYPE_CODE,
        )
        if code in float_codes:
            return float(value)

        if code in int_codes:
            return None if np.isnan(value) else int(value)

        raise dataframe.DataFrameException("Unrecognized column type")

    float_codes = (
        doublecolumn.DoubleColumn.TYPE_CODE,
        floatcolumn.FloatColumn.TYPE_CODE,
    )
    int_codes = (
        bytecolumn.ByteColumn.TYPE_CODE,
        shortcolumn.ShortColumn.TYPE_CODE,
        intcolumn.IntColumn.TYPE_CODE,
        longcolumn.LongColumn.TYPE_CODE,
    )
    if code in float_codes:
        return float(value)

    if code in int_codes:
        return int(value)

    raise dataframe.DataFrameException("Unrecognized column type")
Exemplo n.º 17
0
    def __init__(self, name=None, values=None):
        """Constructs a new NullableBooleanColumn.

        The constructed Column will have the specified name or is unlabeled
        if the specified name is None or empty.
        The constructed Column has the content of the specified list
        or numpy array. If the argument specifying the Column values is
        an int, then the constructed Column is initialized with the given
        length and all Column entries are set to default values.

        Args:
            name: The name of the NullableBooleanColumn as a string
            values: The content of the NullableBooleanColumn.
                Must be a list or numpy array with dtype object, or an int

        Raises:
            DataFrameException: If the content has an unsupported type or
                contains an invalid element
        """
        # NOTE: the builtin 'object' is used as dtype because the
        # 'np.object' alias is not available in current NumPy releases
        if values is None:
            values = np.empty(0, dtype=object)

        if isinstance(values, list):
            for value in values:
                self._check_type(value)

            values = np.array(values, dtype=object)

        elif isinstance(values, np.ndarray):
            if values.dtype != "object":
                raise dataframe.DataFrameException(
                    ("Invalid argument array. Expected "
                     "boolean array (object) but found {}".format(
                         values.dtype)))

            for value in values:
                self._check_type(value)

        elif isinstance(values, int):
            # an empty object array holds None in every entry
            values = np.empty(values, dtype=object)
        else:
            raise dataframe.DataFrameException(
                ("Invalid argument array. Expected "
                 "list or numpy array but found {}".format(type(values))))

        super().__init__(name, values)
Exemplo n.º 18
0
    def __init__(self, name=None, values=None):
        """Stores the given name and values in this Column instance.

        This constructor should be called by all subclasses.

        Args:
            name: The name to assign to this Column. Must be a string
                or None
            values: The values to assign to this Column. Must be a numpy array

        Raises:
            DataFrameException: If the name is neither None nor a string,
                or if the values are not a numpy array
        """
        name_is_valid = name is None or isinstance(name, str)
        if not name_is_valid:
            raise dataframe.DataFrameException(
                ("Invalid argument 'name'. "
                 "Expected str but found {}").format(type(name)))

        if not isinstance(values, np.ndarray):
            raise dataframe.DataFrameException(
                ("Invalid argument 'values'. "
                 "Expected numpy.ndarray but found {}").format(type(values)))

        self._name, self._values = name, values
Exemplo n.º 19
0
    def convert_to(self, typecode):
        """Converts this Column to a Column of the given type code.

        Every entry of the internal array is either None or a character
        code which is turned into a character with chr() before it is
        converted. For non-nullable target types, None entries are
        replaced by a type specific default value. For nullable target
        types, None entries stay None.

        Args:
            typecode: The type code of the Column type to convert to

        Returns:
            A new Column of the requested type which has the name of
            this Column

        Raises:
            DataFrameException: If the type code is unknown or if a
                character is not a valid boolean character
        """
        # NOTE: the builtins 'object' and 'bool' are used as dtypes because
        # the 'np.object' and 'np.bool' aliases are not available in
        # current NumPy releases

        def build(dtype, convert, missing):
            """Creates the converted array, using 'missing' for None."""
            out = np.empty([self._values.shape[0]], dtype=dtype)
            for i, x in np.ndenumerate(self._values):
                out[i] = convert(x) if x is not None else missing

            return out

        def as_int(code):
            """Parses the character with the given code as an int."""
            return int(chr(code))

        def as_float(code):
            """Parses the character with the given code as a float."""
            return float(chr(code))

        def as_str(code):
            """Returns the character with the given code."""
            return chr(code)

        def as_code(code):
            """Returns the character code unchanged."""
            return code

        def as_bool(code):
            """Interprets the character with the given code as a bool."""
            char = chr(code).lower()
            if char in ("t", "1", "y"):
                return True

            if char in ("f", "0", "n"):
                return False

            raise dataframe.DataFrameException(
                ("Invalid boolean character: '{}'".format(code)))

        def as_binary(code):
            """Encodes the character with the given code as a bytearray."""
            return bytearray(chr(code).encode("utf-8"))

        if typecode == utils.type_code_byte_column():
            converted = dataframe.DataFrame.ByteColumn(
                values=build(np.int8, as_int, 0))
        elif typecode == utils.type_code_short_column():
            converted = dataframe.DataFrame.ShortColumn(
                values=build(np.int16, as_int, 0))
        elif typecode == utils.type_code_int_column():
            converted = dataframe.DataFrame.IntColumn(
                values=build(np.int32, as_int, 0))
        elif typecode == utils.type_code_long_column():
            converted = dataframe.DataFrame.LongColumn(
                values=build(np.int64, as_int, 0))
        elif typecode == utils.type_code_string_column():
            converted = dataframe.DataFrame.StringColumn(
                values=build(
                    object, as_str, utils.default_value_string_column()))
        elif typecode == utils.type_code_float_column():
            converted = dataframe.DataFrame.FloatColumn(
                values=build(np.float32, as_float, 0.0))
        elif typecode == utils.type_code_double_column():
            converted = dataframe.DataFrame.DoubleColumn(
                values=build(np.float64, as_float, 0.0))
        elif typecode == utils.type_code_char_column():
            converted = dataframe.DataFrame.CharColumn(
                values=build(
                    np.uint8, as_code, ord(CharColumn.DEFAULT_VALUE)))
        elif typecode == utils.type_code_boolean_column():
            converted = dataframe.DataFrame.BooleanColumn(
                values=build(bool, as_bool, False))
        elif typecode == utils.type_code_binary_column():
            vals = np.empty([self._values.shape[0]], dtype=object)
            for i, x in np.ndenumerate(self._values):
                if x is not None:
                    vals[i] = as_binary(x)
                else:
                    # a separate bytearray for every None entry so that
                    # rows never share one mutable object
                    vals[i] = bytearray.fromhex("00")

            converted = dataframe.DataFrame.BinaryColumn(values=vals)
        elif typecode == utils.type_code_nullable_byte_column():
            converted = dataframe.DataFrame.NullableByteColumn(
                values=build(object, as_int, None))
        elif typecode == utils.type_code_nullable_short_column():
            converted = dataframe.DataFrame.NullableShortColumn(
                values=build(object, as_int, None))
        elif typecode == utils.type_code_nullable_int_column():
            converted = dataframe.DataFrame.NullableIntColumn(
                values=build(object, as_int, None))
        elif typecode == utils.type_code_nullable_long_column():
            converted = dataframe.DataFrame.NullableLongColumn(
                values=build(object, as_int, None))
        elif typecode == utils.type_code_nullable_string_column():
            converted = dataframe.DataFrame.NullableStringColumn(
                values=build(object, as_str, None))
        elif typecode == utils.type_code_nullable_float_column():
            converted = dataframe.DataFrame.NullableFloatColumn(
                values=build(object, as_float, None))
        elif typecode == utils.type_code_nullable_double_column():
            converted = dataframe.DataFrame.NullableDoubleColumn(
                values=build(object, as_float, None))
        elif typecode == NullableCharColumn.TYPE_CODE:
            converted = self.clone()
        elif typecode == utils.type_code_nullable_boolean_column():
            converted = dataframe.DataFrame.NullableBooleanColumn(
                values=build(object, as_bool, None))
        elif typecode == utils.type_code_nullable_binary_column():
            converted = dataframe.DataFrame.NullableBinaryColumn(
                values=build(object, as_binary, None))
        else:
            raise dataframe.DataFrameException(
                "Unknown column type code: {}".format(typecode))

        # pylint: disable=protected-access
        converted._name = self._name
        return converted
Exemplo n.º 20
0
def _setitem_resolve_index(index, size, label):
    """Maps a negative index onto its non-negative equivalent.

    Args:
        index: The negative index to resolve
        size: The number of rows or columns the index refers into
        label: Either 'row' or 'column'. Only used in the error message

    Returns:
        The equivalent non-negative index

    Raises:
        DataFrameException: If the index reaches further back than size
    """
    if abs(index) > size:
        raise dataframe.DataFrameException(
            "Invalid {} index: {}".format(label, index))

    return index % size


def _setitem_single_column(arg, cols, rows, value):
    """Handles an assignment that addresses exactly one column.

    Args:
        arg: The DataFrame instance to modify
        cols: The column position, as an int index or a str name
        rows: The row position (None, int, str, tuple or slice)
        value: The value argument of the assignment

    Raises:
        DataFrameException: If an index is out of range, if the number of
            values does not match the number of addressed rows, or if the
            row position has an unsupported type
    """
    if isinstance(cols, int) and cols < 0:
        cols = _setitem_resolve_index(cols, arg.columns(), "column")

    if rows is None:
        # implements df[x, None] = Column
        # and        df["x", None] = Column
        # NOTE(review): a literal ':' arrives as slice(None) and is handled
        # by the slice branch below -- confirm whether callers map it to None
        arg.set_column(cols, value)
    elif isinstance(rows, int):
        # implements df[x, y] = v
        # and        df["x", y] = v
        if rows < 0:
            rows = _setitem_resolve_index(rows, arg.rows(), "row")

        arg.get_column(cols).set_value(rows, value)
    elif isinstance(rows, str):
        # implements df[x, "y_regex"] = v | func | lambda
        # and        df["x", "y_regex"] = v | func | lambda
        arg.replace(cols, rows, replacement=value)
    elif isinstance(rows, tuple):
        col = arg.get_column(cols)
        if isinstance(value, (list, tuple)):
            # implements df[x, (y0, y1, ..., yn)] = (v0, v1, ..., vn)
            # and        df["x", (y0, y1, ..., yn)] = (v0, v1, ..., vn)
            if len(rows) != len(value):
                raise dataframe.DataFrameException((
                    "Invalid value argument. The specified "
                    "list/tuple has a size of {} but the row position "
                    "argument has a size of {}").format(
                        len(value), len(rows)))

            for index, item in zip(rows, value):
                col.set_value(index, item)

        else:
            # implements df[x, (y0, y1, ..., yn)] = v
            # and        df["x", (y0, y1, ..., yn)] = v
            for index in rows:
                col.set_value(index, value)

    elif isinstance(rows, slice):
        # The range object knows exactly how many indices the slice selects,
        # including uneven and negative steps. A plain
        # (stop - start) // step miscounts those cases.
        indices = range(*rows.indices(arg.rows()))
        col = arg.get_column(cols)
        if isinstance(value, (list, tuple)):
            # implements df[x, y0:y1:y2] = (v0, v1, ..., vn)
            # and        df["x", y0:y1:y2] = (v0, v1, ..., vn)
            if len(indices) != len(value):
                raise dataframe.DataFrameException((
                    "Invalid value argument. The specified "
                    "list/tuple has a size of {} but the row position "
                    "argument has a size of {}").format(
                        len(value), len(indices)))

            for index, item in zip(indices, value):
                col.set_value(index, item)

        else:
            # implements df[x, y0:y1:y2] = v
            # and        df["x", y0:y1:y2] = v
            for index in indices:
                col.set_value(index, value)

    else:
        # invalid type for row position arg
        raise dataframe.DataFrameException(
            ("Invalid row position type. "
             "Expected int or str but found {}").format(type(rows)))


def _setitem_multi_column(arg, cols, rows, value):
    """Handles an assignment that addresses several columns at once.

    Args:
        arg: The DataFrame instance to modify
        cols: The column positions, as a tuple or a slice
        rows: The row position (int, tuple or slice)
        value: The value argument of the assignment

    Raises:
        DataFrameException: If a row index is out of range, if the value
            argument does not match the addressed rows, or if the row
            position has an unsupported type
    """
    # prefetch the selected columns as a DataFrame
    if isinstance(cols, tuple):
        selected = arg.get_columns(cols=cols)
    elif arg.is_nullable():
        selected = dataframe.NullableDataFrame(arg._internal_columns()[cols])
    else:
        selected = dataframe.DefaultDataFrame(arg._internal_columns()[cols])

    if isinstance(rows, int):
        if rows < 0:
            rows = _setitem_resolve_index(rows, arg.rows(), "row")

        if isinstance(value, (tuple, list)):
            # implements df[(x0, x1, ..., xn), y] = [v0, v1, ..., vn]
            # and        df[x0:x1:x2, y] = [v0, v1, ..., vn]
            selected.set_row(rows, value)
        else:
            # implements df[(x0, x1, ..., xn), y] = v
            # and        df[x0:x1:x2, y] = v
            selected.set_row(rows, [value] * selected.columns())

    elif isinstance(rows, tuple):
        if isinstance(value, (list, tuple)):
            # implements df[(x0, ..., xn), (y0, ..., ym)] = [[ ], ..., [ ]]
            # and        df[x0:x1:x2, (y0, ..., ym)] = [[ ], ..., [ ]]
            if len(value) == 0:
                raise dataframe.DataFrameException((
                    "Invalid value argument. The specified list/tuple "
                    "of row values is empty"))

            if isinstance(value[0], (list, tuple)):
                # one row of values per addressed row index
                if len(rows) != len(value):
                    raise dataframe.DataFrameException((
                        "Invalid value argument. The specified list/tuple "
                        "has a size of {} but the row position argument "
                        "has a size of {}").format(
                            len(value), len(rows)))

                for index, row in zip(rows, value):
                    selected.set_row(index, row)
            else:
                # a single flat row is written to every addressed row index
                for index in rows:
                    selected.set_row(index, value)

        elif isinstance(value, dataframe.DataFrame):
            # implements df[(x0, ..., xn), (y0, ..., ym)] = vDataFrame
            # and        df[x0:x1:x2, (y0, ..., ym)] = vDataFrame
            if len(rows) != value.rows():
                rmsg1 = "rows" if value.rows() != 1 else "row"
                rmsg2 = "rows" if len(rows) != 1 else "row"
                raise dataframe.DataFrameException(
                    ("Invalid value argument. The specified "
                     "DataFrame has {} {} but the row position "
                     "argument specified {} {}").format(
                         value.rows(), rmsg1, len(rows), rmsg2))

            for i, index in enumerate(rows):
                selected.set_row(index, value.get_row(i))

        else:
            # implements df[(x0, ..., xn), (y0, ..., ym)] = v
            # and        df[x0:x1:x2, (y0, ..., ym)] = v
            row = [value] * selected.columns()
            for index in rows:
                selected.set_row(index, row)

    elif isinstance(rows, slice):
        indices = range(*rows.indices(selected.rows()))
        if isinstance(value, (list, tuple)):
            # implements df[(x0, x1, ..., xn), y0:y1:y2] = [ .. ]
            # and        df[x0:x1:x2, y0:y1:y2] = [ .. ]
            for index in indices:
                selected.set_row(index, value)

        elif isinstance(value, dataframe.DataFrame):
            # implements df[(x0, x1, ..., xn), y0:y1:y2] = vDataFrame
            # and        df[x0:x1:x2, y0:y1:y2] = vDataFrame
            for i, index in enumerate(indices):
                selected.set_row(index, value.get_row(i))

        else:
            # implements df[(x0, x1, ..., xn), y0:y1:y2] = v
            # and        df[x0:x1:x2, y0:y1:y2] = v
            row = [value] * selected.columns()
            for index in indices:
                selected.set_row(index, row)

    elif isinstance(rows, str):
        raise dataframe.DataFrameException(
            ("Invalid column position type. A replacement operation "
             "must only specify a single column "
             "but found {}").format(type(cols)))

    else:
        # invalid type for row position arg
        raise dataframe.DataFrameException(
            ("Invalid row position type. "
             "Expected int or str but found {}").format(type(rows)))


def setitem_impl(arg, position, value):
    """Implementation of the __setitem__() function.

    The position is either a single column position (int or str) or a
    tuple of (column position, row position). Negative int indices count
    from the end. A single int position equal to the number of columns
    appends the value as a new column.

    Args:
        arg: The DataFrame instance on which the function was called upon
        position: The position argument passed to the function
        value: The value argument passed to the function

    Raises:
        DataFrameException: If the position has an unsupported type or
            shape, if an index is out of range, or if the value argument
            does not match the addressed rows
    """
    if isinstance(position, tuple):
        if len(position) > 2:
            raise dataframe.DataFrameException(
                ("Invalid position argument. Too many "
                 "positions specified: {}").format(len(position)))

        cols = position[0]
        rows = position[1]
        if isinstance(cols, (int, str)):
            _setitem_single_column(arg, cols, rows, value)
        elif isinstance(cols, (tuple, slice)):
            _setitem_multi_column(arg, cols, rows, value)
        else:
            # invalid type for column position arg
            raise dataframe.DataFrameException(
                ("Invalid column position type. "
                 "Expected int or str but found {}").format(type(cols)))

    elif isinstance(position, int):
        # check for negative column indices
        if position < 0:
            position = _setitem_resolve_index(
                position, arg.columns(), "column")

        # implements df[x] = Column
        if position == arg.columns():
            arg.add_column(value)
        else:
            arg.set_column(position, value)
    elif isinstance(position, str):
        # implements df["x"] = Column
        arg.set_column(position, value)
    else:
        # invalid type for entire position arg
        raise dataframe.DataFrameException(("Invalid position type. "
                                            "Expected int or str but "
                                            "found {}").format(type(position)))
Exemplo n.º 21
0
    def __init__(self, name=None, values=None):
        """Constructs a new NullableCharColumn.

        The constructed Column will have the specified name or is unlabeled
        if the specified name is None or empty.
        The constructed Column has the content of the specified list
        or numpy array. If the argument specifying the Column values is
        an int, then the constructed Column is initialized with the given
        length and an uninitialized object array (NumPy fills object
        arrays with None).

        Characters are stored as their integer code points. A numpy array
        argument is validated and modified in place: every single-character
        string in it is replaced by its code point.

        Args:
            name: The name of the NullableCharColumn as a string
            values: The content of the NullableCharColumn.
                Must be a list or numpy array with dtype object, or an int

        Raises:
            DataFrameException: If the values argument has an unsupported
                type, if a numpy array does not have dtype object, or if
                a numpy array entry is not None, a string of length 1 or
                an int within the printable ASCII range (32 to 126)
        """
        # The builtin object is used as dtype because the np.object alias
        # is no longer available in current NumPy releases.
        if values is None:
            values = np.empty(0, dtype=object)

        if isinstance(values, list):
            charvals = np.zeros(len(values), dtype=object)
            for i, value in enumerate(values):
                # validation of list entries is delegated to _check_type()
                self._check_type(value)
                charvals[i] = None if value is None else ord(value)

            values = charvals

        elif isinstance(values, np.ndarray):
            if values.dtype != "object":
                raise dataframe.DataFrameException(
                    ("Invalid argument array. Expected "
                     "char array (object) but found {}".format(values.dtype)))

            for i, value in enumerate(values):
                if value is None:
                    continue

                if isinstance(value, str):
                    if len(value) != 1:
                        raise dataframe.DataFrameException((
                            "Invalid character value in numpy array argument. "
                            "Expected string of length 1 but found "
                            "length {}".format(len(value))))

                    byte = ord(value)
                    if (byte < 32) or (byte > 126):
                        raise dataframe.DataFrameException((
                            "Invalid character value for NullableCharColumn at index {}. "
                            "Only printable ASCII is permitted").format(i))

                    # store the code point instead of the string
                    values[i] = byte

                elif isinstance(value, int):
                    # already a code point, only the range is checked
                    if (value < 32) or (value > 126):
                        raise dataframe.DataFrameException((
                            "Invalid character value for NullableCharColumn at index {}. "
                            "Only printable ASCII is permitted").format(i))

                else:
                    raise dataframe.DataFrameException(
                        ("Invalid argument. Expected "
                         "char (str) but found {}".format(type(value))))

        elif isinstance(values, int):
            # an int argument denotes the length of the column
            values = np.empty(values, dtype=object)
        else:
            raise dataframe.DataFrameException(
                ("Invalid argument array. Expected "
                 "list or numpy array but found {}".format(type(values))))

        super().__init__(name, values)
Exemplo n.º 22
0
    def convert_to(self, typecode):
        """Converts this column into a new column of the specified type.

        Entries which are None become the default of the target type when
        converting to a non-nullable column (0, 0.0, False, the default
        string or char value, or two zero bytes) and stay None when
        converting to a nullable column. Requesting the
        NullableShortColumn type code returns a clone of this column.

        Args:
            typecode: The type code of the column type to convert to

        Returns:
            The converted column, carrying the name of this column

        Raises:
            DataFrameException: If the specified type code is unknown
        """
        source = self._values

        def build(dtype, convert):
            # Runs convert() on every entry, None included, and stores the
            # results in a newly allocated array of the requested dtype.
            result = np.empty([source.shape[0]], dtype=dtype)
            for i, x in np.ndenumerate(source):
                result[i] = convert(x)

            return result

        def nullable(convert):
            # Lifts convert() so that None entries are passed on as None.
            return lambda x: convert(x) if x is not None else None

        def to_binary(x):
            # Two bytes, big-endian, signed.
            return bytearray(int(x).to_bytes(2, byteorder="big", signed=True))

        def first_char(x):
            # Code point of the first character of str(x),
            # which is '-' for negative numbers.
            return ord(str(x)[0])

        # The builtins object and bool are used as dtypes because the
        # np.object and np.bool aliases are not available in all NumPy
        # releases.
        # NOTE(review): narrowing through np.int8() relies on NumPy's scalar
        # conversion; handling of out-of-range values differs between NumPy
        # versions -- confirm the intended behaviour.
        converted = None
        if typecode == utils.type_code_byte_column():
            vals = build(np.int8,
                         lambda x: int(np.int8(x)) if x is not None else 0)
            converted = dataframe.DataFrame.ByteColumn(values=vals)
        elif typecode == ShortColumn.TYPE_CODE:
            vals = build(np.int16,
                         lambda x: int(np.int16(x)) if x is not None else 0)
            converted = ShortColumn(values=vals)
        elif typecode == utils.type_code_int_column():
            vals = build(np.int32,
                         lambda x: int(np.int32(x)) if x is not None else 0)
            converted = dataframe.DataFrame.IntColumn(values=vals)
        elif typecode == utils.type_code_long_column():
            vals = build(np.int64,
                         lambda x: int(np.int64(x)) if x is not None else 0)
            converted = dataframe.DataFrame.LongColumn(values=vals)
        elif typecode == utils.type_code_string_column():
            vals = build(
                object,
                lambda x: (str(x) if x is not None
                           else utils.default_value_string_column()))
            converted = dataframe.DataFrame.StringColumn(values=vals)
        elif typecode == utils.type_code_float_column():
            vals = build(np.float32,
                         lambda x: float(x) if x is not None else 0.0)
            converted = dataframe.DataFrame.FloatColumn(values=vals)
        elif typecode == utils.type_code_double_column():
            vals = build(np.float64,
                         lambda x: float(x) if x is not None else 0.0)
            converted = dataframe.DataFrame.DoubleColumn(values=vals)
        elif typecode == utils.type_code_char_column():
            ord_default = ord(utils.default_value_char_column())
            vals = build(
                np.uint8,
                lambda x: first_char(x) if x is not None else ord_default)
            converted = dataframe.DataFrame.CharColumn(values=vals)
        elif typecode == utils.type_code_boolean_column():
            vals = build(bool,
                         lambda x: (x != 0) if x is not None else False)
            converted = dataframe.DataFrame.BooleanColumn(values=vals)
        elif typecode == utils.type_code_binary_column():
            # a fresh bytearray per entry so that no two entries share one
            vals = build(
                object,
                lambda x: to_binary(x) if x is not None else to_binary(0))
            converted = dataframe.DataFrame.BinaryColumn(values=vals)
        elif typecode == utils.type_code_nullable_byte_column():
            vals = build(object, nullable(lambda x: int(np.int8(x))))
            converted = dataframe.DataFrame.NullableByteColumn(values=vals)
        elif typecode == NullableShortColumn.TYPE_CODE:
            converted = self.clone()
        elif typecode == utils.type_code_nullable_int_column():
            vals = build(object, nullable(lambda x: int(np.int32(x))))
            converted = dataframe.DataFrame.NullableIntColumn(values=vals)
        elif typecode == utils.type_code_nullable_long_column():
            vals = build(object, nullable(lambda x: int(np.int64(x))))
            converted = dataframe.DataFrame.NullableLongColumn(values=vals)
        elif typecode == utils.type_code_nullable_string_column():
            vals = build(object, nullable(str))
            converted = dataframe.DataFrame.NullableStringColumn(values=vals)
        elif typecode == utils.type_code_nullable_float_column():
            vals = build(object, nullable(float))
            converted = dataframe.DataFrame.NullableFloatColumn(values=vals)
        elif typecode == utils.type_code_nullable_double_column():
            vals = build(object, nullable(float))
            converted = dataframe.DataFrame.NullableDoubleColumn(values=vals)
        elif typecode == utils.type_code_nullable_char_column():
            vals = build(object, nullable(first_char))
            converted = dataframe.DataFrame.NullableCharColumn(values=vals)
        elif typecode == utils.type_code_nullable_boolean_column():
            vals = build(object, nullable(lambda x: x != 0))
            converted = dataframe.DataFrame.NullableBooleanColumn(values=vals)
        elif typecode == utils.type_code_nullable_binary_column():
            vals = build(object, nullable(to_binary))
            converted = dataframe.DataFrame.NullableBinaryColumn(values=vals)
        else:
            raise dataframe.DataFrameException(
                "Unknown column type code: {}".format(typecode))

        # pylint: disable=protected-access
        converted._name = self._name
        return converted
Exemplo n.º 23
0
 def _check_type(self, value):
     if value is not None:
         if not isinstance(value, bytearray):
             raise dataframe.DataFrameException(
                 ("Invalid argument. Expected "
                  "bytearray but found {}".format(type(value))))
Exemplo n.º 24
0
    def convert_to(self, typecode):
        """Converts this column into a new column of the specified type.

        Strings are parsed into the target type. Numeric targets go through
        the corresponding NumPy scalar type, char targets take the first
        character of the string, boolean targets accept a fixed set of
        case-insensitive literals and binary targets expect a hex string.
        Requesting the StringColumn type code returns a clone of this
        column.

        Args:
            typecode: The type code of the column type to convert to

        Returns:
            The converted column, carrying the name of this column

        Raises:
            DataFrameException: If the specified type code is unknown or
                if a string cannot be interpreted as a boolean
        """
        source = self._values
        values_true = {"true", "t", "1", "yes", "y", "on"}
        values_false = {"false", "f", "0", "no", "n", "off"}

        def build(dtype, convert):
            # Runs convert() on every entry, None included, and stores the
            # results in a newly allocated array of the requested dtype.
            result = np.empty([source.shape[0]], dtype=dtype)
            for i, x in np.ndenumerate(source):
                result[i] = convert(x)

            return result

        def nullable(convert):
            # Lifts convert() so that None entries are passed on as None.
            return lambda x: convert(x) if x is not None else None

        def to_bool(x):
            # Case-insensitive lookup; the error reports the original string.
            key = x.lower()
            if key in values_true:
                return True

            if key in values_false:
                return False

            raise dataframe.DataFrameException(
                ("Invalid boolean string: '{}'".format(x)))

        def first_char(x):
            return ord(x[0])

        # The builtins object and bool are used as dtypes because the
        # np.object and np.bool aliases are not available in all NumPy
        # releases.
        converted = None
        if typecode == utils.type_code_byte_column():
            vals = build(np.int8, lambda x: int(np.int8(x)))
            converted = dataframe.DataFrame.ByteColumn(values=vals)
        elif typecode == utils.type_code_short_column():
            vals = build(np.int16, lambda x: int(np.int16(x)))
            converted = dataframe.DataFrame.ShortColumn(values=vals)
        elif typecode == utils.type_code_int_column():
            vals = build(np.int32, lambda x: int(np.int32(x)))
            converted = dataframe.DataFrame.IntColumn(values=vals)
        elif typecode == utils.type_code_long_column():
            vals = build(np.int64, lambda x: int(np.int64(x)))
            converted = dataframe.DataFrame.LongColumn(values=vals)
        elif typecode == StringColumn.TYPE_CODE:
            converted = self.clone()
        elif typecode == utils.type_code_float_column():
            vals = build(np.float32, lambda x: float(np.float32(x)))
            converted = dataframe.DataFrame.FloatColumn(values=vals)
        elif typecode == utils.type_code_double_column():
            vals = build(np.float64, lambda x: float(np.float64(x)))
            converted = dataframe.DataFrame.DoubleColumn(values=vals)
        elif typecode == utils.type_code_char_column():
            # The default char must be stored as its code point because the
            # target array holds uint8 values, not strings.
            vals = build(
                np.uint8,
                lambda x: (first_char(x) if x is not None
                           else ord(utils.default_value_char_column())))
            converted = dataframe.DataFrame.CharColumn(values=vals)
        elif typecode == utils.type_code_boolean_column():
            vals = build(bool,
                         lambda x: to_bool(x) if x is not None else False)
            converted = dataframe.DataFrame.BooleanColumn(values=vals)
        elif typecode == utils.type_code_binary_column():
            vals = build(
                object,
                lambda x: (bytearray.fromhex(x) if x is not None
                           else bytearray(b'\x00')))
            converted = dataframe.DataFrame.BinaryColumn(values=vals)
        elif typecode == utils.type_code_nullable_byte_column():
            vals = build(object, nullable(lambda x: int(np.int8(x))))
            converted = dataframe.DataFrame.NullableByteColumn(values=vals)
        elif typecode == utils.type_code_nullable_short_column():
            vals = build(object, nullable(lambda x: int(np.int16(x))))
            converted = dataframe.DataFrame.NullableShortColumn(values=vals)
        elif typecode == utils.type_code_nullable_int_column():
            vals = build(object, nullable(lambda x: int(np.int32(x))))
            converted = dataframe.DataFrame.NullableIntColumn(values=vals)
        elif typecode == utils.type_code_nullable_long_column():
            vals = build(object, nullable(lambda x: int(np.int64(x))))
            converted = dataframe.DataFrame.NullableLongColumn(values=vals)
        elif typecode == NullableStringColumn.TYPE_CODE:
            # Copy the strings of this column; iterating over the freshly
            # allocated target array would only yield None entries.
            vals = build(object, lambda x: x)
            converted = NullableStringColumn(values=vals)
        elif typecode == utils.type_code_nullable_float_column():
            vals = build(object, nullable(lambda x: float(np.float32(x))))
            converted = dataframe.DataFrame.NullableFloatColumn(values=vals)
        elif typecode == utils.type_code_nullable_double_column():
            vals = build(object, nullable(lambda x: float(np.float64(x))))
            converted = dataframe.DataFrame.NullableDoubleColumn(values=vals)
        elif typecode == utils.type_code_nullable_char_column():
            vals = build(object, nullable(first_char))
            converted = dataframe.DataFrame.NullableCharColumn(values=vals)
        elif typecode == utils.type_code_nullable_boolean_column():
            vals = build(object, nullable(to_bool))
            converted = dataframe.DataFrame.NullableBooleanColumn(values=vals)
        elif typecode == utils.type_code_nullable_binary_column():
            vals = build(object, nullable(bytearray.fromhex))
            converted = dataframe.DataFrame.NullableBinaryColumn(values=vals)
        else:
            raise dataframe.DataFrameException(
                "Unknown column type code: {}".format(typecode))

        # pylint: disable=protected-access
        converted._name = self._name
        return converted
Exemplo n.º 25
0
    def convert_to(self, typecode):
        """Converts this column to a new column of the specified type.

        Every binary value is read as a big-endian byte sequence of which only
        the leading bytes required by the target type are used. How a value
        that is None or too short is converted depends on the target:

        * non-nullable numeric columns receive zero
        * non-nullable string and char columns receive the default value
          provided by the utils module
        * the non-nullable boolean column receives False
        * the non-nullable binary column receives a single zero byte
        * nullable columns receive None

        Boolean targets are True if at least one byte of the value is not
        zero. String targets hold the hexadecimal text of the value.

        Args:
            typecode: The type code of the column type to convert to

        Returns:
            A new column of the requested type which carries the
            name of this column

        Raises:
            DataFrameException: If the specified type code is unknown
        """
        size = self._values.shape[0]

        def holds(raw, width):
            # whether 'raw' provides at least 'width' readable bytes
            return raw is not None and len(raw) >= width

        def signed(raw, width):
            # big-endian two's complement value of the first 'width' bytes
            return int.from_bytes(raw[0:width], byteorder="big", signed=True)

        def nonzero(raw):
            # whether at least one byte of 'raw' is different from zero
            return any(byte != 0 for byte in raw)

        if typecode == utils.type_code_byte_column():
            vals = np.empty([size], dtype=np.int8)
            for i, x in np.ndenumerate(self._values):
                # Decode the byte as signed. Indexing a bytearray yields an
                # int in [0, 255] and values above 0x7f do not fit into int8.
                # Recent NumPy versions raise an OverflowError for them
                # instead of wrapping around
                vals[i] = signed(x, 1) if holds(x, 1) else 0

            converted = dataframe.DataFrame.ByteColumn(values=vals)
        elif typecode == utils.type_code_short_column():
            vals = np.empty([size], dtype=np.int16)
            for i, x in np.ndenumerate(self._values):
                vals[i] = signed(x, 2) if holds(x, 2) else 0

            converted = dataframe.DataFrame.ShortColumn(values=vals)
        elif typecode == utils.type_code_int_column():
            vals = np.empty([size], dtype=np.int32)
            for i, x in np.ndenumerate(self._values):
                vals[i] = signed(x, 4) if holds(x, 4) else 0

            converted = dataframe.DataFrame.IntColumn(values=vals)
        elif typecode == utils.type_code_long_column():
            vals = np.empty([size], dtype=np.int64)
            for i, x in np.ndenumerate(self._values):
                vals[i] = signed(x, 8) if holds(x, 8) else 0

            converted = dataframe.DataFrame.LongColumn(values=vals)
        elif typecode == utils.type_code_string_column():
            # the builtin 'object' replaces the alias 'np.object' which
            # is no longer available in recent NumPy versions
            vals = np.empty([size], dtype=object)
            default_string = utils.default_value_string_column()
            for i, x in np.ndenumerate(self._values):
                vals[i] = x.hex() if x is not None else default_string

            converted = dataframe.DataFrame.StringColumn(values=vals)
        elif typecode == utils.type_code_float_column():
            vals = np.empty([size], dtype=np.float32)
            for i, x in np.ndenumerate(self._values):
                vals[i] = unpack(">f", x[0:4])[0] if holds(x, 4) else 0.0

            converted = dataframe.DataFrame.FloatColumn(values=vals)
        elif typecode == utils.type_code_double_column():
            vals = np.empty([size], dtype=np.float64)
            for i, x in np.ndenumerate(self._values):
                vals[i] = unpack(">d", x[0:8])[0] if holds(x, 8) else 0.0

            converted = dataframe.DataFrame.DoubleColumn(values=vals)
        elif typecode == utils.type_code_char_column():
            vals = np.empty([size], dtype=np.uint8)
            ord_default = ord(utils.default_value_char_column())
            for i, x in np.ndenumerate(self._values):
                vals[i] = int(x[0]) if holds(x, 1) else ord_default

            converted = dataframe.DataFrame.CharColumn(values=vals)
        elif typecode == utils.type_code_boolean_column():
            vals = np.empty([size], dtype=bool)
            for i, x in np.ndenumerate(self._values):
                vals[i] = nonzero(x) if x is not None else False

            converted = dataframe.DataFrame.BooleanColumn(values=vals)
        elif typecode == BinaryColumn.TYPE_CODE:
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                if holds(x, 1):
                    # store an independent copy of the bytes
                    vals[i] = bytearray(x)
                else:
                    vals[i] = bytearray(b"\x00")

            converted = BinaryColumn(values=vals)
        elif typecode == utils.type_code_nullable_byte_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                # NOTE(review): the byte is stored unsigned (0..255) here
                # whereas the non-nullable byte branch decodes it as
                # signed -- confirm which one NullableByteColumn expects
                vals[i] = x[0] if holds(x, 1) else None

            converted = dataframe.DataFrame.NullableByteColumn(values=vals)
        elif typecode == utils.type_code_nullable_short_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = signed(x, 2) if holds(x, 2) else None

            converted = dataframe.DataFrame.NullableShortColumn(values=vals)
        elif typecode == utils.type_code_nullable_int_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = signed(x, 4) if holds(x, 4) else None

            converted = dataframe.DataFrame.NullableIntColumn(values=vals)
        elif typecode == utils.type_code_nullable_long_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = signed(x, 8) if holds(x, 8) else None

            converted = dataframe.DataFrame.NullableLongColumn(values=vals)
        elif typecode == utils.type_code_nullable_string_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = x.hex() if x is not None else None

            converted = dataframe.DataFrame.NullableStringColumn(values=vals)
        elif typecode == utils.type_code_nullable_float_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = unpack(">f", x[0:4])[0] if holds(x, 4) else None

            converted = dataframe.DataFrame.NullableFloatColumn(values=vals)
        elif typecode == utils.type_code_nullable_double_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = unpack(">d", x[0:8])[0] if holds(x, 8) else None

            converted = dataframe.DataFrame.NullableDoubleColumn(values=vals)
        elif typecode == utils.type_code_nullable_char_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = int(x[0]) if holds(x, 1) else None

            converted = dataframe.DataFrame.NullableCharColumn(values=vals)
        elif typecode == utils.type_code_nullable_boolean_column():
            vals = np.empty([size], dtype=object)
            for i, x in np.ndenumerate(self._values):
                # an empty (but not None) value converts to False, not None
                vals[i] = nonzero(x) if x is not None else None

            converted = dataframe.DataFrame.NullableBooleanColumn(values=vals)
        elif typecode == NullableBinaryColumn.TYPE_CODE:
            # same type requested: hand out a copy of this column
            converted = self.clone()
        else:
            raise dataframe.DataFrameException(
                "Unknown column type code: {}".format(typecode))

        # pylint: disable=protected-access
        converted._name = self._name
        return converted
Exemplo n.º 26
0
    def convert_to(self, typecode):
        """Converts this column to a new column of the specified type.

        Numeric and boolean targets are produced by casting the underlying
        values with NumPy's astype(). String targets hold the text of
        each value and char targets hold the code point of the first
        character of that text. Binary targets hold the 8 byte big-endian
        two's complement encoding of each value.

        Args:
            typecode: The type code of the column type to convert to

        Returns:
            A new column of the requested type which carries the
            name of this column

        Raises:
            DataFrameException: If the specified type code is unknown
        """
        if typecode == utils.type_code_byte_column():
            converted = dataframe.DataFrame.ByteColumn(
                values=self._values.astype(np.int8))
        elif typecode == utils.type_code_short_column():
            converted = dataframe.DataFrame.ShortColumn(
                values=self._values.astype(np.int16))
        elif typecode == utils.type_code_int_column():
            converted = dataframe.DataFrame.IntColumn(
                values=self._values.astype(np.int32))
        elif typecode == LongColumn.TYPE_CODE:
            # same type requested: hand out a copy of this column
            converted = self.clone()
        elif typecode == utils.type_code_string_column():
            # the builtin 'object' replaces the alias 'np.object' which
            # is no longer available in recent NumPy versions
            vals = self._values.astype(object)
            for i, x in np.ndenumerate(vals):
                vals[i] = str(x)

            converted = dataframe.DataFrame.StringColumn(values=vals)
        elif typecode == utils.type_code_float_column():
            converted = dataframe.DataFrame.FloatColumn(
                values=self._values.astype(np.float32))
        elif typecode == utils.type_code_double_column():
            converted = dataframe.DataFrame.DoubleColumn(
                values=self._values.astype(np.float64))
        elif typecode == utils.type_code_char_column():
            # Take the first character of the text of each ORIGINAL value.
            # Narrowing the values to uint8 beforehand would wrap everything
            # outside of [0, 255] (e.g. 300 becomes 44) so that the leading
            # character of an unrelated number would be stored
            vals = np.empty([self._values.shape[0]], dtype=np.uint8)
            for i, x in np.ndenumerate(self._values):
                vals[i] = ord(str(int(x))[0])

            converted = dataframe.DataFrame.CharColumn(values=vals)
        elif typecode == utils.type_code_boolean_column():
            converted = dataframe.DataFrame.BooleanColumn(
                values=self._values.astype(bool))
        elif typecode == utils.type_code_binary_column():
            vals = np.empty([self._values.shape[0]], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = bytearray(
                    int(x).to_bytes(8, byteorder="big", signed=True))

            converted = dataframe.DataFrame.BinaryColumn(values=vals)
        elif typecode == utils.type_code_nullable_byte_column():
            # cast to the narrow type first, then box the values as objects
            vals = self._values.astype(np.int8)
            converted = dataframe.DataFrame.NullableByteColumn(
                values=vals.astype(object))
        elif typecode == utils.type_code_nullable_short_column():
            vals = self._values.astype(np.int16)
            converted = dataframe.DataFrame.NullableShortColumn(
                values=vals.astype(object))
        elif typecode == utils.type_code_nullable_int_column():
            vals = self._values.astype(np.int32)
            converted = dataframe.DataFrame.NullableIntColumn(
                values=vals.astype(object))
        elif typecode == NullableLongColumn.TYPE_CODE:
            converted = NullableLongColumn(
                values=self._values.astype(object))
        elif typecode == utils.type_code_nullable_string_column():
            vals = self._values.astype(object)
            for i, x in np.ndenumerate(vals):
                vals[i] = str(x)

            converted = dataframe.DataFrame.NullableStringColumn(values=vals)
        elif typecode == utils.type_code_nullable_float_column():
            vals = self._values.astype(np.float32)
            vals = vals.astype(object)
            converted = dataframe.DataFrame.NullableFloatColumn(values=vals)
        elif typecode == utils.type_code_nullable_double_column():
            vals = self._values.astype(np.float64)
            vals = vals.astype(object)
            converted = dataframe.DataFrame.NullableDoubleColumn(values=vals)
        elif typecode == utils.type_code_nullable_char_column():
            vals = self._values.astype(object)
            for i, x in np.ndenumerate(vals):
                vals[i] = ord(str(x)[0])

            converted = dataframe.DataFrame.NullableCharColumn(values=vals)
        elif typecode == utils.type_code_nullable_boolean_column():
            vals = self._values.astype(bool)
            vals = vals.astype(object)
            converted = dataframe.DataFrame.NullableBooleanColumn(values=vals)
        elif typecode == utils.type_code_nullable_binary_column():
            vals = np.empty([self._values.shape[0]], dtype=object)
            for i, x in np.ndenumerate(self._values):
                vals[i] = bytearray(
                    int(x).to_bytes(8, byteorder="big", signed=True))

            converted = dataframe.DataFrame.NullableBinaryColumn(values=vals)
        else:
            raise dataframe.DataFrameException(
                "Unknown column type code: {}".format(typecode))

        # pylint: disable=protected-access
        converted._name = self._name
        return converted
Exemplo n.º 27
0
 def _check_type(self, value):
     if value is not None and not isinstance(value, str):
         raise dataframe.DataFrameException(
             ("Invalid argument. Expected "
              "string (str) but found {}".format(type(value))))
Exemplo n.º 28
0
def _deserialize_v2(buffer):
    """Deserialization from the binary-based version 2 format (v2).

    While the payload is read, the index pointer always refers to the last
    consumed byte. Every read therefore advances the pointer first.

    The header of a NullableDataFrame carries a lookup list (a bit vector).
    Whenever an entry of a numeric, string or boolean column is read as
    zero, empty or False, the next unread lookup bit decides whether that
    entry is an actual value (bit not set) or null (bit set).

    Args:
        buffer: The bytearray representing the DataFrame to deserialize

    Returns:
        A DataFrame from the given bytearray

    Raises:
        DataFrameException: If the code of the DataFrame implementation or
            of a column type is unknown or if the closing brace of the
            header is missing
    """
    #HEADER
    ptr = 5  # index pointer
    dftype = buffer[ptr]
    if dftype not in (0x64, 0x6e):
        raise dataframe.DataFrameException(
            "Unsupported DataFrame implementation")

    # header format is {v:2;irrrrccccName1.Name2.ttllllbbb}0x...
    # code of the DataFrame implementation
    impl_default = (dftype == 0x64)
    ptr += 1
    rows = int.from_bytes(buffer[ptr:ptr + 4], byteorder="big", signed=False)
    ptr += 4
    cols = int.from_bytes(buffer[ptr:ptr + 4], byteorder="big", signed=False)
    ptr += 4

    # column labels, each one terminated by a null byte
    names = []
    for _ in range(cols):
        c0 = ptr  # first char
        while buffer[ptr] != 0x00:
            ptr += 1
        ptr += 1
        names.append(buffer[c0:ptr - 1].decode("utf-8"))

    # column types, one byte per column
    types = []
    for _ in range(cols):
        types.append(buffer[ptr])
        ptr += 1

    columns = []
    if not impl_default:  # NullableDataFrame
        # first read the entire lookup list into memory
        lookup_length = int.from_bytes(buffer[ptr:ptr + 4],
                                       byteorder="big",
                                       signed=False)
        ptr += 4
        lookup_bits = BitVector(buffer[ptr:ptr + lookup_length])

        # list index pointing to the next readable bit within the lookup list
        li = 0
        ptr += lookup_length
        if buffer[ptr] != 0x7d:  # header closing brace '}' missing
            raise dataframe.DataFrameException("Invalid format")

        #END HEADER

        #PAYLOAD
        for i in range(cols):
            # The builtin 'object' replaces the alias 'np.object' which is
            # no longer available in recent NumPy versions. Entries of an
            # empty object array are None, so null values are simply
            # never assigned in the branches below
            val = np.empty(rows, dtype=object)
            if types[i] == bytecolumn.NullableByteColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 1
                    b = int.from_bytes(buffer[ptr:ptr + 1],
                                       byteorder="big",
                                       signed=True)
                    if b == 0:
                        if not lookup_bits.get(li):
                            val[j] = 0

                        li += 1
                    else:
                        val[j] = b

                columns.append(bytecolumn.NullableByteColumn(names[i], val))

            elif types[i] == shortcolumn.NullableShortColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 2
                    b = int.from_bytes(buffer[ptr - 1:ptr + 1],
                                       byteorder="big",
                                       signed=True)
                    if b == 0:
                        if not lookup_bits.get(li):
                            val[j] = 0

                        li += 1
                    else:
                        val[j] = b

                columns.append(shortcolumn.NullableShortColumn(names[i], val))

            elif types[i] == intcolumn.NullableIntColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 4
                    b = int.from_bytes(buffer[ptr - 3:ptr + 1],
                                       byteorder="big",
                                       signed=True)
                    if b == 0:
                        if not lookup_bits.get(li):
                            val[j] = 0

                        li += 1
                    else:
                        val[j] = b

                columns.append(intcolumn.NullableIntColumn(names[i], val))

            elif types[i] == longcolumn.NullableLongColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 8
                    b = int.from_bytes(buffer[ptr - 7:ptr + 1],
                                       byteorder="big",
                                       signed=True)
                    if b == 0:
                        if not lookup_bits.get(li):
                            val[j] = 0

                        li += 1
                    else:
                        val[j] = b

                columns.append(longcolumn.NullableLongColumn(names[i], val))

            elif types[i] == stringcolumn.NullableStringColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 1
                    c0 = ptr  # marks the first character of each string
                    while buffer[ptr] != 0x00:
                        ptr += 1

                    if (ptr - c0) == 0:
                        if not lookup_bits.get(li):
                            val[j] = ""

                        li += 1
                    else:
                        val[j] = buffer[c0:ptr].decode("utf-8")

                columns.append(stringcolumn.NullableStringColumn(
                    names[i], val))

            elif types[i] == floatcolumn.NullableFloatColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 4
                    # since Python does not have float32, we need to do a
                    # conversion over numpy and str to get the same
                    # precision as the original value
                    f = float(
                        str(
                            np.float32(
                                unpack(">f", buffer[ptr - 3:ptr + 1])[0])))
                    if f == 0.0:
                        if not lookup_bits.get(li):
                            val[j] = 0.0

                        li += 1
                    else:
                        val[j] = f

                columns.append(floatcolumn.NullableFloatColumn(names[i], val))

            elif types[i] == doublecolumn.NullableDoubleColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 8
                    f = unpack(">d", buffer[ptr - 7:ptr + 1])[0]
                    if f == 0.0:
                        if not lookup_bits.get(li):
                            val[j] = 0.0

                        li += 1
                    else:
                        val[j] = f

                columns.append(doublecolumn.NullableDoubleColumn(
                    names[i], val))

            elif types[i] == charcolumn.NullableCharColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 1
                    c = int.from_bytes(buffer[ptr:ptr + 1],
                                       byteorder="big",
                                       signed=False)
                    # a zero byte always denotes null for chars,
                    # the lookup list is not consulted
                    if c == 0:
                        val[j] = None
                    else:
                        val[j] = chr(c)

                columns.append(charcolumn.NullableCharColumn(names[i], val))

            elif types[i] == booleancolumn.NullableBooleanColumn.TYPE_CODE:
                # number of bytes needed to hold one bit per row
                length = (rows + 7) // 8
                ptr += 1  # focus on next readable position
                bits = BitVector(buffer[ptr:ptr + length])
                for j in range(rows):
                    if not bits.get(j):
                        if not lookup_bits.get(li):
                            val[j] = False

                        li += 1
                    else:
                        val[j] = True

                # let the base pointer jump forward to the last read byte
                ptr += (length - 1)
                columns.append(
                    booleancolumn.NullableBooleanColumn(names[i], val))

            elif types[i] == binarycolumn.NullableBinaryColumn.TYPE_CODE:
                for j in range(rows):
                    ptr += 1
                    length = int.from_bytes(buffer[ptr:ptr + 4],
                                            byteorder="big",
                                            signed=False)
                    ptr += 3
                    # a length of zero denotes null, the entry stays None
                    if length != 0:
                        data = bytearray(length)
                        for k in range(length):
                            ptr += 1
                            data[k] = buffer[ptr]

                        val[j] = data

                columns.append(binarycolumn.NullableBinaryColumn(
                    names[i], val))

            else:
                raise dataframe.DataFrameException(
                    ("Unknown column with type code {}").format(types[i]))

        #END PAYLOAD
        if cols == 0:  # uninitialized instance
            df = dataframe.NullableDataFrame()
        else:
            df = dataframe.NullableDataFrame(columns)

    else:  # DefaultDataFrame
        if buffer[ptr] != 0x7d:  # header closing brace '}'
            raise dataframe.DataFrameException("Invalid format")

        #END HEADER

        #PAYLOAD
        for i in range(cols):
            if types[i] == bytecolumn.ByteColumn.TYPE_CODE:
                val = np.empty(rows, dtype=np.int8)
                for j in range(rows):
                    ptr += 1
                    val[j] = int.from_bytes(buffer[ptr:ptr + 1],
                                            byteorder="big",
                                            signed=True)

                columns.append(bytecolumn.ByteColumn(names[i], val))

            elif types[i] == shortcolumn.ShortColumn.TYPE_CODE:
                val = np.empty(rows, dtype=np.int16)
                for j in range(rows):
                    ptr += 2
                    val[j] = int.from_bytes(buffer[ptr - 1:ptr + 1],
                                            byteorder="big",
                                            signed=True)

                columns.append(shortcolumn.ShortColumn(names[i], val))

            elif types[i] == intcolumn.IntColumn.TYPE_CODE:
                val = np.empty(rows, dtype=np.int32)
                for j in range(rows):
                    ptr += 4
                    val[j] = int.from_bytes(buffer[ptr - 3:ptr + 1],
                                            byteorder="big",
                                            signed=True)

                columns.append(intcolumn.IntColumn(names[i], val))

            elif types[i] == longcolumn.LongColumn.TYPE_CODE:
                val = np.empty(rows, dtype=np.int64)
                for j in range(rows):
                    ptr += 8
                    val[j] = int.from_bytes(buffer[ptr - 7:ptr + 1],
                                            byteorder="big",
                                            signed=True)

                columns.append(longcolumn.LongColumn(names[i], val))

            elif types[i] == stringcolumn.StringColumn.TYPE_CODE:
                val = np.empty(rows, dtype=object)
                for j in range(rows):
                    ptr += 1
                    c0 = ptr  # marks the first character of each string
                    while buffer[ptr] != 0x00:
                        ptr += 1

                    if (ptr - c0) == 0:
                        val[j] = stringcolumn.StringColumn.DEFAULT_VALUE
                    else:
                        val[j] = buffer[c0:ptr].decode("utf-8")

                columns.append(stringcolumn.StringColumn(names[i], val))

            elif types[i] == floatcolumn.FloatColumn.TYPE_CODE:
                val = np.empty(rows, dtype=np.float32)
                for j in range(rows):
                    ptr += 4
                    # since Python does not have float32, we need to do a
                    # conversion over numpy and str to get the same
                    # precision as the original value
                    val[j] = float(
                        str(
                            np.float32(
                                unpack(">f", buffer[ptr - 3:ptr + 1])[0])))

                columns.append(floatcolumn.FloatColumn(names[i], val))

            elif types[i] == doublecolumn.DoubleColumn.TYPE_CODE:
                val = np.empty(rows, dtype=np.float64)
                for j in range(rows):
                    ptr += 8
                    val[j] = unpack(">d", buffer[ptr - 7:ptr + 1])[0]

                columns.append(doublecolumn.DoubleColumn(names[i], val))

            elif types[i] == charcolumn.CharColumn.TYPE_CODE:
                val = np.empty(rows, dtype=np.uint8)
                for j in range(rows):
                    ptr += 1
                    c = int.from_bytes(buffer[ptr:ptr + 1],
                                       byteorder="big",
                                       signed=False)
                    val[j] = c

                columns.append(charcolumn.CharColumn(names[i], val))

            elif types[i] == booleancolumn.BooleanColumn.TYPE_CODE:
                # the builtin 'bool' replaces the alias 'np.bool' which
                # is not available in every NumPy version
                val = np.empty(rows, dtype=bool)
                # number of bytes needed to hold one bit per row
                length = (rows + 7) // 8
                ptr += 1  # focus on next readable position
                bits = BitVector(buffer[ptr:ptr + length])
                for j in range(rows):
                    val[j] = bits.get(j)

                # let the base pointer jump forward to the last read byte
                ptr += (length - 1)
                columns.append(booleancolumn.BooleanColumn(names[i], val))

            elif types[i] == binarycolumn.BinaryColumn.TYPE_CODE:
                val = np.empty(rows, dtype=object)
                for j in range(rows):
                    ptr += 1
                    length = int.from_bytes(buffer[ptr:ptr + 4],
                                            byteorder="big",
                                            signed=False)
                    ptr += 3
                    data = bytearray(length)
                    for k in range(length):
                        ptr += 1
                        data[k] = buffer[ptr]

                    val[j] = data

                columns.append(binarycolumn.BinaryColumn(names[i], val))

            else:
                raise dataframe.DataFrameException(
                    ("Unknown column with type code {}").format(types[i]))

        #END PAYLOAD
        if cols == 0:  # uninitialized instance
            df = dataframe.DefaultDataFrame()
        else:
            df = dataframe.DefaultDataFrame(columns)

    return df
Exemplo n.º 29
0
def _wrap_negative_index(index, size, kind):
    """Translates a negative index into its non-negative equivalent.

    Args:
        index: The int index to translate
        size: The number of addressable entries (columns or rows)
        kind: Either "column" or "row". Only used within the error message

    Returns:
        The index itself if it is not negative, otherwise the
        index counted from the end

    Raises:
        DataFrameException: If a negative index reaches beyond the first entry
    """
    if index >= 0:
        return index

    if abs(index) > size:
        raise dataframe.DataFrameException(
            "Invalid {} index: {}".format(kind, index))

    return index % size


def _frame_like(arg, columns):
    """Creates a DataFrame of the same kind (nullable or default) as arg."""
    if arg.is_nullable():
        return dataframe.NullableDataFrame(columns)

    return dataframe.DefaultDataFrame(columns)


def getitem_impl(arg, position):
    """Implementation of the __getitem__() function

    The position is either a single column position (int or str) or a
    tuple of exactly two elements: a column position (int, str, tuple or
    slice) followed by a row position (None, int, str, tuple or slice).
    Negative int positions are counted from the end.

    Args:
        arg: The DataFrame instance on which the function was called upon
        position: The position argument passed to the function

    Returns:
        The value at the specified position

    Raises:
        DataFrameException: If the position has an invalid type, if a tuple
            position does not consist of exactly two elements or if a
            negative index is out of bounds
    """
    if isinstance(position, tuple):
        if len(position) > 2:
            raise dataframe.DataFrameException(
                ("Invalid position argument. Too many "
                 "positions specified: {}").format(len(position)))

        if len(position) < 2:
            # unpacking below would otherwise fail with a bare IndexError
            raise dataframe.DataFrameException(
                ("Invalid position argument. Expected a column and "
                 "a row position but found {} element(s)").format(
                     len(position)))

        cols = position[0]
        rows = position[1]
        if isinstance(cols, (int, str)):
            # check for negative column indices
            if isinstance(cols, int) and cols < 0:
                cols = _wrap_negative_index(cols, arg.columns(), "column")

            if rows is None:
                # implements df[x, :] and df["x", :]
                # NOTE(review): a literal ':' arrives as slice(None) and is
                # handled by the slice branch below; None is presumably
                # passed by the calling __getitem__() -- confirm
                return arg.get_columns(cols=cols)

            if isinstance(rows, int):
                # implements df[x, y] and df["x", y]
                if rows < 0:
                    rows = _wrap_negative_index(rows, arg.rows(), "row")

                return arg.get_column(cols).get_value(rows)

            if isinstance(rows, str):
                # implements df[x, "y_regex"] and df["x", "y_regex"]
                return arg.filter(cols, rows)

            if isinstance(rows, tuple):
                # implements df[x, (y0, y1, ..., yn)]
                # and        df["x", (y0, y1, ..., yn)]
                col_selected = arg.get_column(cols)
                col = column.Column.like(col_selected, length=len(rows))
                df = _frame_like(arg, col)
                for i, row_index in enumerate(rows):
                    col[i] = col_selected[row_index]

                return df

            if isinstance(rows, slice):
                # implements df[x, y0:y1:y2]
                # and        df["x", y0:y1:y2]
                col_selected = arg.get_column(cols)
                # numpy returns an array view when slicing
                # so we have to copy the array explicitly
                # to get an independent instance
                # pylint: disable=protected-access
                col_values = col_selected._values[
                    rows.start:rows.stop:rows.step].copy()

                col = column.Column.like(col_selected, length=0)
                col._values = col_values
                return _frame_like(arg, col)

            # invalid type for row position arg
            raise dataframe.DataFrameException(
                ("Invalid row position type. Expected int, str, tuple "
                 "or slice but found {}").format(type(rows)))

        if isinstance(cols, (tuple, slice)):
            # prefetch the selected columns as a DataFrame
            # pylint: disable=protected-access
            if isinstance(cols, tuple):
                cols_selected = arg.get_columns(cols=cols)
            else:  # is slice
                cols_selected = _frame_like(arg, arg._internal_columns()[cols])

            if rows is None:
                # implements df[(x0, x1, ..., xn), ]
                # and        df[x0:x1:x2, ]
                return cols_selected

            if isinstance(rows, int):
                # implements df[(x0, x1, ..., xn), y]
                # and        df[x0:x1:x2, y]
                if rows < 0:
                    rows = _wrap_negative_index(rows, arg.rows(), "row")

                return cols_selected.get_row(rows)

            if isinstance(rows, tuple):
                # implements df[(x0, x1, ..., xn), (y0, y1, ..., ym)]
                # and        df[x0:x1:x2, (y0, y1, ..., ym)]
                cols = [
                    column.Column.like(col, length=len(rows))
                    for col in cols_selected._internal_columns()
                ]

                df = _frame_like(arg, cols)
                for i, row_index in enumerate(rows):
                    df.set_row(i, cols_selected.get_row(row_index))

                return df

            if isinstance(rows, slice):
                # implements df[(x0, x1, ..., xn), y0:y1:y2]
                # and        df[x0:x1:x2, y0:y1:y2]
                cols = [None] * cols_selected.columns()
                for i, col in enumerate(cols_selected._internal_columns()):
                    # copy the sliced view to get an independent instance
                    col_values = col._values[
                        rows.start:rows.stop:rows.step].copy()

                    col_sliced = column.Column.like(col,
                                                    length=col_values.shape[0])
                    col_sliced._values = col_values
                    cols[i] = col_sliced

                return _frame_like(arg, cols)

            if isinstance(rows, str):
                raise dataframe.DataFrameException(
                    ("Invalid column position type. A filter operation "
                     "must only specify a single column "
                     "but found {}").format(type(cols)))

            # invalid type for row position arg
            raise dataframe.DataFrameException(
                ("Invalid row position type. Expected int, tuple "
                 "or slice but found {}").format(type(rows)))

        # invalid type for column position arg
        raise dataframe.DataFrameException(
            ("Invalid column position type. "
             "Expected int or str but found {}").format(type(cols)))

    if isinstance(position, int):
        # implements df[x]
        if position < 0:
            position = _wrap_negative_index(position, arg.columns(), "column")

        return arg.get_column(position)

    if isinstance(position, str):
        # implements df["x"]
        return arg.get_column(position)

    # invalid type for entire position arg
    raise dataframe.DataFrameException(("Invalid position type. "
                                        "Expected int or str but "
                                        "found {}").format(type(position)))
Exemplo n.º 30
0
def _serialize_v2(df):
    """Serialization to the binary-based version 2 format (v2).

    Args:
        df: The DataFrame to serialize

    Returns:
        A bytearray representing the given DataFrame
    """
    buffer = bytearray()
    #HEADER
    # must start with {v:2;
    buffer.append(0x7b)
    buffer.append(0x76)
    buffer.append(0x3a)
    buffer.append(0x32)
    buffer.append(0x3b)

    # impl: default=0x64 nullable=0x6e
    buffer.append(0x6e if df.is_nullable() else 0x64)

    rows = df.rows()
    if rows > 0xffffffff:
        raise dataframe.DataFrameException(
            ("Unable to serialize DataFrame with "
             "row count greater than 0xffffffff"))

    buffer.extend(rows.to_bytes(4, byteorder="big", signed=False))
    cols = df.columns()
    if cols > 0xffffffff:
        raise dataframe.DataFrameException(
            ("Unable to serialize DataFrame with "
             "column count greater than 0xffffffff"))

    buffer.extend(cols.to_bytes(4, byteorder="big", signed=False))

    if df.has_column_names():
        for name in df.get_column_names():
            buffer.extend(bytearray(name, "utf-8"))
            # add null byte as name delimeter
            buffer.append(0x00)

    else:
        # set indices as strings
        for i in range(cols):
            buffer.extend(bytearray(str(i), "utf-8"))
            buffer.append(0x00)

    for col in df:
        buffer.append(col.type_code())

    if df.is_nullable():  # NullableDataFrame
        # The specification requires a lookup list for differentiating between
        # default values (for example: zeros for numbers) and actual null values.
        # This is implemented here as a bit vector initialized with all bits
        # set to zero.
        # As the lookup list is part of the header, we must first serialize the
        # entire payload and build the lookup list and then bind all the parts
        # together at the end
        header = buffer
        buffer = bytearray()

        # the lookup list
        lookup_bits = BitVector()
        #PAYLOAD
        for col in df:
            type_code = col.type_code()
            val = col.as_array()
            if type_code == bytecolumn.NullableByteColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        buffer.append(0x00)
                        lookup_bits.add1()
                    elif val[i] == 0:
                        buffer.append(0x00)
                        lookup_bits.add0()
                    else:
                        buffer.extend(
                            int(val[i]).to_bytes(1,
                                                 byteorder="big",
                                                 signed=True))

            elif type_code == shortcolumn.NullableShortColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        buffer.extend(b'\x00\x00')
                        lookup_bits.add1()
                    elif val[i] == 0:
                        buffer.extend(b'\x00\x00')
                        lookup_bits.add0()
                    else:
                        buffer.extend(
                            int(val[i]).to_bytes(2,
                                                 byteorder="big",
                                                 signed=True))

            elif type_code == intcolumn.NullableIntColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        buffer.extend(b'\x00\x00\x00\x00')
                        lookup_bits.add1()
                    elif val[i] == 0:
                        buffer.extend(b'\x00\x00\x00\x00')
                        lookup_bits.add0()
                    else:
                        buffer.extend(
                            int(val[i]).to_bytes(4,
                                                 byteorder="big",
                                                 signed=True))

            elif type_code == longcolumn.NullableLongColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        buffer.extend(b'\x00\x00\x00\x00\x00\x00\x00\x00')
                        lookup_bits.add1()
                    elif val[i] == 0:
                        buffer.extend(b'\x00\x00\x00\x00\x00\x00\x00\x00')
                        lookup_bits.add0()
                    else:
                        buffer.extend(
                            int(val[i]).to_bytes(8,
                                                 byteorder="big",
                                                 signed=True))

            elif type_code == stringcolumn.NullableStringColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        lookup_bits.add1()
                    elif len(val[i]) == 0:
                        lookup_bits.add0()
                    else:
                        buffer.extend(val[i].encode("utf-8"))

                    # add null character as string delimeter
                    buffer.append(0x00)

            elif type_code == floatcolumn.NullableFloatColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        buffer.extend(b'\x00\x00\x00\x00')
                        lookup_bits.add1()
                    # bit representation of zero is strictly
                    # defined so we compare directly
                    elif val[i] == 0.0:
                        buffer.extend(b'\x00\x00\x00\x00')
                        lookup_bits.add0()
                    else:
                        buffer.extend(pack(">f", val[i]))

            elif type_code == doublecolumn.NullableDoubleColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        buffer.extend(b'\x00\x00\x00\x00\x00\x00\x00\x00')
                        lookup_bits.add1()
                    # bit representation of zero is strictly
                    # defined so we compare directly
                    elif val[i] == 0.0:
                        buffer.extend(b'\x00\x00\x00\x00\x00\x00\x00\x00')
                        lookup_bits.add0()
                    else:
                        buffer.extend(pack(">d", val[i]))

            elif type_code == charcolumn.NullableCharColumn.TYPE_CODE:
                for i in range(rows):
                    if val[i] is None:
                        buffer.append(0x00)
                    else:
                        buffer.extend(val[i].to_bytes(1,
                                                      byteorder="big",
                                                      signed=True))

            elif type_code == booleancolumn.NullableBooleanColumn.TYPE_CODE:
                bits = BitVector()
                for i in range(rows):
                    if val[i] is None:
                        bits.add0()
                        lookup_bits.add1()
                    else:
                        if val[i]:
                            bits.add1()
                        else:
                            bits.add0()
                            lookup_bits.add0()
                buffer.extend(bits.tobytearray())

            elif type_code == binarycolumn.NullableBinaryColumn.TYPE_CODE:
                for i in range(rows):
                    dataLength = len(val[i]) if val[i] is not None else 0
                    buffer.extend(dataLength.to_bytes(4, byteorder="big"))
                    if val[i] is not None:
                        buffer.extend(val[i])

            else:
                raise dataframe.DataFrameException(
                    "Unknown column type: {}".format(type_code))

        #END PAYLOAD
        # copy operations to stick everything together
        payload = buffer
        # allocate buffer for the final result
        buffer = header
        # Number of byte blocks of the lookup list.
        # The specification requires that the lookup
        # list has a minimum length of one block
        blength = int(((lookup_bits.size() - 1) / 8) + 1)
        buffer.extend(blength.to_bytes(4, byteorder="big", signed=False))
        # copy lookup bits
        buffer.extend(lookup_bits.tobytearray())
        # add header closing brace '}'
        buffer.append(0x7d)
        # copy payload buffer
        buffer.extend(payload)

    else:  # DefaultDataFrame
        buffer.append(0x7d)  # add header closing brace '}'
        #END HEADER
        # As DefaultDataFrames do not have null values, no lookup list
        # is required and we just serialize all bytes as they are to
        # the payload section
        #PAYLOAD
        for col in df:
            type_code = col.type_code()
            val = col.as_array()
            if type_code == bytecolumn.ByteColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(
                        int(val[i]).to_bytes(1, byteorder="big", signed=True))

            elif type_code == shortcolumn.ShortColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(
                        int(val[i]).to_bytes(2, byteorder="big", signed=True))

            elif type_code == intcolumn.IntColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(
                        int(val[i]).to_bytes(4, byteorder="big", signed=True))

            elif type_code == longcolumn.LongColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(
                        int(val[i]).to_bytes(8, byteorder="big", signed=True))

            elif type_code == stringcolumn.StringColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(val[i].encode("utf-8"))
                    # add null character as string delimeter
                    buffer.append(0x00)

            elif type_code == floatcolumn.FloatColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(pack(">f", val[i]))

            elif type_code == doublecolumn.DoubleColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(pack(">d", val[i]))

            elif type_code == charcolumn.CharColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(
                        int(val[i]).to_bytes(1, byteorder="big", signed=True))

            elif type_code == booleancolumn.BooleanColumn.TYPE_CODE:
                bits = BitVector()
                for i in range(rows):
                    if val[i]:
                        bits.add1()
                    else:
                        bits.add0()
                buffer.extend(bits.tobytearray())

            elif type_code == binarycolumn.BinaryColumn.TYPE_CODE:
                for i in range(rows):
                    buffer.extend(len(val[i]).to_bytes(4, byteorder="big"))
                    buffer.extend(val[i])

            else:
                raise dataframe.DataFrameException(
                    "Unknown column type: {}".format(type_code))

        #END PAYLOAD

    return buffer