Пример #1
0
def pandas_col_to_ibis_type(col):
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np
    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if pdcom.is_datetime64_ns_dtype(dty):
            return 'timestamp'
        else:
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units"
                                    .format(col.name, dty))
    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types
    if issubclass(dty.type, np.int8):
        return 'int8'
    if issubclass(dty.type, np.int16):
        return 'int16'
    if issubclass(dty.type, np.int32):
        return 'int32'
    if issubclass(dty.type, np.int64):
        return 'int64'
    if issubclass(dty.type, np.float32):
        return 'float'
    if issubclass(dty.type, np.float64):
        return 'double'
    if issubclass(dty.type, np.uint8):
        return 'int16'
    if issubclass(dty.type, np.uint16):
        return 'int32'
    if issubclass(dty.type, np.uint32):
        return 'int64'
    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64"
                                .format(col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}"
                            .format(col.name, dty))
Пример #2
0
def _dt_to_float_ordinal(dt):
    """
    Convert :mod:`datetime` to the Gregorian date as UTC float days,
    preserving hours, minutes, seconds and microseconds.  Return value
    is a :func:`float`.
    """
    if isinstance(dt, (np.ndarray, Series)) and com.is_datetime64_ns_dtype(dt):
        base = dates.epoch2num(dt.asi8 / 1.0E9)
    else:
        base = dates.date2num(dt)
    return base
Пример #3
0
def pandas_col_to_ibis_type(col):
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np
    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if pdcom.is_datetime64_ns_dtype(dty):
            return 'timestamp'
        else:
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units".format(
                                        col.name, dty))
    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types
    if issubclass(dty.type, np.int8):
        return 'int8'
    if issubclass(dty.type, np.int16):
        return 'int16'
    if issubclass(dty.type, np.int32):
        return 'int32'
    if issubclass(dty.type, np.int64):
        return 'int64'
    if issubclass(dty.type, np.float32):
        return 'float'
    if issubclass(dty.type, np.float64):
        return 'double'
    if issubclass(dty.type, np.uint8):
        return 'int16'
    if issubclass(dty.type, np.uint16):
        return 'int32'
    if issubclass(dty.type, np.uint32):
        return 'int64'
    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64".format(
            col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}".format(col.name, dty))
Пример #4
0
    def _convert_listlike(arg, box):

        if isinstance(arg, (list,tuple)):
            arg = np.array(arg, dtype='O')

        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz='utc' if utc else None)
                except ValueError:
                    pass

            return arg

        arg = com._ensure_object(arg)
        try:
            if format is not None:
                result = None

                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg)
                    except:
                        raise ValueError("cannot convert the input to '%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(arg, format, coerce=coerce)
                    except (tslib.OutOfBoundsDatetime):
                        if errors == 'raise':
                            raise
                        result = arg
            else:
                result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
                                                 utc=utc, dayfirst=dayfirst,
                                                 coerce=coerce, unit=unit)
            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz='utc' if utc else None)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, None, tz=tz)
            except (ValueError, TypeError):
                raise e
Пример #5
0
    def _convert_listlike(arg, box):

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz='utc' if utc else None)
                except ValueError:
                    pass

            return arg

        arg = com._ensure_object(arg)
        try:
            if format is not None:
                result = None

                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg)
                    except:
                        raise ValueError(
                            "cannot convert the input to '%Y%m%d' date format")

                # fallback
                if result is None:
                    result = tslib.array_strptime(arg, format, coerce=coerce)
            else:
                result = tslib.array_to_datetime(arg,
                                                 raise_=errors == 'raise',
                                                 utc=utc,
                                                 dayfirst=dayfirst,
                                                 coerce=coerce,
                                                 unit=unit)
            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz='utc' if utc else None)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, None, tz=tz)
            except (ValueError, TypeError):
                raise e
Пример #6
0
    def _convert_listlike(arg, box, format):

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz='utc' if utc else None)
                except ValueError:
                    pass

            return arg

        arg = com._ensure_object(arg)

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

            if format is not None:
                # There is a special fast-path for iso8601 formatted
                # datetime strings, so in those cases don't use the inferred
                # format because this path makes process slower in this
                # special case
                format_is_iso8601 = ('%Y-%m-%dT%H:%M:%S.%f'.startswith(format)
                                     or
                                     '%Y-%m-%d %H:%M:%S.%f'.startswith(format))
                if format_is_iso8601:
                    format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, coerce=coerce)
                    except:
                        raise ValueError(
                            "cannot convert the input to '%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(arg,
                                                      format,
                                                      exact=exact,
                                                      coerce=coerce)
                    except (tslib.OutOfBoundsDatetime):
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # Only raise this error if the user provided the
                        # datetime format, and not when it was inferred
                        if not infer_datetime_format:
                            raise

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(arg,
                                                 raise_=errors == 'raise',
                                                 utc=utc,
                                                 dayfirst=dayfirst,
                                                 coerce=coerce,
                                                 unit=unit)

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz='utc' if utc else None)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, None, tz=tz)
            except (ValueError, TypeError):
                raise e
Пример #7
0
    def _convert_listlike(arg, box, format):

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype="O")

        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz="utc" if utc else None)
                except ValueError:
                    pass

            return arg

        arg = com._ensure_object(arg)

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

            if format is not None:
                # There is a special fast-path for iso8601 formatted
                # datetime strings, so in those cases don't use the inferred
                # format because this path makes process slower in this
                # special case
                format_is_iso8601 = "%Y-%m-%dT%H:%M:%S.%f".startswith(format) or "%Y-%m-%d %H:%M:%S.%f".startswith(
                    format
                )
                if format_is_iso8601:
                    format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == "%Y%m%d":
                    try:
                        result = _attempt_YYYYMMDD(arg)
                    except:
                        raise ValueError("cannot convert the input to '%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(arg, format, coerce=coerce)
                    except (tslib.OutOfBoundsDatetime):
                        if errors == "raise":
                            raise
                        result = arg
                    except ValueError:
                        # Only raise this error if the user provided the
                        # datetime format, and not when it was inferred
                        if not infer_datetime_format:
                            raise

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg, raise_=errors == "raise", utc=utc, dayfirst=dayfirst, coerce=coerce, unit=unit
                )

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz="utc" if utc else None)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, None, tz=tz)
            except (ValueError, TypeError):
                raise e
Пример #8
0
    def _convert_listlike(arg, box, format, name=None):

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg,
                                         tz='utc' if utc else None,
                                         name=name)
                except ValueError:
                    pass

            return arg

        elif com.is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz='utc' if utc else None)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif unit is not None:
            if format is not None:
                raise ValueError("cannot specify both format and unit")
            arg = getattr(arg, 'values', arg)
            result = tslib.array_with_unit_to_datetime(arg,
                                                       unit,
                                                       errors=errors)
            if box:
                if errors == 'ignore':
                    from pandas import Index
                    return Index(result)

                return DatetimeIndex(result,
                                     tz='utc' if utc else None,
                                     name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = com._ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except:
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(arg,
                                                      format,
                                                      exact=exact,
                                                      errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    freq=freq,
                    require_iso8601=require_iso8601)

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result,
                                       tz='utc' if utc else None,
                                       name=name)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
Пример #9
0
 def test_compat(self):
     self.assertFalse(is_datetime64_ns_dtype(self.dtype))
     self.assertFalse(is_datetime64_ns_dtype("datetime64[ns, US/Eastern]"))
     self.assertFalse(is_datetime64_dtype(self.dtype))
     self.assertFalse(is_datetime64_dtype("datetime64[ns, US/Eastern]"))
Пример #10
0
 def test_compat(self):
     self.assertFalse(is_datetime64_ns_dtype(self.dtype))
     self.assertFalse(is_datetime64_ns_dtype('datetime64[ns, US/Eastern]'))
     self.assertFalse(is_datetime64_dtype(self.dtype))
     self.assertFalse(is_datetime64_dtype('datetime64[ns, US/Eastern]'))
Пример #11
0
    def _convert_listlike(arg, box, format):

        if isinstance(arg, (list,tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz='utc' if utc else None)
                except ValueError:
                    pass

            return arg
        elif format is None and com.is_integer_dtype(arg) and unit=='ns':
            result = arg.astype('datetime64[ns]')
            if box:
                return DatetimeIndex(result, tz='utc' if utc else None)

            return result

        arg = com._ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = (
                ('%Y-%m-%dT%H:%M:%S.%f'.startswith(format) or
                '%Y-%m-%d %H:%M:%S.%f'.startswith(format)) and
				format != '%Y'
            )
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, coerce=coerce)
                    except:
                        raise ValueError("cannot convert the input to '%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(
                            arg, format, exact=exact, coerce=coerce
                        )
                    except (tslib.OutOfBoundsDatetime):
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(arg, raise_=errors=='raise',
                                                 utc=utc, dayfirst=dayfirst,
                                                 yearfirst=yearfirst, freq=freq,
                                                 coerce=coerce, unit=unit,
                                                 require_iso8601=require_iso8601)

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz='utc' if utc else None)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, None, tz=tz)
            except (ValueError, TypeError):
                raise e
Пример #12
0
    def _convert_listlike(arg, box, format, name=None):

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz='utc' if utc else None,
                                         name=name)
                except ValueError:
                    pass

            return arg

        elif com.is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz='utc' if utc else None)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
            result = arg.astype('datetime64[ns]')
            if box:
                return DatetimeIndex(result, tz='utc' if utc else None,
                                     name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = com._ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except:
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(
                            arg, format, exact=exact, errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    freq=freq,
                    unit=unit,
                    require_iso8601=require_iso8601
                )

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result,
                                       tz='utc' if utc else None,
                                       name=name)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
Пример #13
0
    def _convert_listlike(arg, box, format, name=None):

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg,
                                         tz='utc' if utc else None,
                                         name=name)
                except ValueError:
                    pass

            return arg
        elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
            result = arg.astype('datetime64[ns]')
            if box:
                return DatetimeIndex(result,
                                     tz='utc' if utc else None,
                                     name=name)

            return result

        arg = com._ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = (('%Y-%m-%dT%H:%M:%S.%f'.startswith(format)
                                  or '%Y-%m-%d %H:%M:%S.%f'.startswith(format))
                                 and format != '%Y')
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except:
                        raise ValueError(
                            "cannot convert the input to '%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(arg,
                                                      format,
                                                      exact=exact,
                                                      errors=errors)
                    except (tslib.OutOfBoundsDatetime):
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    freq=freq,
                    unit=unit,
                    require_iso8601=require_iso8601)

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result,
                                       tz='utc' if utc else None,
                                       name=name)
            return result

        except ValueError as e:
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e