def test_to_datetime_barely_out_of_bounds(self): # GH#19529 # GH#19382 close enough to bounds that dropping nanos would result # in an in-bounds datetime arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) with pytest.raises(tslib.OutOfBoundsDatetime): tslib.array_to_datetime(arr)
def test_coerce_outside_ns_bounds(self, invalid_date): arr = np.array([invalid_date], dtype='object') with pytest.raises(ValueError): tslib.array_to_datetime(arr, errors='raise') result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = np.array([iNaT], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected)
def test_coerce_outside_ns_bounds(self, invalid_date): arr = np.array([invalid_date], dtype='object') with pytest.raises(ValueError): tslib.array_to_datetime(arr, errors='raise') result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = np.array([tslib.iNaT], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected)
def test_parsing_timezone_offsets(self, dt_string): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added arr = np.array(['01-01-2013 00:00:00'], dtype=object) expected = tslib.array_to_datetime(arr) arr = np.array([dt_string], dtype=object) result = tslib.array_to_datetime(arr) tm.assert_numpy_array_equal(result, expected)
def test_to_datetime_barely_out_of_bounds(): # see gh-19382, gh-19529 # # Close enough to bounds that dropping nanos # would result in an in-bounds datetime. arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object) msg = "Out of bounds nanosecond timestamp: 2262-04-11 23:47:16" with pytest.raises(tslib.OutOfBoundsDatetime, match=msg): tslib.array_to_datetime(arr)
def test_number_looking_strings_not_into_datetime(self): # GH#4601 # These strings don't look like datetimes so they shouldn't be # attempted to be converted arr = np.array(['-352.737091', '183.575577'], dtype=object) result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) arr = np.array(['1', '2', '3', '4', '5'], dtype=object) result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr)
def test_parsing_timezone_offsets(dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added. arr = np.array(["01-01-2013 00:00:00"], dtype=object) expected, _ = tslib.array_to_datetime(arr) arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) tm.assert_numpy_array_equal(result, expected) assert result_tz is pytz.FixedOffset(expected_tz)
def test_coerce_outside_ns_bounds(invalid_date, errors): arr = np.array([invalid_date], dtype="object") kwargs = {"values": arr, "errors": errors} if errors == "raise": msg = "Out of bounds .* present at position 0" with pytest.raises(ValueError, match=msg): tslib.array_to_datetime(**kwargs) else: # coerce. result, _ = tslib.array_to_datetime(**kwargs) expected = np.array([iNaT], dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected)
def test_coerce_outside_ns_bounds(invalid_date, errors): arr = np.array([invalid_date], dtype="object") kwargs = dict(values=arr, errors=errors) if errors == "raise": msg = "Out of bounds nanosecond timestamp" with pytest.raises(ValueError, match=msg): tslib.array_to_datetime(**kwargs) else: # coerce. result, _ = tslib.array_to_datetime(**kwargs) expected = np.array([iNaT], dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected)
def test_coerce_of_invalid_datetimes(self): arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) # Without coercing, the presence of any invalid dates prevents # any values from being converted result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = ['2013-01-01T00:00:00.000000000-0000', iNaT, iNaT] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]'))
def test_coerce_of_invalid_datetimes(errors): arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object) kwargs = {"values": arr, "errors": errors} if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]"))
def test_parsing_non_iso_timezone_offset(self): dt_string = '01-01-2013T00:00:00.000000000+0000' arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) expected = np.array([np.datetime64('2013-01-01 00:00:00.000000000')]) tm.assert_numpy_array_equal(result, expected) assert result_tz is pytz.FixedOffset(0)
def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) result = tslib.array_to_datetime(arr) expected = ['2013-01-01T00:00:00.000000000-0000', '2013-01-02T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]')) arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) result = tslib.array_to_datetime(arr) expected = ['2013-09-16T00:00:00.000000000-0000', '2013-09-17T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]'))
def try_datetime(v): # safe coerce to datetime64 try: # GH19671 v = tslib.array_to_datetime(v, require_iso8601=True, errors="raise")[0] except ValueError: # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype, xref GH19671 try: from pandas._libs.tslibs import conversion from pandas import DatetimeIndex values, tz = conversion.datetime_to_datetime64(v) return DatetimeIndex(values).tz_localize("UTC").tz_convert( tz=tz) except (ValueError, TypeError): pass except Exception: pass return v.reshape(shape)
def try_datetime(v): # safe coerce to datetime64 try: # GH19671 v = tslib.array_to_datetime(v, require_iso8601=True, errors='raise')[0] except ValueError: # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype, xref GH19671 try: from pandas._libs.tslibs import conversion from pandas import DatetimeIndex values, tz = conversion.datetime_to_datetime64(v) return DatetimeIndex(values).tz_localize( 'UTC').tz_convert(tz=tz) except (ValueError, TypeError): pass except Exception: pass return v.reshape(shape)
def calc(carg): # calculate the actual result carg = carg.astype(object) parsed = parsing.try_parse_year_month_day(carg / 10000, carg / 100 % 100, carg % 100) return tslib.array_to_datetime(parsed, errors=errors)
def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) result, _ = tslib.array_to_datetime(arr) expected = ['2013-01-01T00:00:00.000000000-0000', '2013-01-02T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]')) arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) result, _ = tslib.array_to_datetime(arr) expected = ['2013-09-16T00:00:00.000000000-0000', '2013-09-17T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]'))
def test_coerce_outside_ns_bounds_one_valid(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) result = tslib.array_to_datetime(arr, errors='coerce') expected = [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]'))
def test_coerce_outside_ns_bounds_one_valid(): arr = np.array(["1/1/1000", "1/1/2000"], dtype=object) result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = [iNaT, "2000-01-01T00:00:00.000000000-0000"] expected = np_array_datetime64_compat(expected, dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected)
def test_coerce_outside_ns_bounds_one_valid(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = [iNaT, '2000-01-01T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]'))
def test_number_looking_strings_not_into_datetime(data): # see gh-4601 # # These strings don't look like datetimes, so # they shouldn't be attempted to be converted. arr = np.array(data, dtype=object) result, _ = tslib.array_to_datetime(arr, errors="ignore") tm.assert_numpy_array_equal(result, arr)
def test_datetime_subclass(data, expected): # GH 25851 # ensure that subclassed datetime works with # array_to_datetime arr = np.array(data, dtype=object) result, _ = tslib.array_to_datetime(arr) expected = np_array_datetime64_compat(expected, dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected)
def test_coerce_of_invalid_datetimes(errors): arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object) kwargs = dict(values=arr, errors=errors) if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype="M8[ns]"))
def test_parsing_different_timezone_offsets(self): # GH 17697 data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) result, result_tz = tslib.array_to_datetime(data) expected = np.array([datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400))], dtype=object) tm.assert_numpy_array_equal(result, expected) assert result_tz is None
def test_parsing_different_timezone_offsets(self): # GH 17697 data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) result, result_tz = tslib.array_to_datetime(data) expected = np.array([ datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400)) ], dtype=object) tm.assert_numpy_array_equal(result, expected) assert result_tz is None
def try_datetime(v): # safe coerce to datetime64 try: v = tslib.array_to_datetime(v, errors='raise') except ValueError: # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype try: from pandas import to_datetime return to_datetime(v) except: pass except: pass return v.reshape(shape)
def try_datetime(v): # safe coerce to datetime64 try: v = tslib.array_to_datetime(v, errors='raise') except ValueError: # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype try: from pandas import to_datetime return to_datetime(v) except Exception: pass except Exception: pass return v.reshape(shape)
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parced box : boolean True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- ndarray of parsed dates Returns: - Index-like if box=True - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None try: result = None if format is not None: # shortcut formatting here if format == '%Y%m%d': try: result = _attempt_YYYYMMDD(arg, errors=errors) except: raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result, timezones = array_strptime( arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( result, timezones, box, tz) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg if result is None and (format is None or infer_datetime_format): result = tslib.array_to_datetime( arg, errors=errors, utc=tz == 'utc', dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601 ) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) return result except ValueError as e: try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e
def _convert_listlike(arg, box, format, name=None, tz=tz): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if utc: arg = arg.tz_convert(None).tz_localize('UTC') return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None try: result = None if format is not None: # shortcut formatting here if format == '%Y%m%d': try: result = _attempt_YYYYMMDD(arg, errors=errors) except: raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result = array_strptime(arg, format, exact=exact, errors=errors) except tslib.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg if result is None and (format is None or infer_datetime_format): result = tslib.array_to_datetime( arg, errors=errors, utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601 ) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) return result except ValueError as e: try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parced box : boolean True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- ndarray of parsed dates Returns: - Index-like if box=True - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex from pandas.core.arrays.datetimes import maybe_convert_dtype if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result, name=name) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg arg, _ = maybe_convert_dtype(arg, copy=False) arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None try: result = None if format is not None: # shortcut formatting here if format == '%Y%m%d': try: # pass orig_arg as float-dtype may have been converted to # datetime64[ns] orig_arg = ensure_object(orig_arg) result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result, timezones = array_strptime( arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( result, timezones, box, tz, name) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg if result is None and (format is None or infer_datetime_format): result, tz_parsed = tslib.array_to_datetime( arg, errors=errors, utc=tz == 'utc', dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601 ) if tz_parsed is not None: if box: # We can take a shortcut since the datetime64 numpy array # is in UTC return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) else: # Convert the datetime64 numpy array to an numpy array # of datetime objects result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result] return np.array(result, dtype=object) if box: # Ensure we return an Index in all cases where box=True if is_datetime64_dtype(result): return DatetimeIndex(result, tz=tz, name=name) elif is_object_dtype(result): # e.g. an Index of datetime objects from pandas import Index return Index(result, name=name) return result except ValueError as e: try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e
def test_parsing_valid_dates(data, expected): arr = np.array(data, dtype=object) result, _ = tslib.array_to_datetime(arr) expected = np_array_datetime64_compat(expected, dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected)