def get_dataset(self, key, info):
    if self._data is None:
        self.read()
    if key.name in ['latitude', 'longitude']:
        lons, lats = self.get_lonlats()
        if key.name == 'latitude':
            return Projectable(lats, id=key)
        else:
            return Projectable(lons, id=key)

    avhrr_channel_index = {'1': 0, '2': 1, '3a': 2,
                           '3b': 2, '4': 3, '5': 4}
    index = avhrr_channel_index[key.name]
    mask = False
    if key.name in ['3a', '3b'] and self._is3b is None:
        ch3a = bfield(self._data["id"]["id"], 10)
        self._is3b = np.logical_not(ch3a)
    if key.name == '3a':
        mask = np.tile(self._is3b, (1, 2048))
    elif key.name == '3b':
        mask = np.tile(np.logical_not(self._is3b), (1, 2048))

    data = self._data["image_data"][:, :, index]
    if key.calibration == 'counts':
        return Projectable(data, mask=mask,
                           area=self.get_lonlats(),
                           units='1')

    pg_spacecraft = ''.join(self.platform_name.split()).lower()
    jdays = (np.datetime64(self.start_time) - np.datetime64(str(
        self.year) + '-01-01T00:00:00Z')) / np.timedelta64(1, 'D')
    if index < 2 or key.name == '3a':
        data = calibrate_solar(data, index, self.year, jdays,
                               pg_spacecraft)
        units = '%'
    if index > 2 or key.name == '3b':
        if self.times is None:
            self.times = time_seconds(self._data["timecode"], self.year)
        line_numbers = (
            np.round((self.times - self.times[-1]) /
                     np.timedelta64(166666667, 'ns'))).astype(int)
        line_numbers -= line_numbers[0]
        if self.prt is None:
            self.prt, self.ict, self.space = self.get_telemetry()
        chan = index + 1
        data = calibrate_thermal(data, self.prt, self.ict[:, chan - 3],
                                 self.space[:, chan - 3], line_numbers,
                                 chan, pg_spacecraft)
        units = 'K'
    # TODO: check if entirely masked before returning
    return Projectable(data, mask=mask, units=units)
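# A minimal, self-contained sketch (not part of the reader above) of the
# fractional-day arithmetic used for `jdays`: subtracting two datetime64
# values gives a timedelta64, and dividing by np.timedelta64(1, 'D')
# converts it to a float number of days. Values are illustrative only.
import numpy as np

start_time = np.datetime64('2009-03-15T06:30:00')
year_start = np.datetime64('2009-01-01T00:00:00')
jdays = (start_time - year_start) / np.timedelta64(1, 'D')
print(jdays)  # ~73.2708 fractional days since the start of the year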
def test_frame_datetime64_handling_groupby(self):
    # it works!
    df = DataFrame([(3, np.datetime64('2012-07-03')),
                    (3, np.datetime64('2012-07-04'))],
                   columns=['a', 'date'])
    result = df.groupby('a').first()
    assert result['date'][3] == Timestamp('2012-07-03')
def test_bounds_with_different_units(self):
    out_of_bounds_dates = ('1677-09-21', '2262-04-12')
    time_units = ('D', 'h', 'm', 's', 'ms', 'us')

    for date_string in out_of_bounds_dates:
        for unit in time_units:
            self.assertRaises(ValueError, Timestamp,
                              np.datetime64(date_string,
                                            dtype='M8[%s]' % unit))

    in_bounds_dates = ('1677-09-23', '2262-04-11')

    for date_string in in_bounds_dates:
        for unit in time_units:
            Timestamp(np.datetime64(date_string, dtype='M8[%s]' % unit))
def timeparse(timestr):
    DEFAULT = datetime(1, 1, 1)
    bc = False
    if re.search(r'bce?', timestr, flags=re.I):
        bc = True
        timestr = re.sub(r'bce?', '', timestr, flags=re.I)
    if re.match('-', timestr, flags=re.I):
        bc = True
        timestr = timestr.replace('-', '', 1)
    if re.search(r'ad', timestr, flags=re.I):
        timestr = re.sub('ad', '', timestr, flags=re.I)
    if bc is True:
        timestr = "-%s" % timestr
    timestr = timestr.strip()

    try:
        t = numpy.datetime64(timestr).astype('datetime64[ms]').astype('int64')
        return t, str(numpy.datetime64(t, 'ms'))
    except:
        pass

    # try just using straight datetime parsing
    if bc is False:
        try:
            logger.debug('trying %s as direct parse', timestr)
            dt = parse(timestr, default=DEFAULT)
            t = numpy.datetime64(dt.isoformat()).astype(
                'datetime64[ms]').astype('int64')
            return t, str(numpy.datetime64(t, 'ms'))
        except:
            pass

    return None, None
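# A small, self-contained illustration (not from the original module) of the
# conversion timeparse() relies on: casting a datetime64 to 'datetime64[ms]'
# and then to int64 yields milliseconds since the Unix epoch, and the round
# trip back through numpy.datetime64(t, 'ms') recovers the timestamp.
import numpy

t = numpy.datetime64('2014-06-30').astype('datetime64[ms]').astype('int64')
print(t)                                # 1404086400000
print(str(numpy.datetime64(t, 'ms')))   # 2014-06-30T00:00:00.000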
def test_nanosecond_timestamp(self): # GH 7610 expected = 1293840000000000005 t = Timestamp('2011-01-01') + offsets.Nano(5) self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 5) t = Timestamp(t) self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 5) t = Timestamp(np.datetime64('2011-01-01 00:00:00.000000005Z')) self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 5) expected = 1293840000000000010 t = t + offsets.Nano(5) self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 10) t = Timestamp(t) self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 10) t = Timestamp(np.datetime64('2011-01-01 00:00:00.000000010Z')) self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 10)
def test_isscalar_numpy_zerodim_arrays(self):
    for zerodim in [np.array(1), np.array('foobar'),
                    np.array(np.datetime64('2014-01-01')),
                    np.array(np.timedelta64(1, 'h')),
                    np.array(np.datetime64('NaT'))]:
        self.assertFalse(is_scalar(zerodim))
        self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim)))
def testResampleData(self): # test upsampling by a factor of 2 timestamps = numpy.array([numpy.datetime64( datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzlocal()) + datetime.timedelta(hours=i)) for i in xrange(8)]) values = numpy.linspace(0, 7, 8) newSamplingInterval = numpy.timedelta64(1800, 's') (newTimeStamps, newValues) = param_finder._resampleData(timestamps, values, newSamplingInterval) trueNewTimeStamps = numpy.array([numpy.datetime64( datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzlocal()) + datetime.timedelta(hours=0.5 * i)) for i in xrange(15)]) self.assertTrue(numpy.allclose(newValues, numpy.linspace(0, 7, 15))) timestampError = (numpy.sum( numpy.abs(newTimeStamps - trueNewTimeStamps))).item().total_seconds() self.assertAlmostEqual(timestampError, 0) # test down-sampling by a factor of 2 newSamplingInterval = numpy.timedelta64(7200, 's') (newTimeStamps, newValues) = param_finder._resampleData(timestamps, values, newSamplingInterval) trueNewTimeStamps = numpy.array([numpy.datetime64( datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzlocal()) + datetime.timedelta(hours=2 * i)) for i in xrange(4)]) timestampError = (numpy.sum( numpy.abs(newTimeStamps - trueNewTimeStamps))).item().total_seconds() self.assertTrue(numpy.allclose(newValues, numpy.linspace(0, 6, 4))) self.assertAlmostEqual(timestampError, 0)
def travel_time(start_time, path, measurements_by_station, station_metadata,
                time_granularity=60*60):
    """Calculate the travel time along the given path at the given start time.

    Args:
        start_time - start time as a datetime64
        path - list of station IDs that must be traversed to reach the
            destination
        measurements_by_station - measurement dataframes grouped by station
        station_metadata - per-station metadata indexed by station ID
        time_granularity - granularity of samples in seconds
    """
    time_granularity *= 1000000  # convert to microseconds
    time = datetime64_to_microseconds(start_time)
    total_dist = 0
    for i in range(len(path) - 1):
        # calculate how long it takes to get to the next station based on the
        # current time
        sid1 = path[i]
        sid2 = path[i + 1]
        measurements = measurements_by_station[sid1]
        quantized = np.datetime64(time - time % time_granularity)
        filtered = measurements[measurements['timestamp'] == quantized]
        speed = filtered.iloc[0]['avgspeed']
        if np.isnan(speed):
            return (np.nan, np.nan)
        station1_metadata = station_metadata.loc[sid1]
        station2_metadata = station_metadata.loc[sid2]
        dist = abs(station1_metadata['Abs_PM'] - station2_metadata['Abs_PM'])
        total_dist += dist
        # TODO: what if speed is NaN? interpolate
        time += 1000000 * 60 * 60 * dist / speed
    return (total_dist, np.datetime64(time) - start_time)
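# Self-contained sketch (illustrative values, not from the pipeline above) of
# the quantization step used in travel_time(): a timestamp is snapped to the
# start of its sampling bucket by subtracting the remainder modulo the
# granularity, here expressed directly on datetime64/timedelta64 values.
import numpy as np

granularity = np.timedelta64(60 * 60, 's')          # one-hour buckets
t = np.datetime64('2016-05-02T13:47:12')
quantized = t - (t - np.datetime64('1970-01-01T00:00:00')) % granularity
print(quantized)  # 2016-05-02T13:00:00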
def test_infer_dtype_period(self):
    # GH 13664
    arr = np.array([pd.Period('2011-01', freq='D'),
                    pd.Period('2011-02', freq='D')])
    self.assertEqual(pd.lib.infer_dtype(arr), 'period')

    arr = np.array([pd.Period('2011-01', freq='D'),
                    pd.Period('2011-02', freq='M')])
    self.assertEqual(pd.lib.infer_dtype(arr), 'period')

    # starts with nan
    for n in [pd.NaT, np.nan]:
        arr = np.array([n, pd.Period('2011-01', freq='D')])
        self.assertEqual(pd.lib.infer_dtype(arr), 'period')

        arr = np.array([n, pd.Period('2011-01', freq='D'), n])
        self.assertEqual(pd.lib.infer_dtype(arr), 'period')

    # different type of nat
    arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
                   dtype=object)
    self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')

    arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')],
                   dtype=object)
    self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
def test_datetime(self): expected = np.datetime64('2000-01-01T00Z') actual = _as_compatible_data(expected) self.assertEqual(expected, actual) self.assertEqual(np.ndarray, type(actual)) self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype) expected = np.array([np.datetime64('2000-01-01T00Z')]) actual = _as_compatible_data(expected) self.assertEqual(np.asarray(expected), actual) self.assertEqual(np.ndarray, type(actual)) self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype) expected = np.array([np.datetime64('2000-01-01T00Z', 'ns')]) actual = _as_compatible_data(expected) self.assertEqual(np.asarray(expected), actual) self.assertEqual(np.ndarray, type(actual)) self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype) self.assertIs(expected, source_ndarray(np.asarray(actual))) expected = np.datetime64('2000-01-01T00Z', 'ns') actual = _as_compatible_data(datetime(2000, 1, 1)) self.assertEqual(np.asarray(expected), actual) self.assertEqual(np.ndarray, type(actual)) self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
def safe_date(value, date_format, datetime_unit):
    date_str = value.strip().strip("'").strip('"')
    if date_str == '?':
        return np.datetime64('NaT', datetime_unit)
    else:
        dt = datetime.datetime.strptime(date_str, date_format)
        return np.datetime64(dt).astype("datetime64[%s]" % datetime_unit)
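# Hypothetical usage of safe_date() defined above, assuming `np` is numpy and
# `datetime` is the stdlib module as in that snippet; the format string and
# unit below are illustrative. '?' maps to NaT, anything else is parsed with
# strptime and truncated to the requested datetime64 unit.
print(safe_date("'2014-12-27 10:05:00'", '%Y-%m-%d %H:%M:%S', 's'))
# -> 2014-12-27T10:05:00
print(safe_date('?', '%Y-%m-%d %H:%M:%S', 's'))
# -> NaT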
def test_datetime(self): expected = np.datetime64('2000-01-01T00') actual = _as_compatible_data(expected) self.assertEqual(expected, actual) self.assertEqual(np.datetime64, type(actual)) self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype) expected = np.array([np.datetime64('2000-01-01T00')]) actual = _as_compatible_data(expected) self.assertEqual(np.asarray(expected), actual) self.assertEqual(NumpyArrayAdapter, type(actual)) self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype) expected = np.array([np.datetime64('2000-01-01T00', 'ns')]) actual = _as_compatible_data(expected) self.assertEqual(np.asarray(expected), actual) self.assertEqual(NumpyArrayAdapter, type(actual)) self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype) self.assertIs(expected, source_ndarray(np.asarray(actual))) expected = pd.Timestamp('2000-01-01T00').to_datetime() actual = _as_compatible_data(expected) self.assertEqual(np.asarray(expected), actual) self.assertEqual(NumpyArrayAdapter, type(actual)) self.assertEqual(np.dtype('O'), actual.dtype)
def test_nat_items(self):
    # not a datetime
    nadt_no_unit = np.datetime64("NaT")
    nadt_s = np.datetime64("NaT", "s")
    nadt_d = np.datetime64("NaT", "ns")
    # not a timedelta
    natd_no_unit = np.timedelta64("NaT")
    natd_s = np.timedelta64("NaT", "s")
    natd_d = np.timedelta64("NaT", "ns")

    dts = [nadt_no_unit, nadt_s, nadt_d]
    tds = [natd_no_unit, natd_s, natd_d]
    for a, b in itertools.product(dts, dts):
        self._assert_func(a, b)
        self._assert_func([a], [b])
        self._test_not_equal([a], b)

    for a, b in itertools.product(tds, tds):
        self._assert_func(a, b)
        self._assert_func([a], [b])
        self._test_not_equal([a], b)

    for a, b in itertools.product(tds, dts):
        self._test_not_equal(a, b)
        self._test_not_equal(a, [b])
        self._test_not_equal([a], [b])
        self._test_not_equal([a], np.datetime64("2017-01-01", "s"))
        self._test_not_equal([b], np.datetime64("2017-01-01", "s"))
        self._test_not_equal([a], np.timedelta64(123, "s"))
        self._test_not_equal([b], np.timedelta64(123, "s"))
def SmartIntervalSearcher(curve_to_be_analysed):
    new_x_axis = np.empty(len(curve_to_be_analysed.x),
                          dtype=np.datetime64('2015-01-01'))
    derivative = np.zeros(len(curve_to_be_analysed.x))
    for i, data in enumerate(curve_to_be_analysed.x):
        new_x_axis[i] = np.datetime64(data.date)

    if len(curve_to_be_analysed.x) > 20:
        for i in xrange(len(curve_to_be_analysed) - 1):
            delta = (new_x_axis[i + 1] - new_x_axis[i]) / np.timedelta64(1, 'D')
            derivative[i] = (curve_to_be_analysed.y[i + 1] -
                             curve_to_be_analysed.y[i]) / delta

        position = 0
        for i in xrange(len(derivative)):
            if derivative[-2 - i] < 0:
                pass
            else:
                position = len(derivative) - i + 2
                break

        if position > 0.9 * len(curve_to_be_analysed.x):
            if int(0.9 * len(curve_to_be_analysed.x)) < 20:
                position = len(curve_to_be_analysed.x) - 20
            else:
                position = int(0.9 * len(curve_to_be_analysed.x))
    else:
        position = 0
    return position
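# Minimal standalone illustration (made-up values) of the spacing calculation
# used above: dividing a datetime64 difference by np.timedelta64(1, 'D')
# gives the gap as a float number of days, which makes the finite-difference
# derivative well defined on a date axis.
import numpy as np

x = np.array(['2015-01-01', '2015-01-04', '2015-01-11'], dtype='datetime64[D]')
y = np.array([10.0, 16.0, 30.0])
delta_days = np.diff(x) / np.timedelta64(1, 'D')
derivative = np.diff(y) / delta_days
print(delta_days)   # [3. 7.]
print(derivative)   # [2. 2.]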
def test_conversion(self): rs = self.dtc.convert(['2012-1-1'], None, None)[0] xp = datetime(2012, 1, 1).toordinal() self.assertEqual(rs, xp) rs = self.dtc.convert('2012-1-1', None, None) self.assertEqual(rs, xp) rs = self.dtc.convert(date(2012, 1, 1), None, None) self.assertEqual(rs, xp) rs = self.dtc.convert(datetime(2012, 1, 1).toordinal(), None, None) self.assertEqual(rs, xp) rs = self.dtc.convert('2012-1-1', None, None) self.assertEqual(rs, xp) rs = self.dtc.convert(Timestamp('2012-1-1'), None, None) self.assertEqual(rs, xp) # also testing datetime64 dtype (GH8614) rs = self.dtc.convert(np.datetime64('2012-01-01'), None, None) self.assertEqual(rs, xp) rs = self.dtc.convert(np.datetime64('2012-01-01 00:00:00+00:00'), None, None) self.assertEqual(rs, xp) rs = self.dtc.convert(np.array([np.datetime64('2012-01-01 00:00:00+00:00'), np.datetime64('2012-01-02 00:00:00+00:00')]), None, None) self.assertEqual(rs[0], xp)
def test_holomap_slider_unsorted_datetime_values_initialization(self):
    hmap = HoloMap([(np.datetime64(10005, 'D'), Curve([1, 2, 3])),
                    (np.datetime64(10000, 'D'), Curve([1, 2, 4]))],
                   sort=False)
    widgets = bokeh_renderer.get_widget(hmap, 'widgets')
    widgets()
    self.assertEqual(widgets.plot.current_key,
                     (np.datetime64(10000, 'D'),))
    self.assertEqual(widgets.plot.current_frame,
                     hmap[np.datetime64(10000, 'D')])
def createCube(self, cellSize_xy): cellNumber_x = round((self.extent.XMax - self.extent.XMin) / cellSize_xy) cellNumber_y = round((self.extent.YMax - self.extent.YMin) / cellSize_xy) X = self.ssdo.xyCoords[:,0] Y = self.ssdo.xyCoords[:,1] time = self.ssdo.fields[self.timeField].data time = NUM.array([i for i in time], NUM.datetime64) startDateTime = NUM.datetime64('1970-01-01 00:00:00') T = time - startDateTime self.startTime = NUM.amin(T) + NUM.datetime64('1970-01-01 00:00:00') T = NUM.array([i.item().days for i in T], int) startT = NUM.amin(T) endT = NUM.amax(T) cellNumber_t = round((endT - startT) / self.cellSize_t) + 1 X = (X - self.extent.XMin) / self.cellSize_xy Y = (self.extent.YMax - Y) / self.cellSize_xy T = (T - startT) / self.cellSize_t X = NUM.floor(X) Y = NUM.floor(Y) T = NUM.floor(T) CellIdList = (cellNumber_x * cellNumber_y * T) + (cellNumber_x * Y) + X BothEnds = NUM.array([0, (cellNumber_t * cellNumber_x * cellNumber_y -1)]) CellIdList = NUM.concatenate((CellIdList, BothEnds), axis=0) CellIdList = NUM.array(CellIdList, dtype = 'int32') counts = NUM.bincount(CellIdList) counts[BothEnds[0]] = counts[BothEnds[0]] - 1 counts[BothEnds[1]] = counts[BothEnds[1]] - 1 return counts.reshape(cellNumber_t, cellNumber_x, cellNumber_y)
def compare_different_layers():
    train_start_date = np.datetime64("2010-01-01")
    train_end_date = np.datetime64("2014-12-31")
    test_start_date = np.datetime64("2015-01-01")
    test_end_date = np.datetime64("2016-07-31")

    layers_list = [
        [], [25], [50], [100],
        [25, 10], [40, 20], [50, 25], [75, 30], [100, 50],
        [50, 25, 10], [100, 50, 25], [200, 100, 50]
    ]

    for layers in layers_list:
        model = NikkeiModel(layers, './model/2010-2015_37-' +
                            '-'.join([str(x) for x in layers]) + '-3')
        if not model.is_trained():
            model.prepare_training_data(train_start_date, train_end_date)
            model.prepare_test_data(test_start_date, test_end_date)
            model.train(eval_on_test=True)
def run_backtest():
    test_start_date = np.datetime64("2015-01-01")
    test_end_date = np.datetime64("2016-07-31")
    model = NikkeiModel([50, 25], './model/2010-2015_37-50-25-3')
    model.prepare_test_data(test_start_date, test_end_date)
    model.evaluate()
    model.backtest()
def mock_get_descriptor(query_parameters): variables = query_parameters['variables'] descriptor = { 'ls5_nbar_albers': { 'storage_units': { (636419476.0, -3900012.5, 1500012.5): { 'storage_shape': (2, 400, 400), 'storage_min': (636419476.0, -3999987.5, 1500012.5), 'irregular_indices': { u'time': [636419476.0, 661302607.0]}, 'storage_path': '/g/data/u46/public/datacube/data/LANDSAT_5_TM_NBAR_ALB_15_-40_1990.nc', 'storage_max': (661302607.0, -3900012.5, 1599987.5) } }, 'dimensions': [u'time', u'y', u'x'], 'result_max': ( numpy.datetime64('1990-12-16T10:10:07.000000000+1100'), -3956662.5, 1555062.5), 'irregular_indices': {u'time': array(['1990-03-03T10:11:16.000000000+1100', '1990-12-16T10:10:07.000000000+1100'], dtype='datetime64[ns]') }, 'result_min': ( numpy.datetime64('1990-03-03T10:11:16.000000000+1100'), -3962362.5, 1544587.5), 'result_shape': (2, 229, 420)}} descriptor['ls5_nbar_albers']['variables'] = { name: {'nodata_value': -999, 'datatype_name': dtype('int16')} for name in variables } return descriptor
def calcReturn(log, transactions, date):
    startt = str(np.datetime64(date + 'T00:01'))[:16]
    endt = str(np.datetime64(date + 'T23:59'))[:16]
    starty = str(np.datetime64(date + 'T00:01') - np.timedelta64(1, 'D'))[:16]
    endy = str(np.datetime64(date + 'T23:59') - np.timedelta64(1, 'D'))[:16]
    dft = log[startt:endt]
    dfy = log[starty:endy]
    try:
        tdf = transactions[date]
    except:
        tdf = []

    if len(dfy) > 0:
        startValue = float(dfy.tail(1)['EUR'] +
                           dfy.tail(1)['BTC'] * dfy.tail(1)['Bid'])
        openPrice = float((dfy.tail(1)['Bid'] + dfy.tail(1)['Ask']) / 2)
    else:
        startValue = float(dft.head(1)['EUR'] +
                           dft.head(1)['BTC'] * dft.head(1)['Bid'])
        openPrice = float((dft.head(1)['Bid'] + dft.head(1)['Ask']) / 2)

    endValue = float(dft.tail(1)['EUR'] +
                     dft.tail(1)['BTC'] * dft.tail(1)['Bid'])
    closePrice = float((dft.tail(1)['Bid'] + dft.tail(1)['Ask']) / 2)

    if len(tdf) != 0:
        tdf['EUR'] = tdf['AmountBTC'] * (tdf['Bid'] + tdf['Ask']) / 2 \
            + tdf['AmountEUR']
        endValue = endValue - float(tdf.sum()['EUR'])

    retStrategy = endValue / startValue - 1
    retHold = closePrice / openPrice - 1
    return openPrice, closePrice, retHold, retStrategy
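# Small standalone sketch (synthetic data, not the trading log used above) of
# the string-based slicing calcReturn() relies on: a DataFrame with a
# DatetimeIndex can be sliced with timestamp strings built from np.datetime64,
# and rows at both boundary timestamps are included.
import numpy as np
import pandas as pd

idx = pd.date_range('2017-03-01 00:00', periods=48, freq='H')
log = pd.DataFrame({'Bid': np.linspace(100, 147, 48)}, index=idx)

date = '2017-03-02'
startt = str(np.datetime64(date + 'T00:01'))[:16]
endt = str(np.datetime64(date + 'T23:59'))[:16]
print(log[startt:endt].shape)  # (23, 1) -- the hourly rows 01:00 .. 23:00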
def test_fetch_basic(self): ts = carbonara.AggregatedTimeSerie.from_data( timestamps=[datetime64(2014, 1, 1, 12, 0, 0), datetime64(2014, 1, 1, 12, 0, 4), datetime64(2014, 1, 1, 12, 0, 9)], values=[3, 5, 6], aggregation=carbonara.Aggregation( "mean", numpy.timedelta64(1, 's'), None)) self.assertEqual( [(datetime64(2014, 1, 1, 12), 3), (datetime64(2014, 1, 1, 12, 0, 4), 5), (datetime64(2014, 1, 1, 12, 0, 9), 6)], list(ts.fetch())) self.assertEqual( [(datetime64(2014, 1, 1, 12, 0, 4), 5), (datetime64(2014, 1, 1, 12, 0, 9), 6)], list(ts.fetch( from_timestamp=datetime64(2014, 1, 1, 12, 0, 4)))) self.assertEqual( [(datetime64(2014, 1, 1, 12, 0, 4), 5), (datetime64(2014, 1, 1, 12, 0, 9), 6)], list(ts.fetch( from_timestamp=numpy.datetime64(iso8601.parse_date( "2014-01-01 12:00:04"))))) self.assertEqual( [(datetime64(2014, 1, 1, 12, 0, 4), 5), (datetime64(2014, 1, 1, 12, 0, 9), 6)], list(ts.fetch( from_timestamp=numpy.datetime64(iso8601.parse_date( "2014-01-01 13:00:04+01:00")))))
def test_infer_dtype_period(self): # GH 13664 arr = np.array([pd.Period('2011-01', freq='D'), pd.Period('2011-02', freq='D')]) assert lib.infer_dtype(arr, skipna=True) == 'period' arr = np.array([pd.Period('2011-01', freq='D'), pd.Period('2011-02', freq='M')]) assert lib.infer_dtype(arr, skipna=True) == 'period' # starts with nan for n in [pd.NaT, np.nan]: arr = np.array([n, pd.Period('2011-01', freq='D')]) assert lib.infer_dtype(arr, skipna=True) == 'period' arr = np.array([n, pd.Period('2011-01', freq='D'), n]) assert lib.infer_dtype(arr, skipna=True) == 'period' # different type of nat arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')], dtype=object) assert lib.infer_dtype(arr, skipna=False) == 'mixed' arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')], dtype=object) assert lib.infer_dtype(arr, skipna=False) == 'mixed'
def get_holidays(self, start, end, cal="FX"):
    # TODO use Pandas CustomBusinessDays to get more calendars
    holidays_list = []

    if cal == "FX":
        # filter for Christmas & New Year's Day
        for i in range(1970, 2020):
            holidays_list.append(str(i) + "-12-25")
            holidays_list.append(str(i) + "-01-01")

    if cal == "WEEKDAY":
        bday = CustomBusinessDay(weekmask="Sat Sun")
        holidays_list = pandas.date_range(start, end, freq=bday)

    holidays_list = pandas.to_datetime(holidays_list).order()

    # floor start date
    start = np.datetime64(start) - np.timedelta64(1, "D")
    # ceiling end date
    end = np.datetime64(end) + np.timedelta64(1, "D")

    holidays_list = [x for x in holidays_list if x >= start and x <= end]

    return pandas.to_datetime(holidays_list)
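# Tiny standalone example (arbitrary dates, not the FX calendar above) of the
# floor/ceiling trick used in get_holidays(): widen the window by one day on
# each side before filtering, comparing pandas Timestamps directly against
# np.datetime64 bounds.
import numpy as np
import pandas as pd

holidays = pd.to_datetime(['2014-12-25', '2015-01-01', '2015-12-25'])
start = np.datetime64('2015-01-01') - np.timedelta64(1, 'D')
end = np.datetime64('2015-06-30') + np.timedelta64(1, 'D')
kept = [x for x in holidays if start <= x <= end]
print(kept)  # [Timestamp('2015-01-01 00:00:00')]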
def test_spacetime_join_select(drain_setup, spacetime_crime_agg):
    spacetime_crime_agg.execute()

    left = pd.DataFrame({'District': [1, 2],
                         'Community Area': [1, 2],
                         'date': [np.datetime64(date(2015, 12, 30)),
                                  np.datetime64(date(2015, 12, 31))]})
    df = spacetime_crime_agg.join(left)
    print spacetime_crime_agg.select(df, {'district': ['12h']})
def test_get_date_time_from_metadata(): assert (dtt.get_date_time_from_metadata(md1) == '2014-12-27T00:00:00+00:00') assert (dtt.get_date_time_from_metadata(md1, formatting='ISO') == '2014-12-27T00:00:00+00:00') assert (dtt.get_date_time_from_metadata(md1, formatting='datetime64') == np.datetime64('2014-12-27T00:00:00.000000')) assert (dtt.get_date_time_from_metadata(md1, formatting='datetime') == dt1) assert (dtt.get_date_time_from_metadata(md2) == '2124-03-25T10:04:48-05:00') assert (dtt.get_date_time_from_metadata(md2, formatting='datetime') == dt2) assert (dtt.get_date_time_from_metadata(md2, formatting='datetime64') == np.datetime64('2124-03-25T10:04:48')) assert (dtt.get_date_time_from_metadata(md3) == '2016-07-12T22:57:32') assert (dtt.get_date_time_from_metadata(md3, formatting='datetime') == dt3) assert (dtt.get_date_time_from_metadata(md3, formatting='datetime64') == np.datetime64('2016-07-12T22:57:32.000000')) assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {}})) == None) assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {'date': '2016-07-12'}})) == '2016-07-12') assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {'time': '12:00'}})) == '12:00:00') assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {'time': '12:00', 'time_zone': 'CET'}})) == '12:00:00')
def test_query_dates(self): p = Pipeline() dates = np.array([(np.datetime64('2012-01-01')), (np.datetime64('2013-04-05')), (np.datetime64('2014-03-11')), (np.datetime64('2015-01-01'))], dtype=[('dt', 'M8[D]')]) inds = np.array([(i,) for i in xrange(dates.size)], dtype=[('f0', int)]) np_in = p.add(NumpyRead(dates)) q2_node = p.add(Query("dt <= DT('2014-01-01')")) np_in['output'] > q2_node['input'] np_out = p.add(NumpyWrite()) q2_node['output'] > np_out['input'] np_complement = p.add(NumpyWrite()) q2_node['complement'] > np_complement['input'] np_out_inds = p.add(NumpyWrite()) q2_node['output_inds'] > np_out_inds['input'] np_complement_inds = p.add(NumpyWrite()) q2_node['complement_inds'] > np_complement_inds['input'] self.run_pipeline(p) self.assertTrue(np.array_equal(np_out.get_stage().result, dates[:2])) self.assertTrue(np.array_equal(np_complement.get_stage().result, dates[2:])) self.assertTrue(np.array_equal(np_out_inds.get_stage().result, inds[:2])) self.assertTrue(np.array_equal(np_complement_inds.get_stage().result, inds[2:]))
def test_is_datetimelike_array_all_nan_nat_like(self):
    arr = np.array([np.nan, pd.NaT, np.datetime64('nat')])
    assert lib.is_datetime_array(arr)
    assert lib.is_datetime64_array(arr)
    assert not lib.is_timedelta_or_timedelta64_array(arr)

    arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')])
    assert not lib.is_datetime_array(arr)
    assert not lib.is_datetime64_array(arr)
    assert lib.is_timedelta_or_timedelta64_array(arr)

    arr = np.array([np.nan, pd.NaT, np.datetime64('nat'),
                    np.timedelta64('nat')])
    assert not lib.is_datetime_array(arr)
    assert not lib.is_datetime64_array(arr)
    assert not lib.is_timedelta_or_timedelta64_array(arr)

    arr = np.array([np.nan, pd.NaT])
    assert lib.is_datetime_array(arr)
    assert lib.is_datetime64_array(arr)
    assert lib.is_timedelta_or_timedelta64_array(arr)

    arr = np.array([np.nan, np.nan], dtype=object)
    assert not lib.is_datetime_array(arr)
    assert not lib.is_datetime64_array(arr)
    assert not lib.is_timedelta_or_timedelta64_array(arr)

    assert lib.is_datetime_with_singletz_array(
        np.array([pd.Timestamp('20130101', tz='US/Eastern'),
                  pd.Timestamp('20130102', tz='US/Eastern')],
                 dtype=object))
    assert not lib.is_datetime_with_singletz_array(
        np.array([pd.Timestamp('20130101', tz='US/Eastern'),
                  pd.Timestamp('20130102', tz='CET')],
                 dtype=object))
def convert_to_date(array, fmt='%m-%d-%Y'): ## If array is a np.ndarray with type == np.datetime64, the array can be ## returned as such. If it is an np.ndarray of dtype 'object' then conversion ## to string is tried according to the fmt parameter. if(isinstance(array, np.ndarray) and np.issubdtype(array.dtype, np.datetime64)): ## no need to perform any conversion in this case return array elif(isinstance(array, list) or (isinstance(array, np.ndarray) and array.dtype == 'object')): return_value = [] # Pandas to_datetime handles all the cases where the passed in # data could be any of the combinations of # [list, nparray] X [python_datetime, np.datetime] # Because of the coerce=True flag, any non-compatible datetime type # will be converted to pd.NaT. By this comparision, we can figure # out if it is date castable or not. if(len(np.shape(array)) == 2): for elem in array: temp_val = pd.to_datetime(elem, errors='coerce', box=False, infer_datetime_format=True) temp_val = elem if (temp_val[0] == np.datetime64('NaT')) else temp_val return_value.append(temp_val) elif(isinstance(array, list)): temp_val = pd.to_datetime(array, errors='coerce', box=False, infer_datetime_format=True) return_value = array if (temp_val[0] == np.datetime64('NaT')) else temp_val else: temp_val = pd.to_datetime(array, errors='coerce', box=False, infer_datetime_format=True) temp_val = array if (temp_val[0] == np.datetime64('NaT')) else temp_val return_value = temp_val return return_value elif(isinstance(array, np.ndarray)): warnings.warn("Array could not be converted into a date") return array
def test_split_key_cmp(self): dt1 = numpy.datetime64("2015-01-01T15:03") dt1_1 = numpy.datetime64("2015-01-01T15:03") dt2 = numpy.datetime64("2015-01-05T15:03") td = numpy.timedelta64(60, 's') td2 = numpy.timedelta64(300, 's') self.assertEqual( carbonara.SplitKey.from_timestamp_and_sampling(dt1, td), carbonara.SplitKey.from_timestamp_and_sampling(dt1, td)) self.assertEqual( carbonara.SplitKey.from_timestamp_and_sampling(dt1, td), carbonara.SplitKey.from_timestamp_and_sampling(dt1_1, td)) self.assertNotEqual( carbonara.SplitKey.from_timestamp_and_sampling(dt1, td), carbonara.SplitKey.from_timestamp_and_sampling(dt2, td)) self.assertNotEqual( carbonara.SplitKey.from_timestamp_and_sampling(dt1, td), carbonara.SplitKey.from_timestamp_and_sampling(dt1, td2)) self.assertLess( carbonara.SplitKey.from_timestamp_and_sampling(dt1, td), carbonara.SplitKey.from_timestamp_and_sampling(dt2, td)) self.assertLessEqual( carbonara.SplitKey.from_timestamp_and_sampling(dt1, td), carbonara.SplitKey.from_timestamp_and_sampling(dt1, td)) self.assertGreater( carbonara.SplitKey.from_timestamp_and_sampling(dt2, td), carbonara.SplitKey.from_timestamp_and_sampling(dt1, td)) self.assertGreaterEqual( carbonara.SplitKey.from_timestamp_and_sampling(dt2, td), carbonara.SplitKey.from_timestamp_and_sampling(dt2, td))
    # convolve daily doses with a transfer function for delayed effectiveness
    # of vaccines
    pts = int(VAX_ONSET_MU + 3 * VAX_ONSET_SIGMA)
    x = np.arange(-pts, pts + 1, 1)
    kernel = np.exp(-((x - VAX_ONSET_MU) ** 2) / (2 * VAX_ONSET_SIGMA ** 2))
    kernel /= kernel.sum()
    convolved = convolve(daily, kernel, mode='same')
    effective_doses_per_100 = convolved.cumsum()
    immune = 0.4 * effective_doses_per_100[len(historical_doses_per_100):] / 100
    return immune


dates, new = covidlive_new_cases('ACT', start_date=np.datetime64('2021-05-10'))

if dates[-1] >= np.datetime64('2022-03-25'):
    TEST_DETECTION_RATE = 0.35
elif dates[-1] >= np.datetime64('2022-01-09'):
    TEST_DETECTION_RATE = 0.27
else:
    TEST_DETECTION_RATE = 0.2

START_VAX_PROJECTIONS = 111  # Aug 29
all_dates = dates
all_new = new

# Current vaccination level:
doses_per_100 = air_doses_per_100(n=len(dates))
# Generate a list of cutoff dates seconds_per_day = 24 * 60 * 60 day_gap = 5 start_unix = 1372633253 # Minimum datetime (2013-07-01 00:00:53) in seconds (unix) end_unix = 1404169154 # Maximum datetime (2014-06-30 23:59:14) in seconds (unix) dates = np.arange(start_unix, end_unix, day_gap * seconds_per_day) cutoff_dates = np.array([], dtype = 'datetime64[s]') # Set the seed for reproducibility np.random.seed(30061992) for begin, end in zip(dates[:-1], dates[1:]): cutoff_date = np.random.randint(low = begin, high = end) cutoff_dates = np.append(cutoff_dates, np.datetime64(cutoff_date, 's')) # Import the data in chunks with pandas read_csv data_chunks = pd.read_csv(filepath_or_buffer = filepath, sep = ",", #nrows = 2000, chunksize = chunk_size, converters = {#"TIMESTAMP" : lambda x: datetime.datetime.fromtimestamp(x), "POLYLINE": lambda x: json.loads(x), "GRID_POLYLINE": lambda x: eval(x)}) # Utilities for processing savedTest = False truncate = lambda x, y: x[:y] for idx, chunk in enumerate(data_chunks):
def benchmark(cls): """Run a speed benchmark!""" points = SplitKey.POINTS_PER_SPLIT sampling = numpy.timedelta64(5, 's') resample = numpy.timedelta64(35, 's') now = numpy.datetime64("2015-04-03 23:11") timestamps = numpy.sort( numpy.array([now + i * sampling for i in six.moves.range(points)])) print(cls.__name__) print("=" * len(cls.__name__)) for title, values in [ ("Simple continuous range", six.moves.range(points)), ("All 0", [float(0)] * points), ("All 1", [float(1)] * points), ("0 and 1", [0, 1] * (points // 2)), ("1 and 0 random", [random.randint(0, 1) for x in six.moves.range(points)]), ("Small number random pos/neg", [random.randint(-100000, 10000) for x in six.moves.range(points)]), ("Small number random pos", [random.randint(0, 20000) for x in six.moves.range(points)]), ("Small number random neg", [random.randint(-20000, 0) for x in six.moves.range(points)]), ("Sin(x)", list(map(math.sin, six.moves.range(points)))), ("random ", [random.random() for x in six.moves.range(points)]), ]: print(title) serialize_times = 50 aggregation = Aggregation("mean", sampling, None) ts = cls.from_data(aggregation, timestamps, values) t0 = time.time() key = ts.get_split_key() for i in six.moves.range(serialize_times): e, s = ts.serialize(key, compressed=False) t1 = time.time() print(" Uncompressed serialization speed: %.2f MB/s" % (((points * 2 * 8) / ((t1 - t0) / serialize_times)) / (1024.0 * 1024.0))) print(" Bytes per point: %.2f" % (len(s) / float(points))) t0 = time.time() for i in six.moves.range(serialize_times): cls.unserialize(s, key, 'mean') t1 = time.time() print(" Unserialization speed: %.2f MB/s" % (((points * 2 * 8) / ((t1 - t0) / serialize_times)) / (1024.0 * 1024.0))) t0 = time.time() for i in six.moves.range(serialize_times): o, s = ts.serialize(key, compressed=True) t1 = time.time() print(" Compressed serialization speed: %.2f MB/s" % (((points * 2 * 8) / ((t1 - t0) / serialize_times)) / (1024.0 * 1024.0))) print(" Bytes per point: %.2f" % (len(s) / float(points))) t0 = time.time() for i in six.moves.range(serialize_times): cls.unserialize(s, key, 'mean') t1 = time.time() print(" Uncompression speed: %.2f MB/s" % (((points * 2 * 8) / ((t1 - t0) / serialize_times)) / (1024.0 * 1024.0))) def per_sec(t1, t0): return 1 / ((t1 - t0) / serialize_times) t0 = time.time() for i in six.moves.range(serialize_times): list(ts.split()) t1 = time.time() print(" split() speed: %.2f Hz" % per_sec(t1, t0)) # NOTE(sileht): propose a new series with half overload timestamps pts = ts.ts.copy() tsbis = cls(ts=pts, aggregation=aggregation) tsbis.ts['timestamps'] = ( tsbis.timestamps - numpy.timedelta64(sampling * points / 2, 's')) t0 = time.time() for i in six.moves.range(serialize_times): ts.merge(tsbis) t1 = time.time() print(" merge() speed %.2f Hz" % per_sec(t1, t0)) for agg in [ 'mean', 'sum', 'max', 'min', 'std', 'median', 'first', 'last', 'count', '5pct', '90pct' ]: serialize_times = 3 if agg.endswith('pct') else 10 ts = cls(ts=pts, aggregation=aggregation) t0 = time.time() for i in six.moves.range(serialize_times): ts.resample(resample) t1 = time.time() print(" resample(%s) speed: %.2f Hz" % (agg, per_sec(t1, t0)))
"""Time series data manipulation, better with pancetta.""" import collections import functools import math import operator import random import re import struct import time import lz4.block import numpy import six UNIX_UNIVERSAL_START64 = numpy.datetime64("1970", 'ns') ONE_SECOND = numpy.timedelta64(1, 's') class BeforeEpochError(Exception): """Error raised when a timestamp before Epoch is used.""" def __init__(self, timestamp): self.timestamp = timestamp super(BeforeEpochError, self).__init__("%s is before Epoch" % timestamp) class UnknownAggregationMethod(Exception): """Error raised when the aggregation method is unknown.""" def __init__(self, agg): self.aggregation_method = agg
# Extract data types data_types = [] for category in config["categories"]: for data_type in category["allowed_data_types"]: data_types.append(data_type["name"]) # Prepare for plot max_time = matrix["timestamp"].max().strftime("%Y-%m-%d") min_time = matrix["timestamp"].min().strftime("%Y-%m-%d") figure, subplot = plt.subplots(len(data_types), 1, constrained_layout=True) figure.suptitle('Records by timeframe (' + time_frame_size + ') from ' + min_time + " to " + max_time) figure.set_figheight(30) figure.set_figwidth(30) ts = (time_ranges - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's') label_array = np.reshape(ts, (-1, 1)) feature_matrix = np.reshape(ts, (-1, 1)) for data_type in data_types: # Filter by data type data_type_records = matrix.loc[matrix["data_type"] == data_type] # Ignore the data_type column data_type_records = data_type_records.loc[:, "timestamp":] # Group by timeslots and count the records record_number_by_timeframe = data_type_records.groupby(pd.Grouper(key='timestamp', freq=time_frame_size)).mean() # Extract time ranges for data_type data_type_time_ranges = record_number_by_timeframe.reset_index()['timestamp'].to_numpy()
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import

import numpy as np
import re
import os
import time

UNIT = 'us'
TYPE = 'M8[' + UNIT + ']'
DTYPE = np.dtype(TYPE)

NOT_A_DATE_TIME = np.datetime64('NaT')
POSITIVE_INFINITY = np.datetime64('294247-01-09T04:00:54.775807')
NEGATIVE_INFINITY = np.datetime64('-290308-12-22T19:59:05.224191')


def is_undefined(numpy_time):
    return str(numpy_time) == str(NOT_A_DATE_TIME)


def is_positive_infinity(numpy_time):
    return numpy_time == POSITIVE_INFINITY


def is_negative_infinity(numpy_time):
    return numpy_time == NEGATIVE_INFINITY
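# Standalone note on the is_undefined() helper above: NaT compares unequal to
# everything, including itself, so an equality test against NOT_A_DATE_TIME
# would always be False; comparing the string representations is what makes
# the check work. A minimal demonstration:
import numpy as np

nat = np.datetime64('NaT')
print(nat == np.datetime64('NaT'))            # False (NaT never equals NaT)
print(str(nat) == str(np.datetime64('NaT')))  # True  ('NaT' == 'NaT')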
def projected_vaccine_immune_population(t, historical_doses_per_100): """compute projected future susceptible population, given an array historical_doses_per_100 for cumulative doses doses per 100 population prior to and including today (length doesn't matter, so long as it goes back longer than VAX_ONSET_MU plus 3 * VAX_ONSET_SIGMA), and assuming a certain vaccine efficacy and rollout schedule""" # We assume vaccine effectiveness after each dose ramps up the integral of a Gaussian # with the following mean and stddev in days: VAX_ONSET_MU = 10.5 VAX_ONSET_SIGMA = 3.5 SEP = np.datetime64('2021-09-01').astype(int) - dates[-1].astype(int) OCT = np.datetime64('2021-10-01').astype(int) - dates[-1].astype(int) doses_per_100 = np.zeros_like(t) doses_per_100[0] = historical_doses_per_100[-1] # History of previously projected rates, so I can remake old projections: if dates[-1] >= np.datetime64('2021-10-21'): AUG_RATE = None SEP_RATE = None OCT_RATE = 0.1 elif dates[-1] >= np.datetime64('2021-10-30'): AUG_RATE = None SEP_RATE = None OCT_RATE = 0.5 elif dates[-1] >= np.datetime64('2021-10-10'): AUG_RATE = None SEP_RATE = None OCT_RATE = 1.3 else: AUG_RATE = 1.4 SEP_RATE = 1.6 OCT_RATE = 1.8 for i in range(1, len(doses_per_100)): if i < SEP: doses_per_100[i] = doses_per_100[i - 1] + AUG_RATE elif i < OCT: doses_per_100[i] = doses_per_100[i - 1] + SEP_RATE else: doses_per_100[i] = doses_per_100[i - 1] + OCT_RATE if dates[-1] >= np.datetime64('2021-11-21'): MAX_DOSES_PER_100 = 2 * 84.0 else: MAX_DOSES_PER_100 = 2 * 85.0 doses_per_100 = np.clip(doses_per_100, 0, MAX_DOSES_PER_100) all_doses_per_100 = np.concatenate([historical_doses_per_100, doses_per_100]) # The "prepend=0" makes it as if all the doses in the initial day were just # administered all at once, but as long as historical_doses_per_100 is long enough # for it to have taken full effect, it doesn't matter. daily = np.diff(all_doses_per_100, prepend=0) # convolve daily doses with a transfer function for delayed effectiveness of vaccnes pts = int(VAX_ONSET_MU + 3 * VAX_ONSET_SIGMA) x = np.arange(-pts, pts + 1, 1) kernel = np.exp(-((x - VAX_ONSET_MU) ** 2) / (2 * VAX_ONSET_SIGMA ** 2)) kernel /= kernel.sum() convolved = convolve(daily, kernel, mode='same') effective_doses_per_100 = convolved.cumsum() immune = 0.4 * effective_doses_per_100[len(historical_doses_per_100):] / 100 return immune
class TestIndexConstructorInference: @pytest.mark.parametrize("na_value", [None, np.nan]) @pytest.mark.parametrize("vtype", [list, tuple, iter]) def test_construction_list_tuples_nan(self, na_value, vtype): # GH#18505 : valid tuples containing NaN values = [(1, "two"), (3.0, na_value)] result = Index(vtype(values)) expected = MultiIndex.from_tuples(values) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "dtype", [ int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8" ], ) def test_constructor_int_dtype_float(self, dtype): # GH#18400 if is_unsigned_integer_dtype(dtype): index_type = UInt64Index else: index_type = Int64Index expected = index_type([0, 1, 2, 3]) result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("cast_index", [True, False]) @pytest.mark.parametrize("vals", [[True, False, True], np.array([True, False, True], dtype=bool)]) def test_constructor_dtypes_to_object(self, cast_index, vals): if cast_index: index = Index(vals, dtype=bool) else: index = Index(vals) assert type(index) is Index assert index.dtype == object def test_constructor_categorical_to_object(self): # GH#32167 Categorical data and dtype=object should return object-dtype ci = CategoricalIndex(range(5)) result = Index(ci, dtype=object) assert not isinstance(result, CategoricalIndex) def test_constructor_infer_periodindex(self): xp = period_range("2012-1-1", freq="M", periods=3) rs = Index(xp) tm.assert_index_equal(rs, xp) assert isinstance(rs, PeriodIndex) @pytest.mark.parametrize("pos", [0, 1]) @pytest.mark.parametrize( "klass,dtype,ctor", [ (DatetimeIndex, "datetime64[ns]", np.datetime64("nat")), (TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")), ], ) def test_constructor_infer_nat_dt_like(self, pos, klass, dtype, ctor, nulls_fixture, request): if isinstance(nulls_fixture, Decimal): # We dont cast these to datetime64/timedelta64 return expected = klass([NaT, NaT]) assert expected.dtype == dtype data = [ctor] data.insert(pos, nulls_fixture) warn = None if nulls_fixture is NA: expected = Index([NA, NaT]) mark = pytest.mark.xfail( reason="Broken with np.NaT ctor; see GH 31884") request.node.add_marker(mark) # GH#35942 numpy will emit a DeprecationWarning within the # assert_index_equal calls. Since we can't do anything # about it until GH#31884 is fixed, we suppress that warning. warn = DeprecationWarning result = Index(data) with tm.assert_produces_warning(warn): tm.assert_index_equal(result, expected) result = Index(np.array(data, dtype=object)) with tm.assert_produces_warning(warn): tm.assert_index_equal(result, expected) @pytest.mark.parametrize("swap_objs", [True, False]) def test_constructor_mixed_nat_objs_infers_object(self, swap_objs): # mixed np.datetime64/timedelta64 nat results in object data = [np.datetime64("nat"), np.timedelta64("nat")] if swap_objs: data = data[::-1] expected = Index(data, dtype=object) tm.assert_index_equal(Index(data), expected) tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
def get_reference_date():
    return np.datetime64(get_today_str())
# Now get some basic parameters for the run
start_date = '01/01/2011'
end_date = '01/03/2016'
plot = ['Belian', 'LF', 'B North', 'B South', 'E', 'Seraya', 'DC1', 'DC2']
LAI_MH = [6.69, 4.78, 3.00, 2.26, 3.84, 6.22, 5.93, 5.89]
LAI_rad = [8.30, 5.76, 4.87, 3.73, 5.70, 9.01, 8.25, 9.35]
LAI_hemiphot = [4.46, 3.76, 3.65, 3.44, 3.93, 4.27, 4.40, 4.05]
Csoil = [8295.66, 11275.18, 3934.03, 4916.91,
         11925.08, 24347.79, 8144.94, -9999.]

# Initiate some arrays to host time series
d, m, y = start_date.split('/')
start = np.datetime64(y + '-' + m + '-' + d, 'D')
d, m, y = end_date.split('/')
end = np.datetime64(y + '-' + m + '-' + d, 'D')
date = np.arange(start, end + np.timedelta64(1, 'D'), dtype='datetime64[D]')

N_t = date.size
mn2t_in = np.zeros(N_t) - 9999.
mx2t_in = np.zeros(N_t) - 9999.
vpd_in = np.zeros(N_t) - 9999.
ssrd_in = np.zeros(N_t) - 9999.
pptn_in = np.zeros(N_t) - 9999.
mn2t21_in = np.zeros(N_t) - 9999.
mx2t21_in = np.zeros(N_t) - 9999.
vpd21_in = np.zeros(N_t) - 9999.
ssrd21_in = np.zeros(N_t) - 9999.
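# Quick standalone check (same idea as the date axis built above, with a
# shorter range) that np.arange over datetime64[D] produces an inclusive
# daily axis when one extra day is added to the stop value.
import numpy as np

start = np.datetime64('2011-01-01', 'D')
end = np.datetime64('2011-01-05', 'D')
date = np.arange(start, end + np.timedelta64(1, 'D'), dtype='datetime64[D]')
print(date.size)          # 5
print(date[0], date[-1])  # 2011-01-01 2011-01-05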
    Ta_LF[:first_ind] = np.nan
    Ta_LF[last_ind + 1:-1] = np.nan
    Ta_LF[np.where(GHT_powdB < -300)[0]] = np.nan
    GHT_powdB[np.where(GHT_powdB < -300)[0]] = np.nan

    return GHT_powdB, pdB_LF, T_amp, Ta_LF


# %%
# fGHT = [15, 35]
lp_cutoff_period = 6  # hrs

t_start = np.datetime64('2018-02-01T00:00')
t_end = np.datetime64('2018-02-01T00:00')

for station in stations:
    print(station)
    # station = 'BBWU'#TWLV'
    # %% Getting back the objects:
    with open('output_results/mp' + station + '.pickle', 'rb') as f:
        # Python 3: open(..., 'rb')
        t, t_dt64, freqs, Pdb_array, pp, data_dir, station = pickle.load(
            f, encoding='latin1')
    t_start = np.min(np.append(t_dt64, t_start))
    t_end = np.max(np.append(t_dt64, t_end))
# See e.g. the discussion on the mailing list
#
#   https://mail.python.org/pipermail/numpy-discussion/2020-April/080566.html
#
# and the issue
#
#   https://github.com/numpy/numpy-stubs/issues/41
#
# for more context.

np.float32([1.0, 0.0, 0.0])  # E: incompatible type
np.complex64([])  # E: incompatible type

np.complex64(1, 2)  # E: Too many arguments
# TODO: protocols (can't check for non-existent protocols w/ __getattr__)

np.datetime64(0)  # E: non-matching overload

dt_64 = np.datetime64(0, "D")
td_64 = np.timedelta64(1, "h")

dt_64 + dt_64  # E: Unsupported operand types
td_64 - dt_64  # E: Unsupported operand types
td_64 / dt_64  # E: No overload
td_64 % 1  # E: Unsupported operand types
td_64 % dt_64  # E: Unsupported operand types


class A:
    def __float__(self):
        return 1.0
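# For contrast with the expressions flagged above, a small runtime sketch
# (illustrative values) of the combinations that are allowed: datetime64
# minus datetime64 gives a timedelta64, datetime64 plus/minus timedelta64
# stays a datetime64, and timedelta64 supports arithmetic with plain ints.
import numpy as np

dt_64 = np.datetime64(0, "D")        # 1970-01-01
td_64 = np.timedelta64(1, "h")

print(dt_64 + td_64)                          # 1970-01-01T01, a datetime64
print(np.datetime64("2020-04-05") - dt_64)    # 18357 days, a timedelta64
print(td_64 * 2, td_64 // np.timedelta64(30, "m"))  # 2 hours 2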
# Test for #25057 # pytz doesn't support fold. Check that we raise # if fold is passed with pytz msg = "pytz timezones do not support fold. Please use dateutil timezones." tz = pytz.timezone("Europe/London") with pytest.raises(ValueError, match=msg): Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize( "ts_input", [ 1572136200000000000, 1572136200000000000.0, np.datetime64(1572136200000000000, "ns"), "2019-10-27 01:30:00+01:00", datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), ], ) def test_timestamp_constructor_fold_conflict(ts_input, fold): # Test for #25057 # Check that we raise on fold conflict msg = ("Cannot pass fold with possibly unambiguous input: int, float, " "numpy.datetime64, str, or timezone-aware datetime-like. " "Pass naive datetime-like or build Timestamp from components.") with pytest.raises(ValueError, match=msg): Timestamp(ts_input=ts_input, fold=fold) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", None])
        return pd.CategoricalIndex(data, categories=cats,
                                   ordered=idx.ordered, name=idx.name)
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(l) for l in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels,
                             names=idx.names)
    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))


_simple_fake_mapping = {
    'b': np.bool_(True),
    'V': np.void(b' '),
    'M': np.datetime64('1970-01-01'),
    'm': np.timedelta64(1),
    'S': np.str_('foo'),
    'a': np.str_('foo'),
    'U': np.unicode_('foo'),
    'O': 'foo'
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ('i', 'f', 'u'):
        return dtype.type(1)
    elif dtype.kind == 'c':
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
        o = _simple_fake_mapping[dtype.kind]
dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10', 'output') for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') saveFile = os.path.join(dirWRTDS, siteNo) df = pd.read_csv(saveFile, index_col=None).set_index('date') # df = utils.time.datePdf(df) dictWRTDS[siteNo] = df # Observation dictObs = dict() for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') df = waterQuality.readSiteTS(siteNo, varLst=codeLst, freq='W') dictObs[siteNo] = df # calculate correlation tt = np.datetime64('2010-01-01') ind1 = np.where(df.index.values < tt)[0] ind2 = np.where(df.index.values >= tt)[0] dictLSTM = dictLSTMLst[1] corrMat = np.full([len(siteNoLst), len(codeLst), 3], np.nan) for ic, code in enumerate(codeLst): for siteNo in dictSite[code]: indS = siteNoLst.index(siteNo) v1 = dictLSTM[siteNo][code].iloc[ind2].values v2 = dictWRTDS[siteNo][code].iloc[ind2].values v3 = dictObs[siteNo][code].iloc[ind2].values rmse1, corr1 = utils.stat.calErr(v1, v2) rmse2, corr2 = utils.stat.calErr(v1, v3) rmse3, corr3 = utils.stat.calErr(v2, v3) corrMat[indS, ic, 0] = corr1 corrMat[indS, ic, 1] = corr2
# categoricals are handled separately
_any_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2]),
    ('mixed', ['a', np.nan, 2.0]),
    ('floating', [1.0, np.nan, 2.0]),
    ('integer', [1, np.nan, 2]),
    ('mixed-integer-float', [1, np.nan, 2.0]),
    ('decimal', [Decimal(1), np.nan, Decimal(2)]),
    ('boolean', [True, np.nan, False]),
    ('datetime64', [np.datetime64('2013-01-01'), np.nan,
                    np.datetime64('2018-01-01')]),
    ('datetime', [pd.Timestamp('20130101'), np.nan,
                  pd.Timestamp('20180101')]),
    ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
    ('time', [time(1), np.nan, time(2)]),
    ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)])]
ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id


@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
class TestDatetimeIndexComparisons(object): @pytest.mark.parametrize('other', [ datetime(2016, 1, 1), Timestamp('2016-01-01'), np.datetime64('2016-01-01') ]) def test_dti_cmp_datetimelike(self, other, tz): dti = pd.date_range('2016-01-01', periods=2, tz=tz) if tz is not None: if isinstance(other, np.datetime64): # no tzaware version available return elif isinstance(other, Timestamp): other = other.tz_localize(dti.tzinfo) else: other = tslib._localize_pydatetime(other, dti.tzinfo) result = dti == other expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = dti > other expected = np.array([False, True]) tm.assert_numpy_array_equal(result, expected) result = dti >= other expected = np.array([True, True]) tm.assert_numpy_array_equal(result, expected) result = dti < other expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) result = dti <= other expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) def dti_cmp_non_datetime(self, tz): # GH#19301 by convention datetime.date is not considered comparable # to Timestamp or DatetimeIndex. This may change in the future. dti = pd.date_range('2016-01-01', periods=2, tz=tz) other = datetime(2016, 1, 1).date() assert not (dti == other).any() assert (dti != other).all() with pytest.raises(TypeError): dti < other with pytest.raises(TypeError): dti <= other with pytest.raises(TypeError): dti > other with pytest.raises(TypeError): dti >= other @pytest.mark.parametrize('other', [None, np.nan, pd.NaT]) def test_dti_eq_null_scalar(self, other, tz): # GH#19301 dti = pd.date_range('2016-01-01', periods=2, tz=tz) assert not (dti == other).any() @pytest.mark.parametrize('other', [None, np.nan, pd.NaT]) def test_dti_ne_null_scalar(self, other, tz): # GH#19301 dti = pd.date_range('2016-01-01', periods=2, tz=tz) assert (dti != other).all() @pytest.mark.parametrize('other', [None, np.nan]) def test_dti_cmp_null_scalar_inequality(self, tz, other): # GH#19301 dti = pd.date_range('2016-01-01', periods=2, tz=tz) with pytest.raises(TypeError): dti < other with pytest.raises(TypeError): dti <= other with pytest.raises(TypeError): dti > other with pytest.raises(TypeError): dti >= other def test_dti_cmp_nat(self): left = pd.DatetimeIndex( [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')]) right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) for lhs, rhs in [(left, right), (left.astype(object), right.astype(object))]: result = rhs == lhs expected = np.array([False, False, True]) tm.assert_numpy_array_equal(result, expected) result = lhs != rhs expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) expected = np.array([False, False, False]) tm.assert_numpy_array_equal(lhs == pd.NaT, expected) tm.assert_numpy_array_equal(pd.NaT == rhs, expected) expected = np.array([True, True, True]) tm.assert_numpy_array_equal(lhs != pd.NaT, expected) tm.assert_numpy_array_equal(pd.NaT != lhs, expected) expected = np.array([False, False, False]) tm.assert_numpy_array_equal(lhs < pd.NaT, expected) tm.assert_numpy_array_equal(pd.NaT > lhs, expected) def test_dti_cmp_nat_behaves_like_float_cmp_nan(self): fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) didx1 = pd.DatetimeIndex([ '2014-01-01', pd.NaT, '2014-03-01', pd.NaT, '2014-05-01', '2014-07-01' ]) didx2 = pd.DatetimeIndex([ '2014-02-01', '2014-03-01', pd.NaT, pd.NaT, '2014-06-01', '2014-07-01' ]) darr = np.array([ 
np_datetime64_compat('2014-02-01 00:00Z'), np_datetime64_compat('2014-03-01 00:00Z'), np_datetime64_compat('nat'), np.datetime64('nat'), np_datetime64_compat('2014-06-01 00:00Z'), np_datetime64_compat('2014-07-01 00:00Z') ]) cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] # Check pd.NaT is handles as the same as np.nan with tm.assert_produces_warning(None): for idx1, idx2 in cases: result = idx1 < idx2 expected = np.array([True, False, False, False, True, False]) tm.assert_numpy_array_equal(result, expected) result = idx2 > idx1 expected = np.array([True, False, False, False, True, False]) tm.assert_numpy_array_equal(result, expected) result = idx1 <= idx2 expected = np.array([True, False, False, False, True, True]) tm.assert_numpy_array_equal(result, expected) result = idx2 >= idx1 expected = np.array([True, False, False, False, True, True]) tm.assert_numpy_array_equal(result, expected) result = idx1 == idx2 expected = np.array([False, False, False, False, False, True]) tm.assert_numpy_array_equal(result, expected) result = idx1 != idx2 expected = np.array([True, True, True, True, True, False]) tm.assert_numpy_array_equal(result, expected) with tm.assert_produces_warning(None): for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: result = idx1 < val expected = np.array([False, False, False, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = idx1 > val tm.assert_numpy_array_equal(result, expected) result = idx1 <= val tm.assert_numpy_array_equal(result, expected) result = idx1 >= val tm.assert_numpy_array_equal(result, expected) result = idx1 == val tm.assert_numpy_array_equal(result, expected) result = idx1 != val expected = np.array([True, True, True, True, True, True]) tm.assert_numpy_array_equal(result, expected) # Check pd.NaT is handles as the same as np.nan with tm.assert_produces_warning(None): for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: result = idx1 < val expected = np.array([True, False, False, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = idx1 > val expected = np.array([False, False, False, False, True, True]) tm.assert_numpy_array_equal(result, expected) result = idx1 <= val expected = np.array([True, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = idx1 >= val expected = np.array([False, False, True, False, True, True]) tm.assert_numpy_array_equal(result, expected) result = idx1 == val expected = np.array([False, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = idx1 != val expected = np.array([True, True, False, True, True, True]) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize('op', [ operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le ]) def test_comparison_tzawareness_compat(self, op): # GH#18162 dr = pd.date_range('2016-01-01', periods=6) dz = dr.tz_localize('US/Pacific') with pytest.raises(TypeError): op(dr, dz) with pytest.raises(TypeError): op(dr, list(dz)) with pytest.raises(TypeError): op(dz, dr) with pytest.raises(TypeError): op(dz, list(dr)) # Check that there isn't a problem aware-aware and naive-naive do not # raise assert (dr == dr).all() assert (dr == list(dr)).all() assert (dz == dz).all() assert (dz == list(dz)).all() # Check comparisons against scalar Timestamps ts = pd.Timestamp('2000-03-14 01:59') ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam') assert (dr > ts).all() with pytest.raises(TypeError): op(dr, ts_tz) assert (dz > 
ts_tz).all() with pytest.raises(TypeError): op(dz, ts) @pytest.mark.parametrize('op', [ operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le ]) def test_nat_comparison_tzawareness(self, op): # GH#19276 # tzaware DatetimeIndex should not raise when compared to NaT dti = pd.DatetimeIndex([ '2014-01-01', pd.NaT, '2014-03-01', pd.NaT, '2014-05-01', '2014-07-01' ]) expected = np.array([op == operator.ne] * len(dti)) result = op(dti, pd.NaT) tm.assert_numpy_array_equal(result, expected) result = op(dti.tz_localize('US/Pacific'), pd.NaT) tm.assert_numpy_array_equal(result, expected) def test_dti_cmp_int_raises(self): rng = date_range('1/1/2000', periods=10) # raise TypeError for now with pytest.raises(TypeError): rng < rng[3].value def test_dti_cmp_list(self): rng = date_range('1/1/2000', periods=10) result = rng == list(rng) expected = rng == rng tm.assert_numpy_array_equal(result, expected)
print(e) q = ''' Select count(distinct user_id) As dau, USER_DATE from users group by USER_DATE ''' result = session.execute(q) x = [] y = [] nfo = [row for row in result] for i in nfo: x.append(i[0]) val = str(i[1]).split()[0] val = np.datetime64(val) y.append(val) df = pd.DataFrame({'Users': x, 'Date': y}) df = df.groupby(by='Date', as_index=False).sum() print(df) fig = px.line(df, x='Date', y='Users') fig.show() query = ''' Select count(DISTINCT users.user_id) as amount, resources.location_ FROM users JOIN RESOURCES ON users.user_id = RESOURCES.USER_ID GROUP BY resources.location_ ''' res = session.execute(query) nfo = [row for row in res] print(nfo)
pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), Timedelta(hours=2) ], ids=str) def delta(request): # Several ways of representing two hours return request.param @pytest.fixture(params=[ datetime(2011, 1, 1), DatetimeIndex(['2011-01-01', '2011-01-02']), DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize('US/Eastern'), np.datetime64('2011-01-01'), Timestamp('2011-01-01') ], ids=lambda x: type(x).__name__) def addend(request): return request.param class TestDatetimeIndexComparisons(object): @pytest.mark.parametrize('other', [ datetime(2016, 1, 1), Timestamp('2016-01-01'), np.datetime64('2016-01-01') ]) def test_dti_cmp_datetimelike(self, other, tz): dti = pd.date_range('2016-01-01', periods=2, tz=tz)
_any_skipna_inferred_dtype = [
    ("string", ["a", np.nan, "c"]),
    ("string", ["a", pd.NA, "c"]),
    ("bytes", [b"a", np.nan, b"c"]),
    ("empty", [np.nan, np.nan, np.nan]),
    ("empty", []),
    ("mixed-integer", ["a", np.nan, 2]),
    ("mixed", ["a", np.nan, 2.0]),
    ("floating", [1.0, np.nan, 2.0]),
    ("integer", [1, np.nan, 2]),
    ("mixed-integer-float", [1, np.nan, 2.0]),
    ("decimal", [Decimal(1), np.nan, Decimal(2)]),
    ("boolean", [True, np.nan, False]),
    ("boolean", [True, pd.NA, False]),
    ("datetime64", [np.datetime64("2013-01-01"), np.nan,
                    np.datetime64("2018-01-01")]),
    ("datetime", [pd.Timestamp("20130101"), np.nan,
                  pd.Timestamp("20180101")]),
    ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ("timedelta", [timedelta(1), np.nan, timedelta(2)]),
    ("time", [time(1), np.nan, time(2)]),
    ("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
# close, volume, adj_close from the mpl-data/example directory. This array
# stores the date as an np.datetime64 with a day unit ('D') in the 'date'
# column.
with cbook.get_sample_data('goog.npz') as datafile:
    data = np.load(datafile)['price_data']
print(data)

fig, ax = plt.subplots()
ax.plot('date', 'adj_close', data=data)

# format the ticks
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(days)

# round to nearest months.
datemin = np.datetime64(data['date'][4], 'M')
datemax = np.datetime64(data['date'][-1], 'M') + np.timedelta64(1, 'M')
ax.set_xlim(datemin, datemax)

# format the coords message box
ax.format_xdata = mdates.DateFormatter('%m')
ax.format_ydata = lambda x: '$%1.2f' % x  # format the price.
ax.grid(True)

# rotates and right aligns the x labels, and moves the bottom of the
# axes up to make room for them
fig.autofmt_xdate()

plt.show()
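# A note on the month rounding used for datemin/datemax above: converting a
# day-precision np.datetime64 to a coarser unit truncates it, and month-unit
# timedeltas can then be added directly. A minimal standalone sketch of that
# NumPy behavior (the dates here are made up, not taken from goog.npz):
import numpy as np

d = np.datetime64('2004-08-15', 'D')
month_start = np.datetime64(d, 'M')                 # numpy.datetime64('2004-08')
next_month = month_start + np.timedelta64(1, 'M')   # numpy.datetime64('2004-09')
print(month_start, next_month)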
def test_dti_cmp_nat_behaves_like_float_cmp_nan(self):
    fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0])
    fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0])

    didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT,
                              '2014-05-01', '2014-07-01'])
    didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT,
                              '2014-06-01', '2014-07-01'])
    darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'),
                     np_datetime64_compat('2014-03-01 00:00Z'),
                     np_datetime64_compat('nat'), np.datetime64('nat'),
                     np_datetime64_compat('2014-06-01 00:00Z'),
                     np_datetime64_compat('2014-07-01 00:00Z')])

    cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)]

    # Check pd.NaT is handled the same as np.nan
    with tm.assert_produces_warning(None):
        for idx1, idx2 in cases:

            result = idx1 < idx2
            expected = np.array([True, False, False, False, True, False])
            tm.assert_numpy_array_equal(result, expected)

            result = idx2 > idx1
            expected = np.array([True, False, False, False, True, False])
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 <= idx2
            expected = np.array([True, False, False, False, True, True])
            tm.assert_numpy_array_equal(result, expected)

            result = idx2 >= idx1
            expected = np.array([True, False, False, False, True, True])
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 == idx2
            expected = np.array([False, False, False, False, False, True])
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 != idx2
            expected = np.array([True, True, True, True, True, False])
            tm.assert_numpy_array_equal(result, expected)

    with tm.assert_produces_warning(None):
        for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]:
            result = idx1 < val
            expected = np.array([False, False, False, False, False, False])
            tm.assert_numpy_array_equal(result, expected)
            result = idx1 > val
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 <= val
            tm.assert_numpy_array_equal(result, expected)
            result = idx1 >= val
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 == val
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 != val
            expected = np.array([True, True, True, True, True, True])
            tm.assert_numpy_array_equal(result, expected)

    # Check pd.NaT is handled the same as np.nan
    with tm.assert_produces_warning(None):
        for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]:
            result = idx1 < val
            expected = np.array([True, False, False, False, False, False])
            tm.assert_numpy_array_equal(result, expected)
            result = idx1 > val
            expected = np.array([False, False, False, False, True, True])
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 <= val
            expected = np.array([True, False, True, False, False, False])
            tm.assert_numpy_array_equal(result, expected)
            result = idx1 >= val
            expected = np.array([False, False, True, False, True, True])
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 == val
            expected = np.array([False, False, True, False, False, False])
            tm.assert_numpy_array_equal(result, expected)

            result = idx1 != val
            expected = np.array([True, True, False, True, True, True])
            tm.assert_numpy_array_equal(result, expected)
This file is used to test the position of the Earth in ideal conditions.
"""
import numpy as np
import matplotlib.pyplot as plt
import localSun as current
import math as math
import random
import help_functions as hp
import itertools

plt.close('all')

day = 86400 - 86400 / 366
year = 31536000
idealStart = np.array([np.datetime64('2018-12-22T00:00:00', 's')])
tiltTest = math.radians(23.44)  # Normal tilt

print("(1) Testing to see if the numbers are exact on the solstices and "
      "equinoxes. These numbers should be exact to arbitrary precision "
      "regardless of tilt.\n")

# Solstices and equinoxes
solsSun1 = current.localSun(time=np.array([np.datetime64('2018-12-22T00:00:00', 's')]),
                            tilt=tiltTest, day=day, year=year, start=idealStart)
solsSun2 = current.localSun(time=np.array([np.datetime64('2019-06-22T12:00:00', 's')]),
def make_plot(fnum, x, ydata, ylab, ylab2):
    hours = mdates.HourLocator(interval=2)  # every 2 hours
    minutes = mdates.MinuteLocator(interval=30)  # every half hour
    x_major_fmt = mdates.DateFormatter('%H:%M')

    f, ax1 = plt.subplots(num=fnum, clear=True)
    f.set_size_inches(10, 6)

    # x comes in as np.datetime64, but older matplotlib versions
    # can't handle this. For those convert to python datetime:
    if StrictVersion(matplotlib.__version__) < StrictVersion('2.2.2'):
        print("Legacy python datetime for the x axis")
        x = x.astype('O')

    for y, lab in ydata:
        ax1.plot(x, y, label=lab)
    ax1.set_ylabel(ylab)
    if len(ydata) > 1:
        l = ax1.legend()
    ax1.spines['top'].set_visible(False)
    ax1.spines['right'].set_visible(False)

    # format the ticks
    ax1.xaxis.set_major_locator(hours)
    ax1.xaxis.set_major_formatter(x_major_fmt)
    ax1.xaxis.set_minor_locator(minutes)

    ax2 = ax1.twinx()
    ax2.set_ylabel(ylab2)
    ax2.spines['top'].set_visible(False)
    ax2.spines['left'].set_visible(False)

    # round x range to nearest hours.
    # datemax = np.datetime64(d['datetime'][-1], 'h') + np.timedelta64(1, 'h')
    # Shows the last 26 hours of data in the dataset. Will freeze when no new
    # data is available.
    datemax = np.datetime64(datetime.datetime.now(), 'h') + np.timedelta64(1, 'h')
    # Shows the last 26 hours of data, might show an empty plot if there is no data
    datemin = datemax - np.timedelta64(26, 'h')  # Exactly one day + 2 hours

    if StrictVersion(matplotlib.__version__) < StrictVersion('2.2.2'):
        datemin = datemin.astype('O')
        datemax = datemax.astype('O')
    ax1.set_xlim(datemin, datemax)

    # Round the y axis similarly
    y_min, y_max = ax1.get_ylim()
    yt = ax1.get_yticks()
    ax1.set_ylim(yt[0], yt[-1])
    # ax2 is empty, so the default y range is 0.0 to 1.0.
    # Set it to match such that the ticks line up:
    ax2.set_ylim(yt[0], yt[-1])

    # Overwrite the tick decorator to convert C to F dynamically:
    ax1.yaxis.set_major_formatter(c2f_formatter)

    ax1.grid(b=True, which='major', color=(0.75, 0.75, 0.75), linestyle='-')
    ax1.grid(b=True, which='minor', color=(0.8, 0.8, 0.8), linestyle=':')
    plt.setp(ax1.xaxis.get_majorticklabels(), rotation=30,
             horizontalalignment='right')

    return f, ax1
                curr_trans.at[0, 'Trans Eff Dt']]
            df2 = pd.concat([df2, curr_trans], axis=0)
            df2.loc[:, 'Next Trans Eff Dt'] = (df2.sort_values('Trans Dt')
                                               ['Trans Eff Dt'].shift(-1))
            df2.loc[df2['Next Trans Eff Dt'].isnull(), 'Next Trans Eff Dt'] = \
                curr_trans.at[0, 'Pol Exp Dt'] + np.timedelta64(1, 'D')
            term_days = ((curr_trans.at[0, 'Pol Exp Dt'] -
                          curr_trans.at[0, 'Pol Eff Dt']) /
                         np.timedelta64(1, 'D')) + 1
            weights = (((df2['Next Trans Eff Dt'] - df2['Trans Eff Dt']) /
                        np.timedelta64(1, 'D')).values) / term_days
            weights = np.tile(weights.reshape(-1, 1), (1, self.nCovs))
            df_new.loc[df_new['Trans Number'] == i, coverages] = \
                sum(weights * df2[coverages].values).round(2)

        df_new.loc[:, 'Total Premium'] = df_new[coverages].sum(axis=1).round(2)
        return df_new

    def calcTransWrittenPremium(self, df):
        '''Premiums in df must be WP at time of transaction'''
        premiums = ['Coverage: ' + str(i) + ' Premium'
                    for i in range(self.nCovs)] + ['Total Premium']
        df.loc[:, premiums] = df[premiums] - df[premiums].shift(1).fillna(0)
        return df


if __name__ == '__main__':
    x = insuranceDataGenerator()
    acct_df, claim_df = x.generateAccountTrans(
        start_date=np.datetime64('2010-01-01'))
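# The proration above leans on NumPy timedelta arithmetic: subtracting two
# datetime64 values yields a timedelta64, and dividing by np.timedelta64(1, 'D')
# turns it into a float number of days. A minimal, hypothetical sketch of the
# weighting idea (the dates below are made up and not tied to the generator):
import numpy as np

eff = np.array(['2010-01-01', '2010-03-01', '2010-07-01'], dtype='datetime64[D]')
nxt = np.array(['2010-03-01', '2010-07-01', '2011-01-01'], dtype='datetime64[D]')
term_days = (np.datetime64('2010-12-31') - np.datetime64('2010-01-01')) / np.timedelta64(1, 'D') + 1
weights = ((nxt - eff) / np.timedelta64(1, 'D')) / term_days  # fraction of the term covered by each slice
print(weights, weights.sum())  # the slices tile the whole term, so the sum is ~1.0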
class TestTimedeltaArray:
    @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
    def test_astype_int(self, dtype):
        arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")])

        if np.dtype(dtype).kind == "u":
            expected_dtype = np.dtype("uint64")
        else:
            expected_dtype = np.dtype("int64")
        expected = arr.astype(expected_dtype)

        warn = None
        if dtype != expected_dtype:
            warn = FutureWarning
        msg = " will return exactly the specified dtype"
        with tm.assert_produces_warning(warn, match=msg):
            result = arr.astype(dtype)

        assert result.dtype == expected_dtype
        tm.assert_numpy_array_equal(result, expected)

    def test_setitem_clears_freq(self):
        a = TimedeltaArray(pd.timedelta_range("1H", periods=2, freq="H"))
        a[0] = Timedelta("1H")
        assert a.freq is None

    @pytest.mark.parametrize(
        "obj",
        [
            Timedelta(seconds=1),
            Timedelta(seconds=1).to_timedelta64(),
            Timedelta(seconds=1).to_pytimedelta(),
        ],
    )
    def test_setitem_objects(self, obj):
        # make sure we accept timedelta64 and timedelta in addition to Timedelta
        tdi = pd.timedelta_range("2 Days", periods=4, freq="H")
        arr = TimedeltaArray(tdi, freq=tdi.freq)

        arr[0] = obj
        assert arr[0] == Timedelta(seconds=1)

    @pytest.mark.parametrize(
        "other",
        [
            1,
            np.int64(1),
            1.0,
            np.datetime64("NaT"),
            pd.Timestamp("2021-01-01"),
            "invalid",
            np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
            (np.arange(10) * 24 * 3600 * 10**9).view("datetime64[ns]"),
            pd.Timestamp("2021-01-01").to_period("D"),
        ],
    )
    @pytest.mark.parametrize("index", [True, False])
    def test_searchsorted_invalid_types(self, other, index):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = TimedeltaArray(data, freq="D")
        if index:
            arr = pd.Index(arr)

        msg = "|".join(
            [
                "searchsorted requires compatible dtype or scalar",
                "value should be a 'Timedelta', 'NaT', or array of those. Got",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            arr.searchsorted(other)
def test_init_bounds_datetime_yaxis(self):
    start = np.datetime64(dt.datetime.today())
    end = start + np.timedelta64(1, 's')
    bounds = (-10, start, 10, end)
    image = Image(np.flipud(self.array), bounds=bounds)
    self.assertEqual(image.bounds.lbrt(), bounds)
    array_2 = np.arange(0, 100, 10)
    array_2.flags.writeable = array_2_writeable

    hundred_elements = np.arange(100)
    tm.assert_categorical_equal(
        cut(hundred_elements, array_1), cut(hundred_elements, array_2)
    )


@pytest.mark.parametrize(
    "conv",
    [
        lambda v: Timestamp(v),
        lambda v: to_datetime(v),
        lambda v: np.datetime64(v),
        lambda v: Timestamp(v).to_pydatetime(),
    ],
)
def test_datetime_bin(conv):
    data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")]
    bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"]
    expected = Series(
        IntervalIndex(
            [
                Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
                Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])),
            ],
            "right",
        )
def test_init_data_datetime_yaxis(self):
    start = np.datetime64(dt.datetime.today())
    end = start + np.timedelta64(1, 's')
    Image(np.flipud(self.array), bounds=(-10, start, 10, end))
    tm.assert_series_equal(s, expected)

    # GH 14155
    s = Series(10 * [np.timedelta64(10, "m")])
    s.loc[[1, 2, 3]] = np.timedelta64(20, "m")
    expected = pd.Series(10 * [np.timedelta64(10, "m")])
    expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, "m"))
    tm.assert_series_equal(s, expected)


@pytest.mark.parametrize(
    "nat_val,should_cast",
    [
        (pd.NaT, True),
        (np.timedelta64("NaT", "ns"), False),
        (np.datetime64("NaT", "ns"), True),
    ],
)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_dt64_series_assign_nat(nat_val, should_cast, tz):
    # some nat-like values should be cast to datetime64 when inserting
    # into a datetime64 series. Others should coerce to object
    # and retain their dtypes.
    dti = pd.date_range("2016-01-01", periods=3, tz=tz)
    base = pd.Series(dti)
    expected = pd.Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype)
    if not should_cast:
        expected = expected.astype(object)

    ser = base.copy(deep=True)
    ser[0] = nat_val
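# The parametrization above encodes the rule that datetime-compatible NaT values
# (pd.NaT and np.datetime64("NaT")) are stored as NaT inside a datetime64 Series,
# while np.timedelta64("NaT") is incompatible and upcasts the Series to object.
# A minimal standalone sketch of that behavior (illustrative only; newer pandas
# versions may warn about, or disallow, the silent upcast):
import numpy as np
import pandas as pd

ser = pd.Series(pd.date_range("2016-01-01", periods=3))

dt_nat = ser.copy()
dt_nat[0] = np.datetime64("NaT")   # stays datetime64[ns]; the value becomes NaT

td_nat = ser.copy()
td_nat[0] = np.timedelta64("NaT")  # wrong kind of NaT: dtype falls back to object

print(dt_nat.dtype, td_nat.dtype)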