Example No. 1
    def get_dataset(self, key, info):
        if self._data is None:
            self.read()

        if key.name in ['latitude', 'longitude']:
            lons, lats = self.get_lonlats()
            if key.name == 'latitude':
                return Projectable(lats, id=key)
            else:
                return Projectable(lons, id=key)

        avhrr_channel_index = {'1': 0,
                               '2': 1,
                               '3a': 2,
                               '3b': 2,
                               '4': 3,
                               '5': 4}
        index = avhrr_channel_index[key.name]
        mask = False
        if key.name in ['3a', '3b'] and self._is3b is None:
            ch3a = bfield(self._data["id"]["id"], 10)
            self._is3b = np.logical_not(ch3a)

        if key.name == '3a':
            mask = np.tile(self._is3b, (1, 2048))
        elif key.name == '3b':
            mask = np.tile(np.logical_not(self._is3b), (1, 2048))

        data = self._data["image_data"][:, :, index]
        if key.calibration == 'counts':
            return Projectable(data,
                               mask=mask,
                               area=self.get_lonlats(),
                               units='1')

        pg_spacecraft = ''.join(self.platform_name.split()).lower()

        jdays = (np.datetime64(self.start_time) - np.datetime64(str(
            self.year) + '-01-01T00:00:00Z')) / np.timedelta64(1, 'D')
        if index < 2 or key.name == '3a':
            data = calibrate_solar(data, index, self.year, jdays,
                                   pg_spacecraft)
            units = '%'

        if index > 2 or key.name == '3b':
            if self.times is None:
                self.times = time_seconds(self._data["timecode"], self.year)
            line_numbers = (
                np.round((self.times - self.times[-1]) /
                         np.timedelta64(166666667, 'ns'))).astype(np.int64)
            line_numbers -= line_numbers[0]
            if self.prt is None:
                self.prt, self.ict, self.space = self.get_telemetry()
            chan = index + 1
            data = calibrate_thermal(data, self.prt, self.ict[:, chan - 3],
                                     self.space[:, chan - 3], line_numbers,
                                     chan, pg_spacecraft)
            units = 'K'
        # TODO: check if entirely masked before returning
        return Projectable(data, mask=mask, units=units)
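
A standalone sketch of the jdays computation above: subtracting two datetime64 values yields a timedelta64, and dividing by np.timedelta64(1, 'D') converts it to fractional days since the start of the year (the dates here are illustrative, not from the reader's data):

import numpy as np

start = np.datetime64('2009-07-01T06:00:00')
year_start = np.datetime64('2009-01-01T00:00:00')
jdays = (start - year_start) / np.timedelta64(1, 'D')
print(jdays)  # 181.25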
Example No. 2
 def test_frame_datetime64_handling_groupby(self):
     # it works!
     df = DataFrame([(3, np.datetime64('2012-07-03')),
                     (3, np.datetime64('2012-07-04'))],
                    columns=['a', 'date'])
     result = df.groupby('a').first()
     assert result['date'][3] == Timestamp('2012-07-03')
Example No. 3
    def test_bounds_with_different_units(self):
        out_of_bounds_dates = (
            '1677-09-21',
            '2262-04-12',
        )

        time_units = ('D', 'h', 'm', 's', 'ms', 'us')

        for date_string in out_of_bounds_dates:
            for unit in time_units:
                self.assertRaises(
                    ValueError,
                    Timestamp,
                    np.datetime64(date_string, unit)
                )

        in_bounds_dates = (
            '1677-09-23',
            '2262-04-11',
        )

        for date_string in in_bounds_dates:
            for unit in time_units:
                Timestamp(
                    np.datetime64(date_string, unit)
                )
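
For context, the in-bounds and out-of-bounds dates above come from the nanosecond-resolution range of pandas Timestamp, which is bounded by what a signed 64-bit count of nanoseconds around the epoch can represent:

import pandas as pd

print(pd.Timestamp.min)  # 1677-09-21 00:12:43.145224193
print(pd.Timestamp.max)  # 2262-04-11 23:47:16.854775807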
Example No. 4
def timeparse(timestr):
    DEFAULT = datetime(1, 1, 1)
    bc = False
    if re.search(r'bce?', timestr, flags=re.I):
        bc = True
        timestr = re.sub(r'bce?', '', timestr, flags=re.I)
    if re.match('-', timestr, flags=re.I):
        bc = True
        timestr = timestr.replace('-', '', 1)
    if re.search(r'ad', timestr, flags=re.I):
        timestr = re.sub('ad', '', timestr, flags=re.I)

    if bc is True:
        timestr = "-%s" % timestr

    timestr = timestr.strip()

    try:
        t = numpy.datetime64(timestr).astype('datetime64[ms]').astype('int64')
        return t, str(numpy.datetime64(t, 'ms'))

    except Exception:
        pass

    #  try just using straight datetime parsing
    if bc is False:
        try:
            logger.debug('trying %s as direct parse', timestr)
            dt = parse(timestr, default=DEFAULT)
            t = numpy.datetime64(dt.isoformat()).astype('datetime64[ms]').astype('int64')
            return t, str(numpy.datetime64(t, 'ms'))
        except Exception:
            pass

    return None, None
Example No. 5
    def test_nanosecond_timestamp(self):
        # GH 7610
        expected = 1293840000000000005
        t = Timestamp('2011-01-01') + offsets.Nano(5)
        self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')")
        self.assertEqual(t.value, expected)
        self.assertEqual(t.nanosecond, 5)

        t = Timestamp(t)
        self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')")
        self.assertEqual(t.value, expected)
        self.assertEqual(t.nanosecond, 5)

        t = Timestamp(np.datetime64('2011-01-01 00:00:00.000000005Z'))
        self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')")
        self.assertEqual(t.value, expected)
        self.assertEqual(t.nanosecond, 5)

        expected = 1293840000000000010
        t = t + offsets.Nano(5)
        self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')")
        self.assertEqual(t.value, expected)
        self.assertEqual(t.nanosecond, 10)

        t = Timestamp(t)
        self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')")
        self.assertEqual(t.value, expected)
        self.assertEqual(t.nanosecond, 10)

        t = Timestamp(np.datetime64('2011-01-01 00:00:00.000000010Z'))
        self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')")
        self.assertEqual(t.value, expected)
        self.assertEqual(t.nanosecond, 10)
Example No. 6
 def test_isscalar_numpy_zerodim_arrays(self):
     for zerodim in [np.array(1), np.array('foobar'),
                     np.array(np.datetime64('2014-01-01')),
                     np.array(np.timedelta64(1, 'h')),
                     np.array(np.datetime64('NaT'))]:
         self.assertFalse(is_scalar(zerodim))
         self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim)))
Example No. 7
  def testResampleData(self):
    # test upsampling by a factor of 2
    timestamps = numpy.array([numpy.datetime64(
      datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzlocal()) +
      datetime.timedelta(hours=i)) for i in xrange(8)])
    values = numpy.linspace(0, 7, 8)
    newSamplingInterval = numpy.timedelta64(1800, 's')
    (newTimeStamps, newValues) = param_finder._resampleData(timestamps,
                                                            values,
                                                            newSamplingInterval)

    trueNewTimeStamps = numpy.array([numpy.datetime64(
      datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzlocal()) +
      datetime.timedelta(hours=0.5 * i)) for i in xrange(15)])
    self.assertTrue(numpy.allclose(newValues, numpy.linspace(0, 7, 15)))
    timestampError = (numpy.sum(
      numpy.abs(newTimeStamps - trueNewTimeStamps))).item().total_seconds()
    self.assertAlmostEqual(timestampError, 0)

    # test down-sampling by a factor of 2
    newSamplingInterval = numpy.timedelta64(7200, 's')
    (newTimeStamps, newValues) = param_finder._resampleData(timestamps,
                                                            values,
                                                            newSamplingInterval)
    trueNewTimeStamps = numpy.array([numpy.datetime64(
      datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzlocal()) +
      datetime.timedelta(hours=2 * i)) for i in xrange(4)])
    timestampError = (numpy.sum(
      numpy.abs(newTimeStamps - trueNewTimeStamps))).item().total_seconds()
    self.assertTrue(numpy.allclose(newValues, numpy.linspace(0, 6, 4)))
    self.assertAlmostEqual(timestampError, 0)
Example No. 8
def travel_time(start_time, path, measurements_by_station, station_metadata, time_granularity=60*60):
  """Calculate the travel time along the given path at the given start time
  
  Args:
  path - list of station IDs that must be traversed to reach the destination
  start_time - start time datetime64
  station_data - dataframes grouped by station
  time_granularity - granularity of samples in seconds
  """
  time_granularity *= 1000000 # convert to microseconds
  time = datetime64_to_microseconds(start_time)
  total_dist = 0
  
  for i in range(len(path)-1):
    # calculate how long it takes to get to the next station based on the
    # current time
    sid1 = path[i]
    sid2 = path[i+1]
    
    measurements = measurements_by_station[sid1]
    quantized = np.datetime64(int(time - time % time_granularity), 'us')  # an integer timestamp needs an explicit unit
    filtered = measurements[measurements['timestamp'] == quantized]
    speed = filtered.iloc[0]['avgspeed']
    if np.isnan(speed):
      return (np.nan, np.nan)
    
    station1_metadata = station_metadata.loc[sid1]
    station2_metadata = station_metadata.loc[sid2]
    dist = abs(station1_metadata['Abs_PM'] - station2_metadata['Abs_PM'])
    total_dist += dist
    
    # TODO: what if speed is NAN? interpolate
    time += 1000000 * 60 * 60 * dist / speed

  return (total_dist, np.datetime64(int(time), 'us') - start_time)
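
The quantization step above rounds a microsecond tick count down to the nearest sample boundary. A minimal sketch of that arithmetic, assuming microsecond ticks and hourly samples (the timestamp is illustrative):

import numpy as np

granularity_us = 60 * 60 * 1000000  # one hour in microseconds
t_us = np.datetime64('2020-01-01T10:37:12', 'us').astype('int64')
bucket = np.datetime64(t_us - t_us % granularity_us, 'us')
print(bucket)  # 2020-01-01T10:00:00.000000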
Example No. 9
    def test_infer_dtype_period(self):
        # GH 13664
        arr = np.array([pd.Period('2011-01', freq='D'),
                        pd.Period('2011-02', freq='D')])
        self.assertEqual(pd.lib.infer_dtype(arr), 'period')

        arr = np.array([pd.Period('2011-01', freq='D'),
                        pd.Period('2011-02', freq='M')])
        self.assertEqual(pd.lib.infer_dtype(arr), 'period')

        # starts with nan
        for n in [pd.NaT, np.nan]:
            arr = np.array([n, pd.Period('2011-01', freq='D')])
            self.assertEqual(pd.lib.infer_dtype(arr), 'period')

            arr = np.array([n, pd.Period('2011-01', freq='D'), n])
            self.assertEqual(pd.lib.infer_dtype(arr), 'period')

        # different type of nat
        arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
                       dtype=object)
        self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')

        arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')],
                       dtype=object)
        self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
Example No. 10
    def test_datetime(self):
        expected = np.datetime64('2000-01-01T00Z')
        actual = _as_compatible_data(expected)
        self.assertEqual(expected, actual)
        self.assertEqual(np.ndarray, type(actual))
        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)

        expected = np.array([np.datetime64('2000-01-01T00Z')])
        actual = _as_compatible_data(expected)
        self.assertEqual(np.asarray(expected), actual)
        self.assertEqual(np.ndarray, type(actual))
        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)

        expected = np.array([np.datetime64('2000-01-01T00Z', 'ns')])
        actual = _as_compatible_data(expected)
        self.assertEqual(np.asarray(expected), actual)
        self.assertEqual(np.ndarray, type(actual))
        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
        self.assertIs(expected, source_ndarray(np.asarray(actual)))

        expected = np.datetime64('2000-01-01T00Z', 'ns')
        actual = _as_compatible_data(datetime(2000, 1, 1))
        self.assertEqual(np.asarray(expected), actual)
        self.assertEqual(np.ndarray, type(actual))
        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
Example No. 11
def safe_date(value, date_format, datetime_unit):
    date_str = value.strip().strip("'").strip('"')
    if date_str == '?':
        return np.datetime64('NaT', datetime_unit)
    else:
        dt = datetime.datetime.strptime(date_str, date_format)
        return np.datetime64(dt).astype("datetime64[%s]" % datetime_unit)
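
A hypothetical usage of safe_date, assuming it and its numpy/datetime imports are in scope; the argument values are made up to show the quoted-string and missing-value ('?') paths:

print(safe_date("'2014-01-07'", '%Y-%m-%d', 's'))  # 2014-01-07T00:00:00
print(safe_date('?', '%Y-%m-%d', 's'))             # NaT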
Example No. 12
    def test_datetime(self):
        expected = np.datetime64('2000-01-01T00')
        actual = _as_compatible_data(expected)
        self.assertEqual(expected, actual)
        self.assertEqual(np.datetime64, type(actual))
        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)

        expected = np.array([np.datetime64('2000-01-01T00')])
        actual = _as_compatible_data(expected)
        self.assertEqual(np.asarray(expected), actual)
        self.assertEqual(NumpyArrayAdapter, type(actual))
        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)

        expected = np.array([np.datetime64('2000-01-01T00', 'ns')])
        actual = _as_compatible_data(expected)
        self.assertEqual(np.asarray(expected), actual)
        self.assertEqual(NumpyArrayAdapter, type(actual))
        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
        self.assertIs(expected, source_ndarray(np.asarray(actual)))

        expected = pd.Timestamp('2000-01-01T00').to_datetime()
        actual = _as_compatible_data(expected)
        self.assertEqual(np.asarray(expected), actual)
        self.assertEqual(NumpyArrayAdapter, type(actual))
        self.assertEqual(np.dtype('O'), actual.dtype)
Example No. 13
    def test_nat_items(self):
        # not a datetime
        nadt_no_unit = np.datetime64("NaT")
        nadt_s = np.datetime64("NaT", "s")
        nadt_d = np.datetime64("NaT", "ns")
        # not a timedelta
        natd_no_unit = np.timedelta64("NaT")
        natd_s = np.timedelta64("NaT", "s")
        natd_d = np.timedelta64("NaT", "ns")

        dts = [nadt_no_unit, nadt_s, nadt_d]
        tds = [natd_no_unit, natd_s, natd_d]
        for a, b in itertools.product(dts, dts):
            self._assert_func(a, b)
            self._assert_func([a], [b])
            self._test_not_equal([a], b)

        for a, b in itertools.product(tds, tds):
            self._assert_func(a, b)
            self._assert_func([a], [b])
            self._test_not_equal([a], b)

        for a, b in itertools.product(tds, dts):
            self._test_not_equal(a, b)
            self._test_not_equal(a, [b])
            self._test_not_equal([a], [b])
            self._test_not_equal([a], np.datetime64("2017-01-01", "s"))
            self._test_not_equal([b], np.datetime64("2017-01-01", "s"))
            self._test_not_equal([a], np.timedelta64(123, "s"))
            self._test_not_equal([b], np.timedelta64(123, "s"))
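
Part of what these assertions exercise is that in modern NumPy, NaT (like NaN) never compares equal to anything, including itself, so the assertion helpers need special NaT handling:

import numpy as np

nat = np.datetime64("NaT")
print(nat == nat)  # False
print(nat != nat)  # True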
Example No. 14
def SmartIntervalSearcher(curve_to_be_analysed):
    new_x_axis = np.empty(len(curve_to_be_analysed.x), dtype='datetime64[D]')
    derivative = np.zeros(len(curve_to_be_analysed.x))

    for i,data in enumerate(curve_to_be_analysed.x):
        new_x_axis[i] = np.datetime64(data.date)

    if len(curve_to_be_analysed.x) > 20:
        for i in xrange(len(curve_to_be_analysed)-1):
            delta = (new_x_axis[i+1] - new_x_axis[i]) / np.timedelta64(1,'D')
            derivative[i] = (curve_to_be_analysed.y[i+1] - curve_to_be_analysed.y[i]) / delta

        position = 0
        for i in xrange(len(derivative)):
            if derivative[-2-i] < 0:
                pass
            else:
                position = len(derivative) - i + 2
                break

        if position > 0.9*len(curve_to_be_analysed.x):
            if int(0.9*len(curve_to_be_analysed.x)) < 20:
                position = len(curve_to_be_analysed.x) - 20
            else:
                position = int(0.9*len(curve_to_be_analysed.x))
    else:
        position = 0

    return position
Example No. 15
    def test_conversion(self):
        rs = self.dtc.convert(['2012-1-1'], None, None)[0]
        xp = datetime(2012, 1, 1).toordinal()
        self.assertEqual(rs, xp)

        rs = self.dtc.convert('2012-1-1', None, None)
        self.assertEqual(rs, xp)

        rs = self.dtc.convert(date(2012, 1, 1), None, None)
        self.assertEqual(rs, xp)

        rs = self.dtc.convert(datetime(2012, 1, 1).toordinal(), None, None)
        self.assertEqual(rs, xp)

        rs = self.dtc.convert('2012-1-1', None, None)
        self.assertEqual(rs, xp)

        rs = self.dtc.convert(Timestamp('2012-1-1'), None, None)
        self.assertEqual(rs, xp)

        # also testing datetime64 dtype (GH8614)
        rs = self.dtc.convert(np.datetime64('2012-01-01'), None, None)
        self.assertEqual(rs, xp)

        rs = self.dtc.convert(np.datetime64('2012-01-01 00:00:00+00:00'), None, None)
        self.assertEqual(rs, xp)

        rs = self.dtc.convert(np.array([np.datetime64('2012-01-01 00:00:00+00:00'),
                                        np.datetime64('2012-01-02 00:00:00+00:00')]), None, None)
        self.assertEqual(rs[0], xp)
Example No. 16
 def test_holomap_slider_unsorted_datetime_values_initialization(self):
     hmap = HoloMap([(np.datetime64(10005, 'D'), Curve([1, 2, 3])),
                     (np.datetime64(10000, 'D'), Curve([1, 2, 4]))], sort=False)
     widgets = bokeh_renderer.get_widget(hmap, 'widgets')
     widgets()
     self.assertEqual(widgets.plot.current_key, (np.datetime64(10000, 'D'),))
     self.assertEqual(widgets.plot.current_frame, hmap[np.datetime64(10000, 'D')])
Example No. 17
 def createCube(self, cellSize_xy):
     cellNumber_x = round((self.extent.XMax - self.extent.XMin) / cellSize_xy)
     cellNumber_y = round((self.extent.YMax - self.extent.YMin) / cellSize_xy)
     X = self.ssdo.xyCoords[:,0]
     Y = self.ssdo.xyCoords[:,1]
     time = self.ssdo.fields[self.timeField].data
     time = NUM.array([i for i in time], NUM.datetime64)
     startDateTime = NUM.datetime64('1970-01-01 00:00:00')
     T = time - startDateTime
     self.startTime = NUM.amin(T) + NUM.datetime64('1970-01-01 00:00:00')
     T = NUM.array([i.item().days for i in T], int)
     startT = NUM.amin(T)
     endT = NUM.amax(T)
     cellNumber_t = round((endT - startT) / self.cellSize_t) + 1
     X = (X - self.extent.XMin) / self.cellSize_xy
     Y = (self.extent.YMax - Y) / self.cellSize_xy
     T = (T - startT) / self.cellSize_t
     X = NUM.floor(X)
     Y = NUM.floor(Y)
     T = NUM.floor(T)
     CellIdList = (cellNumber_x * cellNumber_y * T) + (cellNumber_x * Y) + X
     BothEnds = NUM.array([0, (cellNumber_t * cellNumber_x * cellNumber_y -1)])
     CellIdList = NUM.concatenate((CellIdList, BothEnds), axis=0)
     CellIdList = NUM.array(CellIdList, dtype = 'int32')
     counts = NUM.bincount(CellIdList)
     counts[BothEnds[0]] = counts[BothEnds[0]] - 1
     counts[BothEnds[1]] = counts[BothEnds[1]] - 1
     return counts.reshape(cellNumber_t, cellNumber_x, cellNumber_y)
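
The elapsed-days computation above extracts .days per element in a Python loop; a vectorized sketch of the same idea using datetime64 arithmetic (the array values are illustrative):

import numpy as NUM  # matching the alias used in the example

t = NUM.array(['2020-01-10', '2020-03-01'], dtype='datetime64[s]')
epoch = NUM.datetime64('1970-01-01 00:00:00')
days = (t - epoch) / NUM.timedelta64(1, 'D')
print(days)  # [18271. 18322.]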
Example No. 18
def compare_different_layers():
    train_start_date = np.datetime64("2010-01-01")
    train_end_date = np.datetime64("2014-12-31")
    test_start_date = np.datetime64("2015-01-01")
    test_end_date = np.datetime64("2016-07-31")

    layers_list = [
        [],
        [25],
        [50],
        [100],
        [25, 10],
        [40, 20],
        [50, 25],
        [75, 30],
        [100, 50],
        [50, 25, 10],
        [100, 50, 25],
        [200, 100, 50]
    ]

    for layers in layers_list:
        model = NikkeiModel(layers, './model/2010-2015_37-' + '-'.join([str(x) for x in layers]) + '-3')
        if not model.is_trained():
            model.prepare_training_data(train_start_date, train_end_date)
            model.prepare_test_data(test_start_date, test_end_date)
            model.train(eval_on_test=True)
Example No. 19
def run_backtest():
    test_start_date = np.datetime64("2015-01-01")
    test_end_date = np.datetime64("2016-07-31")
    model = NikkeiModel([50, 25], './model/2010-2015_37-50-25-3')
    model.prepare_test_data(test_start_date, test_end_date)
    model.evaluate()
    model.backtest()
Example No. 20
def mock_get_descriptor(query_parameters):
    variables = query_parameters['variables']
    descriptor = {
        'ls5_nbar_albers': {
            'storage_units': {
                (636419476.0, -3900012.5, 1500012.5): {
                    'storage_shape': (2, 400, 400),
                    'storage_min': (636419476.0, -3999987.5, 1500012.5),
                    'irregular_indices': {
                        u'time': [636419476.0, 661302607.0]},
                    'storage_path': '/g/data/u46/public/datacube/data/LANDSAT_5_TM_NBAR_ALB_15_-40_1990.nc',
                    'storage_max': (661302607.0, -3900012.5, 1599987.5)
                }
            },
            'dimensions': [u'time', u'y', u'x'],
            'result_max': (
                numpy.datetime64('1990-12-16T10:10:07.000000000+1100'), -3956662.5, 1555062.5),
            'irregular_indices': {u'time': array(['1990-03-03T10:11:16.000000000+1100',
                                                  '1990-12-16T10:10:07.000000000+1100'],
                                                 dtype='datetime64[ns]')
                                  },
            'result_min': (
                numpy.datetime64('1990-03-03T10:11:16.000000000+1100'), -3962362.5, 1544587.5),
            'result_shape': (2, 229, 420)}}

    descriptor['ls5_nbar_albers']['variables'] = {
        name: {'nodata_value': -999, 'datatype_name': dtype('int16')}
        for name in variables
        }
    return descriptor
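
A note on the timestamp strings above: explicit UTC offsets such as '+1100' in datetime64 strings were deprecated in NumPy 1.11, and datetime64 itself is timezone-naive, so newer code would drop the offset (or convert to UTC first):

import numpy as np

# tz-naive form of the result_min timestamp above
print(np.datetime64('1990-03-03T10:11:16.000000000'))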
Example No. 21
def calcReturn(log, transactions, date):
    startt = str(np.datetime64(date + 'T00:01'))[:16]
    endt   = str(np.datetime64(date + 'T23:59'))[:16]
    starty = str(np.datetime64(date + 'T00:01') - np.timedelta64(1, 'D'))[:16]
    endy   = str(np.datetime64(date + 'T23:59') - np.timedelta64(1, 'D'))[:16]

    dft = log[startt : endt]
    dfy = log[starty : endy]

    try:
        tdf = transactions[date]
    except KeyError:
        tdf = []

    if len(dfy) > 0:
        startValue = float(dfy.tail(1)['EUR'] + \
                           dfy.tail(1)['BTC'] * dfy.tail(1)['Bid'])
        openPrice  = float((dfy.tail(1)['Bid'] + dfy.tail(1)['Ask'])/2)
    else:
        startValue = float(dft.head(1)['EUR'] + \
                           dft.head(1)['BTC'] * dft.head(1)['Bid'])
        openPrice  = float((dft.head(1)['Bid'] + dft.head(1)['Ask'])/2)
        
    endValue   = float(dft.tail(1)['EUR'] + \
                       dft.tail(1)['BTC'] * dft.tail(1)['Bid'])
    closePrice = float((dft.tail(1)['Bid'] + dft.tail(1)['Ask'])/2)
    
    if len(tdf) != 0:
        tdf['EUR']=tdf['AmountBTC']*(tdf['Bid']+tdf['Ask'])/2 + tdf['AmountEUR']
        endValue   = endValue - float(tdf.sum()['EUR'])
    
    retStrategy= endValue / startValue - 1
    retHold    = closePrice / openPrice - 1
    return openPrice, closePrice, retHold, retStrategy
Example No. 22
 def test_fetch_basic(self):
     ts = carbonara.AggregatedTimeSerie.from_data(
         timestamps=[datetime64(2014, 1, 1, 12, 0, 0),
                     datetime64(2014, 1, 1, 12, 0, 4),
                     datetime64(2014, 1, 1, 12, 0, 9)],
         values=[3, 5, 6],
         aggregation=carbonara.Aggregation(
             "mean", numpy.timedelta64(1, 's'), None))
     self.assertEqual(
         [(datetime64(2014, 1, 1, 12), 3),
          (datetime64(2014, 1, 1, 12, 0, 4), 5),
          (datetime64(2014, 1, 1, 12, 0, 9), 6)],
         list(ts.fetch()))
     self.assertEqual(
         [(datetime64(2014, 1, 1, 12, 0, 4), 5),
          (datetime64(2014, 1, 1, 12, 0, 9), 6)],
         list(ts.fetch(
             from_timestamp=datetime64(2014, 1, 1, 12, 0, 4))))
     self.assertEqual(
         [(datetime64(2014, 1, 1, 12, 0, 4), 5),
          (datetime64(2014, 1, 1, 12, 0, 9), 6)],
         list(ts.fetch(
             from_timestamp=numpy.datetime64(iso8601.parse_date(
                 "2014-01-01 12:00:04")))))
     self.assertEqual(
         [(datetime64(2014, 1, 1, 12, 0, 4), 5),
          (datetime64(2014, 1, 1, 12, 0, 9), 6)],
         list(ts.fetch(
             from_timestamp=numpy.datetime64(iso8601.parse_date(
                 "2014-01-01 13:00:04+01:00")))))
Example No. 23
    def test_infer_dtype_period(self):
        # GH 13664
        arr = np.array([pd.Period('2011-01', freq='D'),
                        pd.Period('2011-02', freq='D')])
        assert lib.infer_dtype(arr, skipna=True) == 'period'

        arr = np.array([pd.Period('2011-01', freq='D'),
                        pd.Period('2011-02', freq='M')])
        assert lib.infer_dtype(arr, skipna=True) == 'period'

        # starts with nan
        for n in [pd.NaT, np.nan]:
            arr = np.array([n, pd.Period('2011-01', freq='D')])
            assert lib.infer_dtype(arr, skipna=True) == 'period'

            arr = np.array([n, pd.Period('2011-01', freq='D'), n])
            assert lib.infer_dtype(arr, skipna=True) == 'period'

        # different type of nat
        arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
                       dtype=object)
        assert lib.infer_dtype(arr, skipna=False) == 'mixed'

        arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')],
                       dtype=object)
        assert lib.infer_dtype(arr, skipna=False) == 'mixed'
Example No. 24
    def get_holidays(self, start, end, cal="FX"):
        # TODO use Pandas CustomBusinessDays to get more calendars
        holidays_list = []

        if cal == "FX":
            # filter for Christmas & New Year's Day
            for i in range(1970, 2020):
                holidays_list.append(str(i) + "-12-25")
                holidays_list.append(str(i) + "-01-01")

        if cal == "WEEKDAY":
            bday = CustomBusinessDay(weekmask="Sat Sun")

            holidays_list = pandas.date_range(start, end, freq=bday)

        holidays_list = pandas.to_datetime(holidays_list).order()

        # floor start date
        start = np.datetime64(start) - np.timedelta64(1, "D")

        # ceiling end date
        end = np.datetime64(end) + np.timedelta64(1, "D")

        holidays_list = [x for x in holidays_list if x >= start and x <= end]

        return pandas.to_datetime(holidays_list)
Example No. 25
def test_spacetime_join_select(drain_setup, spacetime_crime_agg):
    spacetime_crime_agg.execute()

    left = pd.DataFrame({'District':[1,2], 'Community Area':[1,2], 
        'date':[np.datetime64(date(2015,12,30)), np.datetime64(date(2015,12,31))]})
    df = spacetime_crime_agg.join(left)
    print spacetime_crime_agg.select(df, {'district': ['12h']})
Example No. 26
def test_get_date_time_from_metadata():
    assert (dtt.get_date_time_from_metadata(md1) ==
            '2014-12-27T00:00:00+00:00')
    assert (dtt.get_date_time_from_metadata(md1, formatting='ISO') ==
            '2014-12-27T00:00:00+00:00')
    assert (dtt.get_date_time_from_metadata(md1, formatting='datetime64') ==
            np.datetime64('2014-12-27T00:00:00.000000'))
    assert (dtt.get_date_time_from_metadata(md1, formatting='datetime') ==
            dt1)

    assert (dtt.get_date_time_from_metadata(md2) ==
            '2124-03-25T10:04:48-05:00')
    assert (dtt.get_date_time_from_metadata(md2, formatting='datetime') ==
            dt2)
    assert (dtt.get_date_time_from_metadata(md2, formatting='datetime64') ==
            np.datetime64('2124-03-25T10:04:48'))

    assert (dtt.get_date_time_from_metadata(md3) ==
            '2016-07-12T22:57:32')
    assert (dtt.get_date_time_from_metadata(md3, formatting='datetime') ==
            dt3)
    assert (dtt.get_date_time_from_metadata(md3, formatting='datetime64') ==
            np.datetime64('2016-07-12T22:57:32.000000'))

    assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {}})) ==
            None)
    assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {'date': '2016-07-12'}})) ==
            '2016-07-12')
    assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {'time': '12:00'}})) ==
            '12:00:00')
    assert (dtt.get_date_time_from_metadata(DictionaryTreeBrowser({'General': {'time': '12:00',
                                                                               'time_zone': 'CET'}})) ==
            '12:00:00')
Example No. 27
    def test_query_dates(self):

        p = Pipeline()

        dates = np.array([(np.datetime64('2012-01-01')), 
                          (np.datetime64('2013-04-05')), 
                          (np.datetime64('2014-03-11')),
                          (np.datetime64('2015-01-01'))], dtype=[('dt', 'M8[D]')])
        
        inds = np.array([(i,) for i in xrange(dates.size)], dtype=[('f0', int)])

        np_in = p.add(NumpyRead(dates))

        q2_node = p.add(Query("dt <= DT('2014-01-01')"))
        np_in['output'] > q2_node['input']

        np_out = p.add(NumpyWrite())
        q2_node['output'] > np_out['input']

        np_complement = p.add(NumpyWrite())
        q2_node['complement'] > np_complement['input']

        np_out_inds = p.add(NumpyWrite())
        q2_node['output_inds'] > np_out_inds['input']

        np_complement_inds = p.add(NumpyWrite())
        q2_node['complement_inds'] > np_complement_inds['input']

        self.run_pipeline(p)

        self.assertTrue(np.array_equal(np_out.get_stage().result, dates[:2]))
        self.assertTrue(np.array_equal(np_complement.get_stage().result, dates[2:]))
        self.assertTrue(np.array_equal(np_out_inds.get_stage().result, inds[:2]))
        self.assertTrue(np.array_equal(np_complement_inds.get_stage().result, inds[2:]))
Example No. 28
    def test_is_datetimelike_array_all_nan_nat_like(self):
        arr = np.array([np.nan, pd.NaT, np.datetime64('nat')])
        assert lib.is_datetime_array(arr)
        assert lib.is_datetime64_array(arr)
        assert not lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')])
        assert not lib.is_datetime_array(arr)
        assert not lib.is_datetime64_array(arr)
        assert lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, pd.NaT, np.datetime64('nat'),
                        np.timedelta64('nat')])
        assert not lib.is_datetime_array(arr)
        assert not lib.is_datetime64_array(arr)
        assert not lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, pd.NaT])
        assert lib.is_datetime_array(arr)
        assert lib.is_datetime64_array(arr)
        assert lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, np.nan], dtype=object)
        assert not lib.is_datetime_array(arr)
        assert not lib.is_datetime64_array(arr)
        assert not lib.is_timedelta_or_timedelta64_array(arr)

        assert lib.is_datetime_with_singletz_array(
            np.array([pd.Timestamp('20130101', tz='US/Eastern'),
                      pd.Timestamp('20130102', tz='US/Eastern')],
                     dtype=object))
        assert not lib.is_datetime_with_singletz_array(
            np.array([pd.Timestamp('20130101', tz='US/Eastern'),
                      pd.Timestamp('20130102', tz='CET')],
                     dtype=object))
Example No. 29
def convert_to_date(array, fmt='%m-%d-%Y'):
    ## If array is a np.ndarray with type == np.datetime64, the array can be
    ## returned as such. If it is an np.ndarray of dtype 'object' then conversion
    ## to string is tried according to the fmt parameter.

    if(isinstance(array, np.ndarray) and np.issubdtype(array.dtype, np.datetime64)):
        ## no need to perform any conversion in this case
        return array
    elif(isinstance(array, list) or (isinstance(array, np.ndarray) and array.dtype == 'object')):
        return_value = []
        # Pandas to_datetime handles all the cases where the passed-in
        # data could be any of the combinations of
        #            [list, nparray] X [python_datetime, np.datetime]
        # Because of errors='coerce', any non-compatible datetime value
        # will be converted to pd.NaT. By this comparison, we can figure
        # out whether it is date-castable or not.
        if(len(np.shape(array)) == 2):
            for elem in array:
                temp_val = pd.to_datetime(elem, errors='coerce', box=False, infer_datetime_format=True)
                temp_val = elem if (temp_val[0] == np.datetime64('NaT')) else temp_val
                return_value.append(temp_val)
        elif(isinstance(array, list)):
            temp_val = pd.to_datetime(array, errors='coerce', box=False, infer_datetime_format=True)
            return_value = array if (temp_val[0] == np.datetime64('NaT')) else temp_val
        else:
            temp_val = pd.to_datetime(array, errors='coerce', box=False, infer_datetime_format=True)
            temp_val = array if (temp_val[0] == np.datetime64('NaT')) else temp_val
            return_value = temp_val
        return return_value
    elif(isinstance(array, np.ndarray)):
        warnings.warn("Array could not be converted into a date")
        return array
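
A minimal illustration of the coercion check the function relies on (box=False is dropped here, since it was removed in later pandas versions): unparseable entries become NaT, which is how the code decides whether an input is date-castable:

import pandas as pd

vals = pd.to_datetime(['2014-01-07', 'not a date'], errors='coerce')
print(vals[1] is pd.NaT)  # True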
Example No. 30
    def test_split_key_cmp(self):
        dt1 = numpy.datetime64("2015-01-01T15:03")
        dt1_1 = numpy.datetime64("2015-01-01T15:03")
        dt2 = numpy.datetime64("2015-01-05T15:03")
        td = numpy.timedelta64(60, 's')
        td2 = numpy.timedelta64(300, 's')

        self.assertEqual(
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td))
        self.assertEqual(
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt1_1, td))
        self.assertNotEqual(
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt2, td))
        self.assertNotEqual(
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td2))

        self.assertLess(
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt2, td))
        self.assertLessEqual(
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td))

        self.assertGreater(
            carbonara.SplitKey.from_timestamp_and_sampling(dt2, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt1, td))
        self.assertGreaterEqual(
            carbonara.SplitKey.from_timestamp_and_sampling(dt2, td),
            carbonara.SplitKey.from_timestamp_and_sampling(dt2, td))
Example No. 31
    # convolve daily doses with a transfer function for delayed effectiveness of vaccines
    pts = int(VAX_ONSET_MU + 3 * VAX_ONSET_SIGMA)
    x = np.arange(-pts, pts + 1, 1)
    kernel = np.exp(-((x - VAX_ONSET_MU) ** 2) / (2 * VAX_ONSET_SIGMA ** 2))
    kernel /= kernel.sum()
    convolved = convolve(daily, kernel, mode='same')

    effective_doses_per_100 = convolved.cumsum()

    immune = 0.4 * effective_doses_per_100[len(historical_doses_per_100):] / 100

    return immune


dates, new = covidlive_new_cases('ACT', start_date=np.datetime64('2021-05-10'))

if dates[-1] >= np.datetime64('2022-03-25'):
    TEST_DETECTION_RATE = 0.35
elif dates[-1] >= np.datetime64('2022-01-09'):
    TEST_DETECTION_RATE = 0.27
else:
    TEST_DETECTION_RATE = 0.2

START_VAX_PROJECTIONS = 111  # Aug 29
all_dates = dates
all_new = new

# Current vaccination level:
doses_per_100 = air_doses_per_100(n=len(dates))
Example No. 32
 # Generate a list of cutoff dates
 seconds_per_day = 24 * 60 * 60
 day_gap = 5
 
 start_unix = 1372633253 # Minimum datetime (2013-07-01 00:00:53) in seconds (unix)
 end_unix = 1404169154 # Maximum datetime (2014-06-30 23:59:14) in seconds (unix)
 
 dates = np.arange(start_unix, end_unix, day_gap * seconds_per_day)
 cutoff_dates = np.array([], dtype = 'datetime64[s]')
 
 # Set the seed for reproducibility
 np.random.seed(30061992)
 
 for begin, end in zip(dates[:-1], dates[1:]):
   cutoff_date = np.random.randint(low = begin, high = end)
   cutoff_dates = np.append(cutoff_dates, np.datetime64(cutoff_date, 's'))
 
 # Import the data in chunks with pandas read_csv
 data_chunks = pd.read_csv(filepath_or_buffer = filepath,
                           sep = ",",
                           #nrows = 2000,
                           chunksize = chunk_size,
                           converters = {#"TIMESTAMP" : lambda x: datetime.datetime.fromtimestamp(x),
                                         "POLYLINE": lambda x: json.loads(x),
                                         "GRID_POLYLINE": lambda x: eval(x)})
 
 # Utilities for processing
 savedTest = False
 truncate = lambda x, y: x[:y]
                                          
 for idx, chunk in enumerate(data_chunks):    
Example No. 33
    def benchmark(cls):
        """Run a speed benchmark!"""
        points = SplitKey.POINTS_PER_SPLIT
        sampling = numpy.timedelta64(5, 's')
        resample = numpy.timedelta64(35, 's')

        now = numpy.datetime64("2015-04-03 23:11")
        timestamps = numpy.sort(
            numpy.array([now + i * sampling for i in six.moves.range(points)]))

        print(cls.__name__)
        print("=" * len(cls.__name__))

        for title, values in [
            ("Simple continuous range", six.moves.range(points)),
            ("All 0", [float(0)] * points),
            ("All 1", [float(1)] * points),
            ("0 and 1", [0, 1] * (points // 2)),
            ("1 and 0 random",
             [random.randint(0, 1) for x in six.moves.range(points)]),
            ("Small number random pos/neg",
             [random.randint(-100000, 10000)
              for x in six.moves.range(points)]),
            ("Small number random pos",
             [random.randint(0, 20000) for x in six.moves.range(points)]),
            ("Small number random neg",
             [random.randint(-20000, 0) for x in six.moves.range(points)]),
            ("Sin(x)", list(map(math.sin, six.moves.range(points)))),
            ("random ", [random.random() for x in six.moves.range(points)]),
        ]:
            print(title)
            serialize_times = 50
            aggregation = Aggregation("mean", sampling, None)
            ts = cls.from_data(aggregation, timestamps, values)
            t0 = time.time()
            key = ts.get_split_key()
            for i in six.moves.range(serialize_times):
                e, s = ts.serialize(key, compressed=False)
            t1 = time.time()
            print("  Uncompressed serialization speed: %.2f MB/s" %
                  (((points * 2 * 8) / ((t1 - t0) / serialize_times)) /
                   (1024.0 * 1024.0)))
            print("   Bytes per point: %.2f" % (len(s) / float(points)))

            t0 = time.time()
            for i in six.moves.range(serialize_times):
                cls.unserialize(s, key, 'mean')
            t1 = time.time()
            print("  Unserialization speed: %.2f MB/s" %
                  (((points * 2 * 8) / ((t1 - t0) / serialize_times)) /
                   (1024.0 * 1024.0)))

            t0 = time.time()
            for i in six.moves.range(serialize_times):
                o, s = ts.serialize(key, compressed=True)
            t1 = time.time()
            print("  Compressed serialization speed: %.2f MB/s" %
                  (((points * 2 * 8) / ((t1 - t0) / serialize_times)) /
                   (1024.0 * 1024.0)))
            print("   Bytes per point: %.2f" % (len(s) / float(points)))

            t0 = time.time()
            for i in six.moves.range(serialize_times):
                cls.unserialize(s, key, 'mean')
            t1 = time.time()
            print("  Uncompression speed: %.2f MB/s" %
                  (((points * 2 * 8) / ((t1 - t0) / serialize_times)) /
                   (1024.0 * 1024.0)))

            def per_sec(t1, t0):
                return 1 / ((t1 - t0) / serialize_times)

            t0 = time.time()
            for i in six.moves.range(serialize_times):
                list(ts.split())
            t1 = time.time()
            print("  split() speed: %.2f Hz" % per_sec(t1, t0))

            # NOTE(sileht): propose a new series with half overload timestamps
            pts = ts.ts.copy()
            tsbis = cls(ts=pts, aggregation=aggregation)
            tsbis.ts['timestamps'] = (
                tsbis.timestamps -
                numpy.timedelta64(sampling * points / 2, 's'))

            t0 = time.time()
            for i in six.moves.range(serialize_times):
                ts.merge(tsbis)
            t1 = time.time()
            print("  merge() speed %.2f Hz" % per_sec(t1, t0))

            for agg in [
                    'mean', 'sum', 'max', 'min', 'std', 'median', 'first',
                    'last', 'count', '5pct', '90pct'
            ]:
                serialize_times = 3 if agg.endswith('pct') else 10
                ts = cls(ts=pts, aggregation=aggregation)
                t0 = time.time()
                for i in six.moves.range(serialize_times):
                    ts.resample(resample)
                t1 = time.time()
                print("  resample(%s) speed: %.2f Hz" % (agg, per_sec(t1, t0)))
Example No. 34
"""Time series data manipulation, better with pancetta."""

import collections
import functools
import math
import operator
import random
import re
import struct
import time

import lz4.block
import numpy
import six

UNIX_UNIVERSAL_START64 = numpy.datetime64("1970", 'ns')
ONE_SECOND = numpy.timedelta64(1, 's')


class BeforeEpochError(Exception):
    """Error raised when a timestamp before Epoch is used."""
    def __init__(self, timestamp):
        self.timestamp = timestamp
        super(BeforeEpochError,
              self).__init__("%s is before Epoch" % timestamp)


class UnknownAggregationMethod(Exception):
    """Error raised when the aggregation method is unknown."""
    def __init__(self, agg):
        self.aggregation_method = agg
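
The excerpt stops before any of these are used; here is a minimal sketch, under the assumption that BeforeEpochError guards conversions to seconds since Epoch using the two module constants (to_epoch_seconds is a hypothetical helper, not part of the excerpt):

def to_epoch_seconds(timestamp):
    # hypothetical helper; raise for pre-Epoch timestamps, else convert
    if timestamp < UNIX_UNIVERSAL_START64:
        raise BeforeEpochError(timestamp)
    return (timestamp - UNIX_UNIVERSAL_START64) / ONE_SECOND


print(to_epoch_seconds(numpy.datetime64("2015-04-03T23:11", 'ns')))  # 1428102660.0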
Example No. 35
# Extract data types
data_types = []
for category in config["categories"]:
    for data_type in category["allowed_data_types"]:
        data_types.append(data_type["name"])

# Prepare for plot
max_time = matrix["timestamp"].max().strftime("%Y-%m-%d")
min_time = matrix["timestamp"].min().strftime("%Y-%m-%d")
figure, subplot = plt.subplots(len(data_types), 1, constrained_layout=True)

figure.suptitle('Records by timeframe (' + time_frame_size + ') from ' + min_time + " to " + max_time)
figure.set_figheight(30)
figure.set_figwidth(30)

ts = (time_ranges - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')

label_array = np.reshape(ts, (-1, 1))
feature_matrix = np.reshape(ts, (-1, 1))
for data_type in data_types:
    # Filter by data type
    data_type_records = matrix.loc[matrix["data_type"] == data_type]

    # Ignore the data_type column
    data_type_records = data_type_records.loc[:, "timestamp":]

    # Group by timeslots and count the records
    record_number_by_timeframe = data_type_records.groupby(pd.Grouper(key='timestamp', freq=time_frame_size)).mean()

    # Extract time ranges for data_type
    data_type_time_ranges = record_number_by_timeframe.reset_index()['timestamp'].to_numpy()
Example No. 36

from __future__ import absolute_import
import numpy as np
import re
import os
import time

UNIT = 'us'
TYPE = 'M8[' + UNIT + ']'
DTYPE = np.dtype(TYPE)

NOT_A_DATE_TIME = np.datetime64('NaT')
POSITIVE_INFINITY = np.datetime64('294247-01-09T04:00:54.775807')
NEGATIVE_INFINITY = np.datetime64('-290308-12-22T19:59:05.224191')


def is_undefined(numpy_time):
    return str(numpy_time) == str(NOT_A_DATE_TIME)


def is_positive_infinity(numpy_time):
    return numpy_time == POSITIVE_INFINITY


def is_negative_infinity(numpy_time):
    return numpy_time == NEGATIVE_INFINITY
Example No. 37
def projected_vaccine_immune_population(t, historical_doses_per_100):
    """compute projected future susceptible population, given an array
    historical_doses_per_100 of cumulative doses per 100 population prior to and
    including today (length doesn't matter, so long as it goes back longer than
    VAX_ONSET_MU plus 3 * VAX_ONSET_SIGMA), and assuming a certain vaccine efficacy and
    rollout schedule"""

    # We assume vaccine effectiveness after each dose ramps up the integral of a Gaussian
    # with the following mean and stddev in days:
    VAX_ONSET_MU = 10.5 
    VAX_ONSET_SIGMA = 3.5

    SEP = np.datetime64('2021-09-01').astype(int) - dates[-1].astype(int)
    OCT = np.datetime64('2021-10-01').astype(int) - dates[-1].astype(int)

    doses_per_100 = np.zeros_like(t)
    doses_per_100[0] = historical_doses_per_100[-1]

    # History of previously projected rates, so I can remake old projections
    # (checked in descending date order so every branch is reachable):
    if dates[-1] >= np.datetime64('2021-10-30'):
        AUG_RATE = None
        SEP_RATE = None
        OCT_RATE = 0.5
    elif dates[-1] >= np.datetime64('2021-10-21'):
        AUG_RATE = None
        SEP_RATE = None
        OCT_RATE = 0.1
    elif dates[-1] >= np.datetime64('2021-10-10'):
        AUG_RATE = None
        SEP_RATE = None
        OCT_RATE = 1.3
    else:
        AUG_RATE = 1.4
        SEP_RATE = 1.6
        OCT_RATE = 1.8

    for i in range(1, len(doses_per_100)):
        if i < SEP:
            doses_per_100[i] = doses_per_100[i - 1] + AUG_RATE
        elif i < OCT:
            doses_per_100[i] = doses_per_100[i - 1] + SEP_RATE
        else:
            doses_per_100[i] = doses_per_100[i - 1] + OCT_RATE

    if dates[-1] >= np.datetime64('2021-11-21'):
        MAX_DOSES_PER_100 = 2 * 84.0
    else:
        MAX_DOSES_PER_100 = 2 * 85.0
    doses_per_100 = np.clip(doses_per_100, 0, MAX_DOSES_PER_100)

    all_doses_per_100 = np.concatenate([historical_doses_per_100, doses_per_100])
    # The "prepend=0" makes it as if all the doses in the initial day were just
    # administered all at once, but as long as historical_doses_per_100 is long enough
    # for it to have taken full effect, it doesn't matter.
    daily = np.diff(all_doses_per_100, prepend=0)

    # convolve daily doses with a transfer function for delayed effectiveness of vaccines
    pts = int(VAX_ONSET_MU + 3 * VAX_ONSET_SIGMA)
    x = np.arange(-pts, pts + 1, 1)
    kernel = np.exp(-((x - VAX_ONSET_MU) ** 2) / (2 * VAX_ONSET_SIGMA ** 2))
    kernel /= kernel.sum()
    convolved = convolve(daily, kernel, mode='same')

    effective_doses_per_100 = convolved.cumsum()

    immune = 0.4 * effective_doses_per_100[len(historical_doses_per_100):] / 100

    return immune
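
A quick demonstration of the prepend=0 behaviour mentioned in the comment above: with it, np.diff returns the first cumulative value itself as the first daily increment:

import numpy as np

print(np.diff(np.array([10.0, 12.0, 15.0]), prepend=0))  # [10.  2.  3.]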
Example No. 38
class TestIndexConstructorInference:
    @pytest.mark.parametrize("na_value", [None, np.nan])
    @pytest.mark.parametrize("vtype", [list, tuple, iter])
    def test_construction_list_tuples_nan(self, na_value, vtype):
        # GH#18505 : valid tuples containing NaN
        values = [(1, "two"), (3.0, na_value)]
        result = Index(vtype(values))
        expected = MultiIndex.from_tuples(values)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        [
            int, "int64", "int32", "int16", "int8", "uint64", "uint32",
            "uint16", "uint8"
        ],
    )
    def test_constructor_int_dtype_float(self, dtype):
        # GH#18400
        if is_unsigned_integer_dtype(dtype):
            index_type = UInt64Index
        else:
            index_type = Int64Index

        expected = index_type([0, 1, 2, 3])
        result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("cast_index", [True, False])
    @pytest.mark.parametrize("vals",
                             [[True, False, True],
                              np.array([True, False, True], dtype=bool)])
    def test_constructor_dtypes_to_object(self, cast_index, vals):
        if cast_index:
            index = Index(vals, dtype=bool)
        else:
            index = Index(vals)

        assert type(index) is Index
        assert index.dtype == object

    def test_constructor_categorical_to_object(self):
        # GH#32167 Categorical data and dtype=object should return object-dtype
        ci = CategoricalIndex(range(5))
        result = Index(ci, dtype=object)
        assert not isinstance(result, CategoricalIndex)

    def test_constructor_infer_periodindex(self):
        xp = period_range("2012-1-1", freq="M", periods=3)
        rs = Index(xp)
        tm.assert_index_equal(rs, xp)
        assert isinstance(rs, PeriodIndex)

    @pytest.mark.parametrize("pos", [0, 1])
    @pytest.mark.parametrize(
        "klass,dtype,ctor",
        [
            (DatetimeIndex, "datetime64[ns]", np.datetime64("nat")),
            (TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")),
        ],
    )
    def test_constructor_infer_nat_dt_like(self, pos, klass, dtype, ctor,
                                           nulls_fixture, request):
        if isinstance(nulls_fixture, Decimal):
            # We dont cast these to datetime64/timedelta64
            return

        expected = klass([NaT, NaT])
        assert expected.dtype == dtype
        data = [ctor]
        data.insert(pos, nulls_fixture)

        warn = None
        if nulls_fixture is NA:
            expected = Index([NA, NaT])
            mark = pytest.mark.xfail(
                reason="Broken with np.NaT ctor; see GH 31884")
            request.node.add_marker(mark)
            # GH#35942 numpy will emit a DeprecationWarning within the
            #  assert_index_equal calls.  Since we can't do anything
            #  about it until GH#31884 is fixed, we suppress that warning.
            warn = DeprecationWarning

        result = Index(data)

        with tm.assert_produces_warning(warn):
            tm.assert_index_equal(result, expected)

        result = Index(np.array(data, dtype=object))

        with tm.assert_produces_warning(warn):
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("swap_objs", [True, False])
    def test_constructor_mixed_nat_objs_infers_object(self, swap_objs):
        # mixed np.datetime64/timedelta64 nat results in object
        data = [np.datetime64("nat"), np.timedelta64("nat")]
        if swap_objs:
            data = data[::-1]

        expected = Index(data, dtype=object)
        tm.assert_index_equal(Index(data), expected)
        tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
Example No. 39
def get_reference_date():
    return np.datetime64(get_today_str())
Example No. 40
# Now get some basic parameters for the run
start_date = '01/01/2011'
end_date = '01/03/2016'
plot = ['Belian', 'LF', 'B North', 'B South', 'E', 'Seraya', 'DC1', 'DC2']
LAI_MH = [6.69, 4.78, 3.00, 2.26, 3.84, 6.22, 5.93, 5.89]
LAI_rad = [8.30, 5.76, 4.87, 3.73, 5.70, 9.01, 8.25, 9.35]
LAI_hemiphot = [4.46, 3.76, 3.65, 3.44, 3.93, 4.27, 4.40, 4.05]
Csoil = [
    8295.66, 11275.18, 3934.03, 4916.91, 11925.08, 24347.79, 8144.94, -9999.
]

#

# Initiate some arrays to host time series
d, m, y = start_date.split('/')
start = np.datetime64(y + '-' + m + '-' + d, 'D')
d, m, y = end_date.split('/')
end = np.datetime64(y + '-' + m + '-' + d, 'D')
date = np.arange(start, end + np.timedelta64(1, 'D'), dtype='datetime64[D]')

N_t = date.size

mn2t_in = np.zeros(N_t) - 9999.
mx2t_in = np.zeros(N_t) - 9999.
vpd_in = np.zeros(N_t) - 9999.
ssrd_in = np.zeros(N_t) - 9999.
pptn_in = np.zeros(N_t) - 9999.
mn2t21_in = np.zeros(N_t) - 9999.
mx2t21_in = np.zeros(N_t) - 9999.
vpd21_in = np.zeros(N_t) - 9999.
ssrd21_in = np.zeros(N_t) - 9999.
Example No. 41
    Ta_LF[:first_ind] = np.nan
    Ta_LF[last_ind + 1:-1] = np.nan
    Ta_LF[np.where(GHT_powdB < -300)[0]] = np.nan
    GHT_powdB[np.where(GHT_powdB < -300)[0]] = np.nan

    return GHT_powdB, pdB_LF, T_amp, Ta_LF


#%%

#fGHT = [15, 35]

lp_cutoff_period = 6  # hrs

t_start = np.datetime64('2018-02-01T00:00')
t_end = np.datetime64('2018-02-01T00:00')

for station in stations:
    print(station)
    #    station = 'BBWU'#TWLV'

    # %% Getting back the objects:
    with open('output_results/mp' + station + '.pickle',
              'rb') as f:  # Python 3: open(..., 'rb')
        t, t_dt64, freqs, Pdb_array, pp, data_dir, station = pickle.load(
            f, encoding='latin1')

    t_start = np.min(np.append(t_dt64, t_start))
    t_end = np.max(np.append(t_dt64, t_end))
Example No. 42
# See e.g. the discussion on the mailing list
#
# https://mail.python.org/pipermail/numpy-discussion/2020-April/080566.html
#
# and the issue
#
# https://github.com/numpy/numpy-stubs/issues/41
#
# for more context.
np.float32([1.0, 0.0, 0.0])  # E: incompatible type
np.complex64([])  # E: incompatible type

np.complex64(1, 2)  # E: Too many arguments
# TODO: protocols (can't check for non-existent protocols w/ __getattr__)

np.datetime64(0)  # E: non-matching overload

dt_64 = np.datetime64(0, "D")
td_64 = np.timedelta64(1, "h")

dt_64 + dt_64  # E: Unsupported operand types

td_64 - dt_64  # E: Unsupported operand types
td_64 / dt_64  # E: No overload
td_64 % 1  # E: Unsupported operand types
td_64 % dt_64  # E: Unsupported operand types


class A:
    def __float__(self):
        return 1.0
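
For contrast with the operations the stubs reject above, these datetime64/timedelta64 combinations are valid, both at runtime and in the type stubs:

import numpy as np

dt_64 = np.datetime64(0, "D")
td_64 = np.timedelta64(1, "h")

print(dt_64 - dt_64)  # datetime - datetime -> timedelta (0 days)
print(dt_64 + td_64)  # datetime + timedelta -> datetime (1970-01-01T01)
print(td_64 / td_64)  # timedelta / timedelta -> float (1.0)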
Example No. 43
    # Test for #25057
    # pytz doesn't support fold. Check that we raise
    # if fold is passed with pytz
    msg = "pytz timezones do not support fold. Please use dateutil timezones."
    tz = pytz.timezone("Europe/London")
    with pytest.raises(ValueError, match=msg):
        Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0)


@pytest.mark.parametrize("fold", [0, 1])
@pytest.mark.parametrize(
    "ts_input",
    [
        1572136200000000000,
        1572136200000000000.0,
        np.datetime64(1572136200000000000, "ns"),
        "2019-10-27 01:30:00+01:00",
        datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc),
    ],
)
def test_timestamp_constructor_fold_conflict(ts_input, fold):
    # Test for #25057
    # Check that we raise on fold conflict
    msg = ("Cannot pass fold with possibly unambiguous input: int, float, "
           "numpy.datetime64, str, or timezone-aware datetime-like. "
           "Pass naive datetime-like or build Timestamp from components.")
    with pytest.raises(ValueError, match=msg):
        Timestamp(ts_input=ts_input, fold=fold)


@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None])
Example No. 44
        return pd.CategoricalIndex(data,
                                   categories=cats,
                                   ordered=idx.ordered,
                                   name=idx.name)
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(l) for l in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)
    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))


_simple_fake_mapping = {
    'b': np.bool_(True),
    'V': np.void(b' '),
    'M': np.datetime64('1970-01-01'),
    'm': np.timedelta64(1),
    'S': np.str_('foo'),
    'a': np.str_('foo'),
    'U': np.unicode_('foo'),
    'O': 'foo'
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ('i', 'f', 'u'):
        return dtype.type(1)
    elif dtype.kind == 'c':
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
        o = _simple_fake_mapping[dtype.kind]
Example No. 45
dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10', 'output')
for k, siteNo in enumerate(siteNoLst):
    print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
    saveFile = os.path.join(dirWRTDS, siteNo)
    df = pd.read_csv(saveFile, index_col=None).set_index('date')
    # df = utils.time.datePdf(df)
    dictWRTDS[siteNo] = df
# Observation
dictObs = dict()
for k, siteNo in enumerate(siteNoLst):
    print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
    df = waterQuality.readSiteTS(siteNo, varLst=codeLst, freq='W')
    dictObs[siteNo] = df

# calculate correlation
tt = np.datetime64('2010-01-01')
ind1 = np.where(df.index.values < tt)[0]
ind2 = np.where(df.index.values >= tt)[0]
dictLSTM = dictLSTMLst[1]
corrMat = np.full([len(siteNoLst), len(codeLst), 3], np.nan)
for ic, code in enumerate(codeLst):
    for siteNo in dictSite[code]:
        indS = siteNoLst.index(siteNo)
        v1 = dictLSTM[siteNo][code].iloc[ind2].values
        v2 = dictWRTDS[siteNo][code].iloc[ind2].values
        v3 = dictObs[siteNo][code].iloc[ind2].values
        rmse1, corr1 = utils.stat.calErr(v1, v2)
        rmse2, corr2 = utils.stat.calErr(v1, v3)
        rmse3, corr3 = utils.stat.calErr(v2, v3)
        corrMat[indS, ic, 0] = corr1
        corrMat[indS, ic, 1] = corr2
        corrMat[indS, ic, 2] = corr3  # truncated in the source; corr3 fills the third slot
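
# Hedged mini-example of the date split used above: np.where against an
# np.datetime64 threshold partitions a datetime64 index into before/after.
import numpy as np
import pandas as pd

dates = pd.date_range('2009-12-20', periods=4, freq='W').values  # datetime64[ns]
tt = np.datetime64('2010-01-01')
ind1 = np.where(dates < tt)[0]    # indices before 2010
ind2 = np.where(dates >= tt)[0]   # indices from 2010 on
assert len(ind1) + len(ind2) == len(dates)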
Exemplo n.º 46
0
# categoricals are handled separately
_any_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2]),
    ('mixed', ['a', np.nan, 2.0]),
    ('floating', [1.0, np.nan, 2.0]),
    ('integer', [1, np.nan, 2]),
    ('mixed-integer-float', [1, np.nan, 2.0]),
    ('decimal', [Decimal(1), np.nan, Decimal(2)]),
    ('boolean', [True, np.nan, False]),
    ('datetime64', [np.datetime64('2013-01-01'), np.nan,
                    np.datetime64('2018-01-01')]),
    ('datetime', [pd.Timestamp('20130101'), np.nan, pd.Timestamp('20180101')]),
    ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
    ('time', [time(1), np.nan, time(2)]),
    ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)])]
ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id
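
# Hedged sketch: the labels above are meant to match pandas' inference via
# pd.api.types.infer_dtype (the exact strings depend on the pandas version).
from decimal import Decimal
import numpy as np
import pandas as pd

assert pd.api.types.infer_dtype([1.0, np.nan, 2.0], skipna=True) == 'floating'
assert pd.api.types.infer_dtype([Decimal(1), np.nan, Decimal(2)],
                                skipna=True) == 'decimal'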


@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
Exemplo n.º 47
0
class TestDatetimeIndexComparisons(object):
    @pytest.mark.parametrize('other', [
        datetime(2016, 1, 1),
        Timestamp('2016-01-01'),
        np.datetime64('2016-01-01')
    ])
    def test_dti_cmp_datetimelike(self, other, tz):
        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
        if tz is not None:
            if isinstance(other, np.datetime64):
                # no tzaware version available
                return
            elif isinstance(other, Timestamp):
                other = other.tz_localize(dti.tzinfo)
            else:
                other = tslib._localize_pydatetime(other, dti.tzinfo)

        result = dti == other
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = dti > other
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = dti >= other
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

        result = dti < other
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = dti <= other
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

    def test_dti_cmp_non_datetime(self, tz):
        # note: without the "test_" prefix pytest never collects this method
        # GH#19301 by convention datetime.date is not considered comparable
        # to Timestamp or DatetimeIndex.  This may change in the future.
        dti = pd.date_range('2016-01-01', periods=2, tz=tz)

        other = datetime(2016, 1, 1).date()
        assert not (dti == other).any()
        assert (dti != other).all()
        with pytest.raises(TypeError):
            dti < other
        with pytest.raises(TypeError):
            dti <= other
        with pytest.raises(TypeError):
            dti > other
        with pytest.raises(TypeError):
            dti >= other

    @pytest.mark.parametrize('other', [None, np.nan, pd.NaT])
    def test_dti_eq_null_scalar(self, other, tz):
        # GH#19301
        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
        assert not (dti == other).any()

    @pytest.mark.parametrize('other', [None, np.nan, pd.NaT])
    def test_dti_ne_null_scalar(self, other, tz):
        # GH#19301
        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
        assert (dti != other).all()

    @pytest.mark.parametrize('other', [None, np.nan])
    def test_dti_cmp_null_scalar_inequality(self, tz, other):
        # GH#19301
        dti = pd.date_range('2016-01-01', periods=2, tz=tz)

        with pytest.raises(TypeError):
            dti < other
        with pytest.raises(TypeError):
            dti <= other
        with pytest.raises(TypeError):
            dti > other
        with pytest.raises(TypeError):
            dti >= other

    def test_dti_cmp_nat(self):
        left = pd.DatetimeIndex(
            [pd.Timestamp('2011-01-01'), pd.NaT,
             pd.Timestamp('2011-01-03')])
        right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')])

        for lhs, rhs in [(left, right),
                         (left.astype(object), right.astype(object))]:
            result = rhs == lhs
            expected = np.array([False, False, True])
            tm.assert_numpy_array_equal(result, expected)

            result = lhs != rhs
            expected = np.array([True, True, False])
            tm.assert_numpy_array_equal(result, expected)

            expected = np.array([False, False, False])
            tm.assert_numpy_array_equal(lhs == pd.NaT, expected)
            tm.assert_numpy_array_equal(pd.NaT == rhs, expected)

            expected = np.array([True, True, True])
            tm.assert_numpy_array_equal(lhs != pd.NaT, expected)
            tm.assert_numpy_array_equal(pd.NaT != lhs, expected)

            expected = np.array([False, False, False])
            tm.assert_numpy_array_equal(lhs < pd.NaT, expected)
            tm.assert_numpy_array_equal(pd.NaT > lhs, expected)

    def test_dti_cmp_nat_behaves_like_float_cmp_nan(self):
        fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0])
        fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0])

        didx1 = pd.DatetimeIndex([
            '2014-01-01', pd.NaT, '2014-03-01', pd.NaT, '2014-05-01',
            '2014-07-01'
        ])
        didx2 = pd.DatetimeIndex([
            '2014-02-01', '2014-03-01', pd.NaT, pd.NaT, '2014-06-01',
            '2014-07-01'
        ])
        darr = np.array([
            np_datetime64_compat('2014-02-01 00:00Z'),
            np_datetime64_compat('2014-03-01 00:00Z'),
            np_datetime64_compat('nat'),
            np.datetime64('nat'),
            np_datetime64_compat('2014-06-01 00:00Z'),
            np_datetime64_compat('2014-07-01 00:00Z')
        ])

        cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)]

        # Check pd.NaT is handled the same as np.nan
        with tm.assert_produces_warning(None):
            for idx1, idx2 in cases:

                result = idx1 < idx2
                expected = np.array([True, False, False, False, True, False])
                tm.assert_numpy_array_equal(result, expected)

                result = idx2 > idx1
                expected = np.array([True, False, False, False, True, False])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 <= idx2
                expected = np.array([True, False, False, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx2 >= idx1
                expected = np.array([True, False, False, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 == idx2
                expected = np.array([False, False, False, False, False, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 != idx2
                expected = np.array([True, True, True, True, True, False])
                tm.assert_numpy_array_equal(result, expected)

        with tm.assert_produces_warning(None):
            for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]:
                result = idx1 < val
                expected = np.array([False, False, False, False, False, False])
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 > val
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 <= val
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 >= val
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 == val
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 != val
                expected = np.array([True, True, True, True, True, True])
                tm.assert_numpy_array_equal(result, expected)

        # Check comparisons against a real (non-missing) scalar
        with tm.assert_produces_warning(None):
            for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]:
                result = idx1 < val
                expected = np.array([True, False, False, False, False, False])
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 > val
                expected = np.array([False, False, False, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 <= val
                expected = np.array([True, False, True, False, False, False])
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 >= val
                expected = np.array([False, False, True, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 == val
                expected = np.array([False, False, True, False, False, False])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 != val
                expected = np.array([True, True, False, True, True, True])
                tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('op', [
        operator.eq, operator.ne, operator.gt, operator.ge, operator.lt,
        operator.le
    ])
    def test_comparison_tzawareness_compat(self, op):
        # GH#18162
        dr = pd.date_range('2016-01-01', periods=6)
        dz = dr.tz_localize('US/Pacific')

        with pytest.raises(TypeError):
            op(dr, dz)
        with pytest.raises(TypeError):
            op(dr, list(dz))
        with pytest.raises(TypeError):
            op(dz, dr)
        with pytest.raises(TypeError):
            op(dz, list(dr))

        # Check that aware-aware and naive-naive comparisons do not raise
        assert (dr == dr).all()
        assert (dr == list(dr)).all()
        assert (dz == dz).all()
        assert (dz == list(dz)).all()

        # Check comparisons against scalar Timestamps
        ts = pd.Timestamp('2000-03-14 01:59')
        ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam')

        assert (dr > ts).all()
        with pytest.raises(TypeError):
            op(dr, ts_tz)

        assert (dz > ts_tz).all()
        with pytest.raises(TypeError):
            op(dz, ts)

    @pytest.mark.parametrize('op', [
        operator.eq, operator.ne, operator.gt, operator.ge, operator.lt,
        operator.le
    ])
    def test_nat_comparison_tzawareness(self, op):
        # GH#19276
        # tzaware DatetimeIndex should not raise when compared to NaT
        dti = pd.DatetimeIndex([
            '2014-01-01', pd.NaT, '2014-03-01', pd.NaT, '2014-05-01',
            '2014-07-01'
        ])
        expected = np.array([op == operator.ne] * len(dti))
        result = op(dti, pd.NaT)
        tm.assert_numpy_array_equal(result, expected)

        result = op(dti.tz_localize('US/Pacific'), pd.NaT)
        tm.assert_numpy_array_equal(result, expected)

    def test_dti_cmp_int_raises(self):
        rng = date_range('1/1/2000', periods=10)

        # raise TypeError for now
        with pytest.raises(TypeError):
            rng < rng[3].value

    def test_dti_cmp_list(self):
        rng = date_range('1/1/2000', periods=10)

        result = rng == list(rng)
        expected = rng == rng
        tm.assert_numpy_array_equal(result, expected)
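
# Hedged mini-demo of the convention these tests pin down: equality against
# NaT is elementwise False and inequality elementwise True, mirroring NaN.
import numpy as np
import pandas as pd

idx = pd.DatetimeIndex(['2014-01-01', pd.NaT])
assert not (idx == pd.NaT).any()
assert (idx != pd.NaT).all()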
Exemplo n.º 48
0
        print(e)

    q = '''
    Select count(distinct user_id) As dau, USER_DATE 
    from users
    group by USER_DATE
    '''

    result = session.execute(q)
    x = []
    y = []
    nfo = [row for row in result]
    for i in nfo:
        x.append(i[0])
        val = str(i[1]).split()[0]
        val = np.datetime64(val)
        y.append(val)

    df = pd.DataFrame({'Users': x, 'Date': y})
    df = df.groupby(by='Date', as_index=False).sum()
    print(df)
    fig = px.line(df, x='Date', y='Users')
    fig.show()
    query = '''
    Select count(DISTINCT users.user_id) as amount, resources.location_
    FROM users JOIN RESOURCES ON users.user_id = RESOURCES.USER_ID
    GROUP BY resources.location_
    '''
    res = session.execute(query)
    nfo = [row for row in res]
    print(nfo)
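
# Hedged sketch of the date normalization above: keep only the date part of
# a "YYYY-MM-DD HH:MM:SS" string and parse it as a day-resolution datetime64.
import numpy as np

raw = '2021-03-05 14:00:00'  # illustrative value, not from the query
val = np.datetime64(raw.split()[0])
assert val == np.datetime64('2021-03-05')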
Exemplo n.º 49
0
    pd.offsets.Hour(2),
    timedelta(hours=2),
    np.timedelta64(2, 'h'),
    Timedelta(hours=2)
],
                ids=str)
def delta(request):
    # Several ways of representing two hours
    return request.param


@pytest.fixture(params=[
    datetime(2011, 1, 1),
    DatetimeIndex(['2011-01-01', '2011-01-02']),
    DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize('US/Eastern'),
    np.datetime64('2011-01-01'),
    Timestamp('2011-01-01')
],
                ids=lambda x: type(x).__name__)
def addend(request):
    return request.param


class TestDatetimeIndexComparisons(object):
    @pytest.mark.parametrize('other', [
        datetime(2016, 1, 1),
        Timestamp('2016-01-01'),
        np.datetime64('2016-01-01')
    ])
    def test_dti_cmp_datetimelike(self, other, tz):
        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
Exemplo n.º 50
0
_any_skipna_inferred_dtype = [
    ("string", ["a", np.nan, "c"]),
    ("string", ["a", pd.NA, "c"]),
    ("bytes", [b"a", np.nan, b"c"]),
    ("empty", [np.nan, np.nan, np.nan]),
    ("empty", []),
    ("mixed-integer", ["a", np.nan, 2]),
    ("mixed", ["a", np.nan, 2.0]),
    ("floating", [1.0, np.nan, 2.0]),
    ("integer", [1, np.nan, 2]),
    ("mixed-integer-float", [1, np.nan, 2.0]),
    ("decimal", [Decimal(1), np.nan, Decimal(2)]),
    ("boolean", [True, np.nan, False]),
    ("boolean", [True, pd.NA, False]),
    ("datetime64",
     [np.datetime64("2013-01-01"), np.nan,
      np.datetime64("2018-01-01")]),
    ("datetime", [pd.Timestamp("20130101"), np.nan,
                  pd.Timestamp("20180101")]),
    ("date", [date(2013, 1, 1), np.nan,
              date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ("timedelta", [timedelta(1), np.nan, timedelta(2)]),
    ("time", [time(1), np.nan, time(2)]),
    ("period", [pd.Period(2013), pd.NaT,
                pd.Period(2018)]),
    ("interval", [pd.Interval(0, 1), np.nan,
                  pd.Interval(0, 2)]),
]
Exemplo n.º 51
0
# Load Google stock price data (fields date, open, high, low, close, volume,
# adj_close) from the mpl-data/example directory.  This array stores the date
# as an np.datetime64 with a day unit ('D') in the 'date' column.
with cbook.get_sample_data('goog.npz') as datafile:
    data = np.load(datafile)['price_data']

print(data)

fig, ax = plt.subplots()
ax.plot('date', 'adj_close', data=data)

# format the ticks
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(months)

# round to nearest years.
datemin = np.datetime64(data['date'][0], 'Y')
datemax = np.datetime64(data['date'][-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)

# format the coords message box
ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
ax.format_ydata = lambda x: '$%1.2f' % x  # format the price.
ax.grid(True)

# rotates and right aligns the x labels, and moves the bottom of the
# axes up to make room for them
fig.autofmt_xdate()

plt.show()
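
# Hedged mini-example of the rounding idiom above: converting a datetime64
# to a coarser unit truncates, so adding one unit yields a ceiling bound.
import numpy as np

d = np.datetime64('2004-08-19')
lo = np.datetime64(d, 'Y')        # truncates to 2004
hi = lo + np.timedelta64(1, 'Y')  # 2005
assert lo <= d < hi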
Exemplo n.º 52
0
"""
This file is used to test the position of the Earth in ideal conditions.

"""
import numpy as np
import matplotlib.pyplot as plt
import localSun as current
import math as math
import random
import help_functions as hp
import itertools

plt.close('all')

day = 86400 - 86400 / 366
year = 31536000
idealStart = np.array([np.datetime64('2018-12-22T00:00:00', 's')])
tiltTest = math.radians(23.44)  # Normal tilt

print(
    "(1) Testing to see if the numbers are exact on the solstices and equinoxes. These numbers should be exact to arbitrary precision regardless of tilt.\n"
)

# Solstices and equinoxes
solsSun1 = current.localSun(time=np.array(
    [np.datetime64('2018-12-22T00:00:00', 's')]),
                            tilt=tiltTest,
                            day=day,
                            year=year,
                            start=idealStart)
solsSun2 = current.localSun(time=np.array(
    [np.datetime64('2019-06-22T12:00:00', 's')]),
Exemplo n.º 54
0
def make_plot(fnum, x, ydata, ylab, ylab2):

    hours = mdates.HourLocator(interval=2)  # every 2 hours
    minutes = mdates.MinuteLocator(interval=30)  # every half hour
    x_major_fmt = mdates.DateFormatter('%H:%M')

    f, ax1 = plt.subplots(num=fnum, clear=True)

    f.set_size_inches(10, 6)

    # x comes in as np.datetime64, but older matplotlib versions
    # can't handle this. For those convert to python datetime:
    if StrictVersion(matplotlib.__version__) < StrictVersion('2.2.2'):
        print("Legacy python datetime for the x axis")
        x = x.astype('O')

    for y, lab in ydata:
        ax1.plot(x, y, label=lab)
    ax1.set_ylabel(ylab)

    if len(ydata) > 1:
        ax1.legend()

    ax1.spines['top'].set_visible(False)
    ax1.spines['right'].set_visible(False)

    # format the ticks
    ax1.xaxis.set_major_locator(hours)
    ax1.xaxis.set_major_formatter(x_major_fmt)
    ax1.xaxis.set_minor_locator(minutes)

    ax2 = ax1.twinx()
    ax2.set_ylabel(ylab2)

    ax2.spines['top'].set_visible(False)
    ax2.spines['left'].set_visible(False)

    # round x range to nearest hours.
    #datemax = np.datetime64(d['datetime'][-1], 'h') + np.timedelta64(1, 'h')  # Shows the last 26 hours of the data in the dataset. Will freeze when no new data is available.
    datemax = np.datetime64(datetime.datetime.now(), 'h') + np.timedelta64(
        1, 'h'
    )  # Shows the last 26 hours of data, might show empty plot if there is no data
    datemin = datemax - np.timedelta64(26, 'h')  # Exactly one day +2 hours
    if StrictVersion(matplotlib.__version__) < StrictVersion('2.2.2'):
        datemin = datemin.astype('O')
        datemax = datemax.astype('O')
    ax1.set_xlim(datemin, datemax)

    # Round the y axis similarly
    y_min, y_max = ax1.get_ylim()
    yt = ax1.get_yticks()
    ax1.set_ylim(yt[0], yt[-1])

    # ax2 is empty, so the default y range is 0.0 to 1.0.
    # Set it to match such that the ticks line up:
    ax2.set_ylim(yt[0], yt[-1])

    # Overwrite the tick decorator to convert C to F dynamically:
    ax1.yaxis.set_major_formatter(c2f_formatter)

    ax1.grid(b=True, which='major', color=(0.75, 0.75, 0.75), linestyle='-')
    ax1.grid(b=True, which='minor', color=(0.8, 0.8, 0.8), linestyle=':')

    plt.setp(ax1.xaxis.get_majorticklabels(),
             rotation=30,
             horizontalalignment='right')

    return f, ax1
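
# Hedged sketch of the 26-hour window computed above, rounded to the hour.
import datetime
import numpy as np

datemax = np.datetime64(datetime.datetime.now(), 'h') + np.timedelta64(1, 'h')
datemin = datemax - np.timedelta64(26, 'h')
assert datemax - datemin == np.timedelta64(26, 'h')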
Exemplo n.º 55
0
                                  curr_trans.at[0,'Trans Eff Dt']]
                df2 = pd.concat([df2,curr_trans], axis = 0)
                df2.loc[:,'Next Trans Eff Dt'] = (df2.sort_values('Trans Dt')
                    ['Trans Eff Dt'].shift(-1))
                df2.loc[df2['Next Trans Eff Dt'].isnull(),
                        'Next Trans Eff Dt'] = \
                        curr_trans.at[0,'Pol Exp Dt'] + np.timedelta64(1,'D')
                term_days = ((curr_trans.at[0,'Pol Exp Dt'] -
                              curr_trans.at[0,'Pol Eff Dt']) 
                            / np.timedelta64(1,'D')) + 1
                weights = (((df2['Next Trans Eff Dt'] - df2['Trans Eff Dt']) /
                             np.timedelta64(1,'D')).values) / term_days
                weights = np.tile(weights.reshape(-1,1),(1,self.nCovs))
                df_new.loc[df_new['Trans Number'] == i,coverages] = \
                         sum(weights * df2[coverages].values).round(2)
        df_new.loc[:,'Total Premium'] = df_new[coverages].sum(axis=1).round(2)
        return df_new
    
    def calcTransWrittenPremium(self,df):
        '''Premiums in df must be WP at time of transaction'''
        premiums = ['Coverage: ' + str(i) + ' Premium' 
                     for i in range(self.nCovs)] + ['Total Premium']
        df.loc[:,premiums] = df[premiums] - df[premiums].shift(1).fillna(0)
        return df


if __name__ == '__main__':
    x = insuranceDataGenerator()
    acct_df, claim_df = x.generateAccountTrans(
            start_date = np.datetime64('2010-01-01'))
Exemplo n.º 56
0
class TestTimedeltaArray:
    @pytest.mark.parametrize("dtype",
                             [int, np.int32, np.int64, "uint32", "uint64"])
    def test_astype_int(self, dtype):
        arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")])

        if np.dtype(dtype).kind == "u":
            expected_dtype = np.dtype("uint64")
        else:
            expected_dtype = np.dtype("int64")
        expected = arr.astype(expected_dtype)

        warn = None
        if dtype != expected_dtype:
            warn = FutureWarning
        msg = " will return exactly the specified dtype"
        with tm.assert_produces_warning(warn, match=msg):
            result = arr.astype(dtype)

        assert result.dtype == expected_dtype
        tm.assert_numpy_array_equal(result, expected)

    def test_setitem_clears_freq(self):
        a = TimedeltaArray(pd.timedelta_range("1H", periods=2, freq="H"))
        a[0] = Timedelta("1H")
        assert a.freq is None

    @pytest.mark.parametrize(
        "obj",
        [
            Timedelta(seconds=1),
            Timedelta(seconds=1).to_timedelta64(),
            Timedelta(seconds=1).to_pytimedelta(),
        ],
    )
    def test_setitem_objects(self, obj):
        # make sure we accept timedelta64 and timedelta in addition to Timedelta
        tdi = pd.timedelta_range("2 Days", periods=4, freq="H")
        arr = TimedeltaArray(tdi, freq=tdi.freq)

        arr[0] = obj
        assert arr[0] == Timedelta(seconds=1)

    @pytest.mark.parametrize(
        "other",
        [
            1,
            np.int64(1),
            1.0,
            np.datetime64("NaT"),
            pd.Timestamp("2021-01-01"),
            "invalid",
            np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
            (np.arange(10) * 24 * 3600 * 10**9).view("datetime64[ns]"),
            pd.Timestamp("2021-01-01").to_period("D"),
        ],
    )
    @pytest.mark.parametrize("index", [True, False])
    def test_searchsorted_invalid_types(self, other, index):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = TimedeltaArray(data, freq="D")
        if index:
            arr = pd.Index(arr)

        msg = "|".join([
            "searchsorted requires compatible dtype or scalar",
            "value should be a 'Timedelta', 'NaT', or array of those. Got",
        ])
        with pytest.raises(TypeError, match=msg):
            arr.searchsorted(other)
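
# Hedged usage note: searchsorted on timedelta-typed containers accepts
# Timedelta-like scalars, in contrast to the invalid types rejected above.
import pandas as pd

tdi = pd.timedelta_range('1H', periods=4, freq='H')  # 1H, 2H, 3H, 4H
assert tdi.searchsorted(pd.Timedelta(hours=2, minutes=30)) == 2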
Exemplo n.º 57
0
 def test_init_bounds_datetime_yaxis(self):
     start = np.datetime64(dt.datetime.today())
     end = start+np.timedelta64(1, 's')
     bounds = (-10, start, 10, end)
     image = Image(np.flipud(self.array), bounds=bounds)
     self.assertEqual(image.bounds.lbrt(), bounds)
Exemplo n.º 58
0
    array_2 = np.arange(0, 100, 10)
    array_2.flags.writeable = array_2_writeable

    hundred_elements = np.arange(100)
    tm.assert_categorical_equal(
        cut(hundred_elements, array_1), cut(hundred_elements, array_2)
    )


@pytest.mark.parametrize(
    "conv",
    [
        lambda v: Timestamp(v),
        lambda v: to_datetime(v),
        lambda v: np.datetime64(v),
        lambda v: Timestamp(v).to_pydatetime(),
    ],
)
def test_datetime_bin(conv):
    data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")]
    bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"]

    expected = Series(
        IntervalIndex(
            [
                Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
                Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])),
            ],
            "right",
        )
Exemplo n.º 59
0
 def test_init_data_datetime_yaxis(self):
     start = np.datetime64(dt.datetime.today())
     end = start+np.timedelta64(1, 's')
     Image(np.flipud(self.array), bounds=(-10, start, 10, end))
Exemplo n.º 60
0
    tm.assert_series_equal(s, expected)

    # GH 14155
    s = Series(10 * [np.timedelta64(10, "m")])
    s.loc[[1, 2, 3]] = np.timedelta64(20, "m")
    expected = pd.Series(10 * [np.timedelta64(10, "m")])
    expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, "m"))
    tm.assert_series_equal(s, expected)


@pytest.mark.parametrize(
    "nat_val,should_cast",
    [
        (pd.NaT, True),
        (np.timedelta64("NaT", "ns"), False),
        (np.datetime64("NaT", "ns"), True),
    ],
)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_dt64_series_assign_nat(nat_val, should_cast, tz):
    # some nat-like values should be cast to datetime64 when inserting
    #  into a datetime64 series.  Others should coerce to object
    #  and retain their dtypes.
    dti = pd.date_range("2016-01-01", periods=3, tz=tz)
    base = pd.Series(dti)
    expected = pd.Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype)
    if not should_cast:
        expected = expected.astype(object)

    ser = base.copy(deep=True)
    ser[0] = nat_val
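
# Hedged mini-demo of the should_cast=True branch above: a datetime64 NaT
# slots into a datetime64 Series as pd.NaT and the dtype is preserved
# (behavior as of the pandas version these tests target).
import numpy as np
import pandas as pd

ser = pd.Series(pd.date_range('2016-01-01', periods=3))
ser[0] = np.datetime64('NaT', 'ns')
assert ser[0] is pd.NaT
assert ser.dtype == 'datetime64[ns]'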