Example #1
    def test_tab_completion(self):
        # GH 9910
        s = Series(list('abcd'))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert 'str' in dir(s)
        assert 'dt' not in dir(s)
        assert 'cat' not in dir(s)

        # similarly for .dt
        s = Series(date_range('1/1/2015', periods=5))
        assert 'dt' in dir(s)
        assert 'str' not in dir(s)
        assert 'cat' not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there as well if the categories are of that type.
        s = Series(list('abbcd'), dtype="category")
        assert 'cat' in dir(s)
        assert 'str' in dir(s)  # as it is a string categorical
        assert 'dt' not in dir(s)

        # similar to cat and str
        s = Series(date_range('1/1/2015', periods=5)).astype("category")
        assert 'cat' in dir(s)
        assert 'str' not in dir(s)
        assert 'dt' in dir(s)  # as it is a datetime categorical
Example #2
    def test_series(self):

        # GH6407
        # inferring series

        # invalid type of Series
        for s in [Series(np.arange(10)),
                  Series(np.arange(10.))]:
            self.assertRaises(TypeError, lambda: infer_freq(s))

        # a non-convertible string
        self.assertRaises(ValueError,
                          lambda: infer_freq(Series(['foo', 'bar'])))

        # cannot infer on PeriodIndex
        for freq in [None, 'L', 'Y']:
            s = Series(period_range('2013', periods=10, freq=freq))
            self.assertRaises(TypeError, lambda: infer_freq(s))

        # DateTimeIndex
        for freq in ['M', 'L', 'S']:
            s = Series(date_range('20130101', periods=10, freq=freq))
            inferred = infer_freq(s)
            self.assertEqual(inferred, freq)

        s = Series(date_range('20130101', '20130110'))
        inferred = infer_freq(s)
        self.assertEqual(inferred, 'D')
Example #3
    def test_astype_str(self):
        # test astype string - #10442
        result = date_range('2012-01-01', periods=4,
                            name='test_name').astype(str)
        expected = Index(['2012-01-01', '2012-01-02', '2012-01-03',
                          '2012-01-04'], name='test_name', dtype=object)
        tm.assert_index_equal(result, expected)

        # test astype string with tz and name
        result = date_range('2012-01-01', periods=3, name='test_name',
                            tz='US/Eastern').astype(str)
        expected = Index(['2012-01-01 00:00:00-05:00',
                          '2012-01-02 00:00:00-05:00',
                          '2012-01-03 00:00:00-05:00'],
                         name='test_name', dtype=object)
        tm.assert_index_equal(result, expected)

        # test astype string with freq 'H' and name
        result = date_range('1/1/2011', periods=3, freq='H',
                            name='test_name').astype(str)
        expected = Index(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
                          '2011-01-01 02:00:00'],
                         name='test_name', dtype=object)
        tm.assert_index_equal(result, expected)

        # test astype string with freq 'H' and timezone
        result = date_range('3/6/2012 00:00', periods=2, freq='H',
                            tz='Europe/London', name='test_name').astype(str)
        expected = Index(['2012-03-06 00:00:00+00:00',
                          '2012-03-06 01:00:00+00:00'],
                         dtype=object, name='test_name')
        tm.assert_index_equal(result, expected)
Example #4
    def test_tz_range_is_utc(self):
        exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
        dfexp = ('{"DT":{'
                 '"0":"2013-01-01T05:00:00.000Z",'
                 '"1":"2013-01-02T05:00:00.000Z"}}')

        tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)
        self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
        dti = pd.DatetimeIndex(tz_range)
        self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
        df = DataFrame({'DT': dti})
        self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))

        tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
                                 tz='US/Eastern')
        self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
        dti = pd.DatetimeIndex(tz_range)
        self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
        df = DataFrame({'DT': dti})
        self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))

        tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2)
        self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
        dti = pd.DatetimeIndex(tz_range)
        self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
        df = DataFrame({'DT': dti})
        self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))
Example #5
    def _check_generated_range(self, start, freq):
        freq = freq.upper()

        # the assertions are identical for 7 and 5 periods
        for periods in (7, 5):
            gen = date_range(start, periods=periods, freq=freq)
            index = _dti(gen.values)
            if not freq.startswith('Q-'):
                self.assertEqual(infer_freq(index), gen.freqstr)
            else:
                # quarterly frequencies can only be inferred up to their
                # month-anchor equivalence class
                inf_freq = infer_freq(index)
                self.assertTrue(
                    (inf_freq == 'Q-DEC' and
                     gen.freqstr in ('Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR'))
                    or
                    (inf_freq == 'Q-NOV' and
                     gen.freqstr in ('Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB'))
                    or
                    (inf_freq == 'Q-OCT' and
                     gen.freqstr in ('Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN')))
Example #6
    def test_to_period_tz_explicit_pytz(self):
        xp = date_range('1/1/2000', '4/1/2000').to_period()

        ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern'))

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)

        ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc)

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)

        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)
Example #7
    def test_to_period_tz_dateutil(self):
        xp = date_range('1/1/2000', '4/1/2000').to_period()

        ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern')

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)

        ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc())

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)

        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)
Example #8
    def test_groupby_groups_datetimeindex(self):
        # GH#1430
        periods = 1000
        ind = pd.date_range(start='2012/1/1', freq='5min', periods=periods)
        df = DataFrame({'high': np.arange(periods),
                        'low': np.arange(periods)}, index=ind)
        grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))

        # it works!
        groups = grouped.groups
        assert isinstance(list(groups.keys())[0], datetime)

        # GH#11442
        index = pd.date_range('2015/01/01', periods=5, name='date')
        df = pd.DataFrame({'A': [5, 6, 7, 8, 9],
                           'B': [1, 2, 3, 4, 5]}, index=index)
        result = df.groupby(level='date').groups
        dates = ['2015-01-05', '2015-01-04', '2015-01-03',
                 '2015-01-02', '2015-01-01']
        expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date')
                    for date in dates}
        tm.assert_dict_equal(result, expected)

        grouped = df.groupby(level='date')
        for date in dates:
            result = grouped.get_group(date)
            data = [[df.loc[date, 'A'], df.loc[date, 'B']]]
            expected_index = pd.DatetimeIndex([date], name='date')
            expected = pd.DataFrame(data,
                                    columns=list('AB'),
                                    index=expected_index)
            tm.assert_frame_equal(result, expected)
Example #9
    def test_to_period_tz_pytz(self):
        from pytz import utc as UTC

        xp = date_range('1/1/2000', '4/1/2000').to_period()

        ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern')

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)

        ts = date_range('1/1/2000', '4/1/2000', tz=UTC)

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)

        ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal())

        result = ts.to_period()[0]
        expected = ts[0].to_period()

        assert result == expected
        tm.assert_index_equal(ts.to_period(), xp)
Example #10
    def test_iteration_preserves_tz(self):
        # see gh-8890
        index = date_range("2012-01-01", periods=3, freq='H', tz='US/Eastern')

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]
            assert result == expected

        index = date_range("2012-01-01", periods=3, freq='H',
                           tz=dateutil.tz.tzoffset(None, -28800))

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]
            assert result._repr_base == expected._repr_base
            assert result == expected

        # 9100
        index = pd.DatetimeIndex(['2014-12-01 03:32:39.987000-08:00',
                                  '2014-12-01 04:12:34.987000-08:00'])
        for i, ts in enumerate(index):
            result = ts
            expected = index[i]
            assert result._repr_base == expected._repr_base
            assert result == expected
Example #11
    def test_categorical_series_repr_datetime_ordered(self):
        idx = date_range('2011-01-01 09:00', freq='H', periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0   2011-01-01 09:00:00
1   2011-01-01 10:00:00
2   2011-01-01 11:00:00
3   2011-01-01 12:00:00
4   2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
                                 2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa

        assert repr(s) == exp

        idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                         tz='US/Eastern')
        s = Series(Categorical(idx, ordered=True))
        exp = """0   2011-01-01 09:00:00-05:00
1   2011-01-01 10:00:00-05:00
2   2011-01-01 11:00:00-05:00
3   2011-01-01 12:00:00-05:00
4   2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
                                             2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
                                             2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(s) == exp
Example #12
    def test_join_aware(self):
        rng = date_range('1/1/2011', periods=10, freq='H')
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts_utc = ts.tz_localize('utc')

        self.assertRaises(Exception, ts.__add__, ts_utc)
        self.assertRaises(Exception, ts_utc.__add__, ts)

        test1 = DataFrame(np.zeros((6, 3)),
                          index=date_range("2012-11-15 00:00:00", periods=6,
                                           freq="100L", tz="US/Central"))
        test2 = DataFrame(np.zeros((3, 3)),
                          index=date_range("2012-11-15 00:00:00", periods=3,
                                           freq="250L", tz="US/Central"),
                          columns=range(3, 6))

        result = test1.join(test2, how='outer')
        ex_index = test1.index.union(test2.index)

        self.assertTrue(result.index.equals(ex_index))
        self.assertTrue(result.index.tz.zone == 'US/Central')

        # non-overlapping
        rng = date_range("2012-11-15 00:00:00", periods=6,
                         freq="H", tz="US/Central")

        rng2 = date_range("2012-11-15 12:00:00", periods=6,
                         freq="H", tz="US/Eastern")

        result = rng.union(rng2)
        self.assertTrue(result.tz.zone == 'UTC')
Example #13
    def test_ufunc_coercions(self):
        idx = date_range('2011-01-01', periods=3, freq='2D', name='x')

        delta = np.timedelta64(1, 'D')
        for result in [idx + delta, np.add(idx, delta)]:
            assert isinstance(result, DatetimeIndex)
            exp = date_range('2011-01-02', periods=3, freq='2D', name='x')
            tm.assert_index_equal(result, exp)
            assert result.freq == '2D'

        for result in [idx - delta, np.subtract(idx, delta)]:
            assert isinstance(result, DatetimeIndex)
            exp = date_range('2010-12-31', periods=3, freq='2D', name='x')
            tm.assert_index_equal(result, exp)
            assert result.freq == '2D'

        delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'),
                          np.timedelta64(3, 'D')])
        for result in [idx + delta, np.add(idx, delta)]:
            assert isinstance(result, DatetimeIndex)
            exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'],
                                freq='3D', name='x')
            tm.assert_index_equal(result, exp)
            assert result.freq == '3D'

        for result in [idx - delta, np.subtract(idx, delta)]:
            assert isinstance(result, DatetimeIndex)
            exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'],
                                freq='D', name='x')
            tm.assert_index_equal(result, exp)
            assert result.freq == 'D'
Example #14
    def test_date_range_localize(self):
        rng = date_range('3/11/2012 03:00', periods=15, freq='H', tz='US/Eastern')
        rng2 = DatetimeIndex(['3/11/2012 03:00', '3/11/2012 04:00'],
                             tz='US/Eastern')
        rng3 = date_range('3/11/2012 03:00', periods=15, freq='H')
        rng3 = rng3.tz_localize('US/Eastern')

        self.assertTrue(rng.equals(rng3))

        # DST transition time
        val = rng[0]
        exp = Timestamp('3/11/2012 03:00', tz='US/Eastern')

        self.assertEqual(val.hour, 3)
        self.assertEqual(exp.hour, 3)
        self.assertEqual(val, exp)  # same UTC value
        self.assertTrue(rng[:2].equals(rng2))

        # Right before the DST transition
        rng = date_range('3/11/2012 00:00', periods=2, freq='H', tz='US/Eastern')
        rng2 = DatetimeIndex(['3/11/2012 00:00', '3/11/2012 01:00'],
                             tz='US/Eastern')
        self.assertTrue(rng.equals(rng2))
        exp = Timestamp('3/11/2012 00:00', tz='US/Eastern')
        self.assertEqual(exp.hour, 0)
        self.assertEqual(rng[0], exp)
        exp = Timestamp('3/11/2012 01:00', tz='US/Eastern')
        self.assertEqual(exp.hour, 1)
        self.assertEqual(rng[1], exp)

        rng = date_range('3/11/2012 00:00', periods=10, freq='H',
                         tz='US/Eastern')
        self.assertEqual(rng[2].hour, 3)
Example #15
    def test_tz_localize_naive(self):
        rng = date_range('1/1/2011', periods=100, freq='H')

        conv = rng.tz_localize('US/Pacific')
        exp = date_range('1/1/2011', periods=100, freq='H', tz='US/Pacific')

        self.assertTrue(conv.equals(exp))
Example #16
    def test_categorical_repr_datetime_ordered(self):
        idx = date_range('2011-01-01 09:00', freq='H', periods=5)
        c = Categorical(idx, ordered=True)
        exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
                                 2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
                                 2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa

        assert repr(c) == exp

        idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                         tz='US/Eastern')
        c = Categorical(idx, ordered=True)
        exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
                                             2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
                                             2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
                                             2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
                                             2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(c) == exp
Example #17
    def test_categorical_index_repr_datetime_ordered(self):
        idx = date_range('2011-01-01 09:00', freq='H', periods=5)
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
                  '2011-01-01 11:00:00', '2011-01-01 12:00:00',
                  '2011-01-01 13:00:00'],
                 categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp

        idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                         tz='US/Eastern')
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
                  '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
                  '2011-01-01 13:00:00-05:00'],
                 categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp

        i = CategoricalIndex(Categorical(idx.append(idx), ordered=True))
        exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
                  '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
                  '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00',
                  '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00',
                  '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'],
                 categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp
Example #18
    def test_constructor_with_datetimelike(self):

        # 12077
        # constructor with a datetimelike and NaT

        for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'),
                    date_range('1995-01-01 00:00:00', periods=5,
                               freq='s', tz='US/Eastern'),
                    timedelta_range('1 day', periods=5, freq='s')]:

            s = Series(dtl)
            c = Categorical(s)
            expected = type(dtl)(s)
            expected.freq = None
            tm.assert_index_equal(c.categories, expected)
            tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8'))

            # with NaT
            s2 = s.copy()
            s2.iloc[-1] = NaT
            c = Categorical(s2)
            expected = type(dtl)(s2.dropna())
            expected.freq = None
            tm.assert_index_equal(c.categories, expected)

            exp = np.array([0, 1, 2, 3, -1], dtype=np.int8)
            tm.assert_numpy_array_equal(c.codes, exp)

            result = repr(c)
            assert 'NaT' in result
Example #19
    def test_date_range_businesshour(self):
        idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
                             '2014-07-04 11:00',
                             '2014-07-04 12:00', '2014-07-04 13:00',
                             '2014-07-04 14:00',
                             '2014-07-04 15:00', '2014-07-04 16:00'],
                            freq='BH')
        rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', freq='BH')
        tm.assert_index_equal(idx, rng)

        idx = DatetimeIndex(
            ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH')
        rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH')
        tm.assert_index_equal(idx, rng)

        idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
                             '2014-07-04 11:00',
                             '2014-07-04 12:00', '2014-07-04 13:00',
                             '2014-07-04 14:00',
                             '2014-07-04 15:00', '2014-07-04 16:00',
                             '2014-07-07 09:00', '2014-07-07 10:00',
                             '2014-07-07 11:00',
                             '2014-07-07 12:00', '2014-07-07 13:00',
                             '2014-07-07 14:00',
                             '2014-07-07 15:00', '2014-07-07 16:00',
                             '2014-07-08 09:00', '2014-07-08 10:00',
                             '2014-07-08 11:00',
                             '2014-07-08 12:00', '2014-07-08 13:00',
                             '2014-07-08 14:00',
                             '2014-07-08 15:00', '2014-07-08 16:00'],
                            freq='BH')
        rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH')
        tm.assert_index_equal(idx, rng)
Example #20
    def test_select_dtypes_include_exclude_mixed_scalars_lists(self):
        df = DataFrame({'a': list('abc'),
                        'b': list(range(1, 4)),
                        'c': np.arange(3, 6).astype('u1'),
                        'd': np.arange(4.0, 7.0, dtype='float64'),
                        'e': [True, False, True],
                        'f': pd.Categorical(list('abc')),
                        'g': pd.date_range('20130101', periods=3),
                        'h': pd.date_range('20130101', periods=3,
                                           tz='US/Eastern'),
                        'i': pd.date_range('20130101', periods=3,
                                           tz='CET'),
                        'j': pd.period_range('2013-01', periods=3,
                                             freq='M'),
                        'k': pd.timedelta_range('1 day', periods=3)})

        ri = df.select_dtypes(include=np.number,
                              exclude=['floating', 'timedelta'])
        ei = df[['b', 'c']]
        assert_frame_equal(ri, ei)

        ri = df.select_dtypes(include=[np.number, 'category'],
                              exclude='floating')
        ei = df[['b', 'c', 'f', 'k']]
        assert_frame_equal(ri, ei)
Example #21
    def test_astype(self):
        # astype
        expected = np.array([[Timestamp('2013-01-01 00:00:00'),
                              Timestamp('2013-01-02 00:00:00'),
                              Timestamp('2013-01-03 00:00:00')],
                             [Timestamp('2013-01-01 00:00:00-0500',
                                        tz='US/Eastern'),
                              pd.NaT,
                              Timestamp('2013-01-03 00:00:00-0500',
                                        tz='US/Eastern')],
                             [Timestamp('2013-01-01 00:00:00+0100', tz='CET'),
                              pd.NaT,
                              Timestamp('2013-01-03 00:00:00+0100',
                                        tz='CET')]],
                            dtype=object).T
        result = self.tzframe.astype(object)
        assert_frame_equal(result, DataFrame(
            expected, index=self.tzframe.index, columns=self.tzframe.columns))

        result = self.tzframe.astype('datetime64[ns]')
        expected = DataFrame({'A': date_range('20130101', periods=3),
                              'B': (date_range('20130101', periods=3,
                                               tz='US/Eastern')
                                    .tz_convert('UTC')
                                    .tz_localize(None)),
                              'C': (date_range('20130101', periods=3,
                                               tz='CET')
                                    .tz_convert('UTC')
                                    .tz_localize(None))})
        expected.iloc[1, 1] = pd.NaT
        expected.iloc[1, 2] = pd.NaT
        assert_frame_equal(result, expected)
Example #22
    def test_at_time_frame(self):
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        rs = ts.at_time(rng[1])
        self.assertTrue((rs.index.hour == rng[1].hour).all())
        self.assertTrue((rs.index.minute == rng[1].minute).all())
        self.assertTrue((rs.index.second == rng[1].second).all())

        result = ts.at_time('9:30')
        expected = ts.at_time(time(9, 30))
        assert_frame_equal(result, expected)

        result = ts.loc[time(9, 30)]
        expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]

        assert_frame_equal(result, expected)

        # midnight, everything
        rng = date_range('1/1/2000', '1/31/2000')
        ts = DataFrame(np.random.randn(len(rng), 3), index=rng)

        result = ts.at_time(time(0, 0))
        assert_frame_equal(result, ts)

        # time doesn't exist
        rng = date_range('1/1/2012', freq='23Min', periods=384)
        ts = DataFrame(np.random.randn(len(rng), 2), rng)
        rs = ts.at_time('16:00')
        self.assertEqual(len(rs), 0)
Example #23
    def test_date_range_nat(self):
        # GH#11587
        msg = "Neither `start` nor `end` can be NaT"
        with pytest.raises(ValueError, match=msg):
            date_range(start='2016-01-01', end=pd.NaT, freq='D')
        with pytest.raises(ValueError, match=msg):
            date_range(start=pd.NaT, end='2016-01-01', freq='D')
Example #24
def test_select_x():
    assert utils.select_x(None) is None

    def _check(d, expected):
        x = utils.select_x(d)
        assert x == expected

    data = dict(col1=[1.0, 2.0, 3.0],  # Q
                col2=['A', 'B', 'C'],  # N
                col3=pd.date_range('2012', periods=3, freq='A'))  # T
    _check(data, 'col3')

    data = dict(col1=[1.0, 2.0, 3.0],  # Q
                col2=['A', 'B', 'C'])  # N
    _check(data, 'col2')

    data = dict(col1=[1.0, 2.0, 3.0])  # Q
    _check(data, 'col1')

    # Custom order
    data = dict(col1=[1.0, 2.0, 3.0],  # Q
                col2=['A', 'B', 'C'],  # N
                col3=pd.date_range('2012', periods=3, freq='A'),  # T
                col4=pd.date_range('2012', periods=3, freq='A'))  # T
    selected_x = utils.select_x(data, ['N', 'T', 'Q', 'O'])
    assert selected_x == "col2"

    # Len < 1
    assert utils.select_x(dict()) is None
Example #25
    def test_CalendarDay_range_with_dst_crossing(self):
        # GH 20596
        result = date_range('2018-10-23', '2018-11-06', freq='7CD',
                            tz='Europe/Paris')
        expected = date_range('2018-10-23', '2018-11-06',
                              freq=pd.DateOffset(days=7), tz='Europe/Paris')
        tm.assert_index_equal(result, expected)
Example #26
    def test_is_datetime_dtypes(self):

        ts = pd.date_range('20130101', periods=3)
        tsa = pd.date_range('20130101', periods=3, tz='US/Eastern')

        self.assertTrue(is_datetime64_dtype('datetime64'))
        self.assertTrue(is_datetime64_dtype('datetime64[ns]'))
        self.assertTrue(is_datetime64_dtype(ts))
        self.assertFalse(is_datetime64_dtype(tsa))

        self.assertFalse(is_datetime64_ns_dtype('datetime64'))
        self.assertTrue(is_datetime64_ns_dtype('datetime64[ns]'))
        self.assertTrue(is_datetime64_ns_dtype(ts))
        self.assertTrue(is_datetime64_ns_dtype(tsa))

        self.assertTrue(is_datetime64_any_dtype('datetime64'))
        self.assertTrue(is_datetime64_any_dtype('datetime64[ns]'))
        self.assertTrue(is_datetime64_any_dtype(ts))
        self.assertTrue(is_datetime64_any_dtype(tsa))

        self.assertFalse(is_datetime64tz_dtype('datetime64'))
        self.assertFalse(is_datetime64tz_dtype('datetime64[ns]'))
        self.assertFalse(is_datetime64tz_dtype(ts))
        self.assertTrue(is_datetime64tz_dtype(tsa))

        for tz in ['US/Eastern', 'UTC']:
            dtype = 'datetime64[ns, {}]'.format(tz)
            self.assertFalse(is_datetime64_dtype(dtype))
            self.assertTrue(is_datetime64tz_dtype(dtype))
            self.assertTrue(is_datetime64_ns_dtype(dtype))
            self.assertTrue(is_datetime64_any_dtype(dtype))
Example #27
    def setup(self):
        N = 25000
        index = tm.makeStringIndex(N)
        self.df = DataFrame({'float1': np.random.randn(N),
                             'float2': np.random.randn(N)},
                            index=index)
        self.df_mixed = DataFrame({'float1': np.random.randn(N),
                                   'float2': np.random.randn(N),
                                   'string1': ['foo'] * N,
                                   'bool1': [True] * N,
                                   'int1': np.random.randint(0, N, size=N)},
                                  index=index)
        self.df_wide = DataFrame(np.random.randn(N, 100))
        self.start_wide = self.df_wide.index[10000]
        self.stop_wide = self.df_wide.index[15000]
        self.df2 = DataFrame({'float1': np.random.randn(N),
                              'float2': np.random.randn(N)},
                             index=date_range('1/1/2000', periods=N))
        self.start = self.df2.index[10000]
        self.stop = self.df2.index[15000]
        self.df_wide2 = DataFrame(np.random.randn(N, 100),
                                  index=date_range('1/1/2000', periods=N))
        self.df_dc = DataFrame(np.random.randn(N, 10),
                               columns=['C%03d' % i for i in range(10)])

        self.fname = '__test__.h5'

        self.store = HDFStore(self.fname)
        self.store.put('fixed', self.df)
        self.store.put('fixed_mixed', self.df_mixed)
        self.store.append('table', self.df2)
        self.store.append('table_mixed', self.df_mixed)
        self.store.append('table_wide', self.df_wide)
        self.store.append('table_wide2', self.df_wide2)
Example #28
    def test_with_tz_ambiguous_times(self):
        tz = pytz.timezone('US/Eastern')

        rng = bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))

        # regular no problem
        self.assertTrue(rng.tz_validate())

        # March 13, 2011, spring forward, skip from 2 AM to 3 AM
        dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3,
                        freq=datetools.Hour())
        self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz)

        # after dst transition, it works
        dr = date_range(datetime(2011, 3, 13, 3, 30), periods=3,
                        freq=datetools.Hour(), tz=tz)

        # November 6, 2011, fall back, repeat 2 AM hour
        dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3,
                        freq=datetools.Hour())
        self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz)

        # UTC is OK
        dr = date_range(datetime(2011, 3, 13), periods=48,
                        freq=datetools.Minute(30), tz=pytz.utc)
Example #29
    def setUp(self):
        date_index = date_range(datetime(2009, 12, 11), periods=3,
                                freq=datetools.bday)
        ts = Series([3, 1, 4], index=date_index)
        self.TS1 = ts

        date_index = date_range(datetime(2009, 12, 11), periods=5,
                                freq=datetools.bday)
        ts = Series([1, 5, 9, 2, 6], index=date_index)
        self.TS2 = ts

        date_index = date_range(datetime(2009, 12, 11), periods=3,
                                freq=datetools.bday)
        ts = Series([5, np.nan, 3], index=date_index)
        self.TS3 = ts

        date_index = date_range(datetime(2009, 12, 11), periods=5,
                                freq=datetools.bday)
        ts = Series([np.nan, 5, 8, 9, 7], index=date_index)
        self.TS4 = ts

        data = {'x1': self.TS2, 'x2': self.TS4}
        self.DF1 = DataFrame(data=data)

        data = {'x1': self.TS2, 'x2': self.TS4}
        self.DICT1 = data
Example #30
    def test_select_dtypes_exclude_using_scalars(self):
        df = DataFrame({'a': list('abc'),
                        'b': list(range(1, 4)),
                        'c': np.arange(3, 6).astype('u1'),
                        'd': np.arange(4.0, 7.0, dtype='float64'),
                        'e': [True, False, True],
                        'f': pd.Categorical(list('abc')),
                        'g': pd.date_range('20130101', periods=3),
                        'h': pd.date_range('20130101', periods=3,
                                           tz='US/Eastern'),
                        'i': pd.date_range('20130101', periods=3,
                                           tz='CET'),
                        'j': pd.period_range('2013-01', periods=3,
                                             freq='M'),
                        'k': pd.timedelta_range('1 day', periods=3)})

        ri = df.select_dtypes(exclude=np.number)
        ei = df[['a', 'e', 'f', 'g', 'h', 'i', 'j']]
        assert_frame_equal(ri, ei)

        ri = df.select_dtypes(exclude='category')
        ei = df[['a', 'b', 'c', 'd', 'e', 'g', 'h', 'i', 'j', 'k']]
        assert_frame_equal(ri, ei)

        pytest.raises(NotImplementedError,
                      lambda: df.select_dtypes(exclude='period'))
Пример #31
0
    def test_TimeSeries_deprecation(self):

        # deprecation TimeSeries, #10890
        with tm.assert_produces_warning(FutureWarning):
            pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3))
Example #32
def ugh(exp):

    file = open_ncfile('../../'+exp+'/landCoverFrac/LPX-Bern_'+exp+
                       '_landCoverFrac_original.nc')

    lat = file.variables['latitude'][92:160]
    lon = file.variables['longitude'][584:668]

    time = pd.date_range(start='1/1/2001', end='1/01/2019', freq='M')

    # read all 20 PFT slices over the regional window and the final time span;
    # veg[0] is TrBE, which seeds the dataset created below
    veg = [file.variables['landCoverFrac'][584:668, 92:160, 3612:, k]
           for k in range(20)]

    pft_short = ['TrBR', 'TeNE', 'TeBE', 'TeBS', 'BNE', 'BNS', 'BBS', 'C3G',
                 'C4G', 'PeatGr', 'PeatSM', 'PeatTrBE', 'PeatTrBR', 'PeatHerb',
                 'C3Crop', 'C4Crop', 'C3Past', 'C4Past', 'UrbanBare']

    pfts = ['Tropical broad raingreen', 'Temperate needle evergreen',
            'Temperate broad evergeen', 'Temperate broad summergreen',
            'Boreal needle evergreen', 'Boreal needle summergreen',
            'Boreal broad summergreen', 'C3 herbaceous', 'C4 herbaceous',
            'Peat graminoid', 'Peat sphagnum moss',
            'Peat flood tolerant tropical broad evergreen',
            'Peat flood tolerant tropical broad raingreen',
            'Peat flood tolerant herbaceous', 'Cropland C3 herbaceous',
            'Cropland C4 herbaceous', 'Pasture C3 herbaceous',
            'Pasture C4 herbaceous', 'Urban Bare']

    vegs = veg[1:]

    # create dataset
    ds = xr.Dataset({
        'TrBE': xr.DataArray(data   = np.transpose(veg[0]),
                             dims   = ['time', 'latitude', 'longitude'],
                             coords = [time, lat, lon],
                             attrs  = {'long_name': 'Tropical broad evergreen',
                                       'units': '%'})},
                    attrs = {'Conventions':'CF-1.6',
                             'Institution':'Climate and Environmental Physics, '
                                           'University of Bern',
                             'Source': 'Extracted from LPX-Bern_S3_01 at '
                                       '2019-08-13T17:58:26.478921',
                             'Title':'Fractional Land Cover of PFT output from '
                                     'LPX-Bern for GCP201',
                             'Contact': '*****@*****.**'}
                    )
    for v, ps, p in zip(vegs, pft_short, pfts):
        ds[ps] = xr.DataArray(data   = np.transpose(v),
                              dims   = ['time', 'latitude', 'longitude'],
                              coords = [time, lat, lon],
                              attrs  = {'long_name': p,
                                        'units': '%'})

    ds['latitude'].attrs = {'units': 'degrees_north', 'long_name': 'latitude',
                            'standard_name': 'latitude', 'axis': 'Y'}
    ds['longitude'].attrs = {'units': 'degrees_east', 'long_name': 'longitude',
                             'standard_name': 'longitude', 'axis': 'X'}

    ds.to_netcdf('LPX-Bern_'+exp+'_landCoverFrac.nc',
                 encoding={'latitude':{'dtype': 'double'},
                           'longitude':{'dtype': 'double'},
                           'time':{'dtype': 'double'},
                           'TrBE':{'dtype': 'float32'},
                           'TrBR':{'dtype': 'float32'},
                           'TeNE':{'dtype': 'float32'},
                           'TeBE':{'dtype': 'float32'},
                           'TeBS':{'dtype': 'float32'},
                           'BNE':{'dtype': 'float32'},
                           'BNS':{'dtype': 'float32'},
                           'BBS':{'dtype': 'float32'},
                           'C3G':{'dtype': 'float32'},
                           'C4G':{'dtype': 'float32'},
                           'PeatGr':{'dtype': 'float32'},
                           'PeatSM':{'dtype': 'float32'},
                           'PeatTrBE':{'dtype': 'float32'},
                           'PeatTrBR':{'dtype': 'float32'},
                           'PeatHerb':{'dtype': 'float32'},
                           'C3Crop':{'dtype': 'float32'},
                           'C4Crop':{'dtype': 'float32'},
                           'C3Past':{'dtype': 'float32'},
                           'C4Past':{'dtype': 'float32'},
                           'UrbanBare':{'dtype': 'float32'}})
Example #33
def lag():

    os.chdir(dir_in)
    #get names
    tg_list_name = sorted(os.listdir())

    x = 91
    y = 92

    for tg in range(x, y):

        os.chdir(dir_in)

        tg_name = tg_list_name[tg]
        print(tg_name, '\n')

        pred = pd.read_csv(tg_name)

        #create a daily time series - date_range
        #get only the ymd of the start and end times
        start_time = pred['date'][0].split(' ')[0]
        end_time = pred['date'].iloc[-1].split(' ')[0]

        print(start_time, ' - ', end_time, '\n')

        date_range = pd.date_range(start_time, end_time, freq='D')

        #defining time changing lambda functions
        time_str = lambda x: str(x)
        time_converted_str = pd.DataFrame(map(time_str, date_range),
                                          columns=['date'])
        time_converted_stamp = pd.DataFrame(date_range, columns=['timestamp'])
        """
        first prepare the six time lagging dataframes  
        then use the merge function to merge the original 
        predictor with the lagging dataframes
        """

        #prepare lagged time series for time only
        #note here that since MERRA has 3hrly data
        #the lag_hrs is increased from 6(eraint) to 31(MERRA)
        time_lagged = pd.DataFrame()
        lag_hrs = list(range(0, 31))
        for lag in lag_hrs:
            lag_name = 'lag' + str(lag)
            lam_delta = lambda x: str(x - dt.timedelta(hours=lag))
            lag_new = pd.DataFrame(map(lam_delta, time_converted_stamp['timestamp']), \
                                   columns = [lag_name])
            time_lagged = pd.concat([time_lagged, lag_new], axis=1)

        # dataframe that contains all lagged time series (time only)
        time_all = pd.concat([time_converted_str, time_lagged], axis=1)

        pred_lagged = pd.DataFrame()
        for ii in range(1, time_all.shape[1]):  # loop through the lagged time series
            print(time_all.columns[ii])
            # extract the corresponding lagged time series
            lag_ts = pd.DataFrame(time_all.iloc[:, ii])
            lag_ts.columns = ['date']
            # merge the selected lagged time with the predictor on "date"
            pred_new = pd.merge(pred, lag_ts, on=['date'], how='right')
            pred_new.drop('Unnamed: 0', axis=1, inplace=True)
            #sometimes nan values go to the bottom of the dataframe
            #sort df by date -> reset the index -> remove old index
            pred_new.sort_values(by='date', inplace=True)
            pred_new.reset_index(inplace=True)
            pred_new.drop('index', axis=1, inplace=True)

            #concatenate lagged dataframe
            if ii == 1:
                pred_lagged = pred_new
            else:
                pred_lagged = pd.concat([pred_lagged, pred_new.iloc[:, 1:]],
                                        axis=1)

        #cd to saving directory
        os.chdir(dir_out)
        pred_lagged.to_csv(tg_name)
        os.chdir(dir_in)
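
The core move in lag() is the right-merge of the predictor onto time-shifted date keys. A minimal self-contained sketch of that trick, using synthetic 3-hourly data and hypothetical column names (not from the source):

import datetime as dt
import pandas as pd

# toy predictor keyed by string dates, like the csv files read above
pred = pd.DataFrame({'date': ['2020-01-01 00:00:00', '2020-01-01 03:00:00'],
                     'value': [1.0, 2.0]})

stamps = pd.to_datetime(pred['date'])
# shift each timestamp back by 3 hours, then back to strings so the keys match
lag3 = pd.DataFrame({'date': (stamps - dt.timedelta(hours=3)).astype(str)})

# how='right' keeps one row per lagged key; keys with no match become NaN
print(pd.merge(pred, lag3, on='date', how='right'))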
Example #34
def obtain_data(config_file_path, metadata_file_path=None):
    """
        Uses read_config() to acquire a full dictionary of the config file and then uses the values contained within it
        to direct how data is processed and what variables are obtained.

        If a metadata file is provided, the config file will still be used for data organization, but the metadata will
        be pulled from the metadata file.

        Args:
            config_file_path : string of path to config file, should work with absolute or relative path
            metadata_file_path : string of path to metadata file if provided

        Returns:
            data_df : pandas dataframe of the entire dataset, reindexed to a continuous daily record,
                with the variables organized into columns
            col_df : pandas series of what variables are stored in what columns, used to track which vars are provided
            metadata_df : pandas dataframe of the metadata file contents, or None if no metadata file was provided
            metadata_series : pandas series for the current station's row of the metadata file, or None if not provided
            config_dict : dictionary of config values, extended with station name, file, and directory information
    """

    # Open config file
    validate_file(config_file_path, ['ini'])
    config_dict = read_config(config_file_path)
    print('\nSuccessfully opened config file at %s' % config_file_path)

    # Open metadata file
    # If a metadata file is provided we will open it and overwrite values in config_dict with its values
    if metadata_file_path is not None:

        validate_file(metadata_file_path, 'xlsx')  # Validate file to make sure it exists and is the right type
        metadata_df = pd.read_excel(metadata_file_path, sheet_name=0, index_col=0, engine='openpyxl',
                                    keep_default_na=True, na_filter=True, verbose=True)
        print('\nSuccessfully opened metadata file at %s' % metadata_file_path)

        current_row = metadata_df.run_count.ne(2).idxmax() - 1
        metadata_series = metadata_df.iloc[current_row]

        config_dict['data_file_path'] = metadata_series.input_path
        config_dict['station_latitude'] = metadata_series.latitude
        config_dict['station_longitude'] = metadata_series.longitude
        config_dict['station_elevation'] = metadata_series.elev_m
        config_dict['anemometer_height'] = metadata_series.anemom_height_m
        config_dict['corr_flag'] = metadata_series.run_count

        # split file string on extension
        (file_name, station_extension) = os.path.splitext(config_dict['data_file_path'])

        # check to see if file is in a subdirectory in the same folder as the script
        if '/' in file_name:
            (folder_path, delimiter, _station_name) = file_name.rpartition('/')
        elif '\\' in file_name:
            (folder_path, delimiter, _station_name) = file_name.rpartition('\\')
        else:
            folder_path = os.getcwd()

        # Add new keys to config_dict for directory and file information to save files later on
        config_dict['station_name'] = str(metadata_series.id)
        config_dict['file_name'] = file_name
        config_dict['station_extension'] = station_extension
        config_dict['folder_path'] = folder_path

    else:  # No metadata file was provided, use the path info of the data file to construct path variables

        metadata_df = None
        metadata_series = None
        (file_name, station_extension) = os.path.splitext(config_dict['data_file_path'])

        # check to see if file is in a subdirectory or by itself
        if '/' in file_name:
            (folder_path, delimiter, station_name) = file_name.rpartition('/')
        elif '\\' in file_name:
            (folder_path, delimiter, station_name) = file_name.rpartition('\\')
        else:
            station_name = file_name
            folder_path = os.getcwd()

        # Add new keys to config_dict for directory and file information to save files later on
        config_dict['station_name'] = station_name
        config_dict['file_name'] = file_name
        config_dict['station_extension'] = station_extension
        config_dict['folder_path'] = folder_path

    # Check lines_of_header value: if 0 change it to None, otherwise subtract one (pandas headers are 0-indexed)
    if config_dict['lines_of_header'] == 0:
        config_dict['lines_of_header'] = None
    else:
        config_dict['lines_of_header'] = config_dict['lines_of_header'] - 1

    # Open data file
    validate_file(config_dict['data_file_path'], ['csv', 'xls', 'xlsx'])
    if station_extension == '.csv':  # csv file provided
        raw_data = pd.read_csv(config_dict['data_file_path'], delimiter=',', header=config_dict['lines_of_header'],
                               index_col=None, engine='python', skipfooter=config_dict['lines_of_footer'],
                               na_values=config_dict['missing_data_value'], keep_default_na=True,
                               na_filter=True, verbose=True, skip_blank_lines=True)

    elif station_extension == '.xlsx':
        raw_data = pd.read_excel(config_dict['data_file_path'], sheet_name=0, header=config_dict['lines_of_header'],
                                 index_col=None, engine='openpyxl', skipfooter=config_dict['lines_of_footer'],
                                 na_values=config_dict['missing_data_value'], keep_default_na=True,
                                 na_filter=True, verbose=True)

    elif station_extension == '.xls':
        raw_data = pd.read_excel(config_dict['data_file_path'], sheet_name=0, header=config_dict['lines_of_header'],
                                 index_col=None, engine='xlrd', skipfooter=config_dict['lines_of_footer'],
                                 na_values=config_dict['missing_data_value'], keep_default_na=True,
                                 na_filter=True, verbose=True)

    else:
        # This script only handles csv and excel files; validate_file() already catches this case
        raise IOError('\n\nProvided file was of type \'{}\' but script was expecting type \'{}\'.'
                      .format(station_extension, ['csv', 'xls', 'xlsx']))

    print('\nSuccessfully opened data file at %s' % config_dict['data_file_path'])

    # Handle any network-specific oddities that may have slipped through
    raw_data = raw_data.replace(to_replace='NO RECORD   ', value=np.nan)  # catch for whitespace on agrimet

    # check for the existence of 'correction_files' folder and if not present make one
    if not os.path.exists(folder_path + '/correction_files'):
        os.makedirs(folder_path + '/correction_files')
        os.makedirs(folder_path + '/correction_files/before_graphs/')
        os.makedirs(folder_path + '/correction_files/after_graphs/')
        os.makedirs(folder_path + '/correction_files/histograms/')

    # Create log file for this new data file
    config_dict['log_file_path'] = config_dict['folder_path'] + \
        '/correction_files/' + config_dict['station_name'] + '_changes_log' + '.txt'
    log.basicConfig()
    logger = open(config_dict['log_file_path'], 'w')
    logger.write('The raw data for %s has been successfully read in at %s. \n \n' %
                 (config_dict['station_name'], dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    logger.close()
    print('\nSuccessfully created log file at %s.' % config_dict['log_file_path'])

    # Date handling, figures out the date format and extracts from string if needed
    if config_dict['date_format'] == 1:
        # Date is provided as a string, expected format is MM/DD/YYYY, time can be included as well.
        if config_dict['string_date_col'] != -1:
            data_date = np.array(raw_data.iloc[:, config_dict['string_date_col']])
            dt_date = pd.to_datetime(data_date, errors='raise')
            data_day = np.array(dt_date.day.astype('int'))
            data_month = np.array(dt_date.month.astype('int'))
            data_year = np.array(dt_date.year.astype('int'))
        else:
            # date format was provided as a string date but no string date was given
            raise ValueError('Date format parameter indicated a string date but none was provided')

    elif config_dict['date_format'] == 2:
        # Date is provided as separate month, day, and year columns
        if config_dict['month_col'] != -1 and config_dict['day_col'] != -1 and config_dict['year_col'] != -1:
            data_month = np.array(raw_data.iloc[:, config_dict['month_col']].astype('int'))
            data_day = np.array(raw_data.iloc[:, config_dict['day_col']].astype('int'))
            data_year = np.array(raw_data.iloc[:, config_dict['year_col']].astype('int'))
        else:
            # date format was provided as separate columns but some were missing
            raise ValueError('Date format parameter indicated separate y/m/d columns but some or all were missing')

    elif config_dict['date_format'] == 3:
        # Date is pre-split between year column and DOY column

        if config_dict['day_of_year_col'] != -1 and config_dict['year_col'] != -1:
            data_doy = np.array(raw_data.iloc[:, config_dict['day_of_year_col']].astype('int'))
            data_year = np.array(raw_data.iloc[:, config_dict['year_col']].astype('int'))
        else:
            # date format was provided as separate year and doy columns but some were missing
            raise ValueError('Date format parameter indicated year and DOY columns but some or all were missing')

        dt_date = pd.to_datetime(data_year * 1000 + data_doy, format='%Y%j', errors='raise')
        data_day = np.array(dt_date.day.astype('int'))
        data_month = np.array(dt_date.month.astype('int'))
        data_year = np.array(dt_date.year.astype('int'))

    else:
        # Script cannot function without a time variable
        raise ValueError('Parameter error: date_format is set to an unexpected value.')

    #########################
    # Variable processing
    # Imports all weather variables, converts them into the correct units, and filters them to remove impossible values

    (data_tmax, tmax_col) = process_variable(config_dict, raw_data, 'maximum_temperature')
    (data_tmin, tmin_col) = process_variable(config_dict, raw_data, 'minimum_temperature')
    (data_tavg, tavg_col) = process_variable(config_dict, raw_data, 'average_temperature')
    (data_tdew, tdew_col) = process_variable(config_dict, raw_data, 'dewpoint_temperature')
    (data_ea, ea_col) = process_variable(config_dict, raw_data, 'vapor_pressure')
    (data_rhmax, rhmax_col) = process_variable(config_dict, raw_data, 'maximum_relative_humidity')
    (data_rhmin, rhmin_col) = process_variable(config_dict, raw_data, 'minimum_relative_humidity')
    (data_rhavg, rhavg_col) = process_variable(config_dict, raw_data, 'average_relative_humidity')
    (data_rs, rs_col) = process_variable(config_dict, raw_data, 'solar_radiation')
    (data_ws, ws_col) = process_variable(config_dict, raw_data, 'wind_speed')
    (data_precip, precip_col) = process_variable(config_dict, raw_data, 'precipitation')

    # HPRCC data reports '0' for missing observations alongside a text flag column, but this script
    # does not interpret text columns. Instead, we check whether tmax and tmin share the same value
    # (0, or -17.7778 depending on units) and, if so, mark the whole row as missing.
    # Realistically tmax should never equal tmin, so this is a reasonable check to apply in general.
    for i in range(len(data_tmax)):
        if data_tmax[i] == data_tmin[i]:
            data_tmax[i] = np.nan
            data_tmin[i] = np.nan
            data_tavg[i] = np.nan
            data_tdew[i] = np.nan
            data_ea[i] = np.nan
            data_rhmax[i] = np.nan
            data_rhmin[i] = np.nan
            data_rhavg[i] = np.nan
            data_rs[i] = np.nan
            data_ws[i] = np.nan
            data_precip[i] = np.nan

    #########################
    # Dataframe Construction
    # In this section we convert the individual numpy arrays into a pandas dataframe to accomplish several goals:
    # 1. Make use of the pandas reindexing function to cover literal gaps in the dataset (not just missing values)
    # 2. Drop any duplicate records (same day appears twice in the dataset; the first instance is kept)
    # 3. Cleanly pass extracted data to the main script function

    # Create Datetime dataframe for reindexing
    datetime_df = pd.DataFrame({'year': data_year, 'month': data_month, 'day': data_day})
    datetime_df = pd.to_datetime(datetime_df)

    # Create a series of all dates in time series
    date_reindex = pd.date_range(datetime_df.iloc[0], datetime_df.iloc[-1])

    reindexing_additions = np.setdiff1d(np.array(date_reindex), np.array(datetime_df), assume_unique=False)

    with open(config_dict['log_file_path'], 'w') as logger:
        logger.write('The raw data file had %s missing date entries from its time record. \n \n' %
                     reindexing_additions.size)

    print('\nSystem: The input data file had %s missing dates in its time record.' % reindexing_additions.size)

    # Create dataframe of data
    data_df = pd.DataFrame({'year': data_year, 'month': data_month,
                            'day': data_day, 'tavg': data_tavg, 'tmax': data_tmax, 'tmin': data_tmin,
                            'tdew': data_tdew, 'ea': data_ea, 'rhavg': data_rhavg, 'rhmax': data_rhmax,
                            'rhmin': data_rhmin, 'rs': data_rs, 'ws': data_ws, 'precip': data_precip},
                           index=datetime_df)

    # Create dataframe of column indices for weather variable, to track which ones were provided vs calculated
    col_df = pd.Series({'tmax': tmax_col, 'tmin': tmin_col, 'tavg': tavg_col, 'tdew': tdew_col, 'ea': ea_col,
                        'rhmax': rhmax_col, 'rhmin': rhmin_col, 'rhavg': rhavg_col, 'rs': rs_col, 'ws': ws_col,
                        'precip': precip_col})

    # Check for duplicate indexes; if found, we cannot tell which value is correct,
    # so we keep the first instance and drop all subsequent ones
    data_df = data_df[~data_df.index.duplicated(keep='first')]

    # Reindex data with filled date series in case there are gaps in the data
    data_df = data_df.reindex(date_reindex, fill_value=np.nan)

    # Now replace M/D/Y columns with reindexed dates so there are no missing days
    data_df.year = date_reindex.year
    data_df.month = date_reindex.month
    data_df.day = date_reindex.day

    return data_df, col_df, metadata_df, metadata_series, config_dict
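A minimal, self-contained sketch of the duplicate-drop and reindex pattern used above, on toy data (all names here are illustrative, not from the script):

import numpy as np
import pandas as pd

# Toy daily record with a duplicated day (Jan 2) and a gap (Jan 4 missing)
idx = pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-02', '2020-01-03', '2020-01-05'])
toy_df = pd.DataFrame({'tmax': [5.0, 6.0, 9.0, 7.0, 4.0]}, index=idx)

# Keep the first instance of any duplicated day
toy_df = toy_df[~toy_df.index.duplicated(keep='first')]

# Reindex against the full date range so the gap becomes an explicit NaN row
full_range = pd.date_range(toy_df.index[0], toy_df.index[-1])
toy_df = toy_df.reindex(full_range, fill_value=np.nan)
print(toy_df)  # Jan 4 is now present with tmax == NaN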
Example #35
0
def test_getitem_unrecognized_scalar():
    # GH#32684 a scalar key that is not recognized by lib.is_scalar

    # a series that might be produced via `frame.dtypes`
    ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])

    key = ser.index[1]

    result = ser[key]
    assert result == 2


@pytest.mark.parametrize(
    "index",
    [
        date_range("2014-01-01", periods=20, freq="MS"),
        period_range("2014-01", periods=20, freq="M"),
        timedelta_range("0", periods=20, freq="H"),
    ],
)
def test_slice_with_zero_step_raises(index):
    ts = Series(np.arange(20), index)

    with pytest.raises(ValueError, match="slice step cannot be zero"):
        ts[::0]
    with pytest.raises(ValueError, match="slice step cannot be zero"):
        ts.loc[::0]
    with pytest.raises(ValueError, match="slice step cannot be zero"):
        ts.iloc[::0]

Example #36
0
def test_subexpr_datetime():
    data = pd.date_range(start='01/01/2010', end='01/04/2010', freq='D').values
    s = symbol('s', discover(data))
    result = compute(s.truncate(days=2).day, data)
    expected = np.array([31, 2, 2, 4])
    np.testing.assert_array_equal(result, expected)
Example #37
0
 def index(self) -> pd.DatetimeIndex:
     if self._index is None:
         self._index = pd.date_range(self.start_date,
                                     periods=self.prediction_length,
                                     freq=self.freq)
     return self._index
Example #38
0
import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.api as sm
import patsy as pt

barium = woo.dataWoo('barium')
T = len(barium)

# monthly time series starting Feb. 1978:
barium.index = pd.date_range(start='1978-02', periods=T, freq='M')

# perform the Cochrane-Orcutt estimation (iterative procedure):
y, X = pt.dmatrices(
    'np.log(chnimp) ~ np.log(chempi) + np.log(gas) +'
    'np.log(rtwex) + befile6 + affile6 + afdec6',
    data=barium,
    return_type='dataframe')
reg = sm.GLSAR(y, X)
CORC_results = reg.iterative_fit(maxiter=100)
table = pd.DataFrame({
    'b_CORC': CORC_results.params,
    'se_CORC': CORC_results.bse
})
print(f'reg.rho: {reg.rho}\n')
print(f'table: \n{table}\n')
Example #39
0
 def test_interp_datetime64(self):
     _skip_if_no_scipy()
     df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3))
     result = df.interpolate(method='nearest')
     expected = Series([1, 1, 3], index=date_range('1/1/2000', periods=3))
     assert_series_equal(result, expected)
Example #40
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2018-09-14 21:57:25
# @Author  : cdl ([email protected])
# @Link    : https://github.com/cdlwhm1217096231/python3_spider
# @Version : $Id$

from pyecharts import Bar
import pandas as pd
import numpy as np


title = "bar chart"
index = pd.date_range('14/9/2018', periods=6, freq='M')
df1 = pd.DataFrame(np.random.randn(6), index=index)
df2 = pd.DataFrame(np.random.randn(6), index=index)

dtvalue1 = [i[0] for i in df1.values]
dtvalue2 = [i[0] for i in df2.values]
_index = [i for i in df1.index.format()]


bar = Bar(title, 'Profit and loss situation')
bar.add('profit', _index, dtvalue1)
bar.add('loss', _index, dtvalue2)
Example #41
0
 def add_datetime_index(self, start_date, feature_added_df: pd.DataFrame):
     date_range = pd.date_range(start_date, periods=48 * 9, freq='30min')
     feature_added_df.index = date_range
Example #42
0
    def test_equals(self):
        s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
        s2 = s1.copy()
        self.assert_(s1.equals(s2))

        s1[1] = 99
        self.assert_(not s1.equals(s2))

        # NaNs compare as equal
        s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
        s2 = s1.copy()
        self.assert_(s1.equals(s2))

        s2[0] = 9.9
        self.assert_(not s1.equals(s2))

        idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
        s1 = Series([1, 2, np.nan], index=idx)
        s2 = s1.copy()
        self.assert_(s1.equals(s2))

        # Add object dtype column with nans
        index = np.random.random(10)
        df1 = DataFrame(np.random.random(10,), index=index, columns=['floats'])
        df1['text'] = 'the sky is so blue. we could use more chocolate.'.split()
        df1['start'] = date_range('2000-1-1', periods=10, freq='T')
        df1['end'] = date_range('2000-1-1', periods=10, freq='D')
        df1['diff'] = df1['end'] - df1['start']
        df1['bool'] = (np.arange(10) % 3 == 0)
        df1.ix[::2] = nan
        df2 = df1.copy()
        self.assert_(df1['text'].equals(df2['text']))
        self.assert_(df1['start'].equals(df2['start']))
        self.assert_(df1['end'].equals(df2['end']))
        self.assert_(df1['diff'].equals(df2['diff']))
        self.assert_(df1['bool'].equals(df2['bool']))
        self.assert_(df1.equals(df2))
        self.assert_(not df1.equals(object))

        # different dtype
        different = df1.copy()
        different['floats'] = different['floats'].astype('float32')
        self.assert_(not df1.equals(different))

        # different index
        different_index = -index
        different = df2.set_index(different_index)
        self.assert_(not df1.equals(different))

        # different columns
        different = df2.copy()
        different.columns = df2.columns[::-1]
        self.assert_(not df1.equals(different))

        # DatetimeIndex
        index = pd.date_range('2000-1-1', periods=10, freq='T')
        df1 = df1.set_index(index)
        df2 = df1.copy()
        self.assert_(df1.equals(df2))

        # MultiIndex
        df3 = df1.set_index(['text'], append=True)
        df2 = df1.set_index(['text'], append=True)
        self.assert_(df3.equals(df2))

        df2 = df1.set_index(['floats'], append=True)
        self.assert_(not df3.equals(df2))

        # NaN in index
        df3 = df1.set_index(['floats'], append=True)
        df2 = df1.set_index(['floats'], append=True)
        self.assert_(df3.equals(df2))
Example #43
0
def gen_all_quarters(start: datetime.datetime, end: datetime.datetime):
    idx = pd.date_range(start=start, end=end, freq="Q")
    # dt_list = [dt.strftime("%Y-%m-%d %H:%M:%S") for dt in idx]
    dt_list = [dt.to_pydatetime() for dt in idx]
    return dt_list
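A quick usage sketch (the dates are arbitrary): freq="Q" yields calendar quarter-end timestamps, which to_pydatetime() then converts to plain datetime objects.

import datetime

quarters = gen_all_quarters(datetime.datetime(2020, 1, 1), datetime.datetime(2020, 12, 31))
print(quarters)
# quarter ends: 2020-03-31, 2020-06-30, 2020-09-30, 2020-12-31 as datetime.datetime objects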
Example #44
0
def main():
    
    # Calendar-Spread implementation example
            
    # default market environment
    market_env = MarketEnvironment()
    print(market_env)
    
    # options expirations
    T_short = "31-05-2020"
    T_long = "30-08-2020"
    
    # current underlying level
    S_t = market_env.get_S()

    # calendar-spread portfolio initialized (as empty portfolio)   
    calendar_spread_ptf = Portfolio(name="Calendar Spread Strategy")
    print(calendar_spread_ptf)
    
    # T_long-call
    Vanilla_Call_long = PlainVanillaOption(market_env, T=T_long, K=S_t)
    print(Vanilla_Call_long)

    # T_short-call
    Vanilla_Call_short = PlainVanillaOption(market_env, T=T_short, K=S_t)
    print(Vanilla_Call_short)

    # creation of Calendar-Spread portfolio strategy   
    calendar_spread_ptf.add_instrument(Vanilla_Call_long, 1)
    calendar_spread_ptf.add_instrument(Vanilla_Call_short, -1)    
    print(calendar_spread_ptf)
    
    # portfolio plotter instance
    calendar_spread_ptf_plotter = PortfolioPlotter(calendar_spread_ptf)
    
    # valuation date of the portfolio
    valuation_date = calendar_spread_ptf.get_t()
    print(valuation_date)
        
    # select metrics to plot
    for plot_metrics in ["price", "PnL", "delta", "theta", "gamma", "vega", "rho"]:
        
        plot_details_flag = True if plot_metrics == "price" else False

        # time-parameter as a date-range of 5 valuation dates between t and T_short
        last_date = T_short if plot_metrics in ["price", "PnL"] else date_string_to_datetime_obj(T_short) - pd.Timedelta(days=1)
        multiple_valuation_dates = pd.date_range(start=valuation_date, 
                                                 end=last_date, 
                                                 periods=5)
        print(multiple_valuation_dates)

        # Calendar-Spread plot at the last valuation date
        calendar_spread_ptf_plotter.plot(t=last_date, plot_metrics=plot_metrics, 
                                         plot_details=plot_details_flag)
            
        # Plot at multiple dates
        calendar_spread_ptf_plotter.plot(t=multiple_valuation_dates, plot_metrics=plot_metrics)
    
        # Surface plot
        calendar_spread_ptf_plotter.plot(t=multiple_valuation_dates, plot_metrics=plot_metrics, 
                                         surf_plot=True)
    
        # Surface plot (rotate) - Underlying value side
        calendar_spread_ptf_plotter.plot(t=multiple_valuation_dates, plot_metrics=plot_metrics, 
                                         surf_plot=True, view=(0,180))
    
        # Price surface plot (rotate) - Date side
        calendar_spread_ptf_plotter.plot(t=multiple_valuation_dates, plot_metrics=plot_metrics, 
                                         surf_plot=True, view=(0,-90))
Example #45
0
def data_read(date_range, crd, utc_offset):
    
    # Number of ASOS observations an hour
    interval = 12
    freq = ['5min', 'H']
    
    # Expand the requested period into 5-minute observation datetimes for the location of interest
    date_range = pd.date_range(start=date_range[0], end=date_range[-1], freq=freq[0]) 
    start_date, end_date = [date_range[0], date_range[-1]]
    
    # Generate strings from dates for comparison purposes
    date_str = [datetime.datetime.strftime(start_date, '%Y%m%d%H%M'),
                datetime.datetime.strftime(end_date, '%Y%m%d%H%M')]
    
    # Initialize DataFrame here to return nan DataFrame in case of failed FTP connection
    df = pd.DataFrame(np.nan, index=date_range, columns=['T_air', 'T_dew', 'u_r', 'p_air'])
    # Set up URL to appropriate data file
    data_url = wfile(date_str[0][0:6] + '.dat', crd)
    
    # Import data to DataFrame 'df'
    try:
        asos_data = pd.read_table(data_url, header=None)
    except Exception:
        print('FTP connection failed. Exiting program...')
        sys.exit()
        
    ## Regex patterns for data mining through the .dat file(s)
    # Air temperature regex: temperature/dew point group such as "12/08" or "M05/M12", bounded by spaces
    T_pattern = r'\s.?\d\d[+-/].?\d\d\s'
    # Wind speed regex: five digits (3-digit direction + 2-digit speed) followed by a non-digit
    # Note: This definition ignores gusts
    u_pattern = r"\s\d\d\d\d\d\D"
    # Note: This alternative definition lets the gust become the effective wind speed
    # u_pattern = r"\d\d[K][T]\s\d"
    # Air pressure regex: string of 6 characters "SLP(0-9)(0-9)(0-9)"
    p_pattern = r"[S][L][P]\d\d\d"
    
    # Iterate through all rows in ASOS data file. For dates in file that are within date range, extract data.
    for row in asos_data.iloc[:, 0]:
        if datetime.datetime.strptime(row[13:23], '%Y%m%d%H') in df.index:
            # If temperature pattern is found, extract data.
            if re.findall(T_pattern, row):
                date = datetime.datetime.strptime(row[13:25], '%Y%m%d%H%M')
                
                # Extract air temperature ('M' prefix indicates a negative temperature)
                T_air_str = re.findall(T_pattern, row)[0]                
                if T_air_str[1] == 'M':
                    df.loc[date, 'T_air'] = CtoK(-int(T_air_str[2:4]))
                else:
                    df.loc[date, 'T_air'] = CtoK(int(T_air_str[1:3]))
                    
                # Extract dew point temperature ('M' prefix indicates a negative temperature)
                if T_air_str[-4] == 'M':
                    df.loc[date, 'T_dew'] = CtoK(-int(T_air_str[-3:-1]))
                else:
                    df.loc[date, 'T_dew'] = CtoK(int(T_air_str[-3:-1]))
                    
                # Extract wind speed
                if re.findall(u_pattern, row):
                    u_str = re.findall(u_pattern, row)[0]
                    df.loc[date, 'u_r'] = int(u_str[4:6])
                else:
                    df.loc[date, 'u_r'] = 0
                
                # Extract air pressure
                if re.findall(p_pattern, row):
                    # Convert p_str to pressure in hPa (the three digits are tenths of hPa above 1000)
                    p_temp = 1000 + int(re.findall(p_pattern, row)[0][-3:])/10
                    df.loc[date, 'p_air'] = p_temp
                else:
                    df.loc[date, 'p_air'] = 1013.25
    
    # Average over all observations to produce hourly, then re-index to set dates to proper indices.
    df = pd.DataFrame(df.values.reshape(-1, interval, df.shape[1]).mean(1), columns=df.columns)
    df['date'] = pd.date_range(start=date_range[0], end=date_range[-1], freq=freq[1])
    df = df.set_index('date')
    
    # Delete ASOS data folder created locally
    shutil.rmtree(os.path.join(os.path.dirname(__file__), data_url.split('/')[-2]))

    return df['u_r'], df['T_dew'], df['p_air']
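For reference, a minimal sketch of the SLP decoding rule coded above (the sample report fragment is made up): the three digits after 'SLP' are treated as tenths of hPa added to a 1000 hPa base. Full METAR decoding actually picks a 900 or 1000 hPa base depending on the value, which the script does not attempt.

import re

sample = 'KJFK 011251Z 18005KT 10SM SLP132'        # hypothetical report line
slp = re.findall(r"[S][L][P]\d\d\d", sample)[0]    # 'SLP132'
p_air = 1000 + int(slp[-3:]) / 10                  # 1013.2 hPa
print(p_air)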
Example #46
0
#Dividing the dataset into test and train
y_train=np.array(df['Prediction'][:-15])
X_train=np.array(X[:-15])
X_test=np.array(X[-15:])

#Training the model
model = LinearRegression()
model.fit(X_train,y_train)

forecast_prediction = model.predict(X_test)           #Prediction made

#To create a dataframe of predicted price
forecast_prediction=[df['Adj. Close'][-1]]+forecast_prediction.tolist()
Pred_data={"Forecast":forecast_prediction}
dates = pd.date_range('20180327', periods=16)
Pred_df = pd.DataFrame(data=Pred_data, index=dates)

print("Done.\n")
print(Pred_df[-15:])          #Displaying predicted price

#For plotting the graph of Date vs Price
df['Adj. Close'].plot()
Pred_df['Forecast'].plot()
plt.legend(loc=4)
plt.title("Google Stock Price")
plt.xlabel('Date')
plt.ylabel('Price')

plt.show()          #To display graph to the user
Example #47
0
    def test_strftime(self):
        # GH 10086
        s = Series(date_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        s = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series(
            [
                "2015/02/03 11-22-33",
                "2015/02/04 11-22-33",
                "2015/02/05 11-22-33",
                "2015/02/06 11-22-33",
                "2015/02/07 11-22-33",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series(
            [
                "2015/02/03 11-22-33",
                "2015/02/03 11-22-34",
                "2015/02/03 11-22-35",
                "2015/02/03 11-22-36",
                "2015/02/03 11-22-37",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(date_range("20130101", periods=5))
        s.iloc[0] = pd.NaT
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["NaT", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        datetime_index = date_range("20150301", periods=5)
        result = datetime_index.strftime("%Y/%m/%d")

        expected = Index(
            ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
            dtype=np.object_,
        )
        # dtype may be S10 or U10 depending on python version
        tm.assert_index_equal(result, expected)

        period_index = period_range("20150301", periods=5)
        result = period_index.strftime("%Y/%m/%d")
        expected = Index(
            ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
            dtype="=U10",
        )
        tm.assert_index_equal(result, expected)

        s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
        result = s.dt.strftime("%Y-%m-%d %H:%M:%S")
        expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="H"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S")
        expected = Series(
            [
                "2013/01/01 00:00:00",
                "2013/01/01 01:00:00",
                "2013/01/01 02:00:00",
                "2013/01/01 03:00:00",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="L"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
        expected = Series(
            [
                "2013/01/01 00:00:00.000",
                "2013/01/01 00:00:00.001",
                "2013/01/01 00:00:00.002",
                "2013/01/01 00:00:00.003",
            ]
        )
        tm.assert_series_equal(result, expected)
Example #48
0
# %%
download_dir = 'output.csv' #where you want the file to be downloaded to 
csv = open(download_dir, "w")
#"w" indicates that you're writing strings to the file
columnTitleRow = "datetime, WindSpeed, WindDir\n"
csv.write(columnTitleRow)
for i in range(len(time)):
    row=str(time[i])+","+str(speed[i])+","+str(direction[i])+"\n"
    print(row)
    csv.write(row)
csv.close()


# %%
idx = pd.date_range(start=time[0], periods=10, freq='min')  # start from the first recorded timestamp

ts = pd.Series(range(len(idx)), index=idx)

ts
ts.resample('10min').mean()


# %%
time[1]


Example #49
0
 def test_dt_accessor_no_new_attributes(self):
     # https://github.com/pandas-dev/pandas/issues/10673
     s = Series(date_range("20130101", periods=5, freq="D"))
     with pytest.raises(AttributeError, match="You cannot add any new attribute"):
         s.dt.xlabel = "a"
Example #50
0
 def test_dt_accessor_updates_on_inplace(self):
     s = Series(pd.date_range("2018-01-01", periods=10))
     s[2] = None
     s.fillna(pd.Timestamp("2018-01-01"), inplace=True)
     result = s.dt.date
     assert result[0] == result[2]
Example #51
0
import numpy as np
import pandas as pd

s = pd.Series([1,3,5,np.nan,6,8])


dates = pd.date_range('20200405', periods=13)

df = pd.DataFrame(np.random.randn(13, 4), index=dates, columns=list('ABCD'))


df2 = pd.DataFrame({
    'A':1.,
    'B':pd.Timestamp('20200405'),
    'C':pd.Series(1, index=list(range(4)),dtype='float32'),
    'D':np.array([3]*4, dtype='int32'),
    'E': pd.Categorical(['test','train','test','train']),
    'F':'foo'})

print(df.iloc[3])

print(df.iloc[3:5,0:2])
Example #52
0
    def test_dt_accessor_datetime_name_accessors(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If time_locale is None, day_name and month_name should
            # return the English attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        s = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert s.dt.weekday_name[day] == eng_name
            assert s.dt.day_name(locale=time_locale)[day] == name
        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1])

        s = Series(date_range(freq="M", start="2012", end="2013"))
        result = s.dt.month_name(locale=time_locale)
        expected = Series([month.capitalize() for month in expected_months])

        # work around https://github.com/pandas-dev/pandas/issues/22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_series_equal(result, expected)

        for s_date, expected in zip(s, expected_months):
            result = s_date.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            expected = unicodedata.normalize("NFD", expected)

            assert result == expected

        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1])
Example #53
0
import pandas as pd
import numpy as np

dates = pd.date_range("20191031", periods=6)
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
'''
             A   B   C   D
2019-10-31   0   1   2   3
2019-11-01   4   5   6   7
2019-11-02   8   9  10  11
2019-11-03  12  13  14  15
2019-11-04  16  17  18  19
2019-11-05  20  21  22  23
'''

df.iloc[2, 2] = 1111
print(df)
'''
             A   B     C   D
2019-10-31   0   1     2   3
2019-11-01   4   5     6   7
2019-11-02   8   9  1111  11
2019-11-03  12  13    14  15
2019-11-04  16  17    18  19
2019-11-05  20  21    22  23
'''

df.loc['2019-11-02', 'B'] = 2222
print(df)
'''
Example #54
0
    def test_dt_namespace_accessor(self):

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            "to_period",
            "to_pydatetime",
            "tz_localize",
            "tz_convert",
            "normalize",
            "strftime",
            "round",
            "floor",
            "ceil",
            "day_name",
            "month_name",
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            "components",
            "to_pytimedelta",
            "total_seconds",
            "round",
            "floor",
            "ceil",
        ]

        def get_expected(s, name):
            result = getattr(Index(s._values), name)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype("int64")
            elif not is_list_like(result):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            a = getattr(s.dt, name)
            b = get_expected(s, name)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range("20130101", periods=5), name="xxx"),
            Series(date_range("20130101", periods=5, freq="s"), name="xxx"),
            Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"),
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize("US/Eastern")
            exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern")
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == "US/Eastern"
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq="infer").freq

            # let's localize, then convert
            result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
            exp_values = (
                DatetimeIndex(s.values).tz_localize("UTC").tz_convert("US/Eastern")
            )
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx")
        for prop in ok_for_dt:

            # we test freq below
            if prop != "freq":
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert("CET")
        expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx")
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == "CET"
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq="infer").freq

        # timedelta index
        cases = [
            Series(
                timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
            ),
            Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"),
            Series(
                timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
                name="xxx",
            ),
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, pd.Series)
            assert result.dtype == "float64"

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq="infer").freq

        # both
        index = date_range("20130101", periods=3, freq="D")
        s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx")
        exp = Series(
            np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx"
        )
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx")
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx")
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name="xxx")
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [Series(period_range("20130101", periods=5, freq="D"), name="xxx")]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            results = [r for r in s.dt.__dir__() if not r.startswith("_")]
            return list(sorted(set(results)))

        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))
        )

        s = Series(
            period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
        )
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_period + ok_for_period_methods)))
        )

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx")
        s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))
        )
        exp_values = pd.date_range(
            "2015-01-01", "2016-01-01", freq="T", tz="UTC"
        ).tz_convert("America/Chicago")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context("chained_assignment", "raise"):
            with pytest.raises(com.SettingWithCopyError):
                s.dt.hour[0] = 5
Example #55
0
def main():
    subject_file = open("subjects.txt","r")
    n=int(subject_file.readline())
    sub=[]
    subcode={}
    colors=[0]*n
    for i in range(n):
        x=subject_file.readline().split(" -> ")
        sub.append(x)
        subcode[x[0]]=[i,x[1]]
    graph=[None]*n
    for i in range(n):
        graph[i]=[0]*n
    student_file = open("students.txt" , "r")
    n2=int(student_file.readline())
    for i in range(n2):
        x=student_file.readline().split()
        x[1]=x[1].split(",")
        for j in x[1]:
            for k in x[1]:
                if(j==k):
                    continue
                graph[subcode[j][0]][subcode[k][0]]=1
                graph[subcode[k][0]][subcode[j][0]]=1
    colors[0]=1
    colorsused=1
    for i in range(1,n):
        c=[]
        for j in range(n):
            if graph[i][j]==1 and colors[j] not in c:
                c.append(colors[j])
        for col in range(1,colorsused+2):
            if col not in c:
                colors[i]=col
                colorsused=max(colorsused,col)
                break
    import pandas as pd
    import datetime
    print("Enter starting date of exam in YYYY-MM-DD format: ")
    d1,d2="",""
    week=['Monday','Tuesday','Wednesday','Thursday', 'Friday', 'Saturday','Sunday']
    while(True):
        try:
            d1=input()
            d1=pd.to_datetime(d1)
            break
        except Exception:
            print("Enter Valid Date!")
    d2=d1 + datetime.timedelta(days=2*colorsused)
    daterange = pd.date_range(d1,d2)
    currcol=1
    for single_date in daterange:
        if(single_date.weekday()==6):
            continue
        else:
            s=str(single_date).split()[0]
            print("\n-------------------------------------------------")
            print(s,'('+week[single_date.weekday()]+')',":")
            print("-------------------------------------------------\n")
            for i in range(n):
                if(currcol==colors[i]):
                    print(sub[i][0],sub[i][1],end="")
            if(currcol>=colorsused):
                break
            else:
                currcol+=1
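The input file layouts are not shown; a plausible reading of the parsing code above, with made-up contents (hypothetical subject codes and names):

# subjects.txt: a count, then 'code -> name' lines
# 3
# CS101 -> Intro to Programming
# MA102 -> Calculus
# PH103 -> Physics

# students.txt: a count, then 'student_id code,code,...' lines
# 2
# alice CS101,MA102
# bob MA102,PH103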
Example #56
0
class TestCategoricalDtypeParametrized:
    @pytest.mark.parametrize(
        "categories",
        [
            list("abcd"),
            np.arange(1000),
            ["a", "b", 10, 2, 1.3, True],
            [True, False],
            pd.date_range("2017", periods=4),
        ],
    )
    def test_basic(self, categories, ordered):
        c1 = CategoricalDtype(categories, ordered=ordered)
        tm.assert_index_equal(c1.categories, pd.Index(categories))
        assert c1.ordered is ordered

    def test_order_matters(self):
        categories = ["a", "b"]
        c1 = CategoricalDtype(categories, ordered=True)
        c2 = CategoricalDtype(categories, ordered=False)
        c3 = CategoricalDtype(categories, ordered=None)
        assert c1 is not c2
        assert c1 is not c3

    @pytest.mark.parametrize("ordered", [False, None])
    def test_unordered_same(self, ordered):
        c1 = CategoricalDtype(["a", "b"], ordered=ordered)
        c2 = CategoricalDtype(["b", "a"], ordered=ordered)
        assert hash(c1) == hash(c2)

    def test_categories(self):
        result = CategoricalDtype(["a", "b", "c"])
        tm.assert_index_equal(result.categories, pd.Index(["a", "b", "c"]))
        assert result.ordered is False

    def test_equal_but_different(self, ordered):
        c1 = CategoricalDtype([1, 2, 3])
        c2 = CategoricalDtype([1.0, 2.0, 3.0])
        assert c1 is not c2
        assert c1 != c2

    @pytest.mark.parametrize("v1, v2", [([1, 2, 3], [1, 2, 3]), ([1, 2, 3], [3, 2, 1])])
    def test_order_hashes_different(self, v1, v2):
        c1 = CategoricalDtype(v1, ordered=False)
        c2 = CategoricalDtype(v2, ordered=True)
        c3 = CategoricalDtype(v1, ordered=None)
        assert c1 is not c2
        assert c1 is not c3

    def test_nan_invalid(self):
        msg = "Categorical categories cannot be null"
        with pytest.raises(ValueError, match=msg):
            CategoricalDtype([1, 2, np.nan])

    def test_non_unique_invalid(self):
        msg = "Categorical categories must be unique"
        with pytest.raises(ValueError, match=msg):
            CategoricalDtype([1, 2, 1])

    def test_same_categories_different_order(self):
        c1 = CategoricalDtype(["a", "b"], ordered=True)
        c2 = CategoricalDtype(["b", "a"], ordered=True)
        assert c1 is not c2

    @pytest.mark.parametrize("ordered1", [True, False, None])
    @pytest.mark.parametrize("ordered2", [True, False, None])
    def test_categorical_equality(self, ordered1, ordered2):
        # same categories, same order
        # any combination of None/False are equal
        # True/True is the only combination with True that are equal
        c1 = CategoricalDtype(list("abc"), ordered1)
        c2 = CategoricalDtype(list("abc"), ordered2)
        result = c1 == c2
        expected = bool(ordered1) is bool(ordered2)
        assert result is expected

        # same categories, different order
        # any combination of None/False are equal (order doesn't matter)
        # any combination with True are not equal (different order of cats)
        c1 = CategoricalDtype(list("abc"), ordered1)
        c2 = CategoricalDtype(list("cab"), ordered2)
        result = c1 == c2
        expected = (bool(ordered1) is False) and (bool(ordered2) is False)
        assert result is expected

        # different categories
        c2 = CategoricalDtype([1, 2, 3], ordered2)
        assert c1 != c2

        # none categories
        c1 = CategoricalDtype(list("abc"), ordered1)
        c2 = CategoricalDtype(None, ordered2)
        c3 = CategoricalDtype(None, ordered1)
        assert c1 == c2
        assert c2 == c1
        assert c2 == c3

    @pytest.mark.parametrize("categories", [list("abc"), None])
    @pytest.mark.parametrize("other", ["category", "not a category"])
    def test_categorical_equality_strings(self, categories, ordered, other):
        c1 = CategoricalDtype(categories, ordered)
        result = c1 == other
        expected = other == "category"
        assert result is expected

    def test_invalid_raises(self):
        with pytest.raises(TypeError, match="ordered"):
            CategoricalDtype(["a", "b"], ordered="foo")

        with pytest.raises(TypeError, match="'categories' must be list-like"):
            CategoricalDtype("category")

    def test_mixed(self):
        a = CategoricalDtype(["a", "b", 1, 2])
        b = CategoricalDtype(["a", "b", "1", "2"])
        assert hash(a) != hash(b)

    def test_from_categorical_dtype_identity(self):
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # Identity test for no changes
        c2 = CategoricalDtype._from_categorical_dtype(c1)
        assert c2 is c1

    def test_from_categorical_dtype_categories(self):
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override categories
        result = CategoricalDtype._from_categorical_dtype(c1, categories=[2, 3])
        assert result == CategoricalDtype([2, 3], ordered=True)

    def test_from_categorical_dtype_ordered(self):
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override ordered
        result = CategoricalDtype._from_categorical_dtype(c1, ordered=False)
        assert result == CategoricalDtype([1, 2, 3], ordered=False)

    def test_from_categorical_dtype_both(self):
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override ordered
        result = CategoricalDtype._from_categorical_dtype(
            c1, categories=[1, 2], ordered=False
        )
        assert result == CategoricalDtype([1, 2], ordered=False)

    def test_str_vs_repr(self, ordered):
        c1 = CategoricalDtype(["a", "b"], ordered=ordered)
        assert str(c1) == "category"
        # Py2 will have unicode prefixes
        pat = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)"
        assert re.match(pat.format(ordered=ordered), repr(c1))

    def test_categorical_categories(self):
        # GH17884
        c1 = CategoricalDtype(Categorical(["a", "b"]))
        tm.assert_index_equal(c1.categories, pd.Index(["a", "b"]))
        c1 = CategoricalDtype(CategoricalIndex(["a", "b"]))
        tm.assert_index_equal(c1.categories, pd.Index(["a", "b"]))

    @pytest.mark.parametrize(
        "new_categories", [list("abc"), list("cba"), list("wxyz"), None]
    )
    @pytest.mark.parametrize("new_ordered", [True, False, None])
    def test_update_dtype(self, ordered, new_categories, new_ordered):
        original_categories = list("abc")
        dtype = CategoricalDtype(original_categories, ordered)
        new_dtype = CategoricalDtype(new_categories, new_ordered)

        result = dtype.update_dtype(new_dtype)
        expected_categories = pd.Index(new_categories or original_categories)
        expected_ordered = new_ordered if new_ordered is not None else dtype.ordered

        tm.assert_index_equal(result.categories, expected_categories)
        assert result.ordered is expected_ordered

    def test_update_dtype_string(self, ordered):
        dtype = CategoricalDtype(list("abc"), ordered)
        expected_categories = dtype.categories
        expected_ordered = dtype.ordered
        result = dtype.update_dtype("category")
        tm.assert_index_equal(result.categories, expected_categories)
        assert result.ordered is expected_ordered

    @pytest.mark.parametrize("bad_dtype", ["foo", object, np.int64, PeriodDtype("Q")])
    def test_update_dtype_errors(self, bad_dtype):
        dtype = CategoricalDtype(list("abc"), False)
        msg = "a CategoricalDtype must be passed to perform an update, "
        with pytest.raises(ValueError, match=msg):
            dtype.update_dtype(bad_dtype)
Example #57
0
def apple_calendar(apple_start, apple_end):
	apple_fiscal = pd.read_csv(csv_path, parse_dates=True, index_col=None)

	# convert date, start and end date range, pay date and month duration to datetime
	apple_fiscal['date'] = pd.to_datetime(apple_fiscal['date'])
	apple_fiscal['start_date'] = pd.to_datetime(apple_fiscal['start_date'])
	apple_starts = apple_fiscal[['start_date', 'date']]
	apple_fiscal['end_date'] = pd.to_datetime(apple_fiscal['end_date'])
	apple_ends = apple_fiscal[['end_date', 'date']]
	apple_fiscal['pay_date'] = pd.to_datetime(apple_fiscal['pay_date'])
	apple_pays = apple_fiscal['pay_date']
	apple_duration = apple_fiscal[['date', 'next_month_duration']]

	# parse the date entry strings ('YYYY-MM-DD') into datetime.date objects
	year, month, day = map(int, apple_start.split('-'))
	apple_start = datetime.date(year, month, day)
	year, month, day = map(int, apple_end.split('-'))
	apple_end = datetime.date(year, month, day)

	# create list of annual dates, based on the start date from date entry variable and convert list to data frame
	apple_range = apple_end - apple_start
	apple_range = apple_range.days + 1
	apple_revenue = list(range(0, apple_range))

	start_datelist = pd.date_range(apple_start, apple_end).tolist()
	start_apple_table = pd.DataFrame(start_datelist)

	# name index 'days' and column 'date'
	start_apple_table.index.name = 'days'
	start_apple_table.columns = ['date']

	# merge csv columns 'start_date', 'end_date', and 'next_month_duration' with the data frame
	start_apple_table = start_apple_table.merge(apple_starts, how='left', on='date')
	start_apple_table = start_apple_table.merge(apple_ends, how='left', on='date')
	start_apple_table = start_apple_table.merge(apple_duration, how='left', on='date')

	# add 'pay_date' column listing only the days when a payment from Apple is received
	start_apple_table.loc[start_apple_table['date'].isin(apple_pays), 'pay_date'] = start_apple_table['date']

	# assign 'sample_sales' column to second input revenue
	start_apple_table['sample_sales'] = apple_revenue

	# change index to column 'date'
	start_apple_table = start_apple_table.set_index('date')

	# convert 'next_month_duration' from integer to datetime days
	start_apple_table['next_month_duration'] = pd.to_timedelta(start_apple_table['next_month_duration'], unit='D')

	# create 'monthly_sales' column
	start_apple_table['monthly_sales'] = start_apple_table.apply(lambda x: start_apple_table.loc[(start_apple_table['start_date']
		<= x.name) & (x.name <= start_apple_table['end_date']), ['sample_sales']].sum(), axis=1)

	# create 'monthly_adj' column to move the sales up by next month fiscal duration period
	start_apple_table['monthly_adj'] = start_apple_table.apply(lambda x: start_apple_table.loc[(start_apple_table['start_date']
		+ start_apple_table['next_month_duration'] <= x.name) & (x.name <= start_apple_table['end_date'] +
		start_apple_table['next_month_duration']), ['sample_sales']].sum(), axis=1)

	# shift 'monthly_adj' by 7 rows to be captured by 'pay_date'
	start_apple_table['monthly_shift'] = start_apple_table['monthly_adj'].shift(7)

	# add 'monthly_payment' and show only on 'pay_date' dates
	start_apple_table['monthly_payment'] = start_apple_table['monthly_shift'].loc[start_apple_table['pay_date'].notnull()]

	# add 'cumulative_payment' column
	start_apple_table['cumulative_payment'] = start_apple_table['monthly_payment'].cumsum()

	return start_apple_table
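The two apply-based window sums above are the core trick; a minimal standalone sketch of the same row-wise windowed sum on toy data (all names here are illustrative):

import pandas as pd

days = pd.date_range('2020-01-01', periods=6)
toy = pd.DataFrame({'sample_sales': range(6),
                    'start_date': days.min(),
                    'end_date': days.max()}, index=days)

# For each row, sum the sales whose index falls inside that row's [start_date, end_date] window
toy['monthly_sales'] = toy.apply(
    lambda x: toy.loc[(x['start_date'] <= x.name) & (x.name <= x['end_date']), 'sample_sales'].sum(),
    axis=1)
print(toy)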
Example #58
0
# Learning pandas in about 10 minutes
# https://pandas.pydata.org/pandas-docs/stable/10min.html
# CNA 330
# Mustafa Musa, [email protected]

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)

dates = pd.date_range('20130101', periods=6)
print(dates)

df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)

df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo'
})
print(df2)
print(df2.dtypes)
print(df.head())
print(df.tail(3))
print(df.index)
Example #59
0
    thumbnail_url="caching-example.png",
    code_url="caching_example/caching_example.py",
    mp4_url="caching-example.mp4",
    tags=["Panel", "Caching"],
)

ACCENT_COLOR = "#C01754"

CACHE_EXPIRY = 60 * 60 * 24  # seconds, i.e. one Day


np.random.seed([3, 1415])
PERIODS = 1 * 24 * 60  # minutes, i.e. 1 day
DATA = pd.DataFrame(
    {
        "time": pd.date_range("2020-01-01", periods=PERIODS, freq="T"),
        "price": np.random.randn(PERIODS) + 98,
    }
)


def _load_data(frac=0.1):
    time.sleep(0.5 + frac * 0.5)
    return DATA.sample(frac=frac)


def _plot_data(frac=0.1):
    time.sleep(0.5)
    data = _load_data(frac)
    return data.hvplot(x="time", y="price")
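The snippet is truncated before the caching wiring itself; a minimal sketch of the time-based caching idea it gestures at, reusing CACHE_EXPIRY and _load_data from above (the helper name and cache layout are illustrative, not from the original):

import time

_cache = {}  # key -> (timestamp, value)

def cached(key, compute, expiry=CACHE_EXPIRY):
    # Return a cached value, recomputing once the entry is older than expiry seconds
    now = time.time()
    if key in _cache and now - _cache[key][0] < expiry:
        return _cache[key][1]
    value = compute()
    _cache[key] = (now, value)
    return value

# e.g. cached(('data', 0.1), lambda: _load_data(0.1))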
Example #60
0
  def forecast(self,
    start_date:date=None, 
    end_date:date=None, 
    fq:str=None,
    econ_limit:float=None,
    np_limit:float=None,
    npi:float=0, 
    fluid_rate:float=None,
    show_water:bool = False,
    **kwargs
    ):
    """
    Forecast curve from the declination object. 
 
    Input: 
        start_date ->  (datetime.date) Initial date Forecast
        end_date ->  (datetime.date) end date Forecast
        fq -> (str) frequecy for the time table. 
              Use https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
        econ_limit -> (int,float,np.dnarray) Economic limit Rate. If end_date 

    Return: 
      f: DataFrame with t column and curve column
      np: Cummulative production

    """
    if econ_limit is None:
      econ_limit = self.econ_limit
    else:
      assert isinstance(econ_limit,(int,float,np.ndarray)), 'econ_limit must be a number'

    if fq is None:
      fq = self.fq
    else:
      assert isinstance(fq,str), 'fq must be str'

    if start_date is None: 
      if self.start_date is None:
        start_date = self.ti
      else:
        start_date = self.start_date
    else:
      assert isinstance(start_date,date), 'start_date must be date'

    if end_date is None: 
      if self.end_date is None:
        end_date = self.ti + timedelta(days=365) if econ_limit is None else None
      else:
        end_date = self.end_date
    else:
      assert isinstance(end_date,date), 'end_date must be date'

    if np_limit is None:
      np_limit = self.np_limit
    else:
      assert isinstance(np_limit,(int,float,np.ndarray)), 'np_limit must be a number'

    if fluid_rate is None:
      fluid_rate = self.fluid_rate

    if econ_limit is None:
      time_range = pd.Series(pd.date_range(start=start_date, end=end_date, freq=fq, **kwargs))
      f, Np = forecast_curve(time_range,self.qi,self.di,self.ti,self.b,npi=npi, gas=self.gas)
    else:
      f, Np = forecast_econlimit(start_date,econ_limit,self.qi,self.di,self.ti,self.b, fr=fq,end_date=end_date,npi=npi,gas=self.gas)

    if np_limit is not None:
      if Np > np_limit:
        f = f.loc[f['np']<np_limit,:]
        Np = f.iloc[-1,-1]

    if show_water and fluid_rate is not None:
      f['qw'] = fluid_rate  - f['qo']
      f['bsw'] = f['qw'] / (f['qw'] + f['qo'])
      f['wor'] = f['qw'] / f['qo']
      f['wor_1'] = f['wor'] + 1

    return f, Np
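For context, a hedged sketch of the hyperbolic Arps decline that forecast_curve presumably evaluates from qi, di, ti and b (forecast_curve itself is defined elsewhere; this is the textbook form, not necessarily that library's exact implementation):

import numpy as np

def arps_rate(qi, di, b, t):
    # Textbook Arps rate: exponential when b == 0, hyperbolic otherwise.
    # qi: initial rate, di: nominal decline (1/time), b: decline exponent, t: elapsed time
    t = np.asarray(t, dtype=float)
    if b == 0:
        return qi * np.exp(-di * t)
    return qi / np.power(1.0 + b * di * t, 1.0 / b)

# e.g. arps_rate(qi=1000.0, di=0.3, b=0.5, t=np.arange(0, 5))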