Example #1
    path = "elspot-prices_2019_hourly_eur.xls"
    df = datapreparation.read_ElspotPrices(path)
    df = datapreparation.removeDaylightSavings(df)

    # Get data
    market_area = "SE1"
    historic_prices = df[market_area]
    prediction_method = "SameHourLastWeek"  # methods = ["Perfect", "SameHourLastWeek"]

    # Plant parameters
    E0 = 0  # Energy difference at t = t_start
    Pmax = 100  # Maximum power deviation in MW
    Emax = 48 * Pmax  # Maximum energy deviation in MWh

    # Simulation parameters
    t_start = pd.Timestamp("2019-10-22 00:00:00")  # Times are in UTC+0
    t_end = pd.Timestamp(
        "2019-12-21 23:00:00")  # Simulate two months of operation
    t_horizon = 7 * 24  # Time horizon for optimization (in h)

    results = runScenario(
        historic_prices,  # Historic electricity prices
        E0,
        Emax,
        Pmax,  # Plant parameters
        prediction_method,  # Prediction settings
        t_start,
        t_end,
        t_horizon)  # Simulation times
    print(results)
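
The "SameHourLastWeek" method named above presumably reuses the price observed
168 hours earlier as the forecast for each hour. A minimal sketch of that idea
(this helper is an assumption for illustration, not part of the original
datapreparation module):

import pandas as pd

def same_hour_last_week(prices: pd.Series, t: pd.Timestamp, horizon: int) -> pd.Series:
    # forecast each of the next `horizon` hours with the price from one week earlier
    window = pd.date_range(t, periods=horizon, freq="H")
    return pd.Series(prices.reindex(window - pd.Timedelta(weeks=1)).to_numpy(),
                     index=window)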
Example #2
    def test_unstack_nan_index(self):  # GH7466
        cast = lambda val: '{0:1}'.format('' if val != val else val)
        nan = np.nan

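        # verify: every non-null cell of an unstacked frame must be a dot-joined
        # string whose pieces are exactly the row and column labels the cell
        # landed on (NaN labels render as a blank via `cast`)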
        def verify(df):
            mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
            rows, cols = df.notna().values.nonzero()
            for i, j in zip(rows, cols):
                left = sorted(df.iloc[i, j].split('.'))
                right = mk_list(df.index[i]) + mk_list(df.columns[j])
                right = sorted(list(map(cast, right)))
                assert left == right

        df = DataFrame({
            'jim': ['a', 'b', nan, 'd'],
            'joe': ['w', 'x', 'y', 'z'],
            'jolie': ['a.w', 'b.x', ' .y', 'd.z']
        })

        left = df.set_index(['jim', 'joe']).unstack()['jolie']
        right = df.set_index(['joe', 'jim']).unstack()['jolie'].T
        assert_frame_equal(left, right)

        for idx in itertools.permutations(df.columns[:2]):
            mi = df.set_index(list(idx))
            for lev in range(2):
                udf = mi.unstack(level=lev)
                assert udf.notna().values.sum() == len(df)
                verify(udf['jolie'])

        df = DataFrame({
            '1st': ['d'] * 3 + [nan] * 5 + ['a'] * 2 + ['c'] * 3 + ['e'] * 2 +
            ['b'] * 5,
            '2nd': ['y'] * 2 + ['w'] * 3 + [nan] * 3 + ['z'] * 4 + [nan] * 3 +
            ['x'] * 3 + [nan] * 2,
            '3rd': [
                67, 39, 53, 72, 57, 80, 31, 18, 11, 30, 59, 50, 62, 59, 76, 52,
                14, 53, 60, 51
            ]
        })

        df['4th'], df['5th'] = \
            df.apply(lambda r: '.'.join(map(cast, r)), axis=1), \
            df.apply(lambda r: '.'.join(map(cast, r.iloc[::-1])), axis=1)

        for idx in itertools.permutations(['1st', '2nd', '3rd']):
            mi = df.set_index(list(idx))
            for lev in range(3):
                udf = mi.unstack(level=lev)
                assert udf.notna().values.sum() == 2 * len(df)
                for col in ['4th', '5th']:
                    verify(udf[col])

        # GH7403
        df = pd.DataFrame({
            'A': list('aaaabbbb'),
            'B': range(8),
            'C': range(8)
        })
        df.iloc[3, 1] = np.NaN
        left = df.set_index(['A', 'B']).unstack(0)

        vals = [[3, 0, 1, 2, nan, nan, nan, nan],
                [nan, nan, nan, nan, 4, 5, 6, 7]]
        vals = list(map(list, zip(*vals)))
        idx = Index([nan, 0, 1, 2, 4, 5, 6, 7], name='B')
        cols = MultiIndex(levels=[['C'], ['a', 'b']],
                          codes=[[0, 0], [0, 1]],
                          names=[None, 'A'])

        right = DataFrame(vals, columns=cols, index=idx)
        assert_frame_equal(left, right)

        df = DataFrame({
            'A': list('aaaabbbb'),
            'B': list(range(4)) * 2,
            'C': range(8)
        })
        df.iloc[2, 1] = np.NaN
        left = df.set_index(['A', 'B']).unstack(0)

        vals = [[2, nan], [0, 4], [1, 5], [nan, 6], [3, 7]]
        cols = MultiIndex(levels=[['C'], ['a', 'b']],
                          codes=[[0, 0], [0, 1]],
                          names=[None, 'A'])
        idx = Index([nan, 0, 1, 2, 3], name='B')
        right = DataFrame(vals, columns=cols, index=idx)
        assert_frame_equal(left, right)

        df = pd.DataFrame({
            'A': list('aaaabbbb'),
            'B': list(range(4)) * 2,
            'C': range(8)
        })
        df.iloc[3, 1] = np.NaN
        left = df.set_index(['A', 'B']).unstack(0)

        vals = [[3, nan], [0, 4], [1, 5], [2, 6], [nan, 7]]
        cols = MultiIndex(levels=[['C'], ['a', 'b']],
                          codes=[[0, 0], [0, 1]],
                          names=[None, 'A'])
        idx = Index([nan, 0, 1, 2, 3], name='B')
        right = DataFrame(vals, columns=cols, index=idx)
        assert_frame_equal(left, right)

        # GH7401
        df = pd.DataFrame({
            'A':
            list('aaaaabbbbb'),
            'B': (date_range('2012-01-01', periods=5).tolist() * 2),
            'C':
            np.arange(10)
        })

        df.iloc[3, 1] = np.NaN
        left = df.set_index(['A', 'B']).unstack()

        vals = np.array([[3, 0, 1, 2, nan, 4], [nan, 5, 6, 7, 8, 9]])
        idx = Index(['a', 'b'], name='A')
        cols = MultiIndex(levels=[['C'],
                                  date_range('2012-01-01', periods=5)],
                          codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
                          names=[None, 'B'])

        right = DataFrame(vals, columns=cols, index=idx)
        assert_frame_equal(left, right)

        # GH4862
        vals = [['Hg', nan, nan, 680585148], ['U', 0.0, nan, 680585148],
                ['Pb', 7.07e-06, nan, 680585148],
                ['Sn', 2.3614e-05, 0.0133, 680607017],
                ['Ag', 0.0, 0.0133, 680607017],
                ['Hg', -0.00015, 0.0133, 680607017]]
        df = DataFrame(vals,
                       columns=['agent', 'change', 'dosage', 's_id'],
                       index=[17263, 17264, 17265, 17266, 17267, 17268])

        left = df.copy().set_index(['s_id', 'dosage', 'agent']).unstack()

        vals = [[nan, nan, 7.07e-06, nan, 0.0],
                [0.0, -0.00015, nan, 2.3614e-05, nan]]

        idx = MultiIndex(levels=[[680585148, 680607017], [0.0133]],
                         codes=[[0, 1], [-1, 0]],
                         names=['s_id', 'dosage'])

        cols = MultiIndex(levels=[['change'], ['Ag', 'Hg', 'Pb', 'Sn', 'U']],
                          codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]],
                          names=[None, 'agent'])

        right = DataFrame(vals, columns=cols, index=idx)
        assert_frame_equal(left, right)

        left = df.loc[17264:].copy().set_index(['s_id', 'dosage', 'agent'])
        assert_frame_equal(left.unstack(), right)

        # GH9497 - multiple unstack with nulls
        df = DataFrame({
            '1st': [1, 2, 1, 2, 1, 2],
            '2nd': pd.date_range('2014-02-01', periods=6, freq='D'),
            'jim': 100 + np.arange(6),
            'joe': (np.random.randn(6) * 10).round(2)
        })

        df['3rd'] = df['2nd'] - pd.Timestamp('2014-02-02')
        df.loc[1, '2nd'] = df.loc[3, '2nd'] = nan
        df.loc[1, '3rd'] = df.loc[4, '3rd'] = nan

        left = df.set_index(['1st', '2nd', '3rd']).unstack(['2nd', '3rd'])
        assert left.notna().values.sum() == 2 * len(df)

        for col in ['jim', 'joe']:
            for _, r in df.iterrows():
                key = r['1st'], (col, r['2nd'], r['3rd'])
                assert r[col] == left.loc[key]
Example #3
def test_real_apisession_get_forecast_values(real_session):
    fx = real_session.get_forecast_values(
        'f8dd49fa-23e2-48a0-862b-ba0af6dec276',
        pd.Timestamp('2019-04-15T00:00:00Z'),
        pd.Timestamp('2019-04-15T12:00:00Z'))
    assert isinstance(fx, pd.Series)
         "https":"https://10.42.32.29:8080"}
         
         
e = entsoe.EntsoePandasClient(api_key=TOKEN, proxies = PROXY, retry_count=20, retry_delay=30)

start_year = 2015
end_year = 2019

domains = ["BE","FR","ES","DE","PL","PT","CZ","GB","IT","CH","NL","HU","AT","SK"]
quarterhour = ["BE","DE","AT","NL","HU"]
halfhour = ["GB"]


df_dic = {}
for year in range(start_year, end_year):
    start = pd.Timestamp(year=year, month=1, day=1, tz='Europe/Brussels')
    end = pd.Timestamp(year=year+1, month=1, day=1, tz='Europe/Brussels')
    df_dic[year] = {}    
    for country in domains:
        print("Querying yearly load from %s to %s for country %s" %(start.strftime('%d-%m-%Y'),end.strftime('%d-%m-%Y'),country))
        s = e.query_load(country_code=country, start=start, end=end)
        if s is not None:
            df_dic[year][country] = s
    df_dic[year] = pd.concat(df_dic[year])
result = pd.concat(df_dic).reset_index()
result.columns = ["year","country","time","load"]
#result["year"] = [n.year for n in result.time]

result.load = result.load / 1000 / 1000  # convert MW to TW
# sub-hourly markets report one power reading per interval; divide by the
# number of intervals per hour so that summing later yields energy (TWh)
result.loc[result.country.isin(quarterhour), "load"] = result.loc[result.country.isin(quarterhour), "load"] / 4.
result.loc[result.country.isin(halfhour), "load"] = result.loc[result.country.isin(halfhour), "load"] / 2.
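
After the rescaling, summing the load column gives energy in TWh, so a yearly
total per country is a single groupby over the result frame built above:

annual_twh = result.groupby(["year", "country"])["load"].sum().unstack("country")
print(annual_twh.round(1))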
Example #5
    def setData(self, index, value, role=Qt.DisplayRole):
        """Set the value to the index position depending on Qt::ItemDataRole and data type of the column

        Args:
            index (QtCore.QModelIndex): Index to define column and row.
            value (object): new value.
            role (Qt::ItemDataRole): Use this role to specify what you want to do.

        Raises:
            TypeError: If the value could not be converted to a known datatype.

        Returns:
            True if value is changed. Calls layoutChanged after update.
            False if value is not different from original value.

        """
        if not index.isValid() or not self.editable:
            return False

        if value != index.data(role):

            self.layoutAboutToBeChanged.emit()

            row = self._dataFrame.index[index.row()]
            col = self._dataFrame.columns[index.column()]
            #print 'before change: ', index.data().toUTC(), self._dataFrame.iloc[row][col]
            columnDtype = self._dataFrame[col].dtype

            if columnDtype == object:
                pass

            elif columnDtype in self._intDtypes:
                dtypeInfo = numpy.iinfo(columnDtype)
                if value < dtypeInfo.min:
                    value = dtypeInfo.min
                elif value > dtypeInfo.max:
                    value = dtypeInfo.max

            elif columnDtype in self._floatDtypes:
                value = numpy.float64(value).astype(columnDtype)

            elif columnDtype in self._boolDtypes:
                value = numpy.bool_(value)

            elif columnDtype in self._dateDtypes:
                # convert the given value to a compatible datetime object;
                # if the conversion cannot be done, raise.
                if isinstance(value, QtCore.QDateTime):
                    value = value.toString(self.timestampFormat)
                try:
                    value = pandas.Timestamp(value)
                except Exception:
                    raise Exception(
                        "Can't convert '{0}' into a datetime".format(value))
                    # return False
            else:
                raise TypeError("try to set unhandled data type")

            self._dataFrame.set_value(row, col, value)

            #print 'after change: ', value, self._dataFrame.iloc[row][col]
            self.layoutChanged.emit()
            return True
        else:
            return False
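
A minimal usage sketch for the method above, assuming it lives on a
DataFrameModel wrapper class (the class name and constructor are assumptions,
not taken from the listing):

import pandas

model = DataFrameModel(pandas.DataFrame({"when": pandas.to_datetime(["2019-01-01"])}))
model.editable = True
ix = model.index(0, 0)                     # row 0, column "when"
changed = model.setData(ix, "2019-02-01")  # coerced to pandas.Timestamp internally
assert changed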
Example #6
    def test_minutely_fetcher(self):
        self.responses.add(
            self.responses.GET,
            'https://fake.urls.com/aapl_minute_csv_data.csv',
            body=AAPL_MINUTE_CSV_DATA,
            content_type='text/csv',
        )

        sim_params = factory.create_simulation_parameters(
            start=pd.Timestamp("2006-01-03", tz='UTC'),
            end=pd.Timestamp("2006-01-10", tz='UTC'),
            emission_rate="minute",
            data_frequency="minute")

        test_algo = TradingAlgorithm(script="""
from zipline.api import fetch_csv, record, sid

def initialize(context):
    fetch_csv('https://fake.urls.com/aapl_minute_csv_data.csv')

def handle_data(context, data):
    record(aapl_signal=data.current(sid(24), "signal"))
""",
                                     sim_params=sim_params,
                                     data_frequency="minute",
                                     env=self.env)

        # manually setting data portal and getting generator because we need
        # the minutely emission packets here.  TradingAlgorithm.run() only
        # returns daily packets.
        test_algo.data_portal = FetcherDataPortal(self.env,
                                                  self.trading_calendar)
        gen = test_algo.get_generator()
        perf_packets = list(gen)

        signal = [
            result["minute_perf"]["recorded_vars"]["aapl_signal"]
            for result in perf_packets if "minute_perf" in result
        ]

        self.assertEqual(6 * 390, len(signal))

        # csv data (for dates 1/3 to 1/10) is:
        # symbol,date,signal
        # aapl,1/4/06 5:31AM, 1
        # aapl,1/4/06 11:30AM, 2
        # aapl,1/5/06 5:31AM, 1
        # aapl,1/5/06 11:30AM, 3
        # aapl,1/9/06 5:31AM, 1
        # aapl,1/9/06 11:30AM, 4

        # 2 signals per day, only last signal is taken. So we expect
        # 390 bars of signal NaN on 1/3
        # 390 bars of signal 2 on 1/4
        # 390 bars of signal 3 on 1/5
        # 390 bars of signal 3 on 1/6 (forward filled)
        # 390 bars of signal 4 on 1/9
        # 390 bars of signal 4 on 1/10 (forward filled)

        np.testing.assert_array_equal([np.NaN] * 390, signal[0:390])
        np.testing.assert_array_equal([2] * 390, signal[390:780])
        np.testing.assert_array_equal([3] * 780, signal[780:1560])
        np.testing.assert_array_equal([4] * 780, signal[1560:])
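
The expected pattern in the assertions (last signal of each day, forward-filled
across trading days) can be reproduced with plain pandas; a small illustrative
sketch, independent of zipline:

import pandas as pd

sig = pd.Series([1, 2, 1, 3, 1, 4],
                index=pd.to_datetime(["2006-01-04 05:31", "2006-01-04 11:30",
                                      "2006-01-05 05:31", "2006-01-05 11:30",
                                      "2006-01-09 05:31", "2006-01-09 11:30"]))
days = pd.DatetimeIndex(["2006-01-03", "2006-01-04", "2006-01-05",
                         "2006-01-06", "2006-01-09", "2006-01-10"])
daily = sig.resample("D").last().reindex(days).ffill()
print(daily.tolist())  # [nan, 2.0, 3.0, 3.0, 4.0, 4.0]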
Example #7
def test_days_to_weeks(fn=days_to_weeks):
    tickers = generate_random_tickers(3)
    dates = pd.date_range('10/10/2018', periods=28, freq='D')
    resampled_dates = [
        pd.Timestamp('2018-10-14 00:00:00', freq='W-SUN'),
        pd.Timestamp('2018-10-21 00:00:00', freq='W-SUN'),
        pd.Timestamp('2018-10-28 00:00:00', freq='W-SUN'),
        pd.Timestamp('2018-11-04 00:00:00', freq='W-SUN'),
        pd.Timestamp('2018-11-11 00:00:00', freq='W-SUN')
    ]

    fn_inputs = {
        'open_prices':
        pd.DataFrame([[24, 21, 43], [14, 22, 41], [29, 23, 44], [44, 14, 13],
                      [31, 28, 34], [36, 49, 27], [48, 20, 46], [48, 37, 27],
                      [16, 42, 22], [23, 36, 32], [13, 31, 28], [23, 33, 18],
                      [14, 47, 45], [28, 21, 31], [31, 36, 40], [19, 25, 46],
                      [30, 46, 48], [19, 34, 35], [24, 13, 24], [48, 15, 39],
                      [16, 34, 14], [37, 30, 28], [34, 24, 20], [17, 15, 38],
                      [44, 15, 22], [24, 36, 28], [12, 41, 49], [24, 27, 14]],
                     dates, tickers),
        'high_prices':
        pd.DataFrame([[48, 48, 43], [42, 49, 47], [45, 47, 48], [48, 46, 48],
                      [49, 49, 46], [40, 49, 49], [49, 44, 49], [49, 46, 48],
                      [46, 49, 49], [49, 47, 47], [45, 49, 46], [45, 49, 49],
                      [49, 48, 48], [48, 49, 49], [49, 49, 48], [48, 48, 49],
                      [48, 47, 48], [47, 49, 49], [47, 49, 49], [48, 49, 48],
                      [49, 49, 47], [48, 47, 48], [47, 48, 47], [49, 49, 45],
                      [49, 49, 49], [47, 46, 48], [47, 47, 49], [49, 49, 46]],
                     dates, tickers),
        'low_prices':
        pd.DataFrame([[12, 12, 13], [12, 14, 15], [13, 14, 12], [14, 14, 13],
                      [12, 12, 14], [12, 12, 12], [12, 12, 12], [13, 12, 13],
                      [12, 12, 13], [14, 12, 14], [12, 12, 12], [13, 14, 16],
                      [14, 13, 13], [13, 14, 12], [14, 12, 14], [15, 12, 13],
                      [12, 12, 12], [12, 13, 15], [14, 12, 12], [12, 12, 12],
                      [12, 14, 13], [12, 12, 13], [13, 14, 15], [12, 12, 12],
                      [12, 14, 12], [12, 12, 13], [12, 12, 12], [16, 12, 14]],
                     dates, tickers),
        'close_prices':
        pd.DataFrame([[27, 45, 15], [40, 49, 40], [25, 26, 36], [26, 36, 19],
                      [25, 34, 46], [22, 39, 45], [40, 14, 17], [42, 46, 33],
                      [35, 41, 49], [14, 24, 31], [41, 18, 13], [36, 27, 18],
                      [16, 16, 45], [37, 24, 16], [43, 40, 28], [39, 29, 45],
                      [38, 20, 43], [44, 13, 34], [23, 17, 47], [25, 14, 38],
                      [48, 44, 23], [37, 24, 33], [40, 28, 17], [31, 12, 44],
                      [29, 40, 49], [18, 30, 13], [27, 16, 47], [31, 32, 14]],
                     dates, tickers)
    }
    fn_correct_outputs = OrderedDict([
        ('open_prices_weekly',
         pd.DataFrame([[24, 21, 43], [36, 49, 27], [14, 47, 45], [48, 15, 39],
                       [12, 41, 49]], resampled_dates, tickers)),
        ('high_prices_weekly',
         pd.DataFrame([[49, 49, 48], [49, 49, 49], [49, 49, 49], [49, 49, 49],
                       [49, 49, 49]], resampled_dates, tickers)),
        ('low_prices_weekly',
         pd.DataFrame([[12, 12, 12], [12, 12, 12], [12, 12, 12], [12, 12, 12],
                       [12, 12, 12]], resampled_dates, tickers)),
        ('close_prices_weekly',
         pd.DataFrame([[25, 34, 46], [36, 27, 18], [23, 17, 47], [18, 30, 13],
                       [31, 32, 14]], resampled_dates, tickers))
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
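
The fixtures encode standard OHLC resampling: the weekly open is the first
daily open, the high is the weekly max, the low the weekly min, and the close
the last daily close. A sketch of a days_to_weeks that satisfies them, assuming
pandas DataFrames indexed by date:

def days_to_weeks(open_prices, high_prices, low_prices, close_prices):
    # weekly bars end on Sunday ("W" is an alias for "W-SUN"), matching
    # the resampled_dates defined above
    return (open_prices.resample("W").first(),
            high_prices.resample("W").max(),
            low_prices.resample("W").min(),
            close_prices.resample("W").last())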
Example #8
 def test_fast_unique_multiple_unsortable_runtimewarning(self):
     arr = [np.array(["foo", pd.Timestamp("2000")])]
     with tm.assert_produces_warning(RuntimeWarning):
         lib.fast_unique_multiple(arr, sort=None)
Example #9
 def test_setitem_clears_freq(self):
     a = DatetimeArray(
         pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
     a[0] = pd.Timestamp("2000", tz="US/Central")
     assert a.freq is None
Example #10
def write():
    st.markdown("""
        # SugarTime
        ### Model Performance
        This page lets you visualize how the model performs on data that
        it hasn't seen yet.
        """)
    with st.beta_expander("CLICK HERE to expand discussion"):
        st.markdown("""
            The dataset is split into two sets: a training set and a
            testing set. The model has been trained on the training set, and
            we can use the model to perform inference on data from the
            testing set here.

            The time series model is auto-regressive with exogenous variables
            (ARX). The base algorithm used in such a model can be any
            regression algorithm; here I currently use a support vector
            machine.

            The full model actually consists of several models, each
            individually fit to a different lag of the target variable. In
            other words, there is one model fit to the glucose data at time
            *t+1*, another fit to the glucose data at time *t+2*, another at
            *t+3*, etc., all the way up to the selected horizon of the model
            (which defaults to 12 steps of 5 minutes each, i.e., one hour).
            Each model represents the best performing model after optimizing
            the time-series design hyperparameters (e.g., order of the
            *endogenous* or *target* variable, order of the *exogenous*
            variables, and/or delay of the exogenous variables) at that time
            step.

            Note that this model has essentially learned to revert to the mean.
            Since there is considerable autocorrelation in data from continuous
            glucose monitors, inference becomes less accurate as the inference
            step gets farther away from the current time *t*. Here, instead of
            relying on the exogenous variables (i.e., carbohydrates and
            insulin), the model does a better job by increasingly bringing the
            predicted value back to the mean, which for this patient is a blood
            glucose level of approximately 100 mg/dL. This is obviously not
            what we want the model to learn. But I have yet to find an
            estimator/algorithm that doesn't converge on this strategy to some
            extent, which suggests that these two exogenous variables are
            simply not predictive enough to account for significant variance
            beyond the autoregressive component of this model.
            """)
    st.markdown("""
        *Instructions:*
        Use the slider to select a time within the test set. The model
        will use the data up to that point to generate a forecast for
        the next hour.

        ***
        """)
    st.markdown("# Select date/time to show forecast.")

    # load patient data and fit model
    vm = load_saved_model()
    patient = vm.patient

    # make datetime selection slider
    x_index = patient.Xtest.index
    start_time = st.slider(
        "Move the slider to select the forecast date/time",
        min_value=x_index[40].to_pydatetime(),
        max_value=x_index[-40].to_pydatetime(),
        value=x_index[45].to_pydatetime(),
        step=timedelta(minutes=60),
        format="MM/DD/YY - hh:mm",
    )

    # plot glucose values for the test set
    fig = plot_test_set(patient, start_time)
    st.plotly_chart(fig)

    # plot performance of model
    st.markdown("# Show forecast vs actual")
    start_time_index = (x_index == pd.Timestamp(start_time)).argmax()
    nsteps = vm.horizon
    ypred = vm.multioutput_forecast(patient.Xtest[:start_time_index],
                                    patient.ytest[:start_time_index])
    idx = pd.date_range(
        start=start_time,
        end=start_time + timedelta(minutes=5 * (len(ypred) - 1)),
        freq="5T",
    )
    ypred = pd.DataFrame(ypred, columns=["ypred"], index=idx)
    fig = core.plot_forecast(
        patient.ytest[(start_time_index - 40):(start_time_index + nsteps)],
        ypred,
        return_flag=True,
    )
    start_time_text = datetime.datetime.strftime(start_time, "%m/%d/%y %H:%M")
    fig.update_layout(
        title={
            "text": "start time: " + start_time_text,
            "y": 0.88,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        })
    st.plotly_chart(fig)
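
The expander text above describes a direct multi-output strategy: one regressor
per forecast step. A minimal sketch of that idea with scikit-learn and synthetic
arrays (none of these names come from the SugarTime code):

import numpy as np
from sklearn.svm import SVR

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 8))   # lagged glucose + exogenous features (synthetic)
y = rng.normal(size=500)        # glucose target aligned with X

horizon = 12                    # 12 steps of 5 minutes = one hour
models = []
for step in range(1, horizon + 1):
    # model `step` is trained to predict y at time t + step
    models.append(SVR().fit(X[:-step], y[step:]))

# a forecast is one prediction per fitted model, farthest step last
x_now = X[-1:]
forecast = np.array([m.predict(x_now)[0] for m in models])
print(forecast.shape)  # (12,)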
Example #11
 def __call__(self, times, locations):
     _times = [pd.Timestamp(d) for d in times]
     return self._call_(
         np.asarray(_times).reshape((-1, 1)), np.asarray(locations).reshape((1, -1))
     ).astype(np.float32)
Example #12
     ],
     "name_to_label": {
         "spring_no_effects": NO_VACCINATIONS + NO_SEASONALITY + NO_RAPID_TESTS,
         "spring_without_rapid_tests_and_no_vaccinations": NO_VACCINATIONS
         + WITH_SEASONALITY
         + NO_RAPID_TESTS,  # just seasonality
         "spring_without_rapid_tests_without_seasonality": WITH_VACCINATIONS
         + NO_SEASONALITY
         + NO_RAPID_TESTS,  # just vaccinations
         "spring_without_vaccinations_without_seasonality": NO_VACCINATIONS
         + NO_SEASONALITY
         + WITH_RAPID_TESTS,  # just rapid tests
         "spring_baseline": WITH_VACCINATIONS + WITH_SEASONALITY + WITH_RAPID_TESTS,
     },
     "colors": [BLUE, RED, ORANGE, GREEN, PURPLE],
     "plot_start": pd.Timestamp("2021-01-15"),
 },
 "one_off_and_combined": {
     "title": "The Effect of Each Channel on {outcome} Separately",
     "scenarios": [
         "spring_baseline",
         "spring_no_effects",
         "spring_without_seasonality",
         "spring_without_vaccines",
         "spring_without_rapid_tests",
     ],
     "name_to_label": {
         "spring_no_effects": NO_VACCINATIONS + NO_SEASONALITY + NO_RAPID_TESTS,
         "spring_without_seasonality": WITH_VACCINATIONS
         + NO_SEASONALITY
         + WITH_RAPID_TESTS,
Example #13
class TestGenRangeGeneration:
    def test_generate(self):
        rng1 = list(generate_range(START, END, offset=BDay()))
        rng2 = list(generate_range(START, END, offset="B"))
        assert rng1 == rng2

    def test_generate_cday(self):
        rng1 = list(generate_range(START, END, offset=CDay()))
        rng2 = list(generate_range(START, END, offset="C"))
        assert rng1 == rng2

    def test_1(self):
        rng = list(generate_range(start=datetime(2009, 3, 25), periods=2))
        expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)]
        assert rng == expected

    def test_2(self):
        rng = list(generate_range(start=datetime(2008, 1, 1), end=datetime(2008, 1, 3)))
        expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)]
        assert rng == expected

    def test_3(self):
        rng = list(generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6)))
        expected = []
        assert rng == expected

    def test_precision_finer_than_offset(self):
        # GH#9907
        result1 = pd.date_range(
            start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="Q"
        )
        result2 = pd.date_range(
            start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W"
        )
        expected1_list = [
            "2015-06-30 00:00:03",
            "2015-09-30 00:00:03",
            "2015-12-31 00:00:03",
            "2016-03-31 00:00:03",
        ]
        expected2_list = [
            "2015-04-19 00:00:03",
            "2015-04-26 00:00:03",
            "2015-05-03 00:00:03",
            "2015-05-10 00:00:03",
            "2015-05-17 00:00:03",
            "2015-05-24 00:00:03",
            "2015-05-31 00:00:03",
            "2015-06-07 00:00:03",
            "2015-06-14 00:00:03",
            "2015-06-21 00:00:03",
        ]
        expected1 = DatetimeIndex(
            expected1_list, dtype="datetime64[ns]", freq="Q-DEC", tz=None
        )
        expected2 = DatetimeIndex(
            expected2_list, dtype="datetime64[ns]", freq="W-SUN", tz=None
        )
        tm.assert_index_equal(result1, expected1)
        tm.assert_index_equal(result2, expected2)

    dt1, dt2 = "2017-01-01", "2017-01-01"
    tz1, tz2 = "US/Eastern", "Europe/London"

    @pytest.mark.parametrize(
        "start,end",
        [
            (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)),
            (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)),
            (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)),
            (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)),
        ],
    )
    def test_mismatching_tz_raises_err(self, start, end):
        # issue 18488
        msg = "Start and end cannot both be tz-aware with different timezones"
        with pytest.raises(TypeError, match=msg):
            pd.date_range(start, end)
        with pytest.raises(TypeError, match=msg):
            pd.date_range(start, end, freq=BDay())
Example #14
    def test_arithmetic_overflow(self):
        with pytest.raises(OverflowError):
            pd.Timestamp('1700-01-01') + pd.Timedelta(13 * 19999, unit='D')

        with pytest.raises(OverflowError):
            pd.Timestamp('1700-01-01') + timedelta(days=13 * 19999)
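
The overflow comes from pandas' nanosecond-resolution epoch bounds:
1700-01-01 plus 13 * 19999 days lands around the year 2411, well past
Timestamp.max. The bounds can be inspected directly:

import pandas as pd

print(pd.Timestamp.min)  # roughly 1677-09-21
print(pd.Timestamp.max)  # roughly 2262-04-11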
Example #15
 def get_datetime():
     return pd.Timestamp("2015-01-27", tz='UTC')
Example #16
    def __init__(self,
                 indir=None,
                 outdir=None,
                 theDate=None,
                 infile='trades.csv',
                 inputType='DAS',
                 infile2='positions.csv',
                 mydevel=False):
        '''
        Creates the required path and field names to run the program. Raises a
        ValueError if the input file cannot be located. If mydevel is True, the
        default locations change.

        :param indir:      The location of the input file. Default is (cwd)/data.
        :param outdir:     The name of the output directory. Default is (indir)/out.
        :param theDate:    A datetime object or timestamp of the date of the transactions in the
                            input file. Will be used if the input file lacks dates. Defaults to
                            today.
        :param infile:     The name of the input file. Defaults to 'trades.csv'.
        :param inputType:  One of DAS, IB_HTML, or IB_CSV. Either IB input file should be an
                            activity statement with the tables: Trades, Open Positions and Account
                            Information.
        :param infile2:    This is the positions file. Required for DAS Trader Pro only, and only
                            if positions are held before or after this input file's trades. If
                            missing, the program will ask for the information. Defaults to
                            'positions.csv'.
        :raise ValueError:  If theDate is not a valid time.
        :raise NameError:   If the infile is not located.
        '''
        if theDate:
            try:
                theDate = pd.Timestamp(theDate)
                assert isinstance(theDate, dt.datetime)

            except ValueError as ex:
                msg = f"\n\nTheDate ({theDate}) must be a valid timestamp or string.\n"
                msg += "Leave it blank to accept today's date\n"
                msg += ex.__str__() + "\n"
                print(msg)
                raise ValueError(msg)

        else:
            theDate = dt.date.today()

        assert inputType in JournalFiles.inputType.values()
        self.inputType = inputType
        self.theDate = theDate
        self.monthformat = "_%Y%m_%B"
        self.dayformat = "_%m%d_%A"
        self.root = os.getcwd()
        self.indir = indir if indir else os.path.join(self.root, 'data/')
        self.outdir = outdir if outdir else os.path.join(self.root, 'out/')
        self.infile = infile if infile else 'trades.csv'
        self.infile2 = infile2
        self.inpathfile2 = None
        self.outfile = os.path.splitext(
            self.infile)[0] + self.theDate.strftime("%A_%m%d.xlsx")

        if not mydevel:
            self.inpathfile = os.path.join(self.indir, self.infile)
            self.outpathfile = os.path.join(self.outdir, self.outfile)
            if self.infile2:
                self.inpathfile2 = os.path.join(self.indir, self.infile2)

        else:
            self.setMyParams(indir, outdir)
        if self.inpathfile2 and not os.path.exists(self.inpathfile2):
            # Fail or succeed quietly here
            self.infile2 = None
            self.inpathfile2 = None

        self._checkPaths()
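
A hedged instantiation sketch for the constructor above (the date and file
names are illustrative; JournalFiles must be importable from its package):

import pandas as pd

jf = JournalFiles(theDate=pd.Timestamp('2019-10-16'), infile='trades.csv',
                  inputType='DAS', mydevel=False)
print(jf.inpathfile, '->', jf.outpathfile)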
Example #17
class OrderTrackerUnitTests(unittest.TestCase):
    start: pd.Timestamp = pd.Timestamp("2019-01-01", tz="UTC")
    end: pd.Timestamp = pd.Timestamp("2019-01-01 01:00:00", tz="UTC")
    start_timestamp: float = start.timestamp()
    end_timestamp: float = end.timestamp()
    clock_tick_size = 10

    @classmethod
    def setUpClass(cls):
        cls.ev_loop = asyncio.get_event_loop()
        cls.trading_pair = "COINALPHA-HBOT"

        cls.limit_orders: List[LimitOrder] = [
            LimitOrder(client_order_id=f"LIMIT//-{i}-{int(time.time()*1e6)}",
                       trading_pair=cls.trading_pair,
                       is_buy=True if i % 2 == 0 else False,
                       base_currency=cls.trading_pair.split("-")[0],
                       quote_currency=cls.trading_pair.split("-")[1],
                       price=Decimal(f"{100 - i}") if i %
                       2 == 0 else Decimal(f"{100 + i}"),
                       quantity=Decimal(f"{10 * (i + 1)}"),
                       creation_timestamp=int(time.time() * 1e6))
            for i in range(20)
        ]
        cls.market_orders: List[MarketOrder] = [
            MarketOrder(order_id=f"MARKET//-{i}-{int(time.time()*1e3)}",
                        trading_pair=cls.trading_pair,
                        is_buy=True if i % 2 == 0 else False,
                        base_asset=cls.trading_pair.split("-")[0],
                        quote_asset=cls.trading_pair.split("-")[1],
                        amount=float(f"{10 * (i + 1)}"),
                        timestamp=time.time()) for i in range(20)
        ]

        cls.market: MockPaperExchange = MockPaperExchange()
        cls.market_info: MarketTradingPairTuple = MarketTradingPairTuple(
            cls.market, cls.trading_pair, *cls.trading_pair.split("-"))

    def setUp(self):
        self.order_tracker: OrderTracker = OrderTracker()
        self.clock: Clock = Clock(ClockMode.BACKTEST, self.clock_tick_size,
                                  self.start_timestamp, self.end_timestamp)
        self.clock.add_iterator(self.order_tracker)
        self.clock.backtest_til(self.start_timestamp)

    @staticmethod
    def simulate_place_order(order_tracker: OrderTracker,
                             order: Union[LimitOrder, MarketOrder],
                             market_info: MarketTradingPairTuple):
        """
        Simulates an order being successfully placed.
        """
        if isinstance(order, LimitOrder):
            order_tracker.add_create_order_pending(order.client_order_id)
            order_tracker.start_tracking_limit_order(
                market_pair=market_info,
                order_id=order.client_order_id,
                is_buy=order.is_buy,
                price=order.price,
                quantity=order.quantity)
        else:
            order_tracker.add_create_order_pending(order.order_id)
            order_tracker.start_tracking_market_order(market_pair=market_info,
                                                      order_id=order.order_id,
                                                      is_buy=order.is_buy,
                                                      quantity=order.amount)

    @staticmethod
    def simulate_order_created(order_tracker: OrderTracker,
                               order: Union[LimitOrder, MarketOrder]):
        order_id = order.client_order_id if isinstance(
            order, LimitOrder) else order.order_id
        order_tracker.remove_create_order_pending(order_id)

    @staticmethod
    def simulate_stop_tracking_order(order_tracker: OrderTracker,
                                     order: Union[LimitOrder, MarketOrder],
                                     market_info: MarketTradingPairTuple):
        """
        Simulates an order being cancelled or filled completely.
        """
        if isinstance(order, LimitOrder):
            order_tracker.stop_tracking_limit_order(
                market_pair=market_info,
                order_id=order.client_order_id,
            )
        else:
            order_tracker.stop_tracking_market_order(market_pair=market_info,
                                                     order_id=order.order_id)

    @staticmethod
    def simulate_cancel_order(order_tracker: OrderTracker,
                              order: Union[LimitOrder, MarketOrder]):
        """
        Simulates order being cancelled.
        """
        order_id = order.client_order_id if isinstance(
            order, LimitOrder) else order.order_id
        if order_id:
            order_tracker.check_and_track_cancel(order_id)

    def test_active_limit_orders(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.active_limit_orders) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.active_limit_orders) == len(
                self.limit_orders))

        # Simulates order cancellation request being sent to exchange
        order_to_cancel = self.limit_orders[0]
        self.simulate_cancel_order(self.order_tracker, order_to_cancel)

        self.assertTrue(
            len(self.order_tracker.active_limit_orders) ==
            len(self.limit_orders) - 1)

    def test_shadow_limit_orders(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.shadow_limit_orders) == len(
                self.limit_orders))

        # Simulates order cancellation request being sent to exchange
        order_to_cancel = self.limit_orders[0]
        self.simulate_cancel_order(self.order_tracker, order_to_cancel)

        self.assertTrue(
            len(self.order_tracker.shadow_limit_orders) ==
            len(self.limit_orders) - 1)

    def test_market_pair_to_active_orders(self):
        # Check initial output
        self.assertTrue(
            len(self.order_tracker.market_pair_to_active_orders) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.market_pair_to_active_orders[
                self.market_info]) == len(self.limit_orders))

    def test_active_bids(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.active_bids) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.active_bids) == len(self.limit_orders) / 2)

    def test_active_asks(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.active_asks) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.active_asks) == len(self.limit_orders) / 2)

    def test_tracked_limit_orders(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.tracked_limit_orders) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.tracked_limit_orders) == len(
                self.limit_orders))

        # Simulates order cancellation request being sent to exchange
        order_to_cancel = self.limit_orders[0]
        self.simulate_cancel_order(self.order_tracker, order_to_cancel)

        # Note: this includes all orders (open, cancelled, filled, partially
        # filled), so it should not differ from the initial list of orders.
        self.assertTrue(
            len(self.order_tracker.tracked_limit_orders) == len(
                self.limit_orders))

    def test_tracked_limit_orders_data_frame(self):
        # Check initial output
        self.assertTrue(
            len(self.order_tracker.tracked_limit_orders_data_frame) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.tracked_limit_orders_data_frame) == len(
                self.limit_orders))

        # Simulates order cancellation request being sent to exchange
        order_to_cancel = self.limit_orders[0]
        self.simulate_cancel_order(self.order_tracker, order_to_cancel)

        # Note: this includes all orders (open, cancelled, filled, partially
        # filled), so it should not differ from the initial list of orders.
        self.assertTrue(
            len(self.order_tracker.tracked_limit_orders_data_frame) == len(
                self.limit_orders))

    def test_tracked_market_orders(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.tracked_market_orders) == 0)

        # Simulate orders being placed and tracked
        for order in self.market_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.tracked_market_orders) == len(
                self.market_orders))

        # Simulates order cancellation request being sent to exchange
        order_to_cancel = self.market_orders[0]
        self.simulate_cancel_order(self.order_tracker, order_to_cancel)

        # Note: this includes all orders (open, cancelled, filled, partially
        # filled), so it should not differ from the initial list of orders.
        self.assertTrue(
            len(self.order_tracker.tracked_market_orders) == len(
                self.market_orders))

    def test_tracked_market_order_data_frame(self):
        # Check initial output
        self.assertTrue(
            len(self.order_tracker.tracked_market_orders_data_frame) == 0)

        # Simulate orders being placed and tracked
        for order in self.market_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(
            len(self.order_tracker.tracked_market_orders_data_frame) == len(
                self.market_orders))

        # Simulates order cancellation request being sent to exchange
        order_to_cancel = self.market_orders[0]
        self.simulate_cancel_order(self.order_tracker, order_to_cancel)

        # Note: this includes all orders (open, cancelled, filled, partially
        # filled), so it should not differ from the initial list of orders.
        self.assertTrue(
            len(self.order_tracker.tracked_market_orders_data_frame) == len(
                self.market_orders))

    def test_in_flight_cancels(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.in_flight_cancels) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)
            self.simulate_order_created(self.order_tracker, order)

        # Simulates order cancellation request being sent to exchange
        order_to_cancel = self.limit_orders[0]
        self.simulate_cancel_order(self.order_tracker, order_to_cancel)

        self.assertTrue(len(self.order_tracker.in_flight_cancels) == 1)

    def test_in_flight_pending_created(self):
        # Check initial output
        self.assertTrue(len(self.order_tracker.in_flight_pending_created) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)

        self.assertTrue(
            len(self.order_tracker.in_flight_pending_created) == len(
                self.limit_orders))

        for order in self.limit_orders:
            self.simulate_order_created(self.order_tracker, order)

        self.assertTrue(len(self.order_tracker.in_flight_pending_created) == 0)

    def test_get_limit_orders(self):
        # Check initial output
        self.assertTrue(
            len(list(self.order_tracker.get_limit_orders().values())) == 0)

        # Simulate orders being placed and tracked
        for order in self.limit_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)

        self.assertTrue(
            len(self.order_tracker.get_limit_orders()[self.market_info].keys())
            == len(self.limit_orders))

    def test_get_market_orders(self):
        # Check initial output
        self.assertTrue(
            len(list(self.order_tracker.get_market_orders().values())) == 0)

        # Simulate orders being placed and tracked
        for order in self.market_orders:
            self.simulate_place_order(self.order_tracker, order,
                                      self.market_info)

        self.assertTrue(
            len(self.order_tracker.get_market_orders()[
                self.market_info].keys()) == len(self.market_orders))

    def test_get_shadow_limit_orders(self):
        # Check initial output
        self.assertTrue(self.market_info not in
                        self.order_tracker.get_shadow_limit_orders())

        # Simulates order being placed and tracked
        order: LimitOrder = self.limit_orders[0]
        self.simulate_place_order(self.order_tracker, order, self.market_info)

        # Compare order details and output
        other_order = self.order_tracker.get_shadow_limit_orders()[
            self.market_info][order.client_order_id]
        self.assertEqual(order.trading_pair, other_order.trading_pair)
        self.assertEqual(order.price, other_order.price)
        self.assertEqual(order.quantity, other_order.quantity)
        self.assertEqual(order.is_buy, other_order.is_buy)

        # Simulate order being cancelled
        self.simulate_cancel_order(self.order_tracker, order)
        self.simulate_stop_tracking_order(self.order_tracker, order,
                                          self.market_info)

        # Check that order is not yet removed from shadow_limit_orders
        other_order = self.order_tracker.get_shadow_limit_orders()[
            self.market_info][order.client_order_id]
        self.assertEqual(order.trading_pair, other_order.trading_pair)
        self.assertEqual(order.price, other_order.price)
        self.assertEqual(order.quantity, other_order.quantity)
        self.assertEqual(order.is_buy, other_order.is_buy)

        # Simulates current_timestamp > SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION
        self.clock.backtest_til(
            self.start_timestamp +
            OrderTracker.SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION + 1)
        self.order_tracker.check_and_cleanup_shadow_records()

        # Check that check_and_cleanup_shadow_records clears shadow_limit_orders
        self.assertTrue(self.market_info not in
                        self.order_tracker.get_shadow_limit_orders())

    def test_has_in_flight_cancel(self):
        # Check initial output
        self.assertFalse(
            self.order_tracker.has_in_flight_cancel("ORDER_ID_DO_NOT_EXIST"))

        # Simulates order being placed and tracked
        order: LimitOrder = self.limit_orders[0]
        self.simulate_place_order(self.order_tracker, order, self.market_info)
        self.simulate_order_created(self.order_tracker, order)

        # Order not yet cancelled.
        self.assertFalse(
            self.order_tracker.has_in_flight_cancel(order.client_order_id))

        # Simulate order being cancelled
        self.simulate_cancel_order(self.order_tracker, order)

        # Order inflight cancel timestamp has not yet expired
        self.assertTrue(
            self.order_tracker.has_in_flight_cancel(order.client_order_id))

        # Simulate in-flight cancel has expired
        self.clock.backtest_til(self.start_timestamp +
                                OrderTracker.CANCEL_EXPIRY_DURATION + 1)

        self.assertFalse(
            self.order_tracker.has_in_flight_cancel(order.client_order_id))

        # Simulates order being placed and tracked
        order: LimitOrder = self.limit_orders[0]
        self.simulate_place_order(self.order_tracker, order, self.market_info)
        self.simulate_order_created(self.order_tracker, order)

        # Simulate order being cancelled and no longer tracked
        self.simulate_cancel_order(self.order_tracker, order)
        self.simulate_stop_tracking_order(self.order_tracker, order,
                                          self.market_info)

        # Check that once the order is no longer tracked, it no longer has a pending cancel
        self.assertFalse(
            self.order_tracker.has_in_flight_cancel(order.client_order_id))

    def test_get_market_pair_from_order_id(self):
        # Initial validation
        order: LimitOrder = self.limit_orders[0]

        self.assertNotEqual(
            self.market_info,
            self.order_tracker.get_market_pair_from_order_id(
                order.client_order_id))

        # Simulate order being placed and tracked
        self.simulate_place_order(self.order_tracker, order, self.market_info)

        self.assertEqual(
            self.market_info,
            self.order_tracker.get_market_pair_from_order_id(
                order.client_order_id))

    def test_get_shadow_market_pair_from_order_id(self):
        # Simulate order being placed and tracked
        order: LimitOrder = self.limit_orders[0]
        self.assertNotEqual(
            self.market_info,
            self.order_tracker.get_shadow_market_pair_from_order_id(
                order.client_order_id))

        self.simulate_place_order(self.order_tracker, order, self.market_info)

        self.assertEqual(
            self.market_info,
            self.order_tracker.get_shadow_market_pair_from_order_id(
                order.client_order_id))

    def test_get_limit_order(self):
        # Initial validation
        order: LimitOrder = self.limit_orders[0]

        # Order not yet placed
        self.assertNotEqual(
            order,
            self.order_tracker.get_limit_order(self.market_info,
                                               order.client_order_id))

        # Simulate order being placed and tracked
        self.simulate_place_order(self.order_tracker, order, self.market_info)

        # Unrecognized Order
        self.assertNotEqual(
            order,
            self.order_tracker.get_limit_order(self.market_info,
                                               "UNRECOGNIZED_ORDER"))

        # Matching Order
        other_order = self.order_tracker.get_limit_order(
            self.market_info, order.client_order_id)
        self.assertEqual(order.trading_pair, other_order.trading_pair)
        self.assertEqual(order.price, other_order.price)
        self.assertEqual(order.quantity, other_order.quantity)
        self.assertEqual(order.is_buy, other_order.is_buy)

    def test_get_market_order(self):
        # Initial validation
        order: MarketOrder = MarketOrder(
            order_id=f"MARKET//-{self.clock.current_timestamp}",
            trading_pair=self.trading_pair,
            is_buy=True,
            base_asset=self.trading_pair.split("-")[0],
            quote_asset=self.trading_pair.split("-")[1],
            amount=float(10),
            timestamp=self.clock.current_timestamp)

        # Order not yet placed
        self.assertNotEqual(
            order,
            self.order_tracker.get_market_order(self.market_info,
                                                order.order_id))

        # Simulate order being placed and tracked
        self.simulate_place_order(self.order_tracker, order, self.market_info)

        # Unrecognized Order
        self.assertNotEqual(
            order,
            self.order_tracker.get_market_order(self.market_info,
                                                "UNRECOGNIZED_ORDER"))

        # Matching Order
        self.assertEqual(
            str(order),
            str(
                self.order_tracker.get_market_order(self.market_info,
                                                    order.order_id)))

    def test_get_shadow_limit_order(self):
        # Initial validation
        order: LimitOrder = self.limit_orders[0]

        # Order not yet placed
        self.assertNotEqual(
            order,
            self.order_tracker.get_shadow_limit_order(order.client_order_id))

        # Simulate order being placed and tracked
        self.simulate_place_order(self.order_tracker, order, self.market_info)

        # Unrecognized Order
        self.assertNotEqual(
            order,
            self.order_tracker.get_shadow_limit_order("UNRECOGNIZED_ORDER"))

        # Matching Order
        shadow_order = self.order_tracker.get_shadow_limit_order(
            order.client_order_id)
        self.assertEqual(order.trading_pair, shadow_order.trading_pair)
        self.assertEqual(order.price, shadow_order.price)
        self.assertEqual(order.quantity, shadow_order.quantity)
        self.assertEqual(order.is_buy, shadow_order.is_buy)

        # Simulate order cancel
        self.simulate_cancel_order(self.order_tracker, order)

        self.assertNotEqual(
            order,
            self.order_tracker.get_shadow_limit_order(order.client_order_id))

    def test_check_and_cleanup_shadow_records(self):
        order: LimitOrder = self.limit_orders[0]

        # Simulate order being placed and tracked
        self.simulate_place_order(self.order_tracker, order, self.market_info)

        # Check for shadow_tracked_limit_order
        self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 1)

        # Simulate order cancel and stop tracking order
        self.simulate_cancel_order(self.order_tracker, order)
        self.simulate_stop_tracking_order(self.order_tracker, order,
                                          self.market_info)

        # Check for shadow_tracked_limit_order
        self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 1)

        # Simulates current_timestamp > SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION
        self.clock.backtest_til(
            self.start_timestamp +
            OrderTracker.SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION + 1)
        self.order_tracker.check_and_cleanup_shadow_records()

        # Check that check_and_cleanup_shadow_records clears shadow_limit_orders
        self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 0)
Example #18
# Third-party imports
import numpy as np
import pandas as pd
import pytest

# First-party imports
from gluonts.model.forecast import QuantileForecast, SampleForecast

QUANTILES = np.arange(1, 100) / 100
SAMPLES = np.arange(101).reshape(101, 1) / 100
START_DATE = pd.Timestamp(2017, 1, 1, 12)
FREQ = '1D'

FORECASTS = {
    'QuantileForecast': QuantileForecast(
        forecast_arrays=QUANTILES.reshape(-1, 1),
        start_date=START_DATE,
        forecast_keys=np.array(QUANTILES, str),
        freq=FREQ,
    ),
    'SampleForecast': SampleForecast(
        samples=SAMPLES, start_date=START_DATE, freq=FREQ
    ),
}


@pytest.mark.parametrize("name", FORECASTS.keys())
def test_Forecast(name):
    forecast = FORECASTS[name]

    def percentile(value):
Example #19
    @classmethod  # decorator implied by the `cls` parameter; missing in the listing
    def make_equity_info(cls):
        return pd.DataFrame.from_dict(
            {
                24: {
                    'start_date': pd.Timestamp('2006-01-01', tz='UTC'),
                    'end_date': pd.Timestamp('2007-01-01', tz='UTC'),
                    'symbol': 'AAPL',
                    'asset_type': 'equity',
                    'exchange': 'nasdaq'
                },
                3766: {
                    'start_date': pd.Timestamp('2006-01-01', tz='UTC'),
                    'end_date': pd.Timestamp('2007-01-01', tz='UTC'),
                    'symbol': 'IBM',
                    'asset_type': 'equity',
                    'exchange': 'nasdaq'
                },
                5061: {
                    'start_date': pd.Timestamp('2006-01-01', tz='UTC'),
                    'end_date': pd.Timestamp('2007-01-01', tz='UTC'),
                    'symbol': 'MSFT',
                    'asset_type': 'equity',
                    'exchange': 'nasdaq'
                },
                14848: {
                    'start_date': pd.Timestamp('2006-01-01', tz='UTC'),
                    'end_date': pd.Timestamp('2007-01-01', tz='UTC'),
                    'symbol': 'YHOO',
                    'asset_type': 'equity',
                    'exchange': 'nasdaq'
                },
                25317: {
                    'start_date': pd.Timestamp('2006-01-01', tz='UTC'),
                    'end_date': pd.Timestamp('2007-01-01', tz='UTC'),
                    'symbol': 'DELL',
                    'asset_type': 'equity',
                    'exchange': 'nasdaq'
                },
                13: {
                    'start_date': pd.Timestamp('2006-01-01', tz='UTC'),
                    'end_date': pd.Timestamp('2010-01-01', tz='UTC'),
                    'symbol': 'NFLX',
                    'asset_type': 'equity',
                    'exchange': 'nasdaq'
                }
            },
            orient='index',
        )
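
The snippet leans on pd.DataFrame.from_dict with orient='index', which turns the outer keys (the asset ids) into the row index and each inner dict into a row. A minimal standalone illustration:

import pandas as pd

info = pd.DataFrame.from_dict(
    {24: {'symbol': 'AAPL'}, 13: {'symbol': 'NFLX'}},
    orient='index',
)
print(info)
#    symbol
# 24   AAPL
# 13   NFLX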
Exemplo n.º 20
df_wiki_china.columns = ['Place', 'Start date', 'End date']
df_wiki_china['url'] = wiki_china_links[1:]
df_wiki_china['update'] = pd.to_datetime(wiki_china_dates[1:],
                                         format='%d %B %Y')
df_wiki_china['Country'] = 'China'
df_wiki_china['Level'] = 'City'
df_wiki_china['Confirmed'] = True

df_wiki = pd.concat((df_wiki_inter, df_wiki_china), sort=False)
df_wiki.to_csv('wiki_lockdown_dates.csv', index=False)

df_aura = pd.read_csv('aura_lockdown_dates.csv')
df_aura['update'] = pd.to_datetime(df_aura['update'], format='%Y-%m-%d')

df_quar = pd.concat((df_wiki_inter, df_wiki_china, df_aura), sort=False)
df_quar['update'] = df_quar['update'].fillna(pd.Timestamp('2000/11/12 13:35'))
df_quar = df_quar.sort_values('update')
df_quar = df_quar.drop_duplicates(['Country', 'Place'], keep='last')
df_quar = df_quar.dropna(subset=['Start date'])
df_quar.loc[df_quar['Place'] == df_quar['Country'], 'Place'] = np.nan

print('not in arewe')
print(set(df_quar['Country']) - set(df_arewe_ls['Country']))
print('not in ours')
print(set(df_arewe_ls['Country']) - set(df_quar['Country']))

df_quar.to_csv('deploy/lockdown_dates.csv', index=False)
df_quar.to_csv('history/lockdown_dates_%s.csv' %
               (pd.Timestamp.now().strftime('%d-%m-%y')),
               index=False)
df_quar_old = pd.read_csv(
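
The listing breaks off mid-call above. Independent of the missing tail, the sort-then-dedup idiom the script uses is a general "latest record wins" pattern; a self-contained sketch with toy data:

import pandas as pd

records = pd.DataFrame({
    'Country': ['FR', 'FR', 'CN'],
    'Place': ['Paris', 'Paris', 'Wuhan'],
    'update': pd.to_datetime(['2020-03-01', '2020-03-15', '2020-01-23']),
})
# Sort so the newest row per key comes last, then keep only that row.
latest = (records.sort_values('update')
                 .drop_duplicates(['Country', 'Place'], keep='last'))
print(latest)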
Exemplo n.º 21
#coding:utf-8
from threading import Timer
import time
import os
import csv
import tushare as ts
import numpy as np
import pandas as pd
import datetime
from jqdatasdk import *
from pathlib import Path

# Global variables declared up front
global today, todaytime, yesterday, yesterdaytime, tommow, tommowtime, pandastime, cwf, alltradeday, homefolder
# Save the current date
pandastime = pd.Timestamp("2017-6-19 9:13:45")
today = datetime.date.today()
todaytime = datetime.datetime.strptime(str(today), '%Y-%m-%d')
tommow = today + datetime.timedelta(days=1)
tommowtime = datetime.datetime.strptime(str(tommow), '%Y-%m-%d')
yesterday = today - datetime.timedelta(days=1)
yesterdaytime = datetime.datetime.strptime(str(yesterday), '%Y-%m-%d')
# Earliest dates for which JQData provides bars, per frequency
jqmonthheadtime = datetime.datetime(2005, 2, 1, 0, 0)
jqweekheadtime = datetime.datetime(2005, 1, 15, 0, 0)
jqdayheadtime = datetime.datetime(2005, 1, 5, 0, 0)
jq60mheadtime = datetime.datetime(2005, 1, 4, 10, 31)
jq30mheadtime = datetime.datetime(2005, 1, 4, 10, 1)
jq15mheadtime = datetime.datetime(2005, 1, 4, 9, 46)
jq5mheadtime = datetime.datetime(2005, 1, 4, 9, 36)
jq1mheadtime = datetime.datetime(2005, 1, 4, 9, 31)
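
The jq*headtime constants above mark the earliest bar JQData serves at each frequency. A typical use is clamping a requested start time to those floors; a hedged sketch (the helper is hypothetical, only the constants come from the script):

import datetime

def clamp_start(requested, earliest):
    # Never ask the data source for bars older than its earliest record.
    return max(requested, earliest)

start = clamp_start(datetime.datetime(2000, 1, 1), jqdayheadtime)
print(start)  # 2005-01-05 00:00:00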
Exemplo n.º 22
def describe_cluster(cluster_df, features, weight_column, oven_refills):
    """ Create the statistics for a cluster. Datapoints that are part of a breakdown
    period are excluded.

    Parameters
    ----------
    cluster_df : DataFrame
        A dataframe that contains all points of the cluster you want to describe.
    features : list of source features
        All source features for which the statistics should be generated
    weight_column : string
        Name of the column to use for weighting data points, typically
        `datapoint_duration` (``ProcessingFeatures.DATAPOINT_DURATION``)
    oven_refills : list of timestamp
        End of the oven refill periods

    Returns
    -------
    Series
        A Series of the following statistics

        For each parameter in `features`:

            1. mean
            2. std
            3. std% (std in percent of mean)
            4. avg_dev (average deviation of mean)
            5. min
            6. 25% (lower quartile)
            7. median
            8. 75% (upper quartile)
            9. max

        Once for the cluster:

            10. Density/count (number of data points in the cluster)
            11. Duration/in_hours (total duration of cluster)
            12. Duration/longest (duration of longest fragment)
            13. Duration/num_splits (number of fragments)
            14. Refill/index (index of oven refill that came directly before 
                the beginning of the longest fragment)
            15. Refill/delta_in_hours (delta from the end of the closest oven refill)
            16. num_breakdowns/per_hour (number of breakdowns per hour)
    """

    values = ["mean", "std", "std%", "avg_dev", "min", "25%", "median", "75%", "max"]
    index = pd.MultiIndex.from_tuples(
        [(p, v) for p in features for v in values]
        + [
            ("DENSITY", "count"),
            ("DURATION", "in_hours"),
            ("DURATION", "longest_in_hours"),
            ("DURATION", "num_splits"),
            ("REFILL", "index"),
            ("REFILL", "delta_in_hours"),
            ("num_breakdowns", "per_hour"),
        ]
    )

    data = cluster_df.loc[
        (cluster_df[ProcessingFeatures.HT_VOLTAGE_BREAKDOWN] == 0), features
    ].values  # breakdown datapoints are already excluded by this filter
    weights = cluster_df.loc[
        (cluster_df[ProcessingFeatures.HT_VOLTAGE_BREAKDOWN] == 0), weight_column
    ].values
    if data.size == 0:
        return None

    stats = DescrStatsW(data, weights, ddof=1)

    mean = np.array(stats.mean)  # np.mean(data, axis=0)
    std = np.array(stats.std)  # np.std(data, axis=0)
    quantiles = stats.quantile([0, 0.25, 0.5, 0.75, 1], return_pandas=False)
    # np.quantile(data, [0, 0.25, 0.5, 0.75, 1], axis=0)
    avg_dev = np.dot(weights, np.absolute(data - mean)) / np.sum(weights)

    count = len(data)

    duration_in_seconds = cluster_df[ProcessingFeatures.DATAPOINT_DURATION].sum()
    duration_in_hours = duration_in_seconds / 3600

    (
        duration_longest_start,
        duration_longest,
        duration_num_splits,
    ) = get_cluster_duration(cluster_df, weight_column)
    duration_longest /= 3600

    closest_refill = None
    for i, refill in reversed(list(enumerate(oven_refills))):
        if duration_longest_start > refill:
            closest_refill = i
            break

    refill_delta = -1
    if closest_refill is not None:
        refill_delta = (
            pd.Timestamp(duration_longest_start) - oven_refills[closest_refill]
        ).total_seconds() / 3600

    description = [
        [
            mean[i],
            std[i],
            np.abs(std[i] / mean[i]) * 100,
            avg_dev[i],
            quantiles[0][i],
            quantiles[1][i],
            quantiles[2][i],
            quantiles[3][i],
            quantiles[4][i],
        ]
        for i in range(len(features))
    ]
    description = [item for sublist in description for item in sublist]
    description.append(count)
    description.append(duration_in_hours)
    description.append(duration_longest)
    description.append(duration_num_splits)

    description.append(closest_refill)
    description.append(refill_delta)

    description.append(
        cluster_df.loc[
            cluster_df[ProcessingFeatures.HT_SPARKS_COUNTER] > 0,
            ProcessingFeatures.HT_SPARKS_COUNTER,
        ].nunique()
        / duration_in_hours
    )

    return pd.Series(description, index=index)


def _to_timestamp(d):
    """Truncate a date-like value to midnight of the same day."""
    return pd.Timestamp(d).replace(hour=0, minute=0, second=0, microsecond=0)
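
describe_cluster delegates the weighted statistics to statsmodels' DescrStatsW. A minimal standalone illustration with toy numbers (not project data):

import numpy as np
from statsmodels.stats.weightstats import DescrStatsW

data = np.array([[1.0], [2.0], [3.0]])
weights = np.array([1.0, 1.0, 10.0])    # e.g. datapoint durations in seconds

stats = DescrStatsW(data, weights, ddof=1)
print(stats.mean)   # weighted mean, pulled towards the heavily weighted 3.0
print(stats.std)    # weighted standard deviation
print(stats.quantile([0.25, 0.5, 0.75], return_pandas=False))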
Exemplo n.º 24
Arquivo: ols.py Projeto: wdy0401/stmg
import numpy as np
import pandas as pd
import statsmodels.api as sm

__all__ = ['predict']

# Functions
def predict(X, Y, x):
    '''
    @note: fit an OLS regression of Y on X and return the prediction at x
    @X: independent variable(s)
    @Y: dependent variable
    @x: input value(s) at which to predict
    '''
    X = np.array(X)
    Y = np.array(Y)
    X = sm.add_constant(X)
    model = sm.OLS(Y, X)
    results = model.fit()
    x = np.hstack(([1], np.array(x)))
#    return results.summary()
    return model.predict(results.params, x)

# Data
p = pd.read_csv('../data/margin_m.csv', index_col=0)
p.index = [pd.Timestamp(str(x)) for x in p.index]
d = p[p > 0].dropna(how="any")  # drop invalid values
d = d[-12:]  # keep the most recent 12 months
print(predict(d['commodity_pre'], d['margin'], [1]))

if __name__ == "__main__":
    import ols
    print(help(ols))
Exemplo n.º 25
def _create_dataframe():
    sample_dataframe = pd.read_csv(os.path.join(RESOURCE_DIR, 'data.csv'))
    sample_dataframe['timestamp'] = sample_dataframe['timestamp'].apply(
        lambda x: pd.Timestamp(x))
    return sample_dataframe.set_index('timestamp')
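
pandas can do the parse-and-index step inside the read itself; an equivalent one-liner, assuming the same RESOURCE_DIR as in the snippet:

import os
import pandas as pd

sample_dataframe = pd.read_csv(
    os.path.join(RESOURCE_DIR, 'data.csv'),
    parse_dates=['timestamp'],
    index_col='timestamp',
)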
Exemplo n.º 26
class TestSparseArray:
    def setup_method(self, method):
        self.arr_data = np.array(
            [np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
        self.arr = SparseArray(self.arr_data)
        self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)

    def test_constructor_dtype(self):
        arr = SparseArray([np.nan, 1, 2, np.nan])
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert arr.dtype.subtype == np.float64
        assert np.isnan(arr.fill_value)

        arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
        assert arr.dtype == SparseDtype(np.float64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert np.isnan(arr.fill_value)

        arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

    def test_constructor_dtype_str(self):
        result = SparseArray([1, 2, 3], dtype="int")
        expected = SparseArray([1, 2, 3], dtype=int)
        tm.assert_sp_array_equal(result, expected)

    def test_constructor_sparse_dtype(self):
        result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1))
        expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int64")

    def test_constructor_sparse_dtype_str(self):
        result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]")
        expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int32")

    def test_constructor_object_dtype(self):
        # GH 11856
        arr = SparseArray(["A", "A", np.nan, "B"], dtype=object)
        assert arr.dtype == SparseDtype(object)
        assert np.isnan(arr.fill_value)

        arr = SparseArray(["A", "A", np.nan, "B"],
                          dtype=object,
                          fill_value="A")
        assert arr.dtype == SparseDtype(object, "A")
        assert arr.fill_value == "A"

        # GH 17574
        data = [False, 0, 100.0, 0.0]
        arr = SparseArray(data, dtype=object, fill_value=False)
        assert arr.dtype == SparseDtype(object, False)
        assert arr.fill_value is False
        arr_expected = np.array(data, dtype=object)
        it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
        assert np.fromiter(it, dtype=np.bool_).all()

    @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
    def test_constructor_na_dtype(self, dtype):
        with pytest.raises(ValueError, match="Cannot convert"):
            SparseArray([0, 1, np.nan], dtype=dtype)

    def test_constructor_warns_when_losing_timezone(self):
        # GH#32501 warn when losing timezone information
        dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")

        expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))

        with tm.assert_produces_warning(UserWarning):
            result = SparseArray(dti)

        tm.assert_sp_array_equal(result, expected)

        with tm.assert_produces_warning(UserWarning):
            result = SparseArray(pd.Series(dti))

        tm.assert_sp_array_equal(result, expected)

    def test_constructor_spindex_dtype(self):
        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
        # XXX: Behavior change: specifying SparseIndex no longer changes the
        # fill_value
        expected = SparseArray([0, 1, 2, 0], kind="integer")
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=np.int64,
            fill_value=0,
        )
        exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=[1, 2],
                          sparse_index=IntIndex(4, [1, 2]),
                          fill_value=0,
                          dtype=np.int64)
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=None,
            fill_value=0,
        )
        exp = SparseArray([0, 1, 2, 3], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
    def test_constructor_spindex_dtype_scalar(self, sparse_index):
        # scalar input
        arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
        exp = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
        exp = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    def test_constructor_spindex_dtype_scalar_broadcasts(self):
        arr = SparseArray(data=[1, 2],
                          sparse_index=IntIndex(4, [1, 2]),
                          fill_value=0,
                          dtype=None)
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize(
        "data, fill_value",
        [
            (np.array([1, 2]), 0),
            (np.array([1.0, 2.0]), np.nan),
            ([True, False], False),
            ([pd.Timestamp("2017-01-01")], pd.NaT),
        ],
    )
    def test_constructor_inferred_fill_value(self, data, fill_value):
        result = SparseArray(data).fill_value

        if pd.isna(fill_value):
            assert pd.isna(result)
        else:
            assert result == fill_value

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @pytest.mark.parametrize("size", [0, 10])
    @td.skip_if_no_scipy
    def test_from_spmatrix(self, size, format):
        import scipy.sparse

        mat = scipy.sparse.random(size, 1, density=0.5, format=format)
        result = SparseArray.from_spmatrix(mat)

        result = np.asarray(result)
        expected = mat.toarray().ravel()
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @td.skip_if_no_scipy
    def test_from_spmatrix_including_explicit_zero(self, format):
        import scipy.sparse

        mat = scipy.sparse.random(10, 1, density=0.5, format=format)
        mat.data[0] = 0
        result = SparseArray.from_spmatrix(mat)

        result = np.asarray(result)
        expected = mat.toarray().ravel()
        tm.assert_numpy_array_equal(result, expected)

    @td.skip_if_no_scipy
    def test_from_spmatrix_raises(self):
        import scipy.sparse

        mat = scipy.sparse.eye(5, 4, format="csc")

        with pytest.raises(ValueError, match="not '4'"):
            SparseArray.from_spmatrix(mat)

    @pytest.mark.parametrize(
        "scalar,dtype",
        [
            (False, SparseDtype(bool, False)),
            (0.0, SparseDtype("float64", 0)),
            (1, SparseDtype("int64", 1)),
            ("z", SparseDtype("object", "z")),
        ],
    )
    def test_scalar_with_index_infer_dtype(self, scalar, dtype):
        # GH 19163
        arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
        exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)

        tm.assert_sp_array_equal(arr, exp)

        assert arr.dtype == dtype
        assert exp.dtype == dtype

    def test_get_item(self):

        assert np.isnan(self.arr[1])
        assert self.arr[2] == 1
        assert self.arr[7] == 5

        assert self.zarr[0] == 0
        assert self.zarr[2] == 1
        assert self.zarr[7] == 5

        errmsg = re.compile("bounds")

        with pytest.raises(IndexError, match=errmsg):
            self.arr[11]

        with pytest.raises(IndexError, match=errmsg):
            self.arr[-11]

        assert self.arr[-1] == self.arr[len(self.arr) - 1]

    def test_take_scalar_raises(self):
        msg = "'indices' must be an array, not a scalar '2'."
        with pytest.raises(ValueError, match=msg):
            self.arr.take(2)

    def test_take(self):
        exp = SparseArray(np.take(self.arr_data, [2, 3]))
        tm.assert_sp_array_equal(self.arr.take([2, 3]), exp)

        exp = SparseArray(np.take(self.arr_data, [0, 1, 2]))
        tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp)

    def test_take_all_empty(self):
        a = pd.array([0, 0], dtype=SparseDtype("int64"))
        result = a.take([0, 1], allow_fill=True, fill_value=np.nan)
        tm.assert_sp_array_equal(a, result)

    def test_take_fill_value(self):
        data = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(data, fill_value=0)

        exp = SparseArray(np.take(data, [0]), fill_value=0)
        tm.assert_sp_array_equal(sparse.take([0]), exp)

        exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
        tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)

    def test_take_negative(self):
        exp = SparseArray(np.take(self.arr_data, [-1]))
        tm.assert_sp_array_equal(self.arr.take([-1]), exp)

        exp = SparseArray(np.take(self.arr_data, [-4, -3, -2]))
        tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp)

    @pytest.mark.parametrize("fill_value", [0, None, np.nan])
    def test_shift_fill_value(self, fill_value):
        # GH #24128
        sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0)
        res = sparse.shift(1, fill_value=fill_value)
        if isna(fill_value):
            fill_value = res.dtype.na_value
        exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0)
        tm.assert_sp_array_equal(res, exp)

    def test_bad_take(self):
        with pytest.raises(IndexError, match="bounds"):
            self.arr.take([11])

    def test_take_filling(self):
        # similar tests as GH 12631
        sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        # XXX: test change: fill_value=True -> allow_fill=True
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        expected = SparseArray([np.nan, np.nan, np.nan])
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]),
                             allow_fill=False,
                             fill_value=True)
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        msg = "Invalid value in 'indices'"
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)

        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), allow_fill=True)

    def test_take_filling_fill_value(self):
        # same tests as GH 12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # fill_value
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        # XXX: behavior change.
        # the old way of filling self.fill_value doesn't follow EA rules.
        # It's supposed to be self.dtype.na_value (nan in this case)
        expected = SparseArray([0, np.nan, np.nan], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]),
                             allow_fill=False,
                             fill_value=True)
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        msg = "Invalid value in 'indices'."
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), fill_value=True)

    def test_take_filling_all_nan(self):
        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])
        # XXX: did the default kind from take change?
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([np.nan, np.nan, np.nan], kind="block")
        tm.assert_sp_array_equal(result, expected)

        result = sparse.take(np.array([1, 0, -1]), fill_value=True)
        expected = SparseArray([np.nan, np.nan, np.nan], kind="block")
        tm.assert_sp_array_equal(result, expected)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), fill_value=True)

    def test_set_item(self):
        def setitem():
            self.arr[5] = 3

        def setslice():
            self.arr[1:5] = 2

        with pytest.raises(TypeError, match="assignment via setitem"):
            setitem()

        with pytest.raises(TypeError, match="assignment via setitem"):
            setslice()

    def test_constructor_from_too_large_array(self):
        with pytest.raises(TypeError, match="expected dimension <= 1 data"):
            SparseArray(np.arange(10).reshape((2, 5)))

    def test_constructor_from_sparse(self):
        res = SparseArray(self.zarr)
        assert res.fill_value == 0
        tm.assert_almost_equal(res.sp_values, self.zarr.sp_values)

    def test_constructor_copy(self):
        cp = SparseArray(self.arr, copy=True)
        cp.sp_values[:3] = 0
        assert not (self.arr.sp_values[:3] == 0).any()

        not_copy = SparseArray(self.arr)
        not_copy.sp_values[:3] = 0
        assert (self.arr.sp_values[:3] == 0).all()

    def test_constructor_bool(self):
        # GH 10648
        data = np.array([False, False, True, True, False, False])
        arr = SparseArray(data, fill_value=False, dtype=bool)

        assert arr.dtype == SparseDtype(bool)
        tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([2, 3], np.int32))

        dense = arr.to_dense()
        assert dense.dtype == bool
        tm.assert_numpy_array_equal(dense, data)

    def test_constructor_bool_fill_value(self):
        arr = SparseArray([True, False, True], dtype=None)
        assert arr.dtype == SparseDtype(np.bool_)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool_)
        assert arr.dtype == SparseDtype(np.bool_)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True)
        assert arr.dtype == SparseDtype(np.bool_, True)
        assert arr.fill_value

    def test_constructor_float32(self):
        # GH 10648
        data = np.array([1.0, np.nan, 3], dtype=np.float32)
        arr = SparseArray(data, dtype=np.float32)

        assert arr.dtype == SparseDtype(np.float32)
        tm.assert_numpy_array_equal(arr.sp_values,
                                    np.array([1, 3], dtype=np.float32))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(arr.sp_index.indices,
                                    np.array([0, 2], dtype=np.int32))

        dense = arr.to_dense()
        assert dense.dtype == np.float32
        tm.assert_numpy_array_equal(dense, data)

    def test_astype(self):
        # float -> float
        arr = SparseArray([None, None, 0, 2])
        result = arr.astype("Sparse[float32]")
        expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32"))
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("float64", fill_value=0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(
            np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]),
            dtype)
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("int64", 0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(np.array([0, 2], dtype=np.int64),
                                           IntIndex(4, [2, 3]), dtype)
        tm.assert_sp_array_equal(result, expected)

        arr = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            arr.astype("Sparse[i8]")

    def test_astype_bool(self):
        a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
        result = a.astype(bool)
        expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0))
        tm.assert_sp_array_equal(result, expected)

        # update fill value
        result = a.astype(SparseDtype(bool, False))
        expected = SparseArray([True, False, False, True],
                               dtype=SparseDtype(bool, False))
        tm.assert_sp_array_equal(result, expected)

    def test_astype_all(self, any_real_dtype):
        vals = np.array([1, 2, 3])
        arr = SparseArray(vals, fill_value=1)
        typ = np.dtype(any_real_dtype)
        res = arr.astype(typ)
        assert res.dtype == SparseDtype(typ, 1)
        assert res.sp_values.dtype == typ

        tm.assert_numpy_array_equal(np.asarray(res.to_dense()),
                                    vals.astype(typ))

    @pytest.mark.parametrize(
        "array, dtype, expected",
        [
            (
                SparseArray([0, 1]),
                "float",
                SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
            ),
            (SparseArray([0, 1]), bool, SparseArray([False, True])),
            (
                SparseArray([0, 1], fill_value=1),
                bool,
                SparseArray([False, True], dtype=SparseDtype(bool, True)),
            ),
            pytest.param(
                SparseArray([0, 1]),
                "datetime64[ns]",
                SparseArray(
                    np.array([0, 1], dtype="datetime64[ns]"),
                    dtype=SparseDtype("datetime64[ns]", pd.Timestamp("1970")),
                ),
                marks=[pytest.mark.xfail(reason="NumPy-7619")],
            ),
            (
                SparseArray([0, 1, 10]),
                str,
                SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),
            ),
            (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
            (
                SparseArray([0, 1, 0]),
                object,
                SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
            ),
        ],
    )
    def test_astype_more(self, array, dtype, expected):
        result = array.astype(dtype)
        tm.assert_sp_array_equal(result, expected)

    def test_astype_nan_raises(self):
        arr = SparseArray([1.0, np.nan])
        with pytest.raises(ValueError, match="Cannot convert non-finite"):
            arr.astype(int)

    def test_set_fill_value(self):
        arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan)
        arr.fill_value = 2
        assert arr.fill_value == 2

        arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
        arr.fill_value = 2
        assert arr.fill_value == 2

        # XXX: this seems fine? You can construct an integer
        # sparsearray with NaN fill value, why not update one?
        # coerces to int
        # msg = "unable to set fill_value 3\\.1 to int64 dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = 3.1
        assert arr.fill_value == 3.1

        # msg = "unable to set fill_value nan to int64 dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

        arr = SparseArray([True, False, True],
                          fill_value=False,
                          dtype=np.bool_)
        arr.fill_value = True
        assert arr.fill_value

        # coerces to bool
        # msg = "unable to set fill_value 0 to bool dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = 0
        assert arr.fill_value == 0

        # msg = "unable to set fill_value nan to bool dtype"
        # with pytest.raises(ValueError, match=msg):
        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

    @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
    def test_set_fill_invalid_non_scalar(self, val):
        arr = SparseArray([True, False, True],
                          fill_value=False,
                          dtype=np.bool_)
        msg = "fill_value must be a scalar"

        with pytest.raises(ValueError, match=msg):
            arr.fill_value = val

    def test_copy(self):
        arr2 = self.arr.copy()
        assert arr2.sp_values is not self.arr.sp_values
        assert arr2.sp_index is self.arr.sp_index

    def test_values_asarray(self):
        tm.assert_almost_equal(self.arr.to_dense(), self.arr_data)

    @pytest.mark.parametrize(
        "data,shape,dtype",
        [
            ([0, 0, 0, 0, 0], (5, ), None),
            ([], (0, ), None),
            ([0], (1, ), None),
            (["A", "A", np.nan, "B"], (4, ), object),
        ],
    )
    def test_shape(self, data, shape, dtype):
        # GH 21126
        out = SparseArray(data, dtype=dtype)
        assert out.shape == shape

    @pytest.mark.parametrize(
        "vals",
        [
            [np.nan, np.nan, np.nan, np.nan, np.nan],
            [1, np.nan, np.nan, 3, np.nan],
            [1, np.nan, 0, 3, 0],
        ],
    )
    @pytest.mark.parametrize("fill_value", [None, 0])
    def test_dense_repr(self, vals, fill_value):
        vals = np.array(vals)
        arr = SparseArray(vals, fill_value=fill_value)

        res = arr.to_dense()
        tm.assert_numpy_array_equal(res, vals)

        res2 = arr._internal_get_values()

        tm.assert_numpy_array_equal(res2, vals)

    def test_getitem(self):
        def _checkit(i):
            tm.assert_almost_equal(self.arr[i], self.arr.to_dense()[i])

        for i in range(len(self.arr)):
            _checkit(i)
            _checkit(-i)

    def test_getitem_arraylike_mask(self):
        arr = SparseArray([0, 1, 2])
        result = arr[[True, False, True]]
        expected = SparseArray([0, 2])
        tm.assert_sp_array_equal(result, expected)

    def test_getslice(self):
        result = self.arr[:-3]
        exp = SparseArray(self.arr.to_dense()[:-3])
        tm.assert_sp_array_equal(result, exp)

        result = self.arr[-4:]
        exp = SparseArray(self.arr.to_dense()[-4:])
        tm.assert_sp_array_equal(result, exp)

        # two corner cases from Series
        result = self.arr[-12:]
        exp = SparseArray(self.arr)
        tm.assert_sp_array_equal(result, exp)

        result = self.arr[:-12]
        exp = SparseArray(self.arr.to_dense()[:0])
        tm.assert_sp_array_equal(result, exp)

    def test_getslice_tuple(self):
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

        sparse = SparseArray(dense)
        res = sparse[(slice(4, None), )]
        exp = SparseArray(dense[4:])
        tm.assert_sp_array_equal(res, exp)

        sparse = SparseArray(dense, fill_value=0)
        res = sparse[(slice(4, None), )]
        exp = SparseArray(dense[4:], fill_value=0)
        tm.assert_sp_array_equal(res, exp)

        msg = "too many indices for array"
        with pytest.raises(IndexError, match=msg):
            sparse[4:, :]

        with pytest.raises(IndexError, match=msg):
            # check numpy compat
            dense[4:, :]

    def test_boolean_slice_empty(self):
        arr = SparseArray([0, 1, 2])
        res = arr[[False, False, False]]
        assert res.dtype == arr.dtype

    @pytest.mark.parametrize(
        "op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
    def test_binary_operators(self, op):
        op = getattr(operator, op)
        data1 = np.random.randn(20)
        data2 = np.random.randn(20)

        data1[::2] = np.nan
        data2[::3] = np.nan

        arr1 = SparseArray(data1)
        arr2 = SparseArray(data2)

        data1[::2] = 3
        data2[::3] = 3
        farr1 = SparseArray(data1, fill_value=3)
        farr2 = SparseArray(data2, fill_value=3)

        def _check_op(op, first, second):
            res = op(first, second)
            exp = SparseArray(op(first.to_dense(), second.to_dense()),
                              fill_value=first.fill_value)
            assert isinstance(res, SparseArray)
            tm.assert_almost_equal(res.to_dense(), exp.to_dense())

            res2 = op(first, second.to_dense())
            assert isinstance(res2, SparseArray)
            tm.assert_sp_array_equal(res, res2)

            res3 = op(first.to_dense(), second)
            assert isinstance(res3, SparseArray)
            tm.assert_sp_array_equal(res, res3)

            res4 = op(first, 4)
            assert isinstance(res4, SparseArray)

            # Ignore this if the actual op raises (e.g. pow).
            try:
                exp = op(first.to_dense(), 4)
                exp_fv = op(first.fill_value, 4)
            except ValueError:
                pass
            else:
                tm.assert_almost_equal(res4.fill_value, exp_fv)
                tm.assert_almost_equal(res4.to_dense(), exp)

        with np.errstate(all="ignore"):
            for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
                _check_op(op, first_arr, second_arr)

    def test_pickle(self):
        def _check_roundtrip(obj):
            unpickled = tm.round_trip_pickle(obj)
            tm.assert_sp_array_equal(unpickled, obj)

        _check_roundtrip(self.arr)
        _check_roundtrip(self.zarr)

    def test_generator_warnings(self):
        sp_arr = SparseArray([1, 2, 3])
        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings(action="always",
                                    category=DeprecationWarning)
            warnings.filterwarnings(action="always",
                                    category=PendingDeprecationWarning)
            for _ in sp_arr:
                pass
            assert len(w) == 0

    def test_fillna(self):
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        # float dtype's fill_value is np.nan, replaced by -1
        s = SparseArray([0.0, 0.0, 0.0, 0.0])
        res = s.fillna(-1)
        exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

        # int dtype shouldn't have missing. No changes.
        s = SparseArray([0, 0, 0, 0])
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        tm.assert_sp_array_equal(res, s)

        s = SparseArray([0, 0, 0, 0], fill_value=0)
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=0)
        tm.assert_sp_array_equal(res, exp)

        # fill_value can be nan if there is no missing hole.
        # only fill_value will be changed
        s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
        assert s.dtype == SparseDtype(np.int64, fill_value=np.nan)
        assert np.isnan(s.fill_value)
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

    def test_fillna_overlap(self):
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        # filling with existing value doesn't replace existing value with
        # fill_value, i.e. existing 3 remains in sp_values
        res = s.fillna(3)
        exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
        tm.assert_numpy_array_equal(res.to_dense(), exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(3)
        exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

    def test_nonzero(self):
        # Tests regression #21172.
        sa = SparseArray(
            [float("nan"),
             float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
        expected = np.array([2, 5, 9], dtype=np.int32)
        (result, ) = sa.nonzero()
        tm.assert_numpy_array_equal(expected, result)

        sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
        (result, ) = sa.nonzero()
        tm.assert_numpy_array_equal(expected, result)
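
For orientation alongside these tests: a SparseArray stores only the entries that differ from fill_value, plus an index of their positions. A minimal illustration using the public pandas API:

import numpy as np
from pandas.arrays import SparseArray

arr = SparseArray([0, 0, 1, 2, 0], fill_value=0)
print(arr.sp_values)         # [1 2] -- only the non-fill entries
print(arr.sp_index.indices)  # [2 3] -- their positions
print(arr.fill_value)        # 0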
Exemplo n.º 27
plt.axvspan(*mdates.datestr2num(
    ['2020-01-08 19:29:10.632', '2020-01-09 17:21:56.263']),
            color='red',
            alpha=0.5)
plt.axvspan(*mdates.datestr2num(
    ['2020-01-09 17:21:56.263', '2020-01-10 13:04:06.263']),
            color='green',
            alpha=0.5)
plt.axvspan(*mdates.datestr2num(
    ['2020-01-10 13:04:06.263', '2020-01-11 09:08:03.572']),
            color='gray',
            alpha=0.5)
plt.text(pd.Timestamp('2020-01-09 02:00:00'),
         2.9,
         'Cycle 1',
         horizontalalignment='left',
         size='large',
         color='white')
plt.text(pd.Timestamp('2020-01-10 00:00:00'),
         2.9,
         'Cycle 2',
         horizontalalignment='left',
         size='large',
         color='white')
plt.text(pd.Timestamp('2020-01-10 19:00:00'),
         2.9,
         'Cycle 3',
         horizontalalignment='left',
         size='large',
         color='white')
Exemplo n.º 28
def _create_schema(index="single"):

    if index == "multi":
        index = pa.MultiIndex([
            pa.Index(pa.Int, name="int_index0"),
            pa.Index(pa.Int, name="int_index1"),
            pa.Index(pa.Int, name="int_index2"),
        ])
    elif index == "single":
        # make sure io modules can handle case when index name is None
        index = pa.Index(pa.Int, name=None)
    else:
        index = None

    return pa.DataFrameSchema(columns={
        "int_column":
        pa.Column(
            pa.Int,
            checks=[
                pa.Check.greater_than(0),
                pa.Check.less_than(10),
                pa.Check.in_range(0, 10),
            ],
        ),
        "float_column":
        pa.Column(
            pa.Float,
            checks=[
                pa.Check.greater_than(-10),
                pa.Check.less_than(20),
                pa.Check.in_range(-10, 20),
            ],
        ),
        "str_column":
        pa.Column(
            pa.String,
            checks=[
                pa.Check.isin(["foo", "bar", "x", "xy"]),
                pa.Check.str_length(1, 3)
            ],
        ),
        "datetime_column":
        pa.Column(pa.DateTime,
                  checks=[
                      pa.Check.greater_than(pd.Timestamp("20100101")),
                      pa.Check.less_than(pd.Timestamp("20200101")),
                  ]),
        "timedelta_column":
        pa.Column(pa.Timedelta,
                  checks=[
                      pa.Check.greater_than(pd.Timedelta(1000, unit="ns")),
                      pa.Check.less_than(pd.Timedelta(10000, unit="ns")),
                  ]),
        "optional_props_column":
        pa.Column(
            pa.String,
            nullable=True,
            allow_duplicates=True,
            coerce=True,
            required=False,
            regex=True,
            checks=[pa.Check.str_length(1, 3)],
        ),
    },
                              index=index,
                              coerce=False,
                              strict=True)
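
A hedged usage sketch for the schema factory above (assumes pandera imported as pa, as in the snippet; the sample values are chosen to satisfy every check):

import pandas as pd

df = pd.DataFrame({
    "int_column": [1, 5, 9],
    "float_column": [-5.0, 0.5, 19.0],
    "str_column": ["foo", "bar", "xy"],
    "datetime_column": pd.to_datetime(["2015-01-01", "2018-06-01", "2019-12-31"]),
    "timedelta_column": pd.to_timedelta([2000, 5000, 9000], unit="ns"),
})
schema = _create_schema()
validated = schema.validate(df)  # raises a SchemaError on any violation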
Exemplo n.º 29
alpaca = tradeapi.REST(alpaca_key, alpaca_secret)

# Format current date as ISO format
start = date.today().isoformat()
end = date.today().isoformat()

# Set the tickers
tickers = ["SPY", "AGG"]

# Set timeframe to '1D' for Alpaca API
timeframe = "1D"

# Get current closing prices for SPY and AGG
close = pd.Timestamp(date.today(), tz="US/Central").isoformat()
agg_spy = alpaca.get_barset(tickers, timeframe, start=start, end=end).df

# Preview DataFrame
agg_spy.tail()

# Pick AGG and SPY close prices
agg_close_price = agg_spy.iloc[0, 3]
spy_close_price = agg_spy.iloc[0, 8]
# Print AGG and SPY close prices
print(f"Current AGG closing price: ${agg_close_price}")
print(f"Current SPY closing price: ${spy_close_price}")

Exemplo n.º 30
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, timedelta

df = pd.read_csv(
    'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv',
    parse_dates=['Date'])
# active cases
df['Active'] = df['Confirmed'] - df['Recovered'] - df['Deaths']

# two days back: the most recent day with complete reporting
yesterday = date.today() - timedelta(days=2)
today_df = df[df['Date'] == pd.Timestamp(yesterday)]
top_10 = today_df.sort_values(['Confirmed'], ascending=False)[:10]
top_10.loc['rest-of-world'] = today_df.sort_values(['Confirmed'],
                                                   ascending=False)[10:].sum()
top_10.loc['rest-of-world', 'Country'] = 'Rest of World'

# width of the donut
size = 0.3
# values for chart
vals1 = []
for i in range(len(top_10)):
    # [Active, Recovered + Deaths] for each country
    val = [top_10.iloc[i][5], top_10.iloc[i][3] + top_10.iloc[i][4]]
    vals1.append(val)
vals1 = np.array(vals1)

cmap = plt.get_cmap('plasma')
num_colors = len(top_10) * 3
theme_colors = [cmap(1. * i / num_colors) for i in range(num_colors)]
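
A hedged sketch of how vals1, size and the colour list are typically consumed: matplotlib's nested-donut recipe draws two pie calls with different radii and wedgeprops(width=size):

fig, ax = plt.subplots()
outer_vals = vals1.sum(axis=1)           # total cases per country
ax.pie(outer_vals, radius=1, colors=theme_colors[::3],
       wedgeprops=dict(width=size, edgecolor='w'))
ax.pie(vals1.flatten(), radius=1 - size,
       wedgeprops=dict(width=size, edgecolor='w'))
plt.show()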