Example #1
def performance_analysis_MF(pnls_or_values,
                            initial_cash=1,
                            benchmark='^GSPC',
                            risk_free='^IRX',
                            mar=0.0,
                            input_type='value'):
    if input_type == 'value':
        values_df = pd.Series(pnls_or_values)
    elif input_type == 'pnl':
        values_df = pd.Series(pnls_or_values).cumsum() + initial_cash

    values_df.index = pd.to_datetime(values_df.index)
    start_date = values_df.index[0]
    end_date = values_df.index[-1]

    # add the initial portfolio values
    values_df = pd.concat(
        [pd.Series([initial_cash], index=[start_date + BDay(-1)]), values_df])

    # calc the daily returns
    returns_df = (values_df - values_df.shift(1)) / values_df.shift(1)
    returns_df = returns_df.dropna()

    # calc the annualized return
    cum_return = values_df.iloc[1:] / initial_cash - 1
    annual_returns_df = (cum_return + 1)**(
        252 / np.array(range(1, len(returns_df) + 1))) - 1

    # calc the annualized volatility
    annual_vol = returns_df.std() * np.sqrt(252)

    # calc the Sharpe ratio / sortino ratio
    if risk_free:
        # get the risk-free prices
        RF_quotes = web.DataReader(risk_free, 'yahoo', start_date + BDay(-1),
                                   end_date)['Close']
        # get the expected risk-free rate
        risk_free = np.mean(1 / (1 - RF_quotes * 0.01) - 1)
    else:
        risk_free = 0.0

    daily_risk_free = risk_free / 252
    daily_mar = mar / 252
    sharpe_ratio = (returns_df - daily_risk_free).mean() / (
        returns_df - daily_risk_free).std() * 252**0.5
    sortino_ratio = (returns_df.mean() - daily_mar) / (
        returns_df[returns_df < daily_mar]).std() * 252**0.5
    #sharpe_ratio = (returns_df.mean()*252 - risk_free) / ((returns_df - daily_risk_free).std()*252**0.5)
    #sortino_ratio = (returns_df.mean()*252 - mar) / ((returns_df[returns_df < daily_mar]).std()*252**0.5)

    # calc the maximum drawdown
    cum_max_value = (1 + cum_return).cummax()
    drawdowns = ((1 + cum_return) - cum_max_value) / cum_max_value
    max_drawdown = np.min(drawdowns)
    avg_drawdown = drawdowns.mean()

    if benchmark:
        # get the benchmark prices
        benchmark_prices = web.DataReader(benchmark, 'yahoo',
                                          start_date + BDay(-1),
                                          end_date)['Close']
        print(benchmark_prices.shape)
        # calc the benchmark daily returns
        benchmark_returns = (benchmark_prices - benchmark_prices.shift(1)
                             ) / benchmark_prices.shift(1)
        benchmark_returns = benchmark_returns.dropna()
        # calc the benchmark annualized return
        benchmark_cum_return = np.exp(np.log1p(benchmark_returns).cumsum()) - 1
        benchmark_annual_returns = (benchmark_cum_return + 1)**(
            252 / np.array(range(1, len(benchmark_returns) + 1))) - 1
        # calc the benchmark values based on the same initial_cash of portfolio
        benchmark_values = pd.concat([
            pd.Series([initial_cash], index=[start_date + BDay(-1)]),
            initial_cash * (1 + benchmark_cum_return)
        ])
        # calc the benchmark annualized volatility
        benchmark_annual_vol = benchmark_returns.std() * np.sqrt(252)
        # calc the maximum drawdown
        benchmark_cum_max_value = (1 + benchmark_cum_return).cummax()
        benchmark_drawdowns = (
            (1 + benchmark_cum_return) -
            benchmark_cum_max_value) / benchmark_cum_max_value
        benchmark_max_drawdown = np.min(benchmark_drawdowns)
        benchmark_avg_drawdown = benchmark_drawdowns.mean()

        # compare with the benchmark
        relative_return = annual_returns_df.iloc[
            -1] - benchmark_annual_returns.iloc[-1]
        relative_vol = annual_vol - benchmark_annual_vol
        relative_max_drawdown = max_drawdown - benchmark_max_drawdown
        relative_avg_drawdown = avg_drawdown - benchmark_avg_drawdown
        excess_return_std = (returns_df -
                             benchmark_returns).std() * np.sqrt(252)
        info_ratio = relative_return / excess_return_std

    # organize the output
    performance = pd.Series()
    performance.loc['Begin'] = start_date
    performance.loc['End'] = end_date
    performance.loc['Duration'] = performance.End - performance.Begin
    performance.loc['Initial_Value'] = initial_cash
    performance.loc['Highest_Value'] = np.max(values_df)
    performance.loc['Lowest_Value'] = np.min(values_df)
    performance.loc['Final_Value'] = values_df.iloc[-1]
    performance.loc['Total_Return'] = performance['Final_Value'] / performance[
        'Initial_Value'] - 1
    performance.loc['Total_Return_(Annualized)'] = annual_returns_df.iloc[-1]
    performance.loc['Volatility_(Annualized)'] = annual_vol
    performance.loc['Max_Drawdown'] = max_drawdown
    performance.loc['Avg_Drawdown'] = avg_drawdown
    performance.loc['Sharpe_Ratio'] = sharpe_ratio
    performance.loc['Sortino_Ratio'] = sortino_ratio
    if benchmark:
        performance.loc['Relative_Return'] = relative_return
        performance.loc['Relative_Vol'] = relative_vol
        performance.loc['Relative_Max_DD'] = relative_max_drawdown
        performance.loc['Relative_Avg_DD'] = relative_avg_drawdown
        performance.loc['Information_Ratio'] = info_ratio

    print(performance)
    performance.loc['values_data'] = values_df
    performance.loc['returns_data'] = returns_df
    performance.loc['annual_returns_data'] = annual_returns_df
    performance.loc['drawdowns_data'] = drawdowns
    if benchmark:
        performance.loc['benchmark_values_data'] = benchmark_values

    strategy_plot(performance)

    return performance
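
A minimal usage sketch (my addition, not part of the example): it assumes the module-level imports used above (pandas as pd, numpy as np, BDay, pandas_datareader as web) plus the strategy_plot helper are available, and it disables the benchmark and risk-free lookups so nothing is fetched from Yahoo.

# Hypothetical call on a synthetic daily value series; names and numbers are made up.
dates = pd.date_range('2021-01-04', periods=60, freq='B')
values = pd.Series(100.0 * (1 + np.random.normal(0, 0.01, size=60)).cumprod(),
                   index=dates)
perf = performance_analysis_MF(values,
                               initial_cash=100,
                               benchmark=None,   # skip the benchmark download
                               risk_free=None,   # assume a zero risk-free rate
                               input_type='value')
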
Example #2
class TestDatetimeIndexOps:
    def test_ops_properties_basic(self, datetime_series):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series({"year": 2000, "month": 1, "day": 10})
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex([
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
        ])

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    @pytest.mark.parametrize(
        "freq,expected",
        [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, tz_naive_fixture, freq, expected):
        tz = tz_naive_fixture
        if freq == "A" and not IS64 and isinstance(tz, tzlocal):
            pytest.xfail(reason="OverflowError inside tzlocal past 2038")

        idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values: the n-th element is repeated n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1,
                                                        len(idx) + 1)),
                            tz=tz)

        exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:

            tm.assert_series_equal(obj.value_counts(), expected)

        expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"],
                                tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"],
                          freq="D",
                          name="idx"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([0, 1, 2]),
                                    check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([2, 1, 0]),
                                    check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates,
                                tz_naive_fixture):
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        # GH 10115
        idx = date_range("2011-01-01",
                         freq=freq_sample,
                         periods=10,
                         name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(
                ([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(
                ([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        # to check Index/Series compat
        idx = date_range("2011-01-01",
                         freq=freq_sample,
                         periods=10,
                         name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        # GH 11018
        idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        assert DatetimeIndex._na_value is pd.NaT
        assert DatetimeIndex([])._na_value is pd.NaT

        idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B",
                 BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        #  for another that views the same data

        dti = date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
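
A quick interactive sketch (my addition, assuming a recent pandas) of two behaviors the tests above pin down: repeat() resets the inferred frequency, while drop_duplicates() on an already-unique index preserves it.

import pandas as pd

idx = pd.date_range("2011-01-01", periods=3, freq="D", name="idx")
print(idx.repeat(2).freq)          # None - repeating resets the inferred freq
print(idx.drop_duplicates().freq)  # <Day> - nothing dropped, freq preserved
print(idx.resolution)              # 'day'
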
    def test_mismatching_tz_raises_err(self, start, end):
        # issue 18488
        with pytest.raises(TypeError):
            pd.date_range(start, end)
        with pytest.raises(TypeError):
            pd.date_range(start, end, freq=BDay())
# Additionally, you can use a custom frequency - see the hourly timestamp range below
pd.date_range('2015-07-03', periods=8, freq='H')

# or, a sequence of durations increasing by an hour:
pd.timedelta_range(0, periods=10, freq='H')


### Frequencies and Offsets
# (table of codes)
# stuff ------
pd.timedelta_range(0, periods=9, freq="2H30T")

# get range of 5 business days, beginning on 2015-07-01
from pandas.tseries.offsets import BDay
pd.date_range('2015-07-01', periods=5, freq=BDay())
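
A related sketch (my addition, not part of the original notes): business-day offsets can also skip ad-hoc holidays via CustomBusinessDay.

from pandas.tseries.offsets import CustomBusinessDay
# skip 2015-07-03 (observed Independence Day) in addition to weekends
pd.date_range('2015-07-01', periods=5,
              freq=CustomBusinessDay(holidays=['2015-07-03']))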


### Resampling, Shifting, and Windowing
from pandas_datareader import data 

goog = data.DataReader('GOOG', start='2004', end='2016', data_source='google')
#   NOTE: "ImmediateDeprecationError - Google Finance dep. due to API breaks"
#       will not be able to complete section notes. Goes into some basic plots.

## Resampling and Converting Frequencies
    # still uses deprecated functionality
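
Since the Google reader is deprecated, here is a stand-in sketch on synthetic prices (my addition) showing the same resampling idea:

import numpy as np
import pandas as pd

prices = pd.Series(np.random.randn(252).cumsum() + 100,
                   index=pd.date_range('2015-01-01', periods=252, freq='B'))
prices.resample('BM').last()   # business month-end closing values
prices.resample('W').mean()    # calendar-week averages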


### Example: Visualizing Seattle Bicycle Counts
# data get:
startingYear = 2015  # First year of data
endingYear = 2015  # Last year of data
for year in range(startingYear, endingYear + 1):

    makeDirectory(year)
    startTimeAcquireData = time()

    rawData = {year: {}}

    if year >= 1999:
        firstDateOfYear = datetime.strptime('01/01/{:d}'.format(year),
                                            '%m/%d/%Y')

        daysUntilStartingDay = random.randint(0, 5)
        recordDate = firstDateOfYear + BDay(daysUntilStartingDay)
        while recordDate.year == year:
            recordURL = getCongressionalRecordURL(year, date=recordDate)
            try:
                filePath = downloadPDFFile(recordURL, year, date=recordDate)
                textPageDict = parsePDFFile(filePath, everyNPages=7)

                recordDateString = str(recordDate)[:10]
                rawData[year][recordDateString] = textPageDict

                daysUntilNextRecordDate = random.randint(4, 9)
            except (PDFSyntaxError, FileNotFoundError):
                print('{:s} does not have data'.format(str(recordDate)))

                daysUntilNextRecordDate = random.randint(1, 2)
Example #6
        the date(s) to be converted
    Returns
    -------
    same type as input
        date(s) converted to UTC
    """

    dt = pd.to_datetime(dt)
    try:
        dt = dt.tz_localize('UTC')
    except TypeError:
        dt = dt.tz_convert('UTC')
    return dt


_1_bday = BDay()


def _1_bday_ago():
    return pd.Timestamp.now().normalize() - _1_bday
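
A quick check of these helpers (my addition; it assumes pandas is imported as pd, as in the truncated function above):

naive = pd.to_datetime('2021-06-01 09:30')
aware = naive.tz_localize('US/Eastern')
print(naive.tz_localize('UTC'))   # 2021-06-01 09:30:00+00:00
print(aware.tz_convert('UTC'))    # 2021-06-01 13:30:00+00:00 (EDT is UTC-4)
print(_1_bday_ago())              # midnight of the previous business day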


# @deprecated(msg=DATAREADER_DEPRECATION_WARNING)
# def get_fama_french():
#     """
#     Retrieve Fama-French factors via pandas-datareader
#     Returns
#     -------
#     pandas.DataFrame
#         Percent change of Fama-French factors
#     """
Example #7
    def test_shift(self):
        shifted = self.ts.shift(1)
        unshifted = shifted.shift(-1)

        tm.assert_index_equal(shifted.index, self.ts.index)
        tm.assert_index_equal(unshifted.index, self.ts.index)
        tm.assert_numpy_array_equal(unshifted.valid().values,
                                    self.ts.values[:-1])

        offset = BDay()
        shifted = self.ts.shift(1, freq=offset)
        unshifted = shifted.shift(-1, freq=offset)

        assert_series_equal(unshifted, self.ts)

        unshifted = self.ts.shift(0, freq=offset)
        assert_series_equal(unshifted, self.ts)

        shifted = self.ts.shift(1, freq='B')
        unshifted = shifted.shift(-1, freq='B')

        assert_series_equal(unshifted, self.ts)

        # corner case
        unshifted = self.ts.shift(0)
        assert_series_equal(unshifted, self.ts)

        # Shifting with PeriodIndex
        ps = tm.makePeriodSeries()
        shifted = ps.shift(1)
        unshifted = shifted.shift(-1)
        tm.assert_index_equal(shifted.index, ps.index)
        tm.assert_index_equal(unshifted.index, ps.index)
        tm.assert_numpy_array_equal(unshifted.valid().values, ps.values[:-1])

        shifted2 = ps.shift(1, 'B')
        shifted3 = ps.shift(1, BDay())
        assert_series_equal(shifted2, shifted3)
        assert_series_equal(ps, shifted2.shift(-1, 'B'))

        pytest.raises(ValueError, ps.shift, freq='D')

        # legacy support
        shifted4 = ps.shift(1, freq='B')
        assert_series_equal(shifted2, shifted4)

        shifted5 = ps.shift(1, freq=BDay())
        assert_series_equal(shifted5, shifted4)

        # 32-bit taking
        # GH 8129
        index = date_range('2000-01-01', periods=5)
        for dtype in ['int32', 'int64']:
            s1 = Series(np.arange(5, dtype=dtype), index=index)
            p = s1.iloc[1]
            result = s1.shift(periods=p)
            expected = Series([np.nan, 0, 1, 2, 3], index=index)
            assert_series_equal(result, expected)

        # xref 8260
        # with tz
        s = Series(date_range('2000-01-01 09:00:00',
                              periods=5,
                              tz='US/Eastern'),
                   name='foo')
        result = s - s.shift()

        exp = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
        assert_series_equal(result, exp)

        # incompat tz
        s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'),
                    name='foo')
        pytest.raises(ValueError, lambda: s - s2)
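
Outside the test harness, the freq-aware shift exercised above can be sketched directly (my addition):

import pandas as pd
from pandas.tseries.offsets import BDay

s = pd.Series([1.0, 2.0, 3.0],
              index=pd.date_range('2000-01-03', periods=3, freq='B'))
s.shift(1)               # values slide down one row; the first becomes NaN
s.shift(1, freq=BDay())  # the index moves forward one business day; values unchanged
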
NORM_FACTOR = 100

# Matching delay
MAX_LAG = pd.Timedelta('1 hours')
MATCH_TOL_DICT = {
    '1H': pd.Timedelta('1 hours'),
    '0.5H': pd.Timedelta('0.5 hours'),
    '0.1H': pd.Timedelta('0.1 hours')
}
MATCH_TOL = '0.1H'

# Hedging frequency
OFFSET_DICT = {
    '1H': [pd.Timedelta('1 hours'), '_1H'],
    '1D': [BDay(1), '_1D'],
    '2D': [BDay(2), '_2D']
}

FREQ = '1H'

T_FIRSTHALF = datetime.time(8, 30)
if FREQ == '1H':
    DT = 0.
    T_LASTHALF = datetime.time(15, 15)
if FREQ == '1D':
    DT = 1. / 253.
    T_LASTHALF = datetime.time(16, 0)
if FREQ == '2D':
    DT = 2. / 253.
    T_LASTHALF = datetime.time(16, 0)
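
A small sketch (my addition) that just resolves the configured offset and matching tolerance, to make the lookups above concrete:

offset, col_suffix = OFFSET_DICT[FREQ]   # pd.Timedelta('1 hours'), '_1H'
tolerance = MATCH_TOL_DICT[MATCH_TOL]    # pd.Timedelta('0.1 hours'), i.e. 6 minutes
print(offset, col_suffix, tolerance, DT, T_FIRSTHALF, T_LASTHALF)
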
Example #9
def main():
    temptime = datetime.datetime.now()
    print(temptime)
    warnings.filterwarnings("error")
    #final df
    wtf = pd.DataFrame()
    final = pd.DataFrame()
    #unprocessed df
    unproc = pd.DataFrame()
    unproc['complist'] = ''
    #number of unprocessed comp
    pos = 0
    #define date where we start fetching data, or check if any data exists
    try:
        maindf = pd.read_csv(os.path.join('allprice.csv'))
        final['Date'] = maindf['Date']
        day_num = len(maindf['Date'])

        start = datetime.datetime.strptime(str(maindf.loc[day_num - 1][0]),
                                           '%Y-%m-%d')
        end = datetime.datetime.today()
        status = "update"
        day = day_num
        for x in perdelta(start, end, timedelta(days=1)):
            final.set_value(day, 'Date', x)
            day += 1
    except (UnboundLocalError, OSError):
        start = datetime.datetime(2010, 1, 1)
        end = datetime.datetime.today()
        day_num = 0
        maindf = pd.DataFrame()
        status = "new"
    complist = pd.read_csv(path.join('Data/companylist.csv'))
    #traverse through the company list
    for x in range(len(complist)):
        time = datetime.datetime.now()
        day_num1 = day_num
        rsdf = pd.DataFrame()
        name = complist.ix[x]
        file_num = 1
        #fetch data
        try:
            data = web.DataReader(complist.ix[x], 'yahoo', start, end)
            print(complist.ix[x] + "'s Data obtained from Yahoo " +
                  str(time.isoformat()))
        except Exception:
            try:
                data = web.DataReader(complist.ix[x], 'google', start, end)
                print(complist.ix[x] + "'s Data obtained from Google " +
                      str(time.isoformat()))
            except Exception:
                try:
                    data = web.DataReader(complist.ix[x], 'fred', start, end)
                    print(complist.ix[x] + "'s Data obtained from Fred " +
                          str(time.isoformat()))
                except Exception:
                    # none of the sources worked; skip this company
                    print(complist.ix[x] + " Cannot be Obtained " +
                          str(time.isoformat()))
                    continue
    #create or append based on the status
        if status == "new":
            rsdf[name] = data['Adj Close']
        else:
            try:
                rsdf[name] = maindf[name]
                rsdf.columns.values[0] = 'Adj Close'
            except KeyError:
                unproc.set_value(pos, 'complist', str(name))
                pos += 1
        comprice = data['Adj Close']
        if (status == "update"):
            rsdf = rsdf.append(comprice, ignore_index=True)
        rsdf.rename(columns={'Adj Close': str(name)}, inplace=True)
        if (status == 'new' and x == 0):
            #since it's new, add an index Date column from 2010 to today
            date = pd.date_range('20100101', end, freq=BDay())
            final = pd.DataFrame(rsdf[name], index=date)
        else:
            #dump the data for every 1000 companies to decrease processing time
            if (x % 1000 == 0 and x != 0):
                if file_num != 1:
                    final.to_csv(path.join('all_price' + str(file_num) +
                                           '.csv'),
                                 index=False)
                else:
                    final.to_csv(path + 'all_price' + str(file_num) + '.csv',
                                 index=True)
                file_num += 1
                final = pd.DataFrame(rsdf[name])
            else:
                final = final.join(rsdf[name])
    #dump all data frame to file
    final.to_csv(path + 'all_price' + str(file_num) + '.csv', index=False)
    y = 1
    final = merge_file(y, file_num, path)
    final.rename(columns={'Unnamed: 0': 'Date'}, inplace=True)
    temptime = datetime.datetime.now()
    final.to_csv(path.join('all_price.csv'), index=False)
    #list of companies that were not processed because no data was found
    unproc.to_csv(path.join('unproc.csv'), index=False)
    print(temptime)
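
The loop above calls a perdelta helper that is not shown in this snippet; a definition consistent with how it is used (my assumption, not the author's code) is a simple step-wise date generator:

from datetime import timedelta

def perdelta(start, end, delta):
    """Yield datetimes from start (inclusive) to end (exclusive) in steps of delta."""
    current = start
    while current < end:
        yield current
        current += delta
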
Example #10
    def test_shift(self, datetime_series):
        shifted = datetime_series.shift(1)
        unshifted = shifted.shift(-1)

        tm.assert_index_equal(shifted.index, datetime_series.index)
        tm.assert_index_equal(unshifted.index, datetime_series.index)
        tm.assert_numpy_array_equal(
            unshifted.dropna().values, datetime_series.values[:-1]
        )

        offset = BDay()
        shifted = datetime_series.shift(1, freq=offset)
        unshifted = shifted.shift(-1, freq=offset)

        tm.assert_series_equal(unshifted, datetime_series)

        unshifted = datetime_series.shift(0, freq=offset)
        tm.assert_series_equal(unshifted, datetime_series)

        shifted = datetime_series.shift(1, freq="B")
        unshifted = shifted.shift(-1, freq="B")

        tm.assert_series_equal(unshifted, datetime_series)

        # corner case
        unshifted = datetime_series.shift(0)
        tm.assert_series_equal(unshifted, datetime_series)

        # Shifting with PeriodIndex
        ps = tm.makePeriodSeries()
        shifted = ps.shift(1)
        unshifted = shifted.shift(-1)
        tm.assert_index_equal(shifted.index, ps.index)
        tm.assert_index_equal(unshifted.index, ps.index)
        tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1])

        shifted2 = ps.shift(1, "B")
        shifted3 = ps.shift(1, BDay())
        tm.assert_series_equal(shifted2, shifted3)
        tm.assert_series_equal(ps, shifted2.shift(-1, "B"))

        msg = "Given freq D does not match PeriodIndex freq B"
        with pytest.raises(ValueError, match=msg):
            ps.shift(freq="D")

        # legacy support
        shifted4 = ps.shift(1, freq="B")
        tm.assert_series_equal(shifted2, shifted4)

        shifted5 = ps.shift(1, freq=BDay())
        tm.assert_series_equal(shifted5, shifted4)

        # 32-bit taking
        # GH 8129
        index = date_range("2000-01-01", periods=5)
        for dtype in ["int32", "int64"]:
            s1 = Series(np.arange(5, dtype=dtype), index=index)
            p = s1.iloc[1]
            result = s1.shift(periods=p)
            expected = Series([np.nan, 0, 1, 2, 3], index=index)
            tm.assert_series_equal(result, expected)

        # xref 8260
        # with tz
        s = Series(
            date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
        )
        result = s - s.shift()

        exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
        tm.assert_series_equal(result, exp)

        # incompat tz
        s2 = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"), name="foo")
        msg = "DatetimeArray subtraction must have the same timezones or no timezones"
        with pytest.raises(TypeError, match=msg):
            s - s2
        def freight_and_quality_exceptions():
            if crude == 'Forties':
                df_freight['Buzzard_Content'] = forties_sulphur[
                    'BuzzardContent']
                df_freight['Implied_Sulphur'] = df_freight[
                    'Buzzard_Content'] * 0.012 + 0.003
                df_freight['De-Escalator_Threshold'] = np.round(
                    df_freight['Implied_Sulphur'], 3)
                df_freight['De-Escalator_Counts'] = np.minimum(
                    0, 6 - df_freight['Implied_Sulphur'] * 1000)
                df_freight['Platts_De_Esc'] = total['AAUXL00']
                df_freight['Forties_Margin_Impact'] = df_freight[
                    'Platts_De_Esc'] * df_freight['De-Escalator_Counts'] * -1
                df_freight['Costs'] += df_freight['Forties_Margin_Impact']

            if crude in ('Basrah Light', 'Basrah Heavy'):
                """This handles the freight escalation calculation from Iraq - the base is sent by SOMO, and table is in databse / excel wb"""
                # resampled so we have the business month start, corrects averaging error if cma
                monthly_averages = total['PFAOH00'].asfreq(
                    BDay()).resample('BMS').mean()
                func_ma_on_days = lambda x: (monthly_averages.loc[
                    (monthly_averages.index.month == x.month) &
                    (monthly_averages.index.year == x.year)]).iat[0]
                """Create funcs to handle basrah base and flat rate values, apply over df and calc esclator"""
                func_ws_base = lambda x: (basrah_ws_base.loc[
                    (basrah_ws_base.index.year == x.year)]['SOMO_WS']).iat[0]
                func_fr = lambda x: (basrah_ws_base.loc[
                    (basrah_ws_base.index.year == x.year)]['SOMO_FlatRate']
                                     ).iat[0]
                func_bhapi = lambda x: (basrah_ws_base.loc[
                    (basrah_ws_base.index.year == x.year)]['BasrahHeavyAPI']
                                        ).iat[0]
                func_blapi = lambda x: (basrah_ws_base.loc[
                    (basrah_ws_base.index.year == x.year)]['BasrahLightAPI']
                                        ).iat[0]
                df_freight['Date'] = df_freight.index
                df_freight['WS Month Avg'] = df_freight['Date'].apply(
                    func_ma_on_days)
                df_freight['SOMO Base WS'] = df_freight['Date'].apply(
                    func_ws_base)
                # We have to apply the correction here after SOMO dropped their base rate earlier this year - assumption
                # only valid for 2018
                df_freight['SOMO Base WS'].iloc[
                    (df_freight.index >= dt(2018, 4, 1))
                    & (df_freight.index <= dt(2018, 12, 31))] = 25
                df_freight['Base_FR_for_esc'] = df_freight['Date'].apply(
                    func_fr)

                if crude == 'Basrah Light':
                    df_freight['API Esc'] = df_freight['Date'].apply(
                        func_blapi)
                else:
                    df_freight['API Esc'] = df_freight['Date'].apply(
                        func_bhapi)

                df_freight['WS for Esc'] = (
                    df_freight['WS Month Avg'] - df_freight['SOMO Base WS']
                ) * df_freight['Base_FR_for_esc'] / 7.3 / 100
                df_freight.drop(['Date'], axis=1, inplace=True)

            # South Korean particulars
            if ports[ports['PortName'] ==
                     destination]['Country'].iat[0] == 'South Korea':
                # Freight rebate on imported crudes
                df_freight['Murban_Freight_Comp'] = total[
                    'PFAOC00'] / 100 * df_freight[
                        'Murban_Sing_Flat'] / 7.66  #Murban density conversion
                df_freight[
                    'UKC-Yosu_VLCC'] = total['AASLA00'] * 1000000 / 2000000
                df_freight['Freight_Rebate'] = np.maximum(
                    df_freight['UKC-Yosu_VLCC'] -
                    df_freight['Murban_Freight_Comp'], 0.6)
                df_freight['Costs'] -= df_freight['Freight_Rebate']

                # Tax rebate on crudes out of Europe
                if ports[ports['PortName'] ==
                         loadport]['RegionName'].iat[0] in ([
                             'NW EUROPE', 'MED'
                         ]):
                    df_freight['FTA_Tax_Rebate'] = 0.006 * total['LCOc1']
                    df_freight['Costs'] -= df_freight['FTA_Tax_Rebate']

                # Tax rebate on crudes out of the US
                if ports[ports['PortName'] ==
                         loadport]['RegionName'].iat[0] in (['N AMERICA']):
                    df_freight['FTA_Tax_Rebate'] = 0.005 * total['CLc1']
                    df_freight['Costs'] -= df_freight['FTA_Tax_Rebate']

                # Costs associated with lifting CPC based on delays
                if crude == 'CPC Blend':
                    df_freight['TS_Delays'] = np.maximum(
                        total['AAWIL00'] + total['AAWIK00'] - 2, 0)
                    df_freight['TS_Demur'] = total['AAPED00']
                    df_freight['TS_Demur_Costs'] = df_freight['TS_Delays'].mul(
                        df_freight['TS_Demur']) / 130
                    df_freight['Costs'] += df_freight['TS_Demur_Costs']

                # Costs associated with lifting Urals; actually a rebate, as it gives back port costs that are included in the CIF price
                if crude in (['Urals Nth', 'Urals Med']):
                    df_freight['Urals_Cif_Rebate'] = 0.11
                    df_freight['Costs'] -= df_freight['Urals_Discharge_Costs']

                if crude == 'Forties':
                    df_freight['Forties_Mkt_Discount'] = 0.5
                    df_freight['Costs'] -= df_freight['Forties_Mkt_Discount']
            else:
                pass

            return df_freight
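
The SOMO monthly-average pattern used above (conform the series to business days, then average per business-month start) can be sketched in isolation on synthetic data (my addition):

import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay

prices = pd.Series(np.random.randn(90).cumsum() + 60,
                   index=pd.date_range('2018-01-01', periods=90, freq='D'))
monthly_avg = prices.asfreq(BDay()).resample('BMS').mean()
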
Example #12
def positions(weights, period, freq=None):
    """
    Builds net position values time series, the portfolio percentage invested
    in each position.

    Parameters
    ----------
    weights: pd.Series
        pd.Series containing factor weights, the index contains timestamps at
        which the trades are computed and the values correspond to assets
        weights
        - see factor_weights for more details
    period: pandas.Timedelta or string
        Assets holding period (1 day, 2 mins, 3 hours etc). It can be a
        Timedelta or a string in the format accepted by Timedelta constructor
        ('1 days', '1D', '30m', '3h', '1D1h', etc)
    freq : pandas DateOffset, optional
        Used to specify a particular trading calendar. If not present
        weights.index.freq will be used

    Returns
    -------
    pd.DataFrame
        Assets positions series, datetime on index, assets on columns.
    """

    weights = weights.unstack()

    if not isinstance(period, pd.Timedelta):
        period = pd.Timedelta(period)

    if freq is None:
        freq = weights.index.freq

    if freq is None:
        freq = BDay()
        warnings.warn("'freq' not set, using business day calendar",
                      UserWarning)

    #
    # weights index contains factor computation timestamps, then add returns
    # timestamps too (factor timestamps + period) and save them to 'full_idx'
    # 'full_idx' index will contain an entry for each point in time the weights
    # change and hence they have to be re-computed
    #
    trades_idx = weights.index.copy()
    returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq)
    weights_idx = trades_idx.union(returns_idx)

    #
    # Compute portfolio weights for each point in time contained in the index
    #
    portfolio_weights = pd.DataFrame(index=weights_idx,
                                     columns=weights.columns)
    active_weights = []

    for curr_time in weights_idx:

        #
        # fetch new weights that become available at curr_time and store them
        # in active weights
        #
        if curr_time in weights.index:
            assets_weights = weights.loc[curr_time]
            expire_ts = utils.add_custom_calendar_timedelta(
                curr_time, period, freq)
            active_weights.append((expire_ts, assets_weights))

        #
        # remove expired entry in active_weights (older than 'period')
        #
        if active_weights:
            expire_ts, assets_weights = active_weights[0]
            if expire_ts <= curr_time:
                active_weights.pop(0)

        if not active_weights:
            continue
        #
        # Compute total weights for curr_time and store them
        #
        tot_weights = [w for (ts, w) in active_weights]
        tot_weights = pd.concat(tot_weights, axis=1)
        tot_weights = tot_weights.sum(axis=1)
        tot_weights /= tot_weights.abs().sum()

        portfolio_weights.loc[curr_time] = tot_weights

    return portfolio_weights.fillna(0)
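
A minimal call sketch for positions() (my construction from the docstring; it assumes the surrounding alphalens module so that utils.add_custom_calendar_timedelta is available):

import pandas as pd
from pandas.tseries.offsets import BDay

dates = pd.date_range('2020-01-06', periods=3, freq='B')
idx = pd.MultiIndex.from_product([dates, ['AAA', 'BBB']], names=['date', 'asset'])
weights = pd.Series([0.5, -0.5, 0.6, -0.4, 0.5, -0.5], index=idx)
pos = positions(weights, period='1D', freq=BDay())  # rows at trade and expiry times
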
Example #13
def cumulative_returns(returns, period, freq=None):
    """
    Builds cumulative returns from 'period' returns. This function simulates the
    cumulative effect that a series of gains or losses (the 'returns') have on
    an original amount of capital over a period of time.

    If F is the frequency at which returns are computed (e.g. 1 day if
    'returns' contains daily values) and N is the period for which the returns
    are computed (e.g. returns after 1 day, 5 hours or 3 days) then:
    - if N <= F the cumulative returns are trivially computed as Compound Return
    - if N > F (e.g. F 1 day, and N is 3 days) then the returns overlap and the
      cumulative returns are computed building and averaging N interleaved sub
      portfolios (started at subsequent periods 1,2,..,N) each one rebalancing
      every N periods. This corresponds to an algorithm which trades the factor
      every single time it is computed, which is statistically more robust and
      has a lower volatility compared to an algorithm that trades the factor
      every N periods and whose returns depend on the specific starting day of
      trading.

    Also note that when the factor is not computed at a specific frequency, for
    example a factor representing a random event, it is not efficient to create
    multiple sub-portfolios as it is not certain when the factor will be
    traded and this would result in an underleveraged portfolio. In this case
    the simulated portfolio is fully invested whenever an event happens and if
    a subsequent event occurs while the portfolio is still invested in a
    previous event then the portfolio is rebalanced and split equally among the
    active events.

    Parameters
    ----------
    returns: pd.Series
        pd.Series containing factor 'period' forward returns, the index
        contains timestamps at which the trades are computed and the values
        correspond to returns after 'period' time
    period: pandas.Timedelta or string
        Length of period for which the returns are computed (1 day, 2 mins,
        3 hours etc). It can be a Timedelta or a string in the format accepted
        by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc)
    freq : pandas DateOffset, optional
        Used to specify a particular trading calendar. If not present
        returns.index.freq will be used

    Returns
    -------
    Cumulative returns series : pd.Series
    """

    if not isinstance(period, pd.Timedelta):
        period = pd.Timedelta(period)

    if freq is None:
        freq = returns.index.freq

    if freq is None:
        freq = BDay()
        warnings.warn("'freq' not set, using business day calendar",
                      UserWarning)

    #
    # returns index contains factor computation timestamps, then add returns
    # timestamps too (factor timestamps + period) and save them to 'full_idx'
    # Cumulative returns will use 'full_idx' index,because we want a cumulative
    # returns value for each entry in 'full_idx'
    #
    trades_idx = returns.index.copy()
    returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq)
    full_idx = trades_idx.union(returns_idx)

    #
    # Build N sub_returns from the single returns Series. Each sub_returns
    # stream will contain non-overlapping returns.
    # In the next step we'll compute the portfolio returns averaging the
    # returns happening on those overlapping returns streams
    #
    sub_returns = []
    while len(trades_idx) > 0:

        #
        # select non-overlapping returns starting with first timestamp in index
        #
        sub_index = []
        next = trades_idx.min()
        while next <= trades_idx.max():
            sub_index.append(next)
            next = utils.add_custom_calendar_timedelta(next, period, freq)
            # make sure to fetch the next available entry after 'period'
            try:
                i = trades_idx.get_loc(next, method='bfill')
                next = trades_idx[i]
            except KeyError:
                break

        sub_index = pd.DatetimeIndex(sub_index, tz=full_idx.tz)
        subret = returns[sub_index]

        # make the index to have all entries in 'full_idx'
        subret = subret.reindex(full_idx)

        #
        # compute intermediate returns values for each index in subret that are
        # in between the timestamps at which the factors are computed and the
        # timestamps at which the 'period' returns actually happen
        #
        for pret_idx in reversed(sub_index):

            pret = subret[pret_idx]

            # get all timestamps between factor computation and period returns
            pret_end_idx = \
                utils.add_custom_calendar_timedelta(pret_idx, period, freq)
            slice = subret[(subret.index > pret_idx)
                           & (subret.index <= pret_end_idx)].index

            if pd.isnull(pret):
                continue

            def rate_of_returns(ret, period):
                return ((np.nansum(ret) + 1)**(1. / period)) - 1

            # compute intermediate 'period' returns values, note that this also
            # moves the final 'period' returns value from trading timestamp to
            # trading timestamp + 'period'
            for slice_idx in slice:
                sub_period = utils.diff_custom_calendar_timedeltas(
                    pret_idx, slice_idx, freq)
                subret[slice_idx] = rate_of_returns(pret, period / sub_period)

            subret[pret_idx] = np.nan

            # transform returns as percentage change from previous value
            subret[slice[1:]] = (subret[slice] + 1).pct_change()[slice[1:]]

        sub_returns.append(subret)
        trades_idx = trades_idx.difference(sub_index)

    #
    # Compute portfolio cumulative returns averaging the returns happening on
    # overlapping returns streams. Please note that the below algorithm keeps
    # into consideration the scenario where a factor is not computed at a fixed
    # frequency (e.g. every day) and consequently the returns appears randomly
    #
    sub_portfolios = pd.concat(sub_returns, axis=1)
    portfolio = pd.Series(index=sub_portfolios.index)

    for i, (index, row) in enumerate(sub_portfolios.iterrows()):

        # check the active portfolios, count() returns non-nans elements
        active_subfolios = row.count()

        # fill forward portfolio value
        portfolio.iloc[i] = portfolio.iloc[i - 1] if i > 0 else 1.

        if active_subfolios <= 0:
            continue

        # current portfolio is the average of active sub_portfolios
        portfolio.iloc[i] *= (row + 1).mean(skipna=True)

    return portfolio
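
The docstring's overlapping-returns idea in miniature (my own toy numbers, not library code): with daily observations of 2-day returns, two interleaved sub-portfolios each rebalance every other day and the reported curve is their average.

import pandas as pd

two_day_returns = pd.Series([0.02, 0.01, -0.01, 0.03],
                            index=pd.date_range('2020-01-06', periods=4, freq='B'))
sub_a = (1 + two_day_returns.iloc[0::2]).cumprod()   # rebalances on days 1, 3, ...
sub_b = (1 + two_day_returns.iloc[1::2]).cumprod()   # rebalances on days 2, 4, ...
portfolio = pd.concat([sub_a, sub_b], axis=1).ffill().mean(axis=1)
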
Example #14
def performance_analysis_HF(pnls_or_values,
                            initial_cash=1,
                            benchmark='^GSPC',
                            risk_free='^IRX',
                            mar=0.0,
                            input_type='value'):
    if input_type == 'value':
        values_df = pd.Series(pnls_or_values)
    elif input_type == 'pnl':
        values_df = pd.Series(pnls_or_values).cumsum() + initial_cash

    values_df.index = pd.to_datetime(values_df.index)
    start_time = values_df.index[0]
    end_time = values_df.index[-1]

    # downsample the series into 3 min bins
    values_df_3T = values_df.resample('3T', label='right').last()
    snapshot_per_day = (16 - 9.5) * 60 / 3

    # add the initial portfolio values
    values_df = pd.concat([
        pd.Series([initial_cash], index=[start_time + Minute(-3)]),
        values_df_3T
    ])

    # calc the 3-minute returns
    returns_df = (values_df - values_df.shift(1)) / values_df.shift(1)
    returns_df = returns_df.dropna()

    # calc the annualized return
    cum_return = values_df.iloc[1:] / initial_cash - 1
    annual_returns_df = (cum_return + 1)**(
        252 * snapshot_per_day /
        np.array(range(1, len(returns_df) + 1))) - 1

    # calc the annualized volatility
    annual_vol = returns_df.std() * np.sqrt(252 * snapshot_per_day)

    # get the risk-free rate
    if risk_free:
        # get the risk-free prices
        RF_quotes = web.DataReader(risk_free, 'yahoo', start_time + Minute(-3),
                                   end_time)['Close']
        # get the expected risk-free rate
        risk_free = np.mean(1 / (1 - RF_quotes * 0.01) - 1)
    else:
        risk_free = 0.0

    # calc the Sharpe ratio / sortino ratio
    risk_free_per_snapshot = risk_free / 252 / snapshot_per_day
    mar_per_snapshot = mar / 252 / snapshot_per_day
    sharpe_ratio = (returns_df.mean() - risk_free_per_snapshot) / (returns_df - risk_free_per_snapshot).std() \
                    * (252*snapshot_per_day)**0.5
    sortino_ratio = (returns_df.mean() - mar_per_snapshot) / (returns_df[returns_df < mar_per_snapshot]).std() \
                    * (252*snapshot_per_day)**0.5

    # calc the maximum drawdown
    cum_max_value = (1 + cum_return).cummax()
    drawdowns = ((1 + cum_return) - cum_max_value) / cum_max_value
    max_drawdown = np.min(drawdowns)
    avg_drawdown = drawdowns.mean()

    if benchmark:
        start_time = values_df.index[0].replace(
            tzinfo=timezone('America/New_York'))
        end_time = values_df.index[-1].replace(
            tzinfo=timezone('America/New_York'))
        # get the benchmark prices
        benchmark_prices = yf.download(benchmark,
                                       start=start_time.strftime('%Y-%m-%e'),
                                       end=(end_time +
                                            BDay(1)).strftime('%Y-%m-%e'),
                                       interval="1m")['Close']
        benchmark_prices_3T = benchmark_prices.resample(
            '3T', label='right').last()[values_df_3T.index]
        benchmark_prices = pd.concat([
            pd.Series([benchmark_prices.iloc[-1]],
                      index=[start_time + Minute(-3)]), benchmark_prices_3T
        ])
        # calc the benchmark daily returns
        benchmark_returns = (benchmark_prices - benchmark_prices.shift(1)
                             ) / benchmark_prices.shift(1)
        benchmark_returns = benchmark_returns.dropna()
        # calc the benchmark annualized return
        benchmark_cum_return = np.exp(np.log1p(benchmark_returns).cumsum()) - 1
        benchmark_annual_returns = (benchmark_cum_return + 1)**(
            252 * snapshot_per_day /
            np.array(range(1, len(benchmark_cum_return) + 1))) - 1
        # calc the benchmark values based on the same initial_cash of portfolio
        benchmark_values = pd.concat([
            pd.Series([initial_cash], index=[start_time + Minute(-3)]),
            initial_cash * (1 + benchmark_cum_return)
        ])
        benchmark_values.index = [
            local_time.replace(tzinfo=timezone('UTC'))
            for local_time in benchmark_values.index
        ]
        # calc the benchmark annualized volatility
        benchmark_annual_vol = benchmark_returns.std() * np.sqrt(
            252 * snapshot_per_day)
        # calc the maximum drawdown
        benchmark_cum_max_value = (1 + benchmark_cum_return).cummax()
        benchmark_drawdowns = (
            (1 + benchmark_cum_return) -
            benchmark_cum_max_value) / benchmark_cum_max_value
        benchmark_max_drawdown = np.min(benchmark_drawdowns)
        benchmark_avg_drawdown = benchmark_drawdowns.mean()

        # compare with the benchmark
        relative_return = annual_returns_df.iloc[
            -1] - benchmark_annual_returns.iloc[-1]
        relative_vol = annual_vol - benchmark_annual_vol
        relative_max_drawdown = max_drawdown - benchmark_max_drawdown
        relative_avg_drawdown = avg_drawdown - benchmark_avg_drawdown
        excess_return_std = (returns_df - benchmark_returns).std() * np.sqrt(
            252 * snapshot_per_day)
        info_ratio = relative_return / excess_return_std

    # organize the output
    performance = pd.Series()
    performance.loc['Begin'] = start_time
    performance.loc['End'] = end_time
    performance.loc['Duration'] = performance.End - performance.Begin
    performance.loc['Initial_Value'] = initial_cash
    performance.loc['Highest_Value'] = np.max(values_df)
    performance.loc['Lowest_Value'] = np.min(values_df)
    performance.loc['Final_Value'] = values_df.iloc[-1]
    performance.loc['Total_Return'] = performance['Final_Value'] / performance[
        'Initial_Value'] - 1
    performance.loc['Total_Return_(Annual)'] = annual_returns_df.iloc[-1]
    performance.loc['Volatility_(Annual)'] = annual_vol
    performance.loc['Max_Drawdown'] = max_drawdown
    performance.loc['Avg_Drawdown'] = avg_drawdown
    performance.loc['Sharpe_Ratio_(Annual)'] = sharpe_ratio
    performance.loc['Sortino_Ratio_(Annual)'] = sortino_ratio

    if benchmark:
        performance.loc['Relative_Return_(Annual)'] = relative_return
        performance.loc['Relative_Vol_(Annual)'] = relative_vol
        performance.loc['Relative_Max_DD'] = relative_max_drawdown
        performance.loc['Relative_Avg_DD'] = relative_avg_drawdown
        performance.loc['Information_Ratio_(Annual)'] = info_ratio

    print(performance)
    values_df.index = pd.Series([
        local_time.replace(tzinfo=timezone('UTC'))
        for local_time in values_df.index
    ])
    drawdowns.index = pd.Series([
        local_time.replace(tzinfo=timezone('UTC'))
        for local_time in drawdowns.index
    ])
    performance.loc['values_data'] = values_df
    performance.loc['returns_data'] = returns_df
    performance.loc['annual_returns_data'] = annual_returns_df
    performance.loc['drawdowns_data'] = drawdowns
    if benchmark:
        performance.loc['benchmark_values_data'] = benchmark_values

    strategy_plot(performance, benchmark, freq='intraday')

    return performance
class TestDatetimeIndexOps(Ops):
    def setup_method(self, method):
        super(TestDatetimeIndexOps, self).setup_method(method)
        mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(
            x, PeriodIndex))
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = [o for o in self.objs if not mask(o)]

    def test_ops_properties(self):
        f = lambda x: isinstance(x, DatetimeIndex)
        self.check_ops_properties(DatetimeIndex._field_ops, f)
        self.check_ops_properties(DatetimeIndex._object_ops, f)
        self.check_ops_properties(DatetimeIndex._bool_ops, f)

    def test_ops_properties_basic(self):

        # sanity check that the behavior didn't change
        # GH7206
        for op in ['year', 'day', 'second', 'weekday']:
            pytest.raises(TypeError, lambda x: getattr(self.dt_series, op))

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        pytest.raises(AttributeError, lambda: s.weekday)

    def test_minmax_tz(self, tz_fixture):
        tz = tz_fixture
        # monotonic
        idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                                tz=tz)
        assert idx1.is_monotonic

        # non-monotonic
        idx2 = pd.DatetimeIndex(
            ['2011-01-01', pd.NaT, '2011-01-03', '2011-01-02', pd.NaT], tz=tz)
        assert not idx2.is_monotonic

        for idx in [idx1, idx2]:
            assert idx.min() == Timestamp('2011-01-01', tz=tz)
            assert idx.max() == Timestamp('2011-01-03', tz=tz)
            assert idx.argmin() == 0
            assert idx.argmax() == 2

    @pytest.mark.parametrize('op', ['min', 'max'])
    def test_minmax_nat(self, op):
        # Return NaT
        obj = DatetimeIndex([])
        assert pd.isna(getattr(obj, op)())

        obj = DatetimeIndex([pd.NaT])
        assert pd.isna(getattr(obj, op)())

        obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT])
        assert pd.isna(getattr(obj, op)())

    def test_numpy_minmax(self):
        dr = pd.date_range(start='2016-01-15', end='2016-01-20')

        assert np.min(dr) == Timestamp('2016-01-15 00:00:00', freq='D')
        assert np.max(dr) == Timestamp('2016-01-20 00:00:00', freq='D')

        errmsg = "the 'out' parameter is not supported"
        tm.assert_raises_regex(ValueError, errmsg, np.min, dr, out=0)
        tm.assert_raises_regex(ValueError, errmsg, np.max, dr, out=0)

        assert np.argmin(dr) == 0
        assert np.argmax(dr) == 5

        if not _np_version_under1p10:
            errmsg = "the 'out' parameter is not supported"
            tm.assert_raises_regex(ValueError, errmsg, np.argmin, dr, out=0)
            tm.assert_raises_regex(ValueError, errmsg, np.argmax, dr, out=0)

    def test_repeat_range(self, tz_fixture):
        tz = tz_fixture
        rng = date_range('1/1/2000', '1/1/2001')

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz)
        exp = pd.DatetimeIndex(
            ['2001-01-01', '2001-01-01', '2001-01-02', '2001-01-02'], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz)
        exp = pd.DatetimeIndex(
            ['2001-01-01', '2001-01-01', '2001-01-03', '2001-01-03'], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], tz=tz)
        exp = pd.DatetimeIndex([
            '2001-01-01', '2001-01-01', '2001-01-01', 'NaT', 'NaT', 'NaT',
            '2003-01-01', '2003-01-01', '2003-01-01'
        ],
                               tz=tz)
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_fixture):
        tz = tz_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start='2016-01-01', periods=2, freq='30Min', tz=tz)

        expected_rng = DatetimeIndex([
            Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
            Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
            Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
            Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
        ])

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        tm.assert_raises_regex(ValueError, msg, np.repeat, rng, reps, axis=1)

    def test_resolution(self, tz_fixture):
        tz = tz_fixture
        for freq, expected in zip(
            ['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], [
                'day', 'day', 'day', 'day', 'hour', 'minute', 'second',
                'millisecond', 'microsecond'
            ]):
            idx = pd.date_range(start='2013-04-01',
                                periods=30,
                                freq=freq,
                                tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_fixture):
        tz = tz_fixture
        # GH 7735
        idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1,
                                                        len(idx) + 1)),
                            tz=tz)

        exp_idx = pd.date_range('2011-01-01 18:00',
                                freq='-1H',
                                periods=10,
                                tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range('2011-01-01 09:00',
                                 freq='H',
                                 periods=10,
                                 tz=tz)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex([
            '2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00',
            '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT
        ],
                            tz=tz)

        exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
                                tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(
            ['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        # GH 9512
        for idx in map(DatetimeIndex,
                       ([0, 1, 0], [0, 0, -1], [0, -1, -1],
                        ['2015', '2015', '2016'], ['2015', '2015', '2014'])):
            assert idx[0] in idx

    @pytest.mark.parametrize('idx', [
        DatetimeIndex(
            ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx'),
        DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'],
            freq='H',
            name='tzidx',
            tz='Asia/Tokyo')
    ])
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([0, 1, 2]),
                                    check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([2, 1, 0]),
                                    check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        'index_dates,expected_dates',
        [([
            '2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02',
            '2011-01-01'
        ], [
            '2011-01-01', '2011-01-01', '2011-01-02', '2011-01-03',
            '2011-01-05'
        ]),
         ([
             '2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02',
             '2011-01-01'
         ], [
             '2011-01-01', '2011-01-01', '2011-01-02', '2011-01-03',
             '2011-01-05'
         ]),
         ([pd.NaT, '2011-01-03', '2011-01-05', '2011-01-02', pd.NaT
           ], [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', '2011-01-05'])])
    def test_order_without_freq(self, index_dates, expected_dates, tz_fixture):
        tz = tz_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name='idx')
        expected = DatetimeIndex(expected_dates, tz=tz, name='idx')

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        # GH 10115
        idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        # to check Index/Series compat
        base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep='last')
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep='last')
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize('freq', [
        'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', '-3D', 'W', '-1W',
        'H', '2H', '-2H', 'T', '2T', 'S', '-3S'
    ])
    def test_infer_freq(self, freq):
        # GH 11018
        idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
        result = pd.DatetimeIndex(idx.asi8, freq='infer')
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_nat_new(self):
        idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x')
        result = idx._nat_new()
        exp = pd.DatetimeIndex([pd.NaT] * 5, name='x')
        tm.assert_index_equal(result, exp)

        result = idx._nat_new(box=False)
        exp = np.array([tslib.iNaT] * 5, dtype=np.int64)
        tm.assert_numpy_array_equal(result, exp)

    def test_nat(self, tz_naive_fixture):
        timezone = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=timezone)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert not idx.hasnans
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=timezone)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    def test_equals(self):
        # GH 13107
        idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
                                tz='US/Pacific')
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific')
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

    @pytest.mark.parametrize('values',
                             [['20180101', '20180103', '20180105'], []])
    @pytest.mark.parametrize(
        'freq', ['2D', Day(2), '2B',
                 BDay(2), '48H', Hour(48)])
    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(['20180101', '20180103', '20180105'])

        # setting with an incompatible freq
        msg = ('Inferred frequency 2D from passed values does not conform to '
               'passed frequency 5D')
        with tm.assert_raises_regex(ValueError, msg):
            idx.freq = '5D'

        # setting with non-freq string
        with tm.assert_raises_regex(ValueError, 'Invalid frequency'):
            idx.freq = 'foo'

    def test_offset_deprecated(self):
        # GH 20716
        idx = pd.DatetimeIndex(['20180101', '20180102'])

        # getter deprecated
        with tm.assert_produces_warning(FutureWarning):
            idx.offset

        # setter deprecated
        with tm.assert_produces_warning(FutureWarning):
            idx.offset = BDay()
Example #16
0
#pandas sequence
pd.date_range('2019-07-01', '2019-10-30')

pd.date_range('2019-07-01', periods=45)
pd.date_range('2019-07-01', periods=3, freq='M')
pd.date_range('2019-07-01', periods=5, freq='H')

#%%%
#

#%%%
pd.timedelta_range(0, periods=9, freq='2H20T')

#business day offset
from pandas.tseries.offsets import BDay
pd.date_range('2019-07-01', periods=9, freq=BDay())
#see the gap in days - Sat & Sun

#%%%
#using Frequencies and Offsets
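# A minimal sketch of combining frequency aliases with offsets
# (the aliases below are the standard pandas ones; not part of the original notes):
pd.date_range('2019-07-01', periods=4, freq='W-MON')    # weekly, anchored on Mondays
pd.date_range('2019-07-01', periods=5, freq='2H30T')    # every 2 hours 30 minutes
pd.Timestamp('2019-07-05') + BDay(3)                    # Friday + 3 business days -> Wednesday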




#%%%
#Reading Stock Data
#conda install pandas-datareader
from pandas_datareader import data
#https://pandas-datareader.readthedocs.io/en/latest/
#https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#remote-data-google
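# A minimal sketch of pulling daily prices (the ticker and dates are placeholders;
# the 'yahoo' source mirrors the calls used elsewhere in these examples):
prices = data.DataReader('MSFT', 'yahoo', '2019-07-01', '2019-10-30')
prices['Close'].head()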
Example #17
0
import pandas as pd
from tia.bbg import LocalTerminal
import tia.bbg.datamgr as dm
import os
from pandas import ExcelWriter
import datetime as dt
from pandas.tseries.offsets import BDay
import glob
t1 = dt.date.today() - BDay(1)
t1 = t1.strftime('%Y%m%d')
mgr = dm.BbgDataManager()  # used to access the Bloomberg API with Python; used in the getAdvs method of the class


class executedOrderReport(object):
    def __init__(self,
                 location,
                 saveLoc,
                 threshold,
                 advThreshold,
                 delimiter="|",
                 *args,
                 **kwargs):
        """
        Parameters:
        location = where the raw fidessa file is
        saveLoc = where the output report will go
        """

        self.location = location
        self.delimiter = delimiter
Example #18
0
File: stock.py  Project: flex94/Python
 def close(self, date):
     """Return stock close price for given date."""
     d = date if is_bday(date) else (date - BDay(1)).date()
     return self._prices.loc[d]["Close"]
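# is_bday is not defined in this snippet; a plausible stand-in, sketched under that assumption:
import pandas as pd

def is_bday(date):
    # True when `date` falls on a Monday-Friday business day (no holiday calendar)
    return bool(len(pd.bdate_range(date, date)))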
Example #19
0
class TestDatetimeIndexOps(Ops):
    def setup_method(self, method):
        super().setup_method(method)
        mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex))
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = [o for o in self.objs if not mask(o)]

    def test_ops_properties(self):
        f = lambda x: isinstance(x, DatetimeIndex)
        self.check_ops_properties(DatetimeIndex._field_ops, f)
        self.check_ops_properties(DatetimeIndex._object_ops, f)
        self.check_ops_properties(DatetimeIndex._bool_ops, f)

    def test_ops_properties_basic(self):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(self.dt_series, op)

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = pd.DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            ]
        )

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        tz = tz_naive_fixture
        for freq, expected in zip(
            ["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
            [
                "day",
                "day",
                "day",
                "day",
                "hour",
                "minute",
                "second",
                "millisecond",
                "microsecond",
            ],
        ):
            idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)

        exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        # GH 9512
        for idx in map(
            DatetimeIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["2015", "2015", "2016"],
                ["2015", "2015", "2014"],
            ),
        ):
            assert idx[0] in idx

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        # GH 10115
        idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        # to check Index/Series compat
        base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep="last")
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep="last")
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize(
        "freq",
        [
            "A",
            "2A",
            "-2A",
            "Q",
            "-1Q",
            "M",
            "-1M",
            "D",
            "3D",
            "-3D",
            "W",
            "-1W",
            "H",
            "2H",
            "-2H",
            "T",
            "2T",
            "S",
            "-3S",
        ],
    )
    def test_infer_freq(self, freq):
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10)
        result = pd.DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_nat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    def test_equals(self):
        # GH 13107
        idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"
Example #20
0

def GetBulkRiskiness(stocks, start, end):
    import pandas as pd
    start_ = pd.DataFrame(columns = ['PriceRisk', 'ReturnRisk', 'Volatility'])
    for stock in stocks:
        _ = GetRiskiness(stock, start, end)
        start_ = pd.concat([start_,_])
    return start_    

import pandas as pd
from pandas.tseries.offsets import BDay
_days_of_week = {
	0:"Monday",1:"Tuesday",2:"Wednesday",3:"Thursday",4:"Friday"
}

end = pd.datetime.today() - BDay(1)

#risk = GetBulkRiskiness(_stocks, start = pd.datetime.today() - BDay(60), end = pd.datetime.today())

try:
	risk = pd.read_excel("Risk.xlsx")
	risk.to_excel("Risk.xlsx")
except Exception:
	pass
import pandas as pd
end = pd.datetime.today() - BDay(1)
Today = (GetTSXReturns(end - BDay(1),end))
Today.columns = ["Monday"]
end = pd.datetime.today() - BDay(2) 
Thursday = (GetTSXReturns(end - BDay(1),end))
Thursday.columns = ["Friday"]
Example #21
0
    return str(mult) + code


#----------------------------------------------------------------------
# Offset names ("time rules") and related functions


from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli,
                                    Week, Micro, MonthEnd, MonthBegin,
                                    BMonthBegin, BMonthEnd, YearBegin, YearEnd,
                                    BYearBegin, BYearEnd, QuarterBegin,
                                    QuarterEnd, BQuarterBegin, BQuarterEnd)

_offset_map = {
    'D'     : Day(),
    'B'     : BDay(),
    'H'     : Hour(),
    'T'     : Minute(),
    'S'     : Second(),
    'L'     : Milli(),
    'U'     : Micro(),
    None    : None,

    # Monthly - Calendar
    'M'      : MonthEnd(),
    'MS'     : MonthBegin(),

    # Monthly - Business
    'BM'     : BMonthEnd(),
    'BMS'    : BMonthBegin(),
Example #22
0
dateFormater = ConciseDateFormatter(AutoDateLocator())

palette = sns.color_palette()

#%%

rki, meldedatum, hospital = read_case_data("berlin-cases.csv",
                                           "berlin-cases-meldedatum.csv",
                                           "berlin-hospital.csv")

#%% Activity participation

from pandas.tseries.offsets import BDay

isBusinessDay = BDay().onOffset
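# note: onOffset was deprecated in pandas 1.1; newer versions use BDay().is_on_offset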

act = pd.read_csv(
    "C:/home/Development/matsim-org/matsim-episim/output/BerlinSnzData_daily_until20200705.csv",
    sep="\t",
    parse_dates=[0])

act_week = act[act.date.map(isBusinessDay)]
act_wend = act[act.date.map(lambda *args: not isBusinessDay(*args))]

fig, ax = plt.subplots(dpi=250, figsize=(7.5, 3.8))

ax = sns.scatterplot(x="date",
                     y="home",
                     label="home",
                     s=40,
Example #23
0
def test_freq_offsets():
    off = BDay(1, offset=timedelta(0, 1800))
    assert off.freqstr == "B+30Min"

    off = BDay(1, offset=timedelta(0, -1800))
    assert off.freqstr == "B-30Min"
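
# A small sketch of what the offset parameter does when the BDay is applied
# (the dates below are illustrative, not taken from the test):
from datetime import timedelta

import pandas as pd
from pandas.tseries.offsets import BDay

ts = pd.Timestamp('2011-01-01')            # a Saturday
ts + BDay(1, offset=timedelta(0, 1800))    # rolls to Monday 2011-01-03, then adds 30 minutes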
def mainloop(trackit_api_username, jira_rest_call_post,
             jira_rest_call_get_trackit_id, jira_authorization, db_connection,
             jira_key, track_it_full_hostname, jira_server_address, sql,
             attachment_folder, duedate_map):
    """
    Purpose:
        The main function to run everything above. From retrieving Track-It!
        data from the database to POSTing it to JIRA and closing Track-It!
        work orders with a comment to the new JIRA issue.
    Args:
        trackit_api_username (str): A Track-It! Technician ID
        jira_rest_call_post (str): POST-able URL for JIRA
        jira_rest_call_get_trackit_id (str): GET-able URL for JIRA
        jira_authorization (str): BASE64 Encoded Username:Password
        db_connection (pymssql Object): A very specific library object from
            pymssql-2.1.3-cp36-cp36m-win_amd64.whl which can connect to SQL
            Server 2008.
        jira_key (str): JIRA Project key
        track_it_full_hostname (str): Track-It! Server Address/URL
        jira_server_address (str): JIRA Server Address/URL
        sql (str): A very specific SQL Select statement.
        Example:
            use TRACKIT_DATA;
            SELECT wo_num 'Workorder Number',
            priority 'Priority',
            LEFT(CONVERT(VARCHAR, REQDATE, 120), 10) AS 'Request Date',
            task AS 'Summary',
            request 'Requestor',
            RESPONS AS 'Assignee Username',
            LEFT(CONVERT(VARCHAR, duedate, 120), 10) AS 'Due Date',
            LEFT(CONVERT(VARCHAR, modidate, 120), 10) AS 'Modify Date',
            TRACKIT_DATA.dbo.tasks.dept,
            type,
            wotype2 'Subtype',
            wotype3 'Category',
            respons 'Assigned Technician',
            descript 'Description',
            note 'Notes',
            lookup1 'Company'
            FROM TRACKIT_DATA.dbo.tasks
            WHERE tasks.respons in ('Maxim Tam')
            and priority in ('Ongoing Support','High','Urgent','Critical','Routine','Project')
            and WorkOrderStatusId = 1
            and reqdate >= '2017-07-11'
            ORDER BY RESPONS, WO_NUM DESC;
        attachment_folder (str): Folder path to a specific work order's attachments
        duedate_map (str): Dictionary of {Issue Priority:Resolution Days}
    Returns:
        Nothing.
    """
    # Declare database query output
    database_full_output = get_database_cursor(db_connection, sql, jira_key)
    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
          " Amount of open workorders: " + str(len(database_full_output)))
    # Cleans query output to create list of new Trackit workorder IDs
    # Retrieves all existing Jira Trackit IDs and compares the two lists
    trackit_ids_trackit = [
        int(x["fields"]["customfield_10411"]) for x in database_full_output
    ]
    trackit_ids_jira_dict = dict((get_request(jira_rest_call_get_trackit_id,
                                              jira_authorization)))
    trackit_ids_jira = [
        int(issue["fields"]["customfield_10411"])
        for issue in trackit_ids_jira_dict["issues"]
        if str(issue["fields"]["customfield_10411"]) != 'None'
    ]
    invalid_ids = [x for x in trackit_ids_trackit if x in trackit_ids_jira]
    database_full_output_valid = [
        x for x in database_full_output
        if int(x["fields"]["customfield_10411"]) not in invalid_ids
    ]

    # Submits POST request to JIRA to create new issue
    # and closes & comments on the old Track-It! ticket
    if len(database_full_output_valid) > 0:
        for data in database_full_output_valid:

            print("Moving workorders to Jira:" +
                  str(data["fields"]["customfield_10411"]))
            logging.info("Moving workorders to Jira:" +
                         str(data["fields"]["customfield_10411"]))

            trackit_key = create_trackit_key(
                trackit_api_username,
                track_it_full_hostname=track_it_full_hostname)

            try:
                response = (post_request(jira_rest_call_post, data,
                                         jira_authorization))
            except HTTPError:
                # replace customfield with data if you want the post request json string
                logging.error(
                    str(sys.exc_info()[1]) + ": " +
                    str(data["fields"]["customfield_10411"]))
                continue

            # Jira URL to newly created ticket
            response = (response)
            jira_link = "http://" + jira_server_address + \
                "/browse/" + str(response["key"])
            jira_attachment_link = "http://" + jira_server_address + r"/rest/api/2/issue/" + str(
                response["key"]) + "/attachments"
            print(jira_attachment_link)
            logging.info("Successfully migrated to Jira at: " +
                         jira_attachment_link)
            import_attachments(data["fields"]["customfield_10411"],
                               jira_attachment_link, jira_authorization,
                               attachment_folder)

            post_addnote_request_trackit(trackit_key,
                                         data["fields"]["customfield_10411"],
                                         jira_link, track_it_full_hostname)
            post_close_request_trackit(trackit_key,
                                       data["fields"]["customfield_10411"],
                                       jira_link, track_it_full_hostname)

    # Attempts to close TrackIt tickets when previously unable to
    if len(invalid_ids) > 0:
        print("Updating previously locked workorders: " + str(invalid_ids))
        for keys in invalid_ids:
            trackit_key = create_trackit_key(trackit_api_username,
                                             track_it_full_hostname)

            jira_trackit_id_url = "http://" + jira_server_address + \
                                  "/rest/api/2/search?jql=%22TrackIT%20%23%22%3D" + \
                str(keys)
            jira_link = "http://" + jira_server_address + "/browse/" \
                        + get_request(jira_trackit_id_url,
                                      jira_authorization)["issues"][0]["key"]

            post_addnote_request_trackit(trackit_key, keys, jira_link,
                                         track_it_full_hostname)
            post_close_request_trackit(trackit_key, keys, jira_link,
                                       track_it_full_hostname)

    # Due Date Creation
    # @TODO: PLEASE REFACTOR TO SHRINK MAINLOOP
    get_empty_duedates_url = "http://" + jira_server_address + \
        "/rest/api/2/search?jql=project%20%3D%20" \
        "" + jira_key + "" \
        "%20AND%20duedate%20is%20EMPTY%20AND%20type%20%20%3D%20%22Incident%20Management%22"

    duedates_response = get_request(get_empty_duedates_url, jira_authorization)
    srq_ids_empty = [[
        ticket["key"], ticket["fields"]["priority"]["name"],
        pd.to_datetime(ticket["fields"]["created"][0:10])
    ] for ticket in duedates_response["issues"]]

    for value in srq_ids_empty:
        duedate = str(value[2] + BDay(duedate_map[str(value[1])]))[0:10]
        headers = {
            "Authorization": "Basic YXBpOlBhc3N3b3Jk",
            "Content-Type": "application/json"
        }

        r = requests.put('http://' + config["jira_server_address"] + \
                         '/rest/api/2/issue/' + str(value[0]),
                         data=json.dumps({"fields": {"duedate": str(duedate)}}), headers=headers)
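
# A minimal sketch of the due-date arithmetic above, using a hypothetical duedate_map:
import pandas as pd
from pandas.tseries.offsets import BDay

duedate_map = {"Critical": 1, "High": 3, "Routine": 10}   # resolution days per priority (illustrative)
created = pd.to_datetime("2017-07-14")                    # a Friday
duedate = str(created + BDay(duedate_map["High"]))[0:10]  # -> '2017-07-19', weekends are skipped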
Example #25
0
affirmation_list = ['Y', 'y', 'Yes', 'YES', 'yes']
negation_list = ['N', 'n', 'No', 'NO', 'no']
valid_answers = ['Y', 'y', 'Yes', 'YES', 'yes', 'N', 'n', 'No', 'NO', 'no']

# Explaining nature of script
print("------------------------------------------------")
print("This script will analyse a reverse MACD trading strategy "
      "in a cryptocurrency portfolio")
print("The maximum absolute exposure per asset is 25%")
print("------------------------------------------------")
'''
###########
## DATES ##
###########
'''
previous_bday = pd.datetime.today() - BDay(1)
earliest_date = dt.datetime(2015, 8, 8)  # Earliest date with data available
default_initial_date = dt.datetime(2016, 1, 1)
default_final_date = dt.datetime(2019, 4, 30)
initial_date, final_date = gdate(default_initial_date, default_final_date)
'''
####################
## GATHERING DATA ##
####################
'''
df_data = gdata()
df_data = df_data[initial_date:final_date]
df_data.drop(['nasdaq_close', 'nasdaq_return'], axis=1, inplace=True)
'''
########################
## GENERATING SIGNALS ##
    prop = entity.property.add()
    prop.name = name
    prop.value.indexed = indx
    prop.value.string_value = item

if __name__ == "__main__":

    #launch example python DailyForecast.py

    #Track time of the simulation
    startTime = tt.time()

    #First day of trading
    nowTime = datetime.now(tz=timezone('US/Eastern')).time()
    if nowTime >= time(19,00):
        dayToPredict = datetime.now(tz=timezone('US/Eastern')) + BDay(1)
    else:
        dayToPredict = datetime.now(tz=timezone('US/Eastern')) + BDay(0)
            
    print "\nPredicting %s\n"%dayToPredict.date()
    logging.info("Predicting %s\n"%dayToPredict.date())
    
    NPredPast             = 10
    history_len           = 100 #days
    saftey_days           = 10

    startOfPredictSim     = dayToPredict - BDay(NPredPast)

    endOfHistoricalDate   = dayToPredict - BDay(1)
    startOfHistoricalDate = startOfPredictSim - BDay(history_len+saftey_days)
    
 def test_generate(self):
     rng1 = list(generate_range(START, END, offset=BDay()))
     rng2 = list(generate_range(START, END, time_rule='B'))
     assert rng1 == rng2
def PredictPrices(prices: PricingData,
                  predictionMethod: int = 0,
                  daysForward: int = 5,
                  numberOfLearningPasses: int = 500):
    #Simple procedure to test different prediction methods
    assert (0 <= predictionMethod <= 2)
    plot = PlotHelper()
    if predictionMethod == 0:  #Linear projection
        print('Running Linear Projection model predicting ' +
              str(daysForward) + ' days...')
        modelDescription = prices.stockTicker + '_Linear_daysforward' + str(
            daysForward)
        predDF = prices.GetPriceHistory()
        predDF['Average'] = (predDF['Open'] + predDF['High'] + predDF['Low'] +
                             predDF['Close']) / 4
        d = predDF.index[-1]
        for i in range(
                0, daysForward
        ):  #Add new days to the end for crystal ball predictions
            predDF.loc[d + BDay(i + 1), 'Average_Predicted'] = 0
        predDF['PastSlope'] = predDF['Average'].shift(
            daysForward) / predDF['Average'].shift(daysForward * 2)
        predDF['Average_Predicted'] = predDF['Average'].shift(
            daysForward) * predDF['PastSlope']
        predDF['PercentageDeviation'] = abs(
            (predDF['Average'] - predDF['Average_Predicted']) /
            predDF['Average'])
    else:
        SourceFieldList = ['High', 'Low', 'Open', 'Close']
        if predictionMethod == 1:  #LSTM learning
            print('Running LSTM model predicting ' + str(daysForward) +
                  ' days...')
            SourceFieldList = None
            UseLSTM = True
            window_size = 10
            modelDescription = prices.stockTicker + '_LSTM' + '_epochs' + str(
                numberOfLearningPasses) + '_histwin' + str(
                    window_size) + '_daysforward' + str(daysForward)
        elif predictionMethod == 2:  #CNN Learning
            print('Running CNN model predicting ' + str(daysForward) +
                  ' days...')
            UseLSTM = False
            window_size = 16 * daysForward
            modelDescription = prices.stockTicker + '_CNN' + '_epochs' + str(
                numberOfLearningPasses) + '_histwin' + str(
                    window_size) + '_daysforward' + str(daysForward)
        learningModule = StockPredictionNN(modelName=prices.stockTicker,
                                           UseLSTM=UseLSTM)
        learningModule.LoadSource(prices.GetPriceHistory(),
                                  SourceFieldList=SourceFieldList,
                                  window_size=window_size)
        learningModule.LoadTarget(targetDF=None,
                                  prediction_target_days=daysForward)
        learningModule.MakeBatches(batch_size=32, train_test_split=.93)
        learningModule.Train(epochs=numberOfLearningPasses)
        learningModule.Predict(True)
        predDF = learningModule.GetTrainingResults(True, True)
    averageDeviation = predDF['PercentageDeviation'].tail(
        round(predDF.shape[0] /
              4)).mean()  #Average of the last 25% to account for training.
    print('Average deviation: ', averageDeviation * 100, '%')
    predDF = predDF.reindex(sorted(predDF.columns),
                            axis=1)  #Sort columns alphabetical
    predDF.to_csv(dataFolder + modelDescription + '.csv')
    plot.PlotDataFrame(predDF[['Average',
                               'Average_Predicted']], modelDescription, 'Date',
                       'Price', True, 'experiment/' + modelDescription)
    plot.PlotDataFrameDateRange(predDF[['Average', 'Average_Predicted']], None,
                                160, modelDescription + '_last160Days', 'Date',
                                'Price',
                                dataFolder + modelDescription + '_last160Days')
    plot.PlotDataFrameDateRange(
        predDF[['Average', 'Average_Predicted']], None, 1000,
        modelDescription + '_last1000Days', 'Date', 'Price',
        dataFolder + modelDescription + '_last1000Days')
Example #29
0
    def get_expiries_from_dates(self, date_time_index, calendar, tenor):
        freq = self.get_business_days_tenor(tenor)
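        # the expiry for each date is `freq` business days after that date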

        return pandas.DatetimeIndex(date_time_index + BDay(freq))
Example #30
0
def main():
    demo = 'd286f23fd3d3c4fbd6cc5768c2a6388d'

    #data = read_csv('/Users/alenshaju/Downloads/SP500_tickers_100.csv')
    #companies = data['Ticker'].to_list()[:10]

    consumer_companies = [
        'TJX', 'NKE', 'TGT', 'HD', 'LOW', 'PG', 'WMT', 'COST', 'MDLZ', 'EL',
        'KO', 'PEP', 'PM', 'MO', 'BKNG', 'MCD', 'SBUX'
    ]
    energy_companies = ['NEE', 'XOM', 'CVX']
    fig_companies = [
        'BLK', 'AXP', 'V', 'MA', 'PYPL', 'FIS', 'JPM', 'BAC', 'WFC', 'USB',
        'SPGI', 'MS', 'SCHW', 'GS', 'BRK.B', 'AMT'
    ]  #C
    healthcare_companies = [
        'ABBV', 'AMGN', 'GILD', 'ABT', 'DHR', 'MDT', 'SYK', 'ISRG', 'CVS',
        'CI', 'TMO', 'UNH', 'ANTM', 'JNJ', 'PFE', 'LLY', 'BMY'
    ]
    industrials_companies = [
        'BA', 'RTX', 'LMT', 'DE', 'UPS', 'TSLA', 'GM', 'CAT', 'HON', 'GE',
        'MMM', 'LIN', 'UNP'
    ]
    tech_companies = [
        'ADBE', 'CRM', 'INTU', 'GOOG', 'GOOG.L', 'FB', 'AMZN', 'ACN', 'IBM',
        'AMAT', 'LRCX', 'NVDA', 'INTC', 'AVGO', 'TXN', 'QCOM', 'MU', 'AMD',
        'MSFT', 'ORCL', 'NOW', 'AAPL'
    ]
    mt_companies = ['CMCS.A', 'CHTR', 'CSCO', 'VZ', 'T', 'DIS', 'NFLX']

    companies = ['UAL']

    past_call_dict = {}
    yec = YahooEarningsCalendar()

    for company in companies:
        print("Ticker:", company)
        past_calls_df = get_past_earnings_call(yec, company)
        past_call_dict[company] = past_calls_df
    df_returns_scores = pd.DataFrame(columns=['Return', 'Score'])
    sia = SentimentIntensityAnalyzer()

    d = {}
    with open(
            "/Users/alenshaju/Downloads/LoughranMcDonald_MasterDictionary_2018.txt"
    ) as f:
        for line in f:
            (key, val) = line.split()
            d[key] = float(val)
    sia.lexicon.update(d)
    excel_df = pd.DataFrame(
        columns=['Ticker', 'Quarter', 'Sentiment Score', 'Returns'])

    for company in companies:
        print("For company: ", company)
        for i, row in past_call_dict[company].iterrows():
            date = datetime.datetime.strptime(row['startdatetime'],
                                              '%Y-%m-%dT%H:%M:%S.%fZ')
            quarter = pd.Timestamp(date).quarter
            year = date.year
            if year <= datetime.datetime.now().year:
                if year == datetime.datetime.now().year:
                    if quarter >= pd.Timestamp(
                            datetime.datetime.now()).quarter:
                        continue
                transcript = requests.get(
                    f'https://financialmodelingprep.com/api/v3/earning_call_transcript/{company}?quarter={quarter}&year={year}&apikey={demo}'
                ).json()

                if len(transcript) == 0:
                    continue

                transcript = transcript[0]['content'].split('\n')
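                # roll the call date back one business day if it falls on a weekend (non-business day)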
                if not bool(len(pd.bdate_range(date, date))):
                    date = date - BDay(1)
                if (date + BDay(1)) in get_trading_close_holidays(year):
                    end_date = date + BDay(1)
                else:
                    end_date = date

                stock = yf.download(company,
                                    start=date,
                                    end=end_date + BDay(1) +
                                    datetime.timedelta(1),
                                    progress=False)
                price_change_rate = (stock['Adj Close'][1] /
                                     stock['Adj Close'][0]) - 1
                price_change_percent = price_change_rate * 100
                sentiment_score = sia.polarity_scores(
                    transcript[0])['pos'] - sia.polarity_scores(
                        transcript[0])['neg']
                print(transcript)
                print('score: ', sia.polarity_scores(transcript[0]))
                print("price change: ", price_change_rate)

                df_returns_scores = df_returns_scores.append(
                    {
                        'Return': price_change_rate,
                        'Score': sentiment_score
                    },
                    ignore_index=True)
                excel_df = excel_df.append(
                    {
                        'Ticker': company,
                        "Date": date,
                        'Quarter': quarter,
                        'Sentiment Score': sentiment_score,
                        'Returns': price_change_rate
                    },
                    ignore_index=True)
            if i > 8:  # 10years - 4 quarters
                break

    excel_df.to_excel("/Users/alenshaju/Downloads/mt_excel_file_v1.xlsx")

    x = df_returns_scores.Score.values.reshape(-1, 1)
    y = df_returns_scores.Return.values.reshape(-1, 1)
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.4,
                                                        random_state=42)

    support_vector_reg_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
    support_vector_reg_model.fit(x_train, y_train)

    y_pred = support_vector_reg_model.predict(x_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2_data = r2_score(y_test, y_pred)
    print("Root mean square error: ", rmse)
    print("R^2 score: ", r2_data)

    train_test_label = ['Training Data', 'Testing Data']
    model_color = ['m', 'c', 'g']

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 10), sharey=True)

    ###### Training Data ##########
    axes[0].plot(x_test,
                 y_pred,
                 color=model_color[0],
                 lw=2,
                 label='{} model'.format(train_test_label[0]))
    axes[0].scatter(x_train[np.setdiff1d(np.arange(len(x_train)),
                                         support_vector_reg_model.support_)],
                    y_train[np.setdiff1d(np.arange(len(x_train)),
                                         support_vector_reg_model.support_)],
                    facecolor="none",
                    edgecolor=model_color[0],
                    s=50,
                    label='Training data')
    axes[0].legend(loc='upper center',
                   bbox_to_anchor=(0.5, 1.1),
                   ncol=1,
                   fancybox=True,
                   shadow=True)

    ####### Testing Data #########
    axes[1].plot(x_test,
                 y_pred,
                 color=model_color[1],
                 lw=2,
                 label='{} model'.format(train_test_label[1]))
    axes[1].scatter(x_test[np.setdiff1d(np.arange(len(x_test)),
                                        support_vector_reg_model.support_)],
                    y_pred[np.setdiff1d(np.arange(len(x_test)),
                                        support_vector_reg_model.support_)],
                    facecolor="none",
                    edgecolor=model_color[1],
                    s=50,
                    label='Testing data')
    axes[1].legend(loc='upper center',
                   bbox_to_anchor=(0.5, 1.1),
                   ncol=1,
                   fancybox=True,
                   shadow=True)
    fig.text(0.5, 0.04, 'data', ha='center', va='center')
    fig.text(0.06,
             0.5,
             'target',
             ha='center',
             va='center',
             rotation='vertical')
    fig.suptitle("Support Vector Regression", fontsize=14)
    plt.show()