def performance_analysis_MF(pnls_or_values, initial_cash=1, benchmark='^GSPC', risk_free='^IRX', mar=0.0, input_type='value'): if input_type == 'value': values_df = pd.Series(pnls_or_values) elif input_type == 'pnl': values_df = pd.Series(pnls_or_values).cumsum() + initial_cash values_df.index = pd.to_datetime(values_df.index) start_date = values_df.index[0] end_date = values_df.index[-1] # add the initial portfolio values values_df = pd.concat( [pd.Series([initial_cash], index=[start_date + BDay(-1)]), values_df]) # calc the daily returns returns_df = (values_df - values_df.shift(1)) / values_df.shift(1) returns_df = returns_df.dropna() # calc the annualized return cum_return = values_df.iloc[1:] / initial_cash - 1 annual_returns_df = (cum_return + 1)**(252 / np.array(range(1, len(returns_df) + 1))) - 1 # calc the annualized volatility annual_vol = returns_df.std() * np.sqrt(252) # calc the Sharpe ratio / sortino ratio if risk_free: # get the risk-free prices RF_quotes = web.DataReader(risk_free, 'yahoo', start_date + BDay(-1), end_date)['Close'] # get the expected risk-free rate risk_free = np.mean(1 / (1 - RF_quotes * 0.01) - 1) else: risk_free = 0.0 daily_risk_free = risk_free / 252 daily_mar = mar / 252 sharpe_ratio = (returns_df - daily_risk_free).mean() / ( returns_df - daily_risk_free).std() * 252**0.5 sortino_ratio = (returns_df.mean() - daily_mar) / ( returns_df[returns_df < daily_mar]).std() * 252**0.5 #sharpe_ratio = (returns_df.mean()*252 - risk_free) / ((returns_df - daily_risk_free).std()*252**0.5) #sortino_ratio = (returns_df.mean()*252 - mar) / ((returns_df[returns_df < daily_mar]).std()*252**0.5) # calc the maximum drawdown cum_max_value = (1 + cum_return).cummax() drawdowns = ((1 + cum_return) - cum_max_value) / cum_max_value max_drawdown = np.min(drawdowns) avg_drawdown = drawdowns.mean() if benchmark: # get the benchmark prices benchmark_prices = web.DataReader(benchmark, 'yahoo', start_date + BDay(-1), end_date)['Close'] print(benchmark_prices.shape) # calc the benchmark daily returns benchmark_returns = (benchmark_prices - benchmark_prices.shift(1) ) / benchmark_prices.shift(1) benchmark_returns = benchmark_returns.dropna() # calc the benchmark annualized return benchmark_cum_return = np.exp(np.log1p(benchmark_returns).cumsum()) - 1 benchmark_annual_returns = (benchmark_cum_return + 1)**( 252 / np.array(range(1, len(benchmark_returns) + 1))) - 1 # calc the benchmark values based on the same initial_cash of portfolio benchmark_values = pd.concat([ pd.Series([initial_cash], index=[start_date + BDay(-1)]), initial_cash * (1 + benchmark_cum_return) ]) # calc the benchmark annualized volatility benchmark_annual_vol = benchmark_returns.std() * np.sqrt(252) # calc the maximum drawdown benchmark_cum_max_value = (1 + benchmark_cum_return).cummax() benchmark_drawdowns = ( (1 + benchmark_cum_return) - benchmark_cum_max_value) / benchmark_cum_max_value benchmark_max_drawdown = np.min(benchmark_drawdowns) benchmark_avg_drawdown = benchmark_drawdowns.mean() # compare with the benchmark relative_return = annual_returns_df.iloc[ -1] - benchmark_annual_returns.iloc[-1] relative_vol = annual_vol - benchmark_annual_vol relative_max_drawdown = max_drawdown - benchmark_max_drawdown relative_avg_drawdown = avg_drawdown - benchmark_avg_drawdown excess_return_std = (returns_df - benchmark_returns).std() * np.sqrt(252) info_ratio = relative_return / excess_return_std # organize the output performance = pd.Series() performance.loc['Begin'] = start_date performance.loc['End'] = end_date 
performance.loc['Duration'] = performance.End - performance.Begin performance.loc['Initial_Value'] = initial_cash performance.loc['Highest_Value'] = np.max(values_df) performance.loc['Lowest_Value'] = np.min(values_df) performance.loc['Final_Value'] = values_df.iloc[-1] performance.loc['Total_Return'] = performance['Final_Value'] / performance[ 'Initial_Value'] - 1 performance.loc['Total_Return_(Annualized)'] = annual_returns_df.iloc[-1] performance.loc['Volatility_(Annualized)'] = annual_vol performance.loc['Max_Drawdown'] = max_drawdown performance.loc['Avg_Drawdown'] = avg_drawdown performance.loc['Sharpe_Ratio'] = sharpe_ratio performance.loc['Sortino_Ratio'] = sortino_ratio if benchmark: performance.loc['Relative_Return'] = relative_return performance.loc['Relative_Vol'] = relative_vol performance.loc['Relative_Max_DD'] = relative_max_drawdown performance.loc['Relative_Avg_DD'] = relative_avg_drawdown performance.loc['Information_Ratio'] = info_ratio print(performance) performance.loc['values_data'] = values_df performance.loc['returns_data'] = returns_df performance.loc['annual_returns_data'] = annual_returns_df performance.loc['drawdowns_data'] = drawdowns if benchmark: performance.loc['benchmark_values_data'] = benchmark_values strategy_plot(performance) return performance
class TestDatetimeIndexOps: def test_ops_properties_basic(self, datetime_series): # sanity check that the behavior didn't change # GH#7206 for op in ["year", "day", "second", "weekday"]: msg = f"'Series' object has no attribute '{op}'" with pytest.raises(AttributeError, match=msg): getattr(datetime_series, op) # attribute access should still work! s = Series({"year": 2000, "month": 1, "day": 10}) assert s.year == 2000 assert s.month == 1 assert s.day == 10 msg = "'Series' object has no attribute 'weekday'" with pytest.raises(AttributeError, match=msg): s.weekday def test_repeat_range(self, tz_naive_fixture): tz = tz_naive_fixture rng = date_range("1/1/2000", "1/1/2001") result = rng.repeat(5) assert result.freq is None assert len(result) == 5 * len(rng) index = date_range("2001-01-01", periods=2, freq="D", tz=tz) exp = DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = date_range("2001-01-01", periods=2, freq="2D", tz=tz) exp = DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) exp = DatetimeIndex( [ "2001-01-01", "2001-01-01", "2001-01-01", "NaT", "NaT", "NaT", "2003-01-01", "2003-01-01", "2003-01-01", ], tz=tz, ) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) assert res.freq is None def test_repeat(self, tz_naive_fixture): tz = tz_naive_fixture reps = 2 msg = "the 'axis' parameter is not supported" rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) expected_rng = DatetimeIndex([ Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), ]) res = rng.repeat(reps) tm.assert_index_equal(res, expected_rng) assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) with pytest.raises(ValueError, match=msg): np.repeat(rng, reps, axis=1) @pytest.mark.parametrize( "freq,expected", [ ("A", "day"), ("Q", "day"), ("M", "day"), ("D", "day"), ("H", "hour"), ("T", "minute"), ("S", "second"), ("L", "millisecond"), ("U", "microsecond"), ], ) def test_resolution(self, tz_naive_fixture, freq, expected): tz = tz_naive_fixture if freq == "A" and not IS64 and isinstance(tz, tzlocal): pytest.xfail(reason="OverflowError inside tzlocal past 2038") idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) assert idx.resolution == expected def test_value_counts_unique(self, tz_naive_fixture): tz = tz_naive_fixture # GH 7735 idx = date_range("2011-01-01 09:00", freq="H", periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") expected.index = expected.index._with_freq(None) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) expected = expected._with_freq(None) tm.assert_index_equal(idx.unique(), expected) idx = DatetimeIndex( [ "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 08:00", 
"2013-01-01 08:00", pd.NaT, ], tz=tz, ) exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz) expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = DatetimeIndex( ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx) @pytest.mark.parametrize( "idx", [ DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"), DatetimeIndex( ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H", name="tzidx", tz="Asia/Tokyo", ), ], ) def test_order_with_freq(self, idx): ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) assert ordered.freq == expected.freq assert ordered.freq.n == -1 @pytest.mark.parametrize( "index_dates,expected_dates", [ ( [ "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01" ], [ "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05" ], ), ( [ "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01" ], [ "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05" ], ), ( [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], ), ], ) def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture): tz = tz_naive_fixture # without freq index = DatetimeIndex(index_dates, tz=tz, name="idx") expected = DatetimeIndex(expected_dates, tz=tz, name="idx") ordered = index.sort_values(na_position="first") tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = index.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, na_position="first") tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 0, 4]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None def test_drop_duplicates_metadata(self, freq_sample): # GH 10115 idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq idx_dup = idx.append(idx) assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() expected = idx._with_freq(None) tm.assert_index_equal(result, expected) assert result.freq is None @pytest.mark.parametrize( "keep, expected, index", [ ("first", np.concatenate( ([False] * 10, [True] * 5)), np.arange(0, 10)), ("last", 
np.concatenate( ([True] * 5, [False] * 10)), np.arange(5, 15)), ( False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10), ), ], ) def test_drop_duplicates(self, freq_sample, keep, expected, index): # to check Index/Series compat idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") idx = idx.append(idx[:5]) tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) tm.assert_index_equal(result, expected) result = Series(idx).drop_duplicates(keep=keep) tm.assert_series_equal(result, Series(expected, index=index)) def test_infer_freq(self, freq_sample): # GH 11018 idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) result = DatetimeIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) assert result.freq == freq_sample def test_nat(self, tz_naive_fixture): tz = tz_naive_fixture assert DatetimeIndex._na_value is pd.NaT assert DatetimeIndex([])._na_value is pd.NaT idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) @pytest.mark.parametrize( "freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_freq_setter(self, values, freq, tz): # GH 20678 idx = DatetimeIndex(values, tz=tz) # can set to an offset, converting from string if necessary idx._data.freq = freq assert idx.freq == freq assert isinstance(idx.freq, DateOffset) # can reset to None idx._data.freq = None assert idx.freq is None def test_freq_setter_errors(self): # GH 20678 idx = DatetimeIndex(["20180101", "20180103", "20180105"]) # setting with an incompatible freq msg = ("Inferred frequency 2D from passed values does not conform to " "passed frequency 5D") with pytest.raises(ValueError, match=msg): idx._data.freq = "5D" # setting with non-freq string with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo" def test_freq_view_safe(self): # Setting the freq for one DatetimeIndex shouldn't alter the freq # for another that views the same data dti = date_range("2016-01-01", periods=5) dta = dti._data dti2 = DatetimeIndex(dta)._with_freq(None) assert dti2.freq is None # Original was not altered assert dti.freq == "D" assert dta.freq == "D"
def test_mismatching_tz_raises_err(self, start, end): # issue 18488 with pytest.raises(TypeError): pd.date_range(start, end) with pytest.raises(TypeError): pd.date_range(start, end, freq=BDay())
# Additionally, can make custom frequncy - see hourly timestamp range below pd.date_range('2015-07-03', periods=8, freq='H') # or, a sequence of durations increasing by an hour: pd.timedelta_range(0, periods=10, freq='H') ### Frequencies and Offsets # (table of codes) # stuff ------ pd.timedelta_range(0, periods=9, freq="2H30T") # get range of 5 business days, beginning on 2015-07-01 from pandas.tseries.offsets import BDay pd.date_range('2015-07-01', periods=5, freq=BDay()) ### Resampling, Shifting, and Windowing from pandas_datareader import data goog = data.DataReader('GOOG', start='2004', end='2016', data_source='google') # NOTE: "ImmediateDeprecationError - Google Finance dep. due to API breaks" # will not be able to complete section notes. Goes into some basic plots. ## Resampling and Converting Frequencies # still uses deprecated functionality ### Example: Visualizing Seattle Bicycle Counts # data get:
startingYear = 2015 # First year of data endingYear = 2015 # Last year of data for year in range(startingYear, endingYear + 1): makeDirectory(year) startTimeAcquireData = time() rawData = {year: {}} if year >= 1999: firstDateOfYear = datetime.strptime('01/01/{:d}'.format(year), '%m/%d/%Y') daysUntilStartingDay = random.randint(0, 5) recordDate = firstDateOfYear + BDay(daysUntilStartingDay) while recordDate.year == year: recordURL = getCongressionalRecordURL(year, date=recordDate) try: filePath = downloadPDFFile(recordURL, year, date=recordDate) textPageDict = parsePDFFile(filePath, everyNPages=7) recordDateString = str(recordDate)[:10] rawData[year][recordDateString] = textPageDict daysUntilNextRecordDate = random.randint(4, 9) except (PDFSyntaxError, FileNotFoundError): print('{:s} doesnt have data'.format(str(recordDate))) daysUntilNextRecordDate = random.randint(1, 2)
the date(s) to be converted Returns ------- same type as input date(s) converted to UTC """ dt = pd.to_datetime(dt) try: dt = dt.tz_localize('UTC') except TypeError: dt = dt.tz_convert('UTC') return dt _1_bday = BDay() def _1_bday_ago(): return pd.Timestamp.now().normalize() - _1_bday # @deprecated(msg=DATAREADER_DEPRECATION_WARNING) # def get_fama_french(): # """ # Retrieve Fama-French factors via pandas-datareader # Returns # ------- # pandas.DataFrame # Percent change of Fama-French factors # """
def test_shift(self): shifted = self.ts.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, self.ts.index) tm.assert_index_equal(unshifted.index, self.ts.index) tm.assert_numpy_array_equal(unshifted.valid().values, self.ts.values[:-1]) offset = BDay() shifted = self.ts.shift(1, freq=offset) unshifted = shifted.shift(-1, freq=offset) assert_series_equal(unshifted, self.ts) unshifted = self.ts.shift(0, freq=offset) assert_series_equal(unshifted, self.ts) shifted = self.ts.shift(1, freq='B') unshifted = shifted.shift(-1, freq='B') assert_series_equal(unshifted, self.ts) # corner case unshifted = self.ts.shift(0) assert_series_equal(unshifted, self.ts) # Shifting with PeriodIndex ps = tm.makePeriodSeries() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.valid().values, ps.values[:-1]) shifted2 = ps.shift(1, 'B') shifted3 = ps.shift(1, BDay()) assert_series_equal(shifted2, shifted3) assert_series_equal(ps, shifted2.shift(-1, 'B')) pytest.raises(ValueError, ps.shift, freq='D') # legacy support shifted4 = ps.shift(1, freq='B') assert_series_equal(shifted2, shifted4) shifted5 = ps.shift(1, freq=BDay()) assert_series_equal(shifted5, shifted4) # 32-bit taking # GH 8129 index = date_range('2000-01-01', periods=5) for dtype in ['int32', 'int64']: s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) expected = Series([np.nan, 0, 1, 2, 3], index=index) assert_series_equal(result, expected) # xref 8260 # with tz s = Series(date_range('2000-01-01 09:00:00', periods=5, tz='US/Eastern'), name='foo') result = s - s.shift() exp = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo') assert_series_equal(result, exp) # incompat tz s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') pytest.raises(ValueError, lambda: s - s2)
NORM_FACTOR = 100 # Matching delay MAX_LAG = pd.Timedelta('1 hours') MATCH_TOL_DICT = { '1H': pd.Timedelta('1 hours'), '0.5H': pd.Timedelta('0.5 hours'), '0.1H': pd.Timedelta('0.1 hours') } MATCH_TOL = '0.1H' # Hedging frequency OFFSET_DICT = { '1H': [pd.Timedelta('1 hours'), '_1H'], '1D': [BDay(1), '_1D'], '2D': [BDay(2), '_2D'] } FREQ = '1H' T_FIRSTHALF = datetime.time(8, 30) if FREQ == '1H': DT = 0. T_LASTHALF = datetime.time(15, 15) if FREQ == '1D': DT = 1. / 253. T_LASTHALF = datetime.time(16, 0) if FREQ == '2D': DT = 2. / 253. T_LASTHALF = datetime.time(16, 0)
def main(): temptime = datetime.datetime.now() print(temptime) warnings.filterwarnings("error") #final df wtf = pd.DataFrame() final = pd.DataFrame() #unprocessed df unproc = pd.DataFrame() unproc['complist'] = '' #number of unprocessed comp pos = 0 #define date where we start fetching data, or check if any data exists try: maindf = pd.read_csv(os.path.join('allprice.csv')) final['Date'] = maindf['Date'] day_num = len(maindf['Date']) start = datetime.datetime.strptime(str(maindf.loc[day_num - 1][0]), '%Y-%m-%d') end = datetime.datetime.today() status = "update" day = day_num for x in perdelta(start, end, timedelta(days=1)): final.set_value(day, 'Date', x) day += 1 except (UnboundLocalError, OSError): start = datetime.datetime(2010, 1, 1) end = datetime.datetime.today() day_num = 0 maindf = pd.DataFrame() status = "new" complist = pd.read_csv(path.join('Data/companylist.csv')) #traverse through the company list for x in range(len(complist)): time = datetime.datetime.now() day_num1 = day_num rsdf = pd.DataFrame() name = complist.ix[x] file_num = 1 #fetch data try: data = web.DataReader(complist.ix[x], 'yahoo', start, end) print(complist.ix[x] + "'s Data obtained from Yahoo " + str(time.isoformat())) except: try: data = web.DataReader(complist.ix[x], 'google', start, end) print(complist.ix[x] + "'s Data obtained from Google " + str(time.isoformat())) except: try: data = web.DataReader(complist.ix[x], 'fred', start, end) print(complist.ix[x] + "'s Data obtained from Fred " + str(time.isoformat())) except: continue continue print(complist.ix[x] + "Cannot be Obtained " + str(time.isoformat())) continue #create or append base on the status if status == "new": rsdf[name] = data['Adj Close'] else: try: rsdf[name] = maindf[name] rsdf.columns.values[0] = 'Adj Close' except KeyError: unproc.set_value(pos, 'complist', str(name)) pos += 1 comprice = data['Adj Close'] if (status == "update"): rsdf = rsdf.append(comprice, ignore_index=True) rsdf.rename(columns={'Adj Close': str(name)}, inplace=True) if (status == 'new' and x == 0): #since it's new, add an index Date column from 2010 to today date = pd.date_range('20100101', end, freq=BDay()) final = pd.DataFrame(rsdf[name], index=date) else: #dump the data for every 1000 companies to decrease processing time if (x % 1000 == 0 and x != 0): if file_num != 1: final.to_csv(path.join('all_price' + str(file_num) + '.csv'), index=False) else: final.to_csv(path + 'all_price' + str(file_num) + '.csv', index=True) file_num += 1 final = pd.DataFrame(rsdf[name]) else: final = final.join(rsdf[name]) #dump all data frame to file final.to_csv(path + 'all_price' + str(file_num) + '.csv', index=False) y = 1 final = merge_file(y, file_num, path) final.rename(columns={'Unnamed: 0': 'Date'}, inplace=True) temptime = datetime.datetime.now() final.to_csv(path.join('all_price.csv'), index=False) #list of unprocessed companies due to not found data unproc.to_csv(path.join('unproc.csv'), index=False) print(temptime)
def test_shift(self, datetime_series): shifted = datetime_series.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, datetime_series.index) tm.assert_index_equal(unshifted.index, datetime_series.index) tm.assert_numpy_array_equal( unshifted.dropna().values, datetime_series.values[:-1] ) offset = BDay() shifted = datetime_series.shift(1, freq=offset) unshifted = shifted.shift(-1, freq=offset) tm.assert_series_equal(unshifted, datetime_series) unshifted = datetime_series.shift(0, freq=offset) tm.assert_series_equal(unshifted, datetime_series) shifted = datetime_series.shift(1, freq="B") unshifted = shifted.shift(-1, freq="B") tm.assert_series_equal(unshifted, datetime_series) # corner case unshifted = datetime_series.shift(0) tm.assert_series_equal(unshifted, datetime_series) # Shifting with PeriodIndex ps = tm.makePeriodSeries() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) shifted2 = ps.shift(1, "B") shifted3 = ps.shift(1, BDay()) tm.assert_series_equal(shifted2, shifted3) tm.assert_series_equal(ps, shifted2.shift(-1, "B")) msg = "Given freq D does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): ps.shift(freq="D") # legacy support shifted4 = ps.shift(1, freq="B") tm.assert_series_equal(shifted2, shifted4) shifted5 = ps.shift(1, freq=BDay()) tm.assert_series_equal(shifted5, shifted4) # 32-bit taking # GH 8129 index = date_range("2000-01-01", periods=5) for dtype in ["int32", "int64"]: s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) expected = Series([np.nan, 0, 1, 2, 3], index=index) tm.assert_series_equal(result, expected) # xref 8260 # with tz s = Series( date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo" ) result = s - s.shift() exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") tm.assert_series_equal(result, exp) # incompat tz s2 = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"), name="foo") msg = "DatetimeArray subtraction must have the same timezones or no timezones" with pytest.raises(TypeError, match=msg): s - s2
def freight_and_quality_exceptions(): if crude in ('Forties'): df_freight['Buzzard_Content'] = forties_sulphur[ 'BuzzardContent'] df_freight['Implied_Sulphur'] = df_freight[ 'Buzzard_Content'] * 0.012 + 0.003 df_freight['De-Escalator_Threshold'] = np.round( df_freight['Implied_Sulphur'], 3) df_freight['De-Escalator_Counts'] = np.minimum( 0, 6 - df_freight['Implied_Sulphur'] * 1000) df_freight['Platts_De_Esc'] = total['AAUXL00'] df_freight['Forties_Margin_Impact'] = df_freight[ 'Platts_De_Esc'] * df_freight['De-Escalator_Counts'] * -1 df_freight['Costs'] += df_freight['Forties_Margin_Impact'] if crude in ('Basrah Light', 'Basrah Heavy'): """This handles the freight escalation calculation from Iraq - the base is sent by SOMO, and table is in databse / excel wb""" monthly_averages = total['PFAOH00'].asfreq(BDay( )).resample('BMS').mean( ) # resampled so we have the business month start, corrects averaging error if cma func_ma_on_days = lambda x: (monthly_averages.loc[ (monthly_averages.index.month == x.month) & (monthly_averages.index.year == x.year)]).iat[0] """Create funcs to handle basrah base and flat rate values, apply over df and calc esclator""" func_ws_base = lambda x: (basrah_ws_base.loc[ (basrah_ws_base.index.year == x.year)]['SOMO_WS']).iat[0] func_fr = lambda x: (basrah_ws_base.loc[ (basrah_ws_base.index.year == x.year)]['SOMO_FlatRate'] ).iat[0] func_bhapi = lambda x: (basrah_ws_base.loc[ (basrah_ws_base.index.year == x.year)]['BasrahHeavyAPI'] ).iat[0] func_blapi = lambda x: (basrah_ws_base.loc[ (basrah_ws_base.index.year == x.year)]['BasrahLightAPI'] ).iat[0] df_freight['Date'] = df_freight.index df_freight['WS Month Avg'] = df_freight['Date'].apply( func_ma_on_days) df_freight['SOMO Base WS'] = df_freight['Date'].apply( func_ws_base) # We have to apply the corrcetion here after SOMO dropped their base rate earlier this year - assumption # only valid for 2018 df_freight['SOMO Base WS'].iloc[ (df_freight.index >= dt(2018, 4, 1)) & (df_freight.index <= dt(2018, 12, 31))] = 25 df_freight['Base_FR_for_esc'] = df_freight['Date'].apply( func_fr) if crude == 'Basrah Light': df_freight['API Esc'] = df_freight['Date'].apply( func_blapi) else: df_freight['API Esc'] = df_freight['Date'].apply( func_bhapi) df_freight['WS for Esc'] = ( df_freight['WS Month Avg'] - df_freight['SOMO Base WS'] ) * df_freight['Base_FR_for_esc'] / 7.3 / 100 df_freight.drop(['Date'], axis=1, inplace=True) # South Korean particulars if ports[ports['PortName'] == destination]['Country'].iat[0] == 'South Korea': # Freight rebate on imported crudes df_freight['Murban_Freight_Comp'] = total[ 'PFAOC00'] / 100 * df_freight[ 'Murban_Sing_Flat'] / 7.66 #Murban density conversion df_freight[ 'UKC-Yosu_VLCC'] = total['AASLA00'] * 1000000 / 2000000 df_freight['Freight_Rebate'] = np.maximum( df_freight['UKC-Yosu_VLCC'] - df_freight['Murban_Freight_Comp'], 0.6) df_freight['Costs'] -= df_freight['Freight_Rebate'] # Tax rebate on crudes out of Europe if ports[ports['PortName'] == loadport]['RegionName'].iat[0] in ([ 'NW EUROPE', 'MED' ]): df_freight['FTA_Tax_Rebate'] = 0.006 * total['LCOc1'] df_freight['Costs'] -= df_freight['FTA_Tax_Rebate'] # Tax rebate on crudes out of the US if ports[ports['PortName'] == loadport]['RegionName'].iat[0] in (['N AMERICA']): df_freight['FTA_Tax_Rebate'] = 0.005 * total['CLc1'] df_freight['Costs'] -= df_freight['FTA_Tax_Rebate'] # Costs ascociated with lifting CPC based on delays if crude == 'CPC Blend': df_freight['TS_Delays'] = np.maximum( total['AAWIL00'] + total['AAWIK00'] - 2, 0) 
df_freight['TS_Demur'] = total['AAPED00'] df_freight['TS_Demur_Costs'] = df_freight['TS_Delays'].mul( df_freight['TS_Demur']) / 130 df_freight['Costs'] += df_freight['TS_Demur_Costs'] # Costs ascociated with lifting Urals, actually a rebate as giving back port costs that are included in CIF price if crude in (['Urals Nth', 'Urals Med']): df_freight['Urals_Cif_Rebate'] = 0.11 df_freight['Costs'] -= df_freight['Urals_Discharge_Costs'] if crude == 'Forties': df_freight['Forties_Mkt_Discount'] = 0.5 df_freight['Costs'] -= df_freight['Forties_Mkt_Discount'] else: pass return df_freight
def positions(weights, period, freq=None): """ Builds net position values time series, the portfolio percentage invested in each position. Parameters ---------- weights: pd.Series pd.Series containing factor weights, the index contains timestamps at which the trades are computed and the values correspond to assets weights - see factor_weights for more details period: pandas.Timedelta or string Assets holding period (1 day, 2 mins, 3 hours etc). It can be a Timedelta or a string in the format accepted by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc) freq : pandas DateOffset, optional Used to specify a particular trading calendar. If not present weights.index.freq will be used Returns ------- pd.DataFrame Assets positions series, datetime on index, assets on columns. """ weights = weights.unstack() if not isinstance(period, pd.Timedelta): period = pd.Timedelta(period) if freq is None: freq = weights.index.freq if freq is None: freq = BDay() warnings.warn("'freq' not set, using business day calendar", UserWarning) # # weights index contains factor computation timestamps, then add returns # timestamps too (factor timestamps + period) and save them to 'full_idx' # 'full_idx' index will contain an entry for each point in time the weights # change and hence they have to be re-computed # trades_idx = weights.index.copy() returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq) weights_idx = trades_idx.union(returns_idx) # # Compute portfolio weights for each point in time contained in the index # portfolio_weights = pd.DataFrame(index=weights_idx, columns=weights.columns) active_weights = [] for curr_time in weights_idx: # # fetch new weights that become available at curr_time and store them # in active weights # if curr_time in weights.index: assets_weights = weights.loc[curr_time] expire_ts = utils.add_custom_calendar_timedelta( curr_time, period, freq) active_weights.append((expire_ts, assets_weights)) # # remove expired entry in active_weights (older than 'period') # if active_weights: expire_ts, assets_weights = active_weights[0] if expire_ts <= curr_time: active_weights.pop(0) if not active_weights: continue # # Compute total weights for curr_time and store them # tot_weights = [w for (ts, w) in active_weights] tot_weights = pd.concat(tot_weights, axis=1) tot_weights = tot_weights.sum(axis=1) tot_weights /= tot_weights.abs().sum() portfolio_weights.loc[curr_time] = tot_weights return portfolio_weights.fillna(0)
def cumulative_returns(returns, period, freq=None): """ Builds cumulative returns from 'period' returns. This function simulate the cumulative effect that a series of gains or losses (the 'retuns') have on an original amount of capital over a period of time. if F is the frequency at which returns are computed (e.g. 1 day if 'returns' contains daily values) and N is the period for which the retuns are computed (e.g. returns after 1 day, 5 hours or 3 days) then: - if N <= F the cumulative retuns are trivially computed as Compound Return - if N > F (e.g. F 1 day, and N is 3 days) then the returns overlap and the cumulative returns are computed building and averaging N interleaved sub portfolios (started at subsequent periods 1,2,..,N) each one rebalancing every N periods. This correspond to an algorithm which trades the factor every single time it is computed, which is statistically more robust and with a lower volatity compared to an algorithm that trades the factor every N periods and whose returns depend on the specific starting day of trading. Also note that when the factor is not computed at a specific frequency, for exaple a factor representing a random event, it is not efficient to create multiples sub-portfolios as it is not certain when the factor will be traded and this would result in an underleveraged portfolio. In this case the simulated portfolio is fully invested whenever an event happens and if a subsequent event occur while the portfolio is still invested in a previous event then the portfolio is rebalanced and split equally among the active events. Parameters ---------- returns: pd.Series pd.Series containing factor 'period' forward returns, the index contains timestamps at which the trades are computed and the values correspond to returns after 'period' time period: pandas.Timedelta or string Length of period for which the returns are computed (1 day, 2 mins, 3 hours etc). It can be a Timedelta or a string in the format accepted by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc) freq : pandas DateOffset, optional Used to specify a particular trading calendar. If not present returns.index.freq will be used Returns ------- Cumulative returns series : pd.Series """ if not isinstance(period, pd.Timedelta): period = pd.Timedelta(period) if freq is None: freq = returns.index.freq if freq is None: freq = BDay() warnings.warn("'freq' not set, using business day calendar", UserWarning) # # returns index contains factor computation timestamps, then add returns # timestamps too (factor timestamps + period) and save them to 'full_idx' # Cumulative returns will use 'full_idx' index,because we want a cumulative # returns value for each entry in 'full_idx' # trades_idx = returns.index.copy() returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq) full_idx = trades_idx.union(returns_idx) # # Build N sub_returns from the single returns Series. Each sub_retuns # stream will contain non overlapping retuns. 
# In the next step we'll compute the portfolio returns averaging the # returns happening on those overlapping returns streams # sub_returns = [] while len(trades_idx) > 0: # # select non-overlapping returns starting with first timestamp in index # sub_index = [] next = trades_idx.min() while next <= trades_idx.max(): sub_index.append(next) next = utils.add_custom_calendar_timedelta(next, period, freq) # make sure to fetch the next available entry after 'period' try: i = trades_idx.get_loc(next, method='bfill') next = trades_idx[i] except KeyError: break sub_index = pd.DatetimeIndex(sub_index, tz=full_idx.tz) subret = returns[sub_index] # make the index to have all entries in 'full_idx' subret = subret.reindex(full_idx) # # compute intermediate returns values for each index in subret that are # in between the timestaps at which the factors are computed and the # timestamps at which the 'period' returns actually happen # for pret_idx in reversed(sub_index): pret = subret[pret_idx] # get all timestamps between factor computation and period returns pret_end_idx = \ utils.add_custom_calendar_timedelta(pret_idx, period, freq) slice = subret[(subret.index > pret_idx) & (subret.index <= pret_end_idx)].index if pd.isnull(pret): continue def rate_of_returns(ret, period): return ((np.nansum(ret) + 1)**(1. / period)) - 1 # compute intermediate 'period' returns values, note that this also # moves the final 'period' returns value from trading timestamp to # trading timestamp + 'period' for slice_idx in slice: sub_period = utils.diff_custom_calendar_timedeltas( pret_idx, slice_idx, freq) subret[slice_idx] = rate_of_returns(pret, period / sub_period) subret[pret_idx] = np.nan # transform returns as percentage change from previous value subret[slice[1:]] = (subret[slice] + 1).pct_change()[slice[1:]] sub_returns.append(subret) trades_idx = trades_idx.difference(sub_index) # # Compute portfolio cumulative returns averaging the returns happening on # overlapping returns streams. Please note that the below algorithm keeps # into consideration the scenario where a factor is not computed at a fixed # frequency (e.g. every day) and consequently the returns appears randomly # sub_portfolios = pd.concat(sub_returns, axis=1) portfolio = pd.Series(index=sub_portfolios.index) for i, (index, row) in enumerate(sub_portfolios.iterrows()): # check the active portfolios, count() returns non-nans elements active_subfolios = row.count() # fill forward portfolio value portfolio.iloc[i] = portfolio.iloc[i - 1] if i > 0 else 1. if active_subfolios <= 0: continue # current portfolio is the average of active sub_portfolios portfolio.iloc[i] *= (row + 1).mean(skipna=True) return portfolio
def performance_analysis_HF(pnls_or_values, initial_cash=1, benchmark='^GSPC', risk_free='^IRX', mar=0.0, input_type='value'): if input_type == 'value': values_df = pd.Series(pnls_or_values) elif input_type == 'pnl': values_df = pd.Series(pnls_or_values).cumsum() + initial_cash values_df.index = pd.to_datetime(values_df.index) start_time = values_df.index[0] end_time = values_df.index[-1] # downsample the series into 3 min bins values_df_3T = values_df.resample('3T', label='right').last() snapshot_per_day = (16 - 9.5) * 60 / 3 # add the initial portfolio values values_df = pd.concat([ pd.Series([initial_cash], index=[start_time + Minute(-3)]), values_df_3T ]) # calc the 3-minute returns returns_df = (values_df - values_df.shift(1)) / values_df.shift(1) returns_df = returns_df.dropna() # calc the daily return cum_return = values_df.iloc[1:] / initial_cash - 1 annual_returns_df = (cum_return + 1)**( 252 * snapshot_per_day / np.array(range(1, len(returns_df) + 1))) - 1 # calc the daily volatility annual_vol = returns_df.std() * np.sqrt(252 * snapshot_per_day) # calc the Sharpe ratio / sortino ratio if risk_free: # get the risk-free prices RF_quotes = web.DataReader(risk_free, 'yahoo', start_time + Minute(-3), end_time)['Close'] # get the expected risk-free rate risk_free = np.mean(1 / (1 - RF_quotes * 0.01) - 1) else: risk_free = 0.0 # calc the Sharpe ratio / sortino ratio risk_free_per_snapshot = risk_free / 252 / snapshot_per_day mar_per_snapshot = mar / 252 / snapshot_per_day sharpe_ratio = (returns_df.mean() - risk_free_per_snapshot) / (returns_df - risk_free_per_snapshot).std() \ * (252*snapshot_per_day)**0.5 sortino_ratio = (returns_df.mean() - mar_per_snapshot) / (returns_df[returns_df < mar_per_snapshot]).std() \ * (252*snapshot_per_day)**0.5 # calc the maximum drawdown cum_max_value = (1 + cum_return).cummax() drawdowns = ((1 + cum_return) - cum_max_value) / cum_max_value max_drawdown = np.min(drawdowns) avg_drawdown = drawdowns.mean() if benchmark: start_time = values_df.index[0].replace( tzinfo=timezone('America/New_York')) end_time = values_df.index[-1].replace( tzinfo=timezone('America/New_York')) # get the benchmark prices benchmark_prices = yf.download(benchmark, start=start_time.strftime('%Y-%m-%e'), end=(end_time + BDay(1)).strftime('%Y-%m-%e'), interval="1m")['Close'] benchmark_prices_3T = benchmark_prices.resample( '3T', label='right').last()[values_df_3T.index] benchmark_prices = pd.concat([ pd.Series([benchmark_prices.iloc[-1]], index=[start_time + Minute(-3)]), benchmark_prices_3T ]) # calc the benchmark daily returns benchmark_returns = (benchmark_prices - benchmark_prices.shift(1) ) / benchmark_prices.shift(1) benchmark_returns = benchmark_returns.dropna() # calc the benchmark daily return benchmark_cum_return = np.exp(np.log1p(benchmark_returns).cumsum()) - 1 benchmark_annual_returns = (benchmark_cum_return + 1)**( 252 * snapshot_per_day / np.array(range(1, len(benchmark_cum_return) + 1))) - 1 # calc the benchmark values based on the same initial_cash of portfolio benchmark_values = pd.concat([ pd.Series([initial_cash], index=[start_time + Minute(-3)]), initial_cash * (1 + benchmark_cum_return) ]) benchmark_values.index = [ local_time.replace(tzinfo=timezone('UTC')) for local_time in benchmark_values.index ] # calc the benchmark daily volatility benchmark_annual_vol = benchmark_returns.std() * np.sqrt( 252 * snapshot_per_day) # calc the maximum drawdown benchmark_cum_max_value = (1 + benchmark_cum_return).cummax() benchmark_drawdowns = ( (1 + benchmark_cum_return) - 
benchmark_cum_max_value) / benchmark_cum_max_value benchmark_max_drawdown = np.min(benchmark_drawdowns) benchmark_avg_drawdown = benchmark_drawdowns.mean() # compare with the benchmark relative_return = annual_returns_df.iloc[ -1] - benchmark_annual_returns.iloc[-1] relative_vol = annual_vol - benchmark_annual_vol relative_max_drawdown = max_drawdown - benchmark_max_drawdown relative_avg_drawdown = avg_drawdown - benchmark_avg_drawdown excess_return_std = (returns_df - benchmark_returns).std() * np.sqrt( 252 * snapshot_per_day) info_ratio = relative_return / excess_return_std # organize the output performance = pd.Series() performance.loc['Begin'] = start_time performance.loc['End'] = end_time performance.loc['Duration'] = performance.End - performance.Begin performance.loc['Initial_Value'] = initial_cash performance.loc['Highest_Value'] = np.max(values_df) performance.loc['Lowest_Value'] = np.min(values_df) performance.loc['Final_Value'] = values_df.iloc[-1] performance.loc['Total_Return'] = performance['Final_Value'] / performance[ 'Initial_Value'] - 1 performance.loc['Total_Return_(Annual)'] = annual_returns_df.iloc[-1] performance.loc['Volatility_(Annual)'] = annual_vol performance.loc['Max_Drawdown'] = max_drawdown performance.loc['Avg_Drawdown'] = avg_drawdown performance.loc['Sharpe_Ratio_(Annual)'] = sharpe_ratio performance.loc['Sortino_Ratio_(Annual)'] = sortino_ratio if benchmark: performance.loc['Relative_Return_(Annual)'] = relative_return performance.loc['Relative_Vol_(Annual)'] = relative_vol performance.loc['Relative_Max_DD'] = relative_max_drawdown performance.loc['Relative_Avg_DD'] = relative_avg_drawdown performance.loc['Information_Ratio_(Annual)'] = info_ratio print(performance) values_df.index = pd.Series([ local_time.replace(tzinfo=timezone('UTC')) for local_time in values_df.index ]) drawdowns.index = pd.Series([ local_time.replace(tzinfo=timezone('UTC')) for local_time in drawdowns.index ]) performance.loc['values_data'] = values_df performance.loc['returns_data'] = returns_df performance.loc['annual_returns_data'] = annual_returns_df performance.loc['drawdowns_data'] = drawdowns if benchmark: performance.loc['benchmark_values_data'] = benchmark_values strategy_plot(performance, benchmark, freq='intraday') return performance
class TestDatetimeIndexOps(Ops): def setup_method(self, method): super(TestDatetimeIndexOps, self).setup_method(method) mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance( x, PeriodIndex)) self.is_valid_objs = [o for o in self.objs if mask(o)] self.not_valid_objs = [o for o in self.objs if not mask(o)] def test_ops_properties(self): f = lambda x: isinstance(x, DatetimeIndex) self.check_ops_properties(DatetimeIndex._field_ops, f) self.check_ops_properties(DatetimeIndex._object_ops, f) self.check_ops_properties(DatetimeIndex._bool_ops, f) def test_ops_properties_basic(self): # sanity check that the behavior didn't change # GH7206 for op in ['year', 'day', 'second', 'weekday']: pytest.raises(TypeError, lambda x: getattr(self.dt_series, op)) # attribute access should still work! s = Series(dict(year=2000, month=1, day=10)) assert s.year == 2000 assert s.month == 1 assert s.day == 10 pytest.raises(AttributeError, lambda: s.weekday) def test_minmax_tz(self, tz_fixture): tz = tz_fixture # monotonic idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz=tz) assert idx1.is_monotonic # non-monotonic idx2 = pd.DatetimeIndex( ['2011-01-01', pd.NaT, '2011-01-03', '2011-01-02', pd.NaT], tz=tz) assert not idx2.is_monotonic for idx in [idx1, idx2]: assert idx.min() == Timestamp('2011-01-01', tz=tz) assert idx.max() == Timestamp('2011-01-03', tz=tz) assert idx.argmin() == 0 assert idx.argmax() == 2 @pytest.mark.parametrize('op', ['min', 'max']) def test_minmax_nat(self, op): # Return NaT obj = DatetimeIndex([]) assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT]) assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') assert np.min(dr) == Timestamp('2016-01-15 00:00:00', freq='D') assert np.max(dr) == Timestamp('2016-01-20 00:00:00', freq='D') errmsg = "the 'out' parameter is not supported" tm.assert_raises_regex(ValueError, errmsg, np.min, dr, out=0) tm.assert_raises_regex(ValueError, errmsg, np.max, dr, out=0) assert np.argmin(dr) == 0 assert np.argmax(dr) == 5 if not _np_version_under1p10: errmsg = "the 'out' parameter is not supported" tm.assert_raises_regex(ValueError, errmsg, np.argmin, dr, out=0) tm.assert_raises_regex(ValueError, errmsg, np.argmax, dr, out=0) def test_repeat_range(self, tz_fixture): tz = tz_fixture rng = date_range('1/1/2000', '1/1/2001') result = rng.repeat(5) assert result.freq is None assert len(result) == 5 * len(rng) index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) exp = pd.DatetimeIndex( ['2001-01-01', '2001-01-01', '2001-01-02', '2001-01-02'], tz=tz) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) exp = pd.DatetimeIndex( ['2001-01-01', '2001-01-01', '2001-01-03', '2001-01-03'], tz=tz) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], tz=tz) exp = pd.DatetimeIndex([ '2001-01-01', '2001-01-01', '2001-01-01', 'NaT', 'NaT', 'NaT', '2003-01-01', '2003-01-01', '2003-01-01' ], tz=tz) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) assert res.freq is None def test_repeat(self, tz_fixture): tz = tz_fixture reps = 2 msg = "the 'axis' parameter is not supported" rng = pd.date_range(start='2016-01-01', periods=2, 
freq='30Min', tz=tz) expected_rng = DatetimeIndex([ Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), ]) res = rng.repeat(reps) tm.assert_index_equal(res, expected_rng) assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) tm.assert_raises_regex(ValueError, msg, np.repeat, rng, reps, axis=1) def test_resolution(self, tz_fixture): tz = tz_fixture for freq, expected in zip( ['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], [ 'day', 'day', 'day', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond' ]): idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) assert idx.resolution == expected def test_value_counts_unique(self, tz_fixture): tz = tz_fixture # GH 7735 idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, tz=tz) expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, tz=tz) tm.assert_index_equal(idx.unique(), expected) idx = DatetimeIndex([ '2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT ], tz=tz) exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], tz=tz) expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = DatetimeIndex( ['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], tz=tz) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx) def test_nonunique_contains(self): # GH 9512 for idx in map(DatetimeIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], ['2015', '2015', '2016'], ['2015', '2015', '2014'])): assert idx[0] in idx @pytest.mark.parametrize('idx', [ DatetimeIndex( ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx'), DatetimeIndex( ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], freq='H', name='tzidx', tz='Asia/Tokyo') ]) def test_order_with_freq(self, idx): ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) assert ordered.freq == expected.freq assert ordered.freq.n == -1 @pytest.mark.parametrize( 'index_dates,expected_dates', [([ '2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02', '2011-01-01' ], [ '2011-01-01', '2011-01-01', '2011-01-02', '2011-01-03', '2011-01-05' ]), ([ '2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02', '2011-01-01' ], [ '2011-01-01', '2011-01-01', 
'2011-01-02', '2011-01-03', '2011-01-05' ]), ([pd.NaT, '2011-01-03', '2011-01-05', '2011-01-02', pd.NaT ], [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', '2011-01-05'])]) def test_order_without_freq(self, index_dates, expected_dates, tz_fixture): tz = tz_fixture # without freq index = DatetimeIndex(index_dates, tz=tz, name='idx') expected = DatetimeIndex(expected_dates, tz=tz, name='idx') ordered = index.sort_values() tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = index.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None def test_drop_duplicates_metadata(self): # GH 10115 idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq idx_dup = idx.append(idx) assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() tm.assert_index_equal(idx, result) assert result.freq is None def test_drop_duplicates(self): # to check Index/Series compat base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') idx = base.append(base[:5]) res = idx.drop_duplicates() tm.assert_index_equal(res, base) res = Series(idx).drop_duplicates() tm.assert_series_equal(res, Series(base)) res = idx.drop_duplicates(keep='last') exp = base[5:].append(base[:5]) tm.assert_index_equal(res, exp) res = Series(idx).drop_duplicates(keep='last') tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) res = idx.drop_duplicates(keep=False) tm.assert_index_equal(res, base[5:]) res = Series(idx).drop_duplicates(keep=False) tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) @pytest.mark.parametrize('freq', [ 'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' ]) def test_infer_freq(self, freq): # GH 11018 idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) result = pd.DatetimeIndex(idx.asi8, freq='infer') tm.assert_index_equal(idx, result) assert result.freq == freq def test_nat_new(self): idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') result = idx._nat_new() exp = pd.DatetimeIndex([pd.NaT] * 5, name='x') tm.assert_index_equal(result, exp) result = idx._nat_new(box=False) exp = np.array([tslib.iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) def test_nat(self, tz_naive_fixture): timezone = tz_naive_fixture assert pd.DatetimeIndex._na_value is pd.NaT assert pd.DatetimeIndex([])._na_value is pd.NaT idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=timezone) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert not idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=timezone) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) def test_equals(self): # GH 13107 idx = 
pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) assert idx.astype(object).equals(idx) assert idx.astype(object).equals(idx.astype(object)) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], tz='US/Pacific') assert not idx.equals(idx2) assert not idx.equals(idx2.copy()) assert not idx.equals(idx2.astype(object)) assert not idx.astype(object).equals(idx2) assert not idx.equals(list(idx2)) assert not idx.equals(pd.Series(idx2)) # same internal, different tz idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) assert not idx.equals(idx3.astype(object)) assert not idx.astype(object).equals(idx3) assert not idx.equals(list(idx3)) assert not idx.equals(pd.Series(idx3)) @pytest.mark.parametrize('values', [['20180101', '20180103', '20180105'], []]) @pytest.mark.parametrize( 'freq', ['2D', Day(2), '2B', BDay(2), '48H', Hour(48)]) @pytest.mark.parametrize('tz', [None, 'US/Eastern']) def test_freq_setter(self, values, freq, tz): # GH 20678 idx = DatetimeIndex(values, tz=tz) # can set to an offset, converting from string if necessary idx.freq = freq assert idx.freq == freq assert isinstance(idx.freq, ABCDateOffset) # can reset to None idx.freq = None assert idx.freq is None def test_freq_setter_errors(self): # GH 20678 idx = DatetimeIndex(['20180101', '20180103', '20180105']) # setting with an incompatible freq msg = ('Inferred frequency 2D from passed values does not conform to ' 'passed frequency 5D') with tm.assert_raises_regex(ValueError, msg): idx.freq = '5D' # setting with non-freq string with tm.assert_raises_regex(ValueError, 'Invalid frequency'): idx.freq = 'foo' def test_offset_deprecated(self): # GH 20716 idx = pd.DatetimeIndex(['20180101', '20180102']) # getter deprecated with tm.assert_produces_warning(FutureWarning): idx.offset # setter deprecated with tm.assert_produces_warning(FutureWarning): idx.offset = BDay()
#pandas sequence pd.date_range('2019-07-01', '2019-10-30') pd.date_range('2019-07-01', periods=45) pd.date_range('2019-07-01', periods=3, freq='M') pd.date_range('2019-07-01', periods=5, freq='H') #%%% # #%%% pd.timedelta_range(0, periods=9, freq='2H20T') #business day offser from pandas.tseries.offsets import BDay pd.date_range('2019-07-01', periods=9, freq=BDay()) #see the gap in days - Sat & SUn #%%% #using Frequencies and Offsets #%%% #Reading Stock Data #conda install pandas-datareader from pandas_datareader import data #https://pandas-datareader.readthedocs.io/en/latest/ #https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#remote-data-google
import pandas as pd from tia.bbg import LocalTerminal import tia.bbg.datamgr as dm import os from pandas import ExcelWriter import datetime as dt from pandas.tseries.offsets import BDay import glob t1 = dt.date.today() - BDay(1) t1 = t1.strftime('%Y%m%d') mgr = dm.BbgDataManager( ) #this is used to access the bloomberg api with python, used in getAdvs method in class class executedOrderReport(object): def __init__(self, location, saveLoc, threshold, advThreshold, delimiter="|", *args, **kwargs): """ takes 3 parameters location = where the raw fidessa file is saveLoc = where the output report will go """ self.location = location self.delimiter = delimiter
def close(self, date):
    """Return the stock close price for the given date."""
    # fall back to the previous business day if `date` is not a business day
    d = date if is_bday(date) else (date - BDay(1)).date()
    return self._prices.loc[d]["Close"]
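# The method above assumes an `is_bday` helper. A minimal sketch of one possible
# implementation (an assumption, not taken from the source), using pandas'
# business-day calendar:
import pandas as pd

def is_bday(date):
    """True if `date` falls on a Mon-Fri business day."""
    return bool(len(pd.bdate_range(date, date)))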
class TestDatetimeIndexOps(Ops): def setup_method(self, method): super().setup_method(method) mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex)) self.is_valid_objs = [o for o in self.objs if mask(o)] self.not_valid_objs = [o for o in self.objs if not mask(o)] def test_ops_properties(self): f = lambda x: isinstance(x, DatetimeIndex) self.check_ops_properties(DatetimeIndex._field_ops, f) self.check_ops_properties(DatetimeIndex._object_ops, f) self.check_ops_properties(DatetimeIndex._bool_ops, f) def test_ops_properties_basic(self): # sanity check that the behavior didn't change # GH#7206 for op in ["year", "day", "second", "weekday"]: msg = f"'Series' object has no attribute '{op}'" with pytest.raises(AttributeError, match=msg): getattr(self.dt_series, op) # attribute access should still work! s = Series(dict(year=2000, month=1, day=10)) assert s.year == 2000 assert s.month == 1 assert s.day == 10 msg = "'Series' object has no attribute 'weekday'" with pytest.raises(AttributeError, match=msg): s.weekday def test_repeat_range(self, tz_naive_fixture): tz = tz_naive_fixture rng = date_range("1/1/2000", "1/1/2001") result = rng.repeat(5) assert result.freq is None assert len(result) == 5 * len(rng) index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz) exp = pd.DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz ) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz) exp = pd.DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz ) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) exp = pd.DatetimeIndex( [ "2001-01-01", "2001-01-01", "2001-01-01", "NaT", "NaT", "NaT", "2003-01-01", "2003-01-01", "2003-01-01", ], tz=tz, ) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) assert res.freq is None def test_repeat(self, tz_naive_fixture): tz = tz_naive_fixture reps = 2 msg = "the 'axis' parameter is not supported" rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) expected_rng = DatetimeIndex( [ Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), ] ) res = rng.repeat(reps) tm.assert_index_equal(res, expected_rng) assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) with pytest.raises(ValueError, match=msg): np.repeat(rng, reps, axis=1) def test_resolution(self, tz_naive_fixture): tz = tz_naive_fixture for freq, expected in zip( ["A", "Q", "M", "D", "H", "T", "S", "L", "U"], [ "day", "day", "day", "day", "hour", "minute", "second", "millisecond", "microsecond", ], ): idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) assert idx.resolution == expected def test_value_counts_unique(self, tz_naive_fixture): tz = tz_naive_fixture # GH 7735 idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") for obj in [idx, Series(idx)]: 
tm.assert_series_equal(obj.value_counts(), expected) expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) tm.assert_index_equal(idx.unique(), expected) idx = DatetimeIndex( [ "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 08:00", "2013-01-01 08:00", pd.NaT, ], tz=tz, ) exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz) expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx) def test_nonunique_contains(self): # GH 9512 for idx in map( DatetimeIndex, ( [0, 1, 0], [0, 0, -1], [0, -1, -1], ["2015", "2015", "2016"], ["2015", "2015", "2014"], ), ): assert idx[0] in idx @pytest.mark.parametrize( "idx", [ DatetimeIndex( ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" ), DatetimeIndex( ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H", name="tzidx", tz="Asia/Tokyo", ), ], ) def test_order_with_freq(self, idx): ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) assert ordered.freq == expected.freq assert ordered.freq.n == -1 @pytest.mark.parametrize( "index_dates,expected_dates", [ ( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], ), ( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], ), ( [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], ), ], ) def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture): tz = tz_naive_fixture # without freq index = DatetimeIndex(index_dates, tz=tz, name="idx") expected = DatetimeIndex(expected_dates, tz=tz, name="idx") ordered = index.sort_values() tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = index.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None def test_drop_duplicates_metadata(self): # GH 10115 idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") result = idx.drop_duplicates() 
tm.assert_index_equal(idx, result) assert idx.freq == result.freq idx_dup = idx.append(idx) assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() tm.assert_index_equal(idx, result) assert result.freq is None def test_drop_duplicates(self): # to check Index/Series compat base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") idx = base.append(base[:5]) res = idx.drop_duplicates() tm.assert_index_equal(res, base) res = Series(idx).drop_duplicates() tm.assert_series_equal(res, Series(base)) res = idx.drop_duplicates(keep="last") exp = base[5:].append(base[:5]) tm.assert_index_equal(res, exp) res = Series(idx).drop_duplicates(keep="last") tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) res = idx.drop_duplicates(keep=False) tm.assert_index_equal(res, base[5:]) res = Series(idx).drop_duplicates(keep=False) tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) @pytest.mark.parametrize( "freq", [ "A", "2A", "-2A", "Q", "-1Q", "M", "-1M", "D", "3D", "-3D", "W", "-1W", "H", "2H", "-2H", "T", "2T", "S", "-3S", ], ) def test_infer_freq(self, freq): # GH 11018 idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10) result = pd.DatetimeIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) assert result.freq == freq def test_nat(self, tz_naive_fixture): tz = tz_naive_fixture assert pd.DatetimeIndex._na_value is pd.NaT assert pd.DatetimeIndex([])._na_value is pd.NaT idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) def test_equals(self): # GH 13107 idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"]) assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) assert idx.astype(object).equals(idx) assert idx.astype(object).equals(idx.astype(object)) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific") assert not idx.equals(idx2) assert not idx.equals(idx2.copy()) assert not idx.equals(idx2.astype(object)) assert not idx.astype(object).equals(idx2) assert not idx.equals(list(idx2)) assert not idx.equals(pd.Series(idx2)) # same internal, different tz idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific") tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) assert not idx.equals(idx3.astype(object)) assert not idx.astype(object).equals(idx3) assert not idx.equals(list(idx3)) assert not idx.equals(pd.Series(idx3)) # check that we do not raise when comparing with OutOfBounds objects oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object) assert not idx.equals(oob) assert not idx2.equals(oob) assert not idx3.equals(oob) # check that we do not raise when comparing with OutOfBounds dt64 oob2 = oob.map(np.datetime64) assert not idx.equals(oob2) assert not idx2.equals(oob2) assert not idx3.equals(oob2) @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) @pytest.mark.parametrize("tz", 
[None, "US/Eastern"]) def test_freq_setter(self, values, freq, tz): # GH 20678 idx = DatetimeIndex(values, tz=tz) # can set to an offset, converting from string if necessary idx._data.freq = freq assert idx.freq == freq assert isinstance(idx.freq, ABCDateOffset) # can reset to None idx._data.freq = None assert idx.freq is None def test_freq_setter_errors(self): # GH 20678 idx = DatetimeIndex(["20180101", "20180103", "20180105"]) # setting with an incompatible freq msg = ( "Inferred frequency 2D from passed values does not conform to " "passed frequency 5D" ) with pytest.raises(ValueError, match=msg): idx._data.freq = "5D" # setting with non-freq string with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo"
def GetBulkRiskiness(stocks, start, end):
    import pandas as pd
    results = pd.DataFrame(columns=['PriceRisk', 'ReturnRisk', 'Volatility'])
    for stock in stocks:
        row = GetRiskiness(stock, start, end)
        results = pd.concat([results, row])
    return results


import pandas as pd
from pandas.tseries.offsets import BDay

_days_of_week = {0: "Monday", 1: "Tuesday", 2: "Wednesday",
                 3: "Thursday", 4: "Friday"}

end = pd.Timestamp.today() - BDay(1)
# risk = GetBulkRiskiness(_stocks, start=pd.Timestamp.today() - BDay(60),
#                         end=pd.Timestamp.today())
try:
    risk = pd.read_excel("Risk.xlsx")
    risk.to_excel("Risk.xlsx")
except Exception:
    pass

end = pd.Timestamp.today() - BDay(1)
Today = GetTSXReturns(end - BDay(1), end)
Today.columns = ["Monday"]

end = pd.Timestamp.today() - BDay(2)
Thursday = GetTSXReturns(end - BDay(1), end)
Thursday.columns = ["Friday"]
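# Illustration (not part of the snippet above) of how BDay arithmetic behaves
# around a weekend: subtracting one business day from a Monday lands on the
# preceding Friday, not Sunday.
import pandas as pd
from pandas.tseries.offsets import BDay

monday = pd.Timestamp('2019-07-08')
print(monday - BDay(1))   # 2019-07-05 (Friday)
print(monday - BDay(60))  # roughly three calendar months earlier, counting business days only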
    return str(mult) + code

#----------------------------------------------------------------------
# Offset names ("time rules") and related functions

from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli,
                                    Week, Micro, MonthEnd, MonthBegin,
                                    BMonthBegin, BMonthEnd, YearBegin, YearEnd,
                                    BYearBegin, BYearEnd, QuarterBegin,
                                    QuarterEnd, BQuarterBegin, BQuarterEnd)

_offset_map = {
    'D': Day(),
    'B': BDay(),
    'H': Hour(),
    'T': Minute(),
    'S': Second(),
    'L': Milli(),
    'U': Micro(),
    None: None,

    # Monthly - Calendar
    'M': MonthEnd(),
    'MS': MonthBegin(),

    # Monthly - Business
    'BM': BMonthEnd(),
    'BMS': BMonthBegin(),
dateFormater = ConciseDateFormatter(AutoDateLocator())
palette = sns.color_palette()

#%%
rki, meldedatum, hospital = read_case_data("berlin-cases.csv",
                                           "berlin-cases-meldedatum.csv",
                                           "berlin-hospital.csv")

#%% Activity participation

from pandas.tseries.offsets import BDay

isBusinessDay = BDay().onOffset

act = pd.read_csv(
    "C:/home/Development/matsim-org/matsim-episim/output/BerlinSnzData_daily_until20200705.csv",
    sep="\t", parse_dates=[0])

act_week = act[act.date.map(isBusinessDay)]
act_wend = act[act.date.map(lambda *args: not isBusinessDay(*args))]

fig, ax = plt.subplots(dpi=250, figsize=(7.5, 3.8))
ax = sns.scatterplot(x="date", y="home", label="home", s=40,
def test_freq_offsets():
    off = BDay(1, offset=timedelta(0, 1800))
    assert off.freqstr == "B+30Min"

    off = BDay(1, offset=timedelta(0, -1800))
    assert off.freqstr == "B-30Min"
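# Illustration of intended usage (my reading of the test above, stated as an
# assumption): a BDay with an `offset` timedelta first moves by n business days
# and then applies the intraday offset.
import pandas as pd
from datetime import timedelta
from pandas.tseries.offsets import BDay

ts = pd.Timestamp("2019-07-05 09:00")           # a Friday
print(ts + BDay(1, offset=timedelta(0, 1800)))  # 2019-07-08 09:30 (Monday, plus 30 minutes)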
def mainloop(trackit_api_username, jira_rest_call_post, jira_rest_call_get_trackit_id, jira_authorization, db_connection, jira_key, track_it_full_hostname, jira_server_address, sql, attachment_folder, duedate_map): """ Purpose: The main function to run everything above. From retrieving Track-It! data from the database to POSTing it to JIRA and closing Track-It! work orders with a comment to the new JIRA issue. Args: trackit_api_username (str): A Track-It! Technician ID jira_rest_call_post (str): POST-able URL for JIRA jira_rest_call_get_trackit_id (str): GET-able URL for JIRA jira_authorization (str): BASE64 Encoded Username:Password db_connection (pymssql Object): A very specific library object from pymssql-2.1.3-cp36-cp36m-win_amd64.whl which can connect to SQL Server 2008. jira_key (str): JIRA Project key track_it_full_hostname (str): Track-It! Server Address/URL jira_server_address (str): JIRA Server Address/URL sql (str): A very specific SQL Select statement. Example: use TRACKIT_DATA; SELECT wo_num 'Workorder Number', priority 'Priority', LEFT(CONVERT(VARCHAR, REQDATE, 120), 10) AS 'Request Date', task AS 'Summary', request 'Requestor', RESPONS AS 'Assignee Username', LEFT(CONVERT(VARCHAR, duedate, 120), 10) AS 'Due Date', LEFT(CONVERT(VARCHAR, modidate, 120), 10) AS 'Modify Date', TRACKIT_DATA.dbo.tasks.dept, type, wotype2 'Subtype', wotype3 'Category', respons 'Assigned Technician', descript 'Description', note 'Notes', lookup1 'Company' FROM TRACKIT_DATA.dbo.tasks WHERE tasks.respons in ('Maxim Tam') and priority in ('Ongoing Support','High','Urgent','Critical','Routine','Project') and WorkOrderStatusId = 1 and reqdate >= '2017-07-11' ORDER BY RESPONS, WO_NUM DESC; attachment_folder (str): Folder path to a specific work order's attachments duedate_map (str): Dictionary of {Issue Priority:Resolution Days} Returns: Nothing. """ # Declare database query output database_full_output = get_database_cursor(db_connection, sql, jira_key) print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Amount of open workorders: " + str(len(database_full_output))) # Cleans query output to create list of new Trackit workorder IDs # Retrieves all existing Jira Trackit IDs and compares the two lists trackit_ids_trackit = [ int(x["fields"]["customfield_10411"]) for x in database_full_output ] trackit_ids_jira_dict = dict((get_request(jira_rest_call_get_trackit_id, jira_authorization))) trackit_ids_jira = [ int(issue["fields"]["customfield_10411"]) for issue in trackit_ids_jira_dict["issues"] if str(issue["fields"]["customfield_10411"]) != 'None' ] invalid_ids = [x for x in trackit_ids_trackit if x in trackit_ids_jira] database_full_output_valid = [ x for x in database_full_output if int(x["fields"]["customfield_10411"]) not in invalid_ids ] # Submits POST request to JIRA to create new issue # and closes & comments on the old Track-It! 
ticket if len(database_full_output_valid) > 0: for data in database_full_output_valid: print("Moving workorders to Jira:" + str(data["fields"]["customfield_10411"])) logging.info("Moving workorders to Jira:" + str(data["fields"]["customfield_10411"])) trackit_key = create_trackit_key( trackit_api_username, track_it_full_hostname=track_it_full_hostname) try: response = (post_request(jira_rest_call_post, data, jira_authorization)) except HTTPError: # replace customfield with data if you want the post request json string logging.error( str(sys.exc_info()[1]) + ": " + str(data["fields"]["customfield_10411"])) continue # Jira URL to newly created ticket response = (response) jira_link = "http://" + jira_server_address + \ "/browse/" + str(response["key"]) jira_attachment_link = "http://" + jira_server_address + r"/rest/api/2/issue/" + str( response["key"]) + "/attachments" print(jira_attachment_link) logging.info("Successfully migrated to Jira at: " + jira_attachment_link) import_attachments(data["fields"]["customfield_10411"], jira_attachment_link, jira_authorization, attachment_folder) post_addnote_request_trackit(trackit_key, data["fields"]["customfield_10411"], jira_link, track_it_full_hostname) post_close_request_trackit(trackit_key, data["fields"]["customfield_10411"], jira_link, track_it_full_hostname) # Attempts to close TrackIt tickets when previously unable to if len(invalid_ids) > 0: print("Updating previously locked workorders: " + str(invalid_ids)) for keys in invalid_ids: trackit_key = create_trackit_key(trackit_api_username, track_it_full_hostname) jira_trackit_id_url = "http://" + jira_server_address + \ "/rest/api/2/search?jql=%22TrackIT%20%23%22%3D" + \ str(keys) jira_link = "http://" + jira_server_address + "/browse/" \ + get_request(jira_trackit_id_url, jira_authorization)["issues"][0]["key"] post_addnote_request_trackit(trackit_key, keys, jira_link, track_it_full_hostname) post_close_request_trackit(trackit_key, keys, jira_link, track_it_full_hostname) # Due Date Creation # @TODO: PLEASE REFACTOR TO SHRINK MAINLOOP get_empty_duedates_url = "http://" + jira_server_address + \ "/rest/api/2/search?jql=project%20%3D%20" \ "" + jira_key + "" \ "%20AND%20duedate%20is%20EMPTY%20AND%20type%20%20%3D%20%22Incident%20Management%22" duedates_response = get_request(get_empty_duedates_url, jira_authorization) srq_ids_empty = [[ ticket["key"], ticket["fields"]["priority"]["name"], pd.to_datetime(ticket["fields"]["created"][0:10]) ] for ticket in duedates_response["issues"]] for value in srq_ids_empty: duedate = str(value[2] + BDay(duedate_map[str(value[1])]))[0:10] headers = { "Authorization": "Basic YXBpOlBhc3N3b3Jk", "Content-Type": "application/json" } r = requests.put('http://' + config["jira_server_address"] + \ '/rest/api/2/issue/' + str(value[0]), data=json.dumps({"fields": {"duedate": str(duedate)}}), headers=headers)
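# The due-date step at the end of mainloop() above adds a per-priority number of
# business days to the ticket creation date. A condensed illustration with a
# made-up duedate_map (the real mapping comes from configuration):
import pandas as pd
from pandas.tseries.offsets import BDay

duedate_map = {"Critical": 1, "High": 3, "Routine": 10}  # hypothetical values
created = pd.to_datetime("2017-07-13")                   # a Thursday
duedate = str(created + BDay(duedate_map["High"]))[0:10]
print(duedate)  # '2017-07-18' -- three business days later, skipping the weekend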
affirmation_list = ['Y', 'y', 'Yes', 'YES', 'yes']
negation_list = ['N', 'n', 'No', 'NO', 'no']
valid_answers = affirmation_list + negation_list

# Explaining the nature of the script
print("------------------------------------------------")
print("This script will analyse a reverse MACD trading strategy "
      "in a cryptocurrency portfolio")
print("The maximum absolute exposure per asset is 25%")
print("------------------------------------------------")

'''
###########
## DATES ##
###########
'''
previous_bday = pd.Timestamp.today() - BDay(1)
earliest_date = dt.datetime(2015, 8, 8)         # earliest date with data available
default_initial_date = dt.datetime(2016, 1, 1)
default_final_date = dt.datetime(2019, 4, 30)
initial_date, final_date = gdate(default_initial_date, default_final_date)

'''
####################
## GATHERING DATA ##
####################
'''
df_data = gdata()
df_data = df_data[initial_date:final_date]
df_data.drop(['nasdaq_close', 'nasdaq_return'], axis=1, inplace=True)

'''
########################
## GENERATING SIGNALS ##
    prop = entity.property.add()
    prop.name = name
    prop.value.indexed = indx
    prop.value.string_value = item


if __name__ == "__main__":
    # launch example: python DailyForecast.py

    # track the running time of the simulation
    startTime = tt.time()

    # first day of trading: after 19:00 US/Eastern, predict the next business
    # day; otherwise roll today forward to the nearest business day (BDay(0))
    nowTime = datetime.now(tz=timezone('US/Eastern')).time()
    if nowTime >= time(19, 0):
        dayToPredict = datetime.now(tz=timezone('US/Eastern')) + BDay(1)
    else:
        dayToPredict = datetime.now(tz=timezone('US/Eastern')) + BDay(0)

    print("\nPredicting %s\n" % dayToPredict.date())
    logging.info("Predicting %s\n" % dayToPredict.date())

    NPredPast = 10
    history_len = 100  # days
    saftey_days = 10

    startOfPredictSim = dayToPredict - BDay(NPredPast)
    endOfHistoricalDate = dayToPredict - BDay(1)
    startOfHistoricalDate = startOfPredictSim - BDay(history_len + saftey_days)
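# Sketch of the BDay(0) behaviour relied on above (my reading, not from the
# source): adding zero business days rolls a weekend timestamp forward to the
# next business day and leaves a weekday unchanged.
import pandas as pd
from pandas.tseries.offsets import BDay

print(pd.Timestamp('2019-07-06') + BDay(0))  # Saturday -> 2019-07-08 (Monday)
print(pd.Timestamp('2019-07-08') + BDay(0))  # Monday stays 2019-07-08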
def test_generate(self):
    rng1 = list(generate_range(START, END, offset=BDay()))
    rng2 = list(generate_range(START, END, time_rule='B'))
    assert rng1 == rng2
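# A public-API analogue of the internal `generate_range` check above (my
# illustration, not part of the test): a bdate_range equals a date_range built
# with freq='B'.
import pandas as pd

r1 = pd.bdate_range('2019-07-01', '2019-07-31')
r2 = pd.date_range('2019-07-01', '2019-07-31', freq='B')
assert r1.equals(r2)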
def PredictPrices(prices: PricingData, predictionMethod: int = 0, daysForward: int = 5, numberOfLearningPasses: int = 500): #Simple procedure to test different prediction methods assert (0 <= predictionMethod <= 2) plot = PlotHelper() if predictionMethod == 0: #Linear projection print('Running Linear Projection model predicting ' + str(daysForward) + ' days...') modelDescription = prices.stockTicker + '_Linear_daysforward' + str( daysForward) predDF = prices.GetPriceHistory() predDF['Average'] = (predDF['Open'] + predDF['High'] + predDF['Low'] + predDF['Close']) / 4 d = predDF.index[-1] for i in range( 0, daysForward ): #Add new days to the end for crystal ball predictions predDF.loc[d + BDay(i + 1), 'Average_Predicted'] = 0 predDF['PastSlope'] = predDF['Average'].shift( daysForward) / predDF['Average'].shift(daysForward * 2) predDF['Average_Predicted'] = predDF['Average'].shift( daysForward) * predDF['PastSlope'] predDF['PercentageDeviation'] = abs( (predDF['Average'] - predDF['Average_Predicted']) / predDF['Average']) else: SourceFieldList = ['High', 'Low', 'Open', 'Close'] if predictionMethod == 1: #LSTM learning print('Running LSTM model predicting ' + str(daysForward) + ' days...') SourceFieldList = None UseLSTM = True window_size = 10 modelDescription = prices.stockTicker + '_LSTM' + '_epochs' + str( numberOfLearningPasses) + '_histwin' + str( window_size) + '_daysforward' + str(daysForward) elif predictionMethod == 2: #CNN Learning print('Running CNN model predicting ' + str(daysForward) + ' days...') UseLSTM = False window_size = 16 * daysForward modelDescription = prices.stockTicker + '_CNN' + '_epochs' + str( numberOfLearningPasses) + '_histwin' + str( window_size) + '_daysforward' + str(daysForward) learningModule = StockPredictionNN(modelName=prices.stockTicker, UseLSTM=UseLSTM) learningModule.LoadSource(prices.GetPriceHistory(), SourceFieldList=SourceFieldList, window_size=window_size) learningModule.LoadTarget(targetDF=None, prediction_target_days=daysForward) learningModule.MakeBatches(batch_size=32, train_test_split=.93) learningModule.Train(epochs=numberOfLearningPasses) learningModule.Predict(True) predDF = learningModule.GetTrainingResults(True, True) averageDeviation = predDF['PercentageDeviation'].tail( round(predDF.shape[0] / 4)).mean() #Average of the last 25% to account for training. print('Average deviation: ', averageDeviation * 100, '%') predDF = predDF.reindex(sorted(predDF.columns), axis=1) #Sort columns alphabetical predDF.to_csv(dataFolder + modelDescription + '.csv') plot.PlotDataFrame(predDF[['Average', 'Average_Predicted']], modelDescription, 'Date', 'Price', True, 'experiment/' + modelDescription) plot.PlotDataFrameDateRange(predDF[['Average', 'Average_Predicted']], None, 160, modelDescription + '_last160ays', 'Date', 'Price', dataFolder + modelDescription + '_last160Days') plot.PlotDataFrameDateRange( predDF[['Average', 'Average_Predicted']], None, 1000, modelDescription + '_last1000ays', 'Date', 'Price', dataFolder + modelDescription + '_last1000Days')
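# The linear-projection branch of PredictPrices above appends placeholder rows
# for future business days before filling in predictions. A condensed sketch of
# that pattern on a toy frame (column names are hypothetical):
import pandas as pd
from pandas.tseries.offsets import BDay

df = pd.DataFrame({'Average': [10.0, 10.5, 11.0]},
                  index=pd.bdate_range('2019-07-01', periods=3))
last_day = df.index[-1]
days_forward = 5
for i in range(days_forward):
    # enlarging .loc assignment creates one new row per future business day
    df.loc[last_day + BDay(i + 1), 'Average_Predicted'] = 0.0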
def get_expiries_from_dates(self, date_time_index, calendar, tenor):
    freq = self.get_business_days_tenor(tenor)
    return pandas.DatetimeIndex(date_time_index + BDay(freq))
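# Illustration (not from the class above) of what the return line does: adding
# BDay(n) to a DatetimeIndex shifts every element forward by n business days
# (pandas may emit a PerformanceWarning, since BDay is applied element-wise).
import pandas as pd
from pandas.tseries.offsets import BDay

dates = pd.DatetimeIndex(['2019-07-03', '2019-07-05'])
print(dates + BDay(2))  # DatetimeIndex(['2019-07-05', '2019-07-09'])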
def main(): demo = 'd286f23fd3d3c4fbd6cc5768c2a6388d' #data = read_csv('/Users/alenshaju/Downloads/SP500_tickers_100.csv') #companies = data['Ticker'].to_list()[:10] consumer_companies = [ 'TJX', 'NKE', 'TGT', 'HD', 'LOW', 'PG', 'WMT', 'COST', 'MDLZ', 'EL', 'KO', 'PEP', 'PM', 'MO', 'BKNG', 'MCD', 'SBUX' ] energy_companies = ['NEE', 'XOM', 'CVX'] fig_companies = [ 'BLK', 'AXP', 'V', 'MA', 'PYPL', 'FIS', 'JPM', 'BAC', 'WFC', 'USB', 'SPGI', 'MS', 'SCHW', 'GS', 'BRK.B', 'AMT' ] #C healthcare_companies = [ 'ABBV', 'AMGN', 'GILD', 'ABT', 'DHR', 'MDT', 'SYK', 'ISRG', 'CVS', 'CI', 'TMO', 'UNH', 'ANTM', 'JNJ', 'PFE', 'LLY', 'BMY' ] industrials_companies = [ 'BA', 'RTX', 'LMT', 'DE', 'UPS', 'TSLA', 'GM', 'CAT', 'HON', 'GE', 'MMM', 'LIN', 'UNP' ] tech_companies = [ 'ADBE', 'CRM', 'INTU', 'GOOG', 'GOOG.L', 'FB', 'AMZN', 'ACN', 'IBM', 'AMAT', 'LRCX', 'NVDA', 'INTC', 'AVGO', 'TXN', 'QCOM', 'MU', 'AMD', 'MSFT', 'ORCL', 'NOW', 'AAPL' ] mt_companies = ['CMCS.A', 'CHTR', 'CSCO', 'VZ', 'T', 'DIS', 'NFLX'] companies = ['UAL'] past_call_dict = {} yec = YahooEarningsCalendar() for company in companies: print("Ticker:", company) past_calls_df = get_past_earnings_call(yec, company) past_call_dict[company] = past_calls_df df_returns_scores = pd.DataFrame(columns=['Return', 'Score']) sia = SentimentIntensityAnalyzer() d = {} with open( "/Users/alenshaju/Downloads/LoughranMcDonald_MasterDictionary_2018.txt" ) as f: for line in f: (key, val) = line.split() d[key] = float(val) sia.lexicon.update(d) excel_df = pd.DataFrame( columns=['Ticker', 'Quarter', 'Sentiment Score', 'Returns']) for company in companies: print("For company: ", company) for i, row in past_call_dict[company].iterrows(): date = datetime.datetime.strptime(row['startdatetime'], '%Y-%m-%dT%H:%M:%S.%fZ') quarter = pd.Timestamp(date).quarter year = date.year if year <= datetime.datetime.now().year: if year == datetime.datetime.now().year: if quarter >= pd.Timestamp( datetime.datetime.now()).quarter: continue transcript = requests.get( f'https://financialmodelingprep.com/api/v3/earning_call_transcript/{company}?quarter={quarter}&year={year}&apikey={demo}' ).json() if len(transcript) == 0: continue transcript = transcript[0]['content'].split('\n') if not bool(len(pd.bdate_range(date, date))): date = date - BDay(1) if (date + BDay(1)) in get_trading_close_holidays(year): end_date = date + BDay(1) else: end_date = date stock = yf.download(company, start=date, end=end_date + BDay(1) + datetime.timedelta(1), progress=False) price_change_rate = (stock['Adj Close'][1] / stock['Adj Close'][0]) - 1 price_change_percent = price_change_rate * 100 sentiment_score = sia.polarity_scores( transcript[0])['pos'] - sia.polarity_scores( transcript[0])['neg'] print(transcript) print('score: ', sia.polarity_scores(transcript[0])) print("price change: ", price_change_rate) df_returns_scores = df_returns_scores.append( { 'Return': price_change_rate, 'Score': sentiment_score }, ignore_index=True) excel_df = excel_df.append( { 'Ticker': company, "Date": date, 'Quarter': quarter, 'Sentiment Score': sentiment_score, 'Returns': price_change_rate }, ignore_index=True) if i > 8: # 10years - 4 quarters break excel_df.to_excel("/Users/alenshaju/Downloads/mt_excel_file_v1.xlsx") x = df_returns_scores.Score.values.reshape(-1, 1) y = df_returns_scores.Return.values.reshape(-1, 1) x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=42) support_vector_reg_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1) support_vector_reg_model.fit(x_train, y_train) 
y_pred = support_vector_reg_model.predict(x_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2_data = r2_score(y_test, y_pred)
print("Root mean square error: ", rmse)
print("R^2 score: ", r2_data)

train_test_label = ['Training Data', 'Testing Data']
model_color = ['m', 'c', 'g']
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 10), sharey=True)

###### Training Data ##########
axes[0].plot(x_test, y_pred, color=model_color[0], lw=2,
             label='{} model'.format(train_test_label[0]))
axes[0].scatter(x_train[np.setdiff1d(np.arange(len(x_train)),
                                     support_vector_reg_model.support_)],
                y_train[np.setdiff1d(np.arange(len(x_train)),
                                     support_vector_reg_model.support_)],
                facecolor="none", edgecolor=model_color[0], s=50,
                label='Training data')
axes[0].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=1,
               fancybox=True, shadow=True)

####### Testing Data #########
axes[1].plot(x_test, y_pred, color=model_color[1], lw=2,
             label='{} model'.format(train_test_label[1]))
axes[1].scatter(x_test[np.setdiff1d(np.arange(len(x_test)),
                                    support_vector_reg_model.support_)],
                y_pred[np.setdiff1d(np.arange(len(x_test)),
                                    support_vector_reg_model.support_)],
                facecolor="none", edgecolor=model_color[1], s=50,
                label='Testing data')
axes[1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=1,
               fancybox=True, shadow=True)

fig.text(0.5, 0.04, 'data', ha='center', va='center')
fig.text(0.06, 0.5, 'target', ha='center', va='center', rotation='vertical')
fig.suptitle("Support Vector Regression", fontsize=14)
plt.show()
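# The earnings-call script above rolls a non-trading call date back by one
# business day and skips exchange holidays via its get_trading_close_holidays
# helper. A hedged sketch of the same idea using the stock pandas holiday
# calendar (USFederalHolidayCalendar is only a rough approximation of NYSE
# closures, and this is not the source's helper):
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

trading_day = CustomBusinessDay(calendar=USFederalHolidayCalendar())

def previous_trading_day(date):
    """Roll back to the most recent trading day (skips weekends and US holidays)."""
    date = pd.Timestamp(date)
    if len(pd.date_range(date, date, freq=trading_day)) == 0:  # not a trading day
        date = date - trading_day
    return date

print(previous_trading_day('2019-07-04'))  # Independence Day -> 2019-07-03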