        data = pd.Series([fixed_rate], [dt.date.min])
        IndexProvider.__init__(self, data)
        self._data = data
        self._index_name = self._data.index.name
        if self._index_name == 'Date':
            self._o_dates = [d.toordinal() for d in self._data.index]

    def index_value(self, date):
        return self._data[0]


# Example
if __name__ == '__main__':
    # This is to test time series
    start_date = dt.date(2014, 11, 11)
    dates = pd.date_range(start_date, periods=120, freq="MS").date
    index_values = np.random.randn(120)
    index_data = pd.TimeSeries(index_values, index=dates)
    inv_index = IndexProvider(index_data)
    test_date = dt.date(2015, 1, 1)
    print(inv_index.index_value(test_date))

    # This is to test DataFrame
    start_date = dt.date(2014, 11, 11)
    dates = pd.date_range(start_date, periods=120, freq="MS").date
    index_values = np.random.randn(360).reshape(120, 3)
    index_data = pd.DataFrame(index_values, index=dates)
    inv_index = IndexProvider(index_data)
    test_date = dt.date(2015, 1, 1)
    print(inv_index.index_value(test_date))
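# Several snippets in this collection construct and query an IndexProvider,
# whose definition is not included here. The following is a minimal sketch of
# what those call sites assume (a date-indexed Series/DataFrame with stepwise
# lookup by date); the real class presumably does more.
import bisect

import pandas as pd


class IndexProvider(object):
    """Minimal sketch: wrap a date-indexed Series/DataFrame, look up by date."""

    def __init__(self, data, name=None):
        self._data = data
        self.name = name

    @property
    def data(self):
        return self._data

    def index_value(self, date):
        # Return the last known value on or before `date` (step interpolation).
        dates = list(self._data.index)
        pos = bisect.bisect_right(dates, date) - 1
        if pos < 0:
            raise ValueError("date %s is before the first index point" % date)
        return self._data.iloc[pos]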
def test_TimeSeries_deprecation(self):
    # deprecation TimeSeries, #10890
    with tm.assert_produces_warning(FutureWarning):
        pd.TimeSeries(1, index=date_range('20130101', periods=3))
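# For reference: pd.TimeSeries was deprecated in pandas 0.17.0 as a plain
# alias of pd.Series (GH #10890, the issue the test above refers to) and
# removed in 0.19.0, so every pd.TimeSeries call in this collection rewrites
# one-for-one. A minimal sketch of the modern spelling:
import pandas as pd

ts = pd.Series([1.0, 2.0, 3.0],
               index=pd.date_range('2013-01-01', periods=3, freq='D'))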
def finalise(self):
    self.data = pd.TimeSeries(index=self.period, data=self._data)
import pandas
import statsmodels.api as sm

# Getting started
# ---------------
data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
from datetime import datetime
dates = sm.tsa.datetools.dates_from_range('1700', length=len(data.endog))

# Using Pandas
# ------------
# Make a pandas TimeSeries or DataFrame
endog = pandas.TimeSeries(data.endog, index=dates)
# and instantiate the model
ar_model = sm.tsa.AR(endog, freq='A')
pandas_ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)

# Let's do some out-of-sample prediction
pred = pandas_ar_res.predict(start='2005', end='2015')
print(pred)

# Using explicit dates
# --------------------
ar_model = sm.tsa.AR(data.endog, dates=dates, freq='A')
ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)
pred = ar_res.predict(start='2005', end='2015')
import numpy
import pandas
from datetime import datetime

import py

import serializer

simple_tests = [
    # type: expected before/after repr
    4,
    3.25,
    [1, 2, 3],
    ["123", "hello"],
    (1, 2, 3),
    {(1, 2, 3): 32},
    datetime(2014, 1, 1),
    numpy.array([datetime(2014, 1, 1)]),
    pandas.date_range(datetime(2014, 1, 1), periods=12),
    pandas.DataFrame({"col1": pandas.TimeSeries(datetime(2014, 1, 1))}),
]


@py.test.mark.parametrize("obj", simple_tests)
def test_simple(obj):
    j = serializer.data_to_json(obj)
    back = serializer.json_to_data(j)
    try:
        assert back == obj
    except ValueError:
        # Array-like comparisons return elementwise results, not a bool.
        assert all(back == obj)
])
def test_numpy_array_handler(arr_before):
    buf = dumps(arr_before)
    arr_after = loads(buf)
    assert_equal(arr_before, arr_after)


def test_nested_array():
    data_before = {"1": np.array([1, 2])}
    buf = dumps(data_before)
    data_after = loads(buf)
    assert_equal(data_before["1"], data_after["1"])


@pytest.mark.parametrize('ts_before', [
    pd.TimeSeries([1, 2, 3], index=[0, 1, 2]),
    pd.TimeSeries([1., 2., 3.],
                  pd.date_range('1970-01-01', periods=3, freq='S')),
    pd.TimeSeries([1., 2., 3.],
                  pd.date_range('1970-01-01', periods=3, freq='D')),
])
def test_pandas_timeseries_handler(ts_before):
    buf = dumps(ts_before)
    ts_after = loads(buf)
    assert_series_equal_strict(ts_before, ts_after)


@pytest.mark.parametrize('index_before', [
    pd.Index([0, 1, 2]),
def butter_bandpass_filter(data, lowcut=0.1, highcut=20.0, fs=512.0, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    return pd.TimeSeries(lfilter(b, a, data), index=data.index.copy())
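# `butter_bandpass` is called above but not defined in this snippet. A minimal
# sketch of the conventional SciPy helper it presumably wraps, normalising the
# cutoffs by the Nyquist frequency (an assumption, not the original author's
# code):
from scipy.signal import butter


def butter_bandpass(lowcut, highcut, fs, order=5):
    # butter() expects critical frequencies as fractions of Nyquist.
    nyq = 0.5 * fs
    return butter(order, [lowcut / nyq, highcut / nyq], btype='band')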
def null_transformer(data, genome, loci, prediction_steps):
    """This prediction model assumes tomorrow will be entirely flat."""
    return pd.TimeSeries(data=data["Load"][:-prediction_steps].mean(),
                         index=data.index[-prediction_steps:])
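# Usage sketch for the flat model above, on a hypothetical hourly frame
# (`genome` and `loci` are unused by this particular model):
import pandas as pd

idx = pd.date_range('2014-01-01', periods=8, freq='H')
frame = pd.DataFrame({"Load": range(8)}, index=idx)
flat = null_transformer(frame, genome=None, loci=None, prediction_steps=2)
# `flat` repeats the mean of the first six loads (2.5) over the last two
# timestamps.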
def test_pandas_endog():
    # Test various types of pandas endog inputs (e.g. TimeSeries, etc.)

    # Example (failure): pandas.Series, no dates
    endog = pd.Series([1., 2.])
    # raises error due to no dates
    assert_raises(ValueError, check_endog, endog, **kwargs)

    # Example: pandas.Series
    dates = pd.date_range(start='1980-01-01', end='1981-01-01', freq='AS')
    endog = pd.Series([1., 2.], index=dates)
    mod = check_endog(endog, **kwargs)
    mod.filter([])

    # Example: pandas.Series, string datatype
    endog = pd.Series(['a'], index=dates)
    # raises error due to direct type casting check in statsmodels base classes
    assert_raises(ValueError, check_endog, endog, **kwargs)

    # Example: pandas.TimeSeries
    endog = pd.TimeSeries([1., 2.], index=dates)
    mod = check_endog(endog, **kwargs)
    mod.filter([])

    # Example: pandas.DataFrame with 1 column
    endog = pd.DataFrame({'a': [1., 2.]}, index=dates)
    mod = check_endog(endog, **kwargs)
    mod.filter([])

    # Example (failure): pandas.DataFrame with 2 columns
    endog = pd.DataFrame({'a': [1., 2.], 'b': [3., 4.]}, index=dates)
    # raises error because 2 columns means k_endog=2, but the design matrix
    # set in **kwargs is shaped (1, 1)
    assert_raises(ValueError, check_endog, endog, **kwargs)

    # Check behavior of the link maintained between passed `endog` and
    # `mod.endog` arrays
    endog = pd.DataFrame({'a': [1., 2.]}, index=dates)
    mod = check_endog(endog, **kwargs)
    assert_equal(mod.endog.base is not mod.data.orig_endog, True)
    assert_equal(mod.endog.base is not endog, True)
    assert_equal(mod.data.orig_endog.values.base is not endog, True)
    endog.iloc[0, 0] = 2
    # there is no link to mod.endog
    assert_equal(mod.endog, np.r_[1, 2].reshape(2, 1))
    # there remains a link to mod.data.orig_endog
    assert_allclose(mod.data.orig_endog, endog)

    # Example: pandas.DataFrame with 2 columns
    # Update kwargs for k_endog=2
    kwargs2 = {
        'k_states': 1,
        'design': [[1], [0.]],
        'obs_cov': [[1, 0], [0, 1]],
        'transition': [[1]],
        'selection': [[1]],
        'state_cov': [[1]],
        'initialization': 'approximate_diffuse'
    }
    endog = pd.DataFrame({'a': [1., 2.], 'b': [3., 4.]}, index=dates)
    mod = check_endog(endog, k_endog=2, **kwargs2)
    mod.filter([])
plt.savefig(fig_filename + str(timeperiod_num) + '_' + str(plot_num))
plt.show()

plot_num += 1
r = np.sqrt(plc['CTLSOLUTION1']**2 + plc['CTLSOLUTION2']**2)
ax = r.plot()
ax.set_ylabel('arcsec')
#ax.set_ybound(2800, 3200)
ax.set_title('CTL Solution period ' + str(timeperiod_num))
plt.savefig(fig_filename + str(timeperiod_num) + '_' + str(plot_num))
plt.show()

fit = np.polyfit(r.index.astype(np.int64), r.values, 1)
ylin = fit[0] * r.index.astype(np.int64) + fit[1]
fit = pandas.TimeSeries(ylin, index=r.index)
df = pandas.DataFrame(r)
df[1] = fit
df.columns = ['data', 'fit']

plot_num += 1
ax = df.plot()
ax.set_ylabel('arcsec')
#ax.set_ybound(2800, 3200)
ax.set_title('CTL Solution period ' + str(timeperiod_num))
plt.savefig(fig_filename + str(timeperiod_num) + '_' + str(plot_num))
plt.show()

diff = df['data'] - df['fit']
plot_num += 1
def get_time_series(self):
    values, dates = self.get_data()
    return pd.TimeSeries(values, index=dates)
    fig.tight_layout()
    return fig


if __name__ == "__main__":
    import numpy as np
    from statsmodels.tsa.arima_process import ArmaProcess

    np.random.seed(123)
    ar = [1, .35, .8]
    ma = [1, .8]
    arma = ArmaProcess(ar, ma, nobs=100)
    # isstationary and isinvertible are properties, not methods
    assert arma.isstationary
    assert arma.isinvertible
    y = arma.generate_sample()
    dates = pd.date_range("1/1/1990", periods=len(y), freq='M')
    ts = pd.TimeSeries(y, index=dates)

    xpath = "/home/skipper/src/x12arima/x12a"

    try:
        results = x13_arima_analysis(xpath, ts)
    except Exception:
        print("Caught exception")
        results = x13_arima_analysis(xpath, ts, log=False)

    # import pandas as pd
    # seas_y = pd.read_csv("usmelec.csv")
    # seas_y = pd.TimeSeries(seas_y["usmelec"].values,
    #                        index=pd.DatetimeIndex(seas_y["date"], freq="MS"))
    # results = x13_arima_analysis(xpath, seas_y)
#fill the dataframe of shares to add (for each trade) of each symbol (df_trade_matrix)
#do this by iterating through the orders (df_orders) and filling the number
#of shares for that particular symbol and date
for date, row in df_orders.iterrows():
    if row['action'] == 'Buy':
        shares_add = int(row['volume'])
    elif row['action'] == 'Sell':
        shares_add = -1 * int(row['volume'])
    else:
        continue  # skip rows with unknown actions
    symbol = row['sym']
    df_trade_matrix.loc[date][symbol] += float(shares_add)

## STEP 4
# create a time series for CASH - tells you what your CASH VALUE is
# (starting cash + any buy/sell you've made)
#df_cash = pd.DataFrame(np.zeros(len(ls_dt_unique)), ls_dt_unique, columns=['CASH'])
ts_cash = pd.TimeSeries(0.0, ldt_timestamps)
ts_cash[0] = i_start_cash

# for each order, subtract the cash used in that trade
# (need to multiply volume * price)
df_close = d_data['close']
for date, row in df_trade_matrix.iterrows():
    ## for sym in df_trade_matrix.columns:
    ##     price = df_close[sym].ix[date]
    ##     print price, sym
    ##     s_cash.loc[date] -= price * df_trade_matrix.loc[date][sym]  # update the cash SPENT
    ## use dot product - faster than the nested for loop (commented above this line)
    cash = np.dot(row.values.astype(float), df_close.ix[date].values)
    ts_cash[date] -= cash
def _hourbyhour_ar_ga_with_lags(data, genome, loci, prediction_steps, lags_2d):
    ar_col = data.columns.tolist().index('Load')
    start = max(0, len(data) - genome[loci.hindsight] - prediction_steps)
    prediction, _ = vector_ar(data[start:].values, lags_2d,
                              prediction_steps, [ar_col])
    return pd.TimeSeries(data=prediction,
                         index=data[-prediction_steps:].index)
from datetime import datetime

import pandas as pd


def queue_to_series(a, freq="s"):
    # Stamp the newest element "now" and space the rest backwards at `freq`.
    t = pd.date_range(end=datetime.now(), freq=freq, periods=len(a))
    return pd.TimeSeries(a, index=t)
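# Usage sketch: wrap the most recent readings from a deque so the newest
# sample is stamped with the current time (names here are illustrative):
from collections import deque

readings = deque([0.1, 0.4, 0.35], maxlen=3)
series = queue_to_series(list(readings), freq="s")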
offset_end_date = '2015-08-25'

asset1 = Quandl.get('WIKI/MMM', trim_start=offset_start_date,
                    trim_end=offset_end_date,
                    authtoken=AUTHTOKEN)['Adj. Close'].pct_change()[1:]
treasury_ret = Quandl.get('FRED/DTB3', trim_start=start_date,
                          trim_end=end_date,
                          authtoken=AUTHTOKEN)['VALUE'].pct_change()[1:]
bench = Quandl.get('YAHOO/INDEX_GSPC', trim_start=start_date,
                   trim_end=end_date,
                   authtoken=AUTHTOKEN)['Adjusted Close'].pct_change()[1:]
constant = pd.TimeSeries(np.ones(len(asset1.index)), index=asset1.index)

df = pd.DataFrame({
    'R1': asset1,
    'SPY': bench,
    'RF': treasury_ret,
    'Constant': constant
})
df = df.dropna()

OLS_model = regression.linear_model.OLS(df['R1'],
                                        df[['SPY', 'RF', 'Constant']])
fitted_model = OLS_model.fit()
b_SPY = fitted_model.params['SPY']
b_RF = fitted_model.params['RF']
a = fitted_model.params['Constant']
def plot_acf_pacf(df0):
    # Doesn't converge on different ads
    dframe = df0.copy()
    pivoted = dframe.pivot('date', 'ad', 'shown')
    pivoted.index = pd.to_datetime(pivoted.index)
    for ad in pivoted.columns:
        # for ad in ['ad_group_1']:
        print("Processing ad #%s" % ad.split('_')[2])
        subset = pivoted[np.isfinite(pivoted[ad])]
        time_series_initial = pd.TimeSeries(subset[ad].ravel(),
                                            index=pd.to_datetime(subset.index))
        time_series = np.log(time_series_initial)

        filename = out_dir_plots + 'AutoCorrPlots_' + ad + '.png'
        if not os.path.exists(filename):
            # print time_series
            pa = sm.tsa.pacf(time_series)
            acf = sm.tsa.acf(time_series)
            fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
            ax1.plot(acf)
            z = stats.norm.ppf(0.99)
            n = time_series.shape[0]
            ax1.axhline(y=z / np.sqrt(n), linestyle='--', color='red')
            ax1.axhline(y=-z / np.sqrt(n), linestyle='--', color='red')
            ax1.set_ylabel('Auto-Corr Func.')
            ax1.set_title(ad)
            ax2.plot(pa)
            ax2.axhline(y=z / np.sqrt(n), linestyle='--', color='red')
            ax2.axhline(y=-z / np.sqrt(n), linestyle='--', color='red')
            ax2.set_ylabel('Partial Auto-Corr Func.')
            plt.savefig(filename)
            plt.close()

        filename = out_dir_plots + 'Prediction_' + ad + '.png'
        if not os.path.exists(filename):
            try:
                # Most of the plots show a peak at lag=1 for ACF and for PACF
                # --> model with params p=1, q=0
                # res10 = sm.tsa.ARMA(time_series, (1, 0)).fit()
                # res71 = sm.tsa.ARMA(time_series, (7, 1)).fit()
                # res81 = sm.tsa.ARMA(time_series, (8, 1)).fit()
                res51 = sm.tsa.ARMA(time_series, (5, 1)).fit()
                # res121 = sm.tsa.ARMA(time_series, (12, 1)).fit()
                fig, ax = plt.subplots()
                ax = time_series.ix['2015-10-01':].plot(ax=ax)
                # fig = res10.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample=False)
                # fig = res20.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample=False)
                # fig = res11.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample=False)
                # fig = res81.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample=False)
                fig = res51.plot_predict('2015-11-23', '2015-12-16',
                                         dynamic=True, ax=ax,
                                         plot_insample=False)
                # fig = res121.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample=False)
                # fig2, ax2 = plt.subplots()
                # y_resid81 = res81.resid
                # y_resid91 = res91.resid
                # y_resid121 = res121.resid
                # y_resid81.plot()
                # y_resid91.plot()
                # y_resid121.plot()
            except ValueError:
                continue
            plt.savefig(filename)
            plt.close()
def daily_average(data, genome, loci, prediction_steps):
    start = -prediction_steps - genome[loci.hindsight]
    end = -prediction_steps
    return pd.TimeSeries(data=data["Load"][start:end].mean(),
                         index=data.index[-prediction_steps:])
import pandas as pd
import statsmodels.api as sm

# Getting started
# ---------------
data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
dates = sm.tsa.datetools.dates_from_range('1700', length=len(data.endog))

# Using Pandas
# ------------
# Make a pandas TimeSeries or DataFrame
endog = pd.TimeSeries(data.endog, index=dates)
# and instantiate the model
ar_model = sm.tsa.AR(endog, freq='A')
pandas_ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)

# Let's do some out-of-sample prediction
pred = pandas_ar_res.predict(start='2005', end='2015')
print(pred)

# Using explicit dates
# --------------------
ar_model = sm.tsa.AR(data.endog, dates=dates, freq='A')
ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)
pred = ar_res.predict(start='2005', end='2015')
      (num_hours, smoothness, zscore)
sys.stdout.flush()
start_time = time.time()

# This is the part that takes time
smoother = _get_smoother()(data, smoothness)
cleaner = cln.RegressionCleaner(smoother, zscore)
cleaned, _ = cleaner.get_cleaned_data(
    method=cln.RegressionCleaner.replace_with_bound)

# Wrap up and plot the result
end_time = time.time()
print "Done in %s." % SimpleTimer.period_to_string(start_time, end_time)
print cleaned
sys.stdout.flush()

plt.figure()
data.plot(style='r', label='Raw load')
spline = pd.TimeSeries(data=smoother.splev(range(len(cleaned))),
                       index=cleaned.index)
spline.plot(style='g', label='Smoothing spline')

# THE SAUSAGE!
lower, upper = cleaner.get_confidence_interval()
ax = plt.gca()
ax.fill_between(cleaned.index, lower, upper, facecolor='g', alpha=0.1)

cleaned.plot(style='b', label='Cleaned load')
plt.legend(loc=3)
plt.show()
MARKET_DATA_MANAGER = MarketDataManager()

# ---------- See the example on how all these work together ----------------
if __name__ == '__main__':
    from utils.database import pickle_save
    import pandas as pd
    import datetime as dt
    import numpy as np
    from Managers.ScenarioManager import EqBSEngine, ScenarioGenerator, FixRateEngine
    from lib.constants import BDAYS_PER_YEAR

    sample_credit_curve = IndexProvider(
        pd.TimeSeries(index=pd.date_range(start=dt.date(2000, 1, 1),
                                          periods=600, freq='MS').date,
                      data=[0.03] * 600))
    pickle_save(sample_credit_curve, 'sample_credit_curve',
                db_path=MARKET_DATA_DB)

    MARKET_DATA_MANAGER.reset()

    # =========== test re-set market data date ================
    print(MARKET_DATA_MANAGER.get_index('fake_libor_3m').data)
    MARKET_DATA_MANAGER.market_data_date = dt.date(2008, 1, 1)
    print(MARKET_DATA_MANAGER.get_index('fake_libor_3m').data)

    # ========== test scen gen table =============
    print(MARKET_DATA_MANAGER.scen_gen_table)
    eng = FixRateEngine(0.05)
def score(self, filename, subname=None):
    # Here subname must be a valid sub-challenge (10, 100, 100_multifactorial);
    # the batch will be inferred from the file name.
    # If a list is given, return the overall score; otherwise just the score
    # for that filename.
    if isinstance(filename, str):
        end = self._check_filename(filename)
        assert subname is not None, \
            "If one file provided, subname must be provided e.g., 10"
        subname = subname + "_" + end
        results = self.score_prediction(filename, subname=subname)
        del results['tpr']
        del results['fpr']
        del results['rec']
        del results['prec']
        return results
    elif isinstance(filename, list):
        assert len(filename) == 5, \
            "if a list of filenames is provided, it must contain 5 names"
        results = {}
        for i in [1, 2, 3, 4, 5]:
            tag = subname + "_" + str(i)
            assert tag in filename[i - 1], \
                "files must be sorted and ending in Size10_1, Size10_2, ...Size10_5"
            results['Net%s' % i] = self.score_prediction(filename[i - 1],
                                                         subname=tag)
        df = pd.DataFrame(results).T

        # get rid of non-important data
        df = df[['AUROC', 'AUPR', 'p_aupr', 'p_auroc']]
        df = df.astype('float64')

        final_score = -np.mean(np.log10(df[['p_auroc', 'p_aupr']]))
        results = {}
        results['AUPR_SCORE'] = final_score['p_aupr']
        results['AUROC_SCORE'] = final_score['p_auroc']
        overall_score = np.mean(final_score)
        for index in df.index:
            results['%s_AUROC' % index] = df.ix[index]['AUROC']
        for index in df.index:
            results['%s_AUPR' % index] = df.ix[index]['AUPR']
        final_score = 10 ** -(final_score)
        results['AUPR_PVAL'] = final_score['p_aupr']
        results['AUROC_PVAL'] = final_score['p_auroc']
        results['SCORE'] = overall_score

        results = pd.TimeSeries(results)
        results = results[[
            'SCORE', 'AUPR_PVAL', 'AUPR_SCORE', 'AUROC_PVAL', 'AUROC_SCORE',
            'Net1_AUPR', 'Net2_AUPR', 'Net3_AUPR', 'Net4_AUPR', 'Net5_AUPR',
            'Net1_AUROC', 'Net2_AUROC', 'Net3_AUROC', 'Net4_AUROC',
            'Net5_AUROC'
        ]]
        return results
def compute_quotient_metric(name, num_metric, den_metric):
    series = pd.TimeSeries(data[name])
    num_period = series.resample(resample_period, how=num_metric)
    den_period = series.resample(resample_period, how=den_metric)
    return num_period[shift:] / den_period[:len(num_period) - shift].values
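# Note: resample()'s `how=` keyword was deprecated in pandas 0.18 and later
# removed; the modern spelling aggregates after resampling. A sketch of the
# same quotient under that assumption (`data`, `resample_period` and `shift`
# as in the surrounding module):
def compute_quotient_metric_modern(name, num_metric, den_metric):
    series = pd.Series(data[name])
    num_period = series.resample(resample_period).agg(num_metric)
    den_period = series.resample(resample_period).agg(den_metric)
    return num_period[shift:] / den_period[:len(num_period) - shift].values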
try:
    os.remove(dbfilename)
except OSError:
    pass

setup_blank_tables(dbfilename, [
    "CREATE TABLE timeseries (datetime text, code text, price float)",
    "CREATE TABLE static (code text, fullname text)"
])

st_table = staticdata(dbname)
st_table.add("FTSE", "FTSE 100 index")
assert st_table.read("FTSE") == "FTSE 100 index"
st_table.modify("FTSE", "FTSE all share")
assert st_table.read("FTSE") == "FTSE all share"
st_table.delete("FTSE")
assert st_table.read("FTSE") is None

dt_table = tsdata(dbname)
somprices = pd.TimeSeries(range(100), pd.date_range('1/1/2014', periods=100))
dt_table.add("FTSE", somprices)
assert dt_table.read("FTSE").values[-1] == 99.0

## Remove the file so the example is clean next time
os.remove(dbfilename)

print "No problems"
""" Example: scikits.statsmodels.tsa.ARMA """ import numpy as np import scikits.statsmodels.api as sm # Generate some data from an ARMA process from scikits.statsmodels.tsa.arima_process import arma_generate_sample arparams = np.array([.75, -.25]) maparams = np.array([.65, .35]) # The conventions of the arma_generate function require that we specify a # 1 for the zero-lag of the AR and MA parameters and that the AR parameters # be negated. arparams = np.r_[1, -arparams] maparam = np.r_[1, maparams] nobs = 250 y = arma_generate_sample(arparams, maparams, nobs) # Now, optionally, we can add some dates information. For this example, # we'll use a pandas time series. import pandas dates = sm.tsa.datetools.dates_from_range('1980m1', length=nobs) y = pandas.TimeSeries(y, index=dates) arma_mod = sm.tsa.ARMA(y, freq='M') arma_res = arma_mod.fit(order=(2,2), trend='nc', disp=-1)
def run_va_model(self):
    raw_input = {"Acct Value": 1344581.6,
                 "Attained Age": 52.8,
                 "ID": "000001",
                 "Issue Age": 45.1,
                 "Issue Date": dt.date(2005, 6, 22),
                 "Initial Date": dt.date(2013, 2, 1),
                 "Maturity Age": 90,
                 "Population": 1,
                 "Riders": dict({}),
                 "ROP Amount": 1038872.0,
                 "Gender": "F",
                 "RPB": 1038872.0,
                 "Free Withdrawal Rate": 0.1,
                 "Asset Names": ["Fund A", "Fund B"],
                 "Asset Values": [1344581.6 / 2, 1344581.6 / 2]}

    # For now, we assume the init_date is month begin
    step_per_year = 12
    periods = 360
    init_date = dt.date(2013, 2, 1)
    pricing_date = init_date

    # Set up the investment index
    #credit_rider = isr.InsCreditRateFixed(credit_rate)

    # Set up the mutual fund return index
    init_df = [pd.TimeSeries(data=[100], index=[init_date], name='stock A'),
               pd.TimeSeries(data=[100], index=[init_date], name='stock B')]
    eq_index = [ip.IndexProvider(init_df[0], 'stock A'),
                ip.IndexProvider(init_df[1], 'stock B')]
    # no vol, otherwise randomness will break my test
    sim_engine = EqBSEngine(np.array([0.02, 0.02]),
                            np.array([0.0, 0.0]),
                            corr=np.array([[1., 0.3], [0.3, 1.]]))
    simulator = ScenarioGenerator(eq_index, sim_engine,
                                  **{'max_time_step': 5. / BDAYS_PER_YEAR})

    MARKET_DATA_MANAGER.reset()
    MARKET_DATA_MANAGER.setup(init_date)
    MARKET_DATA_MANAGER.index_table['stock A'] = eq_index[0]
    MARKET_DATA_MANAGER.index_table['stock B'] = eq_index[1]
    MARKET_DATA_MANAGER.scen_gen_table['stock A'] = simulator
    MARKET_DATA_MANAGER.scen_gen_table['stock B'] = simulator

    fund_info = {'Fund A': {'Allocations': {'stock A': 1,
                                            'stock B': 0},
                            'Management Fee': 0.01,
                            'Description': 'blah blah'},
                 'Fund B': {'Allocations': {'stock A': 0,
                                            'stock B': 1},
                            'Management Fee': 0.01,
                            'Description': 'blah blah'}}
    credit_rider = isr.InsCreditRateMutualFunds(fund_info=fund_info)

    # Set up non-rider fees
    annual_fee_rate = 0.01
    annual_booking_fee = 100
    mgmt_fee = mif.InsFeeProp(annual_fee_rate, fee_name="Mgmt Fee")
    booking_fee = mif.InsFeeConst(annual_booking_fee, fee_name="Booking Fee")
    fees = [mgmt_fee, booking_fee]

    # Set up rider
    db_rider_fee_rate = 0.005
    db_rider = mir.InsRiderDB(extract_strict(raw_input, "ROP Amount"),
                              db_rider_fee_rate, rider_name="UWL")
    riders = [db_rider]

    # Set up investment index
    inv_index = credit_rider.inv_index(init_date, periods, step_per_year)

    # Set up iteration
    product = InsProduct(riders, fees, inv_index)
    acct = InsAcct(raw_input, product)
    acct_iter = acct.acct_iterator()

    # Set up lapse function and lapse model
    xs = [0]
    ys = [0.0, 0.1]
    shock_func = linear_comp_bounded(1, 0, floor=0.5, cap=1.5)
    lapse_model = LapseDynamic(InsStepFunc(xs, ys), shock_func,
                               rider_name='UWL')

    # Set up surrender charge
    xs = [0]
    ys = [100, 100]
    fixed_charge_func = InsStepFunc(xs, ys)
    xs = [0, 1, 2]
    ys = [0.0, 0.3, 0.2, 0.0]
    pct_charge_func = InsStepFunc(xs, ys)
    surrender_charge = SurrenderCharge(fixed_charge_func, pct_charge_func)

    # Set up mortality function and mortality model
    xs = [x for x in range(0, 100)]
    ys = [0.01] * 100
    ys.append(float('inf'))
    mort_model = InsMortModel(InsStepFunc(xs, ys))

    # Set up VA model
    model = InsModelVA(acct, lapse_model, mort_model, surrender_charge)
    model_iter = model.create_iterator(pricing_date)

    # Model iterator to evolve the model forward
    metrics = ['Account Value', 'Active Population', 'Benefit Base.UWL',
               'Rider Fee.UWL', 'Benefit.UWL', 'Fee.Mgmt Fee',
               'Fee.Booking Fee', 'Date', 'Attained Age', 'Anniv Flag',
               'Death', 'Lapse', 'Paid Benefit.UWL', 'Surrender Charge']
    crv_aggregator = create_curve_aggregator(metrics)
    params = {'pricing date': init_date, 'periods': 60, 'frequency': 'MS'}

    proj_mgr = ProjectionManager(crv_aggregator, model_iter, **params)
    proj_mgr.run()
    df = crv_aggregator.to_dataframe()
    # df[['Rider Fee.UWL', 'Fee.Mgmt Fee', 'Fee.Booking Fee',
    #     'Surrender Charge']].plot(kind='bar', stacked=True)
    return df
import scikits.timeseries as ts

d1 = ts.Date(year=1700, freq='A')
#NOTE: have to have yearBegin offset for annual data until parser rewrite
#should this be up to the user, or should it be done in TSM init?
#NOTE: not anymore, it's end of year now
ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
pandas_dr = pandas.DateRange(start=d1.datetime,
                             periods=len(sunspots.endog), timeRule='A@DEC')
#pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

dates = np.arange(1700, 1700 + len(sunspots.endog))
dates = ts.date_array(dates, freq='A')
#sunspots = pandas.TimeSeries(sunspots.endog, index=dates)

#NOTE: pandas only does business days for dates it looks like
import datetime
dt_dates = np.asarray(map(datetime.datetime.fromordinal,
                          ts_dr.toordinal().astype(int)))
sunspots = pandas.TimeSeries(sunspots.endog, index=dt_dates)

#NOTE: pandas can't handle pre-1900 dates
mod = AR(sunspots, freq='A')
res = mod.fit(method='mle', maxlag=9)

# some data for an example in Box Jenkins
IBM = np.asarray([460, 457, 452, 459, 462, 459, 463, 479, 493, 490.])
w = np.diff(IBM)
theta = .5
og_dir = 'processed\\SW\\GATE_daily\\'
for g, g_str in zip(g_sites, g_str_num):
    #--find the records for this gate
    for gg_str in g_str:
        this_gate_files, this_gate_info = [], []
        for f in gate_files:
            fdict = dbu.parse_fname(f)
            if fdict['site'] == g and fdict['strnum'] == gg_str:
                this_gate_files.append(f)
                this_gate_info.append(fdict)
        #print this_gate_files
        p_series = []
        for gf, gi in zip(this_gate_files, this_gate_info):
            if gi['dtype'].upper() == 'BK':
                series, flg = dbu.load_series(g_dir + gf)
                series = dbu.interp_breakpoint(series, flg)
                p_series.append(
                    pandas.TimeSeries(series[:, 1], index=series[:, 0]))
            else:
                #raise TypeError, 'Only use breakpoint data for gate openings'
                print 'non break point record - skipping'
        #--create a full record
        final_p_series = dbu.create_full_record(p_series)
        dbu.save_series(og_dir + g + '.' + str(gg_str) + '.dat',
                        final_p_series)
        print 'processed record saved for structure,gate:', str(g), str(gg_str), '\n'
        #break
    #break
def marketsim(starting_cash, order_file, out_file):
    dates = []
    symbols = []
    #order_list = []
    #starting_cash = float(sys.argv[1])
    #order_file = sys.argv[2]
    #out_file = sys.argv[3]

    #step 1: read in csv file and remove duplicates
    #see marketsim-guidelines.pdf
    reader = csv.reader(open(order_file, 'rU'), delimiter=',')
    for row in reader:
        #ex: 2008, 12, 3, AAPL, BUY, 130
        #need int, otherwise get "TypeError: an integer is required"
        dates.append(dt.datetime(int(row[0]), int(row[1]), int(row[2])))
        symbols.append(row[3])
    #order_list.sort(['date'])

    #remove duplicates:
    #set(listWithDuplicates) is an unordered collection without duplicates,
    #so it removes the duplicates in listWithDuplicates
    uniqueDates = list(set(dates))
    uniqueSymbols = list(set(symbols))

    #step 2: read the data like in previous HW and tutorials
    sortedDates = sorted(uniqueDates)
    dt_start = sortedDates[0]
    #End date should be offset by 1 day to read the close for the last date.
    #see marketsim-guidelines.pdf
    dt_end = sortedDates[-1] + dt.timedelta(days=1)
    dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    ldf_data = dataobj.get_data(ldt_timestamps, uniqueSymbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    #step 3: create dataframe that contains the trade matrix
    #see marketsim-guidelines.pdf
    df_trade = np.zeros((len(ldt_timestamps), len(uniqueSymbols)))
    df_trade = pd.DataFrame(df_trade, index=ldt_timestamps,
                            columns=uniqueSymbols)

    #iterate over the orders file and fill the number of shares for that
    #symbol and date to create the trade matrix
    reader = csv.reader(open(order_file, 'rU'), delimiter=',')
    for orderrow in reader:
        order_date = dt.datetime(int(orderrow[0]), int(orderrow[1]),
                                 int(orderrow[2])).date()
        for index, row in df_trade.iterrows():
            if order_date == index.date():
                if orderrow[4] == 'Buy':
                    df_trade.set_value(index, orderrow[3], float(orderrow[5]))
                    #df_trade.ix[index][orderrow[3]] += float(orderrow[5])
                    #print ts_cash[index]
                elif orderrow[4] == "Sell":
                    #df_trade.ix[index][orderrow[3]] -= float(orderrow[5])
                    df_trade.set_value(index, orderrow[3], -float(orderrow[5]))
    print df_trade

    #step 4: create timeseries containing cash values, all values are 0 initially
    ts_cash = pd.TimeSeries(0.0, index=ldt_timestamps)
    ts_cash[0] = starting_cash
    #for each order in the trade matrix, subtract the cash used in that trade
    for index, row in df_trade.iterrows():
        ts_cash[index] -= np.dot(row.values.astype(float),
                                 d_data['close'].ix[index])
    #print 'df_trade', df_trade.head()

    #step 5:
    #append '_CASH' into the price data
    df_close = d_data['close']
    df_close['_CASH'] = 1.0
    #append the cash time series into the trade matrix
    df_trade['_CASH'] = ts_cash
    #convert to a holding matrix
    df_holding = df_trade.cumsum()
    #df_trade = df_trade.cumsum(axis=1)  # axis=1 means sum over columns
    #see http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.cumsum.html

    #dot product on price (df_close) and holding matrix (df_holding) to
    #calculate the portfolio value on each date
    ts_fund = np.zeros((len(ldt_timestamps), 1))
    #ts_fund = pd.DataFrame(ts_fund, index=ldt_timestamps, columns='portfolio value')
    ts_fund = df_holding.mul(df_close, axis='columns', fill_value=0).sum(axis=1)
    #better to avoid iterating over rows unless necessary;
    #try to use pandas' vectorized operations instead
    #for index, row in df_trade.iterrows():
    #    portfolio_value = np.dot(row.values.astype(float), df_close.ix[index].values)
    #    ts_fund[index] = portfolio_value

    #write this to csv
    writer = csv.writer(open(out_file, 'wb'), delimiter=',')
    for row_index in ts_fund.index:
        row_to_enter = [row_index.year, row_index.month, row_index.day,
                        ts_fund[row_index]]
        writer.writerow(row_to_enter)
    return out_file
def set_value_at(crv, date, set_value):
    crv.set_value_at(date, set_value)


# ---------
if __name__ == '__main__':
    import pandas as pd
    import datetime as dt
    import numpy as np
    from Infra.IndexProvider import IndexProvider

    curve = IndexProvider(
        pd.TimeSeries(index=pd.date_range(start=dt.date(2011, 1, 1),
                                          periods=10, freq='D').date,
                      data=np.arange(10)))
    spread = IndexProvider(
        pd.TimeSeries(index=pd.date_range(start=dt.date(2011, 1, 1),
                                          periods=3, freq='3D').date,
                      data=np.arange(3)))

    # parallel_shift(curve, 2)
    # print(curve.data)
    # scale(curve, 1.1)
    # print(curve.data)
    print('curve', curve.data)
    print('spread', spread.data)
    add_spread(curve, spread)
    print(curve.data)
    # set_value_at(curve, dt.date(2011, 1, 2), 10)
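# `add_spread` is called above but not defined in this snippet. A minimal
# sketch under the assumption that it adds the spread's stepwise value onto
# each point of the curve's underlying series in place:
def add_spread(crv, spread):
    for d in crv.data.index:
        crv.data[d] += spread.index_value(d)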