def test_char_to_date(self):
    """Check char_to_date on a DataFrame and on each of its columns.

    The fixture holds one datetime64[D] column and the same dates rendered
    as strings; after conversion both must compare equal to the original
    datetime64 array.
    """
    expected = np.arange("2010-01-01", "2011-01-01", dtype='datetime64[D]')
    frame = pd.DataFrame({'date1': expected, 'date2': expected.astype(str)})
    columns = ('date1', 'date2')

    # Whole-frame conversion: every column should come back as the dates.
    converted = char_to_date(frame)
    for name in columns:
        np.testing.assert_array_equal(converted[name].values, expected)

    # Column-by-column conversion: passing a single Series must work too.
    for name in columns:
        np.testing.assert_array_equal(char_to_date(frame[name]), expected)
def __init__(self, data: pd.DataFrame, input_directory: str, is_bloomberg: str) -> None:
    """Record run metadata and, for Bloomberg input, store the converted data.

    Args:
        data: Input frame. When ``is_bloomberg`` is truthy it is passed
            through ``utils_date.char_to_date`` and its index min/max are
            taken as the start and end of the data period.
        input_directory: Working directory, stored as ``self.wkdir``.
        is_bloomberg: Flag that is only tested for truthiness.
            NOTE(review): annotated ``str`` but used like a bool - any
            non-empty string (including "False") selects the Bloomberg path.
    """
    self.run_date = extract_str_timestamp(dt.datetime.now())
    self.wkdir = input_directory
    # Called after wkdir is assigned; presumably it derives the output
    # location from self.wkdir - confirm against set_output_folder.
    self.set_output_folder()

    if is_bloomberg:
        input_data = utils_date.char_to_date(data)
        self.start_date = extract_str_timestamp(input_data.index.min())
        self.end_date = extract_str_timestamp(input_data.index.max())
        print(f"Data for period runs {self.start_date} to {self.end_date}")
        self.data = input_data
    else:
        # This branch must be a plain `else`: repeating the `is_bloomberg`
        # test here would make it unreachable.
        # NOTE(review): data, start_date and end_date are not set on this
        # path - confirm whether non-Bloomberg input should be stored too.
        print("Data likely downloaded from Yahoo Finance?")
def prep_fund_data(df_path, date_col="Date"):
    """Load a fund-data csv, convert its dates and index it by the date column.

    Args:
        df_path (str): Path to the csv file to read.
        date_col (str): Name of the date column to use as the index.

    Returns:
        df (dataframe): The csv contents after ``char_to_date`` has been
            applied, indexed by ``date_col``.

    Raises:
        AssertionError: If ``date_col`` is not a column of the loaded data.
    """
    df = pd.read_csv(df_path)
    df = char_to_date(df)
    # Explicit raise instead of `assert` so the check survives `python -O`;
    # the exception type is kept so existing callers see the same error.
    if date_col not in df.columns:
        raise AssertionError(f"The date column: {date_col} is not specified in the function")
    # Index by the requested column rather than a hard-coded 'Date', so a
    # non-default date_col is honoured.
    return df.set_index(date_col)
# Script entry point. The guard must compare against "__main__" (with the
# double underscores); comparing against "main" never matches, so nothing
# below would run.
if __name__ == "__main__":
    RUN_DATE_STR = dt.datetime.now().strftime("%Y-%m-%d")
    RUN_DATE_DT = np.datetime64(RUN_DATE_STR)
    WORK_DIR = r"/Users/philip_p/python/projects"
    OUTPUT_DIR = os.path.join(WORK_DIR, "output", "efficient_frontier")
    TODAY_OUTPUT_DIR = os.path.join(OUTPUT_DIR, RUN_DATE_STR)
    # makedirs also creates missing parent folders and, with exist_ok=True,
    # is a no-op when today's folder already exists.
    os.makedirs(TODAY_OUTPUT_DIR, exist_ok=True)

    # Load the example prices indexed by 'Date', convert dates and drop
    # every row that contains a missing value.
    example_df = pd.read_csv(get_data_path("example_data.csv"), index_col='Date')
    example_df = utils_date.char_to_date(example_df)
    example_df.dropna(axis=0, inplace=True)

    # EXPLORATORY DATA ANALYSIS
    # --------------
    # plot the prices - using plot_raw_data method defined above
    # plot_raw_data(df= df, file_name="price_plot", y_label="Price (p)", to_close=True)
    #
    # Now look at 1 day returns (should follow something resembling a normal distribution)
    # returns = df.pct_change()
    # plot_raw_data(df=returns, file_name="returns_plot",
    #               y_label="Daily Return (%)", to_close=True)

    # Explore the methods
    daily_return = example_df.pct_change()
import utils_date

# Older matplotlib ships the style sheet as 'seaborn'; newer releases only
# provide it under the name 'seaborn-v0_8'. Use whichever one is installed.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
pd.set_option('display.max_columns', 5)

wkdir = "C://Users//Philip//Documents//python//"
inputFolder = wkdir + "input/"
inputDir = wkdir + "input/"
outputFolder = wkdir + "output/"

# "example_data.csv", "example_data_na.csv" has NA rows
# df = pd.read_csv(inputDir + 'example_data.csv') #, parse_dates=True)
df = pd.read_csv(inputDir + "funds_stocks_2019.csv")
df = utils_date.char_to_date(df)  # convert all dates to np datetime64
df.set_index('Date', inplace=True)

# Work with returns rather than the time series of prices, because prices are
# non-stationary; the series is transformed so that it becomes stationary.
# - If the non-stationary series is a random walk (with or without drift), it
#   is transformed to a stationary process by differencing; the result is a
#   stationary stochastic (random probability distribution) process.
# - If the series also exhibits a deterministic trend, spurious results can be
#   avoided by detrending.
# - If the series is both stochastic and deterministic at the same time,
#   differencing and detrending should both be applied: differencing removes
#   the trend in variance and detrending removes the deterministic trend.


def log_daily_returns(data):
    """Give log daily returns.

    Applies ``log(x) - log(x.shift(1))`` through ``data.apply`` and slices
    off the first entry of the result with ``[1:]`` (the first difference has
    no previous value to compare with).
    """
    # NOTE(review): no return statement is visible in this excerpt - confirm
    # that the rest of the function hands log_daily_return back to the caller.
    log_daily_return = data.apply(lambda x: np.log(x) - np.log(x.shift(1)))[1:]