def read_analyze_write_profile(csv_file,
                               output_file='stanford_profile_output.csv'):
    """Gather publication statistics for each person in a CSV and save them.

    The input CSV is read without a header row; its two columns are taken
    as first and last name.  For every row a profile URL is built as
    ``base_url + '<first>-<last>' + parameters`` (names lower-cased), the
    PMIDs behind that URL are fetched with ``get_pmids_from_profile_link``
    and summarised with ``util.get_stats``.  One output row per person is
    written, keyed by the ``<first>-<last>`` string.

    Args:
        csv_file: Path (or file-like object) accepted by ``pd.read_csv``.
        output_file: Destination of the resulting CSV.  Defaults to the
            historical hard-coded name so existing callers are unaffected.
    """
    df = pd.read_csv(csv_file, names=['first', 'last'])
    output_csv_dict = {}

    for _, row in df.iterrows():
        name_string = row['first'].lower() + '-' + row['last'].lower()
        url = base_url + name_string + parameters

        pmids = get_pmids_from_profile_link(url)
        stats_dict = util.get_stats(pmids)
        stats_dict['url'] = url
        if len(pmids) == 0:
            # Flag the row so an empty result is distinguishable in the
            # output from a profile that genuinely has zeroed statistics.
            stats_dict['unable_to_find_any_articles'] = 'true'

        output_csv_dict[name_string] = pd.Series(stats_dict)
        # Single-argument call form prints identically on Python 2 and 3.
        print('read data from: ' + name_string)

    # The dict maps name -> Series, which builds one *column* per person;
    # transpose so that each person becomes a row before writing.
    result_df = pd.DataFrame(output_csv_dict)
    result_df.T.to_csv(output_file)
num_pages = float( soup.find_all("a", class_="btn-page-jumper")[0].text.split('/')[1]) print(num_pages) #if num_pages is None: print('got links from page: ' + str(page_no) + ' of ' + str(num_pages)) page_no += 1 return list_of_profiles prof_list = get_links() csv_dict = {} num_profiles_processed = 0 for p in prof_list: print('Number of profiles processed so far: ' + str(num_profiles_processed)) print('reading data from: ' + p['name']) link = STANFORD_MED_PROFILES_BASE_URL + p['link'] pmids = util.stanford_profile_get_pmids_from_profile_link(link) stats_dict = util.get_stats(pmids) stats_dict['official_name'] = p['name'] stats_dict['title'] = p['title'] stats_dict['link_to_profile'] = link csv_dict[link] = pd.Series(stats_dict) num_profiles_processed += 1 result_df = pd.DataFrame(csv_dict) transpose = result_df.T transpose.to_csv(result_csv_file_name, encoding='utf-8')
for y in range(1995, 2017): M.run_model_based_on_deepgp(y, model="deepGP", trw=trw, i=0) pass else: y = int(args[2]) M.run_model_based_on_deepgp(y, model="deepGP", trw=trw, i=i) pass return def run_model_stats(args): if len(args) == 0: print "python jobutil.py 4 <reg model> <trw>" else: model = args[0] trw = int(args[1]) util.get_stats(model, trw) return def run_tss_plot(args): if len(args) == 0: print "python jobutil.py 6 <reg model> <trw>" else: model = args[0] trw = int(args[1]) util.run_for_TSS(model, trw) return if __name__ == "__main__": args = sys.argv[1:] if len(args) == 0:
from bs4 import BeautifulSoup # importing the requests library import requests #for reading csv file: import pandas as pd import collections import re import util base_url = 'https://profiles.stanford.edu/' parameters = '?tab=publications' #API_ENDPOINT = "https://www.ncbi.nlm.nih.gov/myncbi/browse/collection" #test_url = 'https://profiles.stanford.edu/paul-heidenreich?tab=publications' #test_url = 'https://profiles.stanford.edu/steven-asch?tab=publications' ''' test_url = 'https://profiles.stanford.edu/karl-Deisseroth?tab=publications' pub_results = requests.get(url = test_url) soup = BeautifulSoup(pub_results.text, 'html.parser') pmids_text = soup.find_all(text = re.compile(".*PubMedID.*")) pmids_no_label = [x.split(' ')[1] for x in pmids_text] #print pmids_text print len(pmids_text) print pmids_no_label print len(pmids_no_label) util.get_stats(pmids_no_label) #if len(sys.argv > 2):
def usePossibleStrategy(df_prices, symbols, sd, ed, start_val):
    """Trade Bollinger-band crossovers, then plot and report against a benchmark.

    A SELL of 1000 shares is issued when the price was above the upper band
    on the previous row and is below it on the current row; a BUY of 1000
    shares is issued for the mirror-image crossing of the lower band.  The
    net position is kept within [-1000, 1000] shares.  The resulting
    portfolio is valued with ``get_portfolio_value`` (commission 9.95,
    impact 0.005), plotted normalised to its first value next to a
    buy-and-hold benchmark, and summary statistics are printed.

    Args:
        df_prices: Price DataFrame; must contain a 'JPM' column and the
            columns named in ``symbols``.
        symbols: List-like of column names; only the first one drives the
            trading signal.
        sd: Unused; kept for interface compatibility.
        ed: End date; a zero-share order is appended one day before it.
        start_val: Starting portfolio value passed to ``get_portfolio_value``.

    Returns:
        Tuple ``(df_trades, df_shortXs, df_longXs, bband)``: the order
        table, the dates of SELL signals, the dates of BUY signals, and the
        band DataFrame with the price column(s) added.
    """
    orders = []
    short_Xs = []
    long_Xs = []
    trade_num = 1000
    shares_min = -1000
    shares_max = 1000

    # NOTE(review): the bands are always computed from the 'JPM' column even
    # though prices/orders use ``symbols`` -- confirm whether this should
    # follow ``symbols`` instead.
    rm = get_rolling_mean(df_prices['JPM'], window=10)
    rstd = get_rolling_std(df_prices['JPM'], window=10)
    bband = get_bollinger_bands(rm, rstd)
    bband[symbols] = df_prices[symbols]
    # .bfill() replaces the deprecated fillna(method='bfill').
    bband = bband.bfill()

    # Pull the series out once instead of doing per-row .iloc lookups inside
    # the loop.  The first column of ``symbols`` is the one the original
    # ``(...)[0]`` comparison inspected.
    price = bband[symbols].iloc[:, 0].values
    upper = bband['Upper'].values
    lower = bband['Lower'].values
    dates = bband.index

    current_number_of_shares = 0
    for i in range(1, len(dates)):
        fell_back_below_upper = (price[i - 1] > upper[i - 1]
                                 and price[i] < upper[i])
        rose_back_above_lower = (price[i - 1] < lower[i - 1]
                                 and price[i] > lower[i])

        if (fell_back_below_upper
                and current_number_of_shares - trade_num >= shares_min):
            short_Xs.append([dates[i]])
            orders.append([dates[i], symbols, 'SELL', trade_num])
            current_number_of_shares -= trade_num
        elif (rose_back_above_lower
                and current_number_of_shares + trade_num <= shares_max):
            long_Xs.append([dates[i]])
            orders.append([dates[i], symbols, 'BUY', trade_num])
            current_number_of_shares += trade_num

    # Zero-share closing order dated one day before ``ed``.
    orders.append([ed - dt.timedelta(days=1), symbols, 'BUY', 0])

    order_columns = ['Date', 'Symbol', 'Order', 'Shares']
    df_trades = pd.DataFrame(orders, columns=order_columns)
    df_shortXs = pd.DataFrame(short_Xs, columns=['Shorts'])
    df_longXs = pd.DataFrame(long_Xs, columns=['Longs'])

    # Benchmark: buy 1000 shares on the first date and hold to the last.
    df_benchmark_orders = pd.DataFrame(
        [[min(df_prices.index), symbols, 'BUY', 1000],
         [max(df_prices.index), symbols, 'BUY', 0]],
        columns=order_columns)

    df_ms_value = get_portfolio_value(df_prices, df_trades, start_val,
                                      commission=9.95, impact=0.005)
    df_benchmark_value = get_portfolio_value(df_prices, df_benchmark_orders,
                                             start_val, commission=9.95,
                                             impact=0.005)

    fig, ax = plt.subplots()
    ax.set_title('Manual vs Benchmark Strategy', fontsize=20)
    # .iloc[0] replaces .ix[0], which was removed in pandas 1.0.
    ax.plot(df_ms_value.index, df_ms_value / df_ms_value.iloc[0],
            color='black', label='Manual Strategy')
    ax.plot(df_benchmark_value.index,
            df_benchmark_value / df_benchmark_value.iloc[0],
            color='blue', label='Benchmark Strategy')
    ax.legend(loc='best')
    plt.xticks(rotation=45)

    # Mark every entry point: red for shorts, green for longs.
    for when in df_shortXs['Shorts']:
        plt.axvline(x=pd.to_datetime(when), color='red')
    for when in df_longXs['Longs']:
        plt.axvline(x=pd.to_datetime(when), color='green')
    plt.show()

    avg_daily_ret, std_daily_ret, sharpe_ratio, cum_ret = get_stats(
        df_ms_value)

    print('Manual Strategy Performance Data; ')
    print('Cumulative Return of Fund: {}'.format(cum_ret))
    print('Standard Deviation of Fund: {}'.format(std_daily_ret))
    print('Average Daily Return of Fund: {}\n'.format(avg_daily_ret))

    return df_trades, df_shortXs, df_longXs, bband
def useImpossibleStrategy(df_prices, symbols, sd, ed, start_val):
    """Build the look-ahead ("best possible") order set and compare to a benchmark.

    For every row except the last, the next row's 'JPM' price is inspected:
    if it is higher the position is raised to +1000 shares, if it is lower
    the position is dropped to -1000 shares.  The orders are valued with
    ``get_portfolio_value`` using zero commission and zero impact, the
    statistics from ``get_stats`` are printed, and the normalised portfolio
    value is plotted against a buy-and-hold benchmark (commission 9.95,
    impact 0.005).

    Args:
        df_prices: Price DataFrame; must contain a 'JPM' column.
        symbols: Value stored in the 'Symbol' column of every order.
        sd: Unused; kept for interface compatibility.
        ed: Unused; kept for interface compatibility.
        start_val: Starting portfolio value passed to ``get_portfolio_value``.

    Returns:
        DataFrame of orders with columns Date, Symbol, Order, Shares.
    """
    # Fill back in time first, then forward (replaces the deprecated
    # fillna(method=...) spelling).
    df_prices = df_prices.bfill()
    df_prices = df_prices.ffill()

    orders = []
    shares_max = 1000
    shares_min = -1000
    current_shares = 0

    # NOTE(review): the signal always reads the 'JPM' column while orders
    # are tagged with ``symbols`` -- confirm this is intended.
    # Positional access on a raw array avoids integer-key lookups on a
    # label-indexed Series, which pandas has deprecated.
    prices = df_prices['JPM'].values
    for date, current_price, next_price in zip(df_prices.index[:-1],
                                               prices[:-1], prices[1:]):
        if next_price > current_price and current_shares < shares_max:
            shares_to_buy = shares_max - current_shares
            orders.append([date, symbols, 'BUY', shares_to_buy])
            current_shares += shares_to_buy
        elif next_price < current_price and current_shares > shares_min:
            shares_to_sell = current_shares - shares_min
            orders.append([date, symbols, 'SELL', shares_to_sell])
            current_shares -= shares_to_sell

    order_columns = ['Date', 'Symbol', 'Order', 'Shares']
    df_orders = pd.DataFrame(orders, columns=order_columns)

    # Benchmark: buy 1000 shares on the first date and hold to the last.
    df_benchmark_orders = pd.DataFrame(
        [[min(df_prices.index), symbols, 'BUY', 1000],
         [max(df_prices.index), symbols, 'BUY', 0]],
        columns=order_columns)

    df_benchmark_value = get_portfolio_value(df_prices, df_benchmark_orders,
                                             start_val, commission=9.95,
                                             impact=0.005)
    df_bps = get_portfolio_value(df_prices, df_orders, start_val,
                                 commission=0, impact=0)

    avg_daily_ret, std_daily_ret, sharpe_ratio, cum_ret = get_stats(df_bps)

    print('Best Possible Strategy Stats')
    print('Cumulative Return of Fund: {}'.format(cum_ret))
    print('Standard Deviation of Fund: {}'.format(std_daily_ret))
    print('Average Daily Return of Fund: {}\n'.format(avg_daily_ret))

    fig, ax = plt.subplots()
    ax.set_title('JPM Benchmark vs Best Possible Strategy', fontsize=20)
    # .iloc[0] replaces .ix[0], which was removed in pandas 1.0.
    # NOTE(review): the benchmark line is labelled 'SPY' although it is
    # built from ``symbols`` -- label kept as-is, confirm the wording.
    ax.plot(df_benchmark_value.index,
            df_benchmark_value / df_benchmark_value.iloc[0],
            color='blue', label='SPY')
    ax.plot(df_bps.index, df_bps / df_bps.iloc[0],
            color='black', label='Best Strategy')
    ax.legend(loc='best')
    plt.xticks(rotation=45)
    plt.show()

    return df_orders