Пример #1
0
def read_analyze_write_profile(csv_file,
                               output_file='stanford_profile_output.csv'):
    """Collect publication stats for every person listed in ``csv_file``.

    ``csv_file`` is read as a header-less CSV whose two columns are taken as
    first and last name.  For each row a profile URL is built as
    ``base_url + '<first>-<last>' + parameters`` (names lower-cased), the
    PubMed IDs found at that URL are fetched with
    ``get_pmids_from_profile_link`` and passed to ``util.get_stats``.  The
    result is written as one CSV row per person.

    Args:
        csv_file: Anything ``pandas.read_csv`` accepts as its first argument.
        output_file: Destination CSV path.  Defaults to the file name that
            used to be hard-coded, so existing callers are unaffected.
    """
    people = pd.read_csv(csv_file, names=['first', 'last'])
    stats_by_name = {}
    for _, row in people.iterrows():
        name_string = row['first'].lower() + '-' + row['last'].lower()
        url = base_url + name_string + parameters
        pmids = get_pmids_from_profile_link(url)

        stats_dict = util.get_stats(pmids)
        stats_dict['url'] = url
        if len(pmids) == 0:
            # Mark profiles for which no PubMed IDs were found so they can be
            # told apart from profiles that simply have low numbers.
            stats_dict['unable_to_find_any_articles'] = 'true'

        stats_by_name[name_string] = pd.Series(stats_dict)
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('read data from: ' + name_string)

    # The dict keys become columns; transpose so each person is one row.
    result_df = pd.DataFrame(stats_by_name).T
    # Explicit UTF-8 so non-ASCII text does not break the write on Python 2.
    result_df.to_csv(output_file, encoding='utf-8')
Пример #2
0
            num_pages = float(
                soup.find_all("a",
                              class_="btn-page-jumper")[0].text.split('/')[1])
            print(num_pages)
        #if num_pages is None:
        print('got links from page: ' + str(page_no) + ' of ' + str(num_pages))
        page_no += 1

    return list_of_profiles


# Gather publication stats for every scraped profile, keyed by profile URL.
prof_list = get_links()
csv_dict = {}
num_profiles_processed = 0
for p in prof_list:
    print('Number of profiles processed so far: {}'.format(
        num_profiles_processed))
    print('reading data from: ' + p['name'])

    link = STANFORD_MED_PROFILES_BASE_URL + p['link']
    pmids = util.stanford_profile_get_pmids_from_profile_link(link)

    # Attach the identifying fields to the computed stats before storing.
    stats_dict = util.get_stats(pmids)
    stats_dict.update({
        'official_name': p['name'],
        'title': p['title'],
        'link_to_profile': link,
    })
    csv_dict[link] = pd.Series(stats_dict)
    num_profiles_processed += 1

# Each profile is a column at this point; flip so it becomes a row on disk.
result_df = pd.DataFrame(csv_dict)
transpose = result_df.transpose()
transpose.to_csv(result_csv_file_name, encoding='utf-8')
Пример #3
0
            for y in range(1995, 2017):
                M.run_model_based_on_deepgp(y, model="deepGP", trw=trw, i=0)
            pass
        else:
            y = int(args[2])
            M.run_model_based_on_deepgp(y, model="deepGP", trw=trw, i=i)
        pass
    return


def run_model_stats(args):
    """Run ``util.get_stats`` for the model and ``trw`` given in ``args``.

    Args:
        args: Command-line style sequence ``[<reg model>, <trw>]``.  The
            second entry is converted with ``int`` (a non-numeric value
            raises ``ValueError``).

    Prints the usage line and does nothing else when fewer than two
    arguments are supplied.
    """
    if len(args) < 2:
        # Both values are required.  The previous `== 0` check let a single
        # argument through, which then failed with IndexError on args[1].
        print("python jobutil.py 4 <reg model> <trw>")
        return
    model = args[0]
    trw = int(args[1])
    util.get_stats(model, trw)
    return


def run_tss_plot(args):
    """Run ``util.run_for_TSS`` for the model and ``trw`` given in ``args``.

    Args:
        args: Command-line style sequence ``[<reg model>, <trw>]``.  The
            second entry is converted with ``int`` (a non-numeric value
            raises ``ValueError``).

    Prints the usage line and does nothing else when fewer than two
    arguments are supplied.
    """
    if len(args) < 2:
        # Both values are required.  The previous `== 0` check let a single
        # argument through, which then failed with IndexError on args[1].
        print("python jobutil.py 6 <reg model> <trw>")
        return
    model = args[0]
    trw = int(args[1])
    util.run_for_TSS(model, trw)
    return


if __name__ == "__main__":
    args = sys.argv[1:]
    if len(args) == 0:
Пример #4
0
from bs4 import BeautifulSoup
# importing the requests library
import requests
#for reading csv file:
import pandas as pd
import collections
import re
import util

# Root of every profile URL; the lower-cased "<first>-<last>" slug is appended
# to it when a profile link is built.
base_url = 'https://profiles.stanford.edu/'
# Query string added after the slug so the request lands on the
# publications tab of the profile page.
parameters = '?tab=publications'

#API_ENDPOINT = "https://www.ncbi.nlm.nih.gov/myncbi/browse/collection"
#test_url = 'https://profiles.stanford.edu/paul-heidenreich?tab=publications'
#test_url = 'https://profiles.stanford.edu/steven-asch?tab=publications'
'''
test_url = 'https://profiles.stanford.edu/karl-Deisseroth?tab=publications'

pub_results = requests.get(url = test_url)
soup = BeautifulSoup(pub_results.text, 'html.parser')
pmids_text = soup.find_all(text = re.compile(".*PubMedID.*"))
pmids_no_label = [x.split(' ')[1] for x in pmids_text]
#print pmids_text
print len(pmids_text)
print pmids_no_label
print len(pmids_no_label)

util.get_stats(pmids_no_label)

#if len(sys.argv) > 2:
'''
def usePossibleStrategy(df_prices, symbols, sd, ed, start_val):
    """Trade Bollinger-band crossovers and plot them against buy-and-hold.

    Bands are built from the 10-row rolling mean and rolling standard
    deviation of ``df_prices['JPM']``.  Walking the rows in order, a SELL of
    1000 shares is recorded when the price was above the upper band on the
    previous row and is below it on the current row; a BUY of 1000 shares is
    recorded for the mirrored crossing of the lower band.  The running
    position is kept within [-1000, 1000] shares.  The resulting portfolio
    value is plotted (normalised to its first value) next to a benchmark that
    buys 1000 shares on the first date, and summary statistics are printed.

    Args:
        df_prices: Price DataFrame.  Must contain a ``'JPM'`` column and the
            column(s) selected by ``symbols``.
        symbols: Column selector for the traded price.  The first element of
            each per-row selection is used, so a one-element list such as
            ``['JPM']`` is presumably expected -- TODO confirm with callers.
        sd: Not used by this function; kept so the signature is unchanged.
        ed: End date; a zero-share BUY dated one day before it is appended
            to the trades.
        start_val: Starting value forwarded to ``get_portfolio_value``.

    Returns:
        Tuple ``(df_trades, df_shortXs, df_longXs, bband)``: the order table,
        the dates of the SELL signals, the dates of the BUY signals, and the
        band DataFrame including the price column(s).
    """
    trade_num = 1000
    shares_min = -1000
    shares_max = 1000
    order_columns = ['Date', 'Symbol', 'Order', 'Shares']

    # NOTE(review): the bands always come from the 'JPM' column even though
    # the traded price is selected by `symbols` -- confirm this is intended.
    rm = get_rolling_mean(df_prices['JPM'], window=10)
    rstd = get_rolling_std(df_prices['JPM'], window=10)
    bband = get_bollinger_bands(rm, rstd)
    bband[symbols] = df_prices[symbols]
    # .bfill() is the supported spelling of the deprecated
    # fillna(method='bfill').
    bband = bband.bfill()

    orders = []
    short_Xs = []
    long_Xs = []
    current_number_of_shares = 0

    for i in range(1, bband.shape[0]):
        prev_row = bband.iloc[i - 1]
        row = bband.iloc[i]
        day = bband.index[i]
        # Explicit positional access: a bare [0] on a label-indexed Series
        # relies on a deprecated integer fallback.
        prev_price = prev_row[symbols].iloc[0]
        price = row[symbols].iloc[0]

        fell_back_under_upper = (prev_price > prev_row['Upper']
                                 and price < row['Upper'])
        rose_back_over_lower = (prev_price < prev_row['Lower']
                                and price > row['Lower'])

        if (fell_back_under_upper
                and current_number_of_shares - trade_num >= shares_min):
            short_Xs.append([day])
            orders.append([day, symbols, 'SELL', trade_num])
            current_number_of_shares -= trade_num
        elif (rose_back_over_lower
              and current_number_of_shares + trade_num <= shares_max):
            long_Xs.append([day])
            orders.append([day, symbols, 'BUY', trade_num])
            current_number_of_shares += trade_num

    # Zero-share order dated the day before `ed`, appended after all signals.
    orders.append([ed - dt.timedelta(days=1), symbols, 'BUY', 0])
    df_trades = pd.DataFrame(orders, columns=order_columns)
    df_shortXs = pd.DataFrame(short_Xs, columns=['Shorts'])
    df_longXs = pd.DataFrame(long_Xs, columns=['Longs'])

    # Benchmark: buy 1000 shares on the first date and hold to the last.
    df_benchmark_orders = pd.DataFrame(
        [[min(df_prices.index), symbols, 'BUY', 1000],
         [max(df_prices.index), symbols, 'BUY', 0]],
        columns=order_columns)
    df_ms_value = get_portfolio_value(df_prices,
                                      df_trades,
                                      start_val,
                                      commission=9.95,
                                      impact=0.005)
    df_benchmark_value = get_portfolio_value(df_prices,
                                             df_benchmark_orders,
                                             start_val,
                                             commission=9.95,
                                             impact=0.005)

    _, ax = plt.subplots()
    ax.set_title('Manual vs Benchmark Strategy', fontsize=20)
    # .iloc[0] replaces .ix[0]; .ix was removed in pandas 1.0.
    ax.plot(df_ms_value.index,
            df_ms_value / df_ms_value.iloc[0],
            color='black',
            label='Manual Strategy')
    ax.plot(df_benchmark_value.index,
            df_benchmark_value / df_benchmark_value.iloc[0],
            color='blue',
            label='Benchmark Strategy')
    ax.legend(loc='best')
    plt.xticks(rotation=45)
    # Vertical markers: red for SELL signals, green for BUY signals.
    for short_day in df_shortXs['Shorts']:
        plt.axvline(x=pd.to_datetime(short_day), color='red')
    for long_day in df_longXs['Longs']:
        plt.axvline(x=pd.to_datetime(long_day), color='green')
    plt.show()

    avg_daily_ret, std_daily_ret, sharpe_ratio, cum_ret = get_stats(
        df_ms_value)

    # Comparative Analysis Stuff
    print('Manual Strategy Performance Data; ')
    print('Cumulative Return of Fund: {}'.format(cum_ret))
    print('Standard Deviation of Fund: {}'.format(std_daily_ret))
    print('Average Daily Return of Fund: {}\n'.format(avg_daily_ret))

    return df_trades, df_shortXs, df_longXs, bband
def useImpossibleStrategy(df_prices, symbols, sd, ed, start_val):
    """Build the look-ahead ("best possible") order set and plot it.

    For every row except the last, the next row's ``'JPM'`` price is compared
    with the current one: if it is higher the position is raised to +1000
    shares, if it is lower the position is dropped to -1000 shares.  The
    resulting portfolio (no commission, no impact) is plotted, normalised to
    its first value, next to a benchmark that buys 1000 shares on the first
    date (commission 9.95, impact 0.005), and summary statistics of the
    look-ahead portfolio are printed.

    Args:
        df_prices: Price DataFrame; must contain a ``'JPM'`` column.  Gaps
            are back-filled, then forward-filled, on a local copy.
        symbols: Value stored in the ``Symbol`` column of every order.
        sd: Not used by this function; kept so the signature is unchanged.
        ed: Not used by this function; kept so the signature is unchanged.
        start_val: Starting value forwarded to ``get_portfolio_value``.

    Returns:
        DataFrame of the generated orders with columns
        ``Date``, ``Symbol``, ``Order``, ``Shares``.
    """
    # Fill back in time, then forward.  .bfill()/.ffill() are the supported
    # spellings of the deprecated fillna(method=...).
    df_prices = df_prices.bfill().ffill()

    shares_max = 1000
    shares_min = -1000
    order_columns = ['Date', 'Symbol', 'Order', 'Shares']

    orders = []
    current_shares = 0

    # Pair each row's price with the following row's price positionally;
    # integer [] lookups on a date-indexed Series are deprecated.
    prices = df_prices['JPM']
    todays_and_tomorrows = zip(prices.index[:-1],
                               prices.iloc[:-1],
                               prices.iloc[1:])
    for date, current_price, next_price in todays_and_tomorrows:
        if next_price > current_price and current_shares < shares_max:
            shares_to_buy = shares_max - current_shares
            orders.append([date, symbols, 'BUY', shares_to_buy])
            current_shares += shares_to_buy
        elif next_price < current_price and current_shares > shares_min:
            shares_to_sell = current_shares - shares_min
            orders.append([date, symbols, 'SELL', shares_to_sell])
            current_shares -= shares_to_sell

    df_orders = pd.DataFrame(orders, columns=order_columns)

    # Benchmark: buy 1000 shares on the first date and hold to the last.
    df_benchmark_orders = pd.DataFrame(
        [[min(df_prices.index), symbols, 'BUY', 1000],
         [max(df_prices.index), symbols, 'BUY', 0]],
        columns=order_columns)

    df_benchmark_value = get_portfolio_value(df_prices,
                                             df_benchmark_orders,
                                             start_val,
                                             commission=9.95,
                                             impact=0.005)
    df_bps = get_portfolio_value(df_prices,
                                 df_orders,
                                 start_val,
                                 commission=0,
                                 impact=0)

    avg_daily_ret, std_daily_ret, sharpe_ratio, cum_ret = get_stats(df_bps)
    print('Best Possible Strategy Stats')
    print('Cumulative Return of Fund: {}'.format(cum_ret))
    print('Standard Deviation of Fund: {}'.format(std_daily_ret))
    print('Average Daily Return of Fund: {}\n'.format(avg_daily_ret))

    # Plot both curves normalised to their first value.
    _, ax = plt.subplots()
    ax.set_title('JPM Benchmark vs Best Possible Strategy', fontsize=20)
    # NOTE(review): the benchmark curve is labelled 'SPY' although the title
    # calls it the JPM benchmark -- confirm which label is intended.
    # .iloc[0] replaces .ix[0]; .ix was removed in pandas 1.0.
    ax.plot(df_benchmark_value.index,
            df_benchmark_value / df_benchmark_value.iloc[0],
            color='blue',
            label='SPY')
    ax.plot(df_bps.index,
            df_bps / df_bps.iloc[0],
            color='black',
            label='Best Strategy')
    ax.legend(loc='best')
    plt.xticks(rotation=45)
    plt.show()

    return df_orders