def simfin_load_market_data_historical():
    """Load daily SimFin share prices and upsert them into the
    market_data_historical store.

    For each price row: skip it when close/adjClose is missing or when no
    reference data exists for the ticker; otherwise insert a new record,
    or update the existing record for (refDataId, date).

    Returns:
        str: human-readable summary of inserted/updated/skipped counts.
    """
    log.info("Called simfin_load_market_data...")
    df = load_shareprices(variant='daily', market='us')
    # Renamed from `json` so the stdlib module name is not shadowed.
    rows = frame_to_json(df)
    num_inserted = 0
    num_updated = 0
    num_no_ref_data = 0
    num_no_close = 0
    for j in rows:
        # Lazy %-args: formatting is skipped when INFO is disabled.
        log.info("Processing %s", j['ticker'])
        # Both close and adjusted close are required downstream.
        if j['close'] is None or j['adjClose'] is None:
            num_no_close += 1
            log.info("No close found, skipping")
            continue
        ref = ref_data_by_symbol(j['ticker'])
        if ref is None:
            num_no_ref_data += 1
            log.info("No ref data found, skipping")
            continue
        cur = mdh_by_ref_data_id_date(ref['id'], j['date'])
        if cur is None:
            num_inserted += 1
            post_market_data_historical(j['date'], ref['id'], j['close'], j['adjClose'])
        else:
            num_updated += 1
            put_market_data_historical(cur['id'], cur['date'], cur['refDataId'], j['close'], j['adjClose'])
    ret = "market_data_historical: Inserted %d records, Updated %d records, %d no ref data and %d no close" % (num_inserted, num_updated, num_no_ref_data, num_no_close)
    log.info(ret)
    return ret
def getData():
    """Download daily US share prices from SimFin (free tier) and return
    the last 100 rows of OHLC data for a fixed list of tickers.

    Returns:
        pd.DataFrame: concatenated per-ticker frames with columns
        SimFinId, Close, Open, High, Low.
    """
    sf.set_api_key('free')
    # Local cache directory; created on first use if it does not exist.
    sf.set_data_dir('~/simfin_data/')
    # Data is downloaded automatically when not already cached.
    df_prices = sf.load_shareprices(market='us', variant='daily')
    # Debug leftover kept for output compatibility: show the available
    # price columns for one ticker.
    msft_close_values_TEMP = df_prices.loc['MSFT']
    print(msft_close_values_TEMP.columns)
    list_of_stocks = ['AAPL', 'MSFT', 'CLDR', 'CRM', 'TSLA', 'NVDA', 'DAL']
    # Collect the per-ticker frames and concatenate once: the original
    # pd.concat inside the loop re-copied the accumulator on every pass.
    frames = [
        df_prices.loc[stock].tail(100)[["SimFinId", "Close", "Open", "High", "Low"]]
        for stock in list_of_stocks
    ]
    return pd.concat(frames)
def get_share_prices() -> pd.DataFrame:
    """Fetch the bulk daily US share-price dataset from the SimFin API.

    The underlying data file is downloaded on first use and cached locally.
    """
    setup_simfin()
    return sf.load_shareprices(variant='daily', market='us')
def export_simfin(date_str, table_id=_TABLE_ID_DAILY_SIMFIN_TEMP):
    """Export one day's SimFin US share prices to the BigQuery equity
    daily dataset.

    Args:
        date_str: date to export, matching the 'Date' index level of the
            SimFin share-price frame.
        table_id: destination table; defaults to the temp daily table.
    """
    logging.info('daily export simfin data, date: {date}, table_id: {table_id}'.format(date=date_str, table_id=table_id))
    #df = sf.load_shareprices(variant='latest', market='us', refresh_days=0)
    # refresh_days=0 forces a fresh download of the daily dataset.
    df = sf.load_shareprices(variant='daily', market='us', refresh_days=0)
    try:
        # Cross-section on the Date level, keeping the level in the index.
        df_date = df.xs(date_str, level=1, drop_level=False)
        if len(df_date) == 0:
            logging.info('can not find any data for the given date {d}'.format(d=date_str))
            # Bug fix: previously this logged but still exported the
            # empty frame; bail out instead.
            return
        _export_simfin_df(df_date, _DATASET_ID_EQUITY_DAILY, table_id)
    except Exception:
        # df.xs raises KeyError for a missing date. Log with the full
        # traceback instead of swallowing the bare exception object.
        logging.exception('export_simfin failed for date {d}'.format(d=date_str))
def ingest():
    """Download daily US share prices from SimFin and write one CSV per
    calendar year (2010-2020) under data/."""
    sf.set_api_key(os.getenv('API_KEY_SIMFIN'))
    # Local cache directory for SimFin data files; created automatically
    # when absent.
    sf.set_data_dir('~/simfin_data/')
    prices = sf.load_shareprices(variant='daily', market='us')
    for year in range(2010, 2021):
        first_day = '{y}-01-01'.format(y=year)
        last_day = '{y}-12-31'.format(y=year)
        # Slice the Date index level to this calendar year, keeping the
        # level in the index so the CSV layout is unchanged.
        yearly = prices.xs(slice(first_day, last_day), level='Date', drop_level=False)
        yearly.to_csv('data/daily_simfin_{y}.csv'.format(y=year))
def SF_pricing_data(self, sec_id='AAPL', period: str = 'Daily', market: str = 'us', start_date=None, end_date=None) -> pd.DataFrame:
    """Get daily pricing data from SimFin for one ticker, optionally
    restricted to an inclusive date range. A token is needed for the most
    recent (non-free) data.

    Args:
        sec_id (str, optional): stock ticker. Defaults to 'AAPL'.
        period (str, optional): price variant passed to SimFin. Defaults to 'Daily'.
        market (str, optional): stock market ('us', 'de', ...). Defaults to 'us'.
        start_date (optional): 'xxxx-xx-xx' start date, or None for no lower bound.
        end_date (optional): 'xxxx-xx-xx' end date, or None for no upper bound.

    Returns:
        pd.DataFrame: columns date/open/close/high/low/volume.
    """
    df = simfin.load_shareprices(variant=period, market=market)
    df = df.loc[sec_id].reset_index()
    df = df.rename(
        columns={
            "Date": "date",
            'Open': 'open',
            'Close': 'close',
            'High': 'high',
            'Low': 'low',
            'Volume': 'volume'
        })
    df = df[['date', 'open', 'close', 'high', 'low', 'volume']]
    df['date'] = pd.to_datetime(df['date'])
    # Convert bounds only when supplied: converting None first (as the old
    # code did) can yield NaT, which defeats the `is None` checks.
    if start_date is not None:
        df = df[df['date'] >= pd.to_datetime(start_date)]
    if end_date is not None:
        df = df[df['date'] <= pd.to_datetime(end_date)]
    # drop=True: a plain reset_index() leaked the pre-filter row index
    # into an extra 'index' column.
    return df.reset_index(drop=True)
def simfin_historical_data(self, sec_id='AAPL', period: str = 'Daily', market: str = 'us', start_date=None, end_date=None) -> pd.DataFrame:
    """Download historical data from SimFin for one ticker, optionally
    restricted to an inclusive date range.

    A token is needed for the most recent data, which is not free; sign up
    at https://simfin.com/.

    Args:
        sec_id (str, optional): stock ticker. Defaults to 'AAPL'.
        period (str, optional): price variant passed to SimFin. Defaults to 'Daily'.
        market (str, optional): stock market ('us', 'de', ...). Defaults to 'us'.
        start_date (optional): 'xxxx-xx-xx' start date, or None for no lower bound.
        end_date (optional): 'xxxx-xx-xx' end date, or None for no upper bound.

    Returns:
        pd.DataFrame: columns date/open/close/high/low/volume.
    """
    df = sf.load_shareprices(variant=period, market=market)
    df = df.loc[sec_id].reset_index()
    df = df.rename(
        columns={
            "Date": "date",
            'Open': 'open',
            'Close': 'close',
            'High': 'high',
            'Low': 'low',
            'Volume': 'volume'
        })
    df = df[['date', 'open', 'close', 'high', 'low', 'volume']]
    df['date'] = pd.to_datetime(df['date'])
    # Convert bounds only when supplied: converting None first (as the old
    # code did) can yield NaT, which defeats the `is None` checks.
    if start_date is not None:
        df = df[df['date'] >= pd.to_datetime(start_date)]
    if end_date is not None:
        df = df[df['date'] <= pd.to_datetime(end_date)]
    # drop=True: a plain reset_index() leaked the pre-filter row index
    # into an extra 'index' column.
    return df.reset_index(drop=True)
def __init__(self, dir: str = None):
    """Configure SimFin and eagerly load the datasets used by this object.

    Args:
        dir: base directory for the local SimFin cache. Defaults to the
            current working directory at call time (the old default of
            `os.getcwd()` in the signature was frozen at import time).
    """
    # Evaluate the cwd default lazily, per call, not at module import.
    self._dir = dir if dir is not None else os.getcwd()
    # Load environment variables from a local .env file, if present.
    load_dotenv(os.path.join('.env'))
    # SECURITY: an API key used to be hard-coded here. It now comes from
    # the environment (.env works via load_dotenv above), falling back to
    # the old value only for backward compatibility.
    # FIXME: rotate the leaked key and drop this fallback.
    sf.set_api_key(os.getenv('SIMFIN_API_KEY', 'd5I8fvwmF29HUbsOwa8l3bUovp6L1NcX'))
    sf.set_data_dir(os.path.join(self._dir, 'simfin'))
    self._industries = sf.load_industries()
    self._prices = sf.load_shareprices(refresh_days=0)
    self._balance = sf.load_balance(variant="quarterly")
    self._income = sf.load_income(variant="quarterly")
    self._companies = sf.load_companies()
def load_shareprices(refresh_days=1, simfin_api_key='free', simfin_directory='simfin_data/'):
    """Load daily US share prices via SimFin, smooth the Close with a
    rolling 30-day mean per ticker, and resample to month-end values.

    Args:
        refresh_days: re-download the dataset when the cache is older.
        simfin_api_key: SimFin API key ('free' for the free tier).
        simfin_directory: local cache directory for SimFin data files.

    Returns:
        Month-end rolling-mean Close per ticker.
    """
    # Configure the SimFin client before any dataset is loaded.
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)
    prices = sf.load_shareprices(variant='daily', market='us', refresh_days=refresh_days)
    # Rolling 30-day mean of Close, computed independently per ticker;
    # reset_index(0, drop=True) removes the extra Ticker level that
    # groupby-rolling prepends.
    smoothed = (
        prices[['Close']]
        .groupby('Ticker')
        .rolling(30, min_periods=1)
        .mean()
        .reset_index(0, drop=True)
    )
    # Downsample to month-end, keeping the last smoothed value per month.
    return sf.resample(df=smoothed, rule='M', method=lambda x: x.last())
def simfin_load_market_data():
    """Load the latest SimFin share prices and upsert them into the
    market_data store.

    Skips rows without a close price, rows older than 10 days, and tickers
    without reference data.

    Returns:
        str: human-readable summary of inserted/updated/skipped counts.
    """
    log.info("Called simfin_load_market_data...")
    df = load_shareprices(variant='latest', market='us')
    # Renamed from `json` so the stdlib module name is not shadowed.
    rows = frame_to_json(df)
    num_inserted = 0
    num_updated = 0
    num_price_to_old = 0
    num_no_ref_data = 0
    num_no_close = 0
    # Loop-invariant staleness cutoff, hoisted out of the loop.
    cutoff = datetime.now() - timedelta(days=10)
    for j in rows:
        log.info("Processing %s", j['ticker'])
        if j['close'] is None:
            num_no_close += 1
            log.info("No close found, skipping")
            continue
        # Ignore stale quotes: anything dated more than 10 days ago.
        if datetime.strptime(j['date'][:10], '%Y-%m-%d') < cutoff:
            num_price_to_old += 1
            log.info("Price too old (%s), skipping", j['date'])
            continue
        cur = market_data_by_symbol(j['ticker'])
        if cur is None:
            ref = ref_data_by_symbol(j['ticker'])
            if ref is None:
                num_no_ref_data += 1
                log.info("No ref data found, skipping")
                continue
            num_inserted += 1
            post_market_data(ref['id'], j['close'])
        else:
            num_updated += 1
            put_market_data(cur['id'], cur['refDataId'], j['close'])
    ret = "market_data: Inserted %d records, Updated %d records, %d price to old and %d no ref data and %d no close" % (num_inserted, num_updated, num_price_to_old, num_no_ref_data, num_no_close)
    log.info(ret)
    return ret
# Import names used for easy access to SimFin's data-columns. from simfin.names import * #Set the local directory where data-files are stored. sf.set_data_dir( 'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\python code/simfin_data/' ) # Set up API key sf.set_api_key(api_key='free') #set plotting style sns.set_style("whitegrid") #--------------------------Load dataframe----------------------------- df_prices = sf.load_shareprices(variant='daily', market='us') #--------------------------Add Columns----------------------------- #simple way df2 = pd.DataFrame() df2[FOO] = df_prices[CLOSE] / df_prices[ADJ_CLOSE] df2[BAR] = df_prices[CLOSE] * df_prices[ADJ_CLOSE] df2[QUX] = df_prices[CLOSE] * df_prices[VOLUME] #much faster to add new columns to the DataFrame if index match df3 = pd.DataFrame(index=df_prices.index) df3[FOO] = df_prices[CLOSE] / df_prices[ADJ_CLOSE] df3[BAR] = df_prices[CLOSE] * df_prices[ADJ_CLOSE] df3[QUX] = df_prices[CLOSE] * df_prices[VOLUME] #another way: use dict
# NOTE(review): this chunk starts mid-expression — the opening bracket and
# first fields of the BigQuery schema list are defined above the visible
# region; only the tail of the list is shown here.
    bigquery.SchemaField("symbol", "STRING"),
    bigquery.SchemaField("open", "FLOAT"),
    bigquery.SchemaField("high", "FLOAT"),
    bigquery.SchemaField("low", "FLOAT"),
    bigquery.SchemaField("close", "FLOAT"),
    bigquery.SchemaField("volume", "FLOAT"),
    bigquery.SchemaField("volume_weighted_price", "FLOAT"),
]

# Create the per-year daily table in BigQuery.
full_table_id = get_full_table_id('{t}_{y}'.format(t=TABLE_ID_DAILY, y=year))
table = bigquery.Table(full_table_id, schema=schema)
table = _bigquery_client.create_table(table)  # Make an API request.

import simfin as sf

sf.set_api_key(os.getenv('API_KEY_SIMFIN'))

# Set the local directory where data-files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

#df = sf.load_shareprices(variant='daily', market='us')
# 'latest' variant: the most recent price row per ticker.
df_l = sf.load_shareprices(variant='latest', market='us')
for index, row in df_l.iterrows():
    # index is the (Ticker, Timestamp) tuple of the multi-index.
    print(index, index[0], index[1].date(), row['Close'])
    pass
def test_load_shareprices():
    """Test simfin.bulk.load_shareprices()"""
    # Exercise every (variant, market) combination of the shareprices dataset.
    for dataset, variant, market in iter_all_datasets(datasets='shareprices'):
        sf.load_shareprices(**_create_kwargs(variant=variant, market=market))
# Download the data from the SimFin server and load into a Pandas DataFrame. df = sf.load_income(variant='quarterly', market='us') # Print the first rows of the data. print(df.head()) # Print all column names of income statement data print(df.columns) #Print all Revenue and Net Income for Microsoft (ticker MSFT). print(df.loc['MSFT', [REVENUE, NET_INCOME]]) # Load daily share-prices for all companies in USA. df_prices = sf.load_shareprices(market='us', variant='daily') # Plot the closing share-prices for ticker MSFT. df_prices.loc['MSFT', CLOSE].plot(grid=True, figsize=(20,10), title='MSFT Close') ------------------------------------------------------------------------------------ tickers = ["AAPL","NVDA","WMT"] sim_ids = [] for ticker in tickers: request_url = f'https://simfin.com/api/v1/info/find-id/ticker/{ticker}?api-key={api_key}' content = requests.get(request_url) data = content.json()
def __load_share_data__(self):
    """Load the daily US share-price dataset into self.df_prices.

    NOTE(review): dunder-style names are reserved by Python convention; a
    single-underscore `_load_share_data` would be idiomatic, but renaming
    would break existing callers, so the name is kept.
    """
    sf.set_data_dir('~/simfin_data/')
    # Bug fix: market codes are lowercase ('us') everywhere else in this
    # codebase; 'US' requested a dataset name that does not match.
    self.df_prices = sf.load_shareprices(variant='daily', market='us')
#check how the dataframe looks like df_a.head() df_q.head() df_m.head() #Plot Microsoft's revenue across years #don't have to add quatation marks when using pyhon shortcut df_q.loc['MSFT'][REVENUE].plot(grid=True) #Load in income statement for banks and insurance companies df = sf.load_income_banks(variant='annual', market='us') df = sf.load_balance_insurance(variant='annual', market='us') #load share prices df_prices_latest = sf.load_shareprices(variant='latest', market='us') df_prices = sf.load_shareprices(variant='daily', market='us') df_prices_latest.head() #Load Companies Detail df_companies = sf.load_companies(index=TICKER, market='us') #Load sector and industry details df_industries = sf.load_industries() #Look up industry detail of Microsoft industry_id = df_companies.loc['MSFT'][INDUSTRY_ID] df_industries.loc[industry_id] #try to load full income statement data try: