def form_cur_quotes_property_url_str_fr_excel(self):
    """Build the '&f=' property portion of the quotes url from an excel table.

    Requires xls_table_extract_module. Properties can be deselected by
    commenting them out in the excel table. Also sets the column header
    list self.cur_quotes_parm_headers from the table labels.
    """
    from xls_table_extract_module import XlsExtractor

    self.xls_property_data = XlsExtractor(
        fname=self.properties_excel_table,
        sheetname='Sheet1',
        param_start_key='stock_property//',
        param_end_key='stock_property_end//',
        header_key='',
        col_len=2,
    )
    self.xls_property_data.open_excel_and_process_block_data()

    # Column headers come from the label column of the table.
    headers = []
    for label in self.xls_property_data.data_label_list:
        headers.append(label.encode().strip())
    self.cur_quotes_parm_headers = headers

    # Property codes are concatenated after the '&f=' marker.
    codes = [row[0].encode().strip()
             for row in self.xls_property_data.data_value_list]
    self.cur_quotes_property_portion_url = '&f=' + ''.join(codes)
def setUp(self):
    """Instantiate the XlsExtractor fixture exercised by the tests."""
    test_xls_path = r'C:\Python27\Lib\site-packages\excel_table_extract\testset.xls'
    self.xls_set_class_1 = XlsExtractor(
        fname=test_xls_path,
        sheetname='Sheet1',
        param_start_key='start//',
        param_end_key='end//',
        header_key='',
        col_len=3,
    )
    self.xls_set_class_1.initialize_excel()
class XlsExtractorTest(XlsExtractorTestCase):
    """Unit tests for the xls extractor module.

    TODO (from original author): consider splitting into two
    separate test objects.
    """

    def setUp(self):
        """Build the XlsExtractor fixture used by every test."""
        self.xls_set_class_1 = XlsExtractor(
            fname=r'C:\Python27\Lib\site-packages\excel_table_extract\testset.xls',
            sheetname='Sheet1',
            param_start_key='start//',
            param_end_key='end//',
            header_key='',
            col_len=3,
        )
        self.xls_set_class_1.initialize_excel()

    def test_calculate_start_end_pos_for_data(self):
        """Start/end cell addresses of the data block are located correctly."""
        row0, col0, row1 = self.xls_set_class_1.calculate_start_end_pos_for_data()
        self.assertTupleEqual((row0, col0, row1), (7, 4, 11))

    def test_get_raw_block_fr_xls(self):
        """Raw block retrieval keeps comment rows and blank rows."""
        row0, col0, row1 = self.xls_set_class_1.calculate_start_end_pos_for_data()
        raw_rows = self.xls_set_class_1.get_raw_block_fr_xls(
            row0, col0, row1, self.xls_set_class_1.col_len)
        expected = [
            [None, 'label1', 2.0, 3.0],
            ['#', 'label2', 5.0, 6.0],
            [None, None, None, None],
            [None, 'label3', 8.0, 9.0],
        ]
        self.assertListEqual(raw_rows, expected)

    def test_filter_space_and_comment_row(self):
        """Blank rows and '#'-marked rows are dropped."""
        filtered = self.xls_set_class_1.filter_space_and_comment_row([
            [None, 1.0, 2.0, 3.0],
            ['#', 4.0, 5.0, 6.0],
            [None, None, None, None],
            [None, 7.0, 8.0, 9.0],
        ])
        self.assertListEqual(
            filtered, [[None, 1.0, 2.0, 3.0], [None, 7.0, 8.0, 9.0]])

    def test_exclude_comment_block(self):
        """The leading comment-marker column is stripped from each row."""
        stripped = self.xls_set_class_1.exclude_comment_block(
            [[None, 1.0, 2.0, 3.0], [None, 7.0, 8.0, 9.0]])
        self.assertListEqual(stripped, [[1.0, 2.0, 3.0], [7.0, 8.0, 9.0]])

    def tearDown(self):
        """Attempt to close the excel handle for the fixture."""
        try:
            self.xls_set_class_1.close_excel()
        finally:
            pass
def form_cur_quotes_property_url_str_fr_excel(self):
    """Form the '&f=' property url portion from the configured excel table.

    Requires xls_table_extract_module. Properties not required can be
    commented out in the table; the table labels also become the column
    headers self.cur_quotes_parm_headers.
    """
    from xls_table_extract_module import XlsExtractor

    extractor = XlsExtractor(fname=self.properties_excel_table,
                             sheetname='Sheet1',
                             param_start_key='stock_property//',
                             param_end_key='stock_property_end//',
                             header_key='',
                             col_len=2)
    extractor.open_excel_and_process_block_data()
    self.xls_property_data = extractor

    # Header names taken from the label column.
    self.cur_quotes_parm_headers = [
        label.encode().strip() for label in extractor.data_label_list
    ]

    # Property codes joined together behind the '&f=' marker.
    start_str = '&f='
    target_properties = ''.join(
        [value_row[0].encode().strip() for value_row in extractor.data_value_list]
    )
    self.cur_quotes_property_portion_url = start_str + target_properties
def __init__(self):
    """Initialise retrieval urls, output dataframes and stock watchlists."""
    super(SGXDataExtract, self).__init__()

    # Each retrieval type maps to a (source website, target json tag) pair.
    self.retrieval_dict = {
        "company_info": (
            "http://54.254.221.141/sgx/search?callback=jQuery111008655668143182993_1425728207793&json=%7B%22criteria%22%3A%5B%5D%7D&_=1425728207802",
            "companies",
        ),
        "announcement": (
            "http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCOW_CorpAnnouncement_Content%26B%3DAnnouncementToday%26R_C%3D%26C_T%3D200",
            "items",
        ),
        "ex_div_data": (
            "http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCow_CorporateInformation_Content%26B%3DCorpDistributionByExDate%26S_T%3D1%26C_T%3D400",
            "items",
        ),
        "curr_price": ("http://sgx.com/JsonRead/JsonData?qryId=RStock&timeout=30", "items"),
    }

    # Short-sell report url parts -- presumably the date portion is
    # substituted per run; confirm against the url-forming caller.
    self.shortsell_info_start_url = (
        "http://sgx.com/wps/wcm/connect/sgx_en/home/market_info/short_sale/short_sale_daily/DailyShortSell"
    )
    self.shortsell_date_url = "20150417"
    self.shortsell_end_url = ".txt"
    self.shortsell_full_url = r""

    # Scratch storage.
    self.saved_json_file = r"c:\data\temptryyql.json"
    self.saved_parm_df_dict = {}  # final df per retrieval type keyword

    # Custom alert setup.
    self.custom_alert_dict_list = []  # list of dict??
    self.custom_alert_df = pandas.DataFrame()  # custom alert dicts as a frame

    # Final output dataframes, all start empty.
    for frame_attr in (
        "sgx_announ_df",
        "sgx_div_ex_date_df",
        "sgx_curr_price_df",
        "price_limit_alerts_df",
        "shortsell_info_df",
        "sgx_curr_plus_company_df",
    ):
        setattr(self, frame_attr, pandas.DataFrame())

    # Shortened output versions for alert creation.
    self.div_ex_date_shtver = ""
    self.filtered_announ_shtver = ""

    # Watchlists to populate.
    self.price_limit_reach_watchlist = []
    self.announce_watchlist = []

    # Target stocks for announcements -- pulled via excel query.
    stock_selection_xls = XlsExtractor(
        fname=r"C:\data\stockselection_for_sgx.xls",
        sheetname="stockselection",
        param_start_key="stock//",
        param_end_key="stock_end//",
        header_key="header#2//",
        col_len=2,
    )
    stock_selection_xls.open_excel_and_process_block_data()
    self.announce_watchlist = stock_selection_xls.data_label_list
    # Matching company names for the watchlist symbols.
    self.companyname_watchlist = [n[0].encode() for n in stock_selection_xls.data_value_list]
# Manual test / demo dispatch driven by an integer `choice` defined earlier
# in the script. NOTE(review): the bare triple-quoted strings inside each
# branch are inline notes, not docstrings -- they have no runtime effect.
if choice == 5:
    """ Handling current price """
    w = SGXDataExtract()
    # w.shortsell_notification()
    # print w.shortsell_info_df
    w.process_all_data()
    # Dump the current-price dataframe for offline inspection.
    w.sgx_curr_price_df.to_csv(r"c:\data\temp.csv", index=False)

if choice == 6:
    """ Use excel table to get the required stocks."""
    xls_set_class = XlsExtractor(
        fname=r"C:\data\stockselection_for_sgx.xls",
        sheetname="stockselection",
        param_start_key="stock//",
        param_end_key="stock_end//",
        header_key="header#2//",
        col_len=2,
    )
    xls_set_class.open_excel_and_process_block_data()
    # NOTE(review): expression below has no effect -- presumably left over
    # from interactive inspection of the extracted labels.
    xls_set_class.data_label_list

if choice == 7:
    """ use the price extract here to pull """

if choice == 8:
    """ Get the short sell volume. Need to get the shortsell vs the volumne traded that day Url need change with the current date """
class YFinanceDataExtr(object):
    """Extract stock data from Yahoo Finance.

    Achieved by querying the Yahoo Finance quotes url (see Yahoo Finance
    API) and downloading the corresponding .csv file. Further analysis of
    the data is done with pandas.
    """

    def __init__(self):
        """Initialise url parameters, retrieval settings and output storage."""
        # Param
        ## self.target_stocks is used mainly for a few stocks.
        ## It also holds each chunk of 45-50 stocks placed into a single url.
        self.target_stocks = ['S58.SI', 'S68.SI']  ## special characters need converting
        self.full_stocklist_to_retrieve = []  # full range of stocks

        # For different retrieval: the dict below selects the input file.
        # Currently "watcher" (selected stocks to watch) and "all".
        self.stock_retrieval_type = 'watcher'

        ## current data .csv file url formation
        # Header names matching the sequence of properties in the formed url.
        self.cur_quotes_parm_headers = ['NAME', 'SYMBOL', 'LATEST_PRICE', 'OPEN',
                                        'CLOSE', 'VOL', 'YEAR_HIGH', 'YEAR_LOW']

        # URL forming for price details.
        self.cur_quotes_start_url = "http://download.finance.yahoo.com/d/quotes.csv?s="
        self.cur_quotes_stock_portion_url = ''
        # Additional str appended to each stock symbol (e.g. '.SI' for SGX stocks).
        self.cur_quotes_stock_portion_additional_url = '.SI'
        self.cur_quotes_property_portion_url = ''
        self.cur_quotes_property_str = 'nsl1opvkj'  # default list of properties to query
        self.cur_quotes_end_url = "&e=.csv"
        self.cur_quotes_full_url = ''

        # Properties from excel.
        self.enable_form_properties_fr_exceltable = 1
        self.properties_excel_table = r'C:\pythonuserfiles\yahoo_finance_data_extract\Individual_stock_query_property.xls'

        # Output storage.
        self.cur_quotes_csvfile = r'c:\data\temp\stock_data.csv'
        self.cur_quotes_df = object()  # placeholder; becomes a DataFrame after retrieval
        self.cur_quotes_url_list = []  # every url queried, kept for debugging

        # For debug/printing.
        self.store_individual_set_df = []
        self.__print_url = 0  # set truthy to print each formed url

        # Input file path: dict selecting the file per retrieval type.
        self.retrieval_type_input_file_dict = {
            "all": r'C:\pythonuserfiles\yahoo_finance_data_extract\stocklist.csv',
            "watcher": r'c:\data\google_stock_screener.csv',
        }

    def set_stock_sym_append_str(self, append_str):
        """Set the additional str appended to each stock symbol in the url.

        Mainly used to set the '.SI' suffix for Singapore stocks.

        Args:
            append_str (str): additional str to append to stock symbol.
        """
        self.cur_quotes_stock_portion_additional_url = append_str

    def set_stock_retrieval_type(self, type='all'):
        """Set the stocks retrieval type.

        Kwargs:
            type (str): retrieval type key, default "all". (Parameter name
                kept for backward compatibility although it shadows the
                builtin `type`.)
        """
        self.stock_retrieval_type = type

    def load_stock_symbol_fr_file(self):
        """Load stock symbols from the file chosen by the retrieval type.

        The file must contain a SYMBOL column.
        """
        stock_list = pandas.read_csv(
            self.retrieval_type_input_file_dict[self.stock_retrieval_type])
        self.set_full_stocklist_to_retrieve(list(stock_list['SYMBOL']))

    def set_full_stocklist_to_retrieve(self, list_of_stocks):
        """Set the full list of stocks to retrieve.

        Args:
            list_of_stocks (list): full list of stocks to set.
        """
        self.full_stocklist_to_retrieve = list_of_stocks

    def set_target_stocks_list(self, list_of_stocks):
        """Set the target (current chunk) list of stocks, not the full list.

        Args:
            list_of_stocks (list): target list of stocks to set.
        """
        self.target_stocks = list_of_stocks

    def set_column_headers(self, param_headers):
        """Set the column headers used when naming the dataframe columns.

        Args:
            param_headers (list): list of column names.
        """
        self.cur_quotes_parm_headers = param_headers

    def form_cur_quotes_stock_url_str(self):
        """Form the stock portion of the cur quotes url."""
        # join() replaces the previous quadratic string-concatenation loop;
        # result is identical, including '' for an empty target list.
        suffix = self.cur_quotes_stock_portion_additional_url
        self.cur_quotes_stock_portion_url = ','.join(
            stock + suffix for stock in self.target_stocks)

    def form_cur_quotes_property_url_str_fr_excel(self):
        """Form the '&f=' property url portion from an excel table.

        Requires xls_table_extract_module. Properties can be deselected by
        commenting them out in the table. Also sets the header list
        self.cur_quotes_parm_headers from the table labels.
        """
        from xls_table_extract_module import XlsExtractor
        self.xls_property_data = XlsExtractor(fname=self.properties_excel_table,
                                              sheetname='Sheet1',
                                              param_start_key='stock_property//',
                                              param_end_key='stock_property_end//',
                                              header_key='', col_len=2)
        self.xls_property_data.open_excel_and_process_block_data()

        ## form the header
        self.cur_quotes_parm_headers = [
            n.encode().strip() for n in self.xls_property_data.data_label_list]

        ## form the url str
        start_str = '&f='
        target_properties = ''.join(
            [n[0].encode().strip() for n in self.xls_property_data.data_value_list])
        self.cur_quotes_property_portion_url = start_str + target_properties

    def set_quotes_properties(self, target_properties='nsl1opvkj'):
        """Set the properties used by form_cur_quotes_property_url_str.

        Kwargs:
            target_properties (str): property codes, default 'nsl1opvkj':
                name(n0), symbol(s), latest value(l1), open(o), previous
                close(p), volume(v), year high(k), year low(j).
        """
        self.cur_quotes_property_str = target_properties

    def form_cur_quotes_property_url_str(self):
        """Form the '&f=' property url portion from self.cur_quotes_property_str.

        Can also be formed via form_cur_quotes_property_url_str_fr_excel.
        Property codes are documented at:
        https://code.google.com/p/yahoo-finance-managed/wiki/enumQuoteProperty
        """
        start_str = '&f='
        self.cur_quotes_property_portion_url = start_str + self.cur_quotes_property_str

    def form_url_str(self, type='cur_quotes'):
        """Form the url str necessary to get the .csv file.

        Args:
            type (str): retrieval type; only 'cur_quotes' is handled.
        """
        if type == 'cur_quotes':
            self.form_cur_quotes_stock_url_str()
            # Form the property portion; two methods enabled.
            if self.enable_form_properties_fr_exceltable:
                self.form_cur_quotes_property_url_str_fr_excel()
            else:
                self.form_cur_quotes_property_url_str()
            self.cur_quotes_full_url = (self.cur_quotes_start_url +
                                        self.cur_quotes_stock_portion_url +
                                        self.cur_quotes_property_portion_url +
                                        self.cur_quotes_end_url)

    def downloading_csv(self, url_address):
        """Download the csv data from url_address to self.cur_quotes_csvfile."""
        cache.clear()
        url = URL(url_address)
        # 'with' guarantees the file handle is closed even if download fails.
        with open(self.cur_quotes_csvfile, 'wb') as f:
            f.write(url.download())

    def cur_quotes_create_dataframe(self):
        """Load the downloaded csv into self.cur_quotes_df and name the columns."""
        self.cur_quotes_df = pandas.read_csv(self.cur_quotes_csvfile, header=None)
        self.cur_quotes_df.rename(
            columns={org: change.upper() for org, change
                     in zip(self.cur_quotes_df.columns, self.cur_quotes_parm_headers)},
            inplace=True)

    def get_cur_quotes(self):
        """Form the url, download the csv and build the dataframe."""
        self.form_url_str()
        if self.__print_url:
            print(self.cur_quotes_full_url)  # parenthesised: valid in py2 and py3
        self.downloading_csv(self.cur_quotes_full_url)
        self.cur_quotes_create_dataframe()

    def get_cur_quotes_fr_list(self):
        """Retrieve quotes for the full stock list in url-sized chunks.

        The quotes url accepts at most 50 symbols; chunks of 45 are used as
        a safeguard. Results accumulate in self.temp_full_data_df.
        """
        ## full list with special characters taken care of
        full_list = self.replace_special_characters_in_list(
            self.full_stocklist_to_retrieve)
        # BUGFIX: chunk the sanitised list -- previously the raw list was
        # chunked and the special-character replacement was discarded.
        chunk_of_list = self.break_list_to_sub_list(full_list)
        self.temp_full_data_df = None
        for n in chunk_of_list:
            # Print the progress.
            sys.stdout.write('.')
            # Set the small chunk of list.
            self.set_target_stocks_list(n)
            self.get_cur_quotes()
            ## Take care of cases where the return is empty -- would return
            ## a "Missing symbols" list shorter than the expected headers.
            if not len(self.cur_quotes_df.columns) < len(self.cur_quotes_parm_headers):
                self.store_individual_set_df.append(self.cur_quotes_df)
                if self.temp_full_data_df is None:
                    self.temp_full_data_df = self.cur_quotes_df
                else:
                    self.temp_full_data_df = self.temp_full_data_df.append(self.cur_quotes_df)
        ## Remove the % symbol from self.temp_full_data_df columns.
        self.rm_percent_symbol_fr_cols()
        print('Done\n')

    def rm_percent_symbol_fr_cols(self):
        """Strip '%' from columns containing it and convert them to float.

        Operates in place on self.temp_full_data_df for easy filtering later.
        """
        col_with_percent = [n for n in self.temp_full_data_df.columns
                            if re.search('PERCENT', n)]
        for col in col_with_percent:
            self.temp_full_data_df[col] = self.temp_full_data_df[col].map(
                lambda x: float(str(x).rstrip('%')))

    def break_list_to_sub_list(self, full_list, chunk_size=45):
        """Break a list into chunks of at most chunk_size items.

        Args:
            full_list (list): full list of items.
        Kwargs:
            chunk_size (int): length of each chunk; clamped to a minimum
                of 1, and should not exceed 50 (url limit).
        Returns:
            (list): list of lists.
        """
        if chunk_size < 1:
            chunk_size = 1
        return [full_list[i:i + chunk_size]
                for i in range(0, len(full_list), chunk_size)]

    def replace_special_characters_in_list(self, full_list):
        """Percent-encode special characters that would break the url.

        At present only ':' is replaced; see
        http://www.blooberry.com/indexdot/html/topics/urlencoding.htm

        Args:
            full_list (list): list of symbols.
        Returns:
            (list): modified list with special characters replaced.
        """
        return [n.replace(':', '%3A') for n in full_list]
class YFinanceDataExtr(object):
    """Extract stock data from Yahoo Finance (Python 3 variant).

    Achieved by querying the Yahoo Finance quotes url (see Yahoo Finance
    API) and downloading the corresponding .csv file. Further analysis of
    the data is done with pandas.
    """

    def __init__(self):
        """Initialise url parameters, retrieval settings and output storage."""
        # Param
        ## self.target_stocks is used mainly for a few stocks.
        ## It also holds each chunk of 45-50 stocks placed into a single url.
        self.target_stocks = ['S58.SI', 'S68.SI']  ## special characters need converting
        self.full_stocklist_to_retrieve = []  # full range of stocks

        # For different retrieval: the dict below selects the input file.
        # Currently "watcher" (selected stocks to watch) and "all".
        self.stock_retrieval_type = 'watcher'

        ## current data .csv file url formation
        # Header names matching the sequence of properties in the formed url.
        self.cur_quotes_parm_headers = ['NAME', 'SYMBOL', 'LATEST_PRICE', 'OPEN',
                                        'CLOSE', 'VOL', 'YEAR_HIGH', 'YEAR_LOW']

        # URL forming for price details.
        self.cur_quotes_start_url = "http://download.finance.yahoo.com/d/quotes.csv?s="
        self.cur_quotes_stock_portion_url = ''
        # Additional str appended to each stock symbol (e.g. '.SI' for SGX stocks).
        self.cur_quotes_stock_portion_additional_url = '.SI'
        self.cur_quotes_property_portion_url = ''
        self.cur_quotes_property_str = 'nsl1opvkj'  # default list of properties to query
        self.cur_quotes_end_url = "&e=.csv"
        self.cur_quotes_full_url = ''

        # Properties from excel.
        self.enable_form_properties_fr_exceltable = 1
        self.properties_excel_table = r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\Individual_stock_query_property.xls'

        # Output storage.
        self.cur_quotes_csvfile = r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\data\temp\stock_data.csv'
        self.cur_quotes_df = object()  # placeholder; becomes a DataFrame after retrieval
        self.cur_quotes_url_list = []  # every url queried, kept for debugging

        # For debug/printing.
        self.store_individual_set_df = []
        self.__print_url = 0  # set truthy to print each formed url

        # Input file path: dict selecting the file per retrieval type.
        self.retrieval_type_input_file_dict = {
            "all": r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\stocklist.csv',
            "watcher": r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\data\google_stock_screener.csv',
        }

    def set_stock_sym_append_str(self, append_str):
        """Set the additional str appended to each stock symbol in the url.

        Mainly used to set the '.SI' suffix for Singapore stocks.

        Args:
            append_str (str): additional str to append to stock symbol.
        """
        self.cur_quotes_stock_portion_additional_url = append_str

    def set_stock_retrieval_type(self, type='all'):
        """Set the stocks retrieval type.

        Kwargs:
            type (str): retrieval type key, default "all". (Parameter name
                kept for backward compatibility although it shadows the
                builtin `type`.)
        """
        self.stock_retrieval_type = type

    def load_stock_symbol_fr_file(self):
        """Load stock symbols from the file chosen by the retrieval type.

        The file must contain a SYMBOL column.
        """
        stock_list = pandas.read_csv(
            self.retrieval_type_input_file_dict[self.stock_retrieval_type])
        self.set_full_stocklist_to_retrieve(list(stock_list['SYMBOL']))

    def set_full_stocklist_to_retrieve(self, list_of_stocks):
        """Set the full list of stocks to retrieve.

        Args:
            list_of_stocks (list): full list of stocks to set.
        """
        self.full_stocklist_to_retrieve = list_of_stocks

    def set_target_stocks_list(self, list_of_stocks):
        """Set the target (current chunk) list of stocks, not the full list.

        Args:
            list_of_stocks (list): target list of stocks to set.
        """
        self.target_stocks = list_of_stocks

    def set_column_headers(self, param_headers):
        """Set the column headers used when naming the dataframe columns.

        Args:
            param_headers (list): list of column names.
        """
        self.cur_quotes_parm_headers = param_headers

    def form_cur_quotes_stock_url_str(self):
        """Form the stock portion of the cur quotes url."""
        # join() replaces the previous quadratic string-concatenation loop;
        # result is identical, including '' for an empty target list.
        suffix = self.cur_quotes_stock_portion_additional_url
        self.cur_quotes_stock_portion_url = ','.join(
            stock + suffix for stock in self.target_stocks)

    def form_cur_quotes_property_url_str_fr_excel(self):
        """Form the '&f=' property url portion from an excel table.

        Requires xls_table_extract_module. Properties can be deselected by
        commenting them out in the table. Also sets the header list
        self.cur_quotes_parm_headers from the table labels.
        """
        from xls_table_extract_module import XlsExtractor
        self.xls_property_data = XlsExtractor(fname=self.properties_excel_table,
                                              sheetname='Sheet1',
                                              param_start_key='stock_property//',
                                              param_end_key='stock_property_end//',
                                              header_key='', col_len=2)
        self.xls_property_data.open_excel_and_process_block_data()

        ## form the header
        # BUGFIX for py3: labels are str; the previous .encode() produced
        # bytes, which later broke str ''.join / url concatenation.
        self.cur_quotes_parm_headers = [
            n.strip() for n in self.xls_property_data.data_label_list]

        ## form the url str
        start_str = '&f='
        target_properties = ''.join(
            [n[0].strip() for n in self.xls_property_data.data_value_list])
        self.cur_quotes_property_portion_url = start_str + target_properties

    def set_quotes_properties(self, target_properties='nsl1opvkj'):
        """Set the properties used by form_cur_quotes_property_url_str.

        Kwargs:
            target_properties (str): property codes, default 'nsl1opvkj':
                name(n0), symbol(s), latest value(l1), open(o), previous
                close(p), volume(v), year high(k), year low(j).
        """
        self.cur_quotes_property_str = target_properties

    def form_cur_quotes_property_url_str(self):
        """Form the '&f=' property url portion from self.cur_quotes_property_str.

        Can also be formed via form_cur_quotes_property_url_str_fr_excel.
        Property codes are documented at:
        https://code.google.com/p/yahoo-finance-managed/wiki/enumQuoteProperty
        """
        start_str = '&f='
        self.cur_quotes_property_portion_url = start_str + self.cur_quotes_property_str

    def form_url_str(self, type='cur_quotes'):
        """Form the url str necessary to get the .csv file.

        Args:
            type (str): retrieval type; only 'cur_quotes' is handled.
        """
        if type == 'cur_quotes':
            self.form_cur_quotes_stock_url_str()
            # Form the property portion; two methods enabled.
            if self.enable_form_properties_fr_exceltable:
                self.form_cur_quotes_property_url_str_fr_excel()
            else:
                self.form_cur_quotes_property_url_str()
            self.cur_quotes_full_url = (self.cur_quotes_start_url +
                                        self.cur_quotes_stock_portion_url +
                                        self.cur_quotes_property_portion_url +
                                        self.cur_quotes_end_url)

    def downloading_csv(self, url_address):
        """Download the csv data from url_address to self.cur_quotes_csvfile."""
        print(url_address)
        # Both handles are closed even if the read or write fails.
        with request.urlopen(url_address) as response, \
                open(self.cur_quotes_csvfile, 'wb') as f:
            f.write(response.read())

    def cur_quotes_create_dataframe(self):
        """Load the downloaded csv into self.cur_quotes_df and name the columns."""
        self.cur_quotes_df = pandas.read_csv(self.cur_quotes_csvfile, header=None)
        self.cur_quotes_df.rename(
            columns={org: change.upper() for org, change
                     in zip(self.cur_quotes_df.columns, self.cur_quotes_parm_headers)},
            inplace=True)

    def get_cur_quotes(self):
        """Form the url, download the csv and build the dataframe."""
        self.form_url_str()
        if self.__print_url:
            print(self.cur_quotes_full_url)
        self.downloading_csv(self.cur_quotes_full_url)
        self.cur_quotes_create_dataframe()

    def get_cur_quotes_fr_list(self):
        """Retrieve quotes for the full stock list in url-sized chunks.

        The quotes url accepts at most 50 symbols; chunks of 45 are used as
        a safeguard. Results accumulate in self.temp_full_data_df.
        """
        ## full list with special characters taken care of
        full_list = self.replace_special_characters_in_list(
            self.full_stocklist_to_retrieve)
        # BUGFIX: chunk the sanitised list -- previously the raw list was
        # chunked and the special-character replacement was discarded.
        chunk_of_list = self.break_list_to_sub_list(full_list)
        self.temp_full_data_df = None
        for n in chunk_of_list:
            # Print the progress.
            sys.stdout.write('.')
            # Set the small chunk of list.
            self.set_target_stocks_list(n)
            self.get_cur_quotes()
            ## Take care of cases where the return is empty -- would return
            ## a "Missing symbols" list shorter than the expected headers.
            if not len(self.cur_quotes_df.columns) < len(self.cur_quotes_parm_headers):
                self.store_individual_set_df.append(self.cur_quotes_df)
                if self.temp_full_data_df is None:
                    self.temp_full_data_df = self.cur_quotes_df
                else:
                    self.temp_full_data_df = self.temp_full_data_df.append(self.cur_quotes_df)
        ## Remove the % symbol from self.temp_full_data_df columns.
        self.rm_percent_symbol_fr_cols()
        print('Done\n')

    def rm_percent_symbol_fr_cols(self):
        """Strip '%' from columns containing it and convert them to float.

        Operates in place on self.temp_full_data_df for easy filtering later.
        """
        col_with_percent = [n for n in self.temp_full_data_df.columns
                            if re.search('PERCENT', n)]
        for col in col_with_percent:
            self.temp_full_data_df[col] = self.temp_full_data_df[col].map(
                lambda x: float(str(x).rstrip('%')))

    def break_list_to_sub_list(self, full_list, chunk_size=45):
        """Break a list into chunks of at most chunk_size items.

        Args:
            full_list (list): full list of items.
        Kwargs:
            chunk_size (int): length of each chunk; clamped to a minimum
                of 1, and should not exceed 50 (url limit).
        Returns:
            (list): list of lists.
        """
        if chunk_size < 1:
            chunk_size = 1
        return [full_list[i:i + chunk_size]
                for i in range(0, len(full_list), chunk_size)]

    def replace_special_characters_in_list(self, full_list):
        """Percent-encode special characters that would break the url.

        At present only ':' is replaced; see
        http://www.blooberry.com/indexdot/html/topics/urlencoding.htm

        Args:
            full_list (list): list of symbols.
        Returns:
            (list): modified list with special characters replaced.
        """
        return [n.replace(':', '%3A') for n in full_list]
def __init__(self): super(SGXDataExtract, self).__init__() # dict will contain a tuple of website and the target tag self.retrieval_dict = { 'company_info': ('http://54.254.221.141/sgx/search?callback=jQuery111008655668143182993_1425728207793&json=%7B%22criteria%22%3A%5B%5D%7D&_=1425728207802', 'companies'), 'announcement': ('http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCOW_CorpAnnouncement_Content%26B%3DAnnouncementToday%26R_C%3D%26C_T%3D200', 'items'), 'ex_div_data': ('http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCow_CorporateInformation_Content%26B%3DCorpDistributionByExDate%26S_T%3D1%26C_T%3D400', 'items'), 'curr_price': ('http://sgx.com/JsonRead/JsonData?qryId=RStock&timeout=30', 'items'), } ## for short sell information. self.shortsell_info_start_url = 'http://sgx.com/wps/wcm/connect/sgx_en/home/market_info/short_sale/short_sale_daily/DailyShortSell' self.shortsell_date_url = '20150417' self.shortsell_end_url = '.txt' self.shortsell_full_url = r'' ## parameters self.saved_json_file = r'c:\data\temptryyql.json' self.saved_parm_df_dict = { } #storing the final df in dict with type as keyword ## for setting up custom alerts self.custom_alert_dict_list = [] #list of dict?? 
self.custom_alert_df = pandas.DataFrame( ) #convert the custom alert dict to ## final output paramters self.sgx_announ_df = pandas.DataFrame() self.sgx_div_ex_date_df = pandas.DataFrame() self.sgx_curr_price_df = pandas.DataFrame() self.price_limit_alerts_df = pandas.DataFrame() self.shortsell_info_df = pandas.DataFrame() self.sgx_curr_plus_company_df = pandas.DataFrame() ## shortend output version for alert creation self.div_ex_date_shtver = '' self.filtered_announ_shtver = '' ## watchlist to set self.price_limit_reach_watchlist = [] self.announce_watchlist = [] ## target stocks for announcements -- using excel query xls_set_class = XlsExtractor( fname=r'C:\data\stockselection_for_sgx.xls', sheetname='stockselection', param_start_key='stock//', param_end_key='stock_end//', header_key='header#2//', col_len=2) xls_set_class.open_excel_and_process_block_data() self.announce_watchlist = xls_set_class.data_label_list #also get the company name self.companyname_watchlist = [ n[0].encode() for n in xls_set_class.data_value_list ] #also get the company name
if choice == 5: """ Handling current price """ w = SGXDataExtract() #w.shortsell_notification() #print w.shortsell_info_df w.process_all_data() w.sgx_curr_price_df.to_csv(r'c:\data\temp.csv', index=False) if choice == 6: """ Use excel table to get the required stocks.""" xls_set_class = XlsExtractor( fname=r'C:\data\stockselection_for_sgx.xls', sheetname='stockselection', param_start_key='stock//', param_end_key='stock_end//', header_key='header#2//', col_len=2) xls_set_class.open_excel_and_process_block_data() xls_set_class.data_label_list if choice ==7: """ use the price extract here to pull """ if choice == 8: """ Get the short sell volume. Need to get the shortsell vs the volumne traded that day Url need change with the current date