def form_cur_quotes_property_url_str_fr_excel(self):
    """Build the '&f=' property portion of the quotes url from an excel table.

    Requires xls_table_extract_module. Properties can be deselected by
    commenting them out in the excel table. Also sets the column header
    list self.cur_quotes_parm_headers from the table labels.
    """
    from xls_table_extract_module import XlsExtractor

    self.xls_property_data = XlsExtractor(
        fname=self.properties_excel_table,
        sheetname='Sheet1',
        param_start_key='stock_property//',
        param_end_key='stock_property_end//',
        header_key='',
        col_len=2,
    )
    self.xls_property_data.open_excel_and_process_block_data()

    # Column headers come from the label column of the table.
    headers = []
    for label in self.xls_property_data.data_label_list:
        headers.append(label.encode().strip())
    self.cur_quotes_parm_headers = headers

    # Property codes are concatenated after the '&f=' marker.
    codes = [row[0].encode().strip()
             for row in self.xls_property_data.data_value_list]
    self.cur_quotes_property_portion_url = '&f=' + ''.join(codes)
def setUp(self):
    """Instantiate the XlsExtractor fixture exercised by the tests."""
    test_xls_path = r'C:\Python27\Lib\site-packages\excel_table_extract\testset.xls'
    self.xls_set_class_1 = XlsExtractor(
        fname=test_xls_path,
        sheetname='Sheet1',
        param_start_key='start//',
        param_end_key='end//',
        header_key='',
        col_len=3,
    )
    self.xls_set_class_1.initialize_excel()
class XlsExtractorTest(XlsExtractorTestCase):
    """Unit tests for the xls extractor module.

    TODO (from original author): consider splitting into two
    separate test objects.
    """

    def setUp(self):
        """Build the XlsExtractor fixture used by every test."""
        self.xls_set_class_1 = XlsExtractor(
            fname=r'C:\Python27\Lib\site-packages\excel_table_extract\testset.xls',
            sheetname='Sheet1',
            param_start_key='start//',
            param_end_key='end//',
            header_key='',
            col_len=3,
        )
        self.xls_set_class_1.initialize_excel()

    def test_calculate_start_end_pos_for_data(self):
        """Start/end cell addresses of the data block are located correctly."""
        row0, col0, row1 = self.xls_set_class_1.calculate_start_end_pos_for_data()
        self.assertTupleEqual((row0, col0, row1), (7, 4, 11))

    def test_get_raw_block_fr_xls(self):
        """Raw block retrieval keeps comment rows and blank rows."""
        row0, col0, row1 = self.xls_set_class_1.calculate_start_end_pos_for_data()
        raw_rows = self.xls_set_class_1.get_raw_block_fr_xls(
            row0, col0, row1, self.xls_set_class_1.col_len)
        expected = [
            [None, 'label1', 2.0, 3.0],
            ['#', 'label2', 5.0, 6.0],
            [None, None, None, None],
            [None, 'label3', 8.0, 9.0],
        ]
        self.assertListEqual(raw_rows, expected)

    def test_filter_space_and_comment_row(self):
        """Blank rows and '#'-marked rows are dropped."""
        filtered = self.xls_set_class_1.filter_space_and_comment_row([
            [None, 1.0, 2.0, 3.0],
            ['#', 4.0, 5.0, 6.0],
            [None, None, None, None],
            [None, 7.0, 8.0, 9.0],
        ])
        self.assertListEqual(
            filtered, [[None, 1.0, 2.0, 3.0], [None, 7.0, 8.0, 9.0]])

    def test_exclude_comment_block(self):
        """The leading comment-marker column is stripped from each row."""
        stripped = self.xls_set_class_1.exclude_comment_block(
            [[None, 1.0, 2.0, 3.0], [None, 7.0, 8.0, 9.0]])
        self.assertListEqual(stripped, [[1.0, 2.0, 3.0], [7.0, 8.0, 9.0]])

    def tearDown(self):
        """Attempt to close the excel handle for the fixture."""
        try:
            self.xls_set_class_1.close_excel()
        finally:
            pass
def form_cur_quotes_property_url_str_fr_excel(self):
    """Form the '&f=' property url portion from the configured excel table.

    Requires xls_table_extract_module. Properties not required can be
    commented out in the table; the table labels also become the column
    headers self.cur_quotes_parm_headers.
    """
    from xls_table_extract_module import XlsExtractor

    extractor = XlsExtractor(fname=self.properties_excel_table,
                             sheetname='Sheet1',
                             param_start_key='stock_property//',
                             param_end_key='stock_property_end//',
                             header_key='',
                             col_len=2)
    extractor.open_excel_and_process_block_data()
    self.xls_property_data = extractor

    # Header names taken from the label column.
    self.cur_quotes_parm_headers = [
        label.encode().strip() for label in extractor.data_label_list
    ]

    # Property codes joined together behind the '&f=' marker.
    start_str = '&f='
    target_properties = ''.join(
        [value_row[0].encode().strip() for value_row in extractor.data_value_list]
    )
    self.cur_quotes_property_portion_url = start_str + target_properties
def __init__(self):
    """Initialise retrieval urls, output dataframes and stock watchlists."""
    super(SGXDataExtract, self).__init__()

    # Each retrieval type maps to a (source website, target json tag) pair.
    self.retrieval_dict = {
        "company_info": (
            "http://54.254.221.141/sgx/search?callback=jQuery111008655668143182993_1425728207793&json=%7B%22criteria%22%3A%5B%5D%7D&_=1425728207802",
            "companies",
        ),
        "announcement": (
            "http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCOW_CorpAnnouncement_Content%26B%3DAnnouncementToday%26R_C%3D%26C_T%3D200",
            "items",
        ),
        "ex_div_data": (
            "http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCow_CorporateInformation_Content%26B%3DCorpDistributionByExDate%26S_T%3D1%26C_T%3D400",
            "items",
        ),
        "curr_price": ("http://sgx.com/JsonRead/JsonData?qryId=RStock&timeout=30", "items"),
    }

    # Short-sell report url parts -- presumably the date portion is
    # substituted per run; confirm against the url-forming caller.
    self.shortsell_info_start_url = (
        "http://sgx.com/wps/wcm/connect/sgx_en/home/market_info/short_sale/short_sale_daily/DailyShortSell"
    )
    self.shortsell_date_url = "20150417"
    self.shortsell_end_url = ".txt"
    self.shortsell_full_url = r""

    # Scratch storage.
    self.saved_json_file = r"c:\data\temptryyql.json"
    self.saved_parm_df_dict = {}  # final df per retrieval type keyword

    # Custom alert setup.
    self.custom_alert_dict_list = []  # list of dict??
    self.custom_alert_df = pandas.DataFrame()  # custom alert dicts as a frame

    # Final output dataframes, all start empty.
    for frame_attr in (
        "sgx_announ_df",
        "sgx_div_ex_date_df",
        "sgx_curr_price_df",
        "price_limit_alerts_df",
        "shortsell_info_df",
        "sgx_curr_plus_company_df",
    ):
        setattr(self, frame_attr, pandas.DataFrame())

    # Shortened output versions for alert creation.
    self.div_ex_date_shtver = ""
    self.filtered_announ_shtver = ""

    # Watchlists to populate.
    self.price_limit_reach_watchlist = []
    self.announce_watchlist = []

    # Target stocks for announcements -- pulled via excel query.
    stock_selection_xls = XlsExtractor(
        fname=r"C:\data\stockselection_for_sgx.xls",
        sheetname="stockselection",
        param_start_key="stock//",
        param_end_key="stock_end//",
        header_key="header#2//",
        col_len=2,
    )
    stock_selection_xls.open_excel_and_process_block_data()
    self.announce_watchlist = stock_selection_xls.data_label_list
    # Matching company names for the watchlist symbols.
    self.companyname_watchlist = [n[0].encode() for n in stock_selection_xls.data_value_list]
# Manual test / demo dispatch driven by an integer `choice` defined earlier
# in the script. NOTE(review): the bare triple-quoted strings inside each
# branch are inline notes, not docstrings -- they have no runtime effect.
if choice == 5:
    """ Handling current price """
    w = SGXDataExtract()
    # w.shortsell_notification()
    # print w.shortsell_info_df
    w.process_all_data()
    # Dump the current-price dataframe for offline inspection.
    w.sgx_curr_price_df.to_csv(r"c:\data\temp.csv", index=False)

if choice == 6:
    """ Use excel table to get the required stocks."""
    xls_set_class = XlsExtractor(
        fname=r"C:\data\stockselection_for_sgx.xls",
        sheetname="stockselection",
        param_start_key="stock//",
        param_end_key="stock_end//",
        header_key="header#2//",
        col_len=2,
    )
    xls_set_class.open_excel_and_process_block_data()
    # NOTE(review): expression below has no effect -- presumably left over
    # from interactive inspection of the extracted labels.
    xls_set_class.data_label_list

if choice == 7:
    """ use the price extract here to pull """

if choice == 8:
    """ Get the short sell volume. Need to get the shortsell vs the volumne traded that day Url need change with the current date """
class YFinanceDataExtr(object):
    """Extract stock data from Yahoo Finance.

    Achieved by querying the Yahoo Finance quotes url (see Yahoo Finance
    API) and downloading the corresponding .csv file. Further analysis of
    the data is done with pandas.
    """

    def __init__(self):
        """Initialise url parameters, retrieval settings and output storage."""
        # Param
        ## self.target_stocks is used mainly for a few stocks.
        ## It also holds each chunk of 45-50 stocks placed into a single url.
        self.target_stocks = ['S58.SI', 'S68.SI']  ## special characters need converting
        self.full_stocklist_to_retrieve = []  # full range of stocks

        # For different retrieval: the dict below selects the input file.
        # Currently "watcher" (selected stocks to watch) and "all".
        self.stock_retrieval_type = 'watcher'

        ## current data .csv file url formation
        # Header names matching the sequence of properties in the formed url.
        self.cur_quotes_parm_headers = ['NAME', 'SYMBOL', 'LATEST_PRICE', 'OPEN',
                                        'CLOSE', 'VOL', 'YEAR_HIGH', 'YEAR_LOW']

        # URL forming for price details.
        self.cur_quotes_start_url = "http://download.finance.yahoo.com/d/quotes.csv?s="
        self.cur_quotes_stock_portion_url = ''
        # Additional str appended to each stock symbol (e.g. '.SI' for SGX stocks).
        self.cur_quotes_stock_portion_additional_url = '.SI'
        self.cur_quotes_property_portion_url = ''
        self.cur_quotes_property_str = 'nsl1opvkj'  # default list of properties to query
        self.cur_quotes_end_url = "&e=.csv"
        self.cur_quotes_full_url = ''

        # Properties from excel.
        self.enable_form_properties_fr_exceltable = 1
        self.properties_excel_table = r'C:\pythonuserfiles\yahoo_finance_data_extract\Individual_stock_query_property.xls'

        # Output storage.
        self.cur_quotes_csvfile = r'c:\data\temp\stock_data.csv'
        self.cur_quotes_df = object()  # placeholder; becomes a DataFrame after retrieval
        self.cur_quotes_url_list = []  # every url queried, kept for debugging

        # For debug/printing.
        self.store_individual_set_df = []
        self.__print_url = 0  # set truthy to print each formed url

        # Input file path: dict selecting the file per retrieval type.
        self.retrieval_type_input_file_dict = {
            "all": r'C:\pythonuserfiles\yahoo_finance_data_extract\stocklist.csv',
            "watcher": r'c:\data\google_stock_screener.csv',
        }

    def set_stock_sym_append_str(self, append_str):
        """Set the additional str appended to each stock symbol in the url.

        Mainly used to set the '.SI' suffix for Singapore stocks.

        Args:
            append_str (str): additional str to append to stock symbol.
        """
        self.cur_quotes_stock_portion_additional_url = append_str

    def set_stock_retrieval_type(self, type='all'):
        """Set the stocks retrieval type.

        Kwargs:
            type (str): retrieval type key, default "all". (Parameter name
                kept for backward compatibility although it shadows the
                builtin `type`.)
        """
        self.stock_retrieval_type = type

    def load_stock_symbol_fr_file(self):
        """Load stock symbols from the file chosen by the retrieval type.

        The file must contain a SYMBOL column.
        """
        stock_list = pandas.read_csv(
            self.retrieval_type_input_file_dict[self.stock_retrieval_type])
        self.set_full_stocklist_to_retrieve(list(stock_list['SYMBOL']))

    def set_full_stocklist_to_retrieve(self, list_of_stocks):
        """Set the full list of stocks to retrieve.

        Args:
            list_of_stocks (list): full list of stocks to set.
        """
        self.full_stocklist_to_retrieve = list_of_stocks

    def set_target_stocks_list(self, list_of_stocks):
        """Set the target (current chunk) list of stocks, not the full list.

        Args:
            list_of_stocks (list): target list of stocks to set.
        """
        self.target_stocks = list_of_stocks

    def set_column_headers(self, param_headers):
        """Set the column headers used when naming the dataframe columns.

        Args:
            param_headers (list): list of column names.
        """
        self.cur_quotes_parm_headers = param_headers

    def form_cur_quotes_stock_url_str(self):
        """Form the stock portion of the cur quotes url."""
        # join() replaces the previous quadratic string-concatenation loop;
        # result is identical, including '' for an empty target list.
        suffix = self.cur_quotes_stock_portion_additional_url
        self.cur_quotes_stock_portion_url = ','.join(
            stock + suffix for stock in self.target_stocks)

    def form_cur_quotes_property_url_str_fr_excel(self):
        """Form the '&f=' property url portion from an excel table.

        Requires xls_table_extract_module. Properties can be deselected by
        commenting them out in the table. Also sets the header list
        self.cur_quotes_parm_headers from the table labels.
        """
        from xls_table_extract_module import XlsExtractor
        self.xls_property_data = XlsExtractor(fname=self.properties_excel_table,
                                              sheetname='Sheet1',
                                              param_start_key='stock_property//',
                                              param_end_key='stock_property_end//',
                                              header_key='', col_len=2)
        self.xls_property_data.open_excel_and_process_block_data()

        ## form the header
        self.cur_quotes_parm_headers = [
            n.encode().strip() for n in self.xls_property_data.data_label_list]

        ## form the url str
        start_str = '&f='
        target_properties = ''.join(
            [n[0].encode().strip() for n in self.xls_property_data.data_value_list])
        self.cur_quotes_property_portion_url = start_str + target_properties

    def set_quotes_properties(self, target_properties='nsl1opvkj'):
        """Set the properties used by form_cur_quotes_property_url_str.

        Kwargs:
            target_properties (str): property codes, default 'nsl1opvkj':
                name(n0), symbol(s), latest value(l1), open(o), previous
                close(p), volume(v), year high(k), year low(j).
        """
        self.cur_quotes_property_str = target_properties

    def form_cur_quotes_property_url_str(self):
        """Form the '&f=' property url portion from self.cur_quotes_property_str.

        Can also be formed via form_cur_quotes_property_url_str_fr_excel.
        Property codes are documented at:
        https://code.google.com/p/yahoo-finance-managed/wiki/enumQuoteProperty
        """
        start_str = '&f='
        self.cur_quotes_property_portion_url = start_str + self.cur_quotes_property_str

    def form_url_str(self, type='cur_quotes'):
        """Form the url str necessary to get the .csv file.

        Args:
            type (str): retrieval type; only 'cur_quotes' is handled.
        """
        if type == 'cur_quotes':
            self.form_cur_quotes_stock_url_str()
            # Form the property portion; two methods enabled.
            if self.enable_form_properties_fr_exceltable:
                self.form_cur_quotes_property_url_str_fr_excel()
            else:
                self.form_cur_quotes_property_url_str()
            self.cur_quotes_full_url = (self.cur_quotes_start_url +
                                        self.cur_quotes_stock_portion_url +
                                        self.cur_quotes_property_portion_url +
                                        self.cur_quotes_end_url)

    def downloading_csv(self, url_address):
        """Download the csv data from url_address to self.cur_quotes_csvfile."""
        cache.clear()
        url = URL(url_address)
        # 'with' guarantees the file handle is closed even if download fails.
        with open(self.cur_quotes_csvfile, 'wb') as f:
            f.write(url.download())

    def cur_quotes_create_dataframe(self):
        """Load the downloaded csv into self.cur_quotes_df and name the columns."""
        self.cur_quotes_df = pandas.read_csv(self.cur_quotes_csvfile, header=None)
        self.cur_quotes_df.rename(
            columns={org: change.upper() for org, change
                     in zip(self.cur_quotes_df.columns, self.cur_quotes_parm_headers)},
            inplace=True)

    def get_cur_quotes(self):
        """Form the url, download the csv and build the dataframe."""
        self.form_url_str()
        if self.__print_url:
            print(self.cur_quotes_full_url)  # parenthesised: valid in py2 and py3
        self.downloading_csv(self.cur_quotes_full_url)
        self.cur_quotes_create_dataframe()

    def get_cur_quotes_fr_list(self):
        """Retrieve quotes for the full stock list in url-sized chunks.

        The quotes url accepts at most 50 symbols; chunks of 45 are used as
        a safeguard. Results accumulate in self.temp_full_data_df.
        """
        ## full list with special characters taken care of
        full_list = self.replace_special_characters_in_list(
            self.full_stocklist_to_retrieve)
        # BUGFIX: chunk the sanitised list -- previously the raw list was
        # chunked and the special-character replacement was discarded.
        chunk_of_list = self.break_list_to_sub_list(full_list)
        self.temp_full_data_df = None
        for n in chunk_of_list:
            # Print the progress.
            sys.stdout.write('.')
            # Set the small chunk of list.
            self.set_target_stocks_list(n)
            self.get_cur_quotes()
            ## Take care of cases where the return is empty -- would return
            ## a "Missing symbols" list shorter than the expected headers.
            if not len(self.cur_quotes_df.columns) < len(self.cur_quotes_parm_headers):
                self.store_individual_set_df.append(self.cur_quotes_df)
                if self.temp_full_data_df is None:
                    self.temp_full_data_df = self.cur_quotes_df
                else:
                    self.temp_full_data_df = self.temp_full_data_df.append(self.cur_quotes_df)
        ## Remove the % symbol from self.temp_full_data_df columns.
        self.rm_percent_symbol_fr_cols()
        print('Done\n')

    def rm_percent_symbol_fr_cols(self):
        """Strip '%' from columns containing it and convert them to float.

        Operates in place on self.temp_full_data_df for easy filtering later.
        """
        col_with_percent = [n for n in self.temp_full_data_df.columns
                            if re.search('PERCENT', n)]
        for col in col_with_percent:
            self.temp_full_data_df[col] = self.temp_full_data_df[col].map(
                lambda x: float(str(x).rstrip('%')))

    def break_list_to_sub_list(self, full_list, chunk_size=45):
        """Break a list into chunks of at most chunk_size items.

        Args:
            full_list (list): full list of items.
        Kwargs:
            chunk_size (int): length of each chunk; clamped to a minimum
                of 1, and should not exceed 50 (url limit).
        Returns:
            (list): list of lists.
        """
        if chunk_size < 1:
            chunk_size = 1
        return [full_list[i:i + chunk_size]
                for i in range(0, len(full_list), chunk_size)]

    def replace_special_characters_in_list(self, full_list):
        """Percent-encode special characters that would break the url.

        At present only ':' is replaced; see
        http://www.blooberry.com/indexdot/html/topics/urlencoding.htm

        Args:
            full_list (list): list of symbols.
        Returns:
            (list): modified list with special characters replaced.
        """
        return [n.replace(':', '%3A') for n in full_list]
class YFinanceDataExtr(object):
    """Extract stock data from Yahoo Finance (Python 3 variant).

    Achieved by querying the Yahoo Finance quotes url (see Yahoo Finance
    API) and downloading the corresponding .csv file. Further analysis of
    the data is done with pandas.
    """

    def __init__(self):
        """Initialise url parameters, retrieval settings and output storage."""
        # Param
        ## self.target_stocks is used mainly for a few stocks.
        ## It also holds each chunk of 45-50 stocks placed into a single url.
        self.target_stocks = ['S58.SI', 'S68.SI']  ## special characters need converting
        self.full_stocklist_to_retrieve = []  # full range of stocks

        # For different retrieval: the dict below selects the input file.
        # Currently "watcher" (selected stocks to watch) and "all".
        self.stock_retrieval_type = 'watcher'

        ## current data .csv file url formation
        # Header names matching the sequence of properties in the formed url.
        self.cur_quotes_parm_headers = ['NAME', 'SYMBOL', 'LATEST_PRICE', 'OPEN',
                                        'CLOSE', 'VOL', 'YEAR_HIGH', 'YEAR_LOW']

        # URL forming for price details.
        self.cur_quotes_start_url = "http://download.finance.yahoo.com/d/quotes.csv?s="
        self.cur_quotes_stock_portion_url = ''
        # Additional str appended to each stock symbol (e.g. '.SI' for SGX stocks).
        self.cur_quotes_stock_portion_additional_url = '.SI'
        self.cur_quotes_property_portion_url = ''
        self.cur_quotes_property_str = 'nsl1opvkj'  # default list of properties to query
        self.cur_quotes_end_url = "&e=.csv"
        self.cur_quotes_full_url = ''

        # Properties from excel.
        self.enable_form_properties_fr_exceltable = 1
        self.properties_excel_table = r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\Individual_stock_query_property.xls'

        # Output storage.
        self.cur_quotes_csvfile = r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\data\temp\stock_data.csv'
        self.cur_quotes_df = object()  # placeholder; becomes a DataFrame after retrieval
        self.cur_quotes_url_list = []  # every url queried, kept for debugging

        # For debug/printing.
        self.store_individual_set_df = []
        self.__print_url = 0  # set truthy to print each formed url

        # Input file path: dict selecting the file per retrieval type.
        self.retrieval_type_input_file_dict = {
            "all": r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\stocklist.csv',
            "watcher": r'C:\Users\JSYADAV\PycharmProjects\yahoo_finance_data_extract\data\google_stock_screener.csv',
        }

    def set_stock_sym_append_str(self, append_str):
        """Set the additional str appended to each stock symbol in the url.

        Mainly used to set the '.SI' suffix for Singapore stocks.

        Args:
            append_str (str): additional str to append to stock symbol.
        """
        self.cur_quotes_stock_portion_additional_url = append_str

    def set_stock_retrieval_type(self, type='all'):
        """Set the stocks retrieval type.

        Kwargs:
            type (str): retrieval type key, default "all". (Parameter name
                kept for backward compatibility although it shadows the
                builtin `type`.)
        """
        self.stock_retrieval_type = type

    def load_stock_symbol_fr_file(self):
        """Load stock symbols from the file chosen by the retrieval type.

        The file must contain a SYMBOL column.
        """
        stock_list = pandas.read_csv(
            self.retrieval_type_input_file_dict[self.stock_retrieval_type])
        self.set_full_stocklist_to_retrieve(list(stock_list['SYMBOL']))

    def set_full_stocklist_to_retrieve(self, list_of_stocks):
        """Set the full list of stocks to retrieve.

        Args:
            list_of_stocks (list): full list of stocks to set.
        """
        self.full_stocklist_to_retrieve = list_of_stocks

    def set_target_stocks_list(self, list_of_stocks):
        """Set the target (current chunk) list of stocks, not the full list.

        Args:
            list_of_stocks (list): target list of stocks to set.
        """
        self.target_stocks = list_of_stocks

    def set_column_headers(self, param_headers):
        """Set the column headers used when naming the dataframe columns.

        Args:
            param_headers (list): list of column names.
        """
        self.cur_quotes_parm_headers = param_headers

    def form_cur_quotes_stock_url_str(self):
        """Form the stock portion of the cur quotes url."""
        # join() replaces the previous quadratic string-concatenation loop;
        # result is identical, including '' for an empty target list.
        suffix = self.cur_quotes_stock_portion_additional_url
        self.cur_quotes_stock_portion_url = ','.join(
            stock + suffix for stock in self.target_stocks)

    def form_cur_quotes_property_url_str_fr_excel(self):
        """Form the '&f=' property url portion from an excel table.

        Requires xls_table_extract_module. Properties can be deselected by
        commenting them out in the table. Also sets the header list
        self.cur_quotes_parm_headers from the table labels.
        """
        from xls_table_extract_module import XlsExtractor
        self.xls_property_data = XlsExtractor(fname=self.properties_excel_table,
                                              sheetname='Sheet1',
                                              param_start_key='stock_property//',
                                              param_end_key='stock_property_end//',
                                              header_key='', col_len=2)
        self.xls_property_data.open_excel_and_process_block_data()

        ## form the header
        # BUGFIX for py3: labels are str; the previous .encode() produced
        # bytes, which later broke str ''.join / url concatenation.
        self.cur_quotes_parm_headers = [
            n.strip() for n in self.xls_property_data.data_label_list]

        ## form the url str
        start_str = '&f='
        target_properties = ''.join(
            [n[0].strip() for n in self.xls_property_data.data_value_list])
        self.cur_quotes_property_portion_url = start_str + target_properties

    def set_quotes_properties(self, target_properties='nsl1opvkj'):
        """Set the properties used by form_cur_quotes_property_url_str.

        Kwargs:
            target_properties (str): property codes, default 'nsl1opvkj':
                name(n0), symbol(s), latest value(l1), open(o), previous
                close(p), volume(v), year high(k), year low(j).
        """
        self.cur_quotes_property_str = target_properties

    def form_cur_quotes_property_url_str(self):
        """Form the '&f=' property url portion from self.cur_quotes_property_str.

        Can also be formed via form_cur_quotes_property_url_str_fr_excel.
        Property codes are documented at:
        https://code.google.com/p/yahoo-finance-managed/wiki/enumQuoteProperty
        """
        start_str = '&f='
        self.cur_quotes_property_portion_url = start_str + self.cur_quotes_property_str

    def form_url_str(self, type='cur_quotes'):
        """Form the url str necessary to get the .csv file.

        Args:
            type (str): retrieval type; only 'cur_quotes' is handled.
        """
        if type == 'cur_quotes':
            self.form_cur_quotes_stock_url_str()
            # Form the property portion; two methods enabled.
            if self.enable_form_properties_fr_exceltable:
                self.form_cur_quotes_property_url_str_fr_excel()
            else:
                self.form_cur_quotes_property_url_str()
            self.cur_quotes_full_url = (self.cur_quotes_start_url +
                                        self.cur_quotes_stock_portion_url +
                                        self.cur_quotes_property_portion_url +
                                        self.cur_quotes_end_url)

    def downloading_csv(self, url_address):
        """Download the csv data from url_address to self.cur_quotes_csvfile."""
        print(url_address)
        # Both handles are closed even if the read or write fails.
        with request.urlopen(url_address) as response, \
                open(self.cur_quotes_csvfile, 'wb') as f:
            f.write(response.read())

    def cur_quotes_create_dataframe(self):
        """Load the downloaded csv into self.cur_quotes_df and name the columns."""
        self.cur_quotes_df = pandas.read_csv(self.cur_quotes_csvfile, header=None)
        self.cur_quotes_df.rename(
            columns={org: change.upper() for org, change
                     in zip(self.cur_quotes_df.columns, self.cur_quotes_parm_headers)},
            inplace=True)

    def get_cur_quotes(self):
        """Form the url, download the csv and build the dataframe."""
        self.form_url_str()
        if self.__print_url:
            print(self.cur_quotes_full_url)
        self.downloading_csv(self.cur_quotes_full_url)
        self.cur_quotes_create_dataframe()

    def get_cur_quotes_fr_list(self):
        """Retrieve quotes for the full stock list in url-sized chunks.

        The quotes url accepts at most 50 symbols; chunks of 45 are used as
        a safeguard. Results accumulate in self.temp_full_data_df.
        """
        ## full list with special characters taken care of
        full_list = self.replace_special_characters_in_list(
            self.full_stocklist_to_retrieve)
        # BUGFIX: chunk the sanitised list -- previously the raw list was
        # chunked and the special-character replacement was discarded.
        chunk_of_list = self.break_list_to_sub_list(full_list)
        self.temp_full_data_df = None
        for n in chunk_of_list:
            # Print the progress.
            sys.stdout.write('.')
            # Set the small chunk of list.
            self.set_target_stocks_list(n)
            self.get_cur_quotes()
            ## Take care of cases where the return is empty -- would return
            ## a "Missing symbols" list shorter than the expected headers.
            if not len(self.cur_quotes_df.columns) < len(self.cur_quotes_parm_headers):
                self.store_individual_set_df.append(self.cur_quotes_df)
                if self.temp_full_data_df is None:
                    self.temp_full_data_df = self.cur_quotes_df
                else:
                    self.temp_full_data_df = self.temp_full_data_df.append(self.cur_quotes_df)
        ## Remove the % symbol from self.temp_full_data_df columns.
        self.rm_percent_symbol_fr_cols()
        print('Done\n')

    def rm_percent_symbol_fr_cols(self):
        """Strip '%' from columns containing it and convert them to float.

        Operates in place on self.temp_full_data_df for easy filtering later.
        """
        col_with_percent = [n for n in self.temp_full_data_df.columns
                            if re.search('PERCENT', n)]
        for col in col_with_percent:
            self.temp_full_data_df[col] = self.temp_full_data_df[col].map(
                lambda x: float(str(x).rstrip('%')))

    def break_list_to_sub_list(self, full_list, chunk_size=45):
        """Break a list into chunks of at most chunk_size items.

        Args:
            full_list (list): full list of items.
        Kwargs:
            chunk_size (int): length of each chunk; clamped to a minimum
                of 1, and should not exceed 50 (url limit).
        Returns:
            (list): list of lists.
        """
        if chunk_size < 1:
            chunk_size = 1
        return [full_list[i:i + chunk_size]
                for i in range(0, len(full_list), chunk_size)]

    def replace_special_characters_in_list(self, full_list):
        """Percent-encode special characters that would break the url.

        At present only ':' is replaced; see
        http://www.blooberry.com/indexdot/html/topics/urlencoding.htm

        Args:
            full_list (list): list of symbols.
        Returns:
            (list): modified list with special characters replaced.
        """
        return [n.replace(':', '%3A') for n in full_list]
def __init__(self): super(SGXDataExtract, self).__init__() # dict will contain a tuple of website and the target tag self.retrieval_dict = { 'company_info': ('http://54.254.221.141/sgx/search?callback=jQuery111008655668143182993_1425728207793&json=%7B%22criteria%22%3A%5B%5D%7D&_=1425728207802', 'companies'), 'announcement': ('http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCOW_CorpAnnouncement_Content%26B%3DAnnouncementToday%26R_C%3D%26C_T%3D200', 'items'), 'ex_div_data': ('http://www.sgx.com/proxy/SgxDominoHttpProxy?timeout=100&dominoHost=http%3A%2F%2Finfofeed.sgx.com%2FApps%3FA%3DCow_CorporateInformation_Content%26B%3DCorpDistributionByExDate%26S_T%3D1%26C_T%3D400', 'items'), 'curr_price': ('http://sgx.com/JsonRead/JsonData?qryId=RStock&timeout=30', 'items'), } ## for short sell information. self.shortsell_info_start_url = 'http://sgx.com/wps/wcm/connect/sgx_en/home/market_info/short_sale/short_sale_daily/DailyShortSell' self.shortsell_date_url = '20150417' self.shortsell_end_url = '.txt' self.shortsell_full_url = r'' ## parameters self.saved_json_file = r'c:\data\temptryyql.json' self.saved_parm_df_dict = { } #storing the final df in dict with type as keyword ## for setting up custom alerts self.custom_alert_dict_list = [] #list of dict?? 
self.custom_alert_df = pandas.DataFrame( ) #convert the custom alert dict to ## final output paramters self.sgx_announ_df = pandas.DataFrame() self.sgx_div_ex_date_df = pandas.DataFrame() self.sgx_curr_price_df = pandas.DataFrame() self.price_limit_alerts_df = pandas.DataFrame() self.shortsell_info_df = pandas.DataFrame() self.sgx_curr_plus_company_df = pandas.DataFrame() ## shortend output version for alert creation self.div_ex_date_shtver = '' self.filtered_announ_shtver = '' ## watchlist to set self.price_limit_reach_watchlist = [] self.announce_watchlist = [] ## target stocks for announcements -- using excel query xls_set_class = XlsExtractor( fname=r'C:\data\stockselection_for_sgx.xls', sheetname='stockselection', param_start_key='stock//', param_end_key='stock_end//', header_key='header#2//', col_len=2) xls_set_class.open_excel_and_process_block_data() self.announce_watchlist = xls_set_class.data_label_list #also get the company name self.companyname_watchlist = [ n[0].encode() for n in xls_set_class.data_value_list ] #also get the company name
if choice == 5: """ Handling current price """ w = SGXDataExtract() #w.shortsell_notification() #print w.shortsell_info_df w.process_all_data() w.sgx_curr_price_df.to_csv(r'c:\data\temp.csv', index=False) if choice == 6: """ Use excel table to get the required stocks.""" xls_set_class = XlsExtractor( fname=r'C:\data\stockselection_for_sgx.xls', sheetname='stockselection', param_start_key='stock//', param_end_key='stock_end//', header_key='header#2//', col_len=2) xls_set_class.open_excel_and_process_block_data() xls_set_class.data_label_list if choice ==7: """ use the price extract here to pull """ if choice == 8: """ Get the short sell volume. Need to get the shortsell vs the volumne traded that day Url need change with the current date