def get_stock_recent_data(stock, country, as_json=False, order='ascending', debug=False):
    """
    Retrieve the recent historical data of a stock from Investing.com.

    The stock is looked up by symbol, restricted to the specified country, in the stocks listing
    shipped with the package. A single request without date range is then sent to Investing.com
    and the daily rows of the returned table are parsed and returned either as a
    :obj:`pandas.DataFrame` or as a JSON string.

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the stock is.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a JSON string if True.
        order (:obj:`str`, optional):
            order of the retrieved data by date, either `ascending` (`asc`) or `descending` (`desc`).
        debug (:obj:`bool`, optional):
            whether the `investpy` logger is enabled (True) or disabled (False) during the call.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`str`:
            If `as_json` is False, a :obj:`pandas.DataFrame` indexed by `Date` built from
            `Data.stock_to_dict()` of every parsed row. If `as_json` is True, the result of
            `json.dumps` over a dictionary with the keys `name` (name of the stock) and `recent`
            (list with `Data.stock_as_json()` of every parsed row).

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid.
        IOError: raised if the stocks file was not found or could not be loaded.
        RuntimeError:
            raised if the introduced country/stock was not found, or if the response
            did not contain any table row to parse.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if Investing.com reported that no results were found.

    Examples:
        >>> investpy.get_stock_recent_data(stock='bbva', country='spain')
                         Open   High    Low  Close    Volume Currency
            Date
            2019-08-13  4.263  4.395  4.230  4.353  27250000      EUR
            2019-08-14  4.322  4.325  4.215  4.244  36890000      EUR
            2019-08-15  4.281  4.298  4.187  4.234  21340000      EUR
            2019-08-16  4.234  4.375  4.208  4.365  46080000      EUR
            2019-08-19  4.396  4.425  4.269  4.269  18950000      EUR

    """

    if not stock:
        raise ValueError("ERR#0013: stock parameter is mandatory and must be a valid stock name.")

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        stocks = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")

    if stocks is None:
        raise IOError("ERR#0001: stocks object not found or unable to retrieve.")

    if unidecode.unidecode(country.lower()) not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == unidecode.unidecode(country.lower())]

    stock = stock.strip().lower()

    # The same accent-insensitive mask is used both to validate the symbol and to locate its row.
    # Validating with one comparison and looking up with another would let `idxmax()` fall back
    # to the first row (i.e. a different stock) whenever only the accent-stripped forms match.
    matches = stocks['symbol'].map(lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(stock)

    if not matches.any():
        raise RuntimeError("ERR#0018: stock " + stock + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced stock on Investing.com')

    row_label = matches.idxmax()  # label of the first matching row
    symbol = stocks.loc[row_label, 'symbol']
    id_ = stocks.loc[row_label, 'id']
    name = stocks.loc[row_label, 'name']
    stock_currency = stocks.loc[row_label, 'currency']

    logger.info(str(stock) + ' found on Investing.com')

    header = "Datos históricos " + symbol

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    def to_float(text):
        # Values come from the Spanish site: '.' groups thousands and ',' marks decimals.
        return float(text.replace('.', '').replace(',', '.'))

    logger.info('Data parsing process starting...')

    result = list()

    for elements_ in path_:
        info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError("ERR#0007: stock information unavailable or not found.")

        stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
        stock_close = to_float(info[1])
        stock_open = to_float(info[2])
        stock_high = to_float(info[3])
        stock_low = to_float(info[4])

        # The volume is only converted when it carries a K/M/B magnitude suffix, otherwise it is 0.
        stock_volume = 0
        for suffix, factor in (('K', 1000), ('M', 1000000), ('B', 1000000000)):
            if suffix in info[5]:
                stock_volume = int(to_float(info[5].replace(suffix, '')) * factor)
                break

        result.append(Data(stock_date, stock_open, stock_high, stock_low,
                           stock_close, stock_volume, stock_currency))

    # Rows were requested newest first ("sort_ord": "DESC"), so ascending order needs a reversal.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.stock_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.stock_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_stock_historical_data(stock, country, from_date, to_date, as_json=False, order='ascending', debug=False):
    """
    Retrieve the historical data of a stock from Investing.com for a date range.

    The stock is looked up by symbol, restricted to the specified country, in the stocks listing
    shipped with the package. The date range is split into consecutive, non-overlapping chunks
    (a chunk is cut every 20 years), one request is sent to Investing.com per chunk, and the rows
    of all the chunks are merged into a single result which is returned either as a
    :obj:`pandas.DataFrame` or as a JSON string.

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve historical data from.
        country (:obj:`str`): name of the country from where the stock is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a JSON string if True.
        order (:obj:`str`, optional):
            order of the retrieved data by date, either `ascending` (`asc`) or `descending` (`desc`).
        debug (:obj:`bool`, optional):
            whether the `investpy` logger is enabled (True) or disabled (False) during the call.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`str`:
            If `as_json` is False, a :obj:`pandas.DataFrame` indexed by `Date` built from
            `Data.stock_to_dict()` of every parsed row. If `as_json` is True, the result of
            `json.dumps` over a dictionary with the keys `name` (name of the stock) and
            `historical` (list with `Data.stock_as_json()` of every parsed row). In both cases
            the rows of every requested chunk are included and ordered as a whole.

    Raises:
        ValueError:
            raised whenever any of the introduced arguments is not valid; this includes dates
            that are not `str` or are not formatted as `dd/mm/yyyy`, and `to_date` not being
            greater than `from_date`.
        IOError: raised if the stocks file was not found or could not be loaded.
        RuntimeError:
            raised if the introduced country/stock was not found, or if a non-empty response
            did not contain any table row to parse.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if no historical data at all was found for the whole date range.

    Examples:
        >>> investpy.get_stock_historical_data(stock='bbva', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close  Volume Currency
            Date
            2010-01-04  12.73  12.96  12.73  12.96       0      EUR
            2010-01-05  13.00  13.11  12.97  13.09       0      EUR
            2010-01-06  13.03  13.17  13.02  13.12       0      EUR
            2010-01-07  13.02  13.11  12.93  13.05       0      EUR
            2010-01-08  13.12  13.22  13.04  13.18       0      EUR

    """

    if not stock:
        raise ValueError("ERR#0013: stock parameter is mandatory and must be a valid stock name.")

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    # strptime raises TypeError (not ValueError) for non-str input, so both are mapped to the
    # documented ValueError.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the range into chunks. Each new chunk starts the day after the previous one ended,
    # since both bounds are sent to the site and a shared boundary day would be returned twice.
    intervals = []
    while end_date.year - start_date.year > 20:
        chunk_end = start_date.replace(year=start_date.year + 20)
        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })
        start_date = chunk_end + datetime.timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        stocks = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")

    if stocks is None:
        raise IOError("ERR#0001: stocks object not found or unable to retrieve.")

    if unidecode.unidecode(country.lower()) not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == unidecode.unidecode(country.lower())]

    stock = stock.strip().lower()

    # The same accent-insensitive mask is used both to validate the symbol and to locate its row.
    # Validating with one comparison and looking up with another would let `idxmax()` fall back
    # to the first row (i.e. a different stock) whenever only the accent-stripped forms match.
    matches = stocks['symbol'].map(lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(stock)

    if not matches.any():
        raise RuntimeError("ERR#0018: stock " + stock + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced stock on Investing.com')

    row_label = matches.idxmax()  # label of the first matching row
    symbol = stocks.loc[row_label, 'symbol']
    id_ = stocks.loc[row_label, 'id']
    name = stocks.loc[row_label, 'name']
    stock_currency = stocks.loc[row_label, 'currency']

    logger.info(str(stock) + ' found on Investing.com')

    def to_float(text):
        # Values come from the Spanish site: '.' groups thousands and ',' marks decimals.
        return float(text.replace('.', '').replace(',', '.'))

    logger.info('Data parsing process starting...')

    header = "Datos históricos " + symbol
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    collected = list()  # rows of every chunk, oldest first

    for interval in intervals:
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        chunk = list()

        for elements_ in path_:
            info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

            # A chunk without results is skipped; whether any data exists is decided once all
            # the chunks have been requested.
            if info[0] == 'No se encontraron resultados':
                continue

            stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
            stock_close = to_float(info[1])
            stock_open = to_float(info[2])
            stock_high = to_float(info[3])
            stock_low = to_float(info[4])

            # The volume is only converted when it carries a K/M/B magnitude suffix, otherwise it is 0.
            stock_volume = 0
            for suffix, factor in (('K', 1000), ('M', 1000000), ('B', 1000000000)):
                if suffix in info[5]:
                    stock_volume = int(to_float(info[5].replace(suffix, '')) * factor)
                    break

            chunk.append(Data(stock_date, stock_open, stock_high, stock_low,
                              stock_close, stock_volume, stock_currency))

        # Rows were requested newest first ("sort_ord": "DESC") while chunks go oldest first.
        collected.extend(reversed(chunk))

    logger.info('Data parsing process finished...')

    if not collected:
        raise IndexError("ERR#0007: stock information unavailable or not found.")

    # The ordering is applied to the merged rows so that it also holds across chunk boundaries.
    if order in ['descending', 'desc']:
        collected.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.stock_as_json() for value in collected]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.stock_to_dict() for value in collected])
    df.set_index('Date', inplace=True)

    return df
def get_currency_cross_historical_data(currency_cross, from_date, to_date, as_json=False, order='ascending', debug=False):
    """
    Retrieve the historical data of a currency cross from Investing.com for a date range.

    The currency cross is looked up by name in the currency crosses listing shipped with the
    package. The date range is split into consecutive, non-overlapping chunks (a chunk is cut
    every 20 years), one request is sent to Investing.com per chunk, and the rows of all the
    chunks are merged into a single result which is returned either as a :obj:`pandas.DataFrame`
    or as a JSON string.

    Args:
        currency_cross (:obj:`str`): name of the currency cross to retrieve historical data from.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a JSON string if True.
        order (:obj:`str`, optional):
            order of the retrieved data by date (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            whether the `investpy` logger is enabled (`True`) or disabled (`False`) during the call.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`str`:
            If `as_json` is False, a :obj:`pandas.DataFrame` indexed by `Date` built from
            `Data.currency_cross_to_dict()` of every parsed row. If `as_json` is True, the result
            of `json.dumps` over a dictionary with the keys `name` (name of the currency cross)
            and `historical` (list with `Data.currency_cross_as_json()` of every parsed row).
            In both cases the rows of every requested chunk are included and ordered as a whole.
            The currency attached to every row is the `second` column of the listing.

    Raises:
        ValueError:
            raised whenever any of the introduced arguments is not valid; this includes dates
            that are not `str` or are not formatted as `dd/mm/yyyy`, and `to_date` not being
            greater than `from_date`.
        IOError: raised if the currency crosses file was not found or could not be loaded.
        RuntimeError:
            raised if the introduced currency_cross does not match any of the listed ones, or
            if a non-empty response did not contain any table row to parse.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if no historical data at all was found for the whole date range.

    Examples:
        >>> investpy.get_currency_cross_historical_data(currency_cross='EUR/USD', from_date='01/01/2018', to_date='01/01/2019')
                          Open    High     Low   Close  Volume Currency
            Date
            2018-01-01  1.2003  1.2014  1.1995  1.2010       0      USD
            2018-01-02  1.2013  1.2084  1.2003  1.2059       0      USD
            2018-01-03  1.2058  1.2070  1.2001  1.2014       0      USD
            2018-01-04  1.2015  1.2090  1.2004  1.2068       0      USD
            2018-01-05  1.2068  1.2085  1.2021  1.2030       0      USD

    """

    if not currency_cross:
        raise ValueError("ERR#0052: currency_cross param is mandatory and should be a str.")

    if not isinstance(currency_cross, str):
        raise ValueError("ERR#0052: currency_cross param is mandatory and should be a str.")

    # strptime raises TypeError (not ValueError) for non-str input, so both are mapped to the
    # documented ValueError.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    # Split the range into chunks. Each new chunk starts the day after the previous one ended,
    # since both bounds are sent to the site and a shared boundary day would be returned twice.
    intervals = []
    while end_date.year - start_date.year > 20:
        chunk_end = start_date.replace(year=start_date.year + 20)
        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })
        start_date = chunk_end + datetime.timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'currency_crosses', 'currency_crosses.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        currency_crosses = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0060: currency_crosses file not found or errored.")

    if currency_crosses is None:
        raise IOError("ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip().lower()

    # The same accent-insensitive mask is used both to validate the name and to locate its row.
    # Validating with one comparison and looking up with another would let `idxmax()` fall back
    # to the first row (i.e. a different currency cross) whenever only the accent-stripped
    # forms match.
    matches = currency_crosses['name'].map(
        lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(currency_cross)

    if not matches.any():
        raise RuntimeError("ERR#0054: the introduced currency_cross " + str(currency_cross) + " does not exists.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced currency_cross on Investing.com')

    row_label = matches.idxmax()  # label of the first matching row
    id_ = currency_crosses.loc[row_label, 'id']
    name = currency_crosses.loc[row_label, 'name']
    currency = currency_crosses.loc[row_label, 'second']

    logger.info(str(currency_cross) + ' found on Investing.com')

    def to_float(text):
        # Values come from the Spanish site: '.' groups thousands and ',' marks decimals.
        return float(text.replace('.', '').replace(',', '.'))

    header = "Datos históricos " + name
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    collected = list()  # rows of every chunk, oldest first

    for interval in intervals:
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

        # An empty body cannot be parsed; skip the chunk as the other historical functions do.
        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        logger.info('Data parsing process starting...')

        chunk = list()

        for elements_ in path_:
            info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

            # A chunk without results is skipped; whether any data exists is decided once all
            # the chunks have been requested.
            if info[0] == 'No se encontraron resultados':
                continue

            currency_cross_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
            currency_cross_close = to_float(info[1])
            currency_cross_open = to_float(info[2])
            currency_cross_high = to_float(info[3])
            currency_cross_low = to_float(info[4])

            # The volume is only converted when it carries a K/M/B magnitude suffix, otherwise it is 0.
            currency_cross_volume = 0
            for suffix, factor in (('K', 1000), ('M', 1000000), ('B', 1000000000)):
                if suffix in info[5]:
                    currency_cross_volume = int(to_float(info[5].replace(suffix, '')) * factor)
                    break

            chunk.append(Data(currency_cross_date, currency_cross_open, currency_cross_high,
                              currency_cross_low, currency_cross_close, currency_cross_volume, currency))

        # Rows were requested newest first ("sort_ord": "DESC") while chunks go oldest first.
        collected.extend(reversed(chunk))

    logger.info('Data parsing process finished...')

    if not collected:
        raise IndexError("ERR#0055: currency_cross information unavailable or not found.")

    # The ordering is applied to the merged rows so that it also holds across chunk boundaries.
    if order in ['descending', 'desc']:
        collected.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.currency_cross_as_json() for value in collected]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.currency_cross_to_dict() for value in collected])
    df.set_index('Date', inplace=True)

    return df
def get_etf_historical_data(etf, country, from_date, to_date, as_json=False, order='ascending', debug=False):
    """
    Retrieve the historical data of an etf from Investing.com for a date range.

    The etf is looked up by name, restricted to the specified country, in the etfs listing shipped
    with the package. The date range is split into consecutive, non-overlapping chunks (a chunk is
    cut every 20 years), one request is sent to Investing.com per chunk, and the rows of all the
    chunks are merged into a single result which is returned either as a :obj:`pandas.DataFrame`
    or as a JSON string.

    Args:
        etf (:obj:`str`): name of the etf to retrieve historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a JSON string if True.
        order (:obj:`str`, optional):
            order of the retrieved data by date (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            whether the `investpy` logger is enabled (`True`) or disabled (`False`) during the call.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`str`:
            If `as_json` is False, a :obj:`pandas.DataFrame` indexed by `Date` built from
            `Data.etf_to_dict()` of every parsed row. If `as_json` is True, the result of
            `json.dumps` over a dictionary with the keys `name` (name of the etf) and
            `historical` (list with `Data.etf_as_json()` of every parsed row). In both cases the
            rows of every requested chunk are included and ordered as a whole. No volume is
            parsed for etfs (`None` is passed to `Data` in its place).

    Raises:
        ValueError:
            raised whenever any of the introduced arguments is not valid; this includes dates
            that are not `str` or are not formatted as `dd/mm/yyyy`, and `to_date` not being
            greater than `from_date`.
        IOError: raised if the etfs file was not found or could not be loaded.
        RuntimeError:
            raised if the introduced country/etf was not found, or if a non-empty response did
            not contain any table row to parse.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if no historical data at all was found for the whole date range.

    Examples:
        >>> investpy.get_etf_historical_data(etf='bbva accion dj eurostoxx 50', country='spain', from_date='01/01/2010', to_date='01/01/2019', as_json=False, order='ascending', debug=False)
                         Open   High    Low  Close Currency
            Date
            2011-12-07  23.70  23.70  23.70  23.62      EUR
            2011-12-08  23.53  23.60  23.15  23.04      EUR
            2011-12-09  23.36  23.60  23.36  23.62      EUR
            2011-12-12  23.15  23.26  23.00  22.88      EUR
            2011-12-13  22.88  22.88  22.88  22.80      EUR

    """

    if not etf:
        raise ValueError("ERR#0031: etf parameter is mandatory and must be a valid etf name.")

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    # strptime raises TypeError (not ValueError) for non-str input, so both are mapped to the
    # documented ValueError.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the range into chunks. Each new chunk starts the day after the previous one ended,
    # since both bounds are sent to the site and a shared boundary day would be returned twice.
    intervals = []
    while end_date.year - start_date.year > 20:
        chunk_end = start_date.replace(year=start_date.year + 20)
        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })
        start_date = chunk_end + datetime.timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        etfs = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")

    if etfs is None:
        raise IOError("ERR#0009: etfs object not found or unable to retrieve.")

    if unidecode.unidecode(country.lower()) not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    etfs = etfs[etfs['country'] == unidecode.unidecode(country.lower())]

    etf = etf.strip().lower()

    # The same accent-insensitive mask is used both to validate the name and to locate its row.
    # Validating with one comparison and looking up with another would let `idxmax()` fall back
    # to the first row (i.e. a different etf) whenever only the accent-stripped forms match.
    matches = etfs['name'].map(lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(etf)

    if not matches.any():
        raise RuntimeError("ERR#0019: etf " + str(etf) + " not found in " + str(country.lower()) + ", check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced etf on Investing.com')

    row_label = matches.idxmax()  # label of the first matching row
    symbol = etfs.loc[row_label, 'symbol']
    id_ = etfs.loc[row_label, 'id']
    name = etfs.loc[row_label, 'name']
    etf_currency = etfs.loc[row_label, 'currency']

    logger.info(str(etf) + ' found on Investing.com')

    def to_float(text):
        # Values come from the Spanish site: '.' groups thousands and ',' marks decimals.
        return float(text.replace('.', '').replace(',', '.'))

    header = "Datos históricos " + symbol
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    collected = list()  # rows of every chunk, oldest first

    for interval in intervals:
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        logger.info('Data parsing process starting...')

        chunk = list()

        for elements_ in path_:
            info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

            # A chunk without results is skipped; whether any data exists is decided once all
            # the chunks have been requested.
            if info[0] == 'No se encontraron resultados':
                continue

            etf_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
            etf_close = to_float(info[1])
            etf_open = to_float(info[2])
            etf_high = to_float(info[3])
            etf_low = to_float(info[4])

            chunk.append(Data(etf_date, etf_open, etf_high, etf_low, etf_close, None, etf_currency))

        # Rows were requested newest first ("sort_ord": "DESC") while chunks go oldest first.
        collected.extend(reversed(chunk))

    logger.info('Data parsing process finished...')

    if not collected:
        raise IndexError("ERR#0010: etf information unavailable or not found.")

    # The ordering is applied to the merged rows so that it also holds across chunk boundaries.
    if order in ['descending', 'desc']:
        collected.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.etf_as_json() for value in collected]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.etf_to_dict() for value in collected])
    df.set_index('Date', inplace=True)

    return df
def get_index_historical_data(index, country, from_date, to_date, as_json=False, order='ascending', debug=False):
    """
    This function retrieves historical data of the introduced `index` (from the specified country, note that
    both index and country should match: if the introduced index is not listed among the indices of that
    country, the function raises an error). The retrieved historical data are the OHLC values plus the Volume
    and the Currency in which those values are specified, for the introduced date range. The resulting data
    can either be returned as a :obj:`pandas.DataFrame` or as a :obj:`json` string.

    The date range is requested in chunks of at most 20 calendar years, one POST request per chunk, and the
    rows of every chunk that returned data are merged into a single result for both output formats.

    Args:
        index (:obj:`str`): name of the index to retrieve historical data from.
        country (:obj:`str`): name of the country from where the index is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the
            retrieved historical data from the specified index via argument. The dataset contains the open,
            high, low, close and volume values for the selected index on market days, additionally the
            currency in which those values are specified is returned.

            The returned data, in case we use default arguments, will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error (including dates that are not `dd/mm/yyyy` strings).
        FileNotFoundError: raised if the indices resource file was not found.
        IOError: raised if indices object was not found or unable to retrieve.
        RuntimeError: raised if the introduced country/index does not match any of the listed ones, or if the
            response did not contain the expected data table.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if no data was found for the index in the whole requested range.

    Examples:
        >>> investpy.get_index_historical_data(index='ibex 35', country='spain', from_date='01/01/2018', to_date='01/01/2019')
                           Open     High      Low    Close    Volume Currency
            Date
            2018-01-02  15128.2  15136.7  14996.6  15096.8  10340000      EUR
            2018-01-03  15145.0  15186.9  15091.9  15106.9  12800000      EUR
            2018-01-04  15105.5  15368.7  15103.7  15368.7  17070000      EUR
            2018-01-05  15353.9  15407.5  15348.6  15398.9  11180000      EUR
            2018-01-08  15437.1  15448.7  15344.0  15373.3  12890000      EUR

    """

    if not index:
        raise ValueError("ERR#0047: index param is mandatory and should be a str.")

    if not isinstance(index, str):
        raise ValueError("ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        # TypeError covers non-str dates, which are argument errors as well.
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    # Split the requested range into consecutive chunks of at most 20 calendar years.
    intervals = []
    while end_date.year - start_date.year > 20:
        try:
            chunk_end = start_date.replace(year=start_date.year + 20)
        except ValueError:
            # 29 February has no counterpart when the target year is not a leap year.
            chunk_end = start_date.replace(year=start_date.year + 20, day=28)
        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })
        start_date = chunk_end
    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        indices = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    if indices is None:
        raise IOError("ERR#0037: indices not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    indices = indices[indices['country'] == country_key]

    index = index.strip().lower()

    # Validation and row selection share the same accent-insensitive comparison, so a name that passes the
    # check always selects its own row (idxmax on an all-False mask would silently pick the first row).
    normalized_names = indices['name'].apply(lambda value: unidecode.unidecode(value.lower()))
    matches = normalized_names == unidecode.unidecode(index)

    if not matches.any():
        raise RuntimeError("ERR#0045: index " + index + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced index on Investing.com')

    row_label = matches.idxmax()
    full_name = indices.loc[row_label, 'full_name']
    id_ = indices.loc[row_label, 'id']
    name = indices.loc[row_label, 'name']
    index_currency = indices.loc[row_label, 'currency']

    logger.info(str(index) + ' found on Investing.com')

    header = "Datos históricos " + full_name
    url = "https://es.investing.com/instruments/HistoricalDataAjax"
    volume_units = (('K', 1000), ('M', 1000000), ('B', 1000000000))

    # One list of rows per chunk that returned data; chunks are chronological, while the rows inside each
    # chunk are kept in the order received (requested as newest first through "sort_ord": "DESC").
    # NOTE(review): consecutive chunks share their boundary date; if the site treats both ends as inclusive
    # that day appears twice in the output - confirm and de-duplicate if so.
    chunks = []

    for interval in intervals:
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        logger.info('Data parsing process starting...')

        rows = []
        for elements_ in path_:
            info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

            if info[0] == 'No se encontraron resultados':
                # Placeholder row: this chunk holds no data, other chunks may still do.
                continue

            index_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')

            # Numbers use '.' as thousands separator and ',' as decimal mark.
            index_close, index_open, index_high, index_low = (
                float(cell.replace('.', '').replace(',', '.')) for cell in info[1:5]
            )

            index_volume = 0
            for suffix, factor in volume_units:
                if suffix in info[5]:
                    index_volume = int(
                        float(info[5].replace(suffix, '').replace('.', '').replace(',', '.')) * factor)
                    break

            rows.append(Data(index_date, index_open, index_high, index_low,
                             index_close, index_volume, index_currency))

        if rows:
            chunks.append(rows)

    logger.info('Data parsing process finished...')

    if not chunks:
        raise IndexError("ERR#0046: index information unavailable or not found.")

    if order in ['ascending', 'asc']:
        # Oldest chunk first, each chunk flipped to oldest-first.
        records = [row for rows in chunks for row in reversed(rows)]
    else:
        # Newest chunk first, rows already newest-first.
        records = [row for rows in reversed(chunks) for row in rows]

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.index_as_json() for value in records]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.index_to_dict() for value in records])
    df.set_index('Date', inplace=True)
    return df
def get_index_recent_data(index, country, as_json=False, order='ascending', debug=False):
    """
    This function retrieves recent historical data from the introduced `index` from Investing via Web
    Scraping. The resulting data can either be returned as a :obj:`pandas.DataFrame` or as a :obj:`json`
    string, with `ascending` or `descending` order.

    Args:
        index (:obj:`str`): name of the index to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the index is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the
            retrieved recent data from the specified index via argument. The dataset contains the open, high,
            low, close and volume values for the selected index on market days, additionally the currency
            value is returned.

            The returned data, in case we use default arguments, will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        FileNotFoundError: raised if the indices resource file was not found.
        IOError: raised if indices object was not found or unable to retrieve.
        RuntimeError: raised if the introduced country/index does not match any of the listed ones, or if the
            response did not contain the expected data table.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if index information was unavailable or not found.

    Examples:
        >>> investpy.get_index_recent_data(index='ibex 35', country='spain')
                           Open     High      Low    Close   Volume Currency
            Date
            2019-08-26  12604.7  12646.3  12510.4  12621.3  4770000      EUR
            2019-08-27  12618.3  12723.3  12593.6  12683.8  8230000      EUR
            2019-08-28  12657.2  12697.2  12585.1  12642.5  7300000      EUR
            2019-08-29  12637.2  12806.6  12633.8  12806.6  5650000      EUR
            2019-08-30  12767.6  12905.9  12756.9  12821.6  6040000      EUR

    """

    if not index:
        raise ValueError("ERR#0047: index param is mandatory and should be a str.")

    if not isinstance(index, str):
        raise ValueError("ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        indices = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    if indices is None:
        raise IOError("ERR#0037: indices not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    indices = indices[indices['country'] == country_key]

    index = index.strip().lower()

    # Validation and row selection share the same accent-insensitive comparison, so a name that passes the
    # check always selects its own row (idxmax on an all-False mask would silently pick the first row).
    normalized_names = indices['name'].apply(lambda value: unidecode.unidecode(value.lower()))
    matches = normalized_names == unidecode.unidecode(index)

    if not matches.any():
        raise RuntimeError("ERR#0045: index " + index + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced index on Investing.com')

    row_label = matches.idxmax()
    full_name = indices.loc[row_label, 'full_name']
    id_ = indices.loc[row_label, 'id']
    name = indices.loc[row_label, 'name']
    index_currency = indices.loc[row_label, 'currency']

    logger.info(str(index) + ' found on Investing.com')

    header = "Datos históricos " + full_name

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    if not req.text:
        # The historical-data counterpart guards against empty bodies too; there is no table to parse,
        # so report it as a scraping failure instead of handing an empty document to the parser.
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    volume_units = (('K', 1000), ('M', 1000000), ('B', 1000000000))
    result = []

    for elements_ in path_:
        info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError("ERR#0046: index information unavailable or not found.")

        index_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')

        # Numbers use '.' as thousands separator and ',' as decimal mark.
        index_close, index_open, index_high, index_low = (
            float(cell.replace('.', '').replace(',', '.')) for cell in info[1:5]
        )

        index_volume = 0
        for suffix, factor in volume_units:
            if suffix in info[5]:
                index_volume = int(
                    float(info[5].replace(suffix, '').replace('.', '').replace(',', '.')) * factor)
                break

        result.append(Data(index_date, index_open, index_high, index_low,
                           index_close, index_volume, index_currency))

    # Rows are requested newest first ("sort_ord": "DESC"); flip them for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.index_as_json() for value in result]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.index_to_dict() for value in result])
    df.set_index('Date', inplace=True)
    return df
def get_fund_recent_data(fund, country, as_json=False, order='ascending', debug=False):
    """
    This function retrieves recent historical data from the introduced `fund` from Investing via Web
    Scraping. The resulting data can either be returned as a :obj:`pandas.DataFrame` or as a :obj:`json`
    string, with `ascending` or `descending` order.

    Args:
        fund (:obj:`str`): name of the fund to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the introduced fund is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the
            retrieved recent data from the specified fund via argument. The dataset contains the open, high,
            low and close values for the selected fund on market days, plus the currency of those values.

            The returned data, in case we use default arguments, will look like::

                Date || Open | High | Low | Close | Currency
                -----||------|------|-----|-------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        FileNotFoundError: raised if the funds resource file was not found.
        IOError: raised if funds object was not found or unable to retrieve.
        RuntimeError: raised if the introduced country/fund does not match any of the listed ones, or if the
            response did not contain the expected data table.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if fund information was unavailable or not found.

    Examples:
        >>> investpy.get_fund_recent_data(fund='bbva multiactivo conservador pp', country='spain', as_json=False, order='ascending', debug=False)
                         Open   High    Low  Close Currency
            Date
            2019-08-13  1.110  1.110  1.110  1.110      EUR
            2019-08-16  1.109  1.109  1.109  1.109      EUR
            2019-08-19  1.114  1.114  1.114  1.114      EUR
            2019-08-20  1.112  1.112  1.112  1.112      EUR
            2019-08-21  1.115  1.115  1.115  1.115      EUR

    """

    if not fund:
        raise ValueError("ERR#0029: fund parameter is mandatory and must be a valid fund name.")

    if not isinstance(fund, str):
        raise ValueError("ERR#0028: fund argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'funds', 'funds.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0057: funds file not found or errored.")

    if funds is None:
        raise IOError("ERR#0005: funds object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_fund_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    funds = funds[funds['country'] == country_key]

    fund = fund.strip().lower()

    # Validation and row selection share the same accent-insensitive comparison, so a name that passes the
    # check always selects its own row (idxmax on an all-False mask would silently pick the first row).
    normalized_names = funds['name'].apply(lambda value: unidecode.unidecode(value.lower()))
    matches = normalized_names == unidecode.unidecode(fund)

    if not matches.any():
        raise RuntimeError("ERR#0019: fund " + fund + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced fund on Investing.com')

    row_label = matches.idxmax()
    symbol = funds.loc[row_label, 'symbol']
    id_ = funds.loc[row_label, 'id']
    name = funds.loc[row_label, 'name']
    fund_currency = funds.loc[row_label, 'currency']

    logger.info(str(fund) + ' found on Investing.com')

    header = "Datos históricos " + symbol

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    if not req.text:
        # No body means no table to parse; report it as a scraping failure instead of handing an
        # empty document to the parser.
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    result = []

    for elements_ in path_:
        info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError("ERR#0008: fund information unavailable or not found.")

        fund_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')

        # Numbers use '.' as thousands separator and ',' as decimal mark.
        fund_close, fund_open, fund_high, fund_low = (
            float(cell.replace('.', '').replace(',', '.')) for cell in info[1:5]
        )

        # Funds carry no volume column, hence the None placeholder.
        result.append(Data(fund_date, fund_open, fund_high, fund_low, fund_close, None, fund_currency))

    # Rows are requested newest first ("sort_ord": "DESC"); flip them for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.fund_as_json() for value in result]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
    df.set_index('Date', inplace=True)
    return df