def test_get_intermediate_panel_data_from_parsed(self):
    """Parsed tc_local/tp_usd series are flattened into panel records."""
    parsed = {
        'tc_local': [
            {
                'bolivar_venezolano': Decimal('0.0123560'),
                'indice_tiempo': datetime(2019, 3, 6)
            }
        ],
        'tp_usd': [
            {
                'bolivar_venezolano': Decimal('0.0003030'),
                'indice_tiempo': datetime(2019, 3, 6)
            }
        ]
    }
    coin_config = {"bolivar_venezolano": "Bolívar Venezolano"}
    scraper = BCRAExchangeRateScraper(
        '', coin_config,
        intermediate_panel_path=None,
        use_intermediate_panel=True)

    panel_rows = scraper.get_intermediate_panel_data_from_parsed(parsed)

    # tp_usd records come first, then tc_local, one record per coin/date.
    assert panel_rows == [
        {
            'indice_tiempo': datetime(2019, 3, 6),
            'coin': 'bolivar_venezolano',
            'type': 'tp_usd',
            'value': Decimal('0.0003030')
        },
        {
            'indice_tiempo': datetime(2019, 3, 6),
            'coin': 'bolivar_venezolano',
            'type': 'tc_local',
            'value': Decimal('0.0123560')
        }
    ]
def test_html_is_valid(self):
    """A fetched page with a table exposes thead and tbody to the parser."""
    fixture = '''
        <table class="table table-BCRA table-bordered table-hover table-responsive">
        <thead>
        </thead>
        <tbody>
        </tbody>
        </table>
    '''
    with patch.object(BCRAExchangeRateScraper, 'fetch_content',
                      return_value=fixture):
        scraper = BCRAExchangeRateScraper("", {}, False)
        content = scraper.fetch_content(date(2019, 3, 4), '')

    soup = BeautifulSoup(content, "html.parser")
    table = soup.find('table')
    head = table.find('thead') if table else None
    body = table.find('tbody') if table else None

    assert table is not None
    assert head is not None
    assert body is not None
def test_not_head_parse_coin(self):
    """parse_coin yields an empty list when the table has no header cells."""
    coins = {"bolivar_venezolano": "Bolívar Venezolano"}
    content = '''
    <table class="table table-BCRA table-bordered table-hover table-responsive" colspan="3">
        <tr>
            <td colspan="3">
                <b></b>
            </td>
        </tr>
        <tr>
            <td width="10%"><b></b></td>
            <td width="40%"><b></b></td>
            <td width="50%"><b></b></td>
        </tr>
    </table>
    '''
    scraper = BCRAExchangeRateScraper(
        "http://www.bcra.gov.ar/PublicacionesEstadisticas/Evolucion_moneda.asp",
        coins, False)

    day = datetime(2019, 4, 8)
    assert scraper.parse_coin(content, day, day, 'bolivar_venezolano') == []
def test_parse_for_non_empty_contents(self):
    """A populated rates table parses into tc_local and tp_usd rows."""
    coins = {"bolivar_venezolano": "Bolívar Venezolano"}
    scraper = BCRAExchangeRateScraper(
        "http://www.bcra.gov.ar/PublicacionesEstadisticas/Evolucion_moneda.asp",
        coins,
        intermediate_panel_path=None,
        use_intermediate_panel=False)

    table_content = '''
    <table class="table table-BCRA table-bordered table-hover table-responsive" colspan="3">
        <thead>
            <tr>
                <td colspan="3">
                    <b>MERCADO DE CAMBIOS - COTIZACIONES CIERRE VENDEDOR<br>
                    Bolívar Venezolano</b>
                </td>
            </tr>
            <tr>
                <td width="10%"><b> FECHA</b></td>
                <td width="40%"><b> TIPO DE PASE - EN DOLARES - (por unidad)</b></td>
                <td width="50%"><b> TIPO DE CAMBIO - MONEDA DE CURSO LEGAL - (por unidad)</b></td>
            </tr>
        </thead>
        <tbody>
            <tr>
                <td width="10%"> 08/04/2019</td>
                <td width="40%"> 0,0003030</td>
                <td width="50%"> 0,0132500</td>
            </tr>
        </tbody>
    </table>
    '''
    day = datetime(2019, 4, 8)
    parsed = scraper.parse_contents(
        {'bolivar_venezolano': table_content}, day, day)

    assert parsed['tc_local'] == [{
        'bolivar_venezolano': '0,0132500',
        'indice_tiempo': '08/04/2019'
    }]
    assert parsed['tp_usd'] == [{
        'bolivar_venezolano': '0,0003030',
        'indice_tiempo': '08/04/2019'
    }]
def test_parse_from_intermediate_panel_empty_value(self):
    """Probar parseo desde el archivo intermedio.

    Reading the panel dataframe is patched so the scraper regroups the
    panel rows into per-type series for the requested date range.

    Fix: removed a dead ``intermediate_panel_df = MagicMock()`` assignment
    that was immediately overwritten by the dict literal below.
    """
    start_date = '2019-03-06'
    end_date = '2019-03-06'
    coins = {
        "bolivar_venezolano": "Bolívar Venezolano",
        "chelin_austriaco": 'Chelin Austriaco'
    }
    # Raw intermediate-panel rows: one tc_local and one tp_usd record.
    intermediate_panel_df = {
        'indice_tiempo': ['2019-03-06', '2019-03-06'],
        'coin': ['bolivar_venezolano', 'bolivar_venezolano'],
        'type': ['tc_local', 'tp_usd'],
        'value': ['0.0003030', '0.0123560']
    }
    with patch.object(
        BCRAExchangeRateScraper,
        'read_intermediate_panel_dataframe',
        return_value=pd.DataFrame(data=intermediate_panel_df)
    ):
        scraper = BCRAExchangeRateScraper('', coins, True)
        content = scraper.parse_from_intermediate_panel(
            start_date, end_date,
        )

        assert content == {
            'tc_local': [
                {
                    'indice_tiempo': '2019-03-06',
                    'bolivar_venezolano': '0.0003030'
                }
            ],
            'tp_usd': [
                {
                    'indice_tiempo': '2019-03-06',
                    'bolivar_venezolano': '0.0123560'
                }
            ]
        }
def test_get_intermediate_panel_data_from_empty_parsed(self):
    """An empty parsed dict yields an empty intermediate-panel list."""
    coin_config = {"bolivar_venezolano": "Bolívar Venezolano"}
    scraper = BCRAExchangeRateScraper('', coin_config, True)

    assert scraper.get_intermediate_panel_data_from_parsed({}) == []
def test_validate_coin_in_configuration_file_true(self):
    """A coin present among the dropdown options is reported as valid."""
    labels = ['Seleccione Moneda', 'Bolívar Venezolano']
    # Each option mimics a select element exposing its label via `.text`.
    options = [MagicMock(text=label) for label in labels]

    scraper = BCRAExchangeRateScraper('foo.com', {}, False)
    found = scraper.validate_coin_in_configuration_file(
        "Bolívar Venezolano", options)

    assert found is True
def test_parse_contents(self):
    """parse_coin rows are regrouped into tc_local/tp_usd series."""
    coins = {"bolivar_venezolano": "Bolívar Venezolano"}
    day = datetime(2019, 4, 24)
    coin_rows = [
        {
            'moneda': 'bolivar_venezolano',
            'indice_tiempo': '24/04/2019',
            'tp_usd': '0,0001930',
            'tc_local': '0,0084610'
        }
    ]
    with patch.object(BCRAExchangeRateScraper, 'parse_coin',
                      return_value=coin_rows):
        scraper = BCRAExchangeRateScraper('', coins, False)
        result = scraper.parse_contents(
            {'bolivar_venezolano': 'foo'}, day, day)

        assert result == {
            'tc_local': [
                {
                    'indice_tiempo': '24/04/2019',
                    'bolivar_venezolano': '0,0084610'
                }
            ],
            'tp_usd': [
                {
                    'indice_tiempo': '24/04/2019',
                    'bolivar_venezolano': '0,0001930'
                }
            ]
        }
def test_not_table_parse_coin(self):
    """parse_coin yields an empty list when the content has no table."""
    coins = {"bolivar_venezolano": "Bolívar Venezolano"}
    scraper = BCRAExchangeRateScraper(
        "http://www.bcra.gov.ar/PublicacionesEstadisticas/Evolucion_moneda.asp",
        coins, False)

    day = datetime(2019, 4, 8)
    assert scraper.parse_coin('', day, day, 'bolivar_venezolano') == []
def test_parse_for_empty_contents(self):
    """Empty contents parse into empty tc_local and tp_usd series."""
    coins = {
        "bolivar_venezolano": "Bolívar Venezolano",
        "chelin_austriaco": "Chelín Austríaco",
        "cordoba_nicaraguense": "Cordoba Nicaraguense",
        "corona_checa": "Corona Checa",
        "corona_danesa": "Corona Danesa",
    }
    scraper = BCRAExchangeRateScraper(
        "http://www.bcra.gov.ar/PublicacionesEstadisticas/Evolucion_moneda.asp",
        coins, False)

    today = date.today()
    parsed = scraper.parse_contents({}, today, today)

    assert parsed['tc_local'] == []
    assert parsed['tp_usd'] == []
def test_fetch_contents(self):
    """fetch_contents collects one fetched page per configured coin."""
    coins = {"bolivar_venezolano": "Bolívar Venezolano"}
    day = datetime(2019, 4, 24)

    with patch.object(BCRAExchangeRateScraper, 'fetch_content',
                      return_value='foo'):
        scraper = BCRAExchangeRateScraper(
            '', coins,
            intermediate_panel_path=None,
            use_intermediate_panel=False)
        result = scraper.fetch_contents(day, day)

        assert result == {'bolivar_venezolano': 'foo'}
def test_preprocessed_rows_date(self):
    """Rates normalize to Decimal (or None for dashes), dates to date objects."""
    rows = [{
        'bolivar_venezolano': '0,0003040',
        'dolar_estadounidense': '--------',
        'oro_onza_troy': '1.289,6300000',
        'indice_tiempo': '2019-04-01'
    }]
    # NOTE(review): the original builds the scraper with url=False and the
    # rows list as the coins mapping; preprocess_rows does not appear to
    # rely on either — preserved as-is.
    scraper = BCRAExchangeRateScraper(
        False, rows,
        intermediate_panel_path=None,
        use_intermediate_panel=False)

    assert scraper.preprocess_rows(rows) == [{
        'bolivar_venezolano': Decimal('0.0003040'),
        'dolar_estadounidense': None,
        'oro_onza_troy': Decimal('1289.6300000'),
        'indice_tiempo': date(2019, 4, 1)
    }]
def test_fetch_content_invalid_url_patching_driver(self):
    """Probar fetch content con url invalida.

    The driver's page_source (here a 400 marker) is passed through as-is.
    """
    driver = MagicMock()
    driver.page_source = 400

    with patch.object(BCRAExchangeRateScraper, 'get_browser_driver',
                      return_value=driver), \
         patch.object(BCRAExchangeRateScraper,
                      'validate_coin_in_configuration_file',
                      return_value=True):
        scraper = BCRAExchangeRateScraper(
            'foo.com', {},
            intermediate_panel_path=None,
            use_intermediate_panel=False)
        content = scraper.fetch_content(date(2019, 3, 4), {})

        assert content == 400
def test_html_is_not_valid(self):
    """Empty fetched content yields no table, head or body."""
    with patch.object(BCRAExchangeRateScraper, 'fetch_content',
                      return_value=''):
        scraper = BCRAExchangeRateScraper("", {}, False)
        content = scraper.fetch_content(date(2019, 3, 4), '')

    soup = BeautifulSoup(content, "html.parser")
    table = soup.find('table')
    head = table.find('thead') if table else None
    body = table.find('tbody') if table else None

    assert table is None
    assert head is None
    assert body is None
def test_fetch_content_patching_driver(self):
    """fetch_content returns the browser driver's page source."""
    driver = MagicMock()
    driver.page_source = "foo"
    driver.status_code = 200

    with patch.object(BCRAExchangeRateScraper, 'get_browser_driver',
                      return_value=driver), \
         patch.object(BCRAExchangeRateScraper,
                      'validate_coin_in_configuration_file',
                      return_value=True):
        scraper = BCRAExchangeRateScraper('', {}, False)
        content = scraper.fetch_content(date(2019, 3, 4), {})

        assert content == 'foo'
def exchange_rates(ctx, start_date, end_date, config, use_intermediate_panel,
                   tp_csv_path, tc_csv_path, intermediate_panel_path):
    """CLI entry point: scrape BCRA exchange rates and write tp/tc CSVs."""
    try:
        logging.basicConfig(level=logging.WARNING)

        # Load and validate configuration and the requested date range.
        config = read_config(file_path=config, command=ctx.command.name)
        validate_url_config(config)
        validate_url_has_value(config)
        validate_coins_key_config(config)
        validate_coins_key_has_values(config)
        validate_dates(start_date, end_date)

        # Resolve output paths (CLI value wins over config), rejecting dirs.
        tp_file_path = validate_file_path(tp_csv_path, config,
                                          file_path_key='tp_file_path')
        tc_file_path = validate_file_path(tc_csv_path, config,
                                          file_path_key='tc_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path, config,
            file_path_key='intermediate_panel_path')

        if os.path.isdir(tp_file_path):
            click.echo(
                'Error: el path ingresado para tipo de pase usd es un directorio'
            )
            exit()
        elif os.path.isdir(tc_file_path):
            click.echo(
                'Error: el path ingresado para tipo de cambio local es un directorio'
            )
            exit()
        elif os.path.isdir(intermediate_panel_path):
            click.echo(
                'Error: el path ingresado para el panel intermedio es un directorio'
            )
            exit()

        for output_path in (tp_file_path, tc_file_path,
                            intermediate_panel_path):
            ensure_dir_exists(os.path.split(output_path)[0])

        timeout = (int(config.get('timeout'))
                   if 'timeout' in config.keys() else None)
        tries = int(config.get('tries', 1))

        scraper = BCRAExchangeRateScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            use_intermediate_panel=use_intermediate_panel,
            intermediate_panel_path=intermediate_panel_path)
        parsed = scraper.run(start_date, end_date)

        if parsed:
            coins = config.get('coins')
            csv_header = ['indice_tiempo']
            csv_header.extend(coins.keys())

            # Rows are reversed in place before writing (output order).
            parsed['tp_usd'].reverse()
            parsed['tc_local'].reverse()

            write_file(csv_header, parsed['tp_usd'], tp_file_path)
            write_file(csv_header, parsed['tc_local'], tc_file_path)
        else:
            click.echo("No se encontraron resultados")
    except InvalidConfigurationError as err:
        click.echo(err)
def exchange_rates(ctx, start_date, end_date, refetch_start_date,
                   refetch_end_date, config, skip_intermediate_panel_data,
                   tp_csv_path, tc_csv_path, intermediate_panel_path,
                   skip_clean_last_dates):
    """CLI entry point: scrape BCRA exchange rates (optionally refetching a
    sub-range), write tp/tc CSVs and email an execution-time report."""
    try:
        execution_start_hour = time.time()
        execution_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.basicConfig(format='%(message)s', level=logging.INFO)
        logging.info(Figlet(font='standard').renderText('scraper'))
        logging.info(Figlet(font='standard').renderText('exchange rates'))
        logging.info(f"Inicio de tiempo de ejecución: {execution_start_time}")

        # Load and validate configuration and the requested date range.
        config = read_config(file_path=config, command=ctx.command.name)
        validate_url_config(config)
        validate_url_has_value(config)
        validate_coins_key_config(config)
        validate_coins_key_has_values(config)
        validate_dates(start_date, end_date)
        start_date = start_date.date()
        end_date = end_date.date()

        # A refetch range is only honored when both endpoints are given.
        refetch_dates_range = []
        if refetch_start_date and refetch_end_date:
            validate_refetch_dates(start_date, end_date,
                                   refetch_start_date.date(),
                                   refetch_end_date.date())
            refetch_dates_range = generate_dates_range(
                refetch_start_date.date(), refetch_end_date.date())
        elif refetch_start_date or refetch_end_date:
            logging.warning(
                'No se encontró fecha para refetch_start_date o refetch_end_date, no se hará refetch.'
            )

        # Resolve output paths (CLI value wins over config), rejecting dirs.
        tp_file_path = validate_file_path(tp_csv_path, config,
                                          file_path_key='tp_file_path')
        tc_file_path = validate_file_path(tc_csv_path, config,
                                          file_path_key='tc_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path, config,
            file_path_key='intermediate_panel_path')

        if os.path.isdir(tp_file_path):
            click.echo(
                'Error: el path ingresado para tipo de pase usd es un directorio'
            )
            exit()
        elif os.path.isdir(tc_file_path):
            click.echo(
                'Error: el path ingresado para tipo de cambio local es un directorio'
            )
            exit()
        elif os.path.isdir(intermediate_panel_path):
            click.echo(
                'Error: el path ingresado para el panel intermedio es un directorio'
            )
            exit()

        for output_path in (tp_file_path, tc_file_path,
                            intermediate_panel_path):
            ensure_dir_exists(os.path.split(output_path)[0])

        timeout = (int(config.get('timeout'))
                   if 'timeout' in config.keys() else None)
        tries = int(config.get('tries', 1))

        scraper = BCRAExchangeRateScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            skip_intermediate_panel_data=skip_intermediate_panel_data,
            intermediate_panel_path=intermediate_panel_path,
            skip_clean_last_dates=skip_clean_last_dates)
        parsed = scraper.run(start_date, end_date, refetch_dates_range)

        if parsed:
            coins = config.get('coins')
            csv_header = ['indice_tiempo']
            csv_header.extend(coins.keys())

            write_file(csv_header, parsed['tp_usd'].values(), tp_file_path)
            write_file(csv_header, parsed['tc_local'].values(), tc_file_path)
        else:
            click.echo("No se encontraron resultados")

        # Report elapsed wall-clock time and notify the validation group.
        execution_end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.info(f"Fin de tiempo de ejecución: {execution_end_time}")
        execution_end_hour = time.time()
        hours, rem = divmod(execution_end_hour - execution_start_hour, 3600)
        minutes, seconds = divmod(rem, 60)
        execution_total_time = "{:0>2}:{:0>2}:{:05.2f}".format(
            int(hours), int(minutes), seconds)

        Email().send_validation_group_email(
            execution_start_time, execution_end_time, execution_total_time,
            start_date, end_date, skip_intermediate_panel_data,
            identifier='exchange-rates')
    except InvalidConfigurationError as err:
        click.echo(err)
def exchange_rates(ctx, start_date, end_date, config,
                   skip_intermediate_panel_data, tp_csv_path, tc_csv_path,
                   intermediate_panel_path, skip_clean_last_dates):
    """CLI entry point: scrape BCRA exchange rates, write tp/tc CSVs and
    notify the validation group by email."""
    try:
        execution_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.basicConfig(level=logging.WARNING)

        # Load and validate configuration and the requested date range.
        config = read_config(file_path=config, command=ctx.command.name)
        validate_url_config(config)
        validate_url_has_value(config)
        validate_coins_key_config(config)
        validate_coins_key_has_values(config)
        validate_dates(start_date, end_date)
        start_date = start_date.date()
        end_date = end_date.date()

        # Resolve output paths (CLI value wins over config), rejecting dirs.
        tp_file_path = validate_file_path(tp_csv_path, config,
                                          file_path_key='tp_file_path')
        tc_file_path = validate_file_path(tc_csv_path, config,
                                          file_path_key='tc_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path, config,
            file_path_key='intermediate_panel_path')

        if os.path.isdir(tp_file_path):
            click.echo('Error: el path ingresado para tipo de pase usd es un directorio')
            exit()
        elif os.path.isdir(tc_file_path):
            click.echo('Error: el path ingresado para tipo de cambio local es un directorio')
            exit()
        elif os.path.isdir(intermediate_panel_path):
            click.echo('Error: el path ingresado para el panel intermedio es un directorio')
            exit()

        for output_path in (tp_file_path, tc_file_path,
                            intermediate_panel_path):
            ensure_dir_exists(os.path.split(output_path)[0])

        timeout = (int(config.get('timeout'))
                   if 'timeout' in config.keys() else None)
        tries = int(config.get('tries', 1))

        scraper = BCRAExchangeRateScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            skip_intermediate_panel_data=skip_intermediate_panel_data,
            intermediate_panel_path=intermediate_panel_path,
            skip_clean_last_dates=skip_clean_last_dates
        )
        parsed = scraper.run(start_date, end_date)

        if parsed:
            coins = config.get('coins')
            csv_header = ['indice_tiempo']
            csv_header.extend(coins.keys())

            write_file(csv_header, parsed['tp_usd'], tp_file_path)
            write_file(csv_header, parsed['tc_local'], tc_file_path)
        else:
            click.echo("No se encontraron resultados")

        execution_end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        Email().send_validation_group_email(
            execution_start_time, execution_end_time,
            start_date, end_date, skip_intermediate_panel_data,
            identifier='exchange-rates')
    except InvalidConfigurationError as err:
        click.echo(err)