def sml(ctx, config, start_date, end_date, use_intermediate_panel,
        uruguayo_csv_path, real_csv_path, intermediate_panel_path):
    """Run the SML scraper and write one CSV file per known coin.

    Args:
        ctx: command context; only ``ctx.command.name`` is read, to select
            the section passed to ``read_config``.
        config: path handed to ``read_config``; the name is then rebound to
            the loaded configuration.
        start_date: first date, passed to ``validate_dates`` and
            ``scraper.run``.
        end_date: last date, passed to ``validate_dates`` and
            ``scraper.run``.
        use_intermediate_panel: forwarded unchanged to ``BCRASMLScraper``.
        uruguayo_csv_path: candidate output path for the peso uruguayo CSV,
            resolved through ``validate_file_path``.
        real_csv_path: candidate output path for the real CSV, resolved
            through ``validate_file_path``.
        intermediate_panel_path: candidate path of the intermediate panel,
            resolved through ``validate_file_path``.

    ``InvalidConfigurationError`` raised anywhere in the body is caught and
    its message echoed instead of propagating.
    """
    try:
        logging.basicConfig(level=logging.WARNING)
        config = read_config(file_path=config, command=ctx.command.name)
        validate_url_config(config)
        validate_url_has_value(config)
        validate_coins_key_config(config)
        validate_coins_key_has_values(config)
        validate_dates(start_date, end_date)

        # presumably the CLI value takes precedence over the config entry
        # named by file_path_key -- confirm against validate_file_path.
        peso_uruguayo_file_path = validate_file_path(
            uruguayo_csv_path, config,
            file_path_key='peso_uruguayo_file_path')
        real_file_path = validate_file_path(
            real_csv_path, config, file_path_key='real_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path, config,
            file_path_key='intermediate_panel_path')

        # Only the first offending path is reported, in this fixed order.
        directory_checks = (
            (peso_uruguayo_file_path,
             'Error: el path ingresado para peso uruguayo es un directorio'),
            (real_file_path,
             'Error: el path ingresado para real es un directorio'),
            (intermediate_panel_path,
             'Error: el path ingresado para el panel intermedio es un directorio'),
        )
        directory_error = next(
            (message for path, message in directory_checks
             if os.path.isdir(path)),
            None)
        if directory_error is not None:
            click.echo(directory_error)
            # NOTE(review): exit() without an argument ends the process with
            # status 0 even though an error was just reported -- confirm
            # whether callers rely on that before changing it.
            exit()

        for output_path in (peso_uruguayo_file_path, real_file_path,
                            intermediate_panel_path):
            ensure_dir_exists(os.path.dirname(output_path))

        timeout = int(config.get('timeout')) if 'timeout' in config else None
        tries = int(config.get('tries', 1))

        scraper = BCRASMLScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            use_intermediate_panel=use_intermediate_panel,
            intermediate_panel_path=intermediate_panel_path)
        parsed = scraper.run(start_date, end_date)

        if parsed:
            # Header and destination for every coin this command can write.
            outputs = {
                'peso_uruguayo': (
                    [
                        'indice_tiempo',
                        'Tipo de cambio de Referencia',
                        'Tipo de cambio URINUSCA',
                        'Tipo de cambio SML Peso Uruguayo',
                        'Tipo de cambio SML Uruguayo Peso',
                    ],
                    peso_uruguayo_file_path,
                ),
                'real': (
                    [
                        'indice_tiempo',
                        'Tipo de cambio de Referencia',
                        'Tipo de cambio PTAX',
                        'Tipo de cambio SML Peso Real',
                        'Tipo de cambio SML Real Peso',
                    ],
                    real_file_path,
                ),
            }
            for coin, rows in parsed.items():
                if coin not in outputs:
                    # Without this guard an unexpected coin would reuse the
                    # previous coin's header and path (overwriting its file)
                    # or fail with NameError when it comes first.
                    logging.warning(
                        'Moneda "%s" sin archivo de salida configurado, '
                        'se omite.', coin)
                    continue
                csv_header, file_path = outputs[coin]
                # Rows are written in the reverse of the order returned by
                # the scraper.
                rows.reverse()
                write_file(csv_header, rows, file_path)
        else:
            click.echo("No se encontraron resultados")
    except InvalidConfigurationError as err:
        click.echo(err)
def sml(ctx, config, start_date, end_date, refetch_start_date,
        refetch_end_date, skip_intermediate_panel_data, uruguayo_csv_path,
        real_csv_path, intermediate_panel_path, skip_clean_last_dates):
    """Run the SML scraper, write the per-coin CSV files and send a report.

    The command logs a banner plus its start and end timestamps, validates
    the configuration and dates, runs ``BCRASMLScraper`` and writes one CSV
    for each of the ``peso_uruguayo`` and ``real`` keys found in the result.
    It finishes by calling ``Email().send_validation_group_email`` with the
    timestamps, the elapsed time and the date range.

    Args:
        ctx: command context; only ``ctx.command.name`` is read.
        config: path handed to ``read_config``; rebound to the loaded
            configuration afterwards.
        start_date: datetime-like start; converted with ``.date()`` after
            validation.
        end_date: datetime-like end; converted with ``.date()`` after
            validation.
        refetch_start_date: optional start of the refetch range.
        refetch_end_date: optional end of the refetch range. A refetch range
            is generated only when both refetch dates are truthy; when just
            one is given a warning is logged and no refetch happens.
        skip_intermediate_panel_data: forwarded to the scraper and to the
            email call.
        uruguayo_csv_path: candidate path for the peso uruguayo CSV.
        real_csv_path: candidate path for the real CSV.
        intermediate_panel_path: candidate path for the intermediate panel.
        skip_clean_last_dates: forwarded to the scraper.

    ``InvalidConfigurationError`` is caught and its message echoed.
    """
    try:
        execution_start_hour = time.time()
        execution_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        logging.basicConfig(format='%(message)s', level=logging.INFO)
        banner = Figlet(font='standard').renderText('scraper sml')
        logging.info(banner)
        logging.info(f"Inicio de tiempo de ejecución: {execution_start_time}")

        config = read_config(file_path=config, command=ctx.command.name)
        config_validators = (
            validate_url_config,
            validate_url_has_value,
            validate_coins_key_config,
            validate_coins_key_has_values,
        )
        for check_config in config_validators:
            check_config(config)
        validate_dates(start_date, end_date)

        # From here on the range is handled as plain dates.
        start_date = start_date.date()
        end_date = end_date.date()

        refetch_dates_range = []
        if refetch_start_date and refetch_end_date:
            refetch_from = refetch_start_date.date()
            refetch_to = refetch_end_date.date()
            validate_refetch_dates(start_date, end_date,
                                   refetch_from, refetch_to)
            refetch_dates_range = generate_dates_range(refetch_from,
                                                       refetch_to)
        elif refetch_start_date or refetch_end_date:
            logging.warning(
                'No se encontró fecha para refetch_start_date o refetch_end_date, no se hará refetch.'
            )

        peso_uruguayo_file_path = validate_file_path(
            uruguayo_csv_path, config,
            file_path_key='peso_uruguayo_file_path')
        real_file_path = validate_file_path(
            real_csv_path, config, file_path_key='real_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path, config,
            file_path_key='intermediate_panel_path')

        # Report the first path (in this order) that is a directory and stop.
        directory_checks = (
            (peso_uruguayo_file_path,
             'Error: el path ingresado para peso uruguayo es un directorio'),
            (real_file_path,
             'Error: el path ingresado para real es un directorio'),
            (intermediate_panel_path,
             'Error: el path ingresado para el panel intermedio es un directorio'),
        )
        directory_error = next(
            (message for path, message in directory_checks
             if os.path.isdir(path)),
            None)
        if directory_error is not None:
            click.echo(directory_error)
            exit()

        for output_path in (peso_uruguayo_file_path, real_file_path,
                            intermediate_panel_path):
            ensure_dir_exists(os.path.dirname(output_path))

        timeout = None
        if 'timeout' in config:
            timeout = int(config.get('timeout'))
        tries = int(config.get('tries', 1))

        scraper = BCRASMLScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            types=config.get('types'),
            skip_intermediate_panel_data=skip_intermediate_panel_data,
            intermediate_panel_path=intermediate_panel_path,
            skip_clean_last_dates=skip_clean_last_dates)
        parsed = scraper.run(start_date, end_date, refetch_dates_range)

        if not parsed:
            click.echo("No se encontraron resultados")
        else:
            # Coins this command knows how to write; other keys are ignored.
            destinations = {
                'peso_uruguayo': peso_uruguayo_file_path,
                'real': real_file_path,
            }
            for coin in parsed:
                if coin not in destinations:
                    continue
                csv_header = ['indice_tiempo',
                              *config['types'][coin].values()]
                write_file(csv_header, parsed[coin].values(),
                           destinations[coin])

        execution_end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.info(f"Fin de tiempo de ejecución: {execution_end_time}")

        # Break the elapsed seconds down into hours, minutes and seconds.
        execution_end_hour = time.time()
        elapsed = execution_end_hour - execution_start_hour
        hours, rem = divmod(elapsed, 3600)
        minutes, seconds = divmod(rem, 60)
        execution_total_time = "{:0>2}:{:0>2}:{:05.2f}".format(
            int(hours), int(minutes), seconds)

        Email().send_validation_group_email(
            execution_start_time,
            execution_end_time,
            execution_total_time,
            start_date,
            end_date,
            skip_intermediate_panel_data,
            identifier='sml')
    except InvalidConfigurationError as err:
        click.echo(err)
def test_run_using_intermediate_panel(self):
    """run() returns the preprocessed rows when both collaborators are patched.

    ``parse_from_intermediate_panel`` and ``preprocess_rows`` are replaced on
    ``BCRASMLScraper``, so the assertion only covers how ``run`` combines
    their results for the two coins.
    """

    def uruguayo_row():
        # A fresh dict per call keeps the fixture objects independent.
        return {
            'Tipo de cambio de Referencia': Decimal('44.89670'),
            'Tipo de cambio URINUSCA': Decimal('35.03600'),
            'Tipo de cambio SML Peso Uruguayo': Decimal('1.28145'),
            'Tipo de cambio SML Uruguayo Peso': Decimal('0.78040'),
            'indice_tiempo': date(2019, 5, 6),
        }

    def real_row():
        # A fresh dict per call keeps the fixture objects independent.
        return {
            'Tipo de cambio de Referencia': Decimal('44.89670'),
            'Tipo de cambio PTAX': Decimal('3.96210'),
            'Tipo de cambio SML Peso Real': Decimal('11.33155'),
            'Tipo de cambio SML Real Peso': Decimal('0.08825'),
            'indice_tiempo': date(2019, 5, 6),
        }

    start_date = datetime(2019, 5, 6)
    end_date = datetime(2019, 5, 6)
    url = ''' http://www.bcra.gov.ar/PublicacionesEstadisticas/Tipo_de_cambio_sml.asp '''
    coins = {"peso_uruguayo": "Peso Uruguayo", "real": "Real"}

    parsed = {
        'peso_uruguayo': [uruguayo_row()],
        'real': [real_row()],
    }
    peso_uruguayo_preprocess = [uruguayo_row()]
    real_preprocess = [real_row()]
    expected = {
        'peso_uruguayo': [uruguayo_row()],
        'real': [real_row()],
    }

    panel_patch = patch.object(
        BCRASMLScraper, 'parse_from_intermediate_panel',
        return_value=parsed)
    # side_effect hands out one list per call, in this order.
    preprocess_patch = patch.object(
        BCRASMLScraper, 'preprocess_rows',
        side_effect=[peso_uruguayo_preprocess, real_preprocess])

    with panel_patch, preprocess_patch:
        scraper = BCRASMLScraper(url, coins, True)
        result = scraper.run(start_date, end_date)

        assert result == expected
def sml(ctx, config, start_date, end_date, skip_intermediate_panel_data,
        uruguayo_csv_path, real_csv_path, intermediate_panel_path,
        skip_clean_last_dates):
    """Run the SML scraper, write the per-coin CSV files and send a report.

    The command validates the configuration and dates, runs
    ``BCRASMLScraper`` and writes one CSV for each of the ``peso_uruguayo``
    and ``real`` keys found in the result. It finishes by calling
    ``Email().send_validation_group_email`` with the start and end
    timestamps and the date range.

    Args:
        ctx: command context; only ``ctx.command.name`` is read.
        config: path handed to ``read_config``; rebound to the loaded
            configuration afterwards.
        start_date: datetime-like start; converted with ``.date()`` after
            validation.
        end_date: datetime-like end; converted with ``.date()`` after
            validation.
        skip_intermediate_panel_data: forwarded to the scraper and to the
            email call.
        uruguayo_csv_path: candidate path for the peso uruguayo CSV.
        real_csv_path: candidate path for the real CSV.
        intermediate_panel_path: candidate path for the intermediate panel.
        skip_clean_last_dates: forwarded to the scraper.

    ``InvalidConfigurationError`` is caught and its message echoed.
    """
    try:
        execution_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.basicConfig(level=logging.WARNING)

        config = read_config(file_path=config, command=ctx.command.name)
        config_validators = (
            validate_url_config,
            validate_url_has_value,
            validate_coins_key_config,
            validate_coins_key_has_values,
        )
        for check_config in config_validators:
            check_config(config)
        validate_dates(start_date, end_date)

        # From here on the range is handled as plain dates.
        start_date = start_date.date()
        end_date = end_date.date()

        peso_uruguayo_file_path = validate_file_path(
            uruguayo_csv_path, config,
            file_path_key='peso_uruguayo_file_path')
        real_file_path = validate_file_path(
            real_csv_path, config, file_path_key='real_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path, config,
            file_path_key='intermediate_panel_path')

        # Report the first path (in this order) that is a directory and stop.
        directory_checks = (
            (peso_uruguayo_file_path,
             'Error: el path ingresado para peso uruguayo es un directorio'),
            (real_file_path,
             'Error: el path ingresado para real es un directorio'),
            (intermediate_panel_path,
             'Error: el path ingresado para el panel intermedio es un directorio'),
        )
        directory_error = next(
            (message for path, message in directory_checks
             if os.path.isdir(path)),
            None)
        if directory_error is not None:
            click.echo(directory_error)
            exit()

        for output_path in (peso_uruguayo_file_path, real_file_path,
                            intermediate_panel_path):
            ensure_dir_exists(os.path.dirname(output_path))

        timeout = None
        if 'timeout' in config:
            timeout = int(config.get('timeout'))
        tries = int(config.get('tries', 1))

        scraper = BCRASMLScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            types=config.get('types'),
            skip_intermediate_panel_data=skip_intermediate_panel_data,
            intermediate_panel_path=intermediate_panel_path,
            skip_clean_last_dates=skip_clean_last_dates)
        parsed = scraper.run(start_date, end_date)

        if not parsed:
            click.echo("No se encontraron resultados")
        else:
            # Coins this command knows how to write; other keys are ignored.
            destinations = {
                'peso_uruguayo': peso_uruguayo_file_path,
                'real': real_file_path,
            }
            for coin in parsed:
                if coin not in destinations:
                    continue
                csv_header = ['indice_tiempo',
                              *config['types'][coin].values()]
                write_file(csv_header, parsed[coin], destinations[coin])

        execution_end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        Email().send_validation_group_email(
            execution_start_time,
            execution_end_time,
            start_date,
            end_date,
            skip_intermediate_panel_data,
            identifier='sml')
    except InvalidConfigurationError as err:
        click.echo(err)