Example #1
    def test_get_intermediate_panel_data_from_parsed(self):
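        """Test building intermediate panel rows from parsed data."""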
        url = ''
        parsed = {
            'tc_local': [{
                'bolivar_venezolano': Decimal('0.0123560'),
                'indice_tiempo': datetime(2019, 3, 6)
            }],
            'tp_usd': [{
                'bolivar_venezolano': Decimal('0.0003030'),
                'indice_tiempo': datetime(2019, 3, 6)
            }]
        }

        coins = {
            "bolivar_venezolano": "Bolívar Venezolano",
        }

        scraper = BCRAExchangeRateScraper(url,
                                          coins,
                                          intermediate_panel_path=None,
                                          use_intermediate_panel=True)

        result = scraper.get_intermediate_panel_data_from_parsed(parsed)

        assert result == [{
            'indice_tiempo': datetime(2019, 3, 6),
            'coin': 'bolivar_venezolano',
            'type': 'tp_usd',
            'value': Decimal('0.0003030')
        }, {
            'indice_tiempo': datetime(2019, 3, 6),
            'coin': 'bolivar_venezolano',
            'type': 'tc_local',
            'value': Decimal('0.0123560')
        }]
Example #2
    def test_html_is_valid(self):
        """Probar que el html sea valido"""
        url = ""
        start_date = date(2019, 3, 4)
        coin = ''

        rates = {}
        with patch.object(
            BCRAExchangeRateScraper,
            'fetch_content',
            return_value='''
                <table class="table table-BCRA table-bordered table-hover
                    table-responsive">
                <thead>
                </thead>
                    <tbody>
                    </tbody>
                </table>
            '''
        ):
            scraper = BCRAExchangeRateScraper(url, rates, False)
            content = scraper.fetch_content(start_date, coin)

            soup = BeautifulSoup(content, "html.parser")

            table = soup.find('table')
            head = table.find('thead') if table else None
            body = table.find('tbody') if table else None

            assert table is not None
            assert head is not None
            assert body is not None
Example #3
    def test_not_head_parse_coin(self):
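        """Test that parse_coin returns [] for a table without a header."""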
        url = ("http://www.bcra.gov.ar/Publicaciones"
               "Estadisticas/Evolucion_moneda.asp")
        coins = {
            "bolivar_venezolano": "Bolívar Venezolano"
        }

        start_date = datetime(2019, 4, 8)
        end_date = datetime(2019, 4, 8)
        coin = 'bolivar_venezolano'

        content = '''
                    <table class="table table-BCRA table-bordered table-hover
                            table-responsive" colspan="3">
                        <tr>
                        <td colspan="3">
                            <b></b>
                        </td>
                        </tr>
                        <tr>
                            <td width="10%"><b></b>
                            </td>
                            <td width="40%"><b></b></td>
                            <td width="50%"><b></b></td>
                            </tr>
                    </table>
                '''

        scraper = BCRAExchangeRateScraper(url, coins, False)
        parsed_coin = scraper.parse_coin(content, start_date, end_date, coin)

        assert parsed_coin == []
Example #4
    def test_parse_for_non_empty_contents(self):
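        """Test parse_contents with non-empty contents."""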
        url = ("http://www.bcra.gov.ar/Publicaciones"
               "Estadisticas/Evolucion_moneda.asp")

        coins = {"bolivar_venezolano": "Bolívar Venezolano"}
        scraper = BCRAExchangeRateScraper(url,
                                          coins,
                                          intermediate_panel_path=None,
                                          use_intermediate_panel=False)
        start_date = datetime(2019, 4, 8)
        end_date = datetime(2019, 4, 8)
        contents = {}

        table_content = '''
        <table class="table table-BCRA table-bordered table-hover
        table-responsive" colspan="3">
            <thead>
            <tr>
            <td colspan="3">
                <b>MERCADO DE CAMBIOS - COTIZACIONES CIERRE VENDEDOR<br>
                Bolívar Venezolano</b>
            </td>
            </tr>
            <tr>
                <td width="10%"><b>
                    FECHA</b>
                </td>
                <td width="40%"><b>
            TIPO DE PASE - EN DOLARES - (por unidad)</b></td>
                <td width="50%"><b>
            TIPO DE CAMBIO - MONEDA DE CURSO LEGAL - (por unidad)</b></td>
                </tr>
            </thead>
            <tbody><tr>
                <td width="10%">
                08/04/2019</td>
                <td width="40%">
                0,0003030</td>
                <td width="50%">
                0,0132500</td>
            </tr>
            </tbody>
        </table>
        '''

        contents['bolivar_venezolano'] = table_content

        parsed = scraper.parse_contents(contents, start_date, end_date)

        assert parsed['tc_local'] == [{
            'bolivar_venezolano': '0,0132500',
            'indice_tiempo': '08/04/2019'
        }]

        assert parsed['tp_usd'] == [{
            'bolivar_venezolano': '0,0003030',
            'indice_tiempo': '08/04/2019'
        }]
Example #5
    def test_parse_from_intermediate_panel_empty_value(self):
        """Probar parseo desde el archivo intermedio"""
        start_date = '2019-03-06'
        end_date = '2019-03-06'

        coins = {
            "bolivar_venezolano": "Bolívar Venezolano",
            "chelin_austriaco": 'Chelin Austriaco'
        }
        url = ''

        intermediate_panel_df = {
            'indice_tiempo': [
                '2019-03-06',
                '2019-03-06'
            ],
            'coin': [
                'bolivar_venezolano',
                'bolivar_venezolano'
            ],
            'type': [
                'tc_local', 'tp_usd'
            ],
            'value': [
                '0.0003030',
                '0.0123560'
            ]
        }

        with patch.object(
            BCRAExchangeRateScraper,
            'read_intermediate_panel_dataframe',
            return_value=pd.DataFrame(data=intermediate_panel_df)
        ):
            scraper = BCRAExchangeRateScraper(url, coins, True)
            content = scraper.parse_from_intermediate_panel(
                start_date, end_date
            )

            assert content == {
                'tc_local':
                [
                    {
                        'indice_tiempo': '2019-03-06',
                        'bolivar_venezolano': '0.0003030'
                    }
                ],
                'tp_usd':
                [
                    {
                        'indice_tiempo': '2019-03-06',
                        'bolivar_venezolano': '0.0123560'
                    }
                ]
            }
Example #6
    def test_get_intermediate_panel_data_from_empty_parsed(self):
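        """Test that empty parsed data yields no intermediate panel rows."""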
        url = ''
        parsed = {}

        coins = {
            "bolivar_venezolano": "Bolívar Venezolano",
        }

        scraper = BCRAExchangeRateScraper(url, coins, True)

        result = scraper.get_intermediate_panel_data_from_parsed(parsed)

        assert result == []
Example #7
    def test_validate_coin_in_configuration_file_true(self):
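        """Test that a coin listed in the page options validates as True."""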
        coins = {}
        url = 'foo.com'
        coin = "Bolívar Venezolano"
        options = []

        for option_text in ['Seleccione Moneda', 'Bolívar Venezolano']:
            mock = MagicMock()
            mock.text = option_text
            options.append(mock)

        scraper = BCRAExchangeRateScraper(url, coins, False)
        coin_in_configuration_file = \
            scraper.validate_coin_in_configuration_file(coin, options)
        assert coin_in_configuration_file is True
Example #8
    def test_parse_contents(self):
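        """Test that parse_contents splits rows into tc_local and tp_usd."""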
        url = ''

        start_date = datetime(2019, 4, 24)
        end_date = datetime(2019, 4, 24)

        coins = {
            "bolivar_venezolano": "Bolívar Venezolano"
        }

        content = {'bolivar_venezolano': 'foo'}

        parsed = [
            {
                'moneda': 'bolivar_venezolano',
                'indice_tiempo': '24/04/2019',
                'tp_usd': '0,0001930',
                'tc_local': '0,0084610'
            }
        ]

        with patch.object(
            BCRAExchangeRateScraper,
            'parse_coin',
            return_value=parsed
        ):
            scraper = BCRAExchangeRateScraper(url, coins, False)
            result = scraper.parse_contents(content, start_date, end_date)

            assert result == {
                'tc_local':
                [
                    {
                        'indice_tiempo': '24/04/2019',
                        'bolivar_venezolano': '0,0084610'
                    }
                ],
                'tp_usd':
                [
                    {
                        'indice_tiempo': '24/04/2019',
                        'bolivar_venezolano': '0,0001930'
                    }
                ]
            }
Example #9
    def test_not_table_parse_coin(self):
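        """Test that parse_coin returns [] when the content has no table."""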
        url = ("http://www.bcra.gov.ar/Publicaciones"
               "Estadisticas/Evolucion_moneda.asp")
        coins = {
            "bolivar_venezolano": "Bolívar Venezolano"
        }

        start_date = datetime(2019, 4, 8)
        end_date = datetime(2019, 4, 8)
        coin = 'bolivar_venezolano'

        content = ''

        scraper = BCRAExchangeRateScraper(url, coins, False)
        parsed_coin = scraper.parse_coin(content, start_date, end_date, coin)

        assert parsed_coin == []
Example #10
    def test_parse_for_empty_contents(self):
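        """Test parse_contents with empty contents."""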
        url = ("http://www.bcra.gov.ar/Publicaciones"
               "Estadisticas/Evolucion_moneda.asp")
        coins = {
            "bolivar_venezolano": "Bolívar Venezolano",
            "chelin_austriaco": "Chelín Austríaco",
            "cordoba_nicaraguense": "Cordoba Nicaraguense",
            "corona_checa": "Corona Checa",
            "corona_danesa": "Corona Danesa",
        }
        scraper = BCRAExchangeRateScraper(url, coins, False)
        start_date = date.today()
        end_date = date.today()
        contents = {}
        parsed = scraper.parse_contents(contents, start_date, end_date)

        assert parsed['tc_local'] == []
        assert parsed['tp_usd'] == []
Example #11
    def test_fetch_contents(self):
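        """Test that fetch_contents collects the fetched content per coin."""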

        coins = {"bolivar_venezolano": "Bolívar Venezolano"}
        start_date = datetime(2019, 4, 24)
        end_date = datetime(2019, 4, 24)
        url = ''
        content = 'foo'
        with patch.object(BCRAExchangeRateScraper,
                          'fetch_content',
                          return_value=content):
            scraper = BCRAExchangeRateScraper(url,
                                              coins,
                                              intermediate_panel_path=None,
                                              use_intermediate_panel=False)
            result = scraper.fetch_contents(start_date, end_date)

            assert result == {
                'bolivar_venezolano': 'foo',
            }
Example #12
    def test_preprocessed_rows_date(self):
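        """Test preprocessing of dates, decimal values and empty markers."""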
        rows = [{
            'bolivar_venezolano': '0,0003040',
            'dolar_estadounidense': '--------',
            'oro_onza_troy': '1.289,6300000',
            'indice_tiempo': '2019-04-01'
        }]
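        # Only preprocess_rows is exercised here; the url and coins
        # constructor arguments are presumably placeholders.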
        scraper = BCRAExchangeRateScraper(False,
                                          rows,
                                          intermediate_panel_path=None,
                                          use_intermediate_panel=False)

        result = scraper.preprocess_rows(rows)

        assert result == [{
            'bolivar_venezolano': Decimal('0.0003040'),
            'dolar_estadounidense': None,
            'oro_onza_troy': Decimal('1289.6300000'),
            'indice_tiempo': date(2019, 4, 1)
        }]
Example #13
    def test_fetch_content_invalid_url_patching_driver(self):
        """Probar fetch content con url invalida"""
        single_date = date(2019, 3, 4)
        coins = {}
        url = 'foo.com'

        mocked_driver = MagicMock()
        mocked_driver.page_source = 400

        with patch.object(BCRAExchangeRateScraper,
                          'get_browser_driver',
                          return_value=mocked_driver):
            with patch.object(BCRAExchangeRateScraper,
                              'validate_coin_in_configuration_file',
                              return_value=True):
                scraper = BCRAExchangeRateScraper(url,
                                                  coins,
                                                  intermediate_panel_path=None,
                                                  use_intermediate_panel=False)
                content = scraper.fetch_content(single_date, coins)
                assert content == 400
Example #14
    def test_html_is_not_valid(self):
        """Probar que el html no sea valido"""
        url = ""
        start_date = date(2019, 3, 4)
        coin = ''
        coins = {}
        with patch.object(
            BCRAExchangeRateScraper,
            'fetch_content',
            return_value=''
        ):
            scraper = BCRAExchangeRateScraper(url, coins, False)
            content = scraper.fetch_content(start_date, coin)

            soup = BeautifulSoup(content, "html.parser")

            table = soup.find('table')
            head = table.find('thead') if table else None
            body = table.find('tbody') if table else None

            assert table is None
            assert head is None
            assert body is None
Example #15
    def test_fetch_content_patching_driver(self):
        """Probar fetch content"""
        single_date = date(2019, 3, 4)
        coins = {}
        url = ''

        mocked_driver = MagicMock()
        mocked_driver.page_source = "foo"
        mocked_driver.status_code = 200

        with patch.object(
            BCRAExchangeRateScraper,
            'get_browser_driver',
            return_value=mocked_driver
        ):
            with patch.object(
                BCRAExchangeRateScraper,
                'validate_coin_in_configuration_file',
                return_value=True
            ):
                scraper = BCRAExchangeRateScraper(url, coins, False)
                content = scraper.fetch_content(single_date, coins)
                assert content == 'foo'
Example #16
def exchange_rates(ctx, start_date, end_date, config, use_intermediate_panel,
                   tp_csv_path, tc_csv_path, intermediate_panel_path):

    try:
        logging.basicConfig(level=logging.WARNING)
        config = read_config(file_path=config, command=ctx.command.name)
        validate_url_config(config)
        validate_url_has_value(config)
        validate_coins_key_config(config)
        validate_coins_key_has_values(config)
        validate_dates(start_date, end_date)

        tp_file_path = validate_file_path(tp_csv_path,
                                          config,
                                          file_path_key='tp_file_path')
        tc_file_path = validate_file_path(tc_csv_path,
                                          config,
                                          file_path_key='tc_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path,
            config,
            file_path_key='intermediate_panel_path')

        if os.path.isdir(tp_file_path):
            click.echo(
                'Error: el path ingresado para tipo de pase usd es un directorio'
            )
            exit()
        elif os.path.isdir(tc_file_path):
            click.echo(
                'Error: el path ingresado para tipo de cambio local es un directorio'
            )
            exit()
        elif os.path.isdir(intermediate_panel_path):
            click.echo(
                'Error: el path ingresado para el panel intermedio es un directorio'
            )
            exit()

        ensure_dir_exists(os.path.split(tp_file_path)[0])
        ensure_dir_exists(os.path.split(tc_file_path)[0])
        ensure_dir_exists(os.path.split(intermediate_panel_path)[0])

        timeout = (int(config.get('timeout'))
                   if 'timeout' in config.keys() else None)
        tries = int(config.get('tries', 1))

        scraper = BCRAExchangeRateScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            use_intermediate_panel=use_intermediate_panel,
            intermediate_panel_path=intermediate_panel_path)
        parsed = scraper.run(start_date, end_date)

        if parsed:
            coins = config.get('coins')
            csv_header = ['indice_tiempo']
            csv_header.extend(coins.keys())
            parsed['tp_usd'].reverse()
            parsed['tc_local'].reverse()
            write_file(csv_header, parsed['tp_usd'], tp_file_path)
            write_file(csv_header, parsed['tc_local'], tc_file_path)

        else:
            click.echo("No se encontraron resultados")

    except InvalidConfigurationError as err:
        click.echo(err)
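
# Hypothetical invocation (assuming Click exposes the parameters above as
# kebab-case options on an `exchange-rates` command; the entry-point name
# `bcra_scraper` is an assumption, not shown in this example):
#
#   bcra_scraper exchange-rates --start-date=2019-03-04 \
#       --end-date=2019-03-05 --config=config.yaml --use-intermediate-panel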
Example #17
def exchange_rates(ctx, start_date, end_date, refetch_start_date,
                   refetch_end_date, config, skip_intermediate_panel_data,
                   tp_csv_path, tc_csv_path, intermediate_panel_path,
                   skip_clean_last_dates):

    try:
        execution_start_hour = time.time()

        execution_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        logging.basicConfig(format='%(message)s', level=logging.INFO)
        logging.info(Figlet(font='standard').renderText('scraper'))
        logging.info(Figlet(font='standard').renderText('exchange rates'))
        logging.info(f"Inicio de tiempo de ejecución: {execution_start_time}")

        config = read_config(file_path=config, command=ctx.command.name)
        validate_url_config(config)
        validate_url_has_value(config)
        validate_coins_key_config(config)
        validate_coins_key_has_values(config)
        validate_dates(start_date, end_date)
        start_date = start_date.date()
        end_date = end_date.date()
        refetch_dates_range = []
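        # Build the optional refetch date range: both bounds are required,
        # otherwise a warning is logged and no refetch is performed.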
        if refetch_start_date and refetch_end_date:
            validate_refetch_dates(start_date, end_date,
                                   refetch_start_date.date(),
                                   refetch_end_date.date())
            refetch_dates_range = generate_dates_range(
                refetch_start_date.date(), refetch_end_date.date())
        elif refetch_start_date or refetch_end_date:
            logging.warning(
                'No se encontró fecha para refetch_start_date o refetch_end_date, no se hará refetch.'
            )

        tp_file_path = validate_file_path(tp_csv_path,
                                          config,
                                          file_path_key='tp_file_path')
        tc_file_path = validate_file_path(tc_csv_path,
                                          config,
                                          file_path_key='tc_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path,
            config,
            file_path_key='intermediate_panel_path')

        if os.path.isdir(tp_file_path):
            click.echo(
                'Error: el path ingresado para tipo de pase usd es un directorio'
            )
            exit()
        elif os.path.isdir(tc_file_path):
            click.echo(
                'Error: el path ingresado para tipo de cambio local es un directorio'
            )
            exit()
        elif os.path.isdir(intermediate_panel_path):
            click.echo(
                'Error: el path ingresado para el panel intermedio es un directorio'
            )
            exit()

        ensure_dir_exists(os.path.split(tp_file_path)[0])
        ensure_dir_exists(os.path.split(tc_file_path)[0])
        ensure_dir_exists(os.path.split(intermediate_panel_path)[0])

        timeout = (int(config.get('timeout'))
                   if 'timeout' in config.keys() else None)
        tries = int(config.get('tries', 1))

        scraper = BCRAExchangeRateScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            skip_intermediate_panel_data=skip_intermediate_panel_data,
            intermediate_panel_path=intermediate_panel_path,
            skip_clean_last_dates=skip_clean_last_dates)
        parsed = scraper.run(start_date, end_date, refetch_dates_range)

        if parsed:
            coins = config.get('coins')
            csv_header = ['indice_tiempo']
            csv_header.extend(coins.keys())

            write_file(csv_header, parsed['tp_usd'].values(), tp_file_path)
            write_file(csv_header, parsed['tc_local'].values(), tc_file_path)

        else:
            click.echo("No se encontraron resultados")
        execution_end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        logging.info(f"Fin de tiempo de ejecución: {execution_end_time}")

        execution_end_hour = time.time()
        hours, rem = divmod(execution_end_hour - execution_start_hour, 3600)
        minutes, seconds = divmod(rem, 60)
        execution_total_time = "{:0>2}:{:0>2}:{:05.2f}".format(
            int(hours), int(minutes), seconds)
        Email().send_validation_group_email(execution_start_time,
                                            execution_end_time,
                                            execution_total_time,
                                            start_date,
                                            end_date,
                                            skip_intermediate_panel_data,
                                            identifier='exchange-rates')

    except InvalidConfigurationError as err:
        click.echo(err)
Example #18
def exchange_rates(ctx, start_date, end_date, config, skip_intermediate_panel_data,
                   tp_csv_path, tc_csv_path, intermediate_panel_path, skip_clean_last_dates):

    try:
        execution_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.basicConfig(level=logging.WARNING)
        config = read_config(file_path=config, command=ctx.command.name)
        validate_url_config(config)
        validate_url_has_value(config)
        validate_coins_key_config(config)
        validate_coins_key_has_values(config)
        validate_dates(start_date, end_date)
        start_date = start_date.date()
        end_date = end_date.date()

        tp_file_path = validate_file_path(tp_csv_path, config, file_path_key='tp_file_path')
        tc_file_path = validate_file_path(tc_csv_path, config, file_path_key='tc_file_path')
        intermediate_panel_path = validate_file_path(
            intermediate_panel_path, config,
            file_path_key='intermediate_panel_path')

        if os.path.isdir(tp_file_path):
            click.echo('Error: el path ingresado para tipo de pase usd es un directorio')
            exit()
        elif os.path.isdir(tc_file_path):
            click.echo('Error: el path ingresado para tipo de cambio local es un directorio')
            exit()
        elif os.path.isdir(intermediate_panel_path):
            click.echo('Error: el path ingresado para el panel intermedio es un directorio')
            exit()

        ensure_dir_exists(os.path.split(tp_file_path)[0])
        ensure_dir_exists(os.path.split(tc_file_path)[0])
        ensure_dir_exists(os.path.split(intermediate_panel_path)[0])

        timeout = (
            int(config.get('timeout'))
            if 'timeout' in config.keys() else None
        )
        tries = int(config.get('tries', 1))

        scraper = BCRAExchangeRateScraper(
            url=config.get('url'),
            timeout=timeout,
            tries=tries,
            coins=config.get('coins'),
            skip_intermediate_panel_data=skip_intermediate_panel_data,
            intermediate_panel_path=intermediate_panel_path,
            skip_clean_last_dates=skip_clean_last_dates
        )
        parsed = scraper.run(start_date, end_date)

        if parsed:
            coins = config.get('coins')
            csv_header = ['indice_tiempo']
            csv_header.extend(coins.keys())
            write_file(csv_header, parsed['tp_usd'], tp_file_path)
            write_file(csv_header, parsed['tc_local'], tc_file_path)

        else:
            click.echo("No se encontraron resultados")
        execution_end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
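        # Notify the validation group with the run's time window and settings.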
        Email().send_validation_group_email(
            execution_start_time, execution_end_time, start_date, end_date,
            skip_intermediate_panel_data, identifier='exchange-rates')

    except InvalidConfigurationError as err:
        click.echo(err)