Exemplo n.º 1
0
    def test_normal_json(self, url, format_name):
        """Verify TableUrlLoader parses a JSON body served from a mocked URL."""
        # Register a canned HTTP response; dedent keeps the literal readable.
        json_body = dedent(
            """\
            [
                {"attr_a": 1},
                {"attr_b": 2.1, "attr_c": "bb"}
            ]"""
        )
        responses.add(
            responses.GET,
            url,
            body=json_body,
            content_type="text/plain; charset=utf-8",
            status=200,
        )

        expected_list = [
            TableData(
                "url_loader",
                ["attr_a", "attr_b", "attr_c"],
                [{"attr_a": 1}, {"attr_b": 2.1, "attr_c": "bb"}],
            )
        ]

        loader = ptr.TableUrlLoader(url, format_name)
        assert loader.format_name == "json"

        loader.table_name = "url_loader"

        for actual in loader.load():
            # Dump actual vs. expected to ease debugging on failure.
            print("{} {}".format(actual, dumps_tabledata(actual)))
            print(actual.rows)
            print("[expected]")
            for candidate in expected_list:
                print(dumps_tabledata(candidate))

            assert actual.in_tabledata_list(expected_list)
Exemplo n.º 2
0
    def test_normal_csv(self, url, format_name):
        """Verify TableUrlLoader parses CSV content fetched from a mocked URL."""
        # NOTE: the continuation lines carry a literal 4-space indent on purpose.
        csv_body = '''"attr_a","attr_b","attr_c"
    1,4,"a"
    2,2.1,"bb"
    3,120.9,"ccc"'''
        responses.add(
            responses.GET,
            url,
            body=csv_body,
            content_type='text/plain; charset=utf-8',
            status=200,
        )

        expected_list = [
            ptr.TableData(
                "csv1",
                ["attr_a", "attr_b", "attr_c"],
                [
                    [1, 4, "a"],
                    [2, "2.1", "bb"],
                    [3, "120.9", "ccc"],
                ],
            )
        ]

        loader = ptr.TableUrlLoader(url, format_name)

        assert loader.format_name == "csv"

        for actual, expected in zip(loader.load(), expected_list):
            print("[expected]\n{}".format(ptw.dump_tabledata(expected)))
            print("[actual]\n{}".format(ptw.dump_tabledata(actual)))

            assert actual == expected
Exemplo n.º 3
0
    def test_normal_json(self, url, format_name):
        """Verify a JSON payload fetched over HTTP loads into TableData rows."""
        responses.add(
            responses.GET,
            url,
            body=dedent('''\
                [
                    {"attr_a": 1},
                    {"attr_b": 2.1, "attr_c": "bb"}
                ]'''),
            content_type='text/plain; charset=utf-8',
            status=200,
        )
        expected_list = [
            TableData(
                "json1",
                ["attr_a", "attr_b", "attr_c"],
                [
                    {'attr_a': 1},
                    {'attr_b': 2.1, 'attr_c': 'bb'},
                ],
            ),
        ]
        loader = ptr.TableUrlLoader(url, format_name)

        assert loader.format_name == "json"

        for table_data in loader.load():
            assert table_data.in_tabledata_list(expected_list)
Exemplo n.º 4
0
    def test_normal_excel(self):
        """Verify an Excel workbook downloaded over HTTP loads every sheet."""
        url = "https://github.com/thombashi/valid/test/data/validdata.xlsx"
        data_path = os.path.join(os.path.dirname(__file__),
                                 "data/validdata.xlsx")

        # Serve the local fixture workbook as the mocked response body.
        with open(data_path, "rb") as f:
            responses.add(
                responses.GET,
                url,
                body=f.read(),
                content_type="application/octet-stream",
                status=200,
            )

        expected_list = [
            TableData(
                "testsheet1",
                ["a1", "b1", "c1"],
                [
                    ["aa1", "ab1", "ac1"],
                    [1.0, 1.1, "a"],
                    [2.0, 2.2, "bb"],
                    [3.0, 3.3, "cc"],
                ],
            ),
            TableData(
                "testsheet3",
                ["a3", "b3", "c3"],
                [
                    ["aa3", "ab3", "ac3"],
                    [4.0, 1.1, "a"],
                    [5.0, "", "bb"],
                    [6.0, 3.3, ""],
                ],
            ),
        ]
        loader = ptr.TableUrlLoader(url)

        assert loader.format_name == "excel"

        for sheet_data in loader.load():
            assert sheet_data.in_tabledata_list(expected_list)
Exemplo n.º 5
0
    def test_normal_json(self, url, format_name):
        """Verify JSON rows from a mocked URL compare equal to TableData."""
        # NOTE: the literal's internal indentation is part of the payload.
        responses.add(
            responses.GET,
            url,
            body='''[
        {"attr_a": 1},
        {"attr_b": 2.1, "attr_c": "bb"}
    ]''',
            content_type='text/plain; charset=utf-8',
            status=200,
        )

        expected_list = [
            ptr.TableData(
                "json1",
                ["attr_a", "attr_b", "attr_c"],
                [
                    {'attr_a': 1},
                    {'attr_b': 2.1, 'attr_c': 'bb'},
                ],
            )
        ]

        loader = ptr.TableUrlLoader(url, format_name)

        assert loader.format_name == "json"

        for actual, expected in zip(loader.load(), expected_list):
            assert actual == expected
Exemplo n.º 6
0
    def test_normal_csv(self, url, format_name):
        """Verify CSV content fetched from a mocked URL loads as TableData."""
        csv_body = dedent("""\
            "attr_a","attr_b","attr_c"
            1,4,"a"
            2,2.1,"bb"
            3,120.9,"ccc"
            """)
        responses.add(
            responses.GET,
            url,
            body=csv_body,
            content_type="text/plain; charset=utf-8",
            status=200,
        )
        expected_list = [
            TableData(
                "csv1",
                ["attr_a", "attr_b", "attr_c"],
                [
                    [1, 4, "a"],
                    [2, "2.1", "bb"],
                    [3, "120.9", "ccc"],
                ],
            )
        ]
        loader = ptr.TableUrlLoader(url, format_name)

        assert loader.format_name == "csv"

        for actual in loader.load():
            assert actual.in_tabledata_list(expected_list)
Exemplo n.º 7
0
    def test_exception(self, value, format_name, expected):
        """A 404 response must make TableUrlLoader raise the expected error."""
        responses.add(
            responses.GET,
            value,
            body="404: Not Found",
            status=404,
        )

        with pytest.raises(expected):
            ptr.TableUrlLoader(value, format_name)
Exemplo n.º 8
0
def create_url_loader(logger, source_url, format_name, encoding, proxies):
    """Build a TableUrlLoader for *source_url*, exiting the process on failure.

    Exits with ``ExitCode.FAILED_HTTP`` on HTTP/URL errors and with
    ``errno.ECONNABORTED`` on proxy errors, logging the exception first.
    """
    try:
        loader = ptr.TableUrlLoader(
            source_url, format_name, encoding=encoding, proxies=proxies
        )
    except (ptr.HTTPError, ptr.UrlError) as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(ExitCode.FAILED_HTTP)
    except ptr.ProxyError as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(errno.ECONNABORTED)

    return loader
Exemplo n.º 9
0
    def test_normal(self, value, format_name, expected):
        """Loader built from a URL should match the expected loader class."""
        responses.add(
            responses.GET,
            value,
            body='{}',
            content_type='text/plain; charset=utf-8',
            status=200,
        )

        actual_loader = ptr.TableUrlLoader(value, format_name)
        reference_loader = expected("")

        assert actual_loader.source_type == reference_loader.source_type
        assert actual_loader.format_name == reference_loader.format_name
Exemplo n.º 10
0
def create_url_loader(logger, url, format_name, encoding, proxies):
    """Create a TableUrlLoader for *url*; log and exit the process on failure.

    Exits with ``ExitCode.FAILED_HTTP`` on HTTP errors and with
    ``errno.ECONNABORTED`` on proxy errors.
    """
    loader_kwargs = {"encoding": encoding, "proxies": proxies}
    try:
        return ptr.TableUrlLoader(url, format_name, **loader_kwargs)
    except ptr.HTTPError as e:
        logger.error(e)
        sys.exit(ExitCode.FAILED_HTTP)
    except ptr.ProxyError as e:
        logger.error(e)
        sys.exit(errno.ECONNABORTED)
Exemplo n.º 11
0
def create_url_loader(logger, source_url, format_name, encoding, proxies):
    """Create a TableUrlLoader, logging and exiting on HTTP/proxy errors."""

    def _log_error(exc):
        # Preserve the "<ClassName>: <message>" log format.
        logger.error("{:s}: {}".format(exc.__class__.__name__, exc))

    try:
        return ptr.TableUrlLoader(
            source_url, format_name, encoding=encoding, proxies=proxies
        )
    except ptr.HTTPError as e:
        _log_error(e)
        sys.exit(ExitCode.FAILED_HTTP)
    except ptr.ProxyError as e:
        _log_error(e)
        sys.exit(errno.ECONNABORTED)
Exemplo n.º 12
0
def create_url_loader(
    logger,
    source_url: str,
    format_name: str,
    encoding: str,
    type_hint_rules: Optional[TypeHintRules],
    proxies: Optional[Dict],
) -> AbstractTableReader:
    """Create a TableUrlLoader for *source_url*, exiting the process on failure.

    Exits with ``ExitCode.FAILED_HTTP`` on HTTP/URL errors and with
    ``errno.ECONNABORTED`` on proxy errors, logging the exception first.
    """
    try:
        loader = ptr.TableUrlLoader(
            source_url,
            format_name,
            encoding=encoding,
            type_hint_rules=type_hint_rules,
            proxies=proxies,
        )
    except (ptr.HTTPError, ptr.UrlError) as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(ExitCode.FAILED_HTTP)
    except ptr.ProxyError as e:
        logger.error(msgfy.to_error_message(e))
        sys.exit(errno.ECONNABORTED)

    return loader
Exemplo n.º 13
0
    def test_normal_excel(self):
        """Verify each worksheet of a downloaded Excel file loads correctly."""
        url = 'https://github.com/thombashi/valid/test/data/validdata.xlsx'

        data_path = os.path.join(os.path.dirname(__file__),
                                 "data/validdata.xlsx")

        # Serve the fixture workbook as the body of the mocked response.
        with open(data_path, "rb") as f:
            responses.add(
                responses.GET,
                url,
                body=f.read(),
                content_type='application/octet-stream',
                status=200,
            )

        expected_list = [
            ptr.TableData(
                table_name='testsheet1',
                header_list=['a1', 'b1', 'c1'],
                record_list=[
                    ['aa1', 'ab1', 'ac1'],
                    [1.0, 1.1, 'a'],
                    [2.0, 2.2, 'bb'],
                    [3.0, 3.3, 'cc'],
                ]),
            ptr.TableData(
                table_name='testsheet3',
                header_list=['a3', 'b3', 'c3'],
                record_list=[
                    ['aa3', 'ab3', 'ac3'],
                    [4.0, 1.1, 'a'],
                    [5.0, '', 'bb'],
                    [6.0, 3.3, ''],
                ]),
        ]

        loader = ptr.TableUrlLoader(url)

        assert loader.format_name == "excel"

        for actual, expected in zip(loader.load(), expected_list):
            assert actual == expected
Exemplo n.º 14
0
    def test_normal_sqlite(self):
        """Verify a SQLite database fetched over HTTP loads its table."""
        url = "https://github.com/thombashi/valid/test/data/valid.sqlite3"
        data_path = os.path.join(dirname(dirname(__file__)), "data/valid.sqlite3")

        # Serve the local fixture database as the mocked response body.
        with open(data_path, "rb") as f:
            responses.add(
                responses.GET,
                url,
                body=f.read(),
                content_type="application/octet-stream",
                status=200,
            )

        loader = ptr.TableUrlLoader(url)

        assert loader.format_name == "sqlite"

        # The expected table is loop-invariant, so build it once up front.
        expected = TableData(
            "tblfaker",
            ["file_extension", "random_number"],
            [["webm", 679215], ["jpg", 5088743], ["avi", 8268]],
        )
        for table_data in loader.load():
            assert table_data == expected
Exemplo n.º 15
0
#!/usr/bin/env python3

"""Fetch HTML tables from a Wikipedia page and dump them as reStructuredText."""

import pytablewriter as ptw

import pytablereader as ptr

loader = ptr.TableUrlLoader(
    "https://en.wikipedia.org/wiki/List_of_unit_testing_frameworks", "html")

writer = ptw.TableWriterFactory.create_from_format_name("rst")
# Use a context manager so the output file is flushed and closed even if a
# table fails to load/write (the original leaked the open file handle).
with open("load_url_result.rst", "w", encoding=loader.encoding) as out_file:
    writer.stream = out_file
    for table_data in loader.load():
        writer.from_tabledata(table_data)
        writer.write_table()
Exemplo n.º 16
0
def url(ctx, url, format_name, output_path, encoding, proxy):
    """
    Fetch data from a URL and convert data to a SQLite database file.

    Exits with an ExitCode-derived status: NO_INPUT for an empty URL,
    FAILED_LOADER_NOT_FOUND / FAILED_HTTP on loader failures, otherwise
    the return code computed from the success/fail counters.
    """

    # Bail out early when no URL was supplied on the command line.
    if dataproperty.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()

    logger = logbook.Logger("sqlitebiter url")
    _setup_logger_from_context(logger, ctx.obj[Context.LOG_LEVEL])

    # Apply the same proxy to both schemes when one was provided.
    proxies = {}
    if dataproperty.is_not_empty_string(proxy):
        proxies = {
            "http": proxy,
            "https": proxy,
        }

    # Try the requested format first; if no loader matches, retry assuming
    # the URL serves HTML before giving up.
    try:
        loader = ptr.TableUrlLoader(url,
                                    format_name,
                                    encoding=encoding,
                                    proxies=proxies)
    except ptr.LoaderNotFoundError as e:
        try:
            loader = ptr.TableUrlLoader(url,
                                        "html",
                                        encoding=encoding,
                                        proxies=proxies)
        except (ptr.LoaderNotFoundError, ptr.HTTPError):
            # Report the original (format-specific) error, not the fallback's.
            logger.error(e)
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)
    except ptr.HTTPError as e:
        logger.error(e)
        sys.exit(ExitCode.FAILED_HTTP)

    try:
        for tabledata in loader.load():
            # Normalize table/column names so they are valid SQLite identifiers.
            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                con.create_table_from_tabledata(sqlite_tabledata)
                result_counter.inc_success()
            except (ValueError) as e:
                # A single bad table should not abort the whole conversion.
                logger.debug(u"url={}, message={}".format(url, str(e)))
                result_counter.inc_fail()
                continue

            log_message = get_success_log_format(verbosity_level).format(
                url,
                extractor.get_table_schema_text(
                    sqlite_tabledata.table_name).strip())
            logger.info(log_message)
    except ptr.InvalidDataError as e:
        logger.error(u"invalid data: url={}, message={}".format(url, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Exemplo n.º 17
0
    if show_flags:
        for country in sorted(data['total_country']):
            total_country_flags.append(get_country_flag_emoji(country))

    print("Involved in " + str(number_of_events) + " events in " +
          str(len(data['total_country'])) + " countries.")
    print("   ".join(total_country_flags))


if __name__ == "__main__":
    latest_year = 2019
    stats_pr_year = defaultdict(list)
    stats_total = defaultdict(list)
    current_year = latest_year

    loader = ptr.TableUrlLoader("http://localhost:4000/talks/", "html")

    writer = ptw.TableWriterFactory.create_from_format_name("md")

    for table_data in loader.load():
        country_city_pr_year = []
        country_pr_year = []
        city_pr_year = []

        for record in table_data.row_list:
            country_city_pr_year.append(record[1])
            parsed = record[1].split(",")
            country_pr_year.append(parsed[0])
            city_pr_year.append(parsed[1])

        writer.from_tabledata(table_data)