Example #1
    def test_normal_(self, table_name, headers, dup_col_handler, expected):
        new_tabledata = SQLiteTableDataSanitizer(
            TableData(table_name, headers, []), dup_col_handler=dup_col_handler
        ).normalize()

        print_test_result(expected=dump_tabledata(expected), actual=dump_tabledata(new_tabledata))

        assert new_tabledata.equals(expected)
Example #2
    def test_normal(self, table_name, headers, records, expected):
        new_tabledata = SQLiteTableDataSanitizer(
            TableData(table_name, headers, records)
        ).normalize()

        print_test_result(expected=dump_tabledata(expected), actual=dump_tabledata(new_tabledata))

        con = connect_memdb()
        con.create_table_from_tabledata(new_tabledata)
        assert con.select_as_tabledata(new_tabledata.table_name) == expected

        assert new_tabledata.equals(expected)
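Examples #1 and #2 exercise the core flow: wrap raw headers and rows in a TableData, run it through SQLiteTableDataSanitizer(...).normalize(), and load the result into SQLite. The following is a minimal, self-contained sketch of that pattern; the table name, headers, and rows are illustrative placeholders, and connect_memdb() is taken from the tests above and assumed to come from the simplesqlite package.

from simplesqlite import SQLiteTableDataSanitizer, connect_memdb
from tabledata import TableData

# Wrap raw headers and rows in a TableData, then normalize the names/types
# so the data can be stored in SQLite.
table_data = TableData("sample table", ["a b", "c-d"], [[1, 2], [3, 4]])
normalized = SQLiteTableDataSanitizer(table_data).normalize()

# Write the normalized table into an in-memory database and read it back.
con = connect_memdb()
con.create_table_from_tabledata(normalized)
print(con.select_as_tabledata(normalized.table_name))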
Example #3
    def test_normal_(self, table_name, headers, dup_col_handler, expected):
        new_tabledata = SQLiteTableDataSanitizer(
            TableData(table_name, headers, []),
            dup_col_handler=dup_col_handler).normalize()

        try:
            from pytablewriter import dumps_tabledata

            print_test_result(expected=dumps_tabledata(expected),
                              actual=dumps_tabledata(new_tabledata))
        except ImportError:
            pass

        assert new_tabledata.equals(expected)
Example #4
    def normalize_table(self, table_data: TableData, dup_col_handler=None) -> TableData:
        from pathvalidate import replace_symbol, replace_unprintable_char
        from simplesqlite import SQLiteTableDataSanitizer

        if dup_col_handler is None:
            dup_col_handler = DEFAULT_DUP_COL_HANDLER

        normalized_table_data = SQLiteTableDataSanitizer(
            table_data,
            dup_col_handler=dup_col_handler,
            is_type_inference=self._is_type_inference,
            max_workers=self._max_workers,
        ).normalize()

        if self._symbol_replace_value is None:
            return normalized_table_data

        return TableData(
            normalized_table_data.table_name,
            [
                replace_symbol(
                    replace_unprintable_char(header),
                    self._symbol_replace_value,
                    is_replace_consecutive_chars=True,
                    is_strip=True,
                )
                for header in normalized_table_data.headers
            ],
            normalized_table_data.rows,
            dp_extractor=normalized_table_data.dp_extractor,
            type_hints=table_data.dp_extractor.column_type_hints,
        )
Example #5
    def normalize_table(self, table_data, dup_col_handler=None):
        from tabledata import TableData
        from pathvalidate import replace_symbol, replace_unprintable_char
        from simplesqlite import SQLiteTableDataSanitizer

        if dup_col_handler is None:
            dup_col_handler = DEFAULT_DUP_COL_HANDLER

        normalized_table_data = SQLiteTableDataSanitizer(
            table_data, dup_col_handler=dup_col_handler
        ).normalize()

        if self._symbol_replace_value is None:
            return normalized_table_data

        return TableData(
            table_name=normalized_table_data.table_name,
            header_list=[
                replace_symbol(
                    replace_unprintable_char(header),
                    self._symbol_replace_value,
                    is_replace_consecutive_chars=True,
                    is_strip=True,
                )
                for header in normalized_table_data.header_list
            ],
            row_list=normalized_table_data.row_list,
            dp_extractor=normalized_table_data.dp_extractor,
        )
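Examples #4 and #5 post-process the normalized headers with pathvalidate. Below is a minimal sketch of just that header-cleanup step; the sample header is illustrative, and "_" stands in for self._symbol_replace_value.

from pathvalidate import replace_symbol, replace_unprintable_char

raw_header = "price [$/kg]\t"
cleaned = replace_symbol(
    replace_unprintable_char(raw_header),  # remove unprintable characters first
    "_",                                   # replacement text for symbol characters
    is_replace_consecutive_chars=True,     # collapse runs of replacements into one
    is_strip=True,                         # strip leading/trailing replacements
)
print(cleaned)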
Example #6
    def __convert(self, table_data):
        self.__logger.debug("loaded tabledata: {}".format(six.text_type(table_data)))

        sqlite_tabledata = SQLiteTableDataSanitizer(table_data).normalize()
        self.__table_creator.create(
            sqlite_tabledata, self.__index_list, source_info=self.__source_info
        )
        self.__converted_table_name_set.add(sqlite_tabledata.table_name)
Example #7
    def test_normal(self, table_name, headers, records, expected):
        new_tabledata = SQLiteTableDataSanitizer(
            TableData(table_name, headers, records)).normalize()

        try:
            from pytablewriter import dumps_tabledata

            print_test_result(expected=dumps_tabledata(expected),
                              actual=dumps_tabledata(new_tabledata))
        except ImportError:
            pass

        con = connect_memdb()
        con.create_table_from_tabledata(new_tabledata)
        assert con.select_as_tabledata(new_tabledata.table_name) == expected

        assert new_tabledata.equals(expected)
Example #8
def gs(ctx, credentials, title, output_path):
    """
    Convert a spreadsheet in Google Sheets to a SQLite database file.

    CREDENTIALS: OAuth2 Google credentials file.
    TITLE: Title of the Google Sheets to convert.
    """

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} gs".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    loader = ptr.GoogleSheetsTableLoader()
    loader.source = credentials
    loader.title = title

    # if typepy.is_null_string(loader.source):
    #     loader.source = app_config_manager.load().get(
    #         ConfigKey.GS_CREDENTIALS_FILE_PATH)

    try:
        for table_data in loader.load():
            logger.debug(u"loaded table_data: {}".format(
                six.text_type(table_data)))

            sqlite_tabledata = SQLiteTableDataSanitizer(table_data).normalize()

            try:
                table_creator.create(sqlite_tabledata,
                                     ctx.obj.get(Context.INDEX_LIST))
                result_counter.inc_success()
            except (ptr.ValidationError, ptr.DataError):
                result_counter.inc_fail()
                continue

            logger.info(
                get_success_message(
                    verbosity_level, "google sheets",
                    schema_extractor.get_table_schema_text(
                        sqlite_tabledata.table_name)))
    except ptr.OpenError as e:
        logger.error(msgfy.to_error_message(e))
        result_counter.inc_fail()
    except AttributeError:
        logger.error(u"invalid credentials data: path={}".format(credentials))
        result_counter.inc_fail()
    except (ptr.ValidationError, ptr.DataError) as e:
        logger.error(u"invalid credentials data: path={}, message={}".format(
            credentials, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Example #9
    def __write(self, table_data):
        from simplesqlite import SQLiteTableDataSanitizer

        self.__logger.debug(u"loaded tabledata: {}".format(six.text_type(table_data)))

        sqlite_tabledata = SQLiteTableDataSanitizer(table_data).normalize()
        self.__table_creator.create(sqlite_tabledata, self.__index_list)
        self.__result_counter.inc_success()

        self.__logger.info(get_success_message(
            self.__verbosity_level, self.__source,
            self.__schema_extractor.get_table_schema_text(sqlite_tabledata.table_name)))
Example #10
    def __convert(self, table_data: TableData) -> None:
        self.__logger.debug("loaded tabledata: {}".format(str(table_data)))

        sqlite_tabledata = SQLiteTableDataSanitizer(
            table_data, max_workers=self.__max_workers).normalize()
        self.__table_creator.create(
            sqlite_tabledata,
            self.__index_list,
            source_info=self.__source_info,
        )
        self.__converted_table_name_set.add(
            cast(str, sqlite_tabledata.table_name))
Example #11
    def test_normal_type_hints(self, table_name, headers, records, type_hints,
                               expecte_col_types, expecte_data):
        new_tabledata = SQLiteTableDataSanitizer(
            TableData(table_name, headers, records,
                      type_hints=type_hints)).normalize()

        actual_col_types = [
            col_dp.typename for col_dp in new_tabledata.column_dp_list
        ]
        assert actual_col_types == expecte_col_types

        con = connect_memdb()
        con.create_table_from_tabledata(new_tabledata)
        assert con.select_as_tabledata(
            new_tabledata.table_name) == expecte_data
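Example #11 drives the sanitizer with explicit type hints. The sketch below assumes type_hints accepts typepy type classes; the hints and data here are illustrative and not taken from the test's parametrization.

from typepy import Integer, String

from simplesqlite import SQLiteTableDataSanitizer
from tabledata import TableData

table_data = TableData(
    "hinted", ["id", "label"], [[1, "a"], [2, "b"]], type_hints=[Integer, String]
)
normalized = SQLiteTableDataSanitizer(table_data).normalize()

# Column types the sanitizer settled on after applying the hints.
print([col_dp.typename for col_dp in normalized.column_dp_list])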
Example #12
    def test_smoke(self, tmpdir, filename):
        try:
            import pytablereader as ptr
        except ImportError:
            pytest.skip("requires pytablereader")

        p = tmpdir.join("tmp.db")
        con = SimpleSQLite(str(p), "w")

        test_data_file_path = os.path.join(os.path.dirname(__file__), "data",
                                           filename)
        loader = ptr.TableFileLoader(test_data_file_path)

        success_count = 0

        for table_data in loader.load():
            if table_data.is_empty():
                continue

            try:
                from pytablewriter import dumps_tabledata

                print(dumps_tabledata(table_data))
            except ImportError:
                pass

            try:
                con.create_table_from_tabledata(
                    SQLiteTableDataSanitizer(table_data).normalize())
                success_count += 1
            except ValueError as e:
                print(e)

        con.commit()

        assert success_count > 0
Example #13
def url(ctx, url, format_name, output_path, encoding, proxy):
    """
    Scrape tabular data from a URL and convert the data to a SQLite database file.
    """

    from ._ipynb_converter import is_ipynb_url, load_ipynb_url, convert_nb

    if typepy.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} url".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])

    if typepy.is_empty_sequence(encoding):
        encoding = app_config_manager.load().get(ConfigKey.DEFAULT_ENCODING)
        logger.debug("use default encoding: {}".format(encoding))

    if typepy.is_null_string(proxy):
        proxy = app_config_manager.load().get(ConfigKey.PROXY_SERVER)

    proxies = {
        "http": proxy,
        "https": proxy,
    }

    if format_name in IPYNB_FORMAT_NAME_LIST or is_ipynb_url(url):
        convert_nb(logger,
                   con,
                   result_counter,
                   nb=load_ipynb_url(url, proxies=proxies))
        for table_name in con.get_table_name_list():
            logger.info(
                get_success_message(
                    verbosity_level, get_logging_url_path(url),
                    schema_extractor.get_table_schema_text(table_name)))
            result_counter.inc_success()
        if result_counter.total_count == 0:
            logger.warn(table_not_found_msg_format.format(url))
        else:
            write_completion_message(logger, output_path, result_counter)

        sys.exit(result_counter.get_return_code())

    try:
        loader = create_url_loader(logger, url, format_name, encoding, proxies)
    except ptr.LoaderNotFoundError as e:
        logger.debug(e)

        try:
            loader = create_url_loader(logger, url, "html", encoding, proxies)
        except ptr.LoaderNotFoundError as e:
            logger.error(msgfy.to_error_message(e))
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

    table_creator = TableCreator(logger=logger, dst_con=con)

    try:
        for table_data in loader.load():
            logger.debug(u"loaded table_data: {}".format(
                six.text_type(table_data)))

            sqlite_tabledata = SQLiteTableDataSanitizer(table_data).normalize()

            try:
                table_creator.create(sqlite_tabledata,
                                     ctx.obj.get(Context.INDEX_LIST))
                result_counter.inc_success()
            except simplesqlite.OperationalError as e:
                logger.error(
                    u"{:s}: failed to convert: url={}, message={}".format(
                        e.__class__.__name__, url, e.message))
                result_counter.inc_fail()
                continue
            except ValueError as e:
                logger.debug(u"{:s}: url={}, message={}".format(
                    e.__class__.__name__, url, str(e)))
                result_counter.inc_fail()
                continue

            logger.info(
                get_success_message(
                    verbosity_level, get_logging_url_path(url),
                    schema_extractor.get_table_schema_text(
                        sqlite_tabledata.table_name)))
    except ptr.ValidationError as e:
        is_fail = True
        if loader.format_name == "json":
            dict_converter = DictConverter(logger,
                                           table_creator,
                                           result_counter,
                                           schema_extractor,
                                           verbosity_level,
                                           source=url,
                                           index_list=ctx.obj.get(
                                               Context.INDEX_LIST))

            try:
                dict_converter.to_sqlite_table(loader.loader.load_dict(), [])
            except AttributeError:
                pass
            else:
                is_fail = False

        if is_fail:
            logger.error(u"{:s}: url={}, message={}".format(
                e.__class__.__name__, url, str(e)))
            result_counter.inc_fail()
    except ptr.DataError as e:
        logger.error(u"{:s}: invalid data: url={}, message={}".format(
            e.__class__.__name__, url, str(e)))
        result_counter.inc_fail()

    if result_counter.total_count == 0:
        logger.warn(table_not_found_msg_format.format(url))

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Example #14
def file(ctx, files, format_name, output_path, encoding):
    """
    Convert tabular data within
    CSV/Excel/HTML/JSON/Jupyter Notebook/LTSV/Markdown/Mediawiki/SQLite/SSV/TSV
    file(s) to a SQLite database file.
    """

    from ._ipynb_converter import is_ipynb_file_path, load_ipynb_file, convert_nb

    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} file".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    for file_path in files:
        file_path = path.Path(file_path)

        if not file_path.isfile():
            logger.error(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        if file_path == output_path:
            logger.warn(
                u"skip a file which has the same path as the output file ({})".
                format(file_path))
            continue

        logger.debug(u"converting '{}'".format(file_path))
        convert_count = result_counter.total_count

        if format_name in IPYNB_FORMAT_NAME_LIST or is_ipynb_file_path(
                file_path):
            convert_nb(logger,
                       con,
                       result_counter,
                       nb=load_ipynb_file(file_path, encoding=encoding))
            for table_name in con.get_table_name_list():
                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(table_name)))
                result_counter.inc_success()
            if result_counter.total_count == convert_count:
                logger.warn(table_not_found_msg_format.format(file_path))
            continue

        try:
            loader = ptr.TableFileLoader(file_path,
                                         format_name=format_name,
                                         encoding=encoding)
        except ptr.InvalidFilePathError as e:
            logger.debug(msgfy.to_debug_message(e))
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            for table_data in loader.load():
                logger.debug(u"loaded tabledata: {}".format(
                    six.text_type(table_data)))

                sqlite_tabledata = SQLiteTableDataSanitizer(
                    table_data).normalize()

                try:
                    table_creator.create(sqlite_tabledata,
                                         ctx.obj.get(Context.INDEX_LIST))
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"exception={:s}, path={}, message={}".format(
                        type(e).__name__, file_path, e))
                    result_counter.inc_fail()
                    continue

                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(
                            sqlite_tabledata.table_name)))
        except ptr.OpenError as e:
            logger.error(u"{:s}: open error: file={}, message='{}'".format(
                e.__class__.__name__, file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            if loader.format_name == "json":
                dict_converter = DictConverter(logger,
                                               table_creator,
                                               result_counter,
                                               schema_extractor,
                                               verbosity_level,
                                               source=file_path,
                                               index_list=ctx.obj.get(
                                                   Context.INDEX_LIST))

                try:
                    dict_converter.to_sqlite_table(loader.loader.load_dict(),
                                                   [])
                except AttributeError:
                    pass
                else:
                    continue

            logger.error(
                u"{:s}: invalid {} data format: path={}, message={}".format(
                    e.__class__.__name__,
                    _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.DataError as e:
            logger.error(u"{:s}: invalid {} data: path={}, message={}".format(
                e.__class__.__name__, _get_format_type_from_path(file_path),
                file_path, str(e)))
            result_counter.inc_fail()

        if result_counter.total_count == convert_count:
            logger.warn(table_not_found_msg_format.format(file_path))

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Example #15
    def test_exception(self, table_name, headers, expected):
        with pytest.raises(expected):
            SQLiteTableDataSanitizer(
                TableData(table_name, headers, []), dup_col_handler="error"
            ).normalize()
Example #16
    def test_exception_invalid_data(self, table_name, headers, records, expected):
        with pytest.raises(expected):
            SQLiteTableDataSanitizer(
                TableData(table_name, headers, records)
            ).normalize()