def __init__(self, logger, dst_con, result_logger, verbosity_level):
    """Keep references to the collaborators and build a schema extractor.

    :param logger: Logger object stored for later use.
    :param dst_con: Destination connection; also handed to
        ``SQLiteSchemaExtractor``.
    :param result_logger: Object stored for reporting results.
    :param verbosity_level: Verbosity setting stored as-is.
    """
    # Destination connection and the extractor that reads schemas from it.
    self.__dst_con = dst_con
    self.__schema_extractor = SQLiteSchemaExtractor(dst_con)

    # Logging related collaborators.
    self.__logger = logger
    self.__result_logger = result_logger
    self.__verbosity_level = verbosity_level
def __init__(
    self,
    logger,
    con,
    symbol_replace_value,
    index_list,
    verbosity_level,
    format_name=None,
    encoding=None,
):
    """Store the conversion settings and wire up the helper objects.

    :param logger: Logger object shared with the helper objects.
    :param con: Connection the schema extractor and table creator work on.
    :param symbol_replace_value: Stored as-is for later use.
    :param index_list: Stored as-is for later use.
    :param verbosity_level: Verbosity setting shared with the helpers.
    :param format_name: Optional format name, stored as-is.
    :param encoding: Optional encoding, stored as-is.
    """
    # Plain settings.
    self._logger = logger
    self._con = con
    self._symbol_replace_value = symbol_replace_value
    self._index_list = index_list
    self._verbosity_level = verbosity_level
    self._format_name = format_name
    self._encoding = encoding

    # Helper objects; each one is built from the ones created before it.
    schema_extractor = SQLiteSchemaExtractor(con)
    self._schema_extractor = schema_extractor

    result_counter = ResultCounter()
    self._result_counter = result_counter

    result_logger = ResultLogger(logger, schema_extractor, result_counter, verbosity_level)
    self._result_logger = result_logger

    self._table_creator = TableCreator(
        logger=logger,
        dst_con=con,
        result_logger=result_logger,
        verbosity_level=verbosity_level,
    )

    # Register the connection with SourceInfo and create its table.
    SourceInfo.attach(con, is_hidden=True)
    SourceInfo.create()
def test_normal_index(self, file_creator, index_list, expected):
    """Run the ``file`` sub-command with ``--index`` and compare the schema.

    The schema of table ``valid_csv_3_1`` in the output database must dump
    to exactly ``expected``.
    """
    output_db = "test_index.sqlite"
    cli_runner = CliRunner()

    # The database path is relative, so the inspection has to happen while
    # the isolated filesystem is still active.
    with cli_runner.isolated_filesystem():
        source_path = file_creator()
        cli_args = ["-o", output_db, "--index", index_list, "file", source_path]
        result = cli_runner.invoke(cmd, cli_args)
        print_traceback(result)
        assert result.exit_code == ExitCode.SUCCESS

        table_schema = SQLiteSchemaExtractor(output_db).fetch_table_schema("valid_csv_3_1")
        actual = table_schema.dumps()
        print_test_result(expected=expected, actual=actual)
        assert actual == expected
def test_normal_index(self, file_creator, index_list, expected):
    """Check the table schema produced when indexes are requested.

    Invokes the command line with ``--index index_list`` on the file made by
    ``file_creator`` and compares the dumped schema of ``valid_csv_3_1``
    against ``expected``.
    """
    sqlite_path = "test_index.sqlite"
    isolated_runner = CliRunner()

    with isolated_runner.isolated_filesystem():
        input_path = file_creator()

        argv = ["-o", sqlite_path]
        argv += ["--index", index_list]
        argv += ["file", input_path]

        outcome = isolated_runner.invoke(cmd, argv)
        print_traceback(outcome)
        assert outcome.exit_code == ExitCode.SUCCESS

        schema_extractor = SQLiteSchemaExtractor(sqlite_path)
        dumped_schema = schema_extractor.fetch_table_schema("valid_csv_3_1").dumps()
        print_test_result(expected=expected, actual=dumped_schema)
        assert dumped_schema == expected
def test_normal_type_hints(self, tmpdir):
    """Write a two-column table with one type hint and verify the column types.

    Only the first column gets a ``ptw.String`` hint; the expected schema has
    ``TEXT`` for column ``a`` and ``INTEGER`` for column ``b``.
    """
    sqlite_path = str(tmpdir.join("test.sqlite"))

    table_writer = ptw.SqliteTableWriter()
    table_writer.open(sqlite_path)
    table_writer.table_name = "hoge"
    table_writer.headers = ["a", "b"]
    table_writer.value_matrix = [[1, 2], [11, 12]]
    table_writer.type_hints = [ptw.String]
    table_writer.write_table()
    table_writer.close()

    # Both columns share every schema attribute except the name and type.
    expected_columns = [
        OrderedDict(
            [
                ("Field", column_name),
                ("Index", False),
                ("Type", column_type),
                ("Null", "YES"),
                ("Key", ""),
                ("Default", "NULL"),
                ("Extra", ""),
            ]
        )
        for column_name, column_type in (("a", "TEXT"), ("b", "INTEGER"))
    ]

    database_schema = SQLiteSchemaExtractor(sqlite_path).fetch_database_schema_as_dict()
    assert database_schema[table_writer.table_name] == expected_columns
def test_normal_symbols_attr(self):
    """Run the ``file`` sub-command with ``--replace-symbol _`` and check the column names.

    The text dump of table ``symbols_attr`` must equal
    ``symbols_attr (A1_A, B2B, C3_C)``.
    """
    output_db = "test_symbols_attr.sqlite"
    cli_runner = CliRunner()
    expected = dedent("symbols_attr (A1_A, B2B, C3_C)")

    with cli_runner.isolated_filesystem():
        cli_args = ["-o", output_db, "--replace-symbol", "_", "file", symbols_attr_csv_file()]
        result = cli_runner.invoke(cmd, cli_args)
        print_traceback(result)
        assert result.exit_code == ExitCode.SUCCESS

        table_schema = SQLiteSchemaExtractor(output_db).fetch_table_schema("symbols_attr")
        print_test_result(
            expected=expected,
            actual=table_schema.dumps(output_format="text", verbosity_level=1),
        )
        assert table_schema.dumps(output_format="text", verbosity_level=1) == expected
def test_normal_dup_col_csv_file(self):
    """Convert the file from ``dup_col_csv_file()`` and check the dump is non-trivial.

    ``expected`` is only printed for comparison; the assertion itself just
    requires the text dump to be longer than 100 characters.
    """
    output_db = "test_dup_col.sqlite"
    cli_runner = CliRunner()
    expected = dedent(
        """\
        _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
        dup_col (A, A_2, A_1)"""
    )

    with cli_runner.isolated_filesystem():
        cli_args = ["-o", output_db, "file", dup_col_csv_file()]
        result = cli_runner.invoke(cmd, cli_args)
        print_traceback(result)
        assert result.exit_code == ExitCode.SUCCESS

        schema_extractor = SQLiteSchemaExtractor(output_db)
        print_test_result(
            expected=expected,
            actual=schema_extractor.dumps(output_format="text", verbosity_level=1),
        )
        assert len(schema_extractor.dumps(output_format="text", verbosity_level=1)) > 100
def test_normal_symbols_attr(self):
    """Verify the column names after conversion with ``--replace-symbol _``.

    Converts the file from ``symbols_attr_csv_file()`` and compares the text
    dump (verbosity 1) of table ``symbols_attr`` with the expected one-liner.
    """
    sqlite_path = "test_symbols_attr.sqlite"
    isolated_runner = CliRunner()
    expected = dedent("symbols_attr (A1_A, B2B, C3_C)")

    with isolated_runner.isolated_filesystem():
        argv = ["-o", sqlite_path]
        argv += ["--replace-symbol", "_"]
        argv += ["file", symbols_attr_csv_file()]

        outcome = isolated_runner.invoke(cmd, argv)
        print_traceback(outcome)
        assert outcome.exit_code == ExitCode.SUCCESS

        schema_extractor = SQLiteSchemaExtractor(sqlite_path)
        dump_options = {"output_format": "text", "verbosity_level": 1}
        symbols_schema = schema_extractor.fetch_table_schema("symbols_attr")

        print_test_result(expected=expected, actual=symbols_schema.dumps(**dump_options))
        assert symbols_schema.dumps(**dump_options) == expected
def test_normal_dup_col_csv_file(self):
    """Check that converting the ``dup_col_csv_file()`` input yields a schema dump.

    The reference text is printed next to the actual dump; the test passes
    when the command succeeds and the dump exceeds 100 characters.
    """
    sqlite_path = "test_dup_col.sqlite"
    isolated_runner = CliRunner()

    reference_dump = """\
        _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
        dup_col (A, A_2, A_1)"""
    expected = dedent(reference_dump)

    with isolated_runner.isolated_filesystem():
        outcome = isolated_runner.invoke(cmd, ["-o", sqlite_path, "file", dup_col_csv_file()])
        print_traceback(outcome)
        assert outcome.exit_code == ExitCode.SUCCESS

        database_schema = SQLiteSchemaExtractor(sqlite_path)
        dump_options = {"output_format": "text", "verbosity_level": 1}

        print_test_result(expected=expected, actual=database_schema.dumps(**dump_options))
        assert len(database_schema.dumps(**dump_options)) > 100
def __require_rename_table(self, src_con, src_table_name):
    """Tell whether the source table clashes with a differently shaped destination table.

    :param src_con: Connection holding the source table.
    :param src_table_name: Name of the table to look up on both sides.
    :return: ``False`` when the destination has no table of that name;
        otherwise ``True`` only if the two table schemas differ.
    """
    if self.__dst_con.has_table(src_table_name):
        # Same name on both sides: compare the schemas as dictionaries.
        dst_schema = self.__schema_extractor.fetch_table_schema(src_table_name)
        dst_schema_dict = dst_schema.as_dict()

        src_schema = SQLiteSchemaExtractor(src_con).fetch_table_schema(src_table_name)
        src_schema_dict = src_schema.as_dict()

        return dst_schema_dict != src_schema_dict

    return False
class TableCreator(object):
    """Copy or append table data into a destination connection."""

    def __init__(self, logger, dst_con, result_logger, verbosity_level):
        """Keep references to the collaborators and build a schema extractor.

        :param logger: Logger object used for debug messages.
        :param dst_con: Destination connection that receives the tables.
        :param result_logger: Object whose ``logging_success`` is called
            after each table has been written.
        :param verbosity_level: Passed to ``source_info.get_name``.
        """
        self.__dst_con = dst_con
        self.__schema_extractor = SQLiteSchemaExtractor(dst_con)

        self.__logger = logger
        self.__result_logger = result_logger
        self.__verbosity_level = verbosity_level

    def create(self, table_data, index_list, source_info):
        """Stage ``table_data`` in an in-memory database and move it to the destination.

        When the destination already has a table of the same name with a
        different schema, the data is copied under a new unique name;
        otherwise it is appended to the table of the same name. Afterwards
        the indexes are created and the success is reported.

        :param table_data: Table data to store.
        :param index_list: Passed to ``create_index_list`` for the
            destination table.
        :param source_info: Object providing ``get_name`` for the report.
        """
        staging_con = simplesqlite.connect_memdb()
        staging_con.create_table_from_tabledata(table_data)

        rename_required = self.__require_rename_table(staging_con, table_data.table_name)
        staged_name = staging_con.fetch_table_name_list()[0]

        if not rename_required:
            target_name = staged_name
            # Appending to an existing table does not count as a creation.
            table_created = not self.__dst_con.has_table(target_name)
            simplesqlite.append_table(
                src_con=staging_con, dst_con=self.__dst_con, table_name=target_name
            )
        else:
            target_name = self.__make_unique_table_name(staged_name)
            self.__logger.debug(
                "rename table from '{}' to '{}'".format(staged_name, target_name)
            )
            table_created = True
            simplesqlite.copy_table(
                src_con=staging_con,
                dst_con=self.__dst_con,
                src_table_name=staged_name,
                dst_table_name=target_name,
            )

        self.__dst_con.create_index_list(target_name, index_list)

        source_name = source_info.get_name(self.__verbosity_level)
        self.__result_logger.logging_success(source_name, target_name, table_created)

    def __require_rename_table(self, src_con, src_table_name):
        """Tell whether the destination holds a same-named table with another schema.

        :return: ``False`` when the destination has no table of that name;
            otherwise ``True`` only if the two table schemas differ.
        """
        if self.__dst_con.has_table(src_table_name):
            dst_schema = self.__schema_extractor.fetch_table_schema(src_table_name)
            dst_schema_dict = dst_schema.as_dict()

            src_schema = SQLiteSchemaExtractor(src_con).fetch_table_schema(src_table_name)
            src_schema_dict = src_schema.as_dict()

            return dst_schema_dict != src_schema_dict

        return False

    def __make_unique_table_name(self, table_name_base):
        """Return ``table_name_base`` or the first free ``<base>_<n>`` variant.

        ``n`` starts at 1 and grows until the name is not among the tables
        of the destination connection.
        """
        taken_names = self.__dst_con.fetch_table_name_list()

        candidate = table_name_base
        suffix = 0
        while candidate in taken_names:
            suffix += 1
            candidate = "{:s}_{:d}".format(table_name_base, suffix)

        return candidate
class TableConverter(object):
    """Shared state and helper routines for writing converted tables to SQLite.

    An instance ties a schema extractor, a result counter, a result logger
    and a table creator to a single connection.
    """

    def __init__(
        self,
        logger,
        con,
        symbol_replace_value,
        index_list,
        verbosity_level,
        format_name=None,
        encoding=None,
    ):
        """Store the conversion settings and wire up the helper objects.

        :param logger: Logger object shared with the helper objects.
        :param con: Connection the helpers operate on.
        :param symbol_replace_value: Replacement used by ``normalize_table``;
            ``None`` disables header symbol replacement.
        :param index_list: Passed to the dictionary converter.
        :param verbosity_level: Verbosity setting shared with the helpers.
        :param format_name: Optional format name, stored as-is.
        :param encoding: Optional encoding, stored as-is.
        """
        # Plain settings.
        self._logger = logger
        self._con = con
        self._symbol_replace_value = symbol_replace_value
        self._index_list = index_list
        self._verbosity_level = verbosity_level
        self._format_name = format_name
        self._encoding = encoding

        # Helper objects; each one is built from the ones created before it.
        schema_extractor = SQLiteSchemaExtractor(con)
        self._schema_extractor = schema_extractor

        result_counter = ResultCounter()
        self._result_counter = result_counter

        result_logger = ResultLogger(logger, schema_extractor, result_counter, verbosity_level)
        self._result_logger = result_logger

        self._table_creator = TableCreator(
            logger=logger,
            dst_con=con,
            result_logger=result_logger,
            verbosity_level=verbosity_level,
        )

        # Register the connection with SourceInfo and create its table.
        SourceInfo.attach(con, is_hidden=True)
        SourceInfo.create()

    def _fetch_source_id(self, source_info):
        """Look up the ``source_id`` of the record matching ``source_info``.

        ``base_name`` and ``format_name`` are always part of the condition;
        ``dir_name`` is added when truthy, ``size`` and ``mtime`` when they
        are not ``None``.
        """
        conditions = [
            Where("base_name", source_info.base_name),
            Where("format_name", source_info.format_name),
        ]

        if source_info.dir_name:
            conditions.append(Where("dir_name", source_info.dir_name))

        for column in ("size", "mtime"):
            value = getattr(source_info, column)
            if value is not None:
                conditions.append(Where(column, value))

        return self._con.fetch_value(
            select=Attr("source_id"),
            table_name=SourceInfo.get_table_name(),
            where=And(conditions),
        )

    def _fetch_next_source_id(self):
        """Return the largest stored ``source_id`` plus one, or 1 if there is none."""
        max_source_id = self._con.fetch_value(
            select="MAX({})".format("source_id"), table_name=SourceInfo.get_table_name()
        )

        return 1 if max_source_id is None else max_source_id + 1

    def get_return_code(self):
        """Return the return code reported by the result counter."""
        counter = self._result_counter
        return counter.get_return_code()

    def get_success_count(self):
        """Return the success count held by the result counter."""
        counter = self._result_counter
        return counter.success_count

    def normalize_table(self, table_data, dup_col_handler=None):
        """Sanitize ``table_data`` and optionally replace symbols in its headers.

        :param table_data: Table data handed to ``SQLiteTableDataSanitizer``.
        :param dup_col_handler: Handler for duplicate columns; falls back to
            ``DEFAULT_DUP_COL_HANDLER`` when ``None``.
        :return: The sanitized table data. When a symbol replacement value is
            configured, a new ``TableData`` with rewritten headers.
        """
        from tabledata import TableData
        from pathvalidate import replace_symbol, replace_unprintable_char
        from simplesqlite import SQLiteTableDataSanitizer

        handler = DEFAULT_DUP_COL_HANDLER if dup_col_handler is None else dup_col_handler
        sanitized = SQLiteTableDataSanitizer(table_data, dup_col_handler=handler).normalize()

        replacement = self._symbol_replace_value
        if replacement is None:
            return sanitized

        def clean_header(header):
            # Strip unprintable characters first, then replace the symbols.
            return replace_symbol(
                replace_unprintable_char(header),
                replacement,
                is_replace_consecutive_chars=True,
                is_strip=True,
            )

        return TableData(
            table_name=sanitized.table_name,
            header_list=[clean_header(header) for header in sanitized.header_list],
            row_list=sanitized.row_list,
            dp_extractor=sanitized.dp_extractor,
        )

    def write_completion_message(self):
        """Log a summary of the conversion and, on success, the database schema.

        The summary line always contains the number of distinct sources;
        success, fail, skip and created-table counts are added only when they
        are greater than zero.
        """
        log = self._logger
        counter = self._result_counter

        log.debug("----- {:s} completed -----".format(PROGRAM_NAME))

        source_count = self._con.fetch_value(
            select="COUNT(DISTINCT({}))".format("source_id"),
            table_name=SourceInfo.get_table_name(),
        )
        summary = ["source={}".format(bright(source_count))]

        succeeded = self.get_success_count()
        if succeeded > 0:
            summary.append(green("success={}".format(bright(succeeded))))

        failed = counter.fail_count
        if failed > 0:
            summary.append(red("fail={}".format(bright(failed))))

        skipped = counter.skip_count
        if skipped > 0:
            summary.append(yellow("skip={}".format(bright(skipped))))

        created_tables = counter.created_table_count
        if created_tables > 0:
            summary.append("created-table={}".format(bright(created_tables)))

        log.info("converted results: {}".format(", ".join(summary)))

        relative_db_path = Path(self._con.database_path).relpath()
        database_path_msg = "database path: {:s}".format(bright(relative_db_path))

        if self.get_success_count() > 0:
            output_format, verbosity_level = self.__get_dump_param()
            log.info(database_path_msg)

            try:
                from textwrap import indent
            except ImportError:
                # for Python 2 compatibility
                def indent(value, _):
                    return value

            schema_text = self._schema_extractor.dumps(
                output_format=output_format, verbosity_level=verbosity_level
            )
            log.debug("----- database schema -----\n{}".format(indent(schema_text, " ")))
        else:
            # Nothing was converted: the path is only worth a debug message.
            log.debug(database_path_msg)

    def _convert_nb(self, nb, source_info):
        """Convert ``nb`` via ``convert_nb`` and return the created table set.

        :return: The value returned by ``convert_nb``, or ``None`` (after
            logging a warning) when the success count did not change.
        """
        count_before = self._result_counter.success_count

        created_table_set = convert_nb(
            logger=self._logger,
            source_info=source_info,
            con=self._con,
            result_logger=self._result_logger,
            nb=nb,
        )

        if self._result_counter.success_count != count_before:
            return created_table_set

        self._logger.warn(TABLE_NOT_FOUND_MSG_FORMAT.format(source_info.base_name))
        return None

    def _convert_complex_json(self, json_loader, source_info):
        """Convert the dictionary loaded by ``json_loader`` with ``DictConverter``.

        An ``AttributeError`` raised while loading or converting is ignored.

        :return: ``converted_table_name_set`` of the dictionary converter.
        """
        from .._dict_converter import DictConverter

        converter = DictConverter(
            self._logger,
            self._table_creator,
            source_info=source_info,
            index_list=self._index_list,
        )

        try:
            converter.to_sqlite_table(json_loader.load_dict(), [])
        except AttributeError:
            # Best effort: fall through and return what has been converted.
            pass

        return converter.converted_table_name_set

    def __get_dump_param(self):
        """Choose the output format and verbosity level for the schema dump.

        :return: ``("rst_simple_table", verbosity_level)`` when
            ``pytablewriter`` can be imported; otherwise a ``"text"`` format
            with a verbosity derived from the configured level.
        :raises ValueError: If ``pytablewriter`` is unavailable and the
            configured verbosity level is neither 0 nor at least 1.
        """
        try:
            import pytablewriter  # noqa: W0611
        except ImportError:
            pass
        else:
            return ("rst_simple_table", self._verbosity_level)

        if self._verbosity_level >= 1:
            return ("text", MAX_VERBOSITY_LEVEL)

        if self._verbosity_level == 0:
            return ("text", 1)

        raise ValueError("invalid verbosity_level: {}".format(self._verbosity_level))