def test_normal_dup_col_csv_file(self):
    """Convert a CSV file with duplicated column names and inspect the schema dump.

    The expected schema text is only printed next to the actual dump for
    diagnostics; the assertions check the CLI exit code and that the dump
    is longer than 100 characters.
    """
    sqlite_path = "test_dup_col.sqlite"
    dump_kwargs = {"output_format": "text", "verbosity_level": 1}
    expected = dedent(
        """\
        _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
        dup_col (A, A_2, A_1)"""
    )
    cli_runner = CliRunner()

    with cli_runner.isolated_filesystem():
        # The fixture file is created inside the isolated working directory.
        cli_args = ["-o", sqlite_path, "file", dup_col_csv_file()]
        result = cli_runner.invoke(cmd, cli_args)
        print_traceback(result)
        assert result.exit_code == ExitCode.SUCCESS

        extractor = SQLiteSchemaExtractor(sqlite_path)
        print_test_result(expected=expected, actual=extractor.dumps(**dump_kwargs))

        # Only the size of the dump is asserted, not its exact content.
        assert len(extractor.dumps(**dump_kwargs)) > 100
def test_normal_dup_col_csv_file(self):
    """Run the ``file`` sub-command on a duplicate-column CSV and dump the schema.

    Asserts a successful exit code and a schema dump of more than 100
    characters. The expected text is passed to ``print_test_result`` for
    display only.
    """
    # NOTE(review): a definition with this same name appears directly before
    # this one in the reviewed text; if both live in one class body, this
    # later definition shadows the earlier one -- confirm which should remain.
    database_file = "test_dup_col.sqlite"
    expected = dedent(
        """\
        _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
        dup_col (A, A_2, A_1)"""
    )
    runner = CliRunner()

    with runner.isolated_filesystem():
        result = runner.invoke(
            cmd,
            ["-o", database_file, "file", dup_col_csv_file()],
        )
        print_traceback(result)
        assert result.exit_code == ExitCode.SUCCESS

        schema_extractor = SQLiteSchemaExtractor(database_file)
        dump_options = {"output_format": "text", "verbosity_level": 1}

        print_test_result(
            expected=expected,
            actual=schema_extractor.dumps(**dump_options),
        )

        assert len(schema_extractor.dumps(**dump_options)) > 100
def test_normal_index(self, file_creator, index_list, expected):
    """Convert a file with ``--index`` and compare one table's schema dump.

    :param file_creator: Callable that creates the input file and returns its path.
    :param index_list: Value passed to the ``--index`` command line option.
    :param expected: Expected schema dump of the ``valid_csv_3_1`` table.
    """
    sqlite_path = "test_index.sqlite"
    cli_runner = CliRunner()

    with cli_runner.isolated_filesystem():
        # Create the input inside the isolated working directory.
        source_path = file_creator()
        cli_args = ["-o", sqlite_path, "--index", index_list, "file", source_path]
        result = cli_runner.invoke(cmd, cli_args)
        print_traceback(result)
        assert result.exit_code == ExitCode.SUCCESS

        extractor = SQLiteSchemaExtractor(sqlite_path)
        print_test_result(expected=expected, actual=extractor.dumps())

        table_schema = extractor.fetch_table_schema("valid_csv_3_1")
        assert table_schema.dumps() == expected
class TableConverter(object):
    """Shared state and helpers for converting data sources into SQLite tables.

    An instance keeps the destination connection, a result counter/logger
    pair and a table creator, and offers helpers to look up source ids,
    normalize table data and report the overall conversion result.
    """

    def __init__(
        self,
        logger,
        con,
        symbol_replace_value,
        index_list,
        verbosity_level,
        format_name=None,
        encoding=None,
    ):
        """Store the conversion settings and build the helper objects.

        :param logger: Logger used for all messages emitted by this object.
        :param con: Destination database connection.
        :param symbol_replace_value:
            Replacement text for symbols in column headers;
            ``None`` disables the replacement (see ``normalize_table``).
        :param index_list: Index specification forwarded to the JSON converter.
        :param verbosity_level: Verbosity used for result logging and schema dumps.
        :param format_name: Optional format name; stored as-is.
        :param encoding: Optional encoding; stored as-is.
        """
        self._logger = logger
        self._con = con
        self._symbol_replace_value = symbol_replace_value
        self._index_list = index_list
        self._verbosity_level = verbosity_level
        self._format_name = format_name
        self._encoding = encoding

        self._schema_extractor = SQLiteSchemaExtractor(con)
        self._result_counter = ResultCounter()
        self._result_logger = ResultLogger(
            logger, self._schema_extractor, self._result_counter, self._verbosity_level
        )
        self._table_creator = TableCreator(
            logger=self._logger,
            dst_con=con,
            result_logger=self._result_logger,
            verbosity_level=verbosity_level,
        )

        # Bind the source-info model to the destination connection.
        # NOTE(review): ``create()`` presumably creates the backing table -- confirm.
        SourceInfo.attach(con, is_hidden=True)
        SourceInfo.create()

    def _fetch_source_id(self, source_info):
        """Fetch the ``source_id`` of the stored row that matches ``source_info``.

        ``base_name`` and ``format_name`` are always part of the condition;
        ``dir_name`` is added when truthy, ``size`` and ``mtime`` when not ``None``.

        :return: The value returned by ``fetch_value`` for the combined condition.
        """
        where_list = [
            Where("base_name", source_info.base_name),
            Where("format_name", source_info.format_name),
        ]

        if source_info.dir_name:
            where_list.append(Where("dir_name", source_info.dir_name))
        if source_info.size is not None:
            where_list.append(Where("size", source_info.size))
        if source_info.mtime is not None:
            where_list.append(Where("mtime", source_info.mtime))

        return self._con.fetch_value(
            select=Attr("source_id"), table_name=SourceInfo.get_table_name(), where=And(where_list)
        )

    def _fetch_next_source_id(self):
        """Return the next unused source id: ``MAX(source_id) + 1``, or 1 if none exists."""
        source_id = self._con.fetch_value(
            select="MAX({})".format("source_id"), table_name=SourceInfo.get_table_name()
        )

        if source_id is None:
            return 1

        return source_id + 1

    def get_return_code(self):
        """Return the return code computed by the result counter."""
        return self._result_counter.get_return_code()

    def get_success_count(self):
        """Return the number of successes recorded by the result counter."""
        return self._result_counter.success_count

    def normalize_table(self, table_data, dup_col_handler=None):
        """Sanitize ``table_data`` for SQLite and optionally replace header symbols.

        :param table_data: Table data to normalize.
        :param dup_col_handler:
            Handler for duplicated column names;
            ``DEFAULT_DUP_COL_HANDLER`` is used when ``None``.
        :return:
            The sanitized table data. When ``symbol_replace_value`` is not
            ``None``, a new ``TableData`` whose headers had unprintable
            characters removed and symbols replaced with that value.
        """
        from tabledata import TableData
        from pathvalidate import replace_symbol, replace_unprintable_char
        from simplesqlite import SQLiteTableDataSanitizer

        if dup_col_handler is None:
            dup_col_handler = DEFAULT_DUP_COL_HANDLER

        normalized_table_data = SQLiteTableDataSanitizer(
            table_data, dup_col_handler=dup_col_handler
        ).normalize()

        if self._symbol_replace_value is None:
            return normalized_table_data

        return TableData(
            table_name=normalized_table_data.table_name,
            header_list=[
                replace_symbol(
                    replace_unprintable_char(header),
                    self._symbol_replace_value,
                    is_replace_consecutive_chars=True,
                    is_strip=True,
                )
                for header in normalized_table_data.header_list
            ],
            row_list=normalized_table_data.row_list,
            dp_extractor=normalized_table_data.dp_extractor,
        )

    def write_completion_message(self):
        """Log a summary of the conversion results and the database path.

        The summary is logged at info level. When at least one conversion
        succeeded, the database path is logged at info level and the schema
        dump at debug level; otherwise the database path is logged at debug
        level only.
        """
        logger = self._logger

        logger.debug("----- {:s} completed -----".format(PROGRAM_NAME))

        log_list = [
            "source={}".format(
                bright(
                    self._con.fetch_value(
                        select="COUNT(DISTINCT({}))".format("source_id"),
                        table_name=SourceInfo.get_table_name(),
                    )
                )
            )
        ]

        # Only non-zero counters are included in the summary line.
        if self.get_success_count() > 0:
            log_list.append(green("success={}".format(bright(self.get_success_count()))))
        if self._result_counter.fail_count > 0:
            log_list.append(red("fail={}".format(bright(self._result_counter.fail_count))))
        if self._result_counter.skip_count > 0:
            log_list.append(yellow("skip={}".format(bright(self._result_counter.skip_count))))
        if self._result_counter.created_table_count > 0:
            log_list.append(
                "created-table={}".format(bright(self._result_counter.created_table_count))
            )

        logger.info("converted results: {}".format(", ".join(log_list)))

        database_path_msg = "database path: {:s}".format(
            bright(Path(self._con.database_path).relpath())
        )

        if self.get_success_count() > 0:
            output_format, verbosity_level = self.__get_dump_param()
            logger.info(database_path_msg)

            try:
                from textwrap import indent
            except ImportError:
                # for Python 2 compatibility: fall back to no indentation
                def indent(value, _):
                    return value

            logger.debug(
                "----- database schema -----\n{}".format(
                    indent(
                        self._schema_extractor.dumps(
                            output_format=output_format, verbosity_level=verbosity_level
                        ),
                        " ",
                    )
                )
            )
        else:
            logger.debug(database_path_msg)

    def _convert_nb(self, nb, source_info):
        """Convert a notebook via ``convert_nb`` and report when nothing was converted.

        :param nb: Notebook object forwarded to ``convert_nb``.
        :param source_info: Source description; ``base_name`` is used in the warning.
        :return:
            The value returned by ``convert_nb``, or ``None`` when the
            success counter did not change during the conversion.
        """
        success_count_before = self._result_counter.success_count
        created_table_set = convert_nb(
            logger=self._logger,
            source_info=source_info,
            con=self._con,
            result_logger=self._result_logger,
            nb=nb,
        )

        if self._result_counter.success_count == success_count_before:
            # Use ``warning`` rather than the ``warn`` alias, which the
            # standard ``logging`` module deprecated and later removed.
            self._logger.warning(TABLE_NOT_FOUND_MSG_FORMAT.format(source_info.base_name))
            return None

        return created_table_set

    def _convert_complex_json(self, json_loader, source_info):
        """Convert nested JSON data to tables with ``DictConverter``.

        The conversion is best-effort: an ``AttributeError`` raised while
        loading or converting is logged at debug level and otherwise ignored.

        :param json_loader: Loader providing ``load_dict()``.
        :param source_info: Source description forwarded to ``DictConverter``.
        :return: The converter's ``converted_table_name_set``.
        """
        from .._dict_converter import DictConverter

        dict_converter = DictConverter(
            self._logger, self._table_creator, source_info=source_info, index_list=self._index_list
        )

        try:
            dict_converter.to_sqlite_table(json_loader.load_dict(), [])
        except AttributeError as e:
            # Keep the best-effort behavior, but leave a trace instead of
            # silently discarding the error.
            self._logger.debug("skipped complex JSON conversion: {}".format(e))

        return dict_converter.converted_table_name_set

    def __get_dump_param(self):
        """Choose the ``(output_format, verbosity_level)`` pair for the schema dump.

        :return:
            ``("rst_simple_table", verbosity_level)`` when ``pytablewriter``
            is importable; otherwise ``("text", MAX_VERBOSITY_LEVEL)`` for a
            verbosity level of 1 or more and ``("text", 1)`` for level 0.
        :raises ValueError: If ``pytablewriter`` is missing and the level is below 0.
        """
        found_ptw = True
        try:
            import pytablewriter  # noqa: W0611
        except ImportError:
            found_ptw = False

        if found_ptw:
            return ("rst_simple_table", self._verbosity_level)

        if self._verbosity_level >= 1:
            return ("text", MAX_VERBOSITY_LEVEL)

        if self._verbosity_level == 0:
            return ("text", 1)

        raise ValueError("invalid verbosity_level: {}".format(self._verbosity_level))