Example #1
def verify_checksum():
    """Verfies that the backup checksum matches the imported data checksum.

  If verification fails we halt the temp file cleanup so the user can re-check
  the checksum files and decide how to proceed.
  """
    if cloudsql_importer_constants.VERIFY_CHECKSUM:
        checksumed_tables = checksum.get_checksum(cloudsql_importer_constants)

        checksum_file_name = (cloudsql_importer_constants.DATABASE_NAME +
                              cloudsql_importer_constants.CHECKSUM_FILE_SUFFIX)

        import_checksum_path = os.path.join(
            cloudsql_importer_constants.DATABASE_TEMP_FOLDER,
            checksum_file_name)
        backup_checksum_path = os.path.join(
            cloudsql_importer_constants.MYSQL_BACKUP_FOLDER,
            checksum_file_name)

        file_writer.write(import_checksum_path, checksumed_tables)

        checksum.compare_checksum_files(import_checksum_path,
                                        backup_checksum_path)
    else:
        logging.info("Skipping checksum verification")
Example #2
def handle_restore_secondary_indexes_failures(failed_commands,
                                              commands_and_files):
    """Handles Restore Secondary Indexes failures.

  Logs out the list of commands that failed to be executed; deletes and
  recreates restore secondary indexes folder and saves the newly failing
  commands in case the user attempts to retry the operation.

  Args:
    failed_commands: List of groups of failing commands.
    commands_and_files: Dict containing the commands were executed and their
      filesystem path.
  """

    logging.warning("Not all secondary indexes were restored.")
    dir_util.remove_tree(
        cloudsql_importer_constants.RESTORE_INDEXES_COMMANDS_FOLDER)
    dir_util.mkpath(
        cloudsql_importer_constants.RESTORE_INDEXES_COMMANDS_FOLDER)

    for failed_commands_group in failed_commands:
        failed_commands_path = os.path.join(
            cloudsql_importer_constants.RESTORE_INDEXES_COMMANDS_FOLDER,
            commands_and_files[failed_commands_group])

        logging.warning(
            "One or more of the following restore indexes commands"
            " failed:\n%s", failed_commands_group)
        logging.info(
            "The failed restore indexes commands will be saved in order to"
            " be retried in a follow-up execution. If these keep failing or"
            " you just wish to skip processing them delete the contents of file"
            " '%s'", failed_commands_path)

        file_writer.write(failed_commands_path, failed_commands_group)
Example #3
def checksum_tables():
    """Get the checksums for the Database's tables and stores them in a file."""
    logging.info("Checksumming exported tables")

    checksumed_tables = checksum.get_checksum(mysql_exporter_constants)
    checksum_path = os.path.join(
        mysql_exporter_constants.TEMP_FOLDER,
        mysql_exporter_constants.DATABASE_NAME +
        mysql_exporter_constants.CHECKSUM_FILE_SUFFIX)
    file_writer.write(checksum_path, checksumed_tables)
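checksum.get_checksum appears in Examples #1 and #3, but its body is not part
of this listing. A plausible sketch, assuming Python 3 and that it runs MySQL's
CHECKSUM TABLE statement through the project's sql_wrapper module; only
call_show_tables and call_mysql_with_stdin_command are wrapper functions seen
elsewhere in this listing, everything else here is an assumption:

import io

def get_checksum(constants):
    """Hypothetical sketch: checksums every table with CHECKSUM TABLE."""
    raw_tables = sql_wrapper.call_show_tables(constants)
    database_tables = filter(bool, raw_tables.split("\n"))

    statement = "CHECKSUM TABLE %s;" % ", ".join(database_tables)

    # Assumes call_mysql_with_stdin_command accepts any file-like object, so
    # the statement is wrapped in an in-memory stream.
    return sql_wrapper.call_mysql_with_stdin_command(
        io.StringIO(statement), constants, [])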
Example #4
def prepare_secondary_index_script(input_file, output_file, replacements):
    """Creates secondary index scripts to be run against CloudSQL.

  Replaces the <source_database_name> placeholder if it's in the provided file
  and stores the new version in the path defined by the output_file parameter.

  Args:
    input_file: The file that contains the pattern to replace
    output_file: The file to write
    replacements: A dict containing the patterns to replace as keys and the
      actual text as values.
  """
    replaced_text = replace_pattern_in_file(input_file, replacements)
    file_writer.write(output_file, replaced_text)
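replace_pattern_in_file is referenced here but defined elsewhere. A minimal
sketch consistent with the docstring, assuming a plain substring replacement
for each entry in the replacements dict:

def replace_pattern_in_file(input_file, replacements):
    """Hypothetical sketch: substitutes every placeholder in the template."""
    with open(input_file, "r") as template_file:
        text = template_file.read()

    for pattern, replacement in replacements.items():
        text = text.replace(pattern, replacement)

    return text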
Example #5
    def test_write_file_with_no_path(self, os_mock, open_mock, dir_util_mock):
        file_dirname = ""
        test_file = file_dirname + "test_file.txt"
        test_contents = "Lorem Ipsum"

        os_mock.path.dirname.return_value = file_dirname

        output_mock = mock.Mock()
        open_mock.return_value.__enter__.return_value = output_mock

        file_writer.write(test_file, test_contents)
        os_mock.path.dirname.assert_called_with(test_file)
        dir_util_mock.mkpath.assert_called_with(file_dirname)
        open_mock.assert_called_with(test_file, "w")
        output_mock.write.assert_called_with(test_contents)
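The mocked calls in this test pin down the behavior of file_writer.write
almost completely: resolve the destination directory, create it, then write
the contents. A sketch consistent with those expectations; the optional mode
parameter is inferred from Example #7, which passes "a" to append, and the
real module is not shown in this listing:

import os
from distutils import dir_util


def write(file_path, contents, mode="w"):
    """Hypothetical sketch reconstructed from the test's expectations."""
    # Make sure the destination folder exists before opening the file.
    file_dirname = os.path.dirname(file_path)
    dir_util.mkpath(file_dirname)

    with open(file_path, mode) as output:
        output.write(contents)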
Example #6
def export_schema():
    """Exports the database schema into a sql file."""
    logging.info("Exporting the schema for database '%s'",
                 mysql_exporter_constants.DATABASE_NAME)

    options = ["--no-data"]
    schema_file = sql_wrapper.call_mysqldump(mysql_exporter_constants, options)

    logging.debug("Schema dump output:\n%s", schema_file)

    schema_path = os.path.join(
        mysql_exporter_constants.TEMP_FOLDER,
        mysql_exporter_constants.DATABASE_NAME +
        mysql_exporter_constants.SCHEMA_FILE_SUFFIX)
    file_writer.write(schema_path, schema_file)
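sql_wrapper.call_mysqldump is the project's wrapper around the mysqldump CLI;
its body is not in this listing. A rough sketch of the idea using subprocess,
where DATABASE_HOST and DATABASE_USER are hypothetical constant names, not
names confirmed by the source:

import subprocess


def call_mysqldump(constants, options):
    """Hypothetical sketch: shells out to mysqldump and returns the dump."""
    command = (["mysqldump",
                "--host=" + constants.DATABASE_HOST,   # assumed constant
                "--user=" + constants.DATABASE_USER]   # assumed constant
               + options + [constants.DATABASE_NAME])
    return subprocess.check_output(command)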
Example #7
def chunk_by_line(data_file_name, constants):
  """Chunks provided files in a memory efficient manner.

  Chunks files based on constants.CHUNKS, the chunks are
  processed by reading data into a ~512MB buffer and appending the retrieved
  data into the chunk file. When the file reaches it's max chunk size the
  destination folder is rotated and a new file is written.

  Note: Small data_files may end up being chunked in less files than requested
  as readlines byte hint usually reads more bytes than requested. Nevertheless,
  these chunked files should contain all the data present in the original file.

  Args:
    data_file_name: The name of the file being chunked
    constants: The importer / exporter constants
  """
  data_file_path = os.path.join(constants.MYSQL_BACKUP_FOLDER, data_file_name)
  data_file_size = os.path.getsize(data_file_path)

  chunk_size = (data_file_size / constants.CHUNKS)
  chunk_size = int(math.ceil(chunk_size))

  max_buffer_size = 512 * 1024 * 1024 # 512MB
  buffer_size = min(max_buffer_size, chunk_size)
  total_written_bytes = 0
  written_chunk_bytes = 0
  load_group_index = 1

  with open(data_file_path, "r") as data_file:
    while total_written_bytes < data_file_size:
      if written_chunk_bytes >= chunk_size:
        load_group_index += 1
        written_chunk_bytes = 0

      remaining_bytes_in_chunk = chunk_size - written_chunk_bytes
      bytes_to_read = min(buffer_size, remaining_bytes_in_chunk)

      chunk, chunk_bytes = get_chunk(data_file, bytes_to_read)
      total_written_bytes += chunk_bytes
      written_chunk_bytes += chunk_bytes

      chunked_file_path = os.path.join(
          constants.LOAD_GROUP_FOLDER +
          str(load_group_index), data_file_name)

      file_writer.write(chunked_file_path, chunk, "a")
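get_chunk is the helper behind the note in the docstring above: readlines with
a byte hint stops at a line boundary, so a chunk can come back slightly larger
than requested. A minimal sketch, assuming the data file is an ASCII SQL dump
so that the character count equals the byte count:

def get_chunk(data_file, bytes_to_read):
    """Hypothetical sketch: reads roughly bytes_to_read bytes of whole lines."""
    # readlines with a byte hint only stops after completing a line, which
    # keeps every row of the dump intact across chunk boundaries.
    lines = data_file.readlines(bytes_to_read)
    chunk = "".join(lines)
    return chunk, len(chunk)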
Example #8
def generate_restore_secondary_indexes_commands():
    """Saves the SQL commands that will be run to restore secondary indexes.

  Before dropping the secondary indexes to make data ingestion faster we save a
  list of SQL files containing the commands that will need to be run to restore
  said indexes once the data loading stage is finished. We save the restore
  commands for each table separately so that we can later execute them in
  parallel.
  """
    logging.info("Saving commands needed to restore secondary indexes")
    raw_tables = sql_wrapper.call_show_tables(cloudsql_importer_constants)
    database_tables = filter(bool, raw_tables.split("\n"))

    for table_name in database_tables:
        replacements = {
            "<source_database_name>":
            cloudsql_importer_constants.DATABASE_NAME,
            "<source_database_table>": table_name
        }

        restore_indexes_script_path = os.path.join(
            cloudsql_importer_constants.SECONDARY_INDEXES_SCRIPTS_FOLDER,
            table_name +
            cloudsql_importer_constants.RESTORE_INDEXES_SCRIPT_SUFFIX)

        prepare_secondary_index_script(
            cloudsql_importer_constants.RESTORE_INDEXES_TEMPLATE_PATH,
            restore_indexes_script_path, replacements)

        with open(restore_indexes_script_path, "r") as restore_indexes_script:
            options = ["--skip_column_names"]
            restore_indexes_commands_to_run = (
                sql_wrapper.call_mysql_with_stdin_command(
                    restore_indexes_script, cloudsql_importer_constants,
                    options))

            restore_indexes_commands_path = os.path.join(
                cloudsql_importer_constants.RESTORE_INDEXES_COMMANDS_FOLDER,
                table_name +
                cloudsql_importer_constants.RESTORE_INDEXES_COMMANDS_SUFFIX)

            file_writer.write(restore_indexes_commands_path,
                              restore_indexes_commands_to_run)
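The docstring notes that saving per-table command files enables parallel
restoration later. A sketch of how that follow-up stage might look, reusing
the call_mysql_with_stdin_command wrapper seen above; the function names and
the use of multiprocessing are assumptions, not confirmed by the source:

import multiprocessing
import os


def run_restore_commands_file(commands_file_name):
    """Hypothetical worker: pipes one table's saved commands through mysql."""
    commands_path = os.path.join(
        cloudsql_importer_constants.RESTORE_INDEXES_COMMANDS_FOLDER,
        commands_file_name)

    with open(commands_path, "r") as commands_file:
        sql_wrapper.call_mysql_with_stdin_command(
            commands_file, cloudsql_importer_constants, [])


def restore_secondary_indexes_in_parallel():
    """Hypothetical sketch: restores each table's indexes in parallel."""
    command_files = os.listdir(
        cloudsql_importer_constants.RESTORE_INDEXES_COMMANDS_FOLDER)

    worker_pool = multiprocessing.Pool()
    worker_pool.map(run_restore_commands_file, command_files)
    worker_pool.close()
    worker_pool.join()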