def test_db(mock_ast):
    mock_ast.DB.return_value = Mock()

    result = query.db("foo")
    mock_ast.DB.assert_called_once_with("foo")

    assert result == mock_ast.DB.return_value
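
The `mock_ast` fixture is not part of the snippet above. A minimal sketch of what it could look like, assuming the test targets the rethinkdb Python driver (where `query.db` simply delegates to the `ast` module) and uses pytest with `unittest.mock`:

import pytest
from unittest.mock import patch


@pytest.fixture
def mock_ast():
    # Patch the ast module that rethinkdb.query delegates to, so query.db()
    # can be asserted against without building a real ReQL term (sketch only).
    with patch("rethinkdb.query.ast") as mocked:
        yield mocked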
Example #2
def check_minimum_version(options, minimum_version='1.6'):
    minimum_version = distutils.version.LooseVersion(minimum_version)
    version_string = options.retryQuery('get server version', query.db(
        'rethinkdb').table('server_status')[0]['process']['version'])

    matches = re.match(r'rethinkdb (?P<version>(\d+)\.(\d+)\.(\d+)).*', version_string)

    if not matches:
        raise RuntimeError("invalid version string format: %s" % version_string)

    if distutils.version.LooseVersion(matches.group('version')) < minimum_version:
        raise RuntimeError("Incompatible version, expected >= %s got: %s" % (minimum_version, version_string))
Example #3
def check_minimum_version(options,
                          minimum_version="1.6",
                          raise_exception=True):
    minimum_version = distutils.version.LooseVersion(minimum_version)
    version_string = options.retryQuery(
        "get server version",
        query.db("rethinkdb").table("server_status")[0]["process"]["version"],
    )

    matches = re.match(
        r"(rethinkdb|rebirthdb) (?P<version>(\d+)\.(\d+)\.(\d+)).*",
        version_string)

    if not matches:
        raise RuntimeError("invalid version string format: %s" %
                           version_string)

    if distutils.version.LooseVersion(
            matches.group("version")) < minimum_version:
        if raise_exception:
            raise RuntimeError("Incompatible version, expected >= %s got: %s" %
                               (minimum_version, version_string))
        return False
    return True
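
Hypothetical call sites showing the two behaviours controlled by raise_exception (the options object comes from the surrounding CLI plumbing and is assumed here):

check_minimum_version(options, "2.3")  # raises RuntimeError on an older server
if not check_minimum_version(options, "2.3", raise_exception=False):
    print("server too old, skipping optional step")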
Example #4
def run(options):
    # Make sure this isn't a pre-`reql_admin` cluster - which could result in data loss
    # if the user has a database named 'rethinkdb'
    utils_common.check_minimum_version(options, '1.6')

    # get the complete list of tables
    db_table_set = set()
    all_tables = [
        utils_common.DbTable(x['db'], x['name']) for x in options.retryQuery(
            'list tables',
            query.db('rethinkdb').table('table_config').pluck(['db', 'name']))
    ]
    if not options.db_tables:
        db_table_set = all_tables  # default to all tables
    else:
        all_databases = options.retryQuery(
            'list dbs',
            query.db_list().filter(query.row.ne('rethinkdb')))
        for db_table in options.db_tables:
            db, table = db_table

            if db == 'rethinkdb':
                raise AssertionError(
                    'Can not export tables from the system database')

            if db not in all_databases:
                raise RuntimeError("Error: Database '%s' not found" % db)

            if table is None:  # This is just a db name, implicitly selecting all tables in that db
                db_table_set.update(set([x for x in all_tables if x.db == db]))
            else:
                if utils_common.DbTable(db, table) not in all_tables:
                    raise RuntimeError("Error: Table not found: '%s.%s'" %
                                       (db, table))
                db_table_set.add(db_table)

    # Determine the actual number of client processes we'll have
    options.clients = min(options.clients, len(db_table_set))

    # create the working directory and its structure
    parent_dir = os.path.dirname(options.directory)
    if not os.path.isdir(parent_dir):
        if os.path.exists(parent_dir):
            raise RuntimeError(
                "Output parent directory is not a directory: %s" % parent_dir)
        try:
            os.makedirs(parent_dir)
        except OSError as e:
            raise optparse.OptionValueError(
                "Unable to create parent directory for %s: %s" %
                (parent_dir, e.strerror))
    working_dir = tempfile.mkdtemp(prefix=os.path.basename(options.directory) +
                                   '_partial_',
                                   dir=os.path.dirname(options.directory))
    try:
        for db in set([database for database, _ in db_table_set]):
            os.makedirs(os.path.join(working_dir, str(db)))
    except OSError as e:
        raise RuntimeError("Failed to create temporary directory (%s): %s" %
                           (e.filename, e.strerror))

    # Run the export
    run_clients(options, working_dir, db_table_set)

    # Move the temporary directory structure over to the original output directory
    try:
        if os.path.isdir(options.directory):
            # an empty directory is created here when using _dump
            os.rmdir(options.directory)
        elif os.path.exists(options.directory):
            raise Exception('There was a file at the output location: %s' %
                            options.directory)
        os.rename(working_dir, options.directory)
    except OSError as e:
        raise RuntimeError(
            "Failed to move temporary directory to output directory (%s): %s" %
            (options.directory, e.strerror))
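
The utils_common.DbTable helper used throughout these examples behaves like a two-field named tuple keyed by database and table name; a representative sketch (the exact upstream definition may differ):

from collections import namedtuple

# Hashable (db, table) pair, so it can be collected into sets and compared
DbTable = namedtuple("DbTable", ["db", "table"])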
Example #5
def run_clients(options, workingDir, db_table_set):
    # Spawn one client for each db.table, up to options.clients at a time
    exit_event = multiprocessing.Event()
    processes = []
    if six.PY3:
        ctx = multiprocessing.get_context(multiprocessing.get_start_method())
        error_queue = SimpleQueue(ctx=ctx)
    else:
        error_queue = SimpleQueue()
    interrupt_event = multiprocessing.Event()
    sindex_counter = multiprocessing.Value(ctypes.c_longlong, 0)
    hook_counter = multiprocessing.Value(ctypes.c_longlong, 0)

    signal.signal(signal.SIGINT,
                  lambda a, b: abort_export(a, b, exit_event, interrupt_event))
    errors = []

    try:
        progress_info = []
        arg_lists = []
        for db, table in db_table_set:

            tableSize = int(
                options.retryQuery(
                    "count",
                    query.db(db).table(table).info()['doc_count_estimates'].sum()))

            progress_info.append(
                (multiprocessing.Value(ctypes.c_longlong, 0),
                 multiprocessing.Value(ctypes.c_longlong, tableSize)))
            arg_lists.append((
                db,
                table,
                workingDir,
                options,
                error_queue,
                progress_info[-1],
                sindex_counter,
                hook_counter,
                exit_event,
            ))

        # Wait for all tables to finish
        while processes or arg_lists:
            time.sleep(0.1)

            while not error_queue.empty():
                exit_event.set()  # Stop immediately if an error occurs
                errors.append(error_queue.get())

            processes = [
                process for process in processes if process.is_alive()
            ]

            if len(processes) < options.clients and len(arg_lists) > 0:
                new_process = multiprocessing.Process(target=export_table,
                                                      args=arg_lists.pop(0))
                new_process.start()
                processes.append(new_process)

            update_progress(progress_info, options)

        # If we were successful, make sure 100% progress is reported
        # (rows could have been deleted which would result in being done at less than 100%)
        if not errors and not interrupt_event.is_set() and not options.quiet:
            utils_common.print_progress(1.0, indent=4)

        # Continue past the progress output line and print total rows processed
        def plural(num, text, plural_text):
            return "%d %s" % (num, text if num == 1 else plural_text)

        if not options.quiet:
            print(
                "\n    %s exported from %s, with %s, and %s" % (
                    plural(sum(max(0, info[0].value) for info in progress_info),
                           "row", "rows"),
                    plural(len(db_table_set), "table", "tables"),
                    plural(sindex_counter.value, "secondary index",
                           "secondary indexes"),
                    plural(hook_counter.value, "hook function",
                           "hook functions")))
    finally:
        signal.signal(signal.SIGINT, signal.SIG_DFL)

    if interrupt_event.is_set():
        raise RuntimeError("Interrupted")

    if len(errors) != 0:
        # multiprocessing queues don't handle tracebacks, so they've already been stringified in the queue
        for error in errors:
            print("%s" % error[1], file=sys.stderr)
            if options.debug:
                print("%s traceback: %s" % (error[0].__name__, error[2]),
                      file=sys.stderr)
        raise RuntimeError("Errors occurred during export")
Example #6
def export_table(db, table, directory, options, error_queue, progress_info,
                 sindex_counter, hook_counter, exit_event):
    signal.signal(
        signal.SIGINT, signal.SIG_DFL
    )  # prevent signal handlers from being set in child processes

    writer = None

    try:
        # -- get table info

        table_info = options.retryQuery('table info: %s.%s' % (db, table),
                                        query.db(db).table(table).info())

        # Rather than just the index names, store all index information
        table_info['indexes'] = options.retryQuery(
            'table index data %s.%s' % (db, table),
            query.db(db).table(table).index_status(),
            run_options={'binary_format': 'raw'})

        table_info['write_hook'] = options.retryQuery(
            'table write hook data %s.%s' % (db, table),
            query.db(db).table(table).get_write_hook(),
            run_options={'binary_format': 'raw'})

        if table_info['write_hook'] is not None:
            hook_counter.value += 1

        with open(os.path.join(directory, db, table + '.info'),
                  'w') as info_file:
            info_file.write(json.dumps(table_info) + "\n")
        with sindex_counter.get_lock():
            sindex_counter.value += len(table_info["indexes"])
        # -- start the writer
        if six.PY3:
            ctx = multiprocessing.get_context(
                multiprocessing.get_start_method())
            task_queue = SimpleQueue(ctx=ctx)
        else:
            task_queue = SimpleQueue()

        if options.format == "json":
            filename = directory + "/%s/%s.json" % (db, table)
            writer = multiprocessing.Process(target=json_writer,
                                             args=(filename, options.fields,
                                                   task_queue, error_queue,
                                                   options.format))
        elif options.format == "csv":
            filename = directory + "/%s/%s.csv" % (db, table)
            writer = multiprocessing.Process(target=csv_writer,
                                             args=(filename, options.fields,
                                                   options.delimiter,
                                                   task_queue, error_queue))
        elif options.format == "ndjson":
            filename = directory + "/%s/%s.ndjson" % (db, table)
            writer = multiprocessing.Process(target=json_writer,
                                             args=(filename, options.fields,
                                                   task_queue, error_queue,
                                                   options.format))
        else:
            raise RuntimeError("unknown format type: %s" % options.format)
        writer.start()

        # -- read in the data source

        # -

        lastPrimaryKey = None
        read_rows = 0
        run_options = {"time_format": "raw", "binary_format": "raw"}
        if options.outdated:
            run_options["read_mode"] = "outdated"
        cursor = options.retryQuery('initial cursor for %s.%s' % (db, table),
                                    query.db(db).table(table).order_by(
                                        index=table_info["primary_key"]),
                                    run_options=run_options)
        while not exit_event.is_set():
            try:
                for row in cursor:
                    # bail on exit
                    if exit_event.is_set():
                        break

                    # add to the output queue
                    task_queue.put([row])
                    lastPrimaryKey = row[table_info["primary_key"]]
                    read_rows += 1

                    # Update the progress every 20 rows
                    if read_rows % 20 == 0:
                        progress_info[0].value = read_rows

                else:
                    # Export is done - since we used estimates earlier, update the actual table size
                    progress_info[0].value = read_rows
                    progress_info[1].value = read_rows
                    break

            except (errors.ReqlTimeoutError, errors.ReqlDriverError):
                # connection problem, re-setup the cursor
                try:
                    cursor.close()
                except errors.ReqlError as exc:
                    default_logger.exception(exc)

                cursor = options.retryQuery(
                    'backup cursor for %s.%s' % (db, table),
                    query.db(db).table(table).between(
                        lastPrimaryKey, None, left_bound="open").order_by(
                            index=table_info["primary_key"]),
                    run_options=run_options)

    except (errors.ReqlError, errors.ReqlDriverError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message),
                         traceback.extract_tb(sys.exc_info()[2])))
    except BaseException:
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb)))
    finally:
        if writer and writer.is_alive():
            task_queue.put(StopIteration())
            writer.join()
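
The writer processes (json_writer, csv_writer) are not shown; a minimal sketch of the queue contract export_table assumes, namely draining task_queue until a StopIteration sentinel arrives (field filtering and error reporting are omitted here):

import json


def json_writer(filename, fields, task_queue, error_queue, output_format):
    with open(filename, "w") as out:
        if output_format == "json":
            out.write("[")
        first = True
        while True:
            item = task_queue.get()
            if isinstance(item, StopIteration):  # sentinel sent by export_table
                break
            for row in item:  # each queue item is a small batch of rows
                if output_format == "json" and not first:
                    out.write(",")
                out.write("\n" + json.dumps(row))
                first = False
        if output_format == "json":
            out.write("\n]\n")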
Example #7
def rebuild_indexes(options):

    # flesh out options.db_table
    if not options.db_table:
        options.db_table = [
            utils_common.DbTable(x["db"], x["name"])
            for x in options.retryQuery(
                "all tables",
                query.db("rethinkdb").table("table_config").pluck(["db", "name"]),
            )
        ]
    else:
        for db_table in options.db_table[:]:  # work from a copy
            if not db_table[1]:
                options.db_table += [
                    utils_common.DbTable(db_table[0], x)
                    for x in options.retryQuery(
                        "table list of %s" % db_table[0],
                        query.db(db_table[0]).table_list(),
                    )
                ]
                options.db_table.remove(db_table)

    # wipe out any indexes with the TMP_INDEX_PREFIX
    for db, table in options.db_table:
        for index in options.retryQuery(
            "list indexes on %s.%s" % (db, table),
            query.db(db).table(table).index_list(),
        ):
            if index.startswith(TMP_INDEX_PREFIX):
                options.retryQuery(
                    "drop index: %s.%s:%s" % (db, table, index),
                    query.db(index["db"])
                    .table(index["table"])
                    .index_drop(index["name"]),
                )

    # get the list of indexes to rebuild
    indexes_to_build = []
    for db, table in options.db_table:
        indexes = None
        if not options.force:
            indexes = options.retryQuery(
                "get outdated indexes from %s.%s" % (db, table),
                query.db(db)
                .table(table)
                .index_status()
                .filter({"outdated": True})
                .get_field("index"),
            )
        else:
            indexes = options.retryQuery(
                "get all indexes from %s.%s" % (db, table),
                query.db(db).table(table).index_status().get_field("index"),
            )
        for index in indexes:
            indexes_to_build.append({"db": db, "table": table, "name": index})

    # rebuild selected indexes

    total_indexes = len(indexes_to_build)
    indexes_completed = 0
    progress_ratio = 0.0
    highest_progress = 0.0
    indexes_in_progress = []

    if not options.quiet:
        print(
            "Rebuilding %d index%s: %s"
            % (
                total_indexes,
                "es" if total_indexes > 1 else "",
                ", ".join(
                    ["`%(db)s.%(table)s:%(name)s`" % i for i in indexes_to_build]
                ),
            )
        )

    while len(indexes_to_build) > 0 or len(indexes_in_progress) > 0:
        # Make sure we're running the right number of concurrent index rebuilds
        while (
            len(indexes_to_build) > 0 and len(indexes_in_progress) < options.concurrent
        ):
            index = indexes_to_build.pop()
            indexes_in_progress.append(index)
            index["temp_name"] = TMP_INDEX_PREFIX + index["name"]
            index["progress"] = 0
            index["ready"] = False

            existing_indexes = dict(
                (x["index"], x["function"])
                for x in options.retryQuery(
                    "existing indexes",
                    query.db(index["db"])
                    .table(index["table"])
                    .index_status()
                    .pluck("index", "function"),
                )
            )

            if index["name"] not in existing_indexes:
                raise AssertionError(
                    "{index_name} is not part of existing indexes {indexes}".format(
                        index_name=index["name"], indexes=", ".join(existing_indexes)
                    )
                )

            if index["temp_name"] not in existing_indexes:
                options.retryQuery(
                    "create temp index: %(db)s.%(table)s:%(name)s" % index,
                    query.db(index["db"])
                    .table(index["table"])
                    .index_create(index["temp_name"], existing_indexes[index["name"]]),
                )

        # Report progress
        highest_progress = max(highest_progress, progress_ratio)
        if not options.quiet:
            utils_common.print_progress(highest_progress)

        # Check the status of indexes in progress
        progress_ratio = 0.0
        for index in indexes_in_progress:
            status = options.retryQuery(
                "progress `%(db)s.%(table)s` index `%(name)s`" % index,
                query.db(index["db"])
                .table(index["table"])
                .index_status(index["temp_name"])
                .nth(0),
            )
            if status["ready"]:
                index["ready"] = True
                options.retryQuery(
                    "rename `%(db)s.%(table)s` index `%(name)s`" % index,
                    query.db(index["db"])
                    .table(index["table"])
                    .index_rename(index["temp_name"], index["name"], overwrite=True),
                )
            else:
                progress_ratio += status.get("progress", 0) / total_indexes

        indexes_in_progress = [
            index for index in indexes_in_progress if not index["ready"]
        ]
        indexes_completed = (
            total_indexes - len(indexes_to_build) - len(indexes_in_progress)
        )
        progress_ratio += float(indexes_completed) / total_indexes

        if len(indexes_in_progress) == options.concurrent or (
            len(indexes_in_progress) > 0 and len(indexes_to_build) == 0
        ):
            # Short sleep to keep from killing the CPU
            time.sleep(0.1)

    # Make sure the progress bar says we're done and get past the progress bar line
    if not options.quiet:
        utils_common.print_progress(1.0)
        print("")
Example #8
def rebuild_indexes(options):

    # flesh out options.db_table
    if not options.db_table:
        options.db_table = [
            utils_common.DbTable(x['db'], x['name'])
            for x in options.retryQuery(
                'all tables',
                query.db('rethinkdb').table('table_config').pluck(
                    ['db', 'name']))
        ]
    else:
        for db_table in options.db_table[:]:  # work from a copy
            if not db_table[1]:
                options.db_table += [
                    utils_common.DbTable(db_table[0], x)
                    for x in options.retryQuery(
                        'table list of %s' % db_table[0],
                        query.db(db_table[0]).table_list())
                ]
                options.db_table.remove(db_table)

    # wipe out any indexes with the TMP_INDEX_PREFIX
    for db, table in options.db_table:
        for index in options.retryQuery(
                'list indexes on %s.%s' % (db, table),
                query.db(db).table(table).index_list()):
            if index.startswith(TMP_INDEX_PREFIX):
                options.retryQuery(
                    'drop index: %s.%s:%s' % (db, table, index),
                    query.db(db).table(table).index_drop(index))

    # get the list of indexes to rebuild
    indexes_to_build = []
    for db, table in options.db_table:
        indexes = None
        if not options.force:
            indexes = options.retryQuery(
                'get outdated indexes from %s.%s' % (db, table),
                query.db(db).table(table).index_status().filter(
                    {'outdated': True}).get_field('index'))
        else:
            indexes = options.retryQuery(
                'get all indexes from %s.%s' % (db, table),
                query.db(db).table(table).index_status().get_field('index'))
        for index in indexes:
            indexes_to_build.append({'db': db, 'table': table, 'name': index})

    # rebuild selected indexes

    total_indexes = len(indexes_to_build)
    indexes_completed = 0
    progress_ratio = 0.0
    highest_progress = 0.0
    indexes_in_progress = []

    if not options.quiet:
        print(
            "Rebuilding %d index%s: %s" %
            (total_indexes, 'es' if total_indexes > 1 else '', ", ".join(
                ["`%(db)s.%(table)s:%(name)s`" % i
                 for i in indexes_to_build])))

    while len(indexes_to_build) > 0 or len(indexes_in_progress) > 0:
        # Make sure we're running the right number of concurrent index rebuilds
        while len(indexes_to_build) > 0 and len(
                indexes_in_progress) < options.concurrent:
            index = indexes_to_build.pop()
            indexes_in_progress.append(index)
            index['temp_name'] = TMP_INDEX_PREFIX + index['name']
            index['progress'] = 0
            index['ready'] = False

            existing_indexes = dict(
                (x['index'], x['function']) for x in options.retryQuery(
                    'existing indexes',
                    query.db(index['db']).table(index['table']).index_status().
                    pluck('index', 'function')))

            if index['name'] not in existing_indexes:
                raise AssertionError(
                    '{index_name} is not part of existing indexes {indexes}'.
                    format(index_name=index['name'],
                           indexes=', '.join(existing_indexes)))

            if index['temp_name'] not in existing_indexes:
                options.retryQuery(
                    'create temp index: %(db)s.%(table)s:%(name)s' % index,
                    query.db(index['db']).table(index['table']).index_create(
                        index['temp_name'], existing_indexes[index['name']]))

        # Report progress
        highest_progress = max(highest_progress, progress_ratio)
        if not options.quiet:
            utils_common.print_progress(highest_progress)

        # Check the status of indexes in progress
        progress_ratio = 0.0
        for index in indexes_in_progress:
            status = options.retryQuery(
                "progress `%(db)s.%(table)s` index `%(name)s`" % index,
                query.db(index['db']).table(index['table']).index_status(
                    index['temp_name']).nth(0))
            if status['ready']:
                index['ready'] = True
                options.retryQuery(
                    "rename `%(db)s.%(table)s` index `%(name)s`" % index,
                    query.db(index['db']).table(index['table']).index_rename(
                        index['temp_name'], index['name'], overwrite=True))
            else:
                progress_ratio += status.get('progress', 0) / total_indexes

        indexes_in_progress = [
            index for index in indexes_in_progress if not index['ready']
        ]
        indexes_completed = total_indexes - len(indexes_to_build) - len(
            indexes_in_progress)
        progress_ratio += float(indexes_completed) / total_indexes

        if len(indexes_in_progress) == options.concurrent or \
           (len(indexes_in_progress) > 0 and len(indexes_to_build) == 0):
            # Short sleep to keep from killing the CPU
            time.sleep(0.1)

    # Make sure the progress bar says we're done and get past the progress bar line
    if not options.quiet:
        utils_common.print_progress(1.0)
        print("")