Exemplo n.º 1
0
def snapshot_converter(db: BaseDb, snapshot_d: Dict[str, Any]) -> Snapshot:
    """Rebuild a :class:`Snapshot` model object from its flat (dict)
    database representation.

    """
    columns = ["name", "target", "target_type"]
    query = """
    select %s
    from snapshot_branches sbs
    inner join snapshot_branch sb on sb.object_id=sbs.branch_id
    where sbs.snapshot_id=%%s
    """ % ", ".join(columns)

    branches = {}
    with db.cursor() as cur:
        cur.execute(query, (snapshot_d["object_id"], ))
        for name, target, target_type in cur:
            # A branch row with no target/target_type is a dangling branch:
            # represent it as None, matching the swh model convention.
            if target is None or target_type is None:
                branch: Optional[SnapshotBranch] = None
            else:
                branch = SnapshotBranch(
                    target=target,
                    target_type=TargetType(target_type),
                )
            branches[name] = branch

    return Snapshot(
        id=snapshot_d["id"],
        branches=branches,
    )
Exemplo n.º 2
0
def test_cli_swh_db_create_and_init_db_new_api(cli_runner, postgresql,
                                               mock_import_swhmodule, mocker,
                                               tmp_path):
    """Create a db then initializing it should be ok for a "new style" datastore"""
    module_name = "test.cli_new"

    conninfo = craft_conninfo(postgresql)

    # This initializes the schema and data
    cfgfile = tmp_path / "config.yml"
    config = {module_name: {"cls": "postgresql", "db": conninfo}}
    cfgfile.write_text(yaml.dump(config))

    result = cli_runner.invoke(
        swhdb, ["init-admin", module_name, "--dbname", conninfo])
    assert result.exit_code == 0, f"Unexpected output: {result.output}"

    result = cli_runner.invoke(swhdb, ["-C", cfgfile, "init", module_name])
    assert (result.exit_code == 0
            ), f"Unexpected output: {traceback.print_tb(result.exc_info[2])}"

    # the origin value in the scripts uses a hash function (which implementation wise
    # uses a function from the pgcrypt extension, installed during db creation step)
    with BaseDb.connect(conninfo).cursor() as cur:
        cur.execute("select * from origin")
        assert len(cur.fetchall()) == 1
Exemplo n.º 3
0
def iter_revision_rows(storage_dbconn: str, first_id: Sha1Git):
    """Yield non-git revision rows with metadata, as dicts keyed by
    REVISION_COLS, in ascending id order starting at ``first_id``.

    Retries (up to 60 times, one minute apart) on transient connection
    errors, reconnecting and resuming from the last seen id.
    """
    after_id = first_id
    failures = 0
    while True:
        try:
            storage_db = BaseDb.connect(storage_dbconn)
            with storage_db.cursor() as cur:
                while True:
                    # NOTE: each f-string fragment must end with a space so the
                    # concatenated SQL stays valid ('git' ORDER BY, not
                    # 'git'ORDER BY as before).
                    cur.execute(
                        f"SELECT {', '.join(REVISION_COLS)} FROM revision "
                        f"WHERE id >= %s AND metadata IS NOT NULL AND type != 'git' "
                        f"ORDER BY id LIMIT 1000",
                        (after_id, ),
                    )
                    new_rows = 0
                    row_d = None
                    for row in cur:
                        new_rows += 1
                        row_d = dict(zip(REVISION_COLS, row))
                        yield row_d
                    if new_rows == 0:
                        # empty table past first_id: nothing left to stream
                        return
                    last_id = row_d["id"]
                    if last_id == after_id and new_rows == 1:
                        # only the boundary row (already yielded) remains:
                        # with the inclusive `id >= %s` predicate we would
                        # otherwise loop on it forever
                        return
                    after_id = last_id
        except psycopg2.OperationalError as e:
            print(e)
            # most likely a temporary error, try again
            if failures >= 60:
                raise
            else:
                time.sleep(60)
                failures += 1
Exemplo n.º 4
0
def test_smoke_test_fun_db_is_still_up_and_got_reset(postgres_fun):
    """Within another test, the 'fun' db is still up and was not configured
    again, but most of its data has been reset:
    - tables 'dbversion' and 'people' were left as is
    - the other schema tables (here only "fun") got truncated
    - the sequences got truncated as well

    """
    with BaseDb.connect(postgres_fun.dsn).cursor() as cur:
        def scalar(query):
            # run a single-value query and return its first column
            cur.execute(query)
            return cur.fetchone()[0]

        # db version is excluded from the truncate
        assert scalar("select count(*) from dbversion") == 5
        # people is also allowed not to be truncated
        assert scalar("select count(*) from people") == 2
        # table and sequence are reset
        assert scalar("select count(*) from fun") == 0
        assert scalar("select nextval('serial')") == 1
Exemplo n.º 5
0
def test_cli_swh_db_initialization_idempotent(swh_db_cli, mock_package_sql, test_db):
    """Multiple runs of the init commands are idempotent

    """
    module_name = "anything"  # mocked
    cli_runner, db_params = swh_db_cli

    # run the init-admin/init sequence twice: the second round must succeed
    # exactly like the first
    for _ in range(2):
        for subcommand in ("init-admin", "init"):
            result = cli_runner.invoke(
                swhdb, [subcommand, module_name, "--db-name", db_params["dbname"]]
            )
            assert result.exit_code == 0, f"Unexpected output: {result.output}"

    # the origin values in the scripts uses a hash function (which implementation wise
    # uses a function from the pgcrypt extension, init-admin calls installs it)
    with BaseDb.connect(test_db.dsn).cursor() as cur:
        cur.execute("select * from origin")
        origins = cur.fetchall()
        assert len(origins) == 1
Exemplo n.º 6
0
    def run(self, object_type, start_object, end_object, dry_run=False):
        """Read the storage objects of ``object_type`` in the given id range
        and send them to the journal's reading topic.

        """
        start_object, end_object = self.parse_arguments(
            object_type, start_object, end_object)

        db = BaseDb.connect(self.config["storage"]["db"])
        writer = JournalWriter(
            {"cls": "kafka", **self.config["journal_writer"]})
        assert writer.journal is not None

        range_generator = RANGE_GENERATORS[object_type]
        for range_start, range_end in range_generator(start_object, end_object):
            logger.info(
                "Processing %s range %s to %s",
                object_type,
                _format_range_bound(range_start),
                _format_range_bound(range_end),
            )

            objects = fetch(db, object_type, start=range_start, end=range_end)

            if dry_run:
                # only consume the objects iterator to check for any potential
                # decoding/encoding errors
                for _ in objects:
                    pass
            else:
                writer.write_additions(object_type, objects)
Exemplo n.º 7
0
def test_smoke_test_db_no_init(postgres_no_init):
    """Even without initialization, connecting to the db works

    """
    db = BaseDb.connect(postgres_no_init.dsn)
    with db.cursor() as cur:
        cur.execute("select now()")
        row = cur.fetchone()
        assert row[0] is not None
Exemplo n.º 8
0
def db_with_data(test_db, request):
    """Fixture to initialize a db with some data out of the "INIT_SQL above"""
    db = BaseDb.connect(test_db.dsn)
    cur = db.cursor()
    with cur:
        psycopg2.extras.register_default_jsonb(cur)
        cur.execute(INIT_SQL)
    yield db
    # discard any leftover transaction state before releasing the connection
    db.conn.rollback()
    db.conn.close()
Exemplo n.º 9
0
def main(storage_dbconn, storage_url, deposit_dbconn, first_id, limit,
         dry_run):
    """Stream revision rows from the storage db and handle each one against
    the deposit db, printing progress every 1000 rows.

    """
    storage_db = BaseDb.connect(storage_dbconn)
    deposit_db = BaseDb.connect(deposit_dbconn)
    storage = get_storage(
        "pipeline",
        steps=[
            {"cls": "retry"},
            {
                "cls": "postgresql",
                "db": storage_dbconn,
                "objstorage": {"cls": "memory", "args": {}},
            },
        ],
    )

    if not dry_run:
        # Not creating authorities, as the loaders are presumably already running
        # and created them already.
        # This also helps make sure this script doesn't accidentally create
        # authorities that differ from what the loaders use.
        create_fetchers(storage_db)

    total_rows = 0
    with deposit_db.cursor() as deposit_cur:
        rows = iter_revision_rows(storage_dbconn, first_id)
        if limit is not None:
            rows = itertools.islice(rows, limit)
        for row in rows:
            handle_row(row, storage, deposit_cur, dry_run)
            total_rows += 1
            if total_rows % 1000 == 0:
                # first 4 bytes of the sha1 give a rough position in id space
                percents = (int.from_bytes(row["id"][0:4], byteorder="big") *
                            100 / (1 << 32))
                print(f"Processed {total_rows/1000000.:.2f}M rows "
                      f"(~{percents:.1f}%, last revision: {row['id'].hex()})")
Exemplo n.º 10
0
    def __init__(self, db, min_pool_conns=1, max_pool_conns=10):
        """
        Args:
            db: either a libpq connection string, or an existing psycopg2
                connection (NOTE: the docstring used to say ``db_conn``,
                which does not match the actual parameter name)
            min_pool_conns: minimum number of pooled connections (only used
                when ``db`` is a connection string)
            max_pool_conns: maximum number of pooled connections (only used
                when ``db`` is a connection string)

        """
        if isinstance(db, psycopg2.extensions.connection):
            # Wrap the provided connection directly; no pool is created.
            self._pool = None
            self._db = BaseDb(db)
        else:
            # Connection string: build a thread-safe pool; connections will
            # be checked out lazily. Rows come back as dicts (RealDictCursor).
            self._pool = psycopg2.pool.ThreadedConnectionPool(
                min_pool_conns,
                max_pool_conns,
                db,
                cursor_factory=psycopg2.extras.RealDictCursor,
            )
            self._db = None
Exemplo n.º 11
0
def test_smoke_test_people_db_up(postgres_people):
    """'people' db is up and configured"""
    with BaseDb.connect(postgres_people.dsn).cursor() as cur:
        def scalar(query):
            # run a single-value query and return its first column
            cur.execute(query)
            return cur.fetchone()[0]

        assert scalar("select count(*) from dbversion") == 5
        assert scalar("select count(*) from people") == 2
        assert scalar("select count(*) from fun") == 3
        assert scalar("select nextval('serial')") == 2
Exemplo n.º 12
0
def test_smoke_test_fun2_db_is_up(postgres_fun2):
    """This ensures the db is created and configured according to its dumps files."""
    with BaseDb.connect(postgres_fun2.dsn).cursor() as cur:
        def scalar(query):
            # run a single-value query and return its first column
            cur.execute(query)
            return cur.fetchone()[0]

        assert scalar("select count(*) from dbversion") == 5
        assert scalar("select count(*) from fun") == 3
        assert scalar("select count(*) from people") == 2
        # in data, we requested a value already so it starts at 2
        assert scalar("select nextval('serial')") == 2
Exemplo n.º 13
0
def test_cli_swh_db_initialization_with_env(swh_db_cli, mock_import_swhmodule,
                                            postgresql):
    """Init commands with standard environment variables works"""
    module_name = "test.cli"  # it's mocked here
    cli_runner, db_params = swh_db_cli

    for subcommand in ("init-admin", "init"):
        result = cli_runner.invoke(
            swhdb, [subcommand, module_name, "--dbname", db_params["dbname"]])
        assert result.exit_code == 0, f"Unexpected output: {result.output}"

    # the origin values in the scripts uses a hash function (which implementation wise
    # uses a function from the pgcrypt extension, init-admin calls installs it)
    with BaseDb.connect(postgresql.dsn).cursor() as cur:
        cur.execute("select * from origin")
        assert len(cur.fetchall()) == 1
Exemplo n.º 14
0
def test_db_utils_swh_db_upgrade_sanity_checks(
    cli_runner, postgresql, mock_import_swhmodule, module, datadir
):
    """Check swh_db_upgrade"""
    conninfo = craft_conninfo(postgresql)
    for subcommand in ("init-admin", "init"):
        result = cli_runner.invoke(swhdb, [subcommand, module, "--dbname", conninfo])
        assert result.exit_code == 0, f"Unexpected output: {result.output}"

    cnx = BaseDb.connect(conninfo)
    with cnx.transaction() as cur:
        cur.execute("drop table dbmodule")

    # try to upgrade with a unset module
    with pytest.raises(ValueError):
        swh_db_upgrade(conninfo, module)
    # check the dbmodule is unset
    assert swh_db_module(conninfo) is None

    # set the stored module to something else
    other_module = f"{module}2"
    swh_set_db_module(conninfo, other_module)
    assert swh_db_module(conninfo) == other_module

    # try to upgrade with a different module
    with pytest.raises(ValueError):
        swh_db_upgrade(conninfo, module)

    # revert to the proper module in the db
    swh_set_db_module(conninfo, module, force=True)
    assert swh_db_module(conninfo) == module
    # trying again is a noop
    swh_set_db_module(conninfo, module)
    assert swh_db_module(conninfo) == module

    # drop the dbversion table: an upgrade must then fail because the stored
    # version is missing
    with cnx.transaction() as cur:
        cur.execute("drop table dbversion")
    with pytest.raises(ValueError):
        swh_db_upgrade(conninfo, module)
Exemplo n.º 15
0
def test_cli_swh_db_initialization_works_with_flags(cli_runner, postgresql,
                                                    mock_import_swhmodule):
    """Init commands with carefully crafted libpq conninfo works"""
    module_name = "test.cli"  # it's mocked here
    conninfo = craft_conninfo(postgresql)

    for subcommand in ("init-admin", "init"):
        result = cli_runner.invoke(
            swhdb, [subcommand, module_name, "--dbname", conninfo])
        assert result.exit_code == 0, f"Unexpected output: {result.output}"

    # the origin values in the scripts uses a hash function (which implementation wise
    # uses a function from the pgcrypt extension, init-admin calls installs it)
    with BaseDb.connect(postgresql.dsn).cursor() as cur:
        cur.execute("select * from origin")
        assert len(cur.fetchall()) == 1
Exemplo n.º 16
0
def test_smoke_test_people_db_up_and_reset(postgres_people):
    """'people' db is up and got reset on every tables and sequences"""
    with BaseDb.connect(postgres_people.dsn).cursor() as cur:
        def scalar(query):
            # run a single-value query and return its first column
            cur.execute(query)
            return cur.fetchone()[0]

        # tables are truncated after the first round
        assert scalar("select count(*) from dbversion") == 0
        assert scalar("select count(*) from people") == 0
        # table and sequence are reset
        assert scalar("select count(*) from fun") == 0
        assert scalar("select nextval('serial')") == 1
Exemplo n.º 17
0
def directory_converter(db: BaseDb, directory_d: Dict[str, Any]) -> Directory:
    """Rebuild a :class:`Directory` model object from its flat (dict)
    database representation.

    """
    columns = ["target", "name", "perms"]
    query_template = """
    select %(columns)s
    from directory_entry_%(type)s
    where id in %%s
    """

    entries = []
    with db.cursor() as cur:
        # entry ids are stored per entry kind, in <kind>_entries keys
        for entry_type in ("file", "dir", "rev"):
            ids = directory_d.pop("%s_entries" % entry_type)
            if not ids:
                continue
            query = query_template % {
                "columns": ",".join(columns),
                "type": entry_type,
            }
            cur.execute(query, (tuple(ids), ))
            for target, name, perms in cur:
                entries.append(
                    DirectoryEntry(
                        name=name,
                        type=entry_type,
                        target=target,
                        perms=perms,
                    )
                )

    return Directory(
        id=directory_d["id"],
        entries=tuple(entries),
        raw_manifest=directory_d["raw_manifest"],
    )
Exemplo n.º 18
0
def test_cli_swh_db_create_and_init_db(cli_runner, test_db, mock_package_sql):
    """Creating a db then initializing it should be ok

    """
    module_name = "something"

    conninfo = craft_conninfo(test_db, "new-db")
    # "create" installs the necessary admin extensions; "init" loads the
    # schema and data
    for subcommand in ("create", "init"):
        result = cli_runner.invoke(
            swhdb, [subcommand, module_name, "--db-name", conninfo])
        assert result.exit_code == 0, f"Unexpected output: {result.output}"

    # the origin value in the scripts uses a hash function (which implementation wise
    # uses a function from the pgcrypt extension, installed during db creation step)
    with BaseDb.connect(conninfo).cursor() as cur:
        cur.execute("select * from origin")
        assert len(cur.fetchall()) == 1
Exemplo n.º 19
0
def test_db_utils_upgrade(
    cli_runner, postgresql, mock_import_swhmodule, module, datadir
):
    """Check swh_db_upgrade"""
    conninfo = craft_conninfo(postgresql)
    for subcommand in ("init-admin", "init"):
        result = cli_runner.invoke(swhdb, [subcommand, module, "--dbname", conninfo])
        assert result.exit_code == 0, f"Unexpected output: {result.output}"

    assert swh_db_version(conninfo) == 1
    new_version = swh_db_upgrade(conninfo, module)
    assert new_version == 6
    assert swh_db_version(conninfo) == 6

    # get rid of dates to ease checking
    versions = [(v[0], v[2]) for v in swh_db_versions(conninfo)]
    assert versions[-1] == (1, "DB initialization")
    sqlbasedir = path.join(datadir, module.split(".", 1)[1], "sql", "upgrades")

    expected_middle = [
        (i, f"Upgraded to version {i} using {sqlbasedir}/{i:03d}.sql")
        for i in range(5, 1, -1)
    ]
    assert versions[1:-1] == expected_middle
    assert versions[0] == (6, "Updated version from upgrade script")

    cnx = BaseDb.connect(conninfo)
    with cnx.transaction() as cur:
        cur.execute("select url from origin where url like 'version%'")
        assert cur.fetchall() == [("version%03d" % i,) for i in range(2, 7)]
        cur.execute(
            "select url from origin where url = 'this should never be executed'"
        )
        assert not cur.fetchall()
Exemplo n.º 20
0
def test_db_utils_versions(cli_runner, postgresql, mock_import_swhmodule, module):
    """Check get_database_info, swh_db_versions and swh_db_module work ok

    This test checks db versions for both a db with "new style" set of sql init
    scripts (i.e. the dbversion table is not created in these scripts, but by
    the populate_database_for_package() function directly, via the 'swh db
    init' command) and an "old style" set (dbversion created in the scripts).

    """
    conninfo = craft_conninfo(postgresql)
    result = cli_runner.invoke(swhdb, ["init-admin", module, "--dbname", conninfo])
    assert result.exit_code == 0, f"Unexpected output: {result.output}"
    result = cli_runner.invoke(
        swhdb, ["init", module, "--dbname", conninfo, "--initial-version", 10]
    )
    assert result.exit_code == 0, f"Unexpected output: {result.output}"

    # check the swh_db_module() function
    assert swh_db_module(conninfo) == module

    # the dbversion and dbmodule tables exist and are populated
    dbmodule, dbversion, dbflavor = get_database_info(conninfo)
    assert dbmodule == module
    assert dbversion == 10
    assert dbflavor is None

    # check also the swh_db_versions() function (previously computed twice in
    # a row with the first result discarded; once is enough)
    versions = swh_db_versions(conninfo)
    assert len(versions) == 1
    assert versions[0][0] == 10
    if module == "test.cli":
        assert versions[0][1] == datetime.fromisoformat(
            "2016-02-22T15:56:28.358587+00:00"
        )
        assert versions[0][2] == "Work In Progress"
    else:
        # new scheme but with no datastore (so no version support from there)
        assert versions[0][2] == "DB initialization"

    # add a few versions in dbversion
    cnx = BaseDb.connect(conninfo)
    with cnx.transaction() as cur:
        cur.executemany(
            "insert into dbversion(version, release, description) values (%s, %s, %s)",
            [(i, now(), f"Upgrade to version {i}") for i in range(11, 15)],
        )

    dbmodule, dbversion, dbflavor = get_database_info(conninfo)
    assert dbmodule == module
    assert dbversion == 14
    assert dbflavor is None

    versions = swh_db_versions(conninfo)
    assert len(versions) == 5
    for i, (version, ts, desc) in enumerate(versions):
        assert version == (14 - i)  # these are in reverse order
        if version > 10:
            assert desc == f"Upgrade to version {version}"
            assert (now() - ts) < timedelta(seconds=1)
Exemplo n.º 21
0
def test_cli_swh_db_upgrade_new_api(cli_runner, postgresql, datadir, mocker,
                                    tmp_path):
    """Upgrade scenario for a "new style" datastore"""
    module_name = "test.cli_new"

    # the `current_version` variable is the version that will be returned by
    # any call to `get_current_version()` in this test session, thanks to the
    # local mocked version of import_swhmodule() below.
    current_version = 1

    # custom version of the mockup to make it easy to change the
    # current_version returned by get_current_version()
    # TODO: find a better solution for this...
    def import_swhmodule_mock(modname):
        if modname.startswith("test."):
            dirname = modname.split(".", 1)[1]

            # `current_version` is read lazily by the lambda, so rebinding it
            # later in the test body changes what the datastore advertises
            def get_datastore(cls, **kw):
                return mocker.MagicMock(
                    get_current_version=lambda: current_version)

            return mocker.MagicMock(
                __name__=modname,
                __file__=os.path.join(datadir, dirname, "__init__.py"),
                name=modname,
                get_datastore=get_datastore,
            )

        # non-"test." modules go through the real import machinery
        return import_swhmodule(modname)

    mocker.patch("swh.core.db.db_utils.import_swhmodule",
                 import_swhmodule_mock)
    conninfo = craft_conninfo(postgresql)

    # This initializes the schema and data
    cfgfile = tmp_path / "config.yml"
    cfgfile.write_text(
        yaml.dump({module_name: {
            "cls": "postgresql",
            "db": conninfo
        }}))
    result = cli_runner.invoke(
        swhdb, ["init-admin", module_name, "--dbname", conninfo])
    assert result.exit_code == 0, f"Unexpected output: {result.output}"
    result = cli_runner.invoke(swhdb, ["-C", cfgfile, "init", module_name])

    assert (result.exit_code == 0
            ), f"Unexpected output: {traceback.print_tb(result.exc_info[2])}"

    assert swh_db_version(conninfo) == 1

    # the upgrade should not do anything because the datastore does advertise
    # version 1
    # NOTE(review): exit codes of the upgrade invocations below are not
    # checked; the assertions rely on the resulting stored db version instead
    result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name])
    assert swh_db_version(conninfo) == 1

    # advertise current version as 3, a simple upgrade should get us there, but
    # no further
    current_version = 3
    result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name])
    assert swh_db_version(conninfo) == 3

    # an attempt to go further should not do anything
    result = cli_runner.invoke(
        swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 5])
    assert swh_db_version(conninfo) == 3
    # an attempt to go lower should not do anything
    result = cli_runner.invoke(
        swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 2])
    assert swh_db_version(conninfo) == 3

    # advertise current version as 6, an upgrade with --to-version 4 should
    # stick to the given version 4 and no further
    current_version = 6
    result = cli_runner.invoke(
        swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 4])
    assert swh_db_version(conninfo) == 4
    assert "migration was not complete" in result.output

    # attempt to upgrade to a newer version than current code version fails
    result = cli_runner.invoke(
        swhdb,
        [
            "-C", cfgfile, "upgrade", module_name, "--to-version",
            current_version + 1
        ],
    )
    assert result.exit_code != 0
    assert swh_db_version(conninfo) == 4

    # simulate a db that predates the dbmodule table
    cnx = BaseDb.connect(conninfo)
    with cnx.transaction() as cur:
        cur.execute("drop table dbmodule")
    assert swh_db_module(conninfo) is None

    # db migration should recreate the missing dbmodule table
    result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name])
    assert result.exit_code == 0
    assert "Warning: the database does not have a dbmodule table." in result.output
    assert (
        "Write the module information (test.cli_new) in the database? [Y/n]"
        in result.output)
    assert swh_db_module(conninfo) == module_name
Exemplo n.º 22
0
 def get_db(self):
     """Return a db handle: the directly-wrapped connection if one was
     given at construction time, otherwise a connection checked out of
     the pool.

     """
     # _db is set to exactly None in the pooled case (see __init__), so use
     # an explicit identity check rather than truthiness, which would depend
     # on BaseDb's (unspecified) __bool__ behavior.
     if self._db is not None:
         return self._db
     return BaseDb.from_pool(self._pool)