Пример #1
0
def test_resolve_object_from_extids_missing_target() -> None:
    """A known extid only resolves once its release is present in the storage."""
    extid_key = ("extid-type", 0, b"extid-of-aaaa")

    memory_storage = get_storage("memory")
    release = Release(
        name=b"aaaa",
        message=b"aaaa",
        target=b"\x01" * 20,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=False,
    )

    loader = StubPackageLoader(memory_storage, "http://example.org/")

    package_info = Mock(wraps=BasePackageInfo(None, None, None))  # type: ignore
    package_info.extid.return_value = extid_key

    extid_index = {extid_key: [release.swhid()]}
    allowed_ids = {release.id}

    # The release referenced by the extid has not been added to the storage yet
    resolved = loader.resolve_object_from_extids(
        extid_index, package_info, allowed_ids
    )
    assert resolved is None

    memory_storage.release_add([release])

    # The very same lookup succeeds now that the release is stored
    resolved = loader.resolve_object_from_extids(
        extid_index, package_info, allowed_ids
    )
    assert resolved == release.swhid()
Пример #2
0
def dulwich_tag_to_release(obj: ShaFile) -> Release:
    """Convert a Dulwich tag object into a :class:`Release`.

    The tag signature, when present, is appended to the release message.
    When the id computed from the resulting release differs from the tag's
    own SHA-1, a warning is logged and the tag's raw bytes (prefixed with a
    git object header) are stored in ``raw_manifest``.

    Args:
        obj: the Dulwich object to convert; its ``type_name`` must be
            ``b"tag"``.

    Returns:
        The release built from the tag, after it was passed to ``check_id``.

    Raises:
        ValueError: if ``obj`` is not a tag.
    """
    if obj.type_name != b"tag":
        raise ValueError("Argument is not a tag.")
    tag = cast(Tag, obj)

    # Pick up the timezone as it is written on the raw "tagger" header line.
    # FIXME: _parse_message is a private function from Dulwich.
    tagger_timezone = None
    for (field, value) in _parse_message(tag.as_raw_chunks()):
        if field != b"tagger":
            continue
        matched = AUTHORSHIP_LINE_RE.match(value)
        if matched:
            tagger_timezone = matched.group("timezone")

    target_type, target = tag.object

    # Author and date are only filled in when the tag has a tagger; a tagger
    # without a tag time yields an author with no date.
    author: Optional[Person] = None
    date = None
    if tag.tagger:
        author = parse_author(tag.tagger)
        if tag.tag_time is not None:
            date = dulwich_tsinfo_to_timestamp(
                tag.tag_time,
                tag.tag_timezone,
                tag._tag_timezone_neg_utc,
                tagger_timezone,
            )

    message = tag.message
    if tag.signature:
        message += tag.signature

    rel = Release(
        id=tag.sha().digest(),
        author=author,
        date=date,
        name=tag.name,
        target=bytes.fromhex(target.decode()),
        target_type=DULWICH_OBJECT_TYPES[target_type.type_name],
        message=message,
        metadata=None,
        synthetic=False,
    )

    # Compute the hash a single time and reuse it for both the comparison
    # and the warning message.
    actual_id = rel.compute_hash()
    if actual_id != rel.id:
        logger.warning(
            "Expected release to have id %s, but got %s. Recording raw_manifest.",
            hash_to_hex(rel.id),
            hash_to_hex(actual_id),
        )
        raw_string = tag.as_raw_string()
        rel = attr.evolve(
            rel,
            raw_manifest=git_object_header("tag", len(raw_string)) + raw_string,
        )

    check_id(rel)
    return rel
Пример #3
0
 def test_release_identifier(self):
     """The release id is the same with or without an "id" in the input dict."""
     with_id = Release.from_dict(self.release)
     self.assertEqual(with_id.id, self.release["id"])

     without_id = Release.from_dict(remove_id(self.release))
     self.assertEqual(without_id.id, self.release["id"])
Пример #4
0
def test_resolve_object_from_extids() -> None:
    """Walk through the miss/hit scenarios of ``resolve_object_from_extids``."""
    storage = get_storage("memory")
    directory_id = b"\x01" * 20
    release_a = Release(
        name=b"aaaa",
        message=b"aaaa",
        target=directory_id,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=False,
    )
    release_b = Release(
        name=b"bbbb",
        message=b"bbbb",
        target=directory_id,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=False,
    )
    storage.release_add([release_a, release_b])

    loader = StubPackageLoader(storage, "http://example.org/")
    package_info = Mock(wraps=BasePackageInfo(None, None, None))  # type: ignore

    def resolve(extids, allowed_ids):
        # Every scenario below queries the loader with the same package info.
        return loader.resolve_object_from_extids(extids, package_info, allowed_ids)

    extid_of_a = ("extid-type", 0, b"extid-of-aaaa")

    # The PackageInfo does not support extids
    package_info.extid.return_value = None
    extids = {extid_of_a: [release_a.swhid()]}
    allowed = {b"unused"}
    assert resolve(extids, allowed) is None

    # Some known extid, and the PackageInfo is not one of them (ie. cache miss)
    package_info.extid.return_value = ("extid-type", 0, b"extid-of-cccc")
    assert resolve(extids, allowed) is None

    # Some known extid, and the PackageInfo is one of them (ie. cache hit),
    # but the target release was not in the previous snapshot
    package_info.extid.return_value = extid_of_a
    assert resolve(extids, allowed) is None

    # Some known extid, and the PackageInfo is one of them (ie. cache hit),
    # and the target release was in the previous snapshot
    allowed = {release_a.id}
    assert resolve(extids, allowed) == release_a.swhid()

    # Same as before, but there is more than one extid, and only one is an allowed
    # release
    allowed = {release_a.id}
    extids = {extid_of_a: [release_b.swhid(), release_a.swhid()]}
    assert resolve(extids, allowed) == release_a.swhid()
Пример #5
0
    def test_load_tag_minimal(self):
        """Load a hand-built tag without tagger or date and check its release."""
        script_path = os.path.join(self.destination_path, "hello.py")
        with open(script_path, "a") as script:
            script.write("print('Hello world')\n")

        self.repo.stage([b"hello.py"])
        commit_id = self.repo.do_commit(b"Hello world\n")

        # dulwich.porcelain.tag_create doesn't allow creating tags without
        # a tagger or a date, so we have to create it "manually"
        bare_tag = dulwich.objects.Tag()
        bare_tag.message = b"First release!\n"
        bare_tag.name = b"v1.0.0"
        bare_tag.object = (dulwich.objects.Commit, commit_id)
        self.repo.object_store.add_object(bare_tag)
        self.repo[b"refs/tags/v1.0.0"] = bare_tag.id

        load_status = self.loader.load()
        assert load_status == {"status": "eventful"}

        snapshot_branches = self.loader.storage.snapshot_get_branches(
            self.loader.snapshot.id
        )
        tag_branch = snapshot_branches["branches"][b"refs/tags/v1.0.0"]
        assert tag_branch.target_type == TargetType.RELEASE

        stored_release = self.loader.storage.release_get([tag_branch.target])[0]
        expected_release = Release(
            id=bytehex_to_hash(bare_tag.id),
            name=b"v1.0.0",
            message=b"First release!\n",
            target_type=ObjectType.REVISION,
            target=bytehex_to_hash(commit_id),
            synthetic=False,
        )
        assert stored_release == expected_release
Пример #6
0
def db_to_release(db_release: Dict[str, Any]) -> Optional[Release]:
    """Build the swh-model ``Release`` matching a database release row.

    Args:
        db_release: the release as read from the database, keyed by column
            name.

    Returns:
        ``None`` when the row has no ``target_type`` (in which case every
        column but ``id`` is asserted to be ``None``), the converted
        ``Release`` otherwise.
    """
    row = db_release

    if row["target_type"] is None:
        assert all(value is None for (column, value) in row.items() if column != "id")
        return None

    return Release(
        author=db_to_author(
            row["author_fullname"],
            row["author_name"],
            row["author_email"],
        ),
        date=db_to_date(row["date"], row["date_offset_bytes"]),
        id=row["id"],
        name=row["name"],
        message=row["comment"],
        synthetic=row["synthetic"],
        target=row["target"],
        target_type=ObjectType(row["target_type"]),
        raw_manifest=row["raw_manifest"],
    )
Пример #7
0
    def build_release(
        self,
        p_info: DebianPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a Debian source package.

        Author and date are taken from the changelog entry of the package's
        intrinsic metadata.

        Args:
            p_info: information about the package being loaded
            uncompressed_path: path of the uncompressed package
            directory: id of the directory targeted by the release

        Raises:
            ValueError: if no dsc file name is found for the package.
        """
        dsc_url, dsc_name = dsc_information(p_info)
        if not dsc_name:
            raise ValueError("dsc name for url %s should not be None" % dsc_url)

        # The dsc file is looked up in the parent directory of the
        # uncompressed package.
        parent_dir = path.dirname(uncompressed_path)
        intrinsic_metadata = get_intrinsic_package_metadata(
            p_info, path.join(parent_dir, dsc_name), uncompressed_path
        )

        logger.debug("intrinsic_metadata: %s", intrinsic_metadata)
        logger.debug("p_info: %s", p_info)

        release_message = (
            f"Synthetic release for Debian source package {p_info.name} "
            f"version {p_info.intrinsic_version}\n"
        )

        changelog_author = prepare_person(intrinsic_metadata.changelog.person)
        changelog_date = TimestampWithTimezone.from_iso8601(
            intrinsic_metadata.changelog.date
        )

        # inspired from swh.loader.debian.converters.package_metadata_to_revision
        return Release(
            name=p_info.intrinsic_version.encode(),
            message=release_message.encode(),
            author=changelog_author,
            date=changelog_date,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
Пример #8
0
    def build_release(
        self,
        p_info: DepositPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a deposit.

        The message starts with a line naming the client, deposit id and
        collection, followed (after a blank line) by the release notes when
        there are any; it always ends with a newline.
        """
        paragraphs = [
            f"{p_info.client}: Deposit {p_info.id} in collection {p_info.collection}"
        ]
        if p_info.release_notes:
            paragraphs.append(p_info.release_notes)

        text = "\n\n".join(paragraphs)
        if not text.endswith("\n"):
            text += "\n"

        return Release(
            name=p_info.version.encode(),
            message=text.encode(),
            author=p_info.author,
            date=TimestampWithTimezone.from_dict(p_info.author_date),
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
    def test_dulwich_tag_to_release_no_author_no_date(self):
        """A tag with neither tagger nor date gives a release without author/date."""
        expected_id = hash_to_bytes("f6e367357b446bd1315276de5e88ba3d0d99e136")
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        release_message = b"some release message"

        # given
        dulwich_tag = dulwich.objects.Tag()
        dulwich_tag.name = b"blah"
        dulwich_tag.object = (dulwich.objects.Commit, target_hex)
        dulwich_tag.message = release_message
        dulwich_tag.signature = None
        dulwich_tag.tagger = None
        dulwich_tag.tag_time = None
        dulwich_tag.tag_timezone = None
        assert dulwich_tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(dulwich_tag)

        # then
        assert converted == Release(
            author=None,
            date=None,
            id=expected_id,
            message=release_message,
            metadata=None,
            name=b"blah",
            synthetic=False,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
        )
Пример #10
0
    def build_release(
        self,
        p_info: PyPIPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a PyPI source package.

        Returns:
            ``None`` when no intrinsic metadata could be extracted from
            ``uncompressed_path``, the release otherwise.
        """
        intrinsic = extract_intrinsic_metadata(uncompressed_path)
        if not intrinsic:
            return None

        # from intrinsic metadata
        intrinsic_version = intrinsic.get("version", p_info.version)
        release_author = author(intrinsic)

        # The uploader's comment, when there is one, replaces the default message.
        text = p_info.comment_text or (
            f"Synthetic release for PyPI source package {p_info.name} "
            f"version {intrinsic_version}\n"
        )

        upload_date = TimestampWithTimezone.from_iso8601(p_info.upload_time)

        return Release(
            name=p_info.version.encode(),
            message=text.encode(),
            author=release_author,
            date=upload_date,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
    def test_dulwich_tag_to_release_signature(self):
        """The tag signature ends up appended to the release message."""
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        release_message = b"some release message"
        expected_id = hash_to_bytes("46fff489610ed733d2cc904e363070dadee05c71")

        # given
        signed_tag = dulwich.objects.Tag()
        signed_tag.name = b"blah"
        signed_tag.object = (dulwich.objects.Commit, target_hex)
        signed_tag.message = release_message
        signed_tag.signature = GPGSIG
        signed_tag.tagger = None
        signed_tag.tag_time = None
        signed_tag.tag_timezone = None
        assert signed_tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(signed_tag)

        # then
        assert converted == Release(
            author=None,
            date=None,
            id=expected_id,
            message=release_message + GPGSIG,
            metadata=None,
            name=b"blah",
            synthetic=False,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
        )
Пример #12
0
 def build_release(
     self, p_info: NixGuixPackageInfo, uncompressed_path: str, directory: Sha1Git
 ) -> Optional[Release]:
     """Build a synthetic release named after the package version.

     The release carries no message and no date, and uses ``EMPTY_AUTHOR``
     as its author.
     """
     version_name = p_info.version.encode()
     return Release(
         name=version_name,
         message=None,
         author=EMPTY_AUTHOR,
         date=None,
         target=directory,
         target_type=ObjectType.DIRECTORY,
         synthetic=True,
     )
Пример #13
0
def expected_releases(jar_dirs):
    """Return the two expected releases (0.1.0 and 0.1.1), in that order.

    The release at position ``i`` targets ``jar_dirs[i].hash`` and takes its
    message and date from ``REL_MSGS[i]`` and ``REL_DATES[i]``.
    """
    version_names = (b"0.1.0", b"0.1.1")
    return [
        Release(
            name=version_name,
            message=REL_MSGS[position],
            author=EMPTY_AUTHOR,
            date=REL_DATES[position],
            target_type=ModelObjectType.DIRECTORY,
            target=jar_dirs[position].hash,
            synthetic=True,
            metadata=None,
        )
        for position, version_name in enumerate(version_names)
    ]
Пример #14
0
 def build_release(
     self,
     p_info: MavenPackageInfo,
     uncompressed_path: str,
     directory: Sha1Git,
 ) -> Optional[Release]:
     """Build the synthetic release of a Maven archive.

     The message mentions the archive URL; the date is derived from
     ``p_info.time`` and the author is ``EMPTY_AUTHOR``.
     """
     text = f"Synthetic release for archive at {p_info.url}\n"
     encoded_message = text.encode("utf-8")
     release_date = TimestampWithTimezone.from_datetime(p_info.time)
     return Release(
         name=p_info.version.encode(),
         message=encoded_message,
         date=release_date,
         author=EMPTY_AUTHOR,
         target=directory,
         target_type=ObjectType.DIRECTORY,
         synthetic=True,
     )
Пример #15
0
 def build_release(
     self,
     p_info: BasePackageInfo,
     uncompressed_path: str,
     directory: Sha1Git,
 ):
     """Build a minimal, non-synthetic release named after the package version.

     The ``directory`` argument is not used: the release always targets
     ``DIRECTORY_ID``. Message and author fullname are empty, and no date
     is set.
     """
     blank_author = Person.from_fullname(b"")
     return Release(
         name=p_info.version.encode(),
         message=b"",
         author=blank_author,
         date=None,
         target=DIRECTORY_ID,
         target_type=ObjectType.DIRECTORY,
         synthetic=False,
     )
    def test_dulwich_tag_to_release_author_and_date(self):
        """A tag with tagger and date gives a release with author and date."""
        expected_id = hash_to_bytes("fc1e6a4f1e37e93e28e78560e73efd0b12f616ef")
        tagger_line = b"hey dude <*****@*****.**>"
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        release_message = b"some release message"

        tag_datetime = datetime.datetime(2007, 12, 5, tzinfo=datetime.timezone.utc)
        tag_epoch = int(tag_datetime.timestamp())

        # given
        dulwich_tag = dulwich.objects.Tag()
        dulwich_tag.name = b"blah"
        dulwich_tag.object = (dulwich.objects.Commit, target_hex)
        dulwich_tag.message = release_message
        dulwich_tag.signature = None
        dulwich_tag.tagger = tagger_line
        dulwich_tag.tag_time = tag_epoch
        dulwich_tag.tag_timezone = 0
        assert dulwich_tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(dulwich_tag)

        # then
        expected_author = Person(
            email=b"*****@*****.**",
            fullname=b"hey dude <*****@*****.**>",
            name=b"hey dude",
        )
        expected_date = TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1196812800,
                microseconds=0,
            ),
            offset_bytes=b"+0000",
        )
        assert converted == Release(
            author=expected_author,
            date=expected_date,
            id=expected_id,
            message=release_message,
            metadata=None,
            name=b"blah",
            synthetic=False,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
        )
Пример #17
0
 def build_release(
     self,
     p_info: CRANPackageInfo,
     uncompressed_path: str,
     directory: Sha1Git,
 ) -> Optional[Release]:
     """Build the synthetic release of a CRAN source package.

     Date and author come from the ``Date`` and ``Maintainer`` entries of
     the intrinsic metadata; a missing maintainer gives an empty fullname.
     """
     # a_metadata is empty
     intrinsic = extract_intrinsic_metadata(uncompressed_path)
     release_date = parse_date(intrinsic.get("Date"))
     maintainer = intrinsic.get("Maintainer", "")
     release_author = Person.from_fullname(maintainer.encode())
     text = (
         f"Synthetic release for CRAN source package {p_info.name} "
         f"version {p_info.version}\n"
     )
     return Release(
         name=p_info.version.encode(),
         message=text.encode(),
         date=release_date,
         author=release_author,
         target_type=ObjectType.DIRECTORY,
         target=directory,
         synthetic=True,
     )
    def test_dulwich_tag_to_release_author_zero_date(self):
        """A tag dated at the epoch still gives a release with a (zero) date."""
        # to reproduce bug T815 (fixed)
        expected_id = hash_to_bytes("6cc1deff5cdcd853428bb63b937f43dd2566c36f")
        tagger_line = b"hey dude <*****@*****.**>"
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        release_message = b"some release message"

        epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        tag_epoch = int(epoch.timestamp())

        # given
        dulwich_tag = dulwich.objects.Tag()
        dulwich_tag.name = b"blah"
        dulwich_tag.object = (dulwich.objects.Commit, target_hex)
        dulwich_tag.message = release_message
        dulwich_tag.signature = None
        dulwich_tag.tagger = tagger_line
        dulwich_tag.tag_time = tag_epoch
        dulwich_tag.tag_timezone = 0
        assert dulwich_tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(dulwich_tag)

        # then
        expected_author = Person(
            email=b"*****@*****.**",
            fullname=b"hey dude <*****@*****.**>",
            name=b"hey dude",
        )
        expected_date = TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=0,
                microseconds=0,
            ),
            offset_bytes=b"+0000",
        )
        assert converted == Release(
            author=expected_author,
            date=expected_date,
            id=expected_id,
            message=release_message,
            metadata=None,
            name=b"blah",
            synthetic=False,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
        )
Пример #19
0
def identify_release(
    hg: Hg,
    node_id_2_swhid: Optional[Dict[bytes, CoreSWHID]] = None,
) -> Iterator[ReleaseIdentity]:
    """Yield the release identity of each tag of the repository.

    Args:
        hg: A `Hg` repository instance
        node_id_2_swhid: An optional cache mapping hg node ids to SWHIDs.
            When omitted, it is built from `identify_revision(hg)`.

    Yields:
        One `ReleaseIdentity` per tag returned by ``hg.tags()``, whose SWHID
        is the one of a non-synthetic release without message nor date,
        targeting the revision the tag points to.
    """
    from swh.model.model import ObjectType as ModelObjectType
    from swh.model.model import Release

    if node_id_2_swhid is None:
        node_id_2_swhid = {}
        for revision in identify_revision(hg):
            node_id_2_swhid[revision.node_id] = revision.swhid

    for tag in hg.tags():
        revision_swhid = node_id_2_swhid[tag.node_id]
        assert revision_swhid.object_type == ObjectType.REVISION

        anonymous_author = {"name": None, "email": None, "fullname": b""}
        release = Release.from_dict(
            {
                "name": tag.name,
                "target": revision_swhid.object_id,
                "target_type": ModelObjectType.REVISION.value,
                "message": None,
                "metadata": None,
                "synthetic": False,
                "author": anonymous_author,
                "date": None,
            }
        )

        yield ReleaseIdentity(
            swhid=release.swhid(),
            node_id=tag.node_id,
            name=tag.name,
        )
Пример #20
0
def test_from_release():
    """Converting a release model object to a dict should be ok."""
    ts = int(
        datetime.datetime(
            2015, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
        ).timestamp()
    )
    release_input = Release(
        id=hashutil.hash_to_bytes("aad23fa492a0c5fed0708a6703be875448c86884"),
        target=hashutil.hash_to_bytes("5e46d564378afc44b31bb89f99d5675195fbdf67"),
        target_type=ObjectType.REVISION,
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=ts, microseconds=0),
            offset=0,
            negative_utc=False,
        ),
        author=Person(
            name=b"author name",
            fullname=b"Author Name author@email",
            email=b"author@email",
        ),
        name=b"v0.0.1",
        message=b"some comment on release",
        synthetic=True,
    )

    # Each key is listed exactly once: the original literal repeated
    # "target_type", which a dict display silently collapses into one entry.
    expected_release = {
        "id": "aad23fa492a0c5fed0708a6703be875448c86884",
        "target": "5e46d564378afc44b31bb89f99d5675195fbdf67",
        "target_type": "revision",
        "date": "2015-01-01T22:00:00+00:00",
        "author": {
            "name": "author name",
            "fullname": "Author Name author@email",
            "email": "author@email",
        },
        "name": "v0.0.1",
        "message": "some comment on release",
        "synthetic": True,
    }

    actual_release = converters.from_release(release_input)

    assert actual_release == expected_release
Пример #21
0
    def build_release(
        self,
        p_info: OpamPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic, undated release of an OPAM source package.

        The message names the loader's ``opam_package`` and the version of
        ``p_info``; the author is taken from ``p_info``.
        """
        text = (
            f"Synthetic release for OPAM source package {self.opam_package} "
            f"version {p_info.version}\n"
        )
        encoded_name = p_info.version.encode()
        return Release(
            name=encoded_name,
            author=p_info.author,
            message=text.encode(),
            date=None,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
Пример #22
0
    def build_release(
        self,
        p_info: ArchPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of an Arch Linux source package.

        The author is the ``packager`` entry of the intrinsic metadata and
        the message embeds its ``pkgdesc`` entry; the date is parsed from
        ``p_info.last_modified``.
        """
        intrinsic = extract_intrinsic_metadata(Path(uncompressed_path))
        packager = Person.from_fullname(intrinsic["packager"].encode())
        package_description = intrinsic["pkgdesc"]

        text = (
            f"Synthetic release for Arch Linux source package {p_info.name} "
            f"version {p_info.version}\n\n"
            f"{package_description}\n"
        )
        return Release(
            name=p_info.version.encode(),
            author=packager,
            date=TimestampWithTimezone.from_iso8601(p_info.last_modified),
            message=text.encode(),
            target_type=ObjectType.DIRECTORY,
            target=directory,
            synthetic=True,
        )
Пример #23
0
    def test_load_tag(self):
        """Load an annotated tag made by Dulwich and check the stored release."""
        script_path = os.path.join(self.destination_path, "hello.py")
        with open(script_path, "a") as script:
            script.write("print('Hello world')\n")

        self.repo.stage([b"hello.py"])
        commit_id = self.repo.do_commit(b"Hello world\n")

        # Newer Dulwich versions always add a \n to tag messages.
        tag_message = (
            b"First release!"
            if dulwich.__version__ >= (0, 20, 22)
            else b"First release!\n"
        )

        dulwich.porcelain.tag_create(
            self.repo,
            b"v1.0.0",
            message=tag_message,
            annotated=True,
            objectish=commit_id,
        )

        load_status = self.loader.load()
        assert load_status == {"status": "eventful"}

        snapshot_branches = self.loader.storage.snapshot_get_branches(
            self.loader.snapshot.id
        )
        tag_branch = snapshot_branches["branches"][b"refs/tags/v1.0.0"]
        assert tag_branch.target_type == TargetType.RELEASE

        stored_release = self.loader.storage.release_get([tag_branch.target])[0]
        assert stored_release.date is not None
        assert stored_release.author is not None

        # Author and date are copied from the stored release: only their
        # presence is checked above, not their value.
        expected_release = Release(
            name=b"v1.0.0",
            message=b"First release!\n",
            target_type=ObjectType.REVISION,
            target=bytehex_to_hash(commit_id),
            author=stored_release.author,
            date=stored_release.date,
            synthetic=False,
        )
        assert stored_release == expected_release
Пример #24
0
def test_kafka_writer_anonymized(
    kafka_prefix: str,
    kafka_server: str,
    consumer: Consumer,
    privileged_object_types: Iterable[str],
):
    """Check persons are anonymized in what a writer with ``anonymize=True`` emits."""
    writer = KafkaJournalWriter(
        brokers=[kafka_server],
        client_id="kafka_writer",
        prefix=kafka_prefix,
        value_sanitizer=model_object_dict_sanitizer,
        anonymize=True,
    )

    def is_anonymized(person) -> bool:
        # An anonymized person only keeps a 32-byte fullname.
        return (
            len(person.fullname) == 32
            and person.name is None
            and person.email is None
        )

    # Objects of a privileged type are expected to be consumed twice.
    expected_count = 0
    for object_type, objects in TEST_OBJECTS.items():
        writer.write_additions(object_type, objects)
        copies = 2 if object_type in privileged_object_types else 1
        expected_count += copies * len(objects)

    consumed = consume_messages(consumer, kafka_prefix, expected_count)
    assert_all_objects_consumed(consumed, exclude=["revision", "release"])

    for _key, revision_dict in consumed["revision"]:
        revision = Revision.from_dict(revision_dict)
        assert is_anonymized(revision.author)
        assert is_anonymized(revision.committer)

    for _key, release_dict in consumed["release"]:
        release = Release.from_dict(release_dict)
        # author is optional for release
        if release.author is not None:
            assert is_anonymized(release.author)
Пример #25
0
    def build_release(
        self,
        p_info: NpmPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of an NPM source package.

        Returns:
            ``None`` when no intrinsic metadata could be extracted from
            ``uncompressed_path``, the release otherwise.

        Raises:
            ValueError: if ``p_info`` carries no date.
        """
        # Metadata from NPM is not intrinsic to tarballs.
        # This means two package versions can have the same tarball, but different
        # metadata. To avoid mixing up releases, every field used to build the
        # release object must be part of NpmPackageInfo.MANIFEST_FORMAT.
        intrinsic = extract_intrinsic_metadata(uncompressed_path)
        if not intrinsic:
            return None

        package_author = extract_npm_package_author(intrinsic)
        assert self.package_name == p_info.package_name
        text = (
            f"Synthetic release for NPM source package {p_info.package_name} "
            f"version {p_info.version}\n"
        )

        if p_info.date is None:
            artifact_name = os.path.basename(p_info.url)
            raise ValueError(
                "Origin %s: Cannot determine upload time for artifact %s."
                % (p_info.url, artifact_name)
            )

        upload_date = TimestampWithTimezone.from_iso8601(p_info.date)

        # FIXME: this is to remain bug-compatible with earlier versions:
        whole_seconds = attr.evolve(upload_date.timestamp, microseconds=0)
        upload_date = attr.evolve(upload_date, timestamp=whole_seconds)

        return Release(
            name=p_info.version.encode(),
            message=text.encode(),
            author=package_author,
            date=upload_date,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
Пример #26
0
def from_release(release: Release) -> Dict[str, Any]:
    """Convert a swh release into a json serializable release dictionary.

    The release is first turned into a dictionary with ``to_dict()``, which
    is then handed to ``from_swh`` with ``id`` and ``target`` declared as
    hashes, ``message``, ``name``, ``fullname`` and ``email`` as bytes, and
    ``date`` as a date.

    Args:
        release: A release model object

    Returns:
        The dictionary returned by ``from_swh`` for that release.

    """
    release_dict = release.to_dict()
    hash_keys = {"id", "target"}
    bytes_keys = {"message", "name", "fullname", "email"}
    date_keys = {"date"}
    return from_swh(
        release_dict,
        hashess=hash_keys,
        bytess=bytes_keys,
        dates=date_keys,
    )
Пример #27
0
    def build_release(
        self,
        p_info: CratesPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a crate.

        As a side effect, ``p_info.i_metadata`` is set from the ``package``
        entry of the crate's intrinsic metadata.
        """
        # Extract intrinsic metadata from dir_path/Cargo.toml
        name = p_info.name
        version = p_info.version
        crate_dir = Path(uncompressed_path, f"{name}-{version}")
        raw_metadata = extract_intrinsic_metadata(crate_dir)

        # We use data only from "package" entry, restricted to the keys
        # declared on IntrinsicPackageMetadata
        declared_keys = set(IntrinsicPackageMetadata.__annotations__)
        kept_metadata = {}
        for key, value in raw_metadata["package"].items():
            if key in declared_keys:
                kept_metadata[key] = value
        p_info.i_metadata = IntrinsicPackageMetadata(
            **kept_metadata
        )  # type: ignore[misc]

        crate_author = extract_author(p_info)
        crate_description = extract_description(p_info)
        text = (
            f"Synthetic release for Crate source package {p_info.name} "
            f"version {p_info.version}\n\n"
            f"{crate_description}\n"
        )
        # The only way to get a value for updated_at is through extrinsic metadata
        updated_at = p_info.e_metadata_version.get("updated_at")

        return Release(
            name=version.encode(),
            author=crate_author,
            date=TimestampWithTimezone.from_iso8601(updated_at),
            message=text.encode(),
            target_type=ObjectType.DIRECTORY,
            target=directory,
            synthetic=True,
        )
    def test_dulwich_tag_to_release_author_no_date(self):
        """A tag with a tagger but no date gives a release with author only."""
        # to reproduce bug T815 (fixed)
        expected_id = hash_to_bytes("41076e970975122dc6b2a878aa9797960bc4781d")
        tagger_line = b"hey dude <*****@*****.**>"
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        release_message = b"some release message"

        # given
        dulwich_tag = dulwich.objects.Tag()
        dulwich_tag.name = b"blah"
        dulwich_tag.object = (dulwich.objects.Commit, target_hex)
        dulwich_tag.message = release_message
        dulwich_tag.signature = None
        dulwich_tag.tagger = tagger_line
        dulwich_tag.tag_time = None
        dulwich_tag.tag_timezone = None
        assert dulwich_tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(dulwich_tag)

        # then
        expected_author = Person(
            email=b"*****@*****.**",
            fullname=b"hey dude <*****@*****.**>",
            name=b"hey dude",
        )
        assert converted == Release(
            author=expected_author,
            date=None,
            id=expected_id,
            message=release_message,
            metadata=None,
            name=b"blah",
            synthetic=False,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
        )
def test_db_to_release():
    """A fully populated database row converts to the matching ``Release``."""
    # given
    db_row = {
        "id": b"release-id",
        "target": b"revision-id",
        "target_type": "revision",
        "date": None,
        "date_offset": None,
        "date_neg_utc_offset": None,
        "date_offset_bytes": None,
        "name": b"release-name",
        "comment": b"release comment",
        "synthetic": True,
        "author_fullname": b"auth-name <auth-email>",
        "author_name": b"auth-name",
        "author_email": b"auth-email",
        "raw_manifest": None,
    }

    # when
    converted = converters.db_to_release(db_row)

    # then
    expected_author = Person(
        fullname=b"auth-name <auth-email>",
        name=b"auth-name",
        email=b"auth-email",
    )
    assert converted == Release(
        author=expected_author,
        date=None,
        id=b"release-id",
        name=b"release-name",
        message=b"release comment",
        synthetic=True,
        target=b"revision-id",
        target_type=ObjectType.REVISION,
    )
Пример #30
0
def test_npm_loader_duplicate_shasum(swh_storage, requests_mock_datadir):
    """Test with two versions that have exactly the same tarball"""
    origin_url = package_url("org_version_mismatch")
    snapshot_id = hash_to_bytes("ac867a4c22ba4e22a022d319f309714477412a5a")

    first_status = NpmLoader(swh_storage, origin_url).load()
    assert first_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )

    beta_release_id = "e6d5490a02ac2a8dcd49702f9ccd5a64c90a46f1"
    release_id = "f6985f437e28db6eb1b7533230e05ed99f2c91f0"

    # HEAD is an alias of the 0.0.3 branch; each version gets a release branch.
    branches = {
        b"HEAD": SnapshotBranch(
            target=b"releases/0.0.3", target_type=TargetType.ALIAS
        ),
    }
    for version_name, version_id in (
        ("0.0.3-beta", beta_release_id),
        ("0.0.3", release_id),
    ):
        branches[b"releases/" + version_name.encode()] = SnapshotBranch(
            target=hash_to_bytes(version_id),
            target_type=TargetType.RELEASE,
        )
    check_snapshot(Snapshot(id=snapshot_id, branches=branches), swh_storage)

    def synthetic_release(hex_id, version, message, uploaded_at):
        # Both versions share the same directory and author; they differ by
        # name, message, upload date and id.
        return Release(
            name=version,
            message=message,
            target=hash_to_bytes("3370d20d6f96dc1c9e50f083e2134881db110f4f"),
            target_type=ModelObjectType.DIRECTORY,
            synthetic=True,
            author=Person.from_fullname(b"Masafumi Oyamada <*****@*****.**>"),
            date=TimestampWithTimezone.from_datetime(uploaded_at),
            id=hash_to_bytes(hex_id),
        )

    stored_beta = swh_storage.release_get([hash_to_bytes(beta_release_id)])[0]
    assert stored_beta == synthetic_release(
        beta_release_id,
        b"0.0.3-beta",
        (
            b"Synthetic release for NPM source package org_version_mismatch "
            b"version 0.0.3-beta\n"
        ),
        datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=datetime.timezone.utc),
    )

    stored_final = swh_storage.release_get([hash_to_bytes(release_id)])[0]
    assert stored_final == synthetic_release(
        release_id,
        b"0.0.3",
        (
            b"Synthetic release for NPM source package org_version_mismatch "
            b"version 0.0.3\n"
        ),
        datetime.datetime(2014, 1, 1, 15, 55, 45, tzinfo=datetime.timezone.utc),
    )

    # Check incremental re-load keeps it unchanged

    second_status = NpmLoader(swh_storage, origin_url).load()
    assert second_status == {
        "status": "uneventful",
        "snapshot_id": snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )