示例#1
0
def test_resolve_object_from_extids() -> None:
    """Exercise ``resolve_object_from_extids`` on cache misses and cache hits.

    Cases covered, in order: a package info without extid, an extid that is
    not among the known ones, a known extid whose release is not whitelisted,
    a known extid whose release is whitelisted, and a known extid mapped to
    several releases of which only one is whitelisted.
    """
    storage = get_storage("memory")
    directory_id = b"\x01" * 20
    # Two releases that differ only by name/message and share one target.
    first_release, second_release = (
        Release(
            name=label,
            message=label,
            target=directory_id,
            target_type=ModelObjectType.DIRECTORY,
            synthetic=False,
        )
        for label in (b"aaaa", b"bbbb")
    )
    storage.release_add([first_release, second_release])

    loader = StubPackageLoader(storage, "http://example.org/")

    p_info = Mock(wraps=BasePackageInfo(None, None, None))  # type: ignore
    cached_extid = ("extid-type", 0, b"extid-of-aaaa")

    # The PackageInfo does not support extids
    p_info.extid.return_value = None
    known_extids = {cached_extid: [first_release.swhid()]}
    whitelist = {b"unused"}
    resolved = loader.resolve_object_from_extids(known_extids, p_info, whitelist)
    assert resolved is None

    # Some known extid, and the PackageInfo is not one of them (ie. cache miss)
    p_info.extid.return_value = ("extid-type", 0, b"extid-of-cccc")
    resolved = loader.resolve_object_from_extids(known_extids, p_info, whitelist)
    assert resolved is None

    # Some known extid, and the PackageInfo is one of them (ie. cache hit),
    # but the target release was not in the previous snapshot
    p_info.extid.return_value = cached_extid
    resolved = loader.resolve_object_from_extids(known_extids, p_info, whitelist)
    assert resolved is None

    # Some known extid, and the PackageInfo is one of them (ie. cache hit),
    # and the target release was in the previous snapshot
    whitelist = {first_release.id}
    resolved = loader.resolve_object_from_extids(known_extids, p_info, whitelist)
    assert resolved == first_release.swhid()

    # Same as before, but there is more than one extid, and only one is an allowed
    # release
    whitelist = {first_release.id}
    known_extids = {
        cached_extid: [second_release.swhid(), first_release.swhid()],
    }
    resolved = loader.resolve_object_from_extids(known_extids, p_info, whitelist)
    assert resolved == first_release.swhid()
    def build_release(
        self,
        p_info: DebianPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a Debian source package.

        The author and date come from the changelog of the intrinsic
        metadata read next to ``uncompressed_path``; the release targets
        ``directory``.

        Raises:
            ValueError: if no dsc name can be determined for ``p_info``.
        """
        dsc_url, dsc_name = dsc_information(p_info)
        if not dsc_name:
            raise ValueError("dsc name for url %s should not be None" % dsc_url)

        # The dsc file is looked up in the parent of the uncompressed directory.
        parent_dir = path.dirname(uncompressed_path)
        intrinsic_metadata = get_intrinsic_package_metadata(
            p_info, path.join(parent_dir, dsc_name), uncompressed_path
        )

        logger.debug("intrinsic_metadata: %s", intrinsic_metadata)
        logger.debug("p_info: %s", p_info)

        release_message = (
            f"Synthetic release for Debian source package {p_info.name} "
            f"version {p_info.intrinsic_version}\n"
        )

        release_author = prepare_person(intrinsic_metadata.changelog.person)
        release_date = TimestampWithTimezone.from_iso8601(
            intrinsic_metadata.changelog.date
        )

        # inspired from swh.loader.debian.converters.package_metadata_to_revision
        release = Release(
            name=p_info.intrinsic_version.encode(),
            message=release_message.encode(),
            author=release_author,
            date=release_date,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
        return release
    def build_release(
        self,
        p_info: PyPIPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a PyPI source package.

        Returns ``None`` when no intrinsic metadata can be extracted from
        ``uncompressed_path``. The message is the package's comment text when
        there is one, otherwise a generated sentence.
        """
        intrinsic = extract_intrinsic_metadata(uncompressed_path)
        if not intrinsic:
            return None

        # from intrinsic metadata
        intrinsic_version = intrinsic.get("version", p_info.version)
        release_author = author(intrinsic)

        release_message = p_info.comment_text or (
            f"Synthetic release for PyPI source package {p_info.name} "
            f"version {intrinsic_version}\n"
        )

        uploaded_at = TimestampWithTimezone.from_iso8601(p_info.upload_time)

        release = Release(
            name=p_info.version.encode(),
            message=release_message.encode(),
            author=release_author,
            date=uploaded_at,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
        return release
示例#4
0
def dulwich_tag_to_release(obj: ShaFile) -> Release:
    """Convert a Dulwich tag object into a swh-model ``Release``.

    Args:
        obj: a Dulwich object whose ``type_name`` is ``b"tag"``.

    Returns:
        A non-synthetic ``Release`` whose id is the tag's sha. When the hash
        computed from the release's fields differs from that id, the tag's
        raw bytes, prefixed with a git object header, are recorded in
        ``raw_manifest``.

    Raises:
        ValueError: if ``obj`` is not a tag.
    """
    if obj.type_name != b"tag":
        raise ValueError("Argument is not a tag.")
    tag = cast(Tag, obj)

    # Recover the timezone exactly as written on the "tagger" line; if the
    # field appears more than once, the last matching one wins.
    tagger_timezone = None
    # FIXME: _parse_message is a private function from Dulwich.
    for (field, value) in _parse_message(tag.as_raw_chunks()):
        if field == b"tagger":
            m = AUTHORSHIP_LINE_RE.match(value)
            if m:
                tagger_timezone = m.group("timezone")

    target_type, target = tag.object
    if tag.tagger:
        author: Optional[Person] = parse_author(tag.tagger)
        if tag.tag_time is None:
            date = None
        else:
            date = dulwich_tsinfo_to_timestamp(
                tag.tag_time,
                tag.tag_timezone,
                tag._tag_timezone_neg_utc,
                tagger_timezone,
            )
    else:
        # Without a tagger, neither author nor date is recorded.
        author = date = None

    # The signature, when present, is appended to the release message.
    message = tag.message
    if tag.signature:
        message += tag.signature

    rel = Release(
        id=tag.sha().digest(),
        author=author,
        date=date,
        name=tag.name,
        target=bytes.fromhex(target.decode()),
        target_type=DULWICH_OBJECT_TYPES[target_type.type_name],
        message=message,
        metadata=None,
        synthetic=False,
    )

    # Compute the hash once and reuse it for both the comparison and the log.
    actual_id = rel.compute_hash()
    if actual_id != rel.id:
        logger.warning(
            "Expected release to have id %s, but got %s. Recording raw_manifest.",
            hash_to_hex(rel.id),
            hash_to_hex(actual_id),
        )
        raw_string = tag.as_raw_string()
        rel = attr.evolve(
            rel,
            raw_manifest=git_object_header("tag", len(raw_string)) + raw_string,
        )

    # NOTE(review): check_id is defined elsewhere; presumably it validates the
    # final id of the release -- confirm.
    check_id(rel)
    return rel
def db_to_release(db_release: Dict[str, Any]) -> Optional[Release]:
    """Turn a release row coming from the database into a swh-model ``Release``.

    A row whose ``target_type`` is ``None`` yields ``None``; in that case
    every column other than ``id`` is expected to be ``None`` too.
    """
    if db_release["target_type"] is None:
        assert all(
            value is None for key, value in db_release.items() if key != "id"
        )
        return None

    return Release(
        author=db_to_author(
            db_release["author_fullname"],
            db_release["author_name"],
            db_release["author_email"],
        ),
        date=db_to_date(
            db_release["date"],
            db_release["date_offset_bytes"],
        ),
        id=db_release["id"],
        name=db_release["name"],
        message=db_release["comment"],
        synthetic=db_release["synthetic"],
        target=db_release["target"],
        target_type=ObjectType(db_release["target_type"]),
        raw_manifest=db_release["raw_manifest"],
    )
    def build_release(
        self,
        p_info: DepositPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a deposit.

        The message starts with a line naming the client, deposit id and
        collection, followed by the release notes (if any) after a blank
        line; it always ends with a newline.
        """
        paragraphs = [
            f"{p_info.client}: Deposit {p_info.id} in collection {p_info.collection}"
        ]
        if p_info.release_notes:
            paragraphs.append(p_info.release_notes)

        text = "\n\n".join(paragraphs)
        if not text.endswith("\n"):
            text = f"{text}\n"

        release = Release(
            name=p_info.version.encode(),
            message=text.encode(),
            author=p_info.author,
            date=TimestampWithTimezone.from_dict(p_info.author_date),
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
        return release
    def test_dulwich_tag_to_release_no_author_no_date(self):
        """A tag with neither tagger nor date yields a release without author or date."""
        expected_id = hash_to_bytes("f6e367357b446bd1315276de5e88ba3d0d99e136")
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        tag_message = b"some release message"

        # given
        tag = dulwich.objects.Tag()
        tag.name = b"blah"
        tag.object = (dulwich.objects.Commit, target_hex)
        tag.message = tag_message
        tag.signature = None
        tag.tagger = None
        tag.tag_time = None
        tag.tag_timezone = None
        assert tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(tag)

        # then
        assert converted == Release(
            id=expected_id,
            name=b"blah",
            message=tag_message,
            author=None,
            date=None,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
            synthetic=False,
            metadata=None,
        )
    def test_dulwich_tag_to_release_signature(self):
        """A signed tag yields a release whose message ends with the signature."""
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        tag_message = b"some release message"
        expected_id = hash_to_bytes("46fff489610ed733d2cc904e363070dadee05c71")

        # given
        tag = dulwich.objects.Tag()
        tag.name = b"blah"
        tag.object = (dulwich.objects.Commit, target_hex)
        tag.message = tag_message
        tag.signature = GPGSIG
        tag.tagger = None
        tag.tag_time = None
        tag.tag_timezone = None
        assert tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(tag)

        # then
        assert converted == Release(
            id=expected_id,
            name=b"blah",
            message=tag_message + GPGSIG,
            author=None,
            date=None,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
            synthetic=False,
            metadata=None,
        )
示例#9
0
    def test_load_tag_minimal(self):
        """Load a repository holding a tag that has neither tagger nor date."""
        hello_path = os.path.join(self.destination_path, "hello.py")
        with open(hello_path, "a") as fd:
            fd.write("print('Hello world')\n")

        self.repo.stage([b"hello.py"])
        commit_id = self.repo.do_commit(b"Hello world\n")

        # dulwich.porcelain.tag_create doesn't allow creating tags without
        # a tagger or a date, so we have to create it "manually"
        tag = dulwich.objects.Tag()
        tag.message = b"First release!\n"
        tag.name = b"v1.0.0"
        tag.object = (dulwich.objects.Commit, commit_id)
        self.repo.object_store.add_object(tag)
        self.repo[b"refs/tags/v1.0.0"] = tag.id

        load_status = self.loader.load()
        assert load_status == {"status": "eventful"}

        snapshot_branches = self.loader.storage.snapshot_get_branches(
            self.loader.snapshot.id
        )

        tag_branch = snapshot_branches["branches"][b"refs/tags/v1.0.0"]
        assert tag_branch.target_type == TargetType.RELEASE

        stored_release = self.loader.storage.release_get([tag_branch.target])[0]
        expected_release = Release(
            id=bytehex_to_hash(tag.id),
            name=b"v1.0.0",
            message=b"First release!\n",
            target_type=ObjectType.REVISION,
            target=bytehex_to_hash(commit_id),
            synthetic=False,
        )
        assert stored_release == expected_release
示例#10
0
def test_resolve_object_from_extids_missing_target() -> None:
    """A whitelisted extid hit only resolves once its release is in the storage."""
    storage = get_storage("memory")

    release = Release(
        name=b"aaaa",
        message=b"aaaa",
        target=b"\x01" * 20,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=False,
    )

    loader = StubPackageLoader(storage, "http://example.org/")

    p_info = Mock(wraps=BasePackageInfo(None, None, None))  # type: ignore

    extid = ("extid-type", 0, b"extid-of-aaaa")
    known_extids = {extid: [release.swhid()]}
    p_info.extid.return_value = extid
    whitelist = {release.id}

    # Targeted release is missing from the storage
    resolved = loader.resolve_object_from_extids(known_extids, p_info, whitelist)
    assert resolved is None

    storage.release_add([release])

    # Targeted release now exists
    resolved = loader.resolve_object_from_extids(known_extids, p_info, whitelist)
    assert resolved == release.swhid()
示例#11
0
 def build_release(
     self,
     p_info: NixGuixPackageInfo,
     uncompressed_path: str,
     directory: Sha1Git,
 ) -> Optional[Release]:
     """Build a synthetic release named after the package version.

     The release carries no message and no date, uses ``EMPTY_AUTHOR`` as
     author and targets ``directory``.
     """
     release = Release(
         name=p_info.version.encode(),
         message=None,
         author=EMPTY_AUTHOR,
         date=None,
         target=directory,
         target_type=ObjectType.DIRECTORY,
         synthetic=True,
     )
     return release
示例#12
0
 def build_release(
     self,
     p_info: MavenPackageInfo,
     uncompressed_path: str,
     directory: Sha1Git,
 ) -> Optional[Release]:
     """Build the synthetic release of a Maven archive.

     The message mentions the archive URL, the date is derived from
     ``p_info.time`` and the author is ``EMPTY_AUTHOR``.
     """
     release_message = f"Synthetic release for archive at {p_info.url}\n"
     encoded_message = release_message.encode("utf-8")
     release_date = TimestampWithTimezone.from_datetime(p_info.time)
     release = Release(
         name=p_info.version.encode(),
         message=encoded_message,
         date=release_date,
         author=EMPTY_AUTHOR,
         target=directory,
         target_type=ObjectType.DIRECTORY,
         synthetic=True,
     )
     return release
def expected_releases(jar_dirs):
    """Return the two expected releases, ``0.1.0`` then ``0.1.1``.

    The release at position ``i`` takes its message and date from
    ``REL_MSGS[i]`` / ``REL_DATES[i]`` and targets ``jar_dirs[i].hash``.
    """
    version_names = (b"0.1.0", b"0.1.1")
    return [
        Release(
            name=version_name,
            message=REL_MSGS[index],
            author=EMPTY_AUTHOR,
            date=REL_DATES[index],
            target_type=ModelObjectType.DIRECTORY,
            target=jar_dirs[index].hash,
            synthetic=True,
            metadata=None,
        )
        for index, version_name in enumerate(version_names)
    ]
示例#14
0
 def build_release(
     self,
     p_info: BasePackageInfo,
     uncompressed_path: str,
     directory: Sha1Git,
 ):
     """Build a non-synthetic release named after the package version.

     The release always targets ``DIRECTORY_ID``; the ``directory``
     argument is not used.
     """
     empty_author = Person.from_fullname(b"")
     release = Release(
         name=p_info.version.encode(),
         message=b"",
         author=empty_author,
         date=None,
         target=DIRECTORY_ID,
         target_type=ObjectType.DIRECTORY,
         synthetic=False,
     )
     return release
    def test_dulwich_tag_to_release_author_and_date(self):
        """A tag with tagger and date yields a release with author and date."""
        expected_id = hash_to_bytes("fc1e6a4f1e37e93e28e78560e73efd0b12f616ef")
        tagger = b"hey dude <*****@*****.**>"
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        tag_message = b"some release message"

        tagged_at = datetime.datetime(2007, 12, 5, tzinfo=datetime.timezone.utc)
        tag_time = int(tagged_at.timestamp())

        # given
        tag = dulwich.objects.Tag()
        tag.name = b"blah"
        tag.object = (dulwich.objects.Commit, target_hex)
        tag.message = tag_message
        tag.signature = None
        tag.tagger = tagger
        tag.tag_time = tag_time
        tag.tag_timezone = 0
        assert tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(tag)

        # then
        expected_author = Person(
            email=b"*****@*****.**",
            fullname=b"hey dude <*****@*****.**>",
            name=b"hey dude",
        )
        expected_date = TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1196812800,
                microseconds=0,
            ),
            offset_bytes=b"+0000",
        )
        assert converted == Release(
            id=expected_id,
            name=b"blah",
            message=tag_message,
            author=expected_author,
            date=expected_date,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
            synthetic=False,
            metadata=None,
        )
    def test_dulwich_tag_to_release_author_zero_date(self):
        """A tag dated at timestamp 0 still yields a release with a date."""
        # to reproduce bug T815 (fixed)
        expected_id = hash_to_bytes("6cc1deff5cdcd853428bb63b937f43dd2566c36f")
        tagger = b"hey dude <*****@*****.**>"
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        tag_message = b"some release message"
        epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        tag_time = int(epoch.timestamp())

        # given
        tag = dulwich.objects.Tag()
        tag.name = b"blah"
        tag.object = (dulwich.objects.Commit, target_hex)
        tag.message = tag_message
        tag.signature = None
        tag.tagger = tagger
        tag.tag_time = tag_time
        tag.tag_timezone = 0
        assert tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(tag)

        # then
        expected_author = Person(
            email=b"*****@*****.**",
            fullname=b"hey dude <*****@*****.**>",
            name=b"hey dude",
        )
        expected_date = TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=0,
                microseconds=0,
            ),
            offset_bytes=b"+0000",
        )
        assert converted == Release(
            id=expected_id,
            name=b"blah",
            message=tag_message,
            author=expected_author,
            date=expected_date,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
            synthetic=False,
            metadata=None,
        )
示例#17
0
 def build_release(
     self,
     p_info: CRANPackageInfo,
     uncompressed_path: str,
     directory: Sha1Git,
 ) -> Optional[Release]:
     """Build the synthetic release of a CRAN source package.

     The date and author are taken from the ``Date`` and ``Maintainer``
     entries of the intrinsic metadata; a missing maintainer is treated as
     an empty full name.
     """
     # a_metadata is empty
     intrinsic = extract_intrinsic_metadata(uncompressed_path)
     release_date = parse_date(intrinsic.get("Date"))
     maintainer = intrinsic.get("Maintainer", "")
     release_author = Person.from_fullname(maintainer.encode())
     release_message = (
         f"Synthetic release for CRAN source package {p_info.name} "
         f"version {p_info.version}\n"
     )
     release = Release(
         name=p_info.version.encode(),
         message=release_message.encode(),
         date=release_date,
         author=release_author,
         target_type=ObjectType.DIRECTORY,
         target=directory,
         synthetic=True,
     )
     return release
示例#18
0
def test_from_release():
    """Converting a release model object to a dict should be ok.

    Builds a ``Release`` with an author and a UTC date, converts it with
    ``converters.from_release`` and compares the result with the expected
    dict of strings.
    """
    ts = int(
        datetime.datetime(2015, 1, 1, 22, 0, 0,
                          tzinfo=datetime.timezone.utc).timestamp())
    release_input = Release(
        id=hashutil.hash_to_bytes("aad23fa492a0c5fed0708a6703be875448c86884"),
        target=hashutil.hash_to_bytes(
            "5e46d564378afc44b31bb89f99d5675195fbdf67"),
        target_type=ObjectType.REVISION,
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=ts, microseconds=0),
            offset=0,
            negative_utc=False,
        ),
        author=Person(
            name=b"author name",
            fullname=b"Author Name author@email",
            email=b"author@email",
        ),
        name=b"v0.0.1",
        message=b"some comment on release",
        synthetic=True,
    )

    # Each key appears once: the original literal listed "target_type" twice
    # with the same value, which is redundant in a dict literal.
    expected_release = {
        "id": "aad23fa492a0c5fed0708a6703be875448c86884",
        "target": "5e46d564378afc44b31bb89f99d5675195fbdf67",
        "target_type": "revision",
        "date": "2015-01-01T22:00:00+00:00",
        "author": {
            "name": "author name",
            "fullname": "Author Name author@email",
            "email": "author@email",
        },
        "name": "v0.0.1",
        "message": "some comment on release",
        "synthetic": True,
    }

    actual_release = converters.from_release(release_input)

    assert actual_release == expected_release
示例#19
0
    def build_release(
        self,
        p_info: OpamPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic, undated release of an OPAM source package.

        The message names ``self.opam_package`` and the package version; the
        author is taken from ``p_info``.
        """
        release_message = (
            f"Synthetic release for OPAM source package {self.opam_package} "
            f"version {p_info.version}\n"
        )
        release = Release(
            name=p_info.version.encode(),
            author=p_info.author,
            message=release_message.encode(),
            date=None,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
        return release
示例#20
0
    def build_release(
        self,
        p_info: ArchPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of an Arch Linux source package.

        The author is the ``packager`` entry of the intrinsic metadata and
        the ``pkgdesc`` entry is appended to the message; the date comes
        from ``p_info.last_modified``.
        """
        source_dir = Path(uncompressed_path)
        intrinsic = extract_intrinsic_metadata(source_dir)
        packager = Person.from_fullname(intrinsic["packager"].encode())
        package_description = intrinsic["pkgdesc"]

        release_message = (
            f"Synthetic release for Arch Linux source package {p_info.name} "
            f"version {p_info.version}\n\n"
            f"{package_description}\n"
        )
        release = Release(
            name=p_info.version.encode(),
            author=packager,
            date=TimestampWithTimezone.from_iso8601(p_info.last_modified),
            message=release_message.encode(),
            target_type=ObjectType.DIRECTORY,
            target=directory,
            synthetic=True,
        )
        return release
示例#21
0
    def test_load_tag(self):
        """Load a repository holding an annotated tag created through porcelain."""
        hello_path = os.path.join(self.destination_path, "hello.py")
        with open(hello_path, "a") as fd:
            fd.write("print('Hello world')\n")

        self.repo.stage([b"hello.py"])
        commit_id = self.repo.do_commit(b"Hello world\n")

        # Newer Dulwich versions always add a \n to tag messages.
        adds_newline = dulwich.__version__ >= (0, 20, 22)
        tag_message = b"First release!" if adds_newline else b"First release!\n"

        dulwich.porcelain.tag_create(
            self.repo,
            b"v1.0.0",
            message=tag_message,
            annotated=True,
            objectish=commit_id,
        )

        load_status = self.loader.load()
        assert load_status == {"status": "eventful"}

        snapshot_branches = self.loader.storage.snapshot_get_branches(
            self.loader.snapshot.id
        )

        tag_branch = snapshot_branches["branches"][b"refs/tags/v1.0.0"]
        assert tag_branch.target_type == TargetType.RELEASE

        stored_release = self.loader.storage.release_get([tag_branch.target])[0]
        assert stored_release.date is not None
        assert stored_release.author is not None
        # Author and date are set by the tag creation, so they are only
        # checked for presence and copied into the expected release.
        expected_release = Release(
            name=b"v1.0.0",
            message=b"First release!\n",
            target_type=ObjectType.REVISION,
            target=bytehex_to_hash(commit_id),
            author=stored_release.author,
            date=stored_release.date,
            synthetic=False,
        )
        assert stored_release == expected_release
示例#22
0
    def build_release(
        self,
        p_info: NpmPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of an NPM source package.

        Returns ``None`` when no intrinsic metadata can be extracted from
        ``uncompressed_path``.

        Raises:
            ValueError: if ``p_info`` carries no upload date.
        """
        # Metadata from NPM is not intrinsic to tarballs.
        # This means two package versions can have the same tarball, but different
        # metadata. To avoid mixing up releases, every field used to build the
        # release object must be part of NpmPackageInfo.MANIFEST_FORMAT.
        intrinsic = extract_intrinsic_metadata(uncompressed_path)
        if not intrinsic:
            return None
        release_author = extract_npm_package_author(intrinsic)
        assert self.package_name == p_info.package_name
        release_message = (
            f"Synthetic release for NPM source package {p_info.package_name} "
            f"version {p_info.version}\n"
        )

        if p_info.date is None:
            raise ValueError(
                "Origin %s: Cannot determine upload time for artifact %s."
                % (p_info.url, os.path.basename(p_info.url))
            )

        uploaded_at = TimestampWithTimezone.from_iso8601(p_info.date)

        # FIXME: this is to remain bug-compatible with earlier versions:
        truncated_timestamp = attr.evolve(uploaded_at.timestamp, microseconds=0)
        uploaded_at = attr.evolve(uploaded_at, timestamp=truncated_timestamp)

        return Release(
            name=p_info.version.encode(),
            message=release_message.encode(),
            author=release_author,
            date=uploaded_at,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
示例#23
0
    def build_release(
        self,
        p_info: CratesPackageInfo,
        uncompressed_path: str,
        directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release of a crate version.

        The ``package`` entry of the intrinsic metadata, restricted to the
        fields annotated on ``IntrinsicPackageMetadata``, is stored on
        ``p_info.i_metadata`` before the author and description are
        extracted from ``p_info``.
        """
        # Extract intrinsic metadata from dir_path/Cargo.toml
        name = p_info.name
        version = p_info.version
        crate_dir = Path(uncompressed_path, f"{name}-{version}")
        raw_metadata = extract_intrinsic_metadata(crate_dir)
        # Get only corresponding key of IntrinsicPackageMetadata
        known_fields = IntrinsicPackageMetadata.__annotations__
        # We use data only from "package" entry
        package_metadata = {}
        for key, value in raw_metadata["package"].items():
            if key in known_fields:
                package_metadata[key] = value
        p_info.i_metadata = IntrinsicPackageMetadata(
            **package_metadata)  # type: ignore[misc]

        release_author = extract_author(p_info)
        crate_description = extract_description(p_info)
        release_message = (
            f"Synthetic release for Crate source package {p_info.name} "
            f"version {p_info.version}\n\n"
            f"{crate_description}\n"
        )
        # The only way to get a value for updated_at is through extrinsic metadata
        updated_at = p_info.e_metadata_version.get("updated_at")

        release = Release(
            name=version.encode(),
            author=release_author,
            date=TimestampWithTimezone.from_iso8601(updated_at),
            message=release_message.encode(),
            target_type=ObjectType.DIRECTORY,
            target=directory,
            synthetic=True,
        )
        return release
    def test_dulwich_tag_to_release_author_no_date(self):
        """A tag with a tagger but no date yields a release with an author only."""
        # to reproduce bug T815 (fixed)
        expected_id = hash_to_bytes("41076e970975122dc6b2a878aa9797960bc4781d")
        tagger = b"hey dude <*****@*****.**>"
        target_hex = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
        tag_message = b"some release message"

        # given
        tag = dulwich.objects.Tag()
        tag.name = b"blah"
        tag.object = (dulwich.objects.Commit, target_hex)
        tag.message = tag_message
        tag.signature = None
        tag.tagger = tagger
        tag.tag_time = None
        tag.tag_timezone = None
        assert tag.sha().digest() == expected_id

        # when
        converted = converters.dulwich_tag_to_release(tag)

        # then
        expected_author = Person(
            email=b"*****@*****.**",
            fullname=b"hey dude <*****@*****.**>",
            name=b"hey dude",
        )
        assert converted == Release(
            id=expected_id,
            name=b"blah",
            message=tag_message,
            author=expected_author,
            date=None,
            target=hash_to_bytes(target_hex.decode()),
            target_type=ObjectType.REVISION,
            synthetic=False,
            metadata=None,
        )
def test_db_to_release():
    """A database row with an author and no date converts to the matching release."""
    # given
    db_row = {
        "id": b"release-id",
        "target": b"revision-id",
        "target_type": "revision",
        "date": None,
        "date_offset": None,
        "date_neg_utc_offset": None,
        "date_offset_bytes": None,
        "name": b"release-name",
        "comment": b"release comment",
        "synthetic": True,
        "author_fullname": b"auth-name <auth-email>",
        "author_name": b"auth-name",
        "author_email": b"auth-email",
        "raw_manifest": None,
    }

    # when
    converted = converters.db_to_release(db_row)

    # then
    expected_author = Person(
        fullname=b"auth-name <auth-email>",
        name=b"auth-name",
        email=b"auth-email",
    )
    expected_release = Release(
        author=expected_author,
        date=None,
        id=b"release-id",
        name=b"release-name",
        message=b"release comment",
        synthetic=True,
        target=b"revision-id",
        target_type=ObjectType.REVISION,
    )
    assert converted == expected_release
示例#26
0
def test_npm_loader_duplicate_shasum(swh_storage, requests_mock_datadir):
    """Test with two versions that have exactly the same tarball"""
    origin_url = package_url("org_version_mismatch")
    first_loader = NpmLoader(swh_storage, origin_url)

    first_status = first_loader.load()
    snapshot_id = hash_to_bytes("ac867a4c22ba4e22a022d319f309714477412a5a")
    assert first_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )

    beta_release_id = "e6d5490a02ac2a8dcd49702f9ccd5a64c90a46f1"
    release_id = "f6985f437e28db6eb1b7533230e05ed99f2c91f0"
    versions = [
        ("0.0.3-beta", beta_release_id),
        ("0.0.3", release_id),
    ]

    # HEAD aliases the 0.0.3 branch; each version gets its own release branch.
    branches = {
        b"HEAD": SnapshotBranch(
            target=b"releases/0.0.3", target_type=TargetType.ALIAS
        ),
    }
    for version_name, version_id in versions:
        branches[b"releases/" + version_name.encode()] = SnapshotBranch(
            target=hash_to_bytes(version_id),
            target_type=TargetType.RELEASE,
        )
    check_snapshot(Snapshot(id=snapshot_id, branches=branches), swh_storage)

    # Both releases target the same directory and differ by name, message,
    # date and id.
    expected_details = [
        (
            beta_release_id,
            b"0.0.3-beta",
            (
                b"Synthetic release for NPM source package org_version_mismatch "
                b"version 0.0.3-beta\n"
            ),
            datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=datetime.timezone.utc),
        ),
        (
            release_id,
            b"0.0.3",
            (
                b"Synthetic release for NPM source package org_version_mismatch "
                b"version 0.0.3\n"
            ),
            datetime.datetime(2014, 1, 1, 15, 55, 45, tzinfo=datetime.timezone.utc),
        ),
    ]
    for hex_id, version_name, message, uploaded_at in expected_details:
        stored_release = swh_storage.release_get([hash_to_bytes(hex_id)])[0]
        assert stored_release == Release(
            name=version_name,
            message=message,
            target=hash_to_bytes("3370d20d6f96dc1c9e50f083e2134881db110f4f"),
            target_type=ModelObjectType.DIRECTORY,
            synthetic=True,
            author=Person.from_fullname(b"Masafumi Oyamada <*****@*****.**>"),
            date=TimestampWithTimezone.from_datetime(uploaded_at),
            id=hash_to_bytes(hex_id),
        )

    # Check incremental re-load keeps it unchanged

    second_loader = NpmLoader(swh_storage, origin_url)

    second_status = second_loader.load()
    assert second_status == {
        "status": "uneventful",
        "snapshot_id": snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )
示例#27
0
def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info):
    """Load the ``org`` npm package once and verify what ends up in storage.

    The checks cover, in order: the load status and last visit, the resulting
    snapshot, the release created for version 0.0.2, the presence of every
    expected content/directory/release, the raw extrinsic metadata attached to
    each release's directory, and finally the storage object counters.

    ``requests_mock_datadir`` is not referenced in the body; it is requested
    only as a fixture (presumably to serve canned HTTP responses -- confirm
    against the fixture definition).
    """
    origin_url = package_url("org")
    loader = NpmLoader(swh_storage, origin_url)

    # --- load status and visit -------------------------------------------
    load_status = loader.load()
    snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249")
    assert load_status == {"status": "eventful", "snapshot_id": snapshot_id.hex()}

    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="npm",
        snapshot=snapshot_id,
    )

    # --- snapshot --------------------------------------------------------
    first_release_hex = "d38cc0b571cd41f3c85513864e049766b42032a7"
    release_hex_by_version = {
        "0.0.2": first_release_hex,
        "0.0.3": "62bf7076bae9aa2cb4d6cb3bf7ce0ea4fdd5b295",
        "0.0.4": "6e976db82f6c310596b21fb0ed8b11f507631434",
    }

    # HEAD is an alias to the 0.0.4 branch; every version gets a release branch.
    branches = {
        b"HEAD": SnapshotBranch(target=b"releases/0.0.4", target_type=TargetType.ALIAS),
    }
    for version, release_hex in release_hex_by_version.items():
        branches[b"releases/" + version.encode()] = SnapshotBranch(
            target=hash_to_bytes(release_hex),
            target_type=TargetType.RELEASE,
        )
    check_snapshot(Snapshot(id=snapshot_id, branches=branches), swh_storage)

    # --- release for version 0.0.2 ---------------------------------------
    stored_first_release = swh_storage.release_get(
        [hash_to_bytes(first_release_hex)]
    )[0]
    assert stored_first_release == Release(
        name=b"0.0.2",
        message=b"Synthetic release for NPM source package org version 0.0.2\n",
        target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"),
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=Person(
            fullname=b"mooz <*****@*****.**>",
            name=b"mooz",
            email=b"*****@*****.**",
        ),
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=datetime.timezone.utc)
        ),
        id=hash_to_bytes(first_release_hex),
    )

    # --- every expected object is present --------------------------------
    fetched_contents = swh_storage.content_get(_expected_new_contents_first_visit)
    found_contents = [item for item in fetched_contents if item is not None]
    assert len(found_contents) == len(_expected_new_contents_first_visit)

    missing_directories = list(
        swh_storage.directory_missing(_expected_new_directories_first_visit)
    )
    assert missing_directories == []

    missing_releases = list(
        swh_storage.release_missing(_expected_new_releases_first_visit)
    )
    assert missing_releases == []

    # --- extrinsic metadata attached to each release's directory ----------
    authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url="https://npmjs.com/",
    )
    fetcher = MetadataFetcher(
        name="swh.loader.package.npm.loader.NpmLoader",
        version=__version__,
    )
    api_versions = json.loads(org_api_info)["versions"]

    for version, release_hex in release_hex_by_version.items():
        stored_release = swh_storage.release_get([hash_to_bytes(release_hex)])[0]
        assert stored_release.target_type == ModelObjectType.DIRECTORY

        target_swhid = ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY,
            object_id=stored_release.target,
        )
        expected_entry = RawExtrinsicMetadata(
            target=target_swhid,
            authority=authority,
            fetcher=fetcher,
            discovery_date=loader.visit_date,
            format="replicate-npm-package-json",
            metadata=json.dumps(api_versions[version]).encode(),
            origin="https://www.npmjs.com/package/org",
            release=CoreSWHID(
                object_type=ObjectType.RELEASE,
                object_id=hash_to_bytes(release_hex),
            ),
        )

        stored_page = swh_storage.raw_extrinsic_metadata_get(target_swhid, authority)
        assert stored_page == PagedResult(
            next_page_token=None,
            results=[expected_entry],
        )

    # --- object counters --------------------------------------------------
    expected_stats = {
        "content": len(_expected_new_contents_first_visit),
        "directory": len(_expected_new_directories_first_visit),
        "origin": 1,
        "origin_visit": 1,
        "release": len(_expected_new_releases_first_visit),
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert expected_stats == get_stats(swh_storage)
示例#28
0
class StorageData:
    """Data model objects to use within tests.

    Every attribute is a ready-made model object (or a tuple of them) built at
    class-definition time.  Later attributes reference earlier ones (for
    example revisions point at directory ids and releases at revision ids), so
    the definition order matters.  Each object kind also has an aggregate
    tuple (``contents``, ``directories``, ``revisions``, ...) collecting the
    individual fixtures.
    """

    # Three small visible contents; only content3 carries an explicit ctime.
    content = Content(
        data=b"42\n",
        length=3,
        sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
        sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
        sha256=hash_to_bytes(
            "084c799cd551dd1d8d5c5f9a5d593b2e931f5e36122ee5c793c1d08a19839cc0"
        ),
        blake2s256=hash_to_bytes(
            "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d"
        ),
        status="visible",
    )
    content2 = Content(
        data=b"4242\n",
        length=5,
        sha1=hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"),
        sha1_git=hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"),
        sha256=hash_to_bytes(
            "859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd"
        ),
        blake2s256=hash_to_bytes(
            "849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d"
        ),
        status="visible",
    )
    content3 = Content(
        data=b"424242\n",
        length=7,
        sha1=hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"),
        sha1_git=hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"),
        sha256=hash_to_bytes(
            "92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36"
        ),
        blake2s256=hash_to_bytes(
            "76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11"
        ),
        status="visible",
        ctime=datetime.datetime(2019, 12, 1, tzinfo=datetime.timezone.utc),
    )
    contents: Tuple[Content, ...] = (content, content2, content3)

    # Contents recorded with status "absent" and reason "Content too long";
    # only the first one names an origin.
    skipped_content = SkippedContent(
        length=1024 * 1024 * 200,
        sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"),
        sha1=hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"),
        sha256=hash_to_bytes(
            "7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
        ),
        blake2s256=hash_to_bytes(
            "ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b"
        ),
        reason="Content too long",
        status="absent",
        origin="file:///dev/zero",
    )
    skipped_content2 = SkippedContent(
        length=1024 * 1024 * 300,
        sha1_git=hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"),
        sha1=hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"),
        sha256=hash_to_bytes(
            "8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
        ),
        blake2s256=hash_to_bytes(
            "9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b"
        ),
        reason="Content too long",
        status="absent",
    )
    skipped_contents: Tuple[SkippedContent,
                            ...] = (skipped_content, skipped_content2)

    # Directory without entries.  It is defined ahead of ``directory`` because
    # that one references directory5.id.
    directory5 = Directory(
        id=hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
        entries=(),
    )
    directory = Directory(
        id=hash_to_bytes("5256e856a0a0898966d6ba14feb4388b8b82d302"),
        entries=tuple([
            DirectoryEntry(
                name=b"foo",
                type="file",
                target=content.sha1_git,
                perms=from_disk.DentryPerms.content,
            ),
            # This name ends with a lone 0xC3 byte, so it is not valid UTF-8.
            DirectoryEntry(
                name=b"bar\xc3",
                type="dir",
                target=directory5.id,
                perms=from_disk.DentryPerms.directory,
            ),
        ], ),
    )
    directory2 = Directory(
        id=hash_to_bytes("8505808532953da7d2581741f01b29c04b1cb9ab"),
        entries=tuple([
            DirectoryEntry(
                name=b"oof",
                type="file",
                target=content2.sha1_git,
                perms=from_disk.DentryPerms.content,
            )
        ], ),
    )
    # Nests ``directory`` under the name b"subdir" next to two file entries.
    directory3 = Directory(
        id=hash_to_bytes("13089e6e544f78df7c9a40a3059050d10dee686a"),
        entries=tuple([
            DirectoryEntry(
                name=b"foo",
                type="file",
                target=content.sha1_git,
                perms=from_disk.DentryPerms.content,
            ),
            DirectoryEntry(
                name=b"subdir",
                type="dir",
                target=directory.id,
                perms=from_disk.DentryPerms.directory,
            ),
            DirectoryEntry(
                name=b"hello",
                type="file",
                target=content2.sha1_git,
                perms=from_disk.DentryPerms.content,
            ),
        ], ),
    )
    # Wraps directory3 as its single entry.
    directory4 = Directory(
        id=hash_to_bytes("cd5dfd9c09d9e99ed123bc7937a0d5fddc3cd531"),
        entries=tuple([
            DirectoryEntry(
                name=b"subdir1",
                type="dir",
                target=directory3.id,
                perms=from_disk.DentryPerms.directory,
            )
        ], ),
    )

    # Directory that carries an explicit raw_manifest.  Its entries target the
    # placeholder ids b"\x00" * 20 and b"\x01" * 20 rather than objects defined
    # in this class.
    directory6 = Directory(
        id=hash_to_bytes("afa0105cfcaa14fdbacee344e96659170bb1bda5"),
        entries=tuple([
            DirectoryEntry(
                name=b"foo",
                type="file",
                target=b"\x00" * 20,
                perms=from_disk.DentryPerms.content,
            ),
            DirectoryEntry(
                name=b"bar",
                type="dir",
                target=b"\x01" * 20,
                perms=from_disk.DentryPerms.directory,
            ),
        ], ),
        raw_manifest=(
            b"tree 61\x00"
            b"100644 foo\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"  # noqa
            b"40000 bar\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"  # noqa
        ),
    )

    # Note that directory2 comes first: the tuple is not in numbering order.
    directories: Tuple[Directory, ...] = (
        directory2,
        directory,
        directory3,
        directory4,
        directory5,
        directory6,
    )

    # Git revisions.  Parent links: revision has none, revision2 -> revision,
    # revision3 -> (revision, revision2), revision4 -> revision3.
    #
    # ``revision`` exercises a few unusual values: the author fullname has a
    # trailing space, the committer name/fullname contain a 0xC3 byte that is
    # not followed by a continuation byte (not valid UTF-8), and it is the only
    # git revision here with metadata and extra headers.
    revision = Revision(
        id=hash_to_bytes("01a7114f36fddd5ef2511b2cadda237a68adbb12"),
        message=b"hello",
        author=Person(
            name=b"Nicolas Dandrimont",
            email=b"*****@*****.**",
            fullname=b"Nicolas Dandrimont <*****@*****.**> ",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1234567890, microseconds=0),
            offset_bytes=b"+0200",
        ),
        committer=Person(
            name=b"St\xc3fano Zacchiroli",
            email=b"*****@*****.**",
            fullname=b"St\xc3fano Zacchiroli <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1123456789, microseconds=0),
            offset_bytes=b"+0200",
        ),
        parents=(),
        type=RevisionType.GIT,
        directory=directory.id,
        metadata={
            "checksums": {
                "sha1": "tarball-sha1",
                "sha256": "tarball-sha256",
            },
            "signed-off-by": "some-dude",
        },
        extra_headers=(
            (b"gpgsig", b"test123"),
            (b"mergetag", b"foo\\bar"),
            (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"),
        ),
        synthetic=True,
    )
    revision2 = Revision(
        id=hash_to_bytes("a646dd94c912829659b22a1e7e143d2fa5ebde1b"),
        message=b"hello again",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1123456789,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([revision.id]),
        type=RevisionType.GIT,
        directory=directory2.id,
        metadata=None,
        extra_headers=(),
        synthetic=False,
    )
    # Despite its message, this revision does have parents (two of them).
    revision3 = Revision(
        id=hash_to_bytes("beb2844dff30658e27573cb46eb55980e974b391"),
        message=b"a simple revision with no parents this time",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1127351742,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([revision.id, revision2.id]),
        type=RevisionType.GIT,
        directory=directory2.id,
        metadata=None,
        extra_headers=(),
        synthetic=True,
    )
    # NOTE(review): the message reads "parent of self.revision2", yet the
    # parents tuple holds revision3.id -- the message is only fixture text.
    revision4 = Revision(
        id=hash_to_bytes("ae860aec43700c7f5a295e2ef47e2ae41b535dfe"),
        message=b"parent of self.revision2",
        author=Person(
            name=b"me",
            email=b"*****@*****.**",
            fullname=b"me <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"committer-dude",
            email=b"*****@*****.**",
            fullname=b"committer-dude <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1244567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        parents=tuple([revision3.id]),
        type=RevisionType.GIT,
        directory=directory.id,
        metadata=None,
        extra_headers=(),
        synthetic=False,
    )
    git_revisions: Tuple[Revision,
                         ...] = (revision, revision2, revision3, revision4)

    # Mercurial counterparts of the git revisions above, with the same parent
    # structure.  hg_revision keeps its "node" value in ``metadata``, whereas
    # hg_revision2..4 carry it as a b"node" extra header.
    hg_revision = Revision(
        id=hash_to_bytes("951c9503541e7beaf002d7aebf2abd1629084c68"),
        message=b"hello",
        author=Person(
            name=b"Nicolas Dandrimont",
            email=b"*****@*****.**",
            fullname=b"Nicolas Dandrimont <*****@*****.**> ",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1234567890, microseconds=0),
            offset_bytes=b"+0200",
        ),
        committer=Person(
            name=b"St\xc3fano Zacchiroli",
            email=b"*****@*****.**",
            fullname=b"St\xc3fano Zacchiroli <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1123456789, microseconds=0),
            offset_bytes=b"+0200",
        ),
        parents=(),
        type=RevisionType.MERCURIAL,
        directory=directory.id,
        metadata={
            "checksums": {
                "sha1": "tarball-sha1",
                "sha256": "tarball-sha256",
            },
            "signed-off-by": "some-dude",
            "node": "a316dfb434af2b451c1f393496b7eaeda343f543",
        },
        extra_headers=(),
        synthetic=True,
    )
    hg_revision2 = Revision(
        id=hash_to_bytes("df4afb063236300eb13b96a0d7fff03f7b7cbbaf"),
        message=b"hello again",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1123456789,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([hg_revision.id]),
        type=RevisionType.MERCURIAL,
        directory=directory2.id,
        metadata=None,
        extra_headers=(
            (b"node",
             hash_to_bytes("fa1b7c84a9b40605b67653700f268349a6d6aca1")), ),
        synthetic=False,
    )
    hg_revision3 = Revision(
        id=hash_to_bytes("84d8e7081b47ebb88cad9fa1f25de5f330872a37"),
        message=b"a simple revision with no parents this time",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1127351742,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([hg_revision.id, hg_revision2.id]),
        type=RevisionType.MERCURIAL,
        directory=directory2.id,
        metadata=None,
        extra_headers=(
            (b"node",
             hash_to_bytes("7f294a01c49065a90b3fe8b4ad49f08ce9656ef6")), ),
        synthetic=True,
    )
    hg_revision4 = Revision(
        id=hash_to_bytes("4683324ba26dfe941a72cc7552e86eaaf7c27fe3"),
        message=b"parent of self.revision2",
        author=Person(
            name=b"me",
            email=b"*****@*****.**",
            fullname=b"me <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"committer-dude",
            email=b"*****@*****.**",
            fullname=b"committer-dude <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1244567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        parents=tuple([hg_revision3.id]),
        type=RevisionType.MERCURIAL,
        directory=directory.id,
        metadata=None,
        extra_headers=(
            (b"node",
             hash_to_bytes("f4160af0485c85823d9e829bae2c00b00a2e6297")), ),
        synthetic=False,
    )
    hg_revisions: Tuple[Revision, ...] = (
        hg_revision,
        hg_revision2,
        hg_revision3,
        hg_revision4,
    )
    # All revisions: the git ones first, then the mercurial ones.
    revisions: Tuple[Revision, ...] = git_revisions + hg_revisions

    # The last origin URL contains non-ASCII characters (a Cyrillic TLD and an
    # emoji in the path).
    origins: Tuple[Origin, ...] = (
        Origin(url="https://github.com/user1/repo1"),
        Origin(url="https://github.com/user2/repo1"),
        Origin(url="https://github.com/user3/repo1"),
        Origin(url="https://gitlab.com/user1/repo1"),
        Origin(url="https://gitlab.com/user2/repo1"),
        Origin(url="https://forge.softwareheritage.org/source/repo1"),
        Origin(url="https://example.рф/🏛️.txt"),
    )
    # Shorthands for the first two origins.
    origin, origin2 = origins[:2]

    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url="http://hal.inria.example.com/",
    )
    metadata_authority2 = MetadataAuthority(
        type=MetadataAuthorityType.REGISTRY,
        url="http://wikidata.example.com/",
    )
    authorities: Tuple[MetadataAuthority, ...] = (
        metadata_authority,
        metadata_authority2,
    )

    metadata_fetcher = MetadataFetcher(
        name="swh-deposit",
        version="0.0.1",
    )
    metadata_fetcher2 = MetadataFetcher(
        name="swh-example",
        version="0.0.1",
    )
    fetchers: Tuple[MetadataFetcher,
                    ...] = (metadata_fetcher, metadata_fetcher2)

    # Timezone-aware (UTC) visit dates: Jan 1st of 2015, 2017 and 2018 at 23:00.
    date_visit1 = datetime.datetime(2015,
                                    1,
                                    1,
                                    23,
                                    0,
                                    0,
                                    tzinfo=datetime.timezone.utc)
    date_visit2 = datetime.datetime(2017,
                                    1,
                                    1,
                                    23,
                                    0,
                                    0,
                                    tzinfo=datetime.timezone.utc)
    date_visit3 = datetime.datetime(2018,
                                    1,
                                    1,
                                    23,
                                    0,
                                    0,
                                    tzinfo=datetime.timezone.utc)

    type_visit1 = "git"
    type_visit2 = "hg"
    type_visit3 = "deb"

    # Two visits of ``origin`` (type "git") and one visit of ``origin2``
    # (type "hg").  date_visit3 and type_visit3 are not used by any visit
    # defined in this class.
    origin_visit = OriginVisit(
        origin=origin.url,
        visit=1,
        date=date_visit1,
        type=type_visit1,
    )
    origin_visit2 = OriginVisit(
        origin=origin.url,
        visit=2,
        date=date_visit2,
        type=type_visit1,
    )
    origin_visit3 = OriginVisit(
        origin=origin2.url,
        visit=1,
        date=date_visit1,
        type=type_visit2,
    )
    origin_visits: Tuple[OriginVisit, ...] = (
        origin_visit,
        origin_visit2,
        origin_visit3,
    )

    # Releases targeting the first three git revisions.  release2 and release3
    # share the name b"v0.0.2", author and date but differ in target, message
    # and the synthetic flag.
    release = Release(
        id=hash_to_bytes("f7f222093a18ec60d781070abec4a630c850b837"),
        name=b"v0.0.1",
        author=Person(
            name=b"olasd",
            email=b"*****@*****.**",
            fullname=b"olasd <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1234567890, microseconds=0),
            offset_bytes=b"+0042",
        ),
        target=revision.id,
        target_type=ObjectType.REVISION,
        message=b"synthetic release",
        synthetic=True,
    )
    release2 = Release(
        id=hash_to_bytes("db81a26783a3f4a9db07b4759ffc37621f159bb2"),
        name=b"v0.0.2",
        author=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1634366813, microseconds=0),
            offset_bytes=b"-0200",
        ),
        target=revision2.id,
        target_type=ObjectType.REVISION,
        message=b"v0.0.2\nMisc performance improvements + bug fixes",
        synthetic=False,
    )
    release3 = Release(
        id=hash_to_bytes("1c5d42e603ce2eea44917fadca76c78bad76aeb9"),
        name=b"v0.0.2",
        author=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1634366813, microseconds=0),
            offset_bytes=b"-0200",
        ),
        target=revision3.id,
        target_type=ObjectType.REVISION,
        message=b"yet another synthetic release",
        synthetic=True,
    )

    releases: Tuple[Release, ...] = (release, release2, release3)

    # Snapshot with a single b"master" branch pointing at ``revision``.
    snapshot = Snapshot(
        id=hash_to_bytes("9b922e6d8d5b803c1582aabe5525b7b91150788e"),
        branches={
            b"master":
            SnapshotBranch(
                target=revision.id,
                target_type=TargetType.REVISION,
            ),
        },
    )
    # Snapshot without any branch.
    empty_snapshot = Snapshot(
        id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
        branches={},
    )
    # Snapshot with branches of several target types (directory, content,
    # alias, revision, release, snapshot) plus a branch whose value is None.
    complete_snapshot = Snapshot(
        id=hash_to_bytes("db99fda25b43dc5cd90625ee4b0744751799c917"),
        branches={
            b"directory":
            SnapshotBranch(
                target=directory.id,
                target_type=TargetType.DIRECTORY,
            ),
            b"directory2":
            SnapshotBranch(
                target=directory2.id,
                target_type=TargetType.DIRECTORY,
            ),
            b"content":
            SnapshotBranch(
                target=content.sha1_git,
                target_type=TargetType.CONTENT,
            ),
            # Alias pointing at the b"revision" branch defined just below.
            b"alias":
            SnapshotBranch(
                target=b"revision",
                target_type=TargetType.ALIAS,
            ),
            b"revision":
            SnapshotBranch(
                target=revision.id,
                target_type=TargetType.REVISION,
            ),
            b"release":
            SnapshotBranch(
                target=release.id,
                target_type=TargetType.RELEASE,
            ),
            b"snapshot":
            SnapshotBranch(
                target=empty_snapshot.id,
                target_type=TargetType.SNAPSHOT,
            ),
            b"dangling":
            None,
        },
    )

    snapshots: Tuple[Snapshot,
                     ...] = (snapshot, empty_snapshot, complete_snapshot)

    # Raw extrinsic metadata whose target is ``content``.  The first two use
    # the first authority/fetcher and differ in origin, date, format and
    # payload; the third uses the second authority/fetcher and additionally
    # fills in the context fields (visit, snapshot, release, revision,
    # directory, path).
    content_metadata1 = RawExtrinsicMetadata(
        target=ExtendedSWHID(object_type=ExtendedObjectType.CONTENT,
                             object_id=content.sha1_git),
        origin=origin.url,
        discovery_date=datetime.datetime(2015,
                                         1,
                                         1,
                                         21,
                                         0,
                                         0,
                                         tzinfo=datetime.timezone.utc),
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="json",
        metadata=b'{"foo": "bar"}',
    )
    content_metadata2 = RawExtrinsicMetadata(
        target=ExtendedSWHID(object_type=ExtendedObjectType.CONTENT,
                             object_id=content.sha1_git),
        origin=origin2.url,
        discovery_date=datetime.datetime(2017,
                                         1,
                                         1,
                                         22,
                                         0,
                                         0,
                                         tzinfo=datetime.timezone.utc),
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="yaml",
        metadata=b"foo: bar",
    )
    content_metadata3 = RawExtrinsicMetadata(
        target=ExtendedSWHID(object_type=ExtendedObjectType.CONTENT,
                             object_id=content.sha1_git),
        discovery_date=datetime.datetime(2017,
                                         1,
                                         1,
                                         22,
                                         0,
                                         0,
                                         tzinfo=datetime.timezone.utc),
        # attr.evolve(..., metadata=None) builds a copy of the authority /
        # fetcher with its ``metadata`` attribute set to None.
        authority=attr.evolve(metadata_authority2, metadata=None),
        fetcher=attr.evolve(metadata_fetcher2, metadata=None),
        format="yaml",
        metadata=b"foo: bar",
        origin=origin.url,
        visit=42,
        snapshot=snapshot.swhid(),
        release=release.swhid(),
        revision=revision.swhid(),
        directory=directory.swhid(),
        path=b"/foo/bar",
    )

    content_metadata: Tuple[RawExtrinsicMetadata, ...] = (
        content_metadata1,
        content_metadata2,
        content_metadata3,
    )

    # Raw extrinsic metadata whose target is the SWHID of ``origin``.  The
    # first two use the first authority/fetcher; the third uses the second
    # authority/fetcher with the same date, format and payload as the second.
    origin_metadata1 = RawExtrinsicMetadata(
        target=Origin(origin.url).swhid(),
        discovery_date=datetime.datetime(2015,
                                         1,
                                         1,
                                         21,
                                         0,
                                         0,
                                         tzinfo=datetime.timezone.utc),
        authority=attr.evolve(metadata_authority, metadata=None),
        fetcher=attr.evolve(metadata_fetcher, metadata=None),
        format="json",
        metadata=b'{"foo": "bar"}',
    )
    origin_metadata2 = RawExtrinsicMetadata(
        target=Origin(origin.url).swhid(),
        discovery_date=datetime.datetime(2017,
                                         1,
                                         1,
                                         22,
                                         0,
                                         0,
                                         tzinfo=datetime.timezone.utc),
        authority=attr.evolve(metadata_authority, metadata=None),
        fetcher=attr.evolve(metadata_fetcher, metadata=None),
        format="yaml",
        metadata=b"foo: bar",
    )
    origin_metadata3 = RawExtrinsicMetadata(
        target=Origin(origin.url).swhid(),
        discovery_date=datetime.datetime(2017,
                                         1,
                                         1,
                                         22,
                                         0,
                                         0,
                                         tzinfo=datetime.timezone.utc),
        authority=attr.evolve(metadata_authority2, metadata=None),
        fetcher=attr.evolve(metadata_fetcher2, metadata=None),
        format="yaml",
        metadata=b"foo: bar",
    )

    origin_metadata: Tuple[RawExtrinsicMetadata, ...] = (
        origin_metadata1,
        origin_metadata2,
        origin_metadata3,
    )

    # External identifiers.  extid1 maps the git revision to its own id.
    extid1 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.REVISION,
                         object_id=revision.id),
        extid_type="git",
        extid=revision.id,
    )

    # The extid here is the same hex value stored as "node" in
    # hg_revision.metadata.
    extid2 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.REVISION,
                         object_id=hg_revision.id),
        extid_type="mercurial",
        extid=hash_to_bytes("a316dfb434af2b451c1f393496b7eaeda343f543"),
    )

    # extid3 and extid4 share extid_type and extid; they differ in target and
    # in extid_version (only extid4 sets it explicitly).
    extid3 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.DIRECTORY,
                         object_id=directory.id),
        extid_type="directory",
        extid=b"something",
    )
    extid4 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.DIRECTORY,
                         object_id=directory2.id),
        extid_type="directory",
        extid=b"something",
        extid_version=2,
    )

    extids: Tuple[ExtID, ...] = (
        extid1,
        extid2,
        extid3,
        extid4,
    )
示例#29
0
        (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"),
    ),
    synthetic=True,
)

# Synthetic release fixture named b"v0.0.2" that targets the REVISION fixture.
RELEASE = Release(
    id=hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"),
    name=b"v0.0.2",
    message=b"yet another synthetic release",
    synthetic=True,
    target_type=ObjectType.REVISION,
    target=REVISION.id,
    author=Person(
        fullname=b"tony <*****@*****.**>",
        name=b"tony",
        email=b"*****@*****.**",
    ),
    date=TimestampWithTimezone.from_datetime(
        datetime.datetime(
            year=2021,
            month=10,
            day=15,
            hour=22,
            minute=26,
            second=53,
            tzinfo=datetime.timezone.utc,
        )
    ),
)

SNAPSHOT = Snapshot(
    id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"),
    branches={
        b"release/0.1.0":
    def test_weird_tag(self):
        """Check that raw_manifest is set when the tag cannot fit the data model.

        Four variants of one tag manifest are converted: the pristine one,
        two with an altered timezone offset, and one with the target hash
        upper-cased. Only the last one is expected to yield a release that
        carries an explicit raw_manifest.
        """

        def to_release(manifest):
            # Parse the raw tag bytes with dulwich, then run the converter.
            parsed = dulwich.objects.Tag.from_raw_string(b"tag", manifest)
            return converters.dulwich_tag_to_release(parsed)

        def expected(offset_bytes, **overrides):
            # Release shared by every case below; only the date's offset_bytes
            # and (when passed through overrides) raw_manifest vary.
            return Release(
                name=b"blah",
                message=b"some release message",
                target=hash_to_bytes(
                    "641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"),
                target_type=ObjectType.REVISION,
                synthetic=False,
                author=Person.from_fullname(b"Foo <*****@*****.**>"),
                date=TimestampWithTimezone(
                    timestamp=Timestamp(seconds=1640191027, microseconds=0),
                    offset_bytes=offset_bytes,
                ),
                **overrides,
            )

        # Well-formed manifest: the release must not carry a raw_manifest.
        well_formed = (b"object 641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce\n"
                       b"type commit\n"
                       b"tag blah\n"
                       b"tagger Foo <*****@*****.**> 1640191027 +0200\n\n"
                       b"some release message")
        assert to_release(well_formed) == expected(b"+0200",
                                                   raw_manifest=None)

        # Mess with the offset (negative UTC): the unusual offset is expected
        # to be kept verbatim in offset_bytes.
        negative_utc = well_formed.replace(b"+0200", b"-0000")
        assert to_release(negative_utc) == expected(b"-0000")

        # Mess with the offset (other): same expectation for a 3-digit offset.
        short_offset = well_formed.replace(b"+0200", b"+200")
        assert to_release(short_offset) == expected(b"+200")

        # Mess with the rest of the manifest: the target hash is upper-cased,
        # and the expected release keeps the original bytes, prefixed with a
        # "tag 136" + NUL header, in raw_manifest.
        uppercased = well_formed.replace(
            b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce",
            b"641FB6E08DDB2E4FD096DCF18E80B894BF7E25CE",
        )
        assert to_release(uppercased) == expected(
            b"+0200",
            raw_manifest=b"tag 136\x00" + uppercased,
        )