Пример #1
0
    def test_directory_to_objects_ignore_name_case(self):
        """A case-insensitive name filter must drop the symlinks directory."""
        dir_filter = from_disk.ignore_named_directories(
            [b"symLiNks"], case_sensitive=False
        )
        directory = Directory.from_disk(
            path=self.tmpdir_name, dir_filter=dir_filter
        )

        # Regular contents are still present.
        for entry_name, expected in self.contents.items():
            self.assertContentEqual(directory[b"contents/" + entry_name], expected)

        # Special files are collected as empty contents.
        for entry_name in self.specials:
            self.assertContentEqual(
                directory[b"specials/" + entry_name],
                self.empty_content,
            )

        self.assertEqual(
            directory[b"empty1/empty2"].get_data(),
            self.empty_directory,
        )

        # The filtered directory is gone despite the case mismatch.
        with self.assertRaisesRegex(KeyError, "b'symlinks'"):
            directory[b"symlinks"]

        collected = directory.collect()

        self.assertCountEqual(["content", "directory"], collected)
        self.assertEqual(len(collected["directory"]), 5)
        self.assertEqual(len(collected["content"]), len(self.contents) + 1)
def jar_dirs(datadir, tmp_path):
    """Uncompress the two sample sprova4j source jars and return the
    Directory model object of each extracted tree, in version order."""
    jar_names = ("sprova4j-0.1.0-sources.jar", "sprova4j-0.1.1-sources.jar")

    extracted_dirs = []
    for index, jar_name in enumerate(jar_names, start=1):
        archive_path = os.path.join(datadir, "https_maven.org", jar_name)
        extract_path = os.path.join(tmp_path, "jar_%d" % index)
        uncompress(archive_path, extract_path)
        extracted_dirs.append(Directory.from_disk(path=extract_path.encode()))

    return extracted_dirs
Пример #3
0
    def test_directory_to_objects_ignore_empty(self):
        """Empty directories must be pruned recursively by the filter."""
        directory = Directory.from_disk(
            path=self.tmpdir_name, dir_filter=from_disk.ignore_empty_directories
        )

        for entry_name, expected in self.contents.items():
            self.assertContentEqual(directory[b"contents/" + entry_name], expected)

        for entry_name, expected in self.symlinks.items():
            self.assertContentEqual(directory[b"symlinks/" + entry_name], expected)

        for entry_name in self.specials:
            self.assertContentEqual(
                directory[b"specials/" + entry_name],
                self.empty_content,
            )

        # empty directories have been ignored recursively
        for missing_path in (b"empty1", b"empty1/empty2"):
            with self.assertRaisesRegex(KeyError, "b'empty1'"):
                directory[missing_path]

        collected = directory.collect()

        self.assertCountEqual(["content", "directory"], collected)

        self.assertEqual(len(collected["directory"]), 4)
        expected_contents = len(self.contents) + len(self.symlinks) + 1
        self.assertEqual(len(collected["content"]), expected_contents)
Пример #4
0
    def fetch_data(self):
        """Retrieve, uncompress archive and fetch objects from the tarball.
           The actual ingestion takes place in the :meth:`store_data`
           implementation below.

        """
        url = self.get_tarball_url_to_retrieve()
        filepath, hashes = self.client.download(url)
        nature = tarball.uncompress(filepath, self.dir_path)

        # Hash the uncompressed tree from disk.
        dir_path = self.dir_path.encode('utf-8')
        directory = Directory.from_disk(path=dir_path, save_path=True)
        objects = directory.collect()
        # Guarantee both keys exist even for degenerate trees.
        objects.setdefault('content', {})
        objects.setdefault('directory', {})

        # compute the full revision (with ids)
        revision = self.build_revision(filepath, nature, hashes)
        revision = revision_from(directory.hash, revision)
        objects['revision'] = {revision['id']: revision}

        snapshot = self.build_snapshot(revision)
        objects['snapshot'] = {snapshot['id']: snapshot}

        self.objects = objects
Пример #5
0
    def list_objs(self, *, dir_path, revision, release, branch_name):
        """List all objects from dir_path.

        Args:
            dir_path (str): the directory to list
            revision (dict): revision dictionary representation
            release (dict): release dictionary representation
            branch_name (str): branch name

        Returns:
            dict: a mapping from object types ('content', 'directory',
            'revision', 'release', 'snapshot') with a dictionary
            mapping each object's id to the object

        """
        log_id = str(uuid.uuid4())
        sdir_path = dir_path.decode('utf-8')

        log_data = {
            'swh_type': 'dir_list_objs_end',
            'swh_repo': sdir_path,
            'swh_id': log_id,
        }

        self.log.debug("Started listing {swh_repo}".format(**log_data),
                       extra=log_data)

        # Walk the on-disk tree, then make sure both base keys exist
        # even when the collected tree is degenerate.
        directory = Directory.from_disk(path=dir_path, save_path=True)
        objects = directory.collect()
        objects.setdefault('content', {})
        objects.setdefault('directory', {})

        full_rev = revision_from(directory.hash, revision)
        rev_id = full_rev['id']
        objects['revision'] = {rev_id: full_rev}

        # A release object is only materialized when one is named.
        objects['release'] = {}
        if release and 'name' in release:
            full_rel = release_from(rev_id, release)
            objects['release'][full_rel['id']] = full_rel

        snapshot = snapshot_from(rev_id, branch_name)
        objects['snapshot'] = {snapshot['id']: snapshot}

        # Per-type object counts for the closing log line.
        for key, values in objects.items():
            log_data['swh_num_%s' % key] = len(values)

        self.log.debug(("Done listing the objects in {swh_repo}: "
                        "{swh_num_content} contents, "
                        "{swh_num_directory} directories, "
                        "{swh_num_revision} revisions, "
                        "{swh_num_release} releases, "
                        "{swh_num_snapshot} snapshot").format(**log_data),
                       extra=log_data)

        return objects
Пример #6
0
def model_of_dir(path: bytes,
                 exclude_patterns: Iterable[bytes] = ()) -> Directory:
    """Build a ``Directory`` model object from the on-disk tree at *path*.

    Args:
        path: filesystem path (as bytes) of the directory to model.
        exclude_patterns: directory-name patterns to skip while walking;
            an empty iterable (or None, for backward compatibility with
            previous callers) accepts every directory.

    Returns:
        The ``Directory`` model for *path*.
    """
    from swh.model.from_disk import accept_all_directories, ignore_directories_patterns

    # NOTE: the previous signature defaulted to None while annotating a
    # plain Iterable[bytes] (implicit Optional, rejected by strict type
    # checkers). Defaulting to () keeps the falsy check below behaving
    # identically, including for callers that still pass None.
    dir_filter = (ignore_directories_patterns(path, exclude_patterns)
                  if exclude_patterns else accept_all_directories)

    return Directory.from_disk(path=path, dir_filter=dir_filter)
Пример #7
0
    def test_contents_match(self):
        """Each entry of the extracted sample folder matches its fixture."""
        sample_path = os.path.join(self.tmpdir_name, b"sample-folder")
        directory = Directory.from_disk(path=sample_path)

        for entry_name, expected in self.tarball_contents.items():
            node = directory[entry_name]
            if isinstance(node, Content):
                self.assertContentEqual(node, expected)
            elif isinstance(node, Directory):
                self.assertDirectoryEqual(node, expected)
            else:
                raise self.failureException("Unknown type for %s" % node)
Пример #8
0
    def test_directory_entry_order(self):
        """Entries are ordered 'foo.' < 'foo' (a directory) < 'foo0'.

        NOTE(review): this matches git tree ordering, where directory
        names compare as if suffixed with '/' — confirm against
        swh.model's sorting helper.
        """
        with tempfile.TemporaryDirectory() as dirname:
            dirname = os.fsencode(dirname)
            # Close the handles right away: the original left both files
            # open, leaking descriptors and raising ResourceWarning.
            open(os.path.join(dirname, b"foo."), "a").close()
            open(os.path.join(dirname, b"foo0"), "a").close()
            os.mkdir(os.path.join(dirname, b"foo"))

            directory = Directory.from_disk(path=dirname)

        assert [entry["name"] for entry in directory.entries] == [
            b"foo.",
            b"foo",
            b"foo0",
        ]
Пример #9
0
    def test_iter_directory(self):
        """Iter from_disk.directory should yield the full arborescence tree"""
        directory = Directory.from_disk(
            path=os.path.join(self.tmpdir_name, b"sample-folder")
        )

        contents, skipped_contents, directories = from_disk.iter_directory(directory)

        # Tally the expected node count per object type from the fixture.
        expected_counts = defaultdict(int)
        for entry_name in self.tarball_contents:
            expected_counts[directory[entry_name].object_type] += 1

        assert len(contents) > 0
        assert len(contents) == expected_counts["content"]
        assert len(skipped_contents) == 0
        assert len(directories) > 0
        assert len(directories) == expected_counts["directory"]
Пример #10
0
def process_package(package):
    """Process a source package into its constituent components.

    The source package will be decompressed in a temporary directory.

    Args:
        package (dict): a dict with the following keys:

            - name: source package name
            - version: source package version
            - dsc: the full path of the package's DSC file.

    Returns:
        tuple: A tuple with three elements:

        - directory: the Directory model object of the extracted
          package tree
        - metadata: the metadata from get_package_metadata
        - tempdir: the temporary directory the package was downloaded
          and extracted into (not removed by this function)

    Raises:
        FileNotFoundError: if the dsc file does not exist
        PackageExtractionFailed: if package extraction failed

    """
    log.info("Processing package %s_%s" %
             (package['name'], str(package['version'])),
             extra={
                 'swh_type': 'deb_process_start',
                 'swh_name': package['name'],
                 'swh_version': str(package['version']),
             })

    tempdir = download_package(package)
    dsc, debdir = extract_package(package, tempdir)

    # Hash the extracted tree, recording on-disk paths for later reuse.
    directory = Directory.from_disk(path=os.fsencode(debdir), save_path=True)
    metadata = get_package_metadata(package, dsc, debdir)

    return directory, metadata, tempdir
Пример #11
0
    def swh_hash_data_at_revision(
            self, revision: int) -> Tuple[Dict, DirectoryFromDisk]:
        """Compute the information at a given svn revision. This is expected to be used
        for checks only.

        Returns:
            The tuple (commit dictionary, targeted directory object).

        """
        # Update disk representation of the repository at revision id
        local_dirname, local_url = self.export_temporary(revision)
        # Compute the current hashes on disk
        directory = DirectoryFromDisk.from_disk(
            path=local_url, max_content_length=self.max_content_length)

        # Retrieve the commit information for revision
        commit = self.commit_info(revision)

        # Clean export directory
        self.clean_fs(local_dirname)

        # This is a plain function, not a generator: the previous
        # docstring incorrectly documented a "Yields:" section.
        return commit, directory
Пример #12
0
    def test_directory_to_objects(self):
        """An unfiltered tree exposes contents, symlinks, specials and
        empty directories, and rejects unknown paths."""
        directory = Directory.from_disk(path=self.tmpdir_name)

        for entry_name, expected in self.contents.items():
            self.assertContentEqual(directory[b"contents/" + entry_name], expected)

        for entry_name, expected in self.symlinks.items():
            self.assertContentEqual(directory[b"symlinks/" + entry_name], expected)

        for entry_name in self.specials:
            self.assertContentEqual(
                directory[b"specials/" + entry_name],
                self.empty_content,
            )

        self.assertEqual(
            directory[b"empty1/empty2"].get_data(),
            self.empty_directory,
        )

        # Raise on non existent file
        with self.assertRaisesRegex(KeyError, "b'nonexistent'"):
            directory[b"empty1/nonexistent"]

        # Raise on non existent directory
        with self.assertRaisesRegex(KeyError, "b'nonexistentdir'"):
            directory[b"nonexistentdir/file"]

        collected = directory.collect()

        self.assertCountEqual(["content", "directory"], collected)

        self.assertEqual(len(collected["directory"]), 6)
        expected_contents = len(self.contents) + len(self.symlinks) + 1
        self.assertEqual(len(collected["content"]), expected_contents)
Пример #13
0
 def test_directory_swhid(self):
     """The SWHID of the loaded tree equals the known empty-directory id.

     NOTE(review): assumes self.tmpdir_name holds an empty directory so
     its id matches self.empty_directory — confirm against the fixture.
     """
     directory_swhid = "swh:1:dir:" + hash_to_hex(self.empty_directory["id"])
     directory = Directory.from_disk(path=self.tmpdir_name)
     assert str(directory.swhid()) == directory_swhid
                        out.append(spindent + "}]")
                elif key in ALGORITHMS | {"id", "target"}:
                    format_hash(value, indent=indent)
                elif isinstance(value, DentryPerms):
                    out.append(str(value))
                else:
                    out.append(repr(value))
            out.append(",\n")

    spindent = " " * indent
    out.append(spindent + "%s = {\n" % varname)
    format_dict_items(data, indent=4 + indent)
    out.append(spindent + "}")

    return "".join(out)


if __name__ == "__main__":
    # Regenerate expected test data for each directory given on the
    # command line and print it to stdout.
    if not sys.argv[1:]:
        print("Usage: %s dir1 dir2" % sys.argv[0], file=sys.stderr)
        # sys.exit, not the site-module exit(): the latter is absent
        # under `python -S` and in frozen interpreters.
        sys.exit(2)

    for dirname in sys.argv[1:]:
        basename = os.path.basename(dirname)
        varname = "expected_%s" % basename
        testdata = generate_from_directory(
            varname, Directory.from_disk(path=os.fsencode(dirname)), indent=8
        )
        print(testdata)
        print()