Python run_urls_async_with_pagination示例，presqt.targets.utilities.run_urls_async_with_pagination Python示例

示例#1

0

显示文件

    def get_resources(self, url=None):
        """
        Get all of the user's resources. To batch calls together asynchronously we will group calls
        together by projects, then storages, then each storage's resources.
        """
        resources = []

        all_projects, top_level_projects = self.projects(url)

        # Add all top level projects and subprojects to the resources list
        self.iter_project_hierarchy(all_projects, top_level_projects, resources)

        # Add all storages to the resource list
        user_storages_links = self.iter_project_storages(all_projects, resources)

        # Get initial resources for all storages
        all_storages_resources = run_urls_async_with_pagination(self, user_storages_links)
        # Loop through the storage resources to either add them to the main resources list or
        # traverse further down the tree to get their children resources.
        for storage_resources in all_storages_resources:
            if storage_resources['data']:
                # Calculate the given resource's container_id
                parent_project_id = storage_resources['data'][0]['relationships']['node']['data']['id']
                parent_storage = storage_resources['data'][0]['attributes']['provider']
                container_id = '{}:{}'.format(parent_project_id, parent_storage)

                self.iter_resources_objects(storage_resources, resources, container_id)

        return resources

示例#2

0

显示文件

文件： main.py 项目： ndlib/presqt

    def iter_resources_objects(self, container_resource, resources,
                               container_id):
        """
        Recursive function to add resource data to the resources list.
        """
        folder_data = []
        for resource in container_resource['data']:
            kind = resource['attributes']['kind']

            if kind == 'file':
                file = File(resource, self.session)
                file_obj = {
                    'kind': file.kind,
                    'kind_name': file.kind_name,
                    'id': file.id,
                    'container': container_id,
                    'title': file.title
                }
                resources.append(file_obj)

            elif kind == 'folder':
                folder = Folder(resource, self.session)
                folder_obj = {
                    'kind': folder.kind,
                    'kind_name': folder.kind_name,
                    'id': folder.id,
                    'container': container_id,
                    'title': folder.title
                }
                resources.append(folder_obj)

                # Keep track of all folders' file urls that need to be called.
                folder_data.append({
                    'url': folder._files_url,
                    'id': folder.id,
                    'path': folder.materialized_path
                })

        # Asynchronously call all folder file urls to get the folder's top level resources.
        all_folders_resources = run_urls_async_with_pagination(
            self, [folder_dict['url'] for folder_dict in folder_data])

        # For each folder, get it's container_id and resources
        for folder_resources in all_folders_resources:
            if folder_resources['data']:
                resource_attr = folder_resources['data'][0]['attributes']
                if resource_attr['kind'] == 'folder':
                    parent_path = resource_attr[
                        'materialized_path'][:-len(resource_attr['name']) - 1]
                else:
                    parent_path = resource_attr[
                        'materialized_path'][:-len(resource_attr['name'])]
                # Find the corresponding parent_path in the folder_data list of dictionaries so we
                # can get the container id for this resource.
                container_id = get_dictionary_from_list(
                    folder_data, 'path', parent_path)['id']

                self.iter_resources_objects(folder_resources, resources,
                                            container_id)

示例#3

0

显示文件

文件： main.py 项目： ndlib/presqt

    def iter_project_hierarchy(self, all_projects, current_level_projects,
                               resources):
        """
        Recursive function to add project data to the resources list.
        """
        # Keep track of every project's children links so we can call them asynchronously
        child_projects_links = []

        # Add each project to the resource list
        for project in current_level_projects:
            # It's possible for a project to be a subproject while the user does not have access to the
            # parent project. Check if the current project has a parent project owned by the user.
            for proj in all_projects:
                if proj.id == project.parent_node_id:
                    container_id = proj.id
                    break
            else:
                container_id = None

            resources.append({
                'kind': 'container',
                'kind_name': 'project',
                'id': project.id,
                'container': container_id,
                'title': project.title
            })
            child_projects_links.append(project.children_link)

        # Asynchronously get data for all child projects
        child_projects_data = run_urls_async_with_pagination(
            self, child_projects_links)

        # Create Project class instances for child projects
        children_projects = []
        for child_data in child_projects_data:
            for child in child_data['data']:
                child_project = Project(child, self.session)
                children_projects.append(child_project)

                # If the collection is part of a search result then the children projects haven't
                # been added to the main all_projects list yet. Add them in this case.
                for proj in all_projects:
                    if proj.id == child_project.id:
                        break
                else:
                    all_projects.append(child_project)

        # recursively call the iter_project_hierarchy for all child projects
        if children_projects:
            self.iter_project_hierarchy(all_projects, children_projects,
                                        resources)

示例#4

0

显示文件

文件： main.py 项目： ndlib/presqt

    def get_resources(self, process_info_path, url=None):
        """
        Get all of the user's resources. To batch calls together asynchronously we will group calls
        together by projects, then storages, then each storage's resources.
        """
        resources = []
        all_projects, top_level_projects = self.projects(url)
        # Add all top level projects and subprojects to the resources list
        self.iter_project_hierarchy(all_projects, top_level_projects,
                                    resources)
        # Add all storages to the resource list
        user_storages_links = self.iter_project_storages(
            all_projects, resources)
        # Get initial resources for all storages
        all_storages_resources = run_urls_async_with_pagination(
            self, user_storages_links)

        # Add the total number of storages to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(all_storages_resources),
                            'resource_collection', 'fetch')

        # Loop through the storage resources to either add them to the main resources list or
        # traverse further down the tree to get their children resources.
        for storage_resources in all_storages_resources:
            # Increment the number of files done in the process info file.
            increment_process_info(process_info_path, 'resource_collection',
                                   'fetch')

            if storage_resources and storage_resources[
                    'data']:  #TODO: First if check doing this to avoid private file errors look into it
                # Calculate the given resource's container_id
                parent_project_id = storage_resources['data'][0][
                    'relationships']['node']['data']['id']
                parent_storage = storage_resources['data'][0]['attributes'][
                    'provider']
                container_id = '{}:{}'.format(parent_project_id,
                                              parent_storage)

                self.iter_resources_objects(storage_resources, resources,
                                            container_id)

        return resources

示例#5

0

显示文件

文件： main.py 项目： ndlib/presqt

    def iter_project_storages(self, projects, resources):
        """
        Function to add storage data to the resources list.
        """
        # Keep track of all storage file urls that need to be called.
        user_storages_links = []

        # Asynchronously get storage data for all projects
        storages = run_urls_async_with_pagination(
            self, [project._storages_url for project in projects])

        # Add each storage to the resource list
        for proj_storage in storages:
            for storage in proj_storage['data']:
                storage_obj = Storage(storage, self.session)
                resources.append({
                    'kind': 'container',
                    'kind_name': 'storage',
                    'id': storage_obj.id,
                    'container': storage_obj.node,
                    'title': storage_obj.title
                })
                user_storages_links.append(storage_obj._files_url)
        return user_storages_links