Пример #1
0
def get_follow_next_urls(data_list):
    """
    Get a list of 'next' urls to run asynchronously.

    For every truthy entry that carries a 'next' link, one url is generated
    for each page from 2 through the page total returned by get_page_total().

    Parameters
    ----------
    data_list: list
        List of json data. Falsy entries (e.g. None or {}) are skipped.

    Returns
    -------
    List of urls
    """
    url_list = []
    for data in data_list:
        # ToDo: falsy entries are skipped to avoid private file errors, look into it
        if not data:
            continue

        links = data['links']
        next_url = links['next']
        if not next_url:
            # No 'next' link, so there are no further pages to follow.
            continue

        meta = links['meta']
        page_total = get_page_total(meta['total'], meta['per_page'])

        # Drop the last character of the 'next' url and re-append each page
        # number. NOTE(review): this presumes the 'next' url ends in a
        # single-character page number -- confirm against the API.
        url_prefix = next_url[:-1]
        url_list.extend(
            '{}{}'.format(url_prefix, number)
            for number in range(2, page_total + 1)
        )
    return url_list
Пример #2
0
    def _get_all_paginated_data(self, url):
        """
        Get all data for the requesting user.

        Parameters
        ----------
        url : str
            URL to the current data to get

        Returns
        -------
        Data dictionary of the data points gathered up until now.
        """
        # Get initial data
        response_json = self._json(self.get(url))
        data = response_json['data']
        meta = response_json['links']['meta']

        # Calculate pagination pages
        if '?filter' in url or '?page' in url:
            # We already have all the data we need for this request
            return data
        else:
            page_total = get_page_total(meta['total'], meta['per_page'])
            url_list = ['{}?page={}'.format(url, number) for number in range(2, page_total + 1)]

        # Call all pagination pages asynchronously
        children_data = run_urls_async(self, url_list)
        [data.extend(child['data']) for child in children_data]

        return data
Пример #3
0
    def _get_all_paginated_data(self, url):
        """
        Get all data for the requesting user.

        The first page is fetched synchronously; any further page urls are
        requested through run_urls_async() and their 'results' entries are
        appended to the first page's entries.

        Parameters
        ----------
        url : str
            URL to the current data to get. When None, the items owned by the
            requesting user ('https://curate.nd.edu/api/items?editor=self')
            are requested.

        Returns
        -------
        List of the results gathered from every page requested.
        """
        if url is None:
            url = 'https://curate.nd.edu/api/items?editor=self'
        # Get initial data
        response_json = self._json(self.get(url))
        data = response_json['results']
        pagination = response_json['pagination']

        # Calculate pagination pages
        if "?q=" in url:
            # With a page total of 2 the range below is empty, so no extra
            # pages are requested for '?q=' urls.
            page_total = 2
        else:
            page_total = get_page_total(pagination['totalResults'],
                                        pagination['itemsPerPage'])

        # NOTE(review): the upper bound excludes page_total itself; this is
        # only correct if get_page_total() returns one more than the last
        # page number -- confirm against its implementation.
        # NOTE(review): '&page=' presumes url already carries a query string.
        url_list = [
            '{}&page={}'.format(url, number)
            for number in range(2, page_total)
        ]

        # Call all pagination pages asynchronously
        children_data = run_urls_async(self, url_list)
        for child in children_data:
            data.extend(child['results'])
        return data
Пример #4
0
def get_all_paginated_data(url, token):
    """
    Get all data for the requesting user.

    The first page is fetched synchronously. Unless the url already contains
    a '?filter' or '?page' query, the remaining pages are then requested
    through run_urls_async() and their 'data' entries are appended to the
    first page's entries.

    Parameters
    ----------
    url : str
        URL to the current data to get

    token: str
        User's OSF token

    Returns
    -------
    List of the data points gathered from every page requested.

    Raises
    ------
    PresQTResponseException
        If the response status is 410, or any status other than
        200, 403 and 404.
    OSFNotFoundError
        If the response status is 404.
    OSFForbiddenError
        If the response status is 403.
    """
    headers = {'Authorization': 'Bearer {}'.format(token)}
    # Get initial data
    response = requests.get(url, headers=headers)

    if response.status_code == 410:
        raise PresQTResponseException("The requested resource is no longer available.", status.HTTP_410_GONE)
    elif response.status_code == 404:
        raise OSFNotFoundError("Resource not found.", status.HTTP_404_NOT_FOUND)
    elif response.status_code == 403:
        raise OSFForbiddenError(
            "User does not have access to this resource with the token provided.", status.HTTP_403_FORBIDDEN)
    elif response.status_code != 200:
        # Any other status previously fell through with no parsed body and
        # crashed below with an UnboundLocalError; fail explicitly instead.
        raise PresQTResponseException(
            "Unexpected response while requesting the resource. Status code: {}.".format(
                response.status_code),
            response.status_code)

    response_json = response.json()
    data = response_json['data']

    if '?filter' in url or '?page' in url:
        # We already have all the data we need for this request
        return data

    # Calculate pagination pages (page 1 is the response we already hold)
    meta = response_json['links']['meta']
    page_total = get_page_total(meta['total'], meta['per_page'])
    url_list = ['{}?page={}'.format(url, number) for number in range(2, page_total + 1)]

    # Call all pagination pages asynchronously.
    # Imported at call time rather than at module level -- presumably to
    # avoid a circular import; verify before moving it.
    from presqt.targets.osf.utilities.utils.async_functions import run_urls_async
    children_data = run_urls_async(url_list, headers)
    for child in children_data:
        data.extend(child['data'])

    return data