def get_follow_next_urls(data_list):
    """
    Get a list of 'next' urls to run asynchronously.

    Parameters
    ----------
    data_list: list
        List of json data.

    Returns
    -------
    List of urls
    """
    url_list = []
    for data in data_list:
        # TODO: skipping empty entries avoids private file errors; investigate the root cause
        if data:
            meta = data['links']['meta']
            next_url = data['links']['next']
            if next_url:
                page_total = get_page_total(meta['total'], meta['per_page'])
                # Swap the page number at the end of the 'next' url for pages 2..page_total
                for number in range(2, page_total + 1):
                    url_list.append('{}{}'.format(next_url[:-1], number))
    return url_list
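# get_page_total is not defined in these snippets. A minimal sketch of what it
# might look like, assuming it is simply a ceiling division of the total record
# count by the page size (hypothetical helper, not necessarily the project's code):
import math


def get_page_total(total, per_page):
    # Number of pages needed to hold `total` records at `per_page` records per page.
    return math.ceil(int(total) / int(per_page))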
def _get_all_paginated_data(self, url):
    """
    Get all data for the requesting user.

    Parameters
    ----------
    url : str
        URL to the current data to get

    Returns
    -------
    List of the data points gathered up until now.
    """
    # Get initial data
    response_json = self._json(self.get(url))
    data = response_json['data']
    meta = response_json['links']['meta']

    # Calculate pagination pages
    if '?filter' in url or '?page' in url:
        # We already have all the data we need for this request
        return data

    page_total = get_page_total(meta['total'], meta['per_page'])
    url_list = ['{}?page={}'.format(url, number) for number in range(2, page_total + 1)]

    # Call all pagination pages asynchronously
    children_data = run_urls_async(self, url_list)
    for child in children_data:
        data.extend(child['data'])
    return data
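# run_urls_async is also external to these snippets. A minimal sketch of how such
# a helper could be built, assuming asyncio and aiohttp; the `instance` argument
# mirrors the `self` passed above and is assumed to expose the session's auth
# headers (both the signature mapping and `instance.headers` are assumptions):
import asyncio

import aiohttp


async def _fetch_json(session, url):
    # GET one pagination page and decode its JSON body.
    async with session.get(url) as response:
        return await response.json()


async def _fetch_all(url_list, headers):
    # Fire off all page requests concurrently and gather their JSON payloads.
    async with aiohttp.ClientSession(headers=headers) as session:
        return await asyncio.gather(*[_fetch_json(session, url) for url in url_list])


def run_urls_async(instance, url_list):
    return asyncio.run(_fetch_all(url_list, instance.headers))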
def _get_all_paginated_data(self, url):
    """
    Get all data for the requesting user.

    Parameters
    ----------
    url : str
        URL to the current data to get

    Returns
    -------
    List of the data points gathered up until now.
    """
    if url is None:
        url = 'https://curate.nd.edu/api/items?editor=self'

    # Get initial data
    response_json = self._json(self.get(url))
    data = response_json['results']
    pagination = response_json['pagination']

    # Calculate pagination pages
    if '?q=' in url:
        # Search urls get a page_total of 2, so the page range below is empty
        page_total = 2
    else:
        page_total = get_page_total(pagination['totalResults'], pagination['itemsPerPage'])
    url_list = ['{}&page={}'.format(url, number) for number in range(2, page_total)]

    # Call all pagination pages asynchronously
    children_data = run_urls_async(self, url_list)
    for child in children_data:
        data.extend(child['results'])
    return data
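# Note the '&page={}' here versus '?page={}' in the OSF versions: the CurateND
# urls always already carry a query string ('?editor=self' or '?q=...'), so extra
# pages must be appended with '&'. A hedged usage sketch, where `session` is a
# hypothetical instance of the class this method belongs to:
items = session._get_all_paginated_data(None)  # defaults to .../api/items?editor=self
print(len(items))  # total results across all pages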
# requests and DRF's status codes are needed by this standalone version; the
# PresQT/OSF exception classes are assumed to be imported elsewhere in the module.
import requests
from rest_framework import status


def get_all_paginated_data(url, token):
    """
    Get all data for the requesting user.

    Parameters
    ----------
    url : str
        URL to the current data to get
    token : str
        User's OSF token

    Returns
    -------
    List of the data points gathered up until now.
    """
    headers = {'Authorization': 'Bearer {}'.format(token)}

    # Get initial data
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        response_json = response.json()
    elif response.status_code == 410:
        raise PresQTResponseException(
            "The requested resource is no longer available.", status.HTTP_410_GONE)
    elif response.status_code == 404:
        raise OSFNotFoundError("Resource not found.", status.HTTP_404_NOT_FOUND)
    elif response.status_code == 403:
        raise OSFForbiddenError(
            "User does not have access to this resource with the token provided.",
            status.HTTP_403_FORBIDDEN)
    else:
        # Without this branch, any other status code would leave response_json unbound
        raise PresQTResponseException(
            "OSF returned an unexpected status code of {}.".format(response.status_code),
            status.HTTP_400_BAD_REQUEST)

    data = response_json['data']
    meta = response_json['links']['meta']

    # Calculate pagination pages
    if '?filter' in url or '?page' in url:
        # We already have all the data we need for this request
        return data

    page_total = get_page_total(meta['total'], meta['per_page'])
    url_list = ['{}?page={}'.format(url, number) for number in range(2, page_total + 1)]

    # Call all pagination pages asynchronously; imported locally to avoid a circular import
    from presqt.targets.osf.utilities.utils.async_functions import run_urls_async
    children_data = run_urls_async(url_list, headers)
    for child in children_data:
        data.extend(child['data'])
    return data
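# Unlike the method versions above, this standalone variant authenticates each
# request itself with a bearer token. A hedged usage sketch; the endpoint is a
# real OSF API v2 url, but the token is a placeholder:
all_nodes = get_all_paginated_data('https://api.osf.io/v2/users/me/nodes/', '<OSF_TOKEN>')
print(len(all_nodes))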