def sync_files(self, dataset_key):
    """Trigger synchronization process to update all dataset files
    linked to source URLs.

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id

    Raises
    ------
    RestApiError
        If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.sync_files('username/test-dataset')  # doctest: +SKIP
    """
    try:
        self._datasets_api.sync(*(parse_dataset_key(dataset_key)))
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def upload_file(self, dataset_key, name, file_metadata=None, **kwargs):
    """Upload one file to a dataset

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :param name: Name/path of the file stored in the local filesystem
    :type name: str
    :param expand_archives: Boolean value to indicate files should be
        expanded upon upload
    :type expand_archives: bool, optional
    :param file_metadata: Dict containing the name of files and metadata
        Uses file name as a dict containing File description, labels and
        source URLs to add or update
    :type file_metadata: dict, optional
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.upload_file(
    ...     'username/test-dataset',
    ...     'example.csv')  # doctest: +SKIP
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        self._uploads_api.upload_file(owner_id, dataset_id, name, **kwargs)
        if file_metadata:
            self.update_dataset(dataset_key, files=file_metadata)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def sparql(self, dataset_key, query,
           desired_mimetype='application/sparql-results+json', **kwargs):
    """Executes SPARQL queries against a dataset via POST

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :param query: SPARQL query
    :type query: str
    :returns: file object that can be used in file parsers and
        data handling modules.
    :rtype: file object
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.sparql('username/test-dataset',
    ...                   query)  # doctest: +SKIP
    """
    api_client = self._build_api_client(
        default_mimetype_header_accept=desired_mimetype)
    sparql_api = kwargs.get('sparql_api_mock',
                            _swagger.SparqlApi(api_client))
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        return sparql_api.sparql_post(owner_id, dataset_id, query,
                                      **kwargs)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
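# Usage sketch (not part of the client): the docstring above says sparql()
# returns a file-like object holding application/sparql-results+json, so the
# standard json module should be able to parse it. The dataset key and query
# are hypothetical, and the file-object behavior is taken from the docstring
# rather than verified here.
def _example_sparql_query():
    import json

    import datadotworld as dw

    api_client = dw.api_client()
    results = api_client.sparql(
        'username/test-dataset',
        'SELECT * WHERE { ?s ?p ?o } LIMIT 10')
    # SPARQL JSON results keep result rows under results -> bindings
    return json.load(results)['results']['bindings']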
def upload_files(self, dataset_key, files):
    """Upload dataset files

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id
    files : list of str
        The list of names/paths for files stored in the local filesystem

    Raises
    ------
    RestApiError
        If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.upload_files(
    ...     'username/test-dataset',
    ...     ['/my/local/example.csv'])  # doctest: +SKIP
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        self._uploads_api.upload_files(owner_id, dataset_id, files)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def delete_files(self, dataset_key, names):
    """Delete dataset file(s)

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id
    names : list of str
        The list of names for files to be deleted

    Raises
    ------
    RestApiError
        If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.delete_files(
    ...     'username/test-dataset', ['example.csv'])  # doctest: +SKIP
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        self._datasets_api.delete_files_and_sync_sources(
            owner_id, dataset_id, names)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def get_dataset(self, dataset_key):
    """Retrieve an existing dataset definition

    This method retrieves metadata about an existing dataset.

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id

    Returns
    -------
    dict
        Dataset definition, with all attributes

    Raises
    ------
    RestApiError
        If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> intro_dataset = api_client.get_dataset(
    ...     'jonloyens/an-intro-to-dataworld-dataset')
    >>> intro_dataset['title']
    'An Intro to data.world Dataset'
    """
    try:
        return self._datasets_api.get_dataset(
            *(parse_dataset_key(dataset_key))).to_dict()
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def append_records(self, dataset_key, stream_id, body,
                   provided_mimetype='application/json', **kwargs):
    """Append records to a stream.

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :param stream_id: Stream unique identifier.
    :type stream_id: str
    :param body: Object body
    :type body: obj
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.append_records('username/test-dataset', 'streamId',
    ...                           {'content': 'content'})  # doctest: +SKIP
    """
    api_client = self._build_api_client(
        default_mimetype_header_content_type=provided_mimetype)
    streams_api = kwargs.get('streams_api_mock',
                             _swagger.StreamsApi(api_client))
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        return streams_api.append_records(owner_id, dataset_id,
                                          stream_id, body, **kwargs)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def load_dataset(self, dataset_key, force_update=False):
    """Load a dataset from the local filesystem, downloading it from
    data.world first, if necessary.

    This function returns an object of type `LocalDataset`. The object
    allows access to metadata via its `describe()` method and to all the
    data via three properties `raw_data`, `tables` and `dataframes`, all
    of which are mappings (dict-like structures).

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id or of a url
    force_update : bool
        Flag, indicating if a new copy of the dataset should be downloaded
        replacing any previously downloaded copy

    Returns
    -------
    LocalDataset
        The object representing the dataset

    Raises
    ------
    RestApiError
        If a server error occurs
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    cache_dir = path.join(self._config.cache_dir, owner_id, dataset_id,
                          'latest')
    backup_dir = None
    if path.isdir(cache_dir) and force_update:
        backup_dir = path.join(self._config.cache_dir, owner_id,
                               dataset_id, 'backup')
        # Preserve the current copy so it can be restored if the
        # download fails
        if path.isdir(backup_dir):
            shutil.rmtree(backup_dir)
        shutil.move(cache_dir, backup_dir)

    descriptor_file = path.join(cache_dir, 'datapackage.json')
    if not path.isfile(descriptor_file):
        try:
            descriptor_file = self.api_client.download_datapackage(
                dataset_key, cache_dir)
        except RestApiError as e:
            if backup_dir is not None:
                shutil.move(backup_dir, cache_dir)
                warn('Unable to download datapackage ({}). '
                     'Loading previously saved version.'.format(e.reason))
            else:
                raise

    if backup_dir is not None:
        shutil.rmtree(backup_dir, ignore_errors=True)

    return LocalDataset(descriptor_file)
def query(self, dataset_key, query, query_type="sql", parameters=None): """Query an existing dataset :param dataset_key: Dataset identifier, in the form of owner/id or of a url :type dataset_key: str :param query: SQL or SPARQL query :type query: str :param query_type: The type of the query. Must be either 'sql' or 'sparql'. (Default value = "sql") :type query_type: {'sql', 'sparql'}, optional :param parameters: parameters to the query - if SPARQL query, this should be a dict containing named parameters, if SQL query,then this should be a list containing positional parameters. Boolean values will be converted to xsd:boolean, Integer values to xsd:integer, and other Numeric values to xsd:decimal. Anything else is treated as a String literal (Default value = None) :type parameters: query parameters, optional :returns: Object containing the results of the query :rtype: Results :raises RuntimeError: If a server error occurs """ # TODO Move network request to RestApiClient owner_id, dataset_id = parse_dataset_key(dataset_key) params = {"query": query} if parameters and query_type == "sparql": # if SPARQL, then the parameters should be a Mapping containing # named parameters params["parameters"] = ",".join([ "{}={}".format(k, convert_to_sparql_literal(parameters[k])) for k in parameters.keys() ]) elif parameters and query_type == "sql": # if SQL, then the parameters should be an array with positional # parameters, need to unwind them to $data_world_paramN for each # 0-indexed position N parameters = { "$data_world_param{}".format(i): x for i, x in enumerate(parameters) } params["parameters"] = ",".join([ "{}={}".format(k, convert_to_sparql_literal(parameters[k])) for k in parameters.keys() ]) url = "{0}://{1}/{2}/{3}/{4}".format(self._protocol, self._query_host, query_type, owner_id, dataset_id) headers = { 'User-Agent': _user_agent(), 'Accept': 'application/sparql-results+json', 'Authorization': 'Bearer {0}'.format(self._config.auth_token) } response = requests.get(url, params=params, headers=headers) if response.status_code == 200: return QueryResults(response.json()) raise RuntimeError('Error executing query: {}'.format( response.content))
def put_request(body):
    # Nested helper: streams the request body to the file upload
    # endpoint and hands the response back through the queue
    ownerid, datasetid = parse_dataset_key(self._dataset_key)
    response = requests.put(
        "{}/uploads/{}/{}/files/{}".format(self._api_host, ownerid,
                                           datasetid, self._file_name),
        data=body,
        headers={
            'User-Agent': self._user_agent,
            'Authorization': 'Bearer {}'.format(self._config.auth_token)
        })
    self._response_queue.put(response)
def _open_for_read(self):
    """Open the file in read mode"""
    ownerid, datasetid = parse_dataset_key(self._dataset_key)
    response = requests.get(
        '{}/file_download/{}/{}/{}'.format(
            self._query_host, ownerid, datasetid, self._file_name),
        headers={
            'User-Agent': self._user_agent,
            'Authorization': 'Bearer {}'.format(
                self._config.auth_token)
        },
        stream=True)
    try:
        response.raise_for_status()
    except Exception as e:
        raise RestApiError(cause=e)
    self._read_response = response
def replace_dataset(self, dataset_key, **kwargs):
    """Replace an existing dataset

    *This method will completely overwrite an existing dataset.*

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :param description: Dataset description
    :type description: str, optional
    :param summary: Dataset summary markdown
    :type summary: str, optional
    :param tags: Dataset tags
    :type tags: list, optional
    :param license: Dataset license
    :type license: {'Public Domain', 'PDDL', 'CC-0', 'CC-BY', 'ODC-BY',
        'CC-BY-SA', 'ODC-ODbL', 'CC BY-NC', 'CC BY-NC-SA', 'Other'}
    :param visibility: Dataset visibility
    :type visibility: {'OPEN', 'PRIVATE'}
    :param files: File names and source URLs to add or update
    :type files: dict, optional
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.replace_dataset(
    ...    'username/test-dataset',
    ...    visibility='PRIVATE', license='Public Domain',
    ...    description='A better description')  # doctest: +SKIP
    """
    request = self.__build_dataset_obj(
        lambda: _swagger.DatasetPutRequest(),
        lambda name, url, expand_archive, description, labels:
        _swagger.FileCreateRequest(
            name=name,
            source=_swagger.FileSourceCreateRequest(
                url=url, expand_archive=expand_archive),
            description=description,
            labels=labels),
        kwargs)
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        self._datasets_api.replace_dataset(owner_id, dataset_id, request)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def replace_dataset(self, dataset_key, **kwargs):
    """Replace an existing dataset

    *This method will completely overwrite an existing dataset.*

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id
    description : str, optional
        Dataset description
    summary : str, optional
        Dataset summary markdown
    tags : list, optional
        Dataset tags
    license : {'Public Domain', 'PDDL', 'CC-0', 'CC-BY', 'ODC-BY',
               'CC-BY-SA', 'ODC-ODbL', 'CC BY-NC', 'CC BY-NC-SA', 'Other'}
        Dataset license
    visibility : {'OPEN', 'PRIVATE'}
        Dataset visibility
    files : dict, optional
        File names and source URLs to add or update

    Raises
    ------
    RestApiError
        If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.replace_dataset(
    ...    'username/test-dataset',
    ...    visibility='PRIVATE', license='Public Domain',
    ...    description='A better description')  # doctest: +SKIP
    """
    request = self.__build_dataset_obj(
        lambda: _swagger.DatasetPutRequest(),
        lambda name, url: _swagger.FileCreateRequest(
            name=name,
            source=_swagger.FileSourceCreateRequest(url=url)),
        kwargs)
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        self._datasets_api.replace_dataset(owner_id, dataset_id, request)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def download_dataset(self, dataset_key):
    """Return a .zip containing all files within the dataset as uploaded.

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :returns: .zip file containing files within dataset
    :rtype: file object
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.download_dataset('username/test-dataset')  # doctest: +SKIP
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        return self._download_api.download_dataset(owner_id, dataset_id)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def add_files_via_url(self, dataset_key, files=None):
    """Add or update dataset files linked to source URLs

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :param files: Dict containing the name of files and metadata
        Uses file name as a dict containing File description, labels and
        source URLs to add or update (Default value = None)
        *description and labels are optional.*
    :type files: dict
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> url = 'http://www.acme.inc/example.csv'
    >>> api_client = dw.api_client()
    >>> api_client.add_files_via_url(
    ...    'username/test-dataset',
    ...    {'example.csv': {
    ...        'url': url,
    ...        'labels': ['raw data'],
    ...        'description': 'file description'}})  # doctest: +SKIP
    """
    files = files or {}
    file_requests = [
        _swagger.FileCreateOrUpdateRequest(
            name=file_name,
            source=_swagger.FileSourceCreateOrUpdateRequest(
                url=file_info['url'],
                expand_archive=file_info.get('expand_archive', False)),
            description=file_info.get('description'),
            labels=file_info.get('labels'),
        ) for file_name, file_info in files.items()
    ]
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        self._datasets_api.add_files_by_source(
            owner_id, dataset_id,
            _swagger.FileBatchUpdateRequest(files=file_requests))
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def delete_dataset(self, dataset_key):
    """Deletes a dataset and all associated data

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> del_dataset = api_client.delete_dataset(
    ...     'jonloyens/an-intro-to-dataworld-dataset')  # doctest: +SKIP
    >>> del_dataset.message  # doctest: +SKIP
    'Dataset has been successfully deleted.'
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        return self._datasets_api.delete_dataset(owner_id, dataset_id)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def add_files_via_url(self, dataset_key, files=None):
    """Add or update dataset files linked to source URLs

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id
    files : dict
        File names and source URLs to add or update

    Raises
    ------
    RestApiError
        If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> url = 'http://www.acme.inc/example.csv'
    >>> api_client = dw.api_client()
    >>> api_client.add_files_via_url(
    ...    'username/test-dataset',
    ...    {'example.csv': url})  # doctest: +SKIP
    """
    files = files or {}
    file_requests = [
        _swagger.FileCreateOrUpdateRequest(
            name=name,
            source=_swagger.FileSourceCreateOrUpdateRequest(url=url))
        for name, url in files.items()
    ]
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        self._datasets_api.add_files_by_source(
            owner_id, dataset_id,
            _swagger.FileBatchUpdateRequest(files=file_requests))
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def download_file(self, dataset_key, file):
    """Return a file within the dataset as uploaded.

    :param dataset_key: Dataset identifier, in the form of owner/id
    :type dataset_key: str
    :param file: File path to be returned
    :type file: str
    :returns: the file's contents, as uploaded
    :rtype: file object
    :raises RestApiError: If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> api_client.download_file('username/test-dataset',
    ...                          'example.csv')  # doctest: +SKIP
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    try:
        return self._download_api.download_file(owner_id, dataset_id,
                                                file)
    except _swagger.rest.ApiException as e:
        raise RestApiError(cause=e)
def query(self, dataset_key, query, query_type="sql"): """Query an existing dataset Parameters ---------- dataset_key : str Dataset identifier, in the form of owner/id or of a url query : str SQL or SPARQL query query_type : {'sql', 'sparql'}, optional The type of the query. Must be either 'sql' or 'sparql'. Returns ------- Results Object containing the results of the query Raises ------ RuntimeError If a server error occurs """ # TODO Move network request to RestApiClient owner_id, dataset_id = parse_dataset_key(dataset_key) params = {"query": query} url = "{0}://{1}/{2}/{3}/{4}".format(self._protocol, self._query_host, query_type, owner_id, dataset_id) headers = { 'User-Agent': _user_agent(), 'Accept': 'application/sparql-results+json', 'Authorization': 'Bearer {0}'.format(self._config.auth_token) } response = requests.get(url, params=params, headers=headers) if response.status_code == 200: return QueryResults(response.json()) raise RuntimeError('Error executing query: {}'.format( response.content))
def load_dataset(self, dataset_key, force_update=False, auto_update=False):
    """Load a dataset from the local filesystem, downloading it from
    data.world first, if necessary.

    This function returns an object of type `LocalDataset`. The object
    allows access to metadata via its `describe()` method and to all the
    data via three properties `raw_data`, `tables` and `dataframes`, all
    of which are mappings (dict-like structures).

    :param dataset_key: Dataset identifier, in the form of owner/id or
        of a url
    :type dataset_key: str
    :param force_update: Flag, indicating if a new copy of the dataset
        should be downloaded replacing any previously downloaded copy
        (Default value = False)
    :type force_update: bool
    :param auto_update: Flag, indicating that dataset be updated to the
        latest version
    :type auto_update: bool
    :returns: The object representing the dataset
    :rtype: LocalDataset
    :raises RestApiError: If a server error occurs
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    cache_dir = path.join(self._config.cache_dir, owner_id, dataset_id,
                          'latest')
    backup_dir = None
    if path.isdir(cache_dir) and force_update:
        backup_dir = path.join(self._config.cache_dir, owner_id,
                               dataset_id, 'backup')
        move_cache_dir_to_backup_dir(backup_dir, cache_dir)

    descriptor_file = path.join(cache_dir, 'datapackage.json')
    if not path.isfile(descriptor_file):
        try:
            descriptor_file = self.api_client.download_datapackage(
                dataset_key, cache_dir)
        except RestApiError as e:
            if backup_dir is not None:
                shutil.move(backup_dir, cache_dir)
                warn('Unable to download datapackage ({}). '
                     'Loading previously saved version.'.format(e.reason))
            else:
                raise
    else:
        try:
            dataset_info = self.api_client.get_dataset(dataset_key)
        except RestApiError:
            return LocalDataset(descriptor_file)

        last_modified = datetime.strptime(dataset_info['updated'],
                                          '%Y-%m-%dT%H:%M:%S.%fZ')
        if (last_modified > datetime.utcfromtimestamp(
                path.getmtime(str(descriptor_file)))):
            if auto_update:
                try:
                    backup_dir = path.join(self._config.cache_dir,
                                           owner_id, dataset_id, 'backup')
                    move_cache_dir_to_backup_dir(backup_dir, cache_dir)
                    descriptor_file = self.api_client. \
                        download_datapackage(dataset_key, cache_dir)
                except RestApiError as e:
                    if backup_dir is not None:
                        shutil.move(backup_dir, cache_dir)
                        warn('Unable to auto update datapackage ({}). '
                             'Loading previously saved version.'
                             .format(e.reason))
                    else:
                        raise
            else:
                filterwarnings('always',
                               message='You are using an outdated copy')
                warn('You are using an outdated copy of {}. '
                     'If you wish to use the latest version, call this '
                     'function with the argument '
                     'auto_update=True or '
                     'force_update=True'.format(dataset_key))

    if backup_dir is not None:
        shutil.rmtree(backup_dir, ignore_errors=True)

    return LocalDataset(descriptor_file)
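# Usage sketch (not part of the library): how the update flags above behave.
# Assumes this method is exposed at module level as datadotworld.load_dataset
# and that 'username/test-dataset' exists; both are illustrative assumptions.
def _example_load_dataset_updates():
    import datadotworld as dw

    # First call downloads and caches; subsequent calls read the local copy
    dataset = dw.load_dataset('username/test-dataset')

    # auto_update=True re-downloads only if the remote copy is newer,
    # while force_update=True always replaces the cached copy
    dataset = dw.load_dataset('username/test-dataset', auto_update=True)
    dataset = dw.load_dataset('username/test-dataset', force_update=True)
    return dataset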
def download_datapackage(self, dataset_key, dest_dir):
    """Download and unzip a dataset's datapackage

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id
    dest_dir : str or path
        Directory under which datapackage should be saved

    Returns
    -------
    path
        Location of the datapackage descriptor (datapackage.json) in
        the local filesystem

    Raises
    ------
    RestApiError
        If a server error occurs

    Examples
    --------
    >>> import datadotworld as dw
    >>> api_client = dw.api_client()
    >>> datapackage_descriptor = api_client.download_datapackage(
    ...     'jonloyens/an-intro-to-dataworld-dataset', '/tmp/test')
    >>> datapackage_descriptor
    '/tmp/test/datapackage.json'
    """
    if path.isdir(dest_dir):
        raise ValueError('dest_dir must be a new directory, '
                         'but {} already exists'.format(dest_dir))

    owner_id, dataset_id = parse_dataset_key(dataset_key)
    url = "{0}://{1}/datapackage/{2}/{3}".format(
        self._protocol, self._download_host, owner_id, dataset_id)
    headers = {
        'User-Agent': _user_agent(),
        'Authorization': 'Bearer {0}'.format(self._config.auth_token)
    }
    try:
        response = requests.get(url, headers=headers, stream=True)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RestApiError(cause=e)

    unzip_dir = path.join(self._config.tmp_dir, str(uuid.uuid4()))
    os.makedirs(unzip_dir)
    zip_file = path.join(unzip_dir, 'dataset.zip')
    with open(zip_file, 'wb') as f:
        for data in response.iter_content(chunk_size=4096):
            f.write(data)

    zip_obj = zipfile.ZipFile(zip_file)
    zip_obj.extractall(path=unzip_dir)

    # Find where datapackage.json is within expanded files
    unzipped_descriptor = glob.glob(
        '{}/**/datapackage.json'.format(unzip_dir))
    if not unzipped_descriptor:
        raise RuntimeError(
            'Zip file did not contain a datapackage manifest.')

    unzipped_dir = path.dirname(unzipped_descriptor[0])
    shutil.move(unzipped_dir, dest_dir)
    shutil.rmtree(unzip_dir, ignore_errors=True)

    return path.join(dest_dir, 'datapackage.json')
def query(self, dataset_key, query, query_type="sql", parameters=None): """Query an existing dataset Parameters ---------- dataset_key : str Dataset identifier, in the form of owner/id or of a url query : str SQL or SPARQL query query_type : {'sql', 'sparql'}, optional The type of the query. Must be either 'sql' or 'sparql'. parameters: query parameters, optional parameters to the query - if SPARQL query, this should be a dict containing named parameters, if SQL query, then this should be a list containing positional parameters. Boolean values will be converted to xsd:boolean, Integer values to xsd:integer, and other Numeric values to xsd:decimal. anything else is treated as a String literal Returns ------- Results Object containing the results of the query Raises ------ RuntimeError If a server error occurs """ # TODO Move network request to RestApiClient owner_id, dataset_id = parse_dataset_key(dataset_key) params = { "query": query } if parameters and query_type == "sparql": # if SPARQL, then the parameters should be a Mapping containing # named parameters params["parameters"] = ",".join( ["{}={}".format(k, convert_to_sparql_literal(parameters[k])) for k in parameters.keys()]) elif parameters and query_type == "sql": # if SQL, then the parameters should be an array with positional # parameters, need to unwind them to $data_world_paramN for each # 0-indexed position N parameters = {"$data_world_param{}".format(i): x for i, x in enumerate(parameters)} params["parameters"] = ",".join(["{}={}".format( k, convert_to_sparql_literal(parameters[k])) for k in parameters.keys()]) url = "{0}://{1}/{2}/{3}/{4}".format(self._protocol, self._query_host, query_type, owner_id, dataset_id) headers = { 'User-Agent': _user_agent(), 'Accept': 'application/sparql-results+json', 'Authorization': 'Bearer {0}'.format(self._config.auth_token) } response = requests.get(url, params=params, headers=headers) if response.status_code == 200: return QueryResults(response.json()) raise RuntimeError( 'Error executing query: {}'.format(response.content))
def test_parse_dataset_key():
    path_owner, path_id = util.parse_dataset_key('owner/dataset')
    assert_that(path_owner, equal_to('owner'))
    assert_that(path_id, equal_to('dataset'))
def test_parse_dataset_key_with_url():
    url_owner, url_id = util.parse_dataset_key(
        'https://data.world/owner/dataset')
    assert_that(url_owner, equal_to('owner'))
    assert_that(url_id, equal_to('dataset'))
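# A possible companion test, assuming parse_dataset_key raises ValueError for
# keys that are neither owner/id nor a data.world URL, and that hamcrest's
# calling/raises helpers are imported alongside assert_that; both assumptions.
def test_parse_dataset_key_invalid():
    assert_that(calling(util.parse_dataset_key).with_args('justadataset'),
                raises(ValueError))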
def load_dataset(self, dataset_key, force_update=False):
    """Load a dataset from the local filesystem, downloading it from
    data.world first, if necessary.

    This function returns an object of type `LocalDataset`. The object
    allows access to metadata via its `describe()` method and to all the
    data via three properties `raw_data`, `tables` and `dataframes`, all
    of which are mappings (dict-like structures).

    Parameters
    ----------
    dataset_key : str
        Dataset identifier, in the form of owner/id or of a url
    force_update : bool
        Flag, indicating if a new copy of the dataset should be downloaded
        replacing any previously downloaded copy

    Returns
    -------
    LocalDataset
        The object representing the dataset

    Raises
    ------
    RestApiError
        If a server error occurs
    """
    owner_id, dataset_id = parse_dataset_key(dataset_key)
    cache_dir = path.join(self._config.cache_dir, owner_id, dataset_id,
                          'latest')
    backup_dir = None
    if path.isdir(cache_dir) and force_update:
        backup_dir = path.join(self._config.cache_dir, owner_id,
                               dataset_id, 'backup')
        if path.isdir(backup_dir):
            shutil.rmtree(backup_dir)
        shutil.move(cache_dir, backup_dir)

    descriptor_file = path.join(cache_dir, 'datapackage.json')
    if not path.isfile(descriptor_file):
        try:
            descriptor_file = self.api_client.download_datapackage(
                dataset_key, cache_dir)
        except RestApiError as e:
            if backup_dir is not None:
                shutil.move(backup_dir, cache_dir)
                warn('Unable to download datapackage ({}). '
                     'Loading previously saved version.'.format(e.reason))
            else:
                raise
    else:
        try:
            dataset_info = self.api_client.get_dataset(dataset_key)
            last_modified = datetime.strptime(dataset_info['updated'],
                                              '%Y-%m-%dT%H:%M:%S.%fZ')
            if (last_modified > datetime.utcfromtimestamp(
                    path.getmtime(str(descriptor_file)))):
                warn('You are using an outdated copy of {}. '
                     'If you wish to use the latest version, call this '
                     'function with the argument '
                     'force_update=True'.format(dataset_key))
        except RestApiError:
            # Not a critical step
            pass

    if backup_dir is not None:
        shutil.rmtree(backup_dir, ignore_errors=True)

    return LocalDataset(descriptor_file)
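# Usage sketch (not part of the library): reading the LocalDataset returned
# above via the describe()/raw_data/tables/dataframes API the docstring names.
# The dataset key and the 'example' resource name are hypothetical.
def _example_local_dataset_access():
    import datadotworld as dw

    dataset = dw.load_dataset('username/test-dataset')
    metadata = dataset.describe()           # datapackage-style metadata dict
    raw_bytes = dataset.raw_data['example']   # raw file contents
    rows = dataset.tables['example']        # rows as dict-like records
    frame = dataset.dataframes['example']   # pandas DataFrame
    return metadata, raw_bytes, rows, frame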