Exemplo n.º 1
0
 def test_write_timeout_error(self):
     """Expect RemoteFileException when writing with ``timeout=0.0``."""
     expected_failure = pytest.raises(RemoteFileException)
     with expected_failure:
         # Construct and enter the remote file inside the expectation so a
         # failure at any stage (open, write, or close) is captured.
         remote = RemoteFile(DefaultConfig(),
                             "user/dataset",
                             "file.txt",
                             timeout=0.0)
         with remote as writer:
             writer.write("test")
Exemplo n.º 2
0
    def test_read_basic(self):
        """Read a mocked text download in 'r' mode and check its contents."""
        download_url = '{}/file_download/{}/{}/{}'.format(
            'https://query.data.world', 'user', 'dataset', 'file.txt')

        def serve_file(request):
            # Requests must carry the library's User-Agent header.
            assert request.headers.get('User-Agent') == _user_agent()
            return 200, {}, "this is the test."

        with responses.RequestsMock() as mock:
            mock.add_callback(mock.GET, download_url, callback=serve_file)
            with RemoteFile(DefaultConfig(), "user/dataset", "file.txt",
                            mode="r") as reader:
                body = reader.read()
                assert "this is the test." == body
Exemplo n.º 3
0
    def test_read_binary_bytes_iter(self):
        """Iterate an 'rb' reader and expect the payload one byte at a time."""
        download_url = '{}/file_download/{}/{}/{}'.format(
            'https://query.data.world', 'user', 'dataset', 'file.txt')

        def serve_bytes(request):
            # Requests must carry the library's User-Agent header.
            assert request.headers.get('User-Agent') == _user_agent()
            # Four raw bytes covering both ends of the 0-255 range.
            payload = struct.pack('BBBB', 0, 1, 254, 255)
            return 200, {}, payload

        with responses.RequestsMock() as mock:
            mock.add_callback(mock.GET, download_url, callback=serve_bytes)
            with RemoteFile(DefaultConfig(), "user/dataset", "file.txt",
                            mode="rb") as reader:
                pieces = list(reader)
                assert [b"\x00", b"\x01", b"\xfe", b"\xff"] == pieces
Exemplo n.º 4
0
    def test_write_error(self):
        """Expect RestApiError when the mocked upload answers HTTP 400."""
        upload_url = '{}/uploads/{}/{}/files/{}'.format(
            'https://api.data.world/v0', 'user', 'dataset', 'file.txt')

        def reject_upload(request):
            # Requests must carry the library's User-Agent header.
            assert request.headers.get('User-Agent') == _user_agent()
            return 400, {}, json.dumps({})

        with pytest.raises(RestApiError), responses.RequestsMock() as mock:
            mock.add_callback(mock.PUT, upload_url, callback=reject_upload)
            with RemoteFile(DefaultConfig(),
                            "user/dataset", "file.txt") as writer:
                writer.write("test")
Exemplo n.º 5
0
    def test_read_binary_iter_chunks(self):
        """Iterate an 'rb' reader with ``chunk_size=4`` over a 6-byte body."""
        download_url = '{}/file_download/{}/{}/{}'.format(
            'https://query.data.world', 'user', 'dataset', 'file.txt')

        def serve_file(request):
            # Requests must carry the library's User-Agent header.
            assert request.headers.get('User-Agent') == _user_agent()
            return 200, {}, "abcdef"

        with responses.RequestsMock() as mock:
            mock.add_callback(mock.GET, download_url, callback=serve_file)
            with RemoteFile(DefaultConfig(), "user/dataset", "file.txt",
                            mode="rb", chunk_size=4) as reader:
                chunks = list(reader)
                # Six bytes in chunks of four: one full chunk, one remainder.
                assert [b'abcd', b'ef'] == chunks
Exemplo n.º 6
0
    def test_write_basic(self):
        """Write "test" in the default mode and verify the uploaded body."""
        upload_url = '{}/uploads/{}/{}/files/{}'.format(
            'https://api.data.world/v0', 'user', 'dataset', 'file.txt')

        def accept_upload(request):
            # The request body is iterated as byte chunks; rebuild the text.
            sent_text = ''.join([piece.decode('utf-8')
                                 for piece in request.body])
            assert "test" == sent_text
            assert request.headers.get('User-Agent') == _user_agent()
            return 200, {}, json.dumps({})

        with responses.RequestsMock() as mock:
            mock.add_callback(mock.PUT, upload_url, callback=accept_upload)
            with RemoteFile(DefaultConfig(),
                            "user/dataset", "file.txt") as writer:
                writer.write("test")
Exemplo n.º 7
0
    def test_read_jsonl(self):
        """Read a mocked JSON-lines download line by line in 'r' mode."""
        download_url = '{}/file_download/{}/{}/{}'.format(
            'https://query.data.world', 'user', 'dataset', 'file.csv')

        def serve_jsonl(request):
            # Requests must carry the library's User-Agent header.
            assert request.headers.get('User-Agent') == _user_agent()
            payload = ('{"A":"1", "B":"2", "C":"3"}\n'
                       '{"A":"4", "B":"5", "C":"6"}\n')
            return 200, {}, payload

        with responses.RequestsMock() as mock:
            mock.add_callback(mock.GET, download_url, callback=serve_jsonl)
            with RemoteFile(DefaultConfig(), "user/dataset", "file.csv",
                            mode="r") as reader:
                rows = []
                for line in reader:
                    if not line.strip():
                        continue  # skip blank lines, they are not JSON
                    rows.append(json.loads(line))
                assert rows[0] == {'A': '1', 'B': '2', 'C': '3'}
                assert rows[1] == {'A': '4', 'B': '5', 'C': '6'}
Exemplo n.º 8
0
    def test_read_csv(self):
        """Feed an 'r' mode reader to ``csv.DictReader`` and check the rows."""
        download_url = '{}/file_download/{}/{}/{}'.format(
            'https://query.data.world', 'user', 'dataset', 'file.csv')

        def serve_csv(request):
            # Requests must carry the library's User-Agent header.
            assert request.headers.get('User-Agent') == _user_agent()
            return 200, {}, "A,B,C\n1,2,3\n4,5,6"

        with responses.RequestsMock() as mock:
            mock.add_callback(mock.GET, download_url, callback=serve_csv)
            with RemoteFile(DefaultConfig(), "user/dataset", "file.csv",
                            mode="r") as reader:
                # The header line supplies the keys of each row mapping.
                rows = list(csv.DictReader(reader))
                assert rows[0] == {'A': '1', 'B': '2', 'C': '3'}
                assert rows[1] == {'A': '4', 'B': '5', 'C': '6'}
Exemplo n.º 9
0
    def test_write_csv(self):
        """Drive ``csv.DictWriter`` through a writer and check the upload."""
        upload_url = '{}/uploads/{}/{}/files/{}'.format(
            'https://api.data.world/v0', 'user', 'dataset', 'file.csv')

        def accept_upload(request):
            # The request body is iterated as byte chunks; rebuild the text.
            sent_text = ''.join([piece.decode('utf-8')
                                 for piece in request.body])
            assert "a,b\r\n42,17\r\n420,178\r\n" == sent_text
            assert request.headers.get('User-Agent') == _user_agent()
            return 200, {}, json.dumps({})

        records = ({'a': 42, 'b': 17}, {'a': 420, 'b': 178})

        with responses.RequestsMock() as mock:
            mock.add_callback(mock.PUT, upload_url, callback=accept_upload)
            with RemoteFile(DefaultConfig(),
                            "user/dataset", "file.csv") as writer:
                csvw = csv.DictWriter(writer, fieldnames=['a', 'b'])
                csvw.writeheader()
                for record in records:
                    csvw.writerow(record)
Exemplo n.º 10
0
    def open_remote_file(self, dataset_key, file_name,
                         mode='w', **kwargs):
        """Open a remote file object for a file in a data.world dataset.

        The returned object can be used to write to or read from the remote
        file, depending on ``mode``.

        :param dataset_key: Dataset identifier, in the form of owner/id
        :type dataset_key: str
        :param file_name: The name of the file to open
        :type file_name: str
        :param mode: the mode for the file - must be 'w', 'wb', 'r', or 'rb' -
            indicating read/write ('r'/'w') and optionally "binary"
            handling of the file data. (Default value = 'w')
        :type mode: str, optional
        :param chunk_size: size of chunked bytes to return when reading
            streamed bytes in 'rb' mode
        :type chunk_size: int, optional
        :param decode_unicode: whether to decode textual responses as unicode
            when returning streamed lines in 'r' mode
        :type decode_unicode: bool, optional
        :param **kwargs: additional keyword arguments (including
            ``chunk_size`` and ``decode_unicode``) passed through unchanged
            to ``RemoteFile``
        :returns: the newly constructed remote file object
        :rtype: RemoteFile
        :raises RestApiError: if constructing the ``RemoteFile`` raises any
            exception; the original exception is passed as ``cause``

        Examples
        --------
        >>> import datadotworld as dw
        >>>
        >>> # write a text file
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'test.txt') as w:
        ...   w.write("this is a test.")
        >>>
        >>> # write a jsonlines file
        >>> import json
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'test.jsonl') as w:
        ...   json.dump({'foo':42, 'bar':"A"}, w)
        ...   w.write("\\n")
        ...   json.dump({'foo':13, 'bar':"B"}, w)
        ...   w.write("\\n")
        >>>
        >>> # write a csv file
        >>> import csv
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'test.csv') as w:
        ...   csvw = csv.DictWriter(w, fieldnames=['foo', 'bar'])
        ...   csvw.writeheader()
        ...   csvw.writerow({'foo':42, 'bar':"A"})
        ...   csvw.writerow({'foo':13, 'bar':"B"})
        >>>
        >>> # write a pandas dataframe as a csv file
        >>> import pandas as pd
        >>> df = pd.DataFrame({'foo':[1,2,3,4],'bar':['a','b','c','d']})
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'dataframe.csv') as w:
        ...   df.to_csv(w, index=False)
        >>>
        >>> # write a binary file
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'test.txt', mode='wb') as w:
        ...   w.write(bytes([100,97,116,97,46,119,111,114,108,100]))
        >>>
        >>> # read a text file
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'test.txt', mode='r') as r:
        ...   print(r.read())
        >>>
        >>> # read a csv file
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'test.csv', mode='r') as r:
        ...   csvr = csv.DictReader(r)
        ...   for row in csvr:
        ...      print(row['column a'], row['column b'])
        >>>
        >>> # read a binary file
        >>> with dw.open_remote_file('username/test-dataset',
        ...                          'test', mode='rb') as r:
        ...   data = r.read()
        """
        try:
            remote_file = RemoteFile(self._config, dataset_key, file_name,
                                     mode=mode, **kwargs)
        except Exception as err:
            # Surface every construction failure as the client's own error
            # type, keeping the original exception as the cause.
            raise RestApiError(cause=err)
        return remote_file