示例#1
0
class CatTestCase(SandboxedTestCase):
    """Exercise ``LocalFilesystem._cat_file()`` on plain, gzip and bz2
    input files."""

    def setUp(self):
        super(CatTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_cat_uncompressed(self):
        # a plain file's chunks should join back into its exact contents
        path = self.makefile('f', b'bar\nfoo\n')
        chunks = self.fs._cat_file(path)

        self.assertEqual(b''.join(chunks), b'bar\nfoo\n')

    def test_yields_lines(self):
        # since it's just opening the fileobj directly
        path = self.makefile('f', b'bar\nfoo\n')
        lines = list(self.fs._cat_file(path))

        self.assertEqual(lines, [b'bar\n', b'foo\n'])

    def test_cat_gz(self):
        # write a gzip file by hand, then read it back through _cat_file()
        gz_path = join(self.tmp_dir, 'input.gz')
        with gzip.GzipFile(gz_path, 'wb') as gz_file:
            gz_file.write(b'foo\nbar\n')

        contents = b''.join(self.fs._cat_file(gz_path))
        self.assertEqual(contents, b'foo\nbar\n')

    def test_cat_bz2(self):
        # same round trip as the gzip test, but with a bz2 file
        bz2_path = join(self.tmp_dir, 'input.bz2')
        with bz2.BZ2File(bz2_path, 'wb') as bz2_file:
            bz2_file.write(b'bar\nbar\nfoo\n')

        contents = b''.join(self.fs._cat_file(bz2_path))
        self.assertEqual(contents, b'bar\nbar\nfoo\n')
示例#2
0
    def fs(self):
        """Return this runner's filesystem, building it on first use.

        The result is a ``CompositeFilesystem`` cached on ``self._fs``. S3 is
        added when ``boto3_installed`` is true and GCS when
        ``google_libs_installed`` is true; Hadoop and local filesystems are
        always added, in that order.
        """
        # Spark supports basically every filesystem there is
        if self._fs:
            return self._fs

        self._fs = composite = CompositeFilesystem()

        if boto3_installed:
            # S3Filesystem takes keyword arguments named after these options
            s3_opt_names = (
                'aws_access_key_id',
                'aws_secret_access_key',
                'aws_session_token',
                's3_endpoint',
                's3_region',
            )
            s3_kwargs = dict(
                (name, self._opts[name]) for name in s3_opt_names)
            composite.add_fs(
                's3', S3Filesystem(**s3_kwargs),
                disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            gcs_fs = GCSFilesystem(
                project_id=self._opts['project_id'],
                location=self._opts['gcs_region'],
                object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
            )
            composite.add_fs(
                'gcs', gcs_fs, disable_if=_is_permanent_google_error)

        # Hadoop FS is responsible for all URIs that fall through to it
        hadoop_fs = HadoopFilesystem(self._opts['hadoop_bin'])
        composite.add_fs('hadoop', hadoop_fs)

        composite.add_fs('local', LocalFilesystem())

        return self._fs
示例#3
0
    def fs(self):
        """Return this runner's filesystem, building it on first use.

        The ``CompositeFilesystem`` is cached on ``self._fs``. S3 is added
        when ``boto3_installed`` is true, GCS when ``google_libs_installed``
        is true, followed by the Hadoop and local filesystems.
        """
        # Spark supports basically every filesystem there is
        if self._fs:
            return self._fs

        self._fs = composite = CompositeFilesystem()
        opts = self._opts

        if boto3_installed:
            s3_fs = S3Filesystem(
                aws_access_key_id=opts['aws_access_key_id'],
                aws_secret_access_key=opts['aws_secret_access_key'],
                aws_session_token=opts['aws_session_token'],
                s3_endpoint=opts['s3_endpoint'],
                s3_region=opts['s3_region'],
            )
            composite.add_fs(
                's3', s3_fs, disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            gcs_fs = GCSFilesystem(project_id=opts['google_project_id'])
            composite.add_fs(
                'gcs', gcs_fs, disable_if=_is_permanent_google_error)

        composite.add_fs('hadoop', HadoopFilesystem(opts['hadoop_bin']))
        composite.add_fs('local', LocalFilesystem())

        return self._fs
示例#4
0
 def fs(self):
     """:py:class:`mrjob.fs.base.Filesystem` object for HDFS and the local
     filesystem.

     Created on first access (a ``CompositeFilesystem`` wrapping a
     ``HadoopFilesystem`` built from the ``hadoop_bin`` option and a
     ``LocalFilesystem``) and cached on ``self._fs``.
     """
     if self._fs is not None:
         return self._fs

     hadoop_fs = HadoopFilesystem(self._opts['hadoop_bin'])
     local_fs = LocalFilesystem()
     self._fs = CompositeFilesystem(hadoop_fs, local_fs)
     return self._fs
示例#5
0
 def fs(self):
     """:py:class:`~mrjob.fs.base.Filesystem` object for the local
     filesystem.

     Created on first access and cached on ``self._fs``.
     """
     if self._fs is not None:
         return self._fs

     # wrap LocalFilesystem in CompositeFilesystem to get IOError
     # on URIs (see #1185)
     local_fs = LocalFilesystem()
     self._fs = CompositeFilesystem(local_fs)
     return self._fs
 def fs(self):
     """:py:class:`~mrjob.fs.base.Filesystem` object for the local
     filesystem, created on first access and cached on ``self._fs``.

     Methods on :py:class:`~mrjob.fs.base.Filesystem` objects
     will be forwarded to :py:class:`~mrjob.runner.MRJobRunner` until mrjob
     0.5, but **this behavior is deprecated.**
     """
     if self._fs is not None:
         return self._fs

     local_fs = LocalFilesystem()
     self._fs = local_fs
     return self._fs
示例#7
0
 def fs(self):
     """:py:class:`~mrjob.fs.base.Filesystem` object for the local
     filesystem, created on first access and cached on ``self._fs``.

     Methods on :py:class:`~mrjob.fs.base.Filesystem` objects
     will be forwarded to :py:class:`~mrjob.runner.MRJobRunner` until mrjob
     0.6.0, but **this behavior is deprecated.**
     """
     if self._fs is not None:
         return self._fs

     # wrap LocalFilesystem in CompositeFilesystem to get IOError
     # on URIs (see #1185)
     local_fs = LocalFilesystem()
     self._fs = CompositeFilesystem(local_fs)
     return self._fs
示例#8
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object combining GCS and
        the local filesystem.

        Built on first access and cached on ``self._fs``; the GCS
        filesystem is also kept on ``self._gcs_fs``.
        """
        if self._fs is None:
            self._gcs_fs = GCSFilesystem()

            local_fs = LocalFilesystem()
            self._fs = CompositeFilesystem(self._gcs_fs, local_fs)

        return self._fs
示例#9
0
class CatTestCase(SandboxedTestCase):
    """Round-trip tests for ``LocalFilesystem._cat_file()`` on plain, gzip
    and bz2 files."""

    def setUp(self):
        super(CatTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_cat_uncompressed(self):
        # joining the yielded chunks should reproduce the file contents
        data = b'bar\nfoo\n'
        path = self.makefile('f', data)

        self.assertEqual(b''.join(self.fs._cat_file(path)), data)

    def test_yields_lines(self):
        # since it's just opening the fileobj directly
        path = self.makefile('f', b'bar\nfoo\n')
        yielded = list(self.fs._cat_file(path))

        self.assertEqual(yielded, [b'bar\n', b'foo\n'])

    def test_cat_gz(self):
        data = b'foo\nbar\n'
        gz_path = join(self.tmp_dir, 'input.gz')
        with gzip.GzipFile(gz_path, 'wb') as gz_file:
            gz_file.write(data)

        self.assertEqual(b''.join(self.fs._cat_file(gz_path)), data)

    def test_cat_bz2(self):
        data = b'bar\nbar\nfoo\n'
        bz2_path = join(self.tmp_dir, 'input.bz2')
        with bz2.BZ2File(bz2_path, 'wb') as bz2_file:
            bz2_file.write(data)

        self.assertEqual(b''.join(self.fs._cat_file(bz2_path)), data)
示例#10
0
    def fs(self):
        """:py:class:`mrjob.fs.base.Filesystem` object for HDFS and the local
        filesystem.

        Built on first access and cached on ``self._fs``.
        """
        if self._fs is not None:
            return self._fs

        self._fs = composite = CompositeFilesystem()

        # don't pass [] to fs; this means not to use hadoop until
        # fs.set_hadoop_bin() is called (used for running hadoop over SSH).
        hadoop_bin = self._opts['hadoop_bin']
        if not hadoop_bin:
            hadoop_bin = None

        composite.add_fs('hadoop', HadoopFilesystem(hadoop_bin))
        composite.add_fs('local', LocalFilesystem())

        return self._fs
示例#11
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object combining GCS and
        the local filesystem.

        Built on first access and cached on ``self._fs``; the GCS
        filesystem is also kept on ``self._gcs_fs``.
        """
        if self._fs is None:
            self._gcs_fs = GCSFilesystem(
                credentials=self._credentials,
                local_tmp_dir=self._get_local_tmp_dir(),
                project_id=self._project_id,
            )

            local_fs = LocalFilesystem()
            self._fs = CompositeFilesystem(self._gcs_fs, local_fs)

        return self._fs
示例#12
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object combining GCS and
        the local filesystem.

        Built on first access and cached on ``self._fs``.
        """
        if self._fs is not None:
            return self._fs

        self._fs = composite = CompositeFilesystem()

        # use the region option if set, otherwise derive it from the zone
        location = self._opts['region']
        if not location:
            location = _zone_to_region(self._opts['zone'])

        gcs_fs = GCSFilesystem(
            credentials=self._credentials,
            project_id=self._project_id,
            part_size=self._upload_part_size(),
            location=location,
            object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
        )
        composite.add_fs('gcs', gcs_fs)

        composite.add_fs('local', LocalFilesystem())

        return self._fs
示例#13
0
class LocalFSTestCase(TempdirTestCase):
    """Tests for ``LocalFilesystem``, using files created under
    ``self.root``."""

    def setUp(self):
        super(LocalFSTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_can_handle_path_match(self):
        self.assertEqual(self.fs.can_handle_path('/dem/bitties'), True)

    def test_can_handle_path_nomatch(self):
        self.assertEqual(self.fs.can_handle_path('http://yelp.com/'), False)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls(self.root)), [])

    # The ls() tests below sort both sides before comparing, so they don't
    # depend on the order in which directory entries are listed.

    def test_ls_basic(self):
        self.makefile('f', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.root)),
                         sorted(self.abs_paths('f')))

    def test_ls_basic_2(self):
        self.makefile('f', 'contents')
        self.makefile('f2', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.root)),
                         sorted(self.abs_paths('f', 'f2')))

    def test_ls_recurse(self):
        self.makefile('f', 'contents')
        self.makefile('d/f2', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.root)),
                         sorted(self.abs_paths('f', 'd/f2')))

    def test_cat_uncompressed(self):
        path = self.makefile('f', 'bar\nfoo\n')
        self.assertEqual(list(self.fs._cat_file(path)), ['bar\n', 'foo\n'])

    def test_cat_gz(self):
        # write a gzip file by hand, then read it back through _cat_file()
        input_gz_path = os.path.join(self.root, 'input.gz')
        with gzip.GzipFile(input_gz_path, 'w') as input_gz:
            input_gz.write('foo\nbar\n')

        self.assertEqual(list(self.fs._cat_file(input_gz_path)),
                         ['foo\n', 'bar\n'])

    def test_cat_bz2(self):
        # same round trip as the gzip test, but with a bz2 file
        input_bz2_path = os.path.join(self.root, 'input.bz2')
        with bz2.BZ2File(input_bz2_path, 'w') as input_bz2:
            input_bz2.write('bar\nbar\nfoo\n')

        self.assertEqual(list(self.fs._cat_file(input_bz2_path)),
                         ['bar\n', 'bar\n', 'foo\n'])

    def test_du(self):
        # two 4-byte files: 8 bytes for the directory, 4 for each file
        data_path_1 = self.makefile('data1', 'abcd')
        data_path_2 = self.makefile('more/data2', 'defg')

        self.assertEqual(self.fs.du(self.root), 8)
        self.assertEqual(self.fs.du(data_path_1), 4)
        self.assertEqual(self.fs.du(data_path_2), 4)

    def test_mkdir(self):
        path = os.path.join(self.root, 'dir')
        self.fs.mkdir(path)
        self.assertEqual(os.path.isdir(path), True)

    def test_path_exists_no(self):
        path = os.path.join(self.root, 'f')
        self.assertEqual(self.fs.path_exists(path), False)

    def test_path_exists_yes(self):
        path = self.makefile('f', 'contents')
        self.assertEqual(self.fs.path_exists(path), True)

    def test_touchz(self):
        path = os.path.join(self.root, 'f')
        # touchz() is fine on a missing file and on an empty file...
        self.fs.touchz(path)
        self.fs.touchz(path)
        with open(path, 'w') as f:
            f.write('not empty anymore')
        # ...but must raise once the file has contents
        self.assertRaises(OSError, self.fs.touchz, path)

    def test_md5sum(self):
        path = self.makefile('f', 'abcd')
        self.assertEqual(self.fs.md5sum(path),
                         'e2fc714c4727ee9395f324cd2e7f331f')
示例#14
0
 def setUp(self):
     """Run the parent ``setUp()``, then store a new ``LocalFilesystem``
     on ``self.fs``."""
     parent = super(LocalFSTestCase, self)
     parent.setUp()

     self.fs = LocalFilesystem()
示例#15
0
class LocalFSTestCase(SandboxedTestCase):
    """Tests for ``LocalFilesystem``, using files created under
    ``self.tmp_dir``."""

    def setUp(self):
        super(LocalFSTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_can_handle_local_paths(self):
        # absolute path first, then a relative one
        for local_path in ('/dem/bitties', 'garden'):
            self.assertEqual(self.fs.can_handle_path(local_path), True)

    def test_cant_handle_uris(self):
        handled = self.fs.can_handle_path('http://yelp.com/')
        self.assertEqual(handled, False)

    def test_du(self):
        # two 4-byte files: 8 bytes for the directory, 4 for each file
        first = self.makefile('data1', 'abcd')
        second = self.makefile('more/data2', 'defg')

        self.assertEqual(self.fs.du(self.tmp_dir), 8)
        for data_path in (first, second):
            self.assertEqual(self.fs.du(data_path), 4)

    def test_ls_empty(self):
        listing = list(self.fs.ls(self.tmp_dir))
        self.assertEqual(listing, [])

    def test_ls_basic(self):
        self.makefile('f', 'contents')

        listing = sorted(self.fs.ls(self.tmp_dir))
        self.assertEqual(listing, sorted(self.abs_paths('f')))

    def test_ls_basic_2(self):
        for name in ('f', 'f2'):
            self.makefile(name, 'contents')

        listing = sorted(self.fs.ls(self.tmp_dir))
        self.assertEqual(listing, sorted(self.abs_paths('f', 'f2')))

    def test_ls_recurse(self):
        for name in ('f', 'd/f2'):
            self.makefile(name, 'contents')

        listing = sorted(self.fs.ls(self.tmp_dir))
        self.assertEqual(listing, sorted(self.abs_paths('f', 'd/f2')))

    def test_cat_uncompressed(self):
        path = self.makefile('f', b'bar\nfoo\n')
        lines = list(self.fs._cat_file(path))
        self.assertEqual(lines, [b'bar\n', b'foo\n'])

    def test_cat_gz(self):
        # write a gzip file by hand, then read it back through _cat_file()
        gz_path = os.path.join(self.tmp_dir, 'input.gz')
        gz_file = gzip.GzipFile(gz_path, 'wb')
        gz_file.write(b'foo\nbar\n')
        gz_file.close()

        lines = list(self.fs._cat_file(gz_path))
        self.assertEqual(lines, [b'foo\n', b'bar\n'])

    def test_cat_bz2(self):
        # same round trip as the gzip test, but with a bz2 file
        bz2_path = os.path.join(self.tmp_dir, 'input.bz2')
        bz2_file = bz2.BZ2File(bz2_path, 'wb')
        bz2_file.write(b'bar\nbar\nfoo\n')
        bz2_file.close()

        lines = list(self.fs._cat_file(bz2_path))
        self.assertEqual(lines, [b'bar\n', b'bar\n', b'foo\n'])

    def test_mkdir(self):
        new_dir = os.path.join(self.tmp_dir, 'dir')
        self.fs.mkdir(new_dir)
        self.assertEqual(os.path.isdir(new_dir), True)

    def test_exists_no(self):
        missing = os.path.join(self.tmp_dir, 'f')
        self.assertEqual(self.fs.exists(missing), False)

    def test_exists_yes(self):
        present = self.makefile('f', 'contents')
        self.assertEqual(self.fs.exists(present), True)

    def test_rm_file(self):
        target = self.makefile('f', 'contents')
        self.assertEqual(self.fs.exists(target), True)

        self.fs.rm(target)
        self.assertEqual(self.fs.exists(target), False)

    def test_rm_dir(self):
        target = self.makedirs('foobar')
        self.assertEqual(self.fs.exists(target), True)

        self.fs.rm(target)
        self.assertEqual(self.fs.exists(target), False)

    def test_touchz(self):
        path = os.path.join(self.tmp_dir, 'f')

        # first call hits a missing file, second an empty one
        for _ in range(2):
            self.fs.touchz(path)

        with open(path, 'w') as out:
            out.write('not empty anymore')

        # a file with contents must be rejected
        self.assertRaises(OSError, self.fs.touchz, path)

    def test_md5sum(self):
        path = self.makefile('f', 'abcd')
        digest = self.fs.md5sum(path)
        self.assertEqual(digest, 'e2fc714c4727ee9395f324cd2e7f331f')
示例#16
0
class LocalFSTestCase(TempdirTestCase):
    """Tests for ``LocalFilesystem``, using files created under
    ``self.root``."""

    def setUp(self):
        super(LocalFSTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_can_handle_path_match(self):
        self.assertEqual(self.fs.can_handle_path('/dem/bitties'), True)

    def test_can_handle_path_nomatch(self):
        self.assertEqual(self.fs.can_handle_path('http://yelp.com/'), False)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls(self.root)), [])

    # The ls() tests below sort both sides before comparing, so they don't
    # depend on the order in which directory entries are listed.

    def test_ls_basic(self):
        self.makefile('f', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.root)),
                         sorted(self.abs_paths('f')))

    def test_ls_basic_2(self):
        self.makefile('f', 'contents')
        self.makefile('f2', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.root)),
                         sorted(self.abs_paths('f', 'f2')))

    def test_ls_recurse(self):
        self.makefile('f', 'contents')
        self.makefile('d/f2', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.root)),
                         sorted(self.abs_paths('f', 'd/f2')))

    def test_cat_uncompressed(self):
        path = self.makefile('f', 'bar\nfoo\n')
        self.assertEqual(list(self.fs._cat_file(path)), ['bar\n', 'foo\n'])

    def test_cat_gz(self):
        # write a gzip file by hand, then read it back through _cat_file()
        input_gz_path = os.path.join(self.root, 'input.gz')
        with gzip.GzipFile(input_gz_path, 'w') as input_gz:
            input_gz.write('foo\nbar\n')

        self.assertEqual(list(self.fs._cat_file(input_gz_path)),
                         ['foo\n', 'bar\n'])

    def test_cat_bz2(self):
        # same round trip as the gzip test, but with a bz2 file
        input_bz2_path = os.path.join(self.root, 'input.bz2')
        with bz2.BZ2File(input_bz2_path, 'w') as input_bz2:
            input_bz2.write('bar\nbar\nfoo\n')

        self.assertEqual(list(self.fs._cat_file(input_bz2_path)),
                         ['bar\n', 'bar\n', 'foo\n'])

    def test_du(self):
        # two 4-byte files: 8 bytes for the directory, 4 for each file
        data_path_1 = self.makefile('data1', 'abcd')
        data_path_2 = self.makefile('more/data2', 'defg')

        self.assertEqual(self.fs.du(self.root), 8)
        self.assertEqual(self.fs.du(data_path_1), 4)
        self.assertEqual(self.fs.du(data_path_2), 4)

    def test_mkdir(self):
        path = os.path.join(self.root, 'dir')
        self.fs.mkdir(path)
        self.assertEqual(os.path.isdir(path), True)

    def test_path_exists_no(self):
        path = os.path.join(self.root, 'f')
        self.assertEqual(self.fs.path_exists(path), False)

    def test_path_exists_yes(self):
        path = self.makefile('f', 'contents')
        self.assertEqual(self.fs.path_exists(path), True)

    def test_touchz(self):
        path = os.path.join(self.root, 'f')
        # touchz() is fine on a missing file and on an empty file...
        self.fs.touchz(path)
        self.fs.touchz(path)
        with open(path, 'w') as f:
            f.write('not empty anymore')
        # ...but must raise once the file has contents
        self.assertRaises(OSError, self.fs.touchz, path)

    def test_md5sum(self):
        path = self.makefile('f', 'abcd')
        self.assertEqual(self.fs.md5sum(path),
                         'e2fc714c4727ee9395f324cd2e7f331f')
示例#17
0
class LocalFSTestCase(SandboxedTestCase):
    """Tests for ``LocalFilesystem``, covering both plain local paths and
    ``file://`` URIs."""

    def setUp(self):
        super(LocalFSTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_can_handle_local_paths(self):
        self.assertEqual(self.fs.can_handle_path('/dem/bitties'), True)
        # relative paths
        self.assertEqual(self.fs.can_handle_path('garden'), True)

    def test_can_handle_file_uris(self):
        self.assertEqual(self.fs.can_handle_path('file:///dem/bitties'), True)

    def test_cant_handle_other_uris(self):
        self.assertEqual(self.fs.can_handle_path('http://yelp.com/'), False)

    def test_du(self):
        # two 4-byte files: 8 bytes for the directory, 4 for each file
        data_path_1 = self.makefile('data1', 'abcd')
        data_path_2 = self.makefile('more/data2', 'defg')

        self.assertEqual(self.fs.du(self.tmp_dir), 8)
        self.assertEqual(self.fs.du(data_path_1), 4)
        # du() should also accept a file:// URI
        self.assertEqual(self.fs.du('file://' + data_path_2), 4)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls(self.tmp_dir)), [])

    def test_ls_basic(self):
        self.makefile('f', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.tmp_dir)),
                         sorted(self.abs_paths('f')))

    def test_ls_basic_2(self):
        self.makefile('f', 'contents')
        self.makefile('f2', 'contents')
        self.assertEqual(sorted(self.fs.ls(self.tmp_dir)),
                         sorted(self.abs_paths('f', 'f2')))

    def test_ls_recurse(self):
        self.makefile('f', 'contents')
        self.makefile(join('d', 'f2'), 'contents')
        self.assertEqual(sorted(self.fs.ls(self.tmp_dir)),
                         sorted(self.abs_paths('f', 'd/f2')))

    def test_ls_with_file_uri(self):
        # ls() on a file URI should yield that same URI back
        f_path = self.makefile('f', 'contents')
        f_uri = 'file://' + f_path

        self.assertEqual(list(self.fs.ls(f_uri)), [f_uri])

    def test_ls_dir_with_file_uri(self):
        # ls() on a directory URI should yield URIs, not bare paths
        self.makefile('f', 'contents')
        self.makefile('f2', 'contents')
        tmp_dir_uri = 'file://' + self.tmp_dir

        self.assertEqual(sorted(self.fs.ls(tmp_dir_uri)),
                         [tmp_dir_uri + '/f', tmp_dir_uri + '/f2'])

    def test_mkdir(self):
        path = join(self.tmp_dir, 'dir')
        self.fs.mkdir(path)
        self.assertEqual(os.path.isdir(path), True)

    def test_mkdir_file_uri(self):
        path = join(self.tmp_dir, 'dir')
        self.fs.mkdir('file://' + path)
        self.assertEqual(os.path.isdir(path), True)

    def test_exists_no(self):
        path = join(self.tmp_dir, 'f')
        self.assertEqual(self.fs.exists(path), False)
        self.assertEqual(self.fs.exists('file://' + path), False)

    def test_exists_yes(self):
        path = self.makefile('f', 'contents')
        self.assertEqual(self.fs.exists(path), True)
        self.assertEqual(self.fs.exists('file://' + path), True)

    def test_put(self):
        src = self.makefile('f', 'contents')
        dest1 = join(self.tmp_dir, 'g')
        dest2 = join(self.tmp_dir, 'h')

        self.fs.put(src, dest1)
        self.assertEqual(b''.join(self.fs.cat(dest1)), b'contents')

        # test put()-ing to a URI. *src* has to be an actual path
        self.fs.put(src, 'file://' + dest2)
        # read back *dest2* (not *dest1*) so the URI put() is what's checked
        self.assertEqual(b''.join(self.fs.cat(dest2)), b'contents')

    def test_rm_file(self):
        path = self.makefile('f', 'contents')
        self.assertEqual(self.fs.exists(path), True)

        self.fs.rm(path)
        self.assertEqual(self.fs.exists(path), False)

    def test_rm_file_by_uri(self):
        path = self.makefile('f', 'contents')
        self.assertEqual(self.fs.exists(path), True)

        self.fs.rm('file://' + path)
        self.assertEqual(self.fs.exists(path), False)

    def test_rm_dir(self):
        path = self.makedirs('foobar')
        self.assertEqual(self.fs.exists(path), True)

        self.fs.rm(path)
        self.assertEqual(self.fs.exists(path), False)

    def test_touchz(self):
        path = join(self.tmp_dir, 'f')

        self.assertEqual(self.fs.exists(path), False)

        self.fs.touchz(path)
        self.assertEqual(self.fs.exists(path), True)

        # okay to touchz() an empty file
        self.fs.touchz(path)

        with open(path, 'w') as f:
            f.write('not empty anymore')

        # not okay to touchz() a non-empty file
        self.assertRaises(OSError, self.fs.touchz, path)

    def test_touchz_file_uri(self):
        uri = 'file://' + join(self.tmp_dir, 'f')

        self.assertEqual(self.fs.exists(uri), False)

        self.fs.touchz(uri)
        self.assertEqual(self.fs.exists(uri), True)

    def test_md5sum(self):
        path = self.makefile('f', 'abcd')

        self.assertEqual(self.fs.md5sum(path),
                         'e2fc714c4727ee9395f324cd2e7f331f')

        # same digest when the file is addressed by URI
        self.assertEqual(self.fs.md5sum('file://' + path),
                         'e2fc714c4727ee9395f324cd2e7f331f')
示例#18
0
class LocalFSTestCase(SandboxedTestCase):
    """Tests for ``LocalFilesystem``, using files created under
    ``self.tmp_dir``."""

    def setUp(self):
        super(LocalFSTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_can_handle_local_paths(self):
        self.assertEqual(self.fs.can_handle_path("/dem/bitties"), True)
        # relative paths
        self.assertEqual(self.fs.can_handle_path("garden"), True)

    def test_cant_handle_uris(self):
        self.assertEqual(self.fs.can_handle_path("http://yelp.com/"), False)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls(self.tmp_dir)), [])

    # The ls() tests below sort both sides before comparing, so they don't
    # depend on the order in which directory entries are listed.

    def test_ls_basic(self):
        self.makefile("f", "contents")
        self.assertEqual(sorted(self.fs.ls(self.tmp_dir)),
                         sorted(self.abs_paths("f")))

    def test_ls_basic_2(self):
        self.makefile("f", "contents")
        self.makefile("f2", "contents")
        self.assertEqual(sorted(self.fs.ls(self.tmp_dir)),
                         sorted(self.abs_paths("f", "f2")))

    def test_ls_recurse(self):
        self.makefile("f", "contents")
        self.makefile("d/f2", "contents")
        self.assertEqual(sorted(self.fs.ls(self.tmp_dir)),
                         sorted(self.abs_paths("f", "d/f2")))

    def test_cat_uncompressed(self):
        path = self.makefile("f", "bar\nfoo\n")
        self.assertEqual(list(self.fs._cat_file(path)), ["bar\n", "foo\n"])

    def test_cat_gz(self):
        # write a gzip file by hand, then read it back through _cat_file()
        input_gz_path = os.path.join(self.tmp_dir, "input.gz")
        input_gz = gzip.GzipFile(input_gz_path, "w")
        input_gz.write("foo\nbar\n")
        input_gz.close()

        self.assertEqual(list(self.fs._cat_file(input_gz_path)),
                         ["foo\n", "bar\n"])

    def test_cat_bz2(self):
        # same round trip as the gzip test, but with a bz2 file
        input_bz2_path = os.path.join(self.tmp_dir, "input.bz2")
        input_bz2 = bz2.BZ2File(input_bz2_path, "w")
        input_bz2.write("bar\nbar\nfoo\n")
        input_bz2.close()

        self.assertEqual(list(self.fs._cat_file(input_bz2_path)),
                         ["bar\n", "bar\n", "foo\n"])

    def test_du(self):
        # two 4-byte files: 8 bytes for the directory, 4 for each file
        data_path_1 = self.makefile("data1", "abcd")
        data_path_2 = self.makefile("more/data2", "defg")

        self.assertEqual(self.fs.du(self.tmp_dir), 8)
        self.assertEqual(self.fs.du(data_path_1), 4)
        self.assertEqual(self.fs.du(data_path_2), 4)

    def test_mkdir(self):
        path = os.path.join(self.tmp_dir, "dir")
        self.fs.mkdir(path)
        self.assertEqual(os.path.isdir(path), True)

    def test_path_exists_no(self):
        path = os.path.join(self.tmp_dir, "f")
        self.assertEqual(self.fs.path_exists(path), False)

    def test_path_exists_yes(self):
        path = self.makefile("f", "contents")
        self.assertEqual(self.fs.path_exists(path), True)

    def test_touchz(self):
        path = os.path.join(self.tmp_dir, "f")
        # touchz() is fine on a missing file and on an empty file...
        self.fs.touchz(path)
        self.fs.touchz(path)
        with open(path, "w") as f:
            f.write("not empty anymore")
        # ...but must raise once the file has contents
        self.assertRaises(OSError, self.fs.touchz, path)

    def test_md5sum(self):
        path = self.makefile("f", "abcd")
        self.assertEqual(self.fs.md5sum(path),
                         "e2fc714c4727ee9395f324cd2e7f331f")
示例#19
0
class LocalFSTestCase(SandboxedTestCase):
    """Tests for ``LocalFilesystem``, using files created under
    ``self.tmp_dir``."""

    def setUp(self):
        super(LocalFSTestCase, self).setUp()
        self.fs = LocalFilesystem()

    def test_can_handle_local_paths(self):
        # absolute path first, then a relative one
        for local_path in ('/dem/bitties', 'garden'):
            self.assertEqual(self.fs.can_handle_path(local_path), True)

    def test_cant_handle_uris(self):
        handled = self.fs.can_handle_path('http://yelp.com/')
        self.assertEqual(handled, False)

    def test_du(self):
        # two 4-byte files: 8 bytes for the directory, 4 for each file
        first = self.makefile('data1', 'abcd')
        second = self.makefile('more/data2', 'defg')

        self.assertEqual(self.fs.du(self.tmp_dir), 8)
        for data_path in (first, second):
            self.assertEqual(self.fs.du(data_path), 4)

    def test_write_str(self):
        # write() given a string; cat() should return the same content
        target = self.abs_paths('new-str')[0]
        content = 'some content!'
        self.fs.write(target, content)

        written = "".join(self.fs.cat(target))
        self.assertEqual(written, content)

    def test_write_file(self):
        # write() given a file object; cat() should return its contents
        target = self.abs_paths('new-fileobj')[0]
        content = StringIO('some content!')
        self.fs.write(target, content)

        written = "".join(self.fs.cat(target))
        self.assertEqual(written, content.getvalue())

    def test_overwrite(self):
        # write() to a path that already exists must raise
        existing = self.makefile('existing', 'herp')
        self.assertRaises(OSError, self.fs.write, existing, 'derp')

    def test_copy_from_local(self):
        content = 'Never poke a bear in the zoo'
        src = self.makefile('copy-src', content)
        dst = self.abs_paths('copy-dst')[0]
        self.fs.copy_from_local(dst, src)

        copied = "".join(self.fs.cat(dst))
        self.assertEqual(copied, content)

    def test_copy_from_local_override(self):
        # copying onto a destination that already exists must raise
        src = self.makefile('copy-src', 'in')
        dst = self.makefile('copy-dst', 'out')
        self.assertRaises(OSError, self.fs.copy_from_local, dst, src)

    def test_ls_empty(self):
        listing = list(self.fs.ls(self.tmp_dir))
        self.assertEqual(listing, [])

    def test_ls_basic(self):
        self.makefile('f', 'contents')

        listing = list(self.fs.ls(self.tmp_dir))
        self.assertEqual(listing, self.abs_paths('f'))

    def test_ls_basic_2(self):
        for name in ('f', 'f2'):
            self.makefile(name, 'contents')

        listing = list(self.fs.ls(self.tmp_dir))
        self.assertItemsEqual(listing, self.abs_paths('f', 'f2'))

    def test_ls_recurse(self):
        for name in ('f', 'd/f2'):
            self.makefile(name, 'contents')

        listing = list(self.fs.ls(self.tmp_dir))
        self.assertItemsEqual(listing, self.abs_paths('f', 'd/f2'))

    def test_cat_uncompressed(self):
        path = self.makefile('f', 'bar\nfoo\n')
        lines = list(self.fs._cat_file(path))
        self.assertEqual(lines, ['bar\n', 'foo\n'])

    def test_cat_gz(self):
        # write a gzip file by hand, then read it back through _cat_file()
        gz_path = os.path.join(self.tmp_dir, 'input.gz')
        gz_file = gzip.GzipFile(gz_path, 'w')
        gz_file.write('foo\nbar\n')
        gz_file.close()

        lines = list(self.fs._cat_file(gz_path))
        self.assertEqual(lines, ['foo\n', 'bar\n'])

    def test_cat_bz2(self):
        # same round trip as the gzip test, but with a bz2 file
        bz2_path = os.path.join(self.tmp_dir, 'input.bz2')
        bz2_file = bz2.BZ2File(bz2_path, 'w')
        bz2_file.write('bar\nbar\nfoo\n')
        bz2_file.close()

        lines = list(self.fs._cat_file(bz2_path))
        self.assertEqual(lines, ['bar\n', 'bar\n', 'foo\n'])

    def test_mkdir(self):
        new_dir = os.path.join(self.tmp_dir, 'dir')
        self.fs.mkdir(new_dir)
        self.assertEqual(os.path.isdir(new_dir), True)

    def test_path_exists_no(self):
        missing = os.path.join(self.tmp_dir, 'f')
        self.assertEqual(self.fs.path_exists(missing), False)

    def test_path_exists_yes(self):
        present = self.makefile('f', 'contents')
        self.assertEqual(self.fs.path_exists(present), True)

    def test_rm_file(self):
        target = self.makefile('f', 'contents')
        self.assertEqual(self.fs.path_exists(target), True)

        self.fs.rm(target)
        self.assertEqual(self.fs.path_exists(target), False)

    def test_rm_dir(self):
        target = self.makedirs('foobar')
        self.assertEqual(self.fs.path_exists(target), True)

        self.fs.rm(target)
        self.assertEqual(self.fs.path_exists(target), False)

    def test_rm_tree_noslash_files(self):
        tree = self.maketree("icio/goodbye-1")
        self.fs.rm(tree.rstrip("/"))

        # Check that the directory and its files have been removed
        self.assertEqual(os.path.isdir(tree), False)
        self.assertEqual(self.fs.path_exists(tree), False)
        self.assertEqual(list(self.fs.ls(tree)), [])

    def test_rm_tree_slash_files(self):
        tree = self.maketree("icio/goodbye-2")
        with_slash = tree.rstrip("/") + "/"
        self.fs.rm(with_slash)

        # Check that the directory and its files have been removed
        self.assertEqual(os.path.isdir(tree), False)
        self.assertEqual(self.fs.path_exists(tree), False)
        self.assertEqual(list(self.fs.ls(tree)), [])

    def test_rm_tree_star_files(self):
        tree = self.maketree("icio/goodbye-3")
        star_glob = tree.rstrip("/") + "/*"
        self.fs.rm(star_glob)

        # Check that the files have been removed but not the root directory
        self.assertEqual(os.path.isdir(tree), True)
        self.assertEqual(self.fs.path_exists(tree), True)
        self.assertEqual(list(self.fs.ls(tree)), [])

    def test_touchz(self):
        path = os.path.join(self.tmp_dir, 'f')

        # first call hits a missing file, second an empty one
        for _ in range(2):
            self.fs.touchz(path)

        with open(path, 'w') as out:
            out.write('not empty anymore')

        # a file with contents must be rejected
        self.assertRaises(OSError, self.fs.touchz, path)

    def test_md5sum(self):
        path = self.makefile('f', 'abcd')
        digest = self.fs.md5sum(path)
        self.assertEqual(digest, 'e2fc714c4727ee9395f324cd2e7f331f')