Example #1
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object for S3, GCS, HDFS,
        and the local filesystem.

        Built lazily on first access and cached on ``self._fs``.
        """
        # Spark supports basically every filesystem there is

        # use the `is None` sentinel check like the other runners'
        # fs properties (was `if not self._fs:`)
        if self._fs is None:
            self._fs = CompositeFilesystem()

            # S3 support only when boto3 is importable; disabled at
            # runtime on permanent boto3 errors (e.g. bad credentials)
            if boto3_installed:
                self._fs.add_fs('s3', S3Filesystem(
                    aws_access_key_id=self._opts['aws_access_key_id'],
                    aws_secret_access_key=self._opts['aws_secret_access_key'],
                    aws_session_token=self._opts['aws_session_token'],
                    s3_endpoint=self._opts['s3_endpoint'],
                    s3_region=self._opts['s3_region'],
                ), disable_if=_is_permanent_boto3_error)

            # GCS support only when the Google client libraries are
            # importable; disabled on permanent Google API errors
            if google_libs_installed:
                self._fs.add_fs('gcs', GCSFilesystem(
                    project_id=self._opts['project_id'],
                    location=self._opts['gcs_region'],
                    object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
                ), disable_if=_is_permanent_google_error)

            # Hadoop FS is responsible for all URIs that fall through to it
            self._fs.add_fs('hadoop', HadoopFilesystem(
                self._opts['hadoop_bin']))

            self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Example #2
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object for S3, GCS, HDFS,
        and the local filesystem.

        Built lazily on first access and cached on ``self._fs``.
        """
        # Spark supports basically every filesystem there is

        # use the `is None` sentinel check like the other runners'
        # fs properties (was `if not self._fs:`)
        if self._fs is None:
            self._fs = CompositeFilesystem()

            # S3 support only when boto3 is importable; disabled at
            # runtime on permanent boto3 errors (e.g. bad credentials)
            if boto3_installed:
                self._fs.add_fs('s3', S3Filesystem(
                    aws_access_key_id=self._opts['aws_access_key_id'],
                    aws_secret_access_key=self._opts['aws_secret_access_key'],
                    aws_session_token=self._opts['aws_session_token'],
                    s3_endpoint=self._opts['s3_endpoint'],
                    s3_region=self._opts['s3_region'],
                ), disable_if=_is_permanent_boto3_error)

            # GCS support only when the Google client libraries are
            # importable; disabled on permanent Google API errors
            if google_libs_installed:
                self._fs.add_fs('gcs', GCSFilesystem(
                    project_id=self._opts['google_project_id']
                ), disable_if=_is_permanent_google_error)

            # Hadoop FS handles any URI that falls through to it
            self._fs.add_fs('hadoop', HadoopFilesystem(
                self._opts['hadoop_bin']))

            self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Example #3
0
 def fs(self):
     """Composite filesystem covering HDFS and the local filesystem.

     Constructed lazily on first access and cached on ``self._fs``.
     """
     if self._fs is not None:
         return self._fs
     hadoop_fs = HadoopFilesystem(self._opts['hadoop_bin'])
     self._fs = CompositeFilesystem(hadoop_fs, LocalFilesystem())
     return self._fs
Example #4
0
 def fs(self):
     """:py:class:`~mrjob.fs.base.Filesystem` for local paths only.

     ``LocalFilesystem`` is wrapped in a ``CompositeFilesystem`` so that
     URIs raise ``IOError`` rather than being treated as paths (see #1185).
     """
     if self._fs is not None:
         return self._fs
     self._fs = CompositeFilesystem(LocalFilesystem())
     return self._fs
 def fs(self):
     """Local-only :py:class:`~mrjob.fs.base.Filesystem`.

     Filesystem methods are also forwarded from
     :py:class:`~mrjob.runner.MRJobRunner` until mrjob 0.5, but **that
     forwarding is deprecated.**
     """
     if self._fs is not None:
         return self._fs
     self._fs = LocalFilesystem()
     return self._fs
Example #6
0
 def fs(self):
     """Local-only :py:class:`~mrjob.fs.base.Filesystem`.

     Filesystem methods are also forwarded from
     :py:class:`~mrjob.runner.MRJobRunner` until mrjob 0.6.0, but **that
     forwarding is deprecated.**

     ``LocalFilesystem`` is wrapped in a ``CompositeFilesystem`` so that
     URIs raise ``IOError`` rather than being treated as paths (see #1185).
     """
     if self._fs is not None:
         return self._fs
     self._fs = CompositeFilesystem(LocalFilesystem())
     return self._fs
Example #7
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` for GCS and the local
        filesystem.

        NOTE(review): an earlier docstring also claimed SSH and S3 support,
        but only GCS and local filesystems are composed here — confirm.
        """
        if self._fs is None:
            # keep a separate handle on the GCS filesystem
            self._gcs_fs = GCSFilesystem()
            self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())
        return self._fs
Example #8
0
    def fs(self):
        """:py:class:`mrjob.fs.base.Filesystem` object for HDFS and the local
        filesystem.

        Constructed lazily on first access and cached on ``self._fs``.
        """
        if self._fs is not None:
            return self._fs

        self._fs = CompositeFilesystem()

        # an empty hadoop_bin list means "no hadoop binary yet": pass None
        # instead so hadoop isn't used until fs.set_hadoop_bin() is called
        # (used for running hadoop over SSH)
        hadoop_bin = self._opts['hadoop_bin'] or None
        self._fs.add_fs('hadoop', HadoopFilesystem(hadoop_bin))
        self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Example #9
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` for GCS and the local
        filesystem.

        NOTE(review): an earlier docstring also claimed SSH and S3 support,
        but only GCS and local filesystems are composed here — confirm.
        """
        if self._fs is None:
            # keep a separate handle on the GCS filesystem
            self._gcs_fs = GCSFilesystem(
                credentials=self._credentials,
                local_tmp_dir=self._get_local_tmp_dir(),
                project_id=self._project_id,
            )
            self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())
        return self._fs
Example #10
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object for SSH, S3, GCS, and
        the local filesystem.

        Constructed lazily on first access and cached on ``self._fs``.
        """
        if self._fs is not None:
            return self._fs

        self._fs = CompositeFilesystem()

        # prefer an explicitly configured region; otherwise derive one
        # from the configured zone
        location = self._opts['region'] or _zone_to_region(self._opts['zone'])

        self._fs.add_fs('gcs', GCSFilesystem(
            credentials=self._credentials,
            project_id=self._project_id,
            part_size=self._upload_part_size(),
            location=location,
            object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
        ))
        self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Example #11
0
 def setUp(self):
     # run the parent fixture first, then give each test a fresh
     # LocalFilesystem to exercise
     super(LocalFSTestCase, self).setUp()
     self.fs = LocalFilesystem()