def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for S3, GCS, HDFS,
    and the local filesystem.

    Built lazily on first access and cached on ``self._fs``.
    """
    # Spark supports basically every filesystem there is
    #
    # use "is None", not truthiness: if CompositeFilesystem were ever
    # falsy (e.g. empty), a truthiness check would rebuild it on every
    # access. Siblings of this method already use "is None".
    if self._fs is None:
        self._fs = CompositeFilesystem()

        if boto3_installed:
            self._fs.add_fs('s3', S3Filesystem(
                aws_access_key_id=self._opts['aws_access_key_id'],
                aws_secret_access_key=self._opts['aws_secret_access_key'],
                aws_session_token=self._opts['aws_session_token'],
                s3_endpoint=self._opts['s3_endpoint'],
                s3_region=self._opts['s3_region'],
            ), disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            self._fs.add_fs('gcs', GCSFilesystem(
                project_id=self._opts['project_id'],
                location=self._opts['gcs_region'],
                object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
            ), disable_if=_is_permanent_google_error)

        # Hadoop FS is responsible for all URIs that fall through to it
        self._fs.add_fs('hadoop', HadoopFilesystem(
            self._opts['hadoop_bin']))

        self._fs.add_fs('local', LocalFilesystem())

    return self._fs
def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for S3, GCS, HDFS,
    and the local filesystem.

    Built lazily on first access and cached on ``self._fs``.
    """
    # Spark supports basically every filesystem there is
    #
    # use "is None", not truthiness: if CompositeFilesystem were ever
    # falsy (e.g. empty), a truthiness check would rebuild it on every
    # access. Siblings of this method already use "is None".
    if self._fs is None:
        self._fs = CompositeFilesystem()

        if boto3_installed:
            self._fs.add_fs('s3', S3Filesystem(
                aws_access_key_id=self._opts['aws_access_key_id'],
                aws_secret_access_key=self._opts['aws_secret_access_key'],
                aws_session_token=self._opts['aws_session_token'],
                s3_endpoint=self._opts['s3_endpoint'],
                s3_region=self._opts['s3_region'],
            ), disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            self._fs.add_fs('gcs', GCSFilesystem(
                project_id=self._opts['google_project_id']
            ), disable_if=_is_permanent_google_error)

        # Hadoop FS handles any URI that falls through to it
        self._fs.add_fs('hadoop', HadoopFilesystem(
            self._opts['hadoop_bin']))

        self._fs.add_fs('local', LocalFilesystem())

    return self._fs
def fs(self):
    """:py:class:`mrjob.fs.base.Filesystem` object for HDFS and the
    local filesystem.
    """
    if self._fs is None:
        # hadoop first, so HDFS URIs are tried before local paths
        hadoop_fs = HadoopFilesystem(self._opts['hadoop_bin'])
        self._fs = CompositeFilesystem(hadoop_fs, LocalFilesystem())

    return self._fs
def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for the local
    filesystem.
    """
    if self._fs is not None:
        return self._fs

    # wrap LocalFilesystem in CompositeFilesystem to get IOError
    # on URIs (see #1185)
    self._fs = CompositeFilesystem(LocalFilesystem())
    return self._fs
def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for the local
    filesystem.

    Methods on :py:class:`~mrjob.fs.base.Filesystem` objects will be
    forwarded to :py:class:`~mrjob.runner.MRJobRunner` until mrjob
    0.5, but **this behavior is deprecated.**
    """
    if self._fs is not None:
        return self._fs

    self._fs = LocalFilesystem()
    return self._fs
def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for the local
    filesystem.

    Methods on :py:class:`~mrjob.fs.base.Filesystem` objects will be
    forwarded to :py:class:`~mrjob.runner.MRJobRunner` until mrjob
    0.6.0, but **this behavior is deprecated.**
    """
    if self._fs is not None:
        return self._fs

    # wrap LocalFilesystem in CompositeFilesystem to get IOError
    # on URIs (see #1185)
    self._fs = CompositeFilesystem(LocalFilesystem())
    return self._fs
def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for SSH, S3, GCS,
    and the local filesystem.
    """
    if self._fs is None:
        # keep a handle to the GCS fs so other methods can reach it
        self._gcs_fs = GCSFilesystem()
        self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())

    return self._fs
def fs(self):
    """:py:class:`mrjob.fs.base.Filesystem` object for HDFS and the
    local filesystem.
    """
    if self._fs is None:
        fs = CompositeFilesystem()

        # don't pass [] to fs; this means not to use hadoop until
        # fs.set_hadoop_bin() is called (used for running hadoop
        # over SSH)
        fs.add_fs('hadoop',
                  HadoopFilesystem(self._opts['hadoop_bin'] or None))
        fs.add_fs('local', LocalFilesystem())

        self._fs = fs

    return self._fs
def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for SSH, S3, GCS,
    and the local filesystem.
    """
    if self._fs is None:
        # keep a handle to the GCS fs so other methods can reach it
        self._gcs_fs = GCSFilesystem(
            credentials=self._credentials,
            local_tmp_dir=self._get_local_tmp_dir(),
            project_id=self._project_id,
        )
        self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())

    return self._fs
def fs(self):
    """:py:class:`~mrjob.fs.base.Filesystem` object for SSH, S3, GCS,
    and the local filesystem.
    """
    if self._fs is not None:
        return self._fs

    self._fs = CompositeFilesystem()

    # fall back to deriving the bucket location from the zone
    # when no region was given explicitly
    location = self._opts['region'] or _zone_to_region(self._opts['zone'])

    self._fs.add_fs('gcs', GCSFilesystem(
        credentials=self._credentials,
        project_id=self._project_id,
        part_size=self._upload_part_size(),
        location=location,
        object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
    ))
    self._fs.add_fs('local', LocalFilesystem())

    return self._fs
def setUp(self):
    # run the base class's fixture setup first, then give each test
    # a fresh LocalFilesystem to exercise
    super(LocalFSTestCase, self).setUp()
    self.fs = LocalFilesystem()