class FileSystemTest(unittest.TestCase):
    """Exercise ``LocalFileSystem`` against a scratch directory.

    The scratch directory (``path``) is deleted before and after every test,
    so each test starts without it.
    """

    # Scratch directory used by every test in this case.
    path = '/tmp/luigi-test-dir'
    fs = LocalFileSystem()

    def setUp(self):
        """Delete the scratch directory if a previous run left it behind."""
        if os.path.exists(self.path):
            shutil.rmtree(self.path)

    def tearDown(self):
        """Clean up with the same logic as ``setUp``."""
        self.setUp()

    def test_mkdir(self):
        """mkdir honours ``parents`` and ``raise_if_exists``."""
        testpath = os.path.join(self.path, 'foo/bar')

        # The scratch directory does not exist yet, so the parent is missing.
        self.assertRaises(MissingParentDirectory,
                          self.fs.mkdir, testpath, parents=False)

        self.fs.mkdir(testpath)
        self.assertTrue(os.path.exists(testpath))
        self.assertTrue(self.fs.isdir(testpath))

        self.assertRaises(FileAlreadyExists,
                          self.fs.mkdir, testpath, raise_if_exists=True)

    def test_exists(self):
        """exists/isdir reflect the creation of the scratch directory."""
        self.assertFalse(self.fs.exists(self.path))
        os.mkdir(self.path)
        self.assertTrue(self.fs.exists(self.path))
        self.assertTrue(self.fs.isdir(self.path))

    def test_listdir(self):
        """listdir yields the single file placed in the scratch directory."""
        os.mkdir(self.path)
        with open(self.path + '/file', 'w'):
            pass
        # assertEqual, not assertTrue: assertTrue(x, y) uses y as the failure
        # message and passes for any truthy x, so it would compare nothing.
        self.assertEqual([self.path + '/file'],
                         list(self.fs.listdir(self.path + '/')))
class VigraRagTarget(FileSystemTarget):
    """File system target whose payload is written and read through vigra's
    HDF5 graph helpers instead of a file handle.
    """

    fs = LocalFileSystem()

    def __init__(self, path):
        super(VigraRagTarget, self).__init__(path)

    def makedirs(self):
        """
        Create all parent folders if they do not exist.

        Raises:
            OSError: if the parent folder could not be created and does not
                exist afterwards (e.g. permission denied, or a regular file
                is in the way).
        """
        normpath = os.path.normpath(self.path)
        parentfolder = os.path.dirname(normpath)
        if not parentfolder:
            # Path has no directory component; nothing to create.
            return
        try:
            os.makedirs(parentfolder)
        except OSError:
            # An already existing folder (possibly created concurrently) is
            # the expected case. Any other failure must surface here rather
            # than as a confusing error on the later write.
            if not os.path.isdir(parentfolder):
                raise

    def open(self, mode='r'):
        """Unsupported for this target; always raises ``AttributeError``."""
        raise AttributeError("Not implemented")

    def write(self, rag):
        """Create the parent folders, then store ``rag`` at ``self.path``.

        ``rag`` must provide ``writeHDF5(path, name)``; the name used is
        ``'data'``.
        """
        self.makedirs()
        rag.writeHDF5(self.path, 'data')

    def read(self):
        """Load and return the graph stored under ``'data'`` at ``self.path``."""
        return vigra.graphs.loadGridRagHDF5(self.path, 'data')
class FileSystemTest(unittest.TestCase):
    """Exercise ``LocalFileSystem`` against a scratch directory.

    The scratch directory (``path``) is deleted before and after every test,
    so each test starts without it.
    """

    # Scratch directory used by every test in this case.
    path = '/tmp/luigi-test-dir'
    fs = LocalFileSystem()

    def setUp(self):
        """Delete the scratch directory if a previous run left it behind."""
        if os.path.exists(self.path):
            shutil.rmtree(self.path)

    def tearDown(self):
        """Clean up with the same logic as ``setUp``."""
        self.setUp()

    def test_copy(self):
        """copy into a not-yet-existing directory keeps source and creates dest."""
        src = os.path.join(self.path, 'src.txt')
        dest = os.path.join(self.path, 'newdir', 'dest.txt')

        # Create an empty source file through LocalTarget.
        LocalTarget(src).open('w').close()
        self.fs.copy(src, dest)
        self.assertTrue(os.path.exists(src))
        self.assertTrue(os.path.exists(dest))

    def test_mkdir(self):
        """mkdir honours ``parents`` and ``raise_if_exists``."""
        testpath = os.path.join(self.path, 'foo/bar')

        # The scratch directory does not exist yet, so the parent is missing.
        self.assertRaises(MissingParentDirectory,
                          self.fs.mkdir, testpath, parents=False)

        self.fs.mkdir(testpath)
        self.assertTrue(os.path.exists(testpath))
        self.assertTrue(self.fs.isdir(testpath))

        self.assertRaises(FileAlreadyExists,
                          self.fs.mkdir, testpath, raise_if_exists=True)

    def test_exists(self):
        """exists/isdir reflect the creation of the scratch directory."""
        self.assertFalse(self.fs.exists(self.path))
        os.mkdir(self.path)
        self.assertTrue(self.fs.exists(self.path))
        self.assertTrue(self.fs.isdir(self.path))

    def test_listdir(self):
        """listdir yields the single file placed in the scratch directory."""
        os.mkdir(self.path)
        with open(self.path + '/file', 'w'):
            pass
        # assertEqual, not assertTrue: assertTrue(x, y) uses y as the failure
        # message and passes for any truthy x, so it would compare nothing.
        self.assertEqual([self.path + '/file'],
                         list(self.fs.listdir(self.path + '/')))

    def test_move_to_new_dir(self):
        """move into a not-yet-existing directory creates the destination."""
        # Regression test for a bug in LocalFileSystem.move
        src = os.path.join(self.path, 'src.txt')
        dest = os.path.join(self.path, 'newdir', 'dest.txt')

        LocalTarget(src).open('w').close()
        self.fs.move(src, dest)
        self.assertTrue(os.path.exists(dest))
class TestFileSystem(unittest.TestCase):
    """Basic ``LocalFileSystem`` checks run against a scratch directory."""

    path = '/tmp/luigi-test-dir'
    fs = LocalFileSystem()

    def setUp(self):
        """Remove the scratch directory when it is present."""
        scratch = self.path
        if not os.path.exists(scratch):
            return
        shutil.rmtree(scratch)

    def tearDown(self):
        """Reuse the ``setUp`` cleanup after each test."""
        self.setUp()

    def test_mkdir(self):
        """mkdir creates a nested directory below the scratch directory."""
        nested_dir = os.path.join(self.path, 'foo/bar')
        self.fs.mkdir(nested_dir)
        created = os.path.exists(nested_dir)
        self.assertTrue(created)

    def test_exists(self):
        """exists is False before the directory is made and True afterwards."""
        scratch = self.path
        self.assertFalse(self.fs.exists(scratch))
        os.mkdir(scratch)
        self.assertTrue(self.fs.exists(scratch))
class BaseTarget(FileSystemTarget):
    """
    Custom target base class
    """

    fs = LocalFileSystem()

    def __init__(self, path):
        super(BaseTarget, self).__init__(path)

    def makedirs(self):
        """
        Create all parent folders if they do not exist.

        Raises:
            OSError: if the parent folder could not be created and does not
                exist afterwards (e.g. permission denied, or a regular file
                is in the way).
        """
        normpath = os.path.normpath(self.path)
        parentfolder = os.path.dirname(normpath)
        if not parentfolder:
            # Path has no directory component; nothing to create.
            return
        try:
            os.makedirs(parentfolder)
        except OSError:
            # An already existing folder (possibly created concurrently) is
            # the expected case. Any other failure must not be hidden.
            if not os.path.isdir(parentfolder):
                raise
def _make_target_classes(self):
    """Build the storage client and the Target classes bound to it.

    The client/target/flag-target triple is chosen from
    ``self.storage_service`` (``'local'``, ``'s3'`` or ``'gcs'``). The method
    then sets three attributes on the instance:

    * ``self.client`` -- the storage client object
    * ``self.target_class`` -- Target subclass built from a path alone
    * ``self.flag_target_class`` -- flag Target subclass built from a path alone

    Raises:
        EnvironmentError: if ``self.storage_service`` is not a known value.
    """
    service = self.storage_service

    if service == 'local':
        storage_client = LocalFileSystem()
        target_base = LocalTarget
        # just use a normal target
        flag_target_base = LocalTarget
    elif service == 's3':
        storage_client = S3Client(
            aws_access_key_id=self.config['aws_access_key_id'],
            aws_secret_access_key=self.config['aws_secret_access_key'])
        target_base = S3Target
        flag_target_base = S3FlagTarget
    elif service == 'gcs':
        # Use GCP Service Account private key credentials to
        # authenticate with the GCS API. Service Accounts
        # are unique to the GCP project.
        service_account_key = json.loads(
            self.config['gcs_service_account_private_key_json'])
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(
            service_account_key)
        storage_client = GCSClient(oauth_credentials=credentials)

        # Hack around a bug in Luigi's GCS module
        class GCSFlagTarget2(GCSTarget):
            fs = None

            def __init__(self, path, format=None, client=None,
                         flag='_SUCCESS'):
                if format is None:
                    format = luigi.format.get_default_format()

                if path[-1] != "/":
                    raise ValueError("GCSFlagTarget requires the path to be to a "
                                     "directory. It must end with a slash ( / ).")

                # This is the only line that's different
                super(GCSFlagTarget2, self).__init__(path, client=client)
                self.format = format
                self.fs = client or GCSClient()
                self.flag = flag

            def exists(self):
                # The target exists once its flag file is present.
                flag_path = self.path + self.flag
                return self.fs.exists(flag_path)

        target_base = GCSTarget
        flag_target_base = GCSFlagTarget2
    else:
        raise EnvironmentError('Please add known file_storage value to config.')

    # Targets will be initialized many times in luigi tasks.
    # Subclass the chosen Target and add the Client to it, so
    # you don't have to pass the Client to the DAG.
    target_kwargs = {'format': MixedUnicodeBytesFormat()}
    if service != 'local':
        # Only the non-local targets are given the client.
        target_kwargs['client'] = storage_client

    class TargetWithClient(target_base):
        def __init__(self, path):
            super(TargetWithClient, self).__init__(path, **target_kwargs)

    class FlagTargetWithClient(flag_target_base):
        def __init__(self, path):
            super(FlagTargetWithClient, self).__init__(path, **target_kwargs)

    self.client = storage_client
    self.target_class = TargetWithClient
    self.flag_target_class = FlagTargetWithClient