def output(self):
    """Return the S3 target for the downloaded raw data.

    Delegates target construction to utilFuncs.getS3Target so bucket/key
    handling stays in one place.
    """
    # Commented-out LocalTarget/S3Target variants removed; the helper
    # below is the single supported implementation.
    return utilFuncs.getS3Target(S3Client(), self.getKeyS3())
def test_get_as_string_latin1(self):
    """get_as_string() should honor a non-UTF-8 `encoding` argument."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    s3_client.put(self.tempFilePath, 's3://mybucket/putMe3')
    contents = s3_client.get_as_string('s3://mybucket/putMe3', encoding='ISO-8859-1')
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(contents, self.tempFileContents.decode('ISO-8859-1'))
def output(self):
    """Target: today's wikipedia_info_output.csv in the wikidata S3 bucket."""
    s3_params = config(section='s3')
    target_path = 's3://s3-bucket-wikidata/{}/wikipedia_info_output.csv'.format(
        strftime("%Y-%m-%d"))
    return S3Target(target_path, format=UTF8, client=S3Client(**s3_params))
def test_get_as_string(self):
    """get_as_string() defaults to decoding the object as UTF-8."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    s3_client.put(self.tempFilePath, 's3://mybucket/putMe2')
    contents = s3_client.get_as_string('s3://mybucket/putMe2')
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(contents, self.tempFileContents.decode('utf-8'))
def test_read(self):
    """Reading an S3Target yields the uploaded fixture contents."""
    create_bucket()
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    client.put(self.tempFilePath, 's3://mybucket/tempfile')
    target = S3Target('s3://mybucket/tempfile', client=client)
    handle = target.open()
    body = handle.read()
    self.assertEqual(self.tempFileContents, body.encode('utf-8'))
def test_get_as_bytes(self):
    """get_as_bytes() returns the raw object bytes unchanged."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    s3_client.put(self.tempFilePath, 's3://mybucket/putMe')
    contents = s3_client.get_as_bytes('s3://mybucket/putMe')
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(contents, self.tempFileContents)
def test_remove_bucket_dne(self):
    """remove() on a nonexistent bucket must raise ClientError."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    # Context-manager form is clearer than wrapping the call in a lambda.
    with self.assertRaises(ClientError):
        s3_client.remove('s3://bucketdoesnotexist/file')
def output(self):
    """Return the S3 target for the calculated data.

    Delegates target construction to utilFuncs.getS3Target so bucket/key
    handling stays in one place.
    """
    # Commented-out LocalTarget/S3Target variants removed; the helper
    # below is the single supported implementation.
    return utilFuncs.getS3Target(S3Client(), self.getKeyS3())
def output(self):
    """Return the S3 target for the normalized data.

    The key embeds the country code, or 'All' when self.country is falsy.
    """
    ctryStr = self.country if self.country else 'All'
    # Commented-out LocalTarget/S3Target variants removed; the helper
    # below is the single supported implementation.
    return utilFuncs.getS3Target(S3Client(), self.getKeyS3(ctryStr))
def test_remove_invalid(self):
    """remove() of a bare bucket root must raise InvalidDeleteException."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    # Context-manager form is clearer than wrapping the call in a lambda.
    with self.assertRaises(InvalidDeleteException):
        s3_client.remove('s3://mybucket/')
def _read_schema_file(self):
    """Fetch and return the schema file contents from S3.

    Raises:
        Exception: if no schema file exists at self.s3_schema_path().
    """
    s3_client = S3Client()
    schema_path = self.s3_schema_path()
    if not s3_client.exists(schema_path):
        # Bug fix: the original interpolated the bare name `s3_schema_path`
        # (undefined), which would raise NameError instead of this message.
        raise Exception("No schema file located at %s. Can not set Redshift columns." % schema_path)
    logger.info("Found schema file %s" % schema_path)
    # Bug fix: read the key at the schema path that was checked above; the
    # original fetched self.s3_load_path() despite testing s3_schema_path().
    schema_key = s3_client.get_key(schema_path)
    return schema_key.get_contents_as_string()
def test_get_as_string(self):
    """get_as_string() round-trips the uploaded fixture file."""
    # Upload a fixture object first.
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    client.s3.create_bucket('mybucket')
    client.put(self.tempFilePath, 's3://mybucket/putMe')
    self.assertEqual(client.get_as_string('s3://mybucket/putMe'),
                     self.tempFileContents)
def test_remove_dir(self):
    """Removing a directory also removes the Hadoop S3N '_$folder$' marker."""
    create_bucket()
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    client.put(self.tempFilePath, 's3://mybucket/removemedir/file')
    # Hadoop's S3 Native FileSystem writes an empty marker object for dirs;
    # verify that remove() cleans it up as well.
    client.put_string("", 's3://mybucket/removemedir_$folder$')
    self.assertTrue(client.remove('s3://mybucket/removemedir'))
    self.assertFalse(client.exists('s3://mybucket/removemedir_$folder$'))
def test_list(self):
    """list() yields key names relative to the given prefix."""
    create_bucket()
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    client.put_string("", 's3://mybucket/hello/frank')
    client.put_string("", 's3://mybucket/hello/world')
    listed = list(client.list('s3://mybucket/hello'))
    self.assertEqual(['frank', 'world'], listed)
class ContentData(ExternalTask):
    """External task exposing a dataset stored under the scifact S3 root."""

    # Root S3 path, as a constant
    DATA_ROOT = 's3://advancedpythonmeenu/scifact/'
    # Filename of the dataset under the root s3 path
    data_name = Parameter(default="arxivData.json")
    # Credentials come from the process environment.
    client = S3Client(env("AWS_ACCESS_KEY_ID"), env("AWS_SECRET_ACCESS_KEY"))

    def output(self):
        """Return the S3Target of the dataset."""
        return S3Target(self.DATA_ROOT + self.data_name, client=self.client)
def test_remove_dir_batch(self):
    """remove() on a directory with >1000 keys deletes every key (batched)."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    for i in range(2000):
        s3_client.put(self.tempFilePath, 's3://mybucket/removemedir/file{i}'.format(i=i))
    self.assertTrue(s3_client.remove('s3://mybucket/removemedir/'))
    # Bug fix: the original asserted on 's3://mybucket/removedir/' (typo),
    # a path that never existed, so the check was vacuously true.
    self.assertFalse(s3_client.exists('s3://mybucket/removemedir/'))
def test_remove_dir_not_recursive(self):
    """Non-recursive remove() of a non-empty directory must be rejected."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    s3_client.put(self.tempFilePath, 's3://mybucket/removemedir/file')
    # Context-manager form is clearer than wrapping the call in a lambda.
    with self.assertRaises(InvalidDeleteException):
        s3_client.remove('s3://mybucket/removemedir', recursive=False)
def test_list_key(self):
    """listdir(return_key=True) yields keys whose reconstructed paths exist."""
    create_bucket()
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    client.put_string("", 's3://mybucket/hello/frank')
    client.put_string("", 's3://mybucket/hello/world')
    existence = [
        client.exists('s3://' + key.bucket_name + '/' + key.key)
        for key in client.listdir('s3://mybucket/hello', return_key=True)
    ]
    self.assertEqual([True, True], existence)
def test_init_without_init_or_config(self, mock):
    """If no config or arn provided, boto3 client should be called with
    default parameters, delegating ENV or Task Role credential handling
    to boto3 itself.
    """
    # Touch the lazy `.s3` attribute so the boto3 client is constructed.
    client = S3Client()
    client.s3
    mock.assert_called_with('s3',
                            aws_access_key_id=None,
                            aws_secret_access_key=None,
                            aws_session_token=None)
def run(self):
    """Download self.data from S3_ROOT into the local target path."""
    remote_path = str(self.S3_ROOT + self.data)
    print("S3 filename:", remote_path)
    print("Local path:", self.path)
    s3 = S3Client(env("AWS_ACCESS_KEY_ID"), env("AWS_SECRET_ACCESS_KEY"))
    # S3Client.get creates the local file atomically.
    s3.get(remote_path, self.path + self.data)
class SavedModel(ExternalTask):
    """External task exposing a saved model archive under the scifact S3 root."""

    MODEL_ROOT = 's3://advancedpythonmeenu/scifact/'
    # Filename of the model
    model = Parameter(default="rationale_roberta_large_fever.zip")
    # Credentials come from the process environment.
    client = S3Client(env("AWS_ACCESS_KEY_ID"), env("AWS_SECRET_ACCESS_KEY"))

    def output(self):
        """Return the S3Target of the model."""
        return S3Target(self.MODEL_ROOT + self.model, client=self.client)
def test_list_key(self):
    """list(return_key=True) yields key objects that report existence."""
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    client.s3.create_bucket('mybucket')
    client.put_string("", 's3://mybucket/hello/frank')
    client.put_string("", 's3://mybucket/hello/world')
    flags = [key.exists()
             for key in client.list('s3://mybucket/hello', return_key=True)]
    self.assertEqual([True, True], flags)
def test_exists(self):
    # Test exists S3Client
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    # NOTE(review): every assertion in this test is commented out below, so
    # the test currently only uploads a file and verifies nothing. The
    # bucket name here ('psetbucket') also differs from the 'mybucket' used
    # by sibling tests — confirm which bucket create_bucket() provisions
    # before re-enabling these checks.
    # self.assertTrue(s3_client.exists('s3://psetbucket/'))
    # self.assertTrue(s3_client.exists('s3://psetbucket'))
    # self.assertFalse(s3_client.exists('s3://psetbucket/nope'))
    # self.assertFalse(s3_client.exists('s3://psetbucket/nope/'))
    s3_client.put(self.tempFilePath, 's3://psetbucket/tempfile')
def test_init_with_environment_variables(self):
    """Credentials should be picked up from the process environment."""
    os.environ['AWS_ACCESS_KEY_ID'] = 'foo'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'bar'
    # Don't read any existing config: point the parser at a nonexistent file.
    old_config_paths = configuration.LuigiConfigParser._config_paths
    configuration.LuigiConfigParser._config_paths = [tempfile.mktemp()]
    s3_client = S3Client()
    # Restore the original config search paths for subsequent tests.
    configuration.LuigiConfigParser._config_paths = old_config_paths
    # NOTE(review): the attributes checked are `gs_access_key_id` /
    # `gs_secret_access_key` ("gs" as in Google Storage) — this looks like a
    # copy-paste from a GCS test; confirm these are the intended attribute
    # names for the S3 connection object.
    self.assertEqual(s3_client.s3.gs_access_key_id, 'foo')
    self.assertEqual(s3_client.s3.gs_secret_access_key, 'bar')
def setUp(self, mock_config):
    """Create a temp fixture file and a mock-configured S3 client/bucket."""
    f = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    # Bug fix: the file is opened in binary mode and sibling tests call
    # .decode() on tempFileContents, so it must be bytes. A b"" literal is
    # also backward-compatible on Python 2, where the original str worked.
    self.tempFileContents = b"I'm a temporary file for testing\nAnd this is the second line\nThis is the third."
    f.write(self.tempFileContents)
    f.close()
    self.tempFilePath = f.name
    self.file_name = f.name[f.name.rindex('/') + 1:]
    self.local_path = f.name[:f.name.rindex('/')]
    self.s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    bucket = self.s3_client.s3.create_bucket('bucket')
    k = Key(bucket)
    k.key = 'key/%s' % self.file_name
    # Every config lookup on the mocked configuration returns the access key.
    mock_config.get_config.return_value.get.return_value = AWS_ACCESS_KEY
def test_get(self):
    """get() downloads an S3 object into a local file."""
    # Upload a fixture object first.
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    client.s3.create_bucket('mybucket')
    client.put(self.tempFilePath, 's3://mybucket/putMe')
    with tempfile.NamedTemporaryFile(delete=True) as tmp:
        client.get('s3://mybucket/putMe', tmp.name)
        self.assertEqual(tmp.read(), self.tempFileContents)
def test_get(self):
    """get() downloads an object whose text matches the fixture contents."""
    create_bucket()
    s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    s3_client.put(self.tempFilePath, 's3://mybucket/putMe')
    tmp_file = tempfile.NamedTemporaryFile(delete=True)
    tmp_file_path = tmp_file.name
    s3_client.get('s3://mybucket/putMe', tmp_file_path)
    with open(tmp_file_path, 'r') as f:
        content = f.read()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(content, self.tempFileContents.decode("utf-8"))
    tmp_file.close()
def test_isdir(self):
    """isdir() recognizes buckets, '$folder$' markers, and trailing-slash keys."""
    create_bucket()
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    # A bucket root is always a directory.
    self.assertTrue(client.isdir('s3://mybucket'))
    # Hadoop-style '_$folder$' marker object marks a directory.
    client.put(self.tempFilePath, 's3://mybucket/tempdir0_$folder$')
    self.assertTrue(client.isdir('s3://mybucket/tempdir0'))
    # A key with a trailing slash marks a directory.
    client.put(self.tempFilePath, 's3://mybucket/tempdir1/')
    self.assertTrue(client.isdir('s3://mybucket/tempdir1'))
    # A plain object is not a directory.
    client.put(self.tempFilePath, 's3://mybucket/key')
    self.assertFalse(client.isdir('s3://mybucket/key'))
def test_mkdir(self):
    """mkdir() creates directories; parents=False requires existing parents."""
    create_bucket()
    client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    self.assertTrue(client.isdir('s3://mybucket'))
    # mkdir on an existing bucket root is a no-op.
    client.mkdir('s3://mybucket')
    client.mkdir('s3://mybucket/dir')
    self.assertTrue(client.isdir('s3://mybucket/dir'))
    # Without parents=True, a missing intermediate directory must raise.
    self.assertRaises(MissingParentDirectory,
                      client.mkdir, 's3://mybucket/dir/foo/bar', parents=False)
    self.assertFalse(client.isdir('s3://mybucket/dir/foo/bar'))
def _connect(self, write=False):
    """Return a remote filesystem connection for the configured protocol.

    Args:
        write: if True, request credentials with write access.

    Raises:
        ModuleNotFoundError: if SFTP is requested but pysftp is missing.
        NotImplementedError: for protocols other than s3/ftp/sftp.
    """
    credentials = self._get_credentials(write)
    if self.settings['protocol'] == 's3':
        from luigi.contrib.s3 import S3Client
        from d6tpipe.luigi.s3 import S3Client as S3ClientToken
        # Cleanup: the original duplicated an identical token/no-token
        # if/else under both the write and read branches; the `write`
        # flag only affects which credentials were fetched above.
        if 'aws_session_token' in credentials:
            cnxn = S3ClientToken(**credentials)
        else:
            cnxn = S3Client(**credentials)
    elif self.settings['protocol'] == 'ftp':
        from d6tpipe.luigi.ftp import RemoteFileSystem
        cnxn = RemoteFileSystem(self.settings['location'],
                                credentials['username'],
                                credentials['password'])
    elif self.settings['protocol'] == 'sftp':
        from d6tpipe.luigi.ftp import RemoteFileSystem
        try:
            import pysftp
        except ImportError:
            raise ModuleNotFoundError('Please install pysftp to use SFTP.')
        cnopts = pysftp.CnOpts()
        cnopts.hostkeys = None  # skip host-key verification
        cnxn = RemoteFileSystem(self.settings['location'],
                                credentials['username'],
                                credentials['password'],
                                sftp=True,
                                pysftp_conn_kwargs={'cnopts': cnopts})
    else:
        raise NotImplementedError('only s3 and ftp supported')
    return cnxn
def test__path_to_bucket_and_key(self):
    """A plain s3:// URI splits into its bucket and key parts."""
    parsed = S3Client._path_to_bucket_and_key('s3://bucket/key')
    self.assertEqual(('bucket', 'key'), parsed)
def test__path_to_bucket_and_key_with_question_mark(self):
    """A '?' in the key is kept verbatim, not treated as a query string."""
    parsed = S3Client._path_to_bucket_and_key('s3://bucket/key?blade')
    self.assertEqual(('bucket', 'key?blade'), parsed)