示例#1
0
 def test_blank_region(self):
     # An empty-string region is expected to behave exactly like no region
     # at all: un-prefixed EMR and S3 endpoints, and a blank _aws_region.
     runner = EMRJobRunner(conf_path=False, aws_region='')

     emr_endpoint = runner.make_emr_conn().endpoint
     assert_equal(emr_endpoint, 'elasticmapreduce.amazonaws.com')

     s3_endpoint = runner.make_s3_conn().endpoint
     assert_equal(s3_endpoint, 's3.amazonaws.com')

     assert_equal(runner._aws_region, '')
示例#2
0
 def test_explicit_endpoints(self):
     # Endpoints passed explicitly must be used as-is, even though an
     # aws_region ('EU') is supplied alongside them.
     runner = EMRJobRunner(
         conf_path=False,
         aws_region='EU',
         s3_endpoint='s3-proxy',
         emr_endpoint='emr-proxy',
     )

     emr_endpoint = runner.make_emr_conn().endpoint
     assert_equal(emr_endpoint, 'emr-proxy')

     s3_endpoint = runner.make_s3_conn().endpoint
     assert_equal(s3_endpoint, 's3-proxy')
示例#3
0
 def test_no_region(self):
     # With no aws_region given, both connections should use the
     # un-prefixed endpoints and _aws_region should be the empty string.
     runner = EMRJobRunner(conf_path=False)

     emr_endpoint = runner.make_emr_conn().endpoint
     assert_equal(emr_endpoint, 'elasticmapreduce.amazonaws.com')

     s3_endpoint = runner.make_s3_conn().endpoint
     assert_equal(s3_endpoint, 's3.amazonaws.com')

     assert_equal(runner._aws_region, '')
示例#4
0
 def test_no_region(self):
     # No region configured: expect the un-prefixed EMR/S3 endpoints and
     # an empty _aws_region on the runner.
     runner = EMRJobRunner(conf_path=False)

     emr_conn = runner.make_emr_conn()
     assert_equal(emr_conn.endpoint, 'elasticmapreduce.amazonaws.com')

     s3_conn = runner.make_s3_conn()
     assert_equal(s3_conn.endpoint, 's3.amazonaws.com')

     assert_equal(runner._aws_region, '')
示例#5
0
    def test_cleanup(self):
        # Runs s3_cleanup() over three mock keys in three passes: a dry run
        # (nothing may be deleted), a 30-day cutoff (only data/bar is
        # removed) and a 48-hour cutoff (data/qux is removed as well).
        # data/foo keeps its original timestamp and must survive every pass.
        runner = EMRJobRunner(conf_paths=[], s3_sync_wait_time=0.01)

        # add some mock data and change last_modified
        remote_input_path = 's3://walrus/data/'
        self.add_mock_s3_data({
            'walrus': {
                'data/foo': 'foo\n',
                'data/bar': 'bar\n',
                'data/qux': 'qux\n'
            }
        })

        s3_conn = runner.make_s3_conn()
        # only the bucket name is needed; the key part of the URI is unused
        bucket_name, _ = parse_s3_uri(remote_input_path)
        bucket = s3_conn.get_bucket(bucket_name)

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')
        # s3_cleanup() computes a key's age from datetime.utcnow(), so the
        # fake timestamps are set relative to UTC too; using local time
        # would make the 48-hour check depend on the machine's UTC offset.
        key_bar.last_modified = datetime.utcnow() - timedelta(days=45)
        key_qux.last_modified = datetime.utcnow() - timedelta(hours=50)

        # make sure keys are there
        assert isinstance(key_foo, MockKey)
        assert isinstance(key_bar, MockKey)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path,
                   timedelta(days=30),
                   dry_run=True,
                   conf_paths=[])

        # Re-fetch from the bucket: the objects obtained before the dry run
        # would still be MockKeys even if the keys had been deleted.
        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # dry-run shouldn't delete anything
        assert isinstance(key_foo, MockKey)
        assert isinstance(key_bar, MockKey)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(days=30), conf_paths=[])

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # make sure key_bar is deleted
        assert isinstance(key_foo, MockKey)
        self.assertEqual(key_bar, None)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(hours=48), conf_paths=[])

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # make sure key_qux is deleted
        assert isinstance(key_foo, MockKey)
        self.assertEqual(key_bar, None)
        self.assertEqual(key_qux, None)
示例#6
0
 def test_blank_region(self):
     # A blank aws_region should give the same results as passing no
     # region: un-prefixed endpoints and an empty _aws_region.
     runner = EMRJobRunner(conf_path=False, aws_region='')

     emr_conn = runner.make_emr_conn()
     assert_equal(emr_conn.endpoint, 'elasticmapreduce.amazonaws.com')

     s3_conn = runner.make_s3_conn()
     assert_equal(s3_conn.endpoint, 's3.amazonaws.com')

     assert_equal(runner._aws_region, '')
示例#7
0
    def test_cleanup(self):
        # Drives s3_cleanup() through a dry run, a 30-day cutoff and a
        # 48-hour cutoff against three mock keys. Expected outcome: the dry
        # run deletes nothing, the 30-day pass removes only data/bar, the
        # 48-hour pass also removes data/qux, and data/foo always survives.
        runner = EMRJobRunner(conf_paths=[], s3_sync_wait_time=0.01)

        # add some mock data and change last_modified
        remote_input_path = 's3://walrus/data/'
        self.add_mock_s3_data({'walrus': {'data/foo': 'foo\n',
                                        'data/bar': 'bar\n',
                                        'data/qux': 'qux\n'}})

        s3_conn = runner.make_s3_conn()
        # the key portion of the URI is not needed here
        bucket_name, _ = parse_s3_uri(remote_input_path)
        bucket = s3_conn.get_bucket(bucket_name)

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')
        # s3_cleanup() compares against datetime.utcnow(); set the fake
        # timestamps from UTC as well so the 48-hour threshold does not
        # depend on the local UTC offset.
        key_bar.last_modified = datetime.utcnow() - timedelta(days=45)
        key_qux.last_modified = datetime.utcnow() - timedelta(hours=50)

        # make sure keys are there
        assert isinstance(key_foo, MockKey)
        assert isinstance(key_bar, MockKey)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(days=30), dry_run=True,
                   conf_paths=[])

        # Look the keys up again; checking the pre-dry-run objects would
        # pass even if the dry run had wrongly deleted them.
        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # dry-run shouldn't delete anything
        assert isinstance(key_foo, MockKey)
        assert isinstance(key_bar, MockKey)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(days=30), conf_paths=[])

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # make sure key_bar is deleted
        assert isinstance(key_foo, MockKey)
        self.assertEqual(key_bar, None)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(hours=48), conf_paths=[])

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # make sure key_qux is deleted
        assert isinstance(key_foo, MockKey)
        self.assertEqual(key_bar, None)
        self.assertEqual(key_qux, None)
示例#8
0
def s3_cleanup(glob_path, time_old, dry_run=False, conf_path=None):
    """Delete every S3 key found under *glob_path* that is older than
    *time_old*.

    Each key's age is ``datetime.utcnow()`` minus its parsed
    ``last_modified`` value, and the key is removed only when that age is
    strictly greater than *time_old*.

    If *dry_run* is ``True``, the keys that qualify are logged but not
    deleted. *conf_path* is handed unchanged to :py:class:`EMRJobRunner`.
    """
    emr_runner = EMRJobRunner(conf_path=conf_path)
    connection = emr_runner.make_s3_conn()

    log.info("Deleting all files in %s that are older than %s" % (glob_path, time_old))

    for s3_uri in emr_runner.ls(glob_path):
        bucket_name, key_prefix = parse_s3_uri(s3_uri)
        bucket = connection.get_bucket(bucket_name)

        for s3_key in bucket.list(key_prefix):
            modified_at = iso8601_to_datetime(s3_key.last_modified)
            age = datetime.utcnow() - modified_at
            if age > time_old:
                # Log every candidate, even when nothing will be removed
                log.info("Deleting %s; is %s old" % (s3_key.name, age))
                if dry_run:
                    continue
                s3_key.delete()
示例#9
0
def s3_cleanup(glob_path, time_old, dry_run=False, conf_paths=None):
    """Delete every S3 key found under *glob_path* that is older than
    *time_old*.

    Each key's age is ``datetime.utcnow()`` minus its parsed
    ``last_modified`` value, and the key is removed only when that age is
    strictly greater than *time_old*.

    If *dry_run* is ``True``, the keys that qualify are logged but not
    deleted. *conf_paths* is handed unchanged to :py:class:`EMRJobRunner`.
    """
    emr_runner = EMRJobRunner(conf_paths=conf_paths)
    connection = emr_runner.make_s3_conn()

    log.info('Deleting all files in %s that are older than %s' %
             (glob_path, time_old))

    for s3_uri in emr_runner.ls(glob_path):
        bucket_name, key_prefix = parse_s3_uri(s3_uri)
        bucket = connection.get_bucket(bucket_name)

        for s3_key in bucket.list(key_prefix):
            modified_at = iso8601_to_datetime(s3_key.last_modified)
            age = datetime.utcnow() - modified_at
            if age > time_old:
                # Log every candidate, even when nothing will be removed
                log.info('Deleting %s; is %s old' % (s3_key.name, age))
                if dry_run:
                    continue
                s3_key.delete()
示例#10
0
 def test_ap_southeast_1(self):
     # For ap-southeast-1 the S3 endpoint is region-specific, while
     # building an EMR connection is expected to raise.
     runner = EMRJobRunner(conf_path=False, aws_region='ap-southeast-1')

     s3_endpoint = runner.make_s3_conn().endpoint
     assert_equal(s3_endpoint, 's3-ap-southeast-1.amazonaws.com')

     assert_raises(Exception, runner.make_emr_conn)
示例#11
0
 def test_us_west_1(self):
     # us-west-1 should produce region-specific EMR and S3 endpoints.
     runner = EMRJobRunner(conf_path=False, aws_region='us-west-1')

     emr_endpoint = runner.make_emr_conn().endpoint
     assert_equal(emr_endpoint, 'us-west-1.elasticmapreduce.amazonaws.com')

     s3_endpoint = runner.make_s3_conn().endpoint
     assert_equal(s3_endpoint, 's3-us-west-1.amazonaws.com')
示例#12
0
 def test_explicit_endpoints(self):
     # Explicit s3_endpoint/emr_endpoint values are expected to be used
     # verbatim, regardless of the aws_region passed with them.
     runner = EMRJobRunner(
         conf_path=False,
         aws_region='EU',
         s3_endpoint='s3-proxy',
         emr_endpoint='emr-proxy',
     )

     emr_conn = runner.make_emr_conn()
     assert_equal(emr_conn.endpoint, 'emr-proxy')

     s3_conn = runner.make_s3_conn()
     assert_equal(s3_conn.endpoint, 's3-proxy')
示例#13
0
 def test_ap_southeast_1(self):
     # The S3 connection gets a regional endpoint for ap-southeast-1, but
     # make_emr_conn() is expected to raise for this region.
     runner = EMRJobRunner(conf_path=False, aws_region='ap-southeast-1')

     s3_conn = runner.make_s3_conn()
     assert_equal(s3_conn.endpoint, 's3-ap-southeast-1.amazonaws.com')

     assert_raises(Exception, runner.make_emr_conn)
示例#14
0
 def test_us_west_1(self):
     # Both connections should point at us-west-1-specific hosts.
     runner = EMRJobRunner(conf_path=False, aws_region='us-west-1')

     emr_conn = runner.make_emr_conn()
     assert_equal(emr_conn.endpoint,
                  'us-west-1.elasticmapreduce.amazonaws.com')

     s3_conn = runner.make_s3_conn()
     assert_equal(s3_conn.endpoint, 's3-us-west-1.amazonaws.com')