def test_download_files_from_multiple_prefixes():
    """Download three day-prefixes and check which keys end up on disk.

    Six objects are stored in the bucket. Three of them live under the
    requested ``foo/2017/01/0N/`` prefixes and are checked with
    ``file_content``; the remaining three are checked with
    ``file_does_not_exist``.
    """
    bucket = "some-bucket"
    # (key, content) pairs located under one of the requested prefixes.
    requested = [
        ("foo/2017/01/01/foo", "foo"),
        ("foo/2017/01/02/bar", "bar"),
        ("foo/2017/01/03/baz", "baz"),
    ]
    # (key, content) pairs stored in the bucket but outside every prefix.
    not_requested = [
        ("foo/something", "something"),
        ("foo/2017/02/01/else", "else"),
        ("foo/2018/02/01/entirely", "entirely"),
    ]

    given_a_bucket(bucket)
    for object_key, object_body in requested + not_requested:
        given_an_object(bucket, object_key, object_body)

    with tempfile.TemporaryDirectory() as target_dir:
        day_prefixes = [
            "foo/2017/01/01/",
            "foo/2017/01/02/",
            "foo/2017/01/03/",
        ]
        _s3_download_recursive(bucket, day_prefixes, target_dir, 10)

        for object_key, object_body in requested:
            assert file_content(target_dir, object_key) == object_body
        for object_key, _ in not_requested:
            assert file_does_not_exist(target_dir, object_key)
def test_download_log_files_and_skip_existing_files():
    """Download logs via the API and the CLI without touching existing files.

    Two objects are stored in the bucket and one of them is pre-created
    locally with different content. The download is run once through
    ``download_cloudtrail_logs`` and once through the ``download`` CLI
    command, both against the same temporary directory. The new key must
    carry the bucket content; the pre-existing file must keep its local
    content.

    NOTE(review): this function name is defined again further down in the
    file; Python keeps only the last binding, so this definition is
    shadowed unless the duplicates are renamed.
    """
    with tempfile.TemporaryDirectory() as dirpath:
        given_a_bucket("some-bucket")
        given_an_object("some-bucket", TEST_LOG_KEY, "some-file-content")
        given_an_object("some-bucket", TEST_LOG_KEY_EXISTING, "some-file-content")
        given_a_file(dirpath, TEST_LOG_KEY_EXISTING, "some-content-already-existing")

        download_cloudtrail_logs(
            target_dir=dirpath,
            bucket="some-bucket",
            cloudtrail_prefix="some-prefix/",
            from_date=datetime.datetime(2017, 1, 1, tzinfo=pytz.utc),
            to_date=datetime.datetime(2017, 1, 1, tzinfo=pytz.utc),
            account_ids=["000"],
            regions=["some-region-1"],
        )

        runner = CliRunner()
        result = runner.invoke(cli.root_group, args=[
            "download",
            # Point the CLI at the temporary directory. Without this option
            # the command does not operate on ``dirpath``, so the assertions
            # below would only reflect the direct call above.
            "--log-dir", dirpath,
            "--bucket", "some-bucket",
            "--region", "some-region-1",
            "--account-id", "000",
            "--prefix", "some-prefix/",
            "--from", "2017-01-01",
            "--to", "2017-01-01",
        ])
        assert result.exit_code == 0

        assert file_content(dirpath, TEST_LOG_KEY) == "some-file-content"
        assert file_content(dirpath, TEST_LOG_KEY_EXISTING) == "some-content-already-existing"
def test_download_log_files_and_skip_existing_files():
    """Call ``download_cloudtrail_logs`` and check existing files are kept.

    Both log keys exist in the bucket with the same content, and one of
    them is also pre-created locally with different content. After the
    download the new key must hold the bucket content while the
    pre-existing file must still hold its local content.

    NOTE(review): this function name is also defined earlier and later in
    the file; Python keeps only the last binding, so this definition is
    shadowed unless the duplicates are renamed.
    """
    bucket = "some-bucket"
    remote_body = "some-file-content"
    local_body = "some-content-already-existing"
    window_start = datetime.datetime(2017, 1, 1, tzinfo=pytz.utc)
    window_end = datetime.datetime(2018, 1, 1, tzinfo=pytz.utc)

    with tempfile.TemporaryDirectory() as workdir:
        given_a_bucket(bucket)
        for log_key in (TEST_LOG_KEY, TEST_LOG_KEY_EXISTING):
            given_an_object(bucket, log_key, remote_body)
        given_a_file(workdir, TEST_LOG_KEY_EXISTING, local_body)

        download_cloudtrail_logs(
            target_dir=workdir,
            bucket=bucket,
            cloudtrail_prefix="some-prefix/",
            from_date=window_start,
            to_date=window_end,
            account_ids=["000"],
            org_ids=[],
            regions=["some-region-1"],
            parallelism=10,
        )

        expectations = [
            (TEST_LOG_KEY, remote_body),
            (TEST_LOG_KEY_EXISTING, local_body),
        ]
        for log_key, expected_body in expectations:
            assert file_content(workdir, log_key) == expected_body
def test_download_a_single_file_from_prefix():
    """Download the ``foo/`` prefix holding one object and check its content."""
    bucket, object_key, object_body = "some-bucket", "foo/bar.log", "bar"

    given_a_bucket(bucket)
    given_an_object(bucket, object_key, object_body)

    with tempfile.TemporaryDirectory() as target_dir:
        _s3_download_recursive(bucket, ["foo/"], target_dir, 10)

        downloaded = file_content(target_dir, object_key)
        assert downloaded == object_body
def test_download_multiple_files_but_only_the_exact_prefix_given():
    """Download ``foo/`` and check the key one level deeper is not fetched.

    ``foo/bar.log`` and ``foo/baz.log`` are checked with ``file_content``;
    ``foo/bar/baz.log`` is checked with ``file_does_not_exist``.
    """
    bucket = "some-bucket"
    # Keys directly below the requested prefix; content mirrors the key stem.
    direct_children = [
        ("foo/bar.log", "foo/bar"),
        ("foo/baz.log", "foo/baz"),
    ]
    nested_key, nested_body = "foo/bar/baz.log", "foo/bar/baz"

    given_a_bucket(bucket)
    for object_key, object_body in direct_children:
        given_an_object(bucket, object_key, object_body)
    given_an_object(bucket, nested_key, nested_body)

    with tempfile.TemporaryDirectory() as target_dir:
        _s3_download_recursive(bucket, ["foo/"], target_dir, 10)

        for object_key, object_body in direct_children:
            assert file_content(target_dir, object_key) == object_body
        assert file_does_not_exist(target_dir, nested_key)
def test_download_multiple_files_from_multiple_nested_dirs():
    """Download the ``foo`` prefix and expect all three keys, nested included.

    NOTE(review): this call passes a plain string prefix and three
    arguments, while the other tests in this file pass a list of prefixes
    plus a fourth argument -- confirm against the current signature of
    ``_s3_download_recursive``.
    NOTE(review): expecting ``foo/bar/baz.log`` to be downloaded differs from
    test_download_multiple_files_but_only_the_exact_prefix_given -- verify
    which behaviour is intended.
    """
    bucket = "some-bucket"
    # (key, content) pairs; content mirrors the key without its extension.
    stored_objects = [
        ("foo/bar.log", "foo/bar"),
        ("foo/baz.log", "foo/baz"),
        ("foo/bar/baz.log", "foo/bar/baz"),
    ]

    given_a_bucket(bucket)
    for object_key, object_body in stored_objects:
        given_an_object(bucket, object_key, object_body)

    with tempfile.TemporaryDirectory() as target_dir:
        _s3_download_recursive(bucket, "foo", target_dir)

        for object_key, object_body in stored_objects:
            assert file_content(target_dir, object_key) == object_body
def test_download_log_files_and_skip_existing_files():
    """Run the ``download`` CLI command and check existing files are kept.

    Both log keys exist in the bucket with the same content, and one of
    them is also pre-created in the log directory with different content.
    The command must exit with code 0, the new key must hold the bucket
    content, and the pre-existing file must still hold its local content.

    NOTE(review): earlier definitions with this same name exist in the
    file; this later binding replaces them, so they are not collected
    unless the duplicates are renamed.
    """
    bucket = "some-bucket"
    remote_body = "some-file-content"
    local_body = "some-content-already-existing"

    with tempfile.TemporaryDirectory() as log_dir:
        given_a_bucket(bucket)
        for log_key in (TEST_LOG_KEY, TEST_LOG_KEY_EXISTING):
            given_an_object(bucket, log_key, remote_body)
        given_a_file(log_dir, TEST_LOG_KEY_EXISTING, local_body)

        # Option/value pairs in the order they appear on the command line.
        options = [
            ("--log-dir", log_dir),
            ("--bucket", bucket),
            ("--region", "some-region-1"),
            ("--account-id", "000"),
            ("--prefix", "some-prefix/"),
            ("--from", "2017-01-01"),
            ("--to", "2017-01-01"),
        ]
        cli_args = ["download"]
        for flag, value in options:
            cli_args.extend((flag, value))

        outcome = CliRunner().invoke(cli.root_group, args=cli_args)
        assert outcome.exit_code == 0

        assert file_content(log_dir, TEST_LOG_KEY) == remote_body
        assert file_content(log_dir, TEST_LOG_KEY_EXISTING) == local_body