def fetch_data_buckets_from_config(config_file="config.properties", data_section="data",
                                   required_bucket_dir_name='morf-data/'):
    """
    Fetch the buckets listed in data_section of config_file.

    Each value in data_section is split into a bucket (via get_bucket_from_url) and a key
    (via get_key_from_url). The key must exactly match required_bucket_dir_name; the first
    value that does not match aborts the whole call with an error.

    :param config_file: path to config file.
    :param data_section: section of config file with key-value pairs representing institution names and s3 paths.
    :param required_bucket_dir_name: directory or path that must match the key of ALL values in data_section.
    :return: tuple of buckets to iterate over, in config order; no directories are returned because these
        are required to be uniform across all of the buckets.
    :raises RuntimeError: if the key of any path differs from required_bucket_dir_name.
    :raises AssertionError: if data_section contains no entries.
    """
    cf = configparser.ConfigParser()
    # NOTE: ConfigParser.read() skips files it cannot open, so a wrong config_file path
    # surfaces below as a missing-section error from cf.items() rather than here.
    cf.read(config_file)
    buckets = []
    for _name, item_url in cf.items(data_section):
        bucket = get_bucket_from_url(item_url)
        key = get_key_from_url(item_url)
        if key != required_bucket_dir_name:
            msg = "[ERROR]: specified path {} does not match required directory name {}; change name of directories to be consistent or specify the correct directory to check for.".format(
                item_url, required_bucket_dir_name)
            print(msg)
            # Raise explicitly with the diagnostic attached; a bare `raise` here has no
            # active exception and would only yield an uninformative RuntimeError.
            raise RuntimeError(msg)
        buckets.append(bucket)
    # Explicit raise instead of `assert` so the check still runs under `python -O`.
    if not buckets:
        raise AssertionError(
            "no buckets found in section {} of {}".format(data_section, config_file))
    return tuple(buckets)
def test_get_bucket_from_url():
    """Check that get_bucket_from_url returns the expected bucket for sample s3 URLs."""
    # Maps each input URL to the bucket name expected back.
    expected_by_url = {
        "s3://my-bucket/some/file.txt": "my-bucket",
        "s3://anotherbucket/some/file.txt": "anotherbucket",
    }
    for url, expected_bucket in expected_by_url.items():
        assert get_bucket_from_url(url) == expected_bucket