Example #1
def PrepareToUpload(local_root, dest_dir):
    """Check necessary conditions required for uploading.

    Args:
      local_root: The local directory containing directories to upload.
      dest_dir: The remote directory to upload to.

    Returns:
      existing_dest_paths: A set of filenames for existing files in the
        destination directory.
      gs_api: The gsutil.GsutilApi object.

    Raises:
      BadTimeToUploadError: Internet is not available.
      ValueError: local_root is not a valid path.
    """
    if not os.path.isdir(local_root):
        raise ValueError('Cannot find local directory %s.' % local_root)

    if not HasInternet():
        raise BadTimeToUploadError('No internet connection detected.')

    gs_api = gsutil.GsutilApi()
    try:
        existing_dest_paths = set(gs_api.List(dest_dir))
    except httplib2.ServerNotFoundError:
        raise BadTimeToUploadError('Internet has become unavailable.')

    return existing_dest_paths, gs_api
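
A minimal usage sketch, assuming the module-level BadTimeToUploadError from the snippet above; the local and remote paths are hypothetical:

# Hypothetical paths, for illustration only.
try:
    existing_dest_paths, gs_api = PrepareToUpload(
        '/tmp/logs', 'gs://my-bucket/uploads')
except BadTimeToUploadError as e:
    print('Upload deferred: %s' % e)
else:
    print('%d files already uploaded.' % len(existing_dest_paths))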
Example #2
def RemoveRemoteFiles():
    # Nested test helper: 'self' is the enclosing TestCase, captured by closure.
    gs_api = gsutil.GsutilApi()
    filenames = gs_api.List(
        gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
    for filename in filenames:
        gs_api.DeleteFile(filename)
    self.assertFalse(
        gs_api.List(gcloud_util.GcsPath(CLOUD_BASE_DIR,
                                        CLOUD_LOG_PATH)))
Example #3
def __init__(self, folder, base_params):
    self._folder = folder
    self.label = 'Wind Database'
    # Pull down info from the cloud about files in this database set.
    gsutil_api = gsutil.GsutilApi()
    cloud_path = os.path.join(
        'gs://gcp-public-data-makani-deps/deps/turbsim_databases', folder,
        'h5_files')
    self._databases = gsutil_api.List(cloud_path)
    self._base_params = base_params
Example #4
def DownloadDatabase(database_file, local_path):
    """Downloads TurbSim file to the specified local directory, if needed."""

    if os.path.exists(local_path):
        print('TurbSim database already exists locally, skipping download.')
    else:
        gsutil_api = gsutil.GsutilApi()
        # TODO: Set up a class for handling more autoselection tasks so
        # that we don't have to pass as much back and forth,
        # or requery the folder list?
        cloud_path = os.path.join(GetCloudBasePath(),
                                  GetOnlineFolder(database_file), 'h5_files',
                                  database_file)
        gsutil_api.Copy(cloud_path, local_path, overwrite=False)
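
A hedged usage sketch; the file name below is hypothetical and would need to follow the TurbSim naming convention checked by CheckTurbsimFileName in Example #5:

# Hypothetical database file name and destination, for illustration only.
DownloadDatabase('example_turbsim_database.h5',
                 '/tmp/example_turbsim_database.h5')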
Example #5
def GetOnlineFolder(database_file):
    """Extracts source id of the file and returns matching folder name."""

    assert CheckTurbsimFileName(database_file), (
        'File name does not follow TurbSim naming convention.')
    # The three-character source id sits at 0-based indices 16-18 of the name.
    online_folder_id = '-' + database_file[16:19] + '-'
    gsutil_api = gsutil.GsutilApi()
    online_paths = gsutil_api.List(GetCloudBasePath())
    online_folder = [
        os.path.basename(os.path.normpath(path)) for path in online_paths
        if online_folder_id in path
    ]
    assert len(online_folder) == 1, (
        'TurbSim online folder identification failed.')
    return online_folder[0]
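
For illustration, a toy run of the matching logic; the paths and folder names here are hypothetical:

import os

# Hypothetical listing; only the '-001-' substring drives the match.
online_paths = ['gs://bucket/deps/20190101-001-foo/',
                'gs://bucket/deps/20190101-002-bar/']
matches = [os.path.basename(os.path.normpath(p))
           for p in online_paths if '-001-' in p]
assert matches == ['20190101-001-foo']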
Example #6
def AssertRemoteFiles():
    gs_api = gsutil.GsutilApi()
    filenames = gs_api.List(
        gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
    prefix = os.path.join(CLOUD_BASE_DIR, CLOUD_LOG_PATH)
    self.assertEqual(
        set(filenames), {
            os.path.join(prefix,
                         'w7/logs/logs1-folder/subfolder/dummy.json'),
            os.path.join(
                prefix,
                'w7/logs/logs1-folder/another_subfolder/dummy.json'),
            os.path.join(
                prefix,
                'w7/logs/logs1-folder/one_more_subfolder/dummy.json'),
            os.path.join(prefix, 'w7/logs/w7-2013.h5'),
            os.path.join(prefix, 'M600A/logs/m600.h5'),
            os.path.join(prefix, 'M600A/logs/dummy.json'),
            os.path.join(prefix, 'w7/logs/logs1-folder/dummy.json'),
            os.path.join(prefix, 'w7/logs/logs1-folder/dummy.txt'),
        })
Example #7
def IterFilesFromCloud(path_prefix, regex_str):
    """Iterate through files in a cloud path recursively.

    The function downloads matching files in the cloud one by one.
    Downloaded files are removed after each iteration.

    Args:
      path_prefix: Path, or prefix, to the cloud storage. E.g., gs://bucket/dir/.
      regex_str: Regular expression to match the file from the beginning.

    Yields:
      The full cloud path to the file, and the path to the downloaded file.
    """

    gs_api = gsutil.GsutilApi()

    filenames = [
        f for f in gs_api.List(path_prefix)
        if f.endswith('.h5') and not f.endswith('-format.h5')
    ]
    if regex_str:
        regex = re.compile(regex_str)
        filenames = [
            f for f in filenames if regex.match(gcloud_util.GcsBasename(f))
        ]
    if not filenames:
        print('Found no files matching the criteria.')
        return

    for full_cloud_path in sorted(filenames):
        temp_fp = tempfile.NamedTemporaryFile(suffix='.h5', delete=False)
        temp_fp.close()
        print('------------------- %s ------------' % full_cloud_path)
        print('Downloading %s...' % full_cloud_path)
        gs_api.Copy(full_cloud_path, temp_fp.name, overwrite=True)
        yield (full_cloud_path, temp_fp.name)
        # Note: this cleanup only runs if the consumer resumes the generator;
        # a caller that breaks out of its loop early leaves the last temp
        # file behind.
        os.remove(temp_fp.name)
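
A minimal usage sketch; the bucket prefix and regex are hypothetical:

# Hypothetical prefix and pattern, for illustration only. Since the function
# uses regex.match(), r'w7-' matches file names that start with 'w7-'.
for cloud_path, local_h5 in IterFilesFromCloud('gs://my-bucket/logs/', r'w7-'):
    print('Processing %s (downloaded to %s)' % (cloud_path, local_h5))
    # The temp file is deleted when the loop advances to the next item.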
Example #8
def AssertNoRemoteFiles():
    gs_api = gsutil.GsutilApi()
    filenames = gs_api.List(
        gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
    self.assertEqual(filenames, [])