def upload_dir(
    self,
    dirname,
    blob,
    container_name=None,
    use_basename=True,
    workers=0,
    last_time=None,
    exclude: List[str] = None,
):
    """
    Uploads a local directory to Azure Blob service.

    Args:
        dirname: `str`. name of the directory to upload.
        blob: `str`. blob to upload to.
        container_name: `str`. the name of the container.
        use_basename: `bool`. whether or not to use the basename of the directory.
        workers: `int`. number of workers to use for parallel uploads.
        last_time: `datetime`. if provided, only upload files modified after `last_time`.
        exclude: `list`. list of paths to exclude.
    """
    if not container_name:
        container_name, _, blob = self.parse_wasbs_url(blob)

    if use_basename:
        blob = append_basename(blob, dirname)

    pool, future_results = self.init_pool(workers)

    # Turn the path into an absolute path
    dirname = os.path.abspath(dirname)
    with get_files_in_path_context(dirname, exclude=exclude) as files:
        for f in files:
            # If last_time is provided, check whether the file needs re-uploading
            if last_time and not file_modified_since(filepath=f, last_time=last_time):
                continue
            file_blob = os.path.join(blob, os.path.relpath(f, dirname))
            future_results = self.submit_pool(
                workers=workers,
                pool=pool,
                future_results=future_results,
                fn=self.upload_file,
                filename=f,
                blob=file_blob,
                container_name=container_name,
                use_basename=False,
            )

    if workers:
        futures.wait(future_results)
        self.close_pool(pool=pool)
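
# Usage sketch, illustrative only: `AzureBlobStore` is a hypothetical name for
# the class that defines `upload_dir` above, and the wasbs URL is a placeholder.
# Uploads every file under /tmp/outputs modified in the last hour, with 4
# parallel workers, skipping anything under a "logs" subpath.
from datetime import datetime, timedelta

store = AzureBlobStore()  # hypothetical client class
store.upload_dir(
    dirname="/tmp/outputs",
    blob="wasbs://container@account.blob.core.windows.net/runs/outputs",
    workers=4,
    last_time=datetime.utcnow() - timedelta(hours=1),
    exclude=["logs"],
)
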
def upload_dir(
    self,
    dirname,
    blob,
    bucket_name=None,
    use_basename=True,
    workers=0,
    last_time=None,
):
    """
    Uploads a local directory to Google Cloud Storage.

    Args:
        dirname: `str`. name of the directory to upload.
        blob: `str`. blob to upload to.
        bucket_name: `str`. the name of the bucket.
        use_basename: `bool`. whether or not to use the basename of the directory.
        workers: `int`. number of workers to use for parallel uploads.
        last_time: `datetime`. if provided, only upload files modified after `last_time`.
    """
    if not bucket_name:
        bucket_name, blob = self.parse_gcs_url(blob)

    if use_basename:
        blob = append_basename(blob, dirname)

    pool, future_results = self.init_pool(workers)

    # Turn the path into an absolute path
    dirname = os.path.abspath(dirname)
    with get_files_in_path_context(dirname) as files:
        for f in files:
            # If last_time is provided, check whether the file needs re-uploading
            if last_time and not file_modified_since(filepath=f, last_time=last_time):
                continue
            file_blob = os.path.join(blob, os.path.relpath(f, dirname))
            future_results = self.submit_pool(
                workers=workers,
                pool=pool,
                future_results=future_results,
                fn=self.upload_file,
                filename=f,
                blob=file_blob,
                bucket_name=bucket_name,
                use_basename=False,
            )

    if workers:
        futures.wait(future_results)
        self.close_pool(pool=pool)
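
# Usage sketch, illustrative only: `GCSStore` is a hypothetical name for the
# class that defines `upload_dir` above; the gs:// URL is a placeholder.
# With use_basename=True (the default), the files land under
# gs://my-bucket/runs/outputs/... because the directory basename is appended.
store = GCSStore()  # hypothetical client class
store.upload_dir(
    dirname="/tmp/outputs",
    blob="gs://my-bucket/runs",
    workers=2,
)
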
def upload_dir(
    self,
    dirname,
    path_to,
    use_basename=True,
    workers=0,
    last_time=None,
    exclude: List[str] = None,
):
    """
    Uploads a local directory to another local destination path.

    Args:
        dirname: `str`. name of the directory to upload.
        path_to: `str`. destination path to copy the directory to.
        use_basename: `bool`. whether or not to use the basename of the directory.
        workers: `int`. number of workers to use for parallel uploads.
        last_time: `datetime`. if provided, only upload files modified after `last_time`.
        exclude: `list`. list of paths to exclude.
    """
    if use_basename:
        path_to = append_basename(path_to, dirname)

    # Nothing to do if source and destination are the same path
    if dirname == path_to:
        return

    check_or_create_path(path_to, is_dir=True)

    pool, future_results = self.init_pool(workers)

    # Turn the path into an absolute path
    dirname = os.path.abspath(dirname)
    with get_files_in_path_context(dirname, exclude=exclude) as files:
        for f in files:
            # If last_time is provided, check whether the file needs re-uploading
            if last_time and not file_modified_since(filepath=f, last_time=last_time):
                continue
            file_path_to = os.path.join(path_to, os.path.relpath(f, dirname))
            future_results = self.submit_pool(
                workers=workers,
                pool=pool,
                future_results=future_results,
                fn=self.upload_file,
                filename=f,
                path_to=file_path_to,
                use_basename=False,
            )

    if workers:
        futures.wait(future_results)
        self.close_pool(pool=pool)
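
# Usage sketch, illustrative only: `HostPathStore` is a hypothetical name for
# the class that defines `upload_dir` above, and the paths are placeholders.
# Copies /tmp/outputs into /plx-artifacts/outputs (basename appended by default).
store = HostPathStore()  # hypothetical client class
store.upload_dir(
    dirname="/tmp/outputs",
    path_to="/plx-artifacts",
    workers=2,
    exclude=[".cache"],
)
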
def sync_events_summaries(
    events_path: str,
    events_kind: str,
    last_check: Optional[datetime],
    connection_name: Optional[str] = None,
) -> Tuple[List, Dict]:
    current_events_path = get_path(events_path, events_kind)

    summaries = []
    last_values = {}

    with get_files_in_path_context(current_events_path) as files:
        for f in files:
            # If last_check is provided, skip files not modified since then
            if last_check and not file_modified_since(filepath=f, last_time=last_check):
                continue

            event_name = os.path.basename(f).split(".plx")[0]
            event = V1Events.read(kind=events_kind, name=event_name, data=f)
            if event.df.empty:
                continue

            # Keep only the path relative to the mounted artifacts context
            event_rel_path = os.path.relpath(f, CONTEXT_MOUNT_ARTIFACTS)
            summary = event.get_summary()
            run_artifact = V1RunArtifact(
                name=event_name,
                kind=events_kind,
                connection=connection_name,
                summary=summary,
                path=event_rel_path,
                is_input=False,
            )
            summaries.append(run_artifact)
            if events_kind == V1ArtifactKind.METRIC:
                last_values[event_name] = summary[V1ArtifactKind.METRIC]["last"]

    return summaries, last_values
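
# Usage sketch, illustrative only: the events path and connection name are
# placeholders. Collects artifact summaries for metric events written since
# the last check, plus the latest value of each metric.
from datetime import datetime, timedelta

summaries, last_values = sync_events_summaries(
    events_path="/plx-context/artifacts/run-uuid/events",  # placeholder path
    events_kind=V1ArtifactKind.METRIC,
    last_check=datetime.utcnow() - timedelta(minutes=5),
    connection_name="my-connection",  # placeholder
)
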
def upload_dir(
    self,
    dirname,
    key,
    bucket_name=None,
    overwrite=False,
    encrypt=False,
    acl=None,
    use_basename=True,
    workers=0,
    last_time=None,
):
    """
    Uploads a local directory to S3.

    Args:
        dirname: `str`. name of the directory to upload.
        key: `str`. S3 key that will point to the file.
        bucket_name: `str`. name of the bucket in which to store the file.
        overwrite: `bool`. a flag to decide whether or not to overwrite the key
            if it already exists. If overwrite is False and the key exists,
            an error will be raised.
        encrypt: `bool`. if True, the file will be encrypted on the server-side
            by S3 and will be stored in an encrypted form while at rest in S3.
        acl: `str`. ACL to use for uploading, e.g. "public-read".
        use_basename: `bool`. whether or not to use the basename of the directory.
        workers: `int`. number of workers to use for parallel uploads.
        last_time: `datetime`. if provided, only upload files modified after `last_time`.
    """
    if not bucket_name:
        bucket_name, key = self.parse_s3_url(key)

    if use_basename:
        key = append_basename(key, dirname)

    pool, future_results = self.init_pool(workers)

    # Turn the path into an absolute path
    dirname = os.path.abspath(dirname)
    with get_files_in_path_context(dirname) as files:
        for f in files:
            # If last_time is provided, check whether the file needs re-uploading
            if last_time and not file_modified_since(filepath=f, last_time=last_time):
                continue
            file_key = os.path.join(key, os.path.relpath(f, dirname))
            future_results = self.submit_pool(
                workers=workers,
                pool=pool,
                future_results=future_results,
                fn=self.upload_file,
                filename=f,
                key=file_key,
                bucket_name=bucket_name,
                overwrite=overwrite,
                encrypt=encrypt,
                acl=acl,
                use_basename=False,
            )

    if workers:
        futures.wait(future_results)
        self.close_pool(pool=pool)
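
# Usage sketch, illustrative only: `S3Store` is a hypothetical name for the
# class that defines `upload_dir` above; the bucket in the s3:// URL is a
# placeholder. Overwrites existing keys and uploads with 4 parallel workers.
store = S3Store()  # hypothetical client class
store.upload_dir(
    dirname="/tmp/outputs",
    key="s3://my-bucket/runs",
    overwrite=True,
    workers=4,
)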