def get_local_path(uri: str, download_dir: str,
                   fs: Optional[FileSystem] = None) -> str:
    """Work out where the local copy of a URI should live.

    The decision is delegated to the FileSystem's local_path method: a local
    URI is returned as is, while a remote one is mapped to a path inside
    download_dir. A uri of None is passed through as None.

    Args:
        uri: the URI of the file to be copied
        download_dir: path of the local directory in which files should be
            copied
        fs: if supplied, use fs instead of automatically chosen FileSystem for
            uri

    Returns:
        a local path (None if uri is None)
    """
    if uri is None:
        return None

    file_system = fs if fs else FileSystem.get_file_system(uri, 'r')
    return file_system.local_path(uri, download_dir)
def upload_or_copy(src_path: str, dst_uri: str,
                   fs: Optional[FileSystem] = None) -> None:
    """Upload or copy a file.

    If dst_uri is local, the file is copied. Otherwise, it is uploaded.
    Nothing happens when dst_uri is None, and no transfer is made when
    src_path and dst_uri are the same string.

    Args:
        src_path: path to source file (an existing directory is accepted too)
        dst_uri: URI of destination for file
        fs: if supplied, use fs instead of automatically chosen FileSystem for
            dst_uri

    Raises:
        Exception: if src_path is neither an existing file nor directory
        NotWritableError: if dst_uri cannot be written to
    """
    if dst_uri is None:
        return

    if not (os.path.isfile(src_path) or os.path.isdir(src_path)):
        raise Exception('{} does not exist.'.format(src_path))

    # Source and destination being the same location means there is nothing
    # to transfer.
    if src_path != dst_uri:
        log.info('Uploading {} to {}'.format(src_path, dst_uri))
        if not fs:
            fs = FileSystem.get_file_system(dst_uri, 'w')
        fs.copy_to(src_path, dst_uri)
def download_if_needed(uri: str, download_dir: str,
                       fs: Optional[FileSystem] = None) -> str:
    """Fetch a file into a local directory unless it is already local.

    The target path comes from get_local_path. When that path equals uri, no
    copy is made. The parent directory of the target path is created in
    either case. A uri of None is passed through as None.

    Args:
        uri: URI of file
        download_dir: local directory to download file into
        fs: if supplied, use fs instead of automatically chosen FileSystem for
            uri

    Returns:
        path to local file

    Raises:
        NotReadableError if URI cannot be read from
    """
    if uri is None:
        return None

    file_system = fs if fs else FileSystem.get_file_system(uri, 'r')
    local_path = get_local_path(uri, download_dir, fs=file_system)
    make_dir(local_path, use_dirname=True)

    needs_download = local_path != uri
    if needs_download:
        log.debug('Downloading {} to {}'.format(uri, local_path))
        file_system.copy_from(uri, local_path)

    return local_path
def test_sync_from_dir_noop_local(self):
    """Syncing a local directory onto itself leaves its single file alone."""
    src_dir = os.path.join(self.tmp_dir.name, 'lorem')
    file_path = os.path.join(src_dir, 'ipsum.txt')
    make_dir(src_dir, check_empty=False)

    local_fs = FileSystem.get_file_system(src_dir, 'r')
    local_fs.write_bytes(file_path, b'\x00\x01')

    # Source and destination are the same directory; delete=True must not
    # remove the file.
    sync_from_dir(src_dir, src_dir, delete=True)

    remaining = list_paths(src_dir)
    self.assertEqual(len(remaining), 1)
def test_last_modified(self):
    """last_modified of a freshly written local file is a datetime."""
    path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
    directory = os.path.dirname(path)
    make_dir(directory, check_empty=False)

    fs = FileSystem.get_file_system(path, 'r')
    str_to_file(self.lorem, path)
    stamp = fs.last_modified(path)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(stamp, datetime.datetime)
def file_exists(uri, fs=None, include_dir=True) -> bool:
    """Report whether something exists at a URI.

    Args:
        uri: URI of file
        fs: if supplied, use fs instead of automatically chosen FileSystem for
            uri
        include_dir: forwarded unchanged to the FileSystem's file_exists;
            presumably decides whether a directory counts as existing --
            confirm against the FileSystem implementation in use

    Returns:
        the result of the FileSystem's file_exists check
    """
    file_system = fs if fs else FileSystem.get_file_system(uri, 'r')
    return file_system.file_exists(uri, include_dir)
def test_bytes_local(self):
    """Bytes written to a local file are read back unchanged."""
    file_path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    make_dir(os.path.dirname(file_path), check_empty=False)

    payload = b'\x00\x01\x02'
    local_fs = FileSystem.get_file_system(file_path, 'r')
    local_fs.write_bytes(file_path, payload)

    round_tripped = local_fs.read_bytes(file_path)
    self.assertEqual(round_tripped, payload)
def test_sync_to_dir_local(self):
    """Syncing a one-file local directory yields one path at the destination."""
    file_path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    src_dir = os.path.dirname(file_path)
    dst_dir = os.path.join(self.tmp_dir.name, 'xxx')
    for directory in (src_dir, dst_dir):
        make_dir(directory, check_empty=False)

    local_fs = FileSystem.get_file_system(file_path, 'r')
    local_fs.write_bytes(file_path, b'\x00\x01')

    sync_to_dir(src_dir, dst_dir, delete=True)

    synced = list_paths(dst_dir)
    self.assertEqual(len(synced), 1)
def test_last_modified_s3(self):
    """last_modified of an object uploaded to S3 is a datetime."""
    path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
    s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
    directory = os.path.dirname(path)
    make_dir(directory, check_empty=False)

    fs = FileSystem.get_file_system(s3_path, 'r')

    # Write the file locally first, then push it to the bucket.
    str_to_file(self.lorem, path)
    upload_or_copy(path, s3_path)
    stamp = fs.last_modified(s3_path)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(stamp, datetime.datetime)
def str_to_file(content_str: str, uri: str,
                fs: Optional[FileSystem] = None):
    """Writes string to text file.

    Args:
        content_str: string to write
        uri: URI of file to write
        fs: if supplied, use fs instead of automatically chosen FileSystem

    Returns:
        whatever the FileSystem's write_str returns

    Raises:
        NotWritableError if uri cannot be written
    """
    if not fs:
        # This is a write, so the FileSystem is resolved in write mode, the
        # same way upload_or_copy resolves its destination.
        fs = FileSystem.get_file_system(uri, 'w')
    return fs.write_str(uri, content_str)
def test_file_exists(self):
    """file_exists distinguishes files from directories via include_dir."""
    local_fs = FileSystem.get_file_system(self.tmp_dir.name, 'r')

    file_path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    parent_dir = os.path.dirname(file_path)
    make_dir(parent_dir, check_empty=False)
    str_to_file(self.lorem, file_path)

    # A directory only counts as existing when include_dir is True.
    dir_found_with_dirs = local_fs.file_exists(parent_dir, include_dir=True)
    self.assertTrue(dir_found_with_dirs)

    file_found = local_fs.file_exists(file_path, include_dir=False)
    self.assertTrue(file_found)

    dir_found_without_dirs = local_fs.file_exists(
        parent_dir, include_dir=False)
    self.assertFalse(dir_found_without_dirs)

    # A path that was never created must not be reported.
    bogus_found = local_fs.file_exists(
        parent_dir + 'NOTPOSSIBLE', include_dir=False)
    self.assertFalse(bogus_found)
def file_to_str(uri: str, fs: Optional[FileSystem] = None) -> str:
    """Read a text file and hand back its contents as a string.

    Args:
        uri: URI of file
        fs: if supplied, use fs instead of automatically chosen FileSystem

    Returns:
        contents of text file

    Raises:
        NotReadableError if URI cannot be read
    """
    reader = fs if fs else FileSystem.get_file_system(uri, 'r')
    return reader.read_str(uri)
def list_paths(uri: str, ext: str = '',
               fs: Optional[FileSystem] = None) -> List[str]:
    """Enumerate the paths found under a directory URI.

    The listing can be narrowed to paths with a given file extension. A uri
    of None is passed through as None.

    Args:
        uri: the URI of a directory
        ext: the optional file extension to filter by
        fs: if supplied, use fs instead of automatically chosen FileSystem for
            uri

    Returns:
        the paths reported by the FileSystem's list_paths (None if uri is
        None)
    """
    if uri is None:
        return None

    file_system = fs if fs else FileSystem.get_file_system(uri, 'r')
    return file_system.list_paths(uri, ext=ext)
def sync_from_dir(src_dir_uri: str, dst_dir: str, delete: bool = False,
                  fs: Optional[FileSystem] = None):
    """Bring a local destination directory in line with a source directory.

    Files are transferred from the source to the destination so that the
    destination ends up with all the source files. With a remote FileSystem
    this means downloading.

    Args:
        src_dir_uri: URI of source directory
        dst_dir: path of local destination directory
        delete: if True, delete files in the destination to match those in the
            source directory
        fs: if supplied, use fs instead of automatically chosen FileSystem for
            src_dir_uri
    """
    source_fs = fs if fs else FileSystem.get_file_system(src_dir_uri, 'r')
    source_fs.sync_from_dir(src_dir_uri, dst_dir, delete=delete)
def __init__(self,
             cfg: LearnerConfig,
             tmp_dir: str,
             model_path: Optional[str] = None,
             model_def_path: Optional[str] = None,
             loss_def_path: Optional[str] = None,
             training: bool = True):
    """Constructor.

    Prepares cache and output directories, builds the model, optionally
    loads weights into it, and then either sets up training or switches the
    model to eval mode.

    Args:
        cfg (LearnerConfig): Configuration.
        tmp_dir (str): Root of temp dirs.
        model_path (str, optional): A local path to model weights.
            Defaults to None.
        model_def_path (str, optional): A local path to a directory with a
            hubconf.py. If provided, the model definition is imported from
            here. Defaults to None.
        loss_def_path (str, optional): A local path to a directory with a
            hubconf.py. If provided, the loss function definition is
            imported from here. Defaults to None.
        training (bool, optional): Whether the model is to be used for
            training or prediction. If False, the model is put in eval mode
            and setup_training is not called. Defaults to True.

    Raises:
        Exception: if model_path is given but is not an existing file.
    """
    log_system_details()

    self.cfg = cfg
    self.tmp_dir = tmp_dir
    self.preview_batch_limit = self.cfg.data.preview_batch_limit

    # TODO make cache dirs configurable
    torch_home = '/opt/data/torch-cache'
    os.environ['TORCH_HOME'] = torch_home
    if torch.cuda.is_available():
        self.device = 'cuda'
    else:
        self.device = 'cpu'
    self.data_cache_dir = '/opt/data/data-cache'
    make_dir(self.data_cache_dir)

    # A local output URI is used in place; anything else is mirrored into a
    # freshly emptied directory derived from tmp_dir.
    output_is_local = (
        FileSystem.get_file_system(cfg.output_uri) == LocalFileSystem)
    if output_is_local:
        self.output_dir = cfg.output_uri
        make_dir(self.output_dir)
    else:
        self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
        make_dir(self.output_dir, force_empty=True)
        if training and not cfg.overfit_mode:
            self.sync_from_cloud()

    self.modules_dir = join(self.output_dir, MODULES_DIRNAME)

    self.setup_model(model_def_path=model_def_path)

    if model_path is not None:
        if not isfile(model_path):
            raise Exception(
                'Model could not be found at {}'.format(model_path))
        log.info(f'Loading model weights from: {model_path}')
        weights = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(weights)

    if not training:
        self.model.eval()
    else:
        self.setup_training(loss_def_path=loss_def_path)
def test_bytes_local_false(self):
    """Reading bytes from a path that was never written raises NotReadableError."""
    missing_path = os.path.join(self.tmp_dir.name, 'xxx')
    local_fs = FileSystem.get_file_system(missing_path, 'r')

    with self.assertRaises(NotReadableError):
        local_fs.read_bytes(missing_path)
def test_last_modified_http(self):
    """last_modified yields None for an HTTP URI."""
    uri = 'http://localhost/'
    fs = FileSystem.get_file_system(uri, 'r')

    # assertIsNone checks identity with None instead of relying on ==.
    self.assertIsNone(fs.last_modified(uri))
def test_write_bytes_http(self):
    """Writing bytes to an HTTP URI raises NotWritableError."""
    http_uri = 'http://localhost/'
    http_fs = FileSystem.get_file_system(http_uri, 'r')
    payload = b'\x00\x01'

    with self.assertRaises(NotWritableError):
        http_fs.write_bytes(http_uri, payload)
def __init__(self,
             cfg: LearnerConfig,
             tmp_dir: str,
             model_path: Optional[str] = None,
             model_def_path: Optional[str] = None,
             loss_def_path: Optional[str] = None):
    """Constructor.

    Sets up cache and output directories and builds the model. If model_path
    is given, the weights are loaded and the model is put in eval mode;
    otherwise everything needed for training is initialized (config dump,
    checkpoint, loss, optimizer, data, schedulers, tensorboard).

    Args:
        cfg: configuration
        tmp_dir: root of temp dirs
        model_path: a local path to model weights. If provided, the model is
            loaded and it is assumed that this Learner will be used for
            prediction only.
        model_def_path: a local path to a directory with a hubconf.py. If
            provided, the model definition is imported from here.
        loss_def_path: a local path to a directory with a hubconf.py. If
            provided, the loss function definition is imported from here.

    Raises:
        Exception: if model_path is given but is not an existing file.
    """
    self.cfg = cfg
    self.tmp_dir = tmp_dir

    # TODO make cache dirs configurable
    torch_cache_dir = '/opt/data/torch-cache'
    # TORCH_HOME is set process-wide through the environment.
    os.environ['TORCH_HOME'] = torch_cache_dir
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.data_cache_dir = '/opt/data/data-cache'
    make_dir(self.data_cache_dir)

    # A local output URI is used directly. Otherwise a local path derived
    # from tmp_dir stands in for it; that directory is created with
    # force_empty=True and, unless overfit_mode is set, sync_from_cloud is
    # called afterwards.
    if FileSystem.get_file_system(cfg.output_uri) == LocalFileSystem:
        self.output_dir = cfg.output_uri
        make_dir(self.output_dir)
    else:
        self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
        make_dir(self.output_dir, force_empty=True)
        if not cfg.overfit_mode:
            self.sync_from_cloud()

    self.modules_dir = join(self.output_dir, MODULES_DIRNAME)

    # The model must exist before weights can be loaded into it below.
    self.setup_model(model_def_path=model_def_path)

    if model_path is not None:
        # Prediction-only path: load the weights and switch to eval mode.
        if isfile(model_path):
            self.model.load_state_dict(
                torch.load(model_path, map_location=self.device))
        else:
            raise Exception(
                'Model could not be found at {}'.format(model_path))
        self.model.eval()
    else:
        # Training path.
        log.info(self.cfg)
        # ds = dataset, dl = dataloader
        self.train_ds = None
        self.train_dl = None
        self.valid_ds = None
        self.valid_dl = None
        self.test_ds = None
        self.test_dl = None

        # Write the configuration as JSON into the output directory.
        self.config_path = join(self.output_dir, 'learner-config.json')
        str_to_file(self.cfg.json(), self.config_path)

        # Paths of the files kept in the output directory.
        self.log_path = join(self.output_dir, 'log.csv')
        self.train_state_path = join(self.output_dir, 'train-state.json')
        model_bundle_fname = basename(cfg.get_model_bundle_uri())
        self.model_bundle_path = join(self.output_dir, model_bundle_fname)
        self.metric_names = self.build_metric_names()
        self.last_model_path = join(self.output_dir, 'last-model.pth')
        self.load_checkpoint()

        self.setup_loss(loss_def_path=loss_def_path)
        self.opt = self.build_optimizer()
        self.setup_data()
        self.start_epoch = self.get_start_epoch()
        # Floor division: a trailing partial batch is not counted as a step.
        self.steps_per_epoch = len(
            self.train_ds) // self.cfg.solver.batch_sz
        self.step_scheduler = self.build_step_scheduler()
        self.epoch_scheduler = self.build_epoch_scheduler()
        self.setup_tensorboard()