def get_local(self, local_item: Item, overwrite=False):
    if overwrite or local_item.content is None:
        local_item.content = Local.load_json(local_item.path)
        # drop attributes that must not be sent in the create/edit request
        for attribute in self._c.conf.instance_pools.strip_attributes:
            local_item.content.pop(attribute, None)
        local_item.content['instance_pool_name'] = self.remote_path(
            local_item.content['instance_pool_name'])
def get_local(self, local_item: Item, overwrite=False):
    if overwrite or local_item.content is None:
        local_item.content = Local.load_json(local_item.path)
        for attribute in self._c.conf.jobs.strip_attributes:
            local_item.content.pop(attribute, None)
        c = local_item.content
        # apply default values
        c['timeout_seconds'] = c.get('timeout_seconds', 0)
        # find the cluster
        if c.get('existing_cluster_name') and self._clusters:
            ec = self._clusters.get_single_item(c['existing_cluster_name'])
            assert ec is not None, f'Cluster "{c["existing_cluster_name"]}" ' \
                f'referenced in job "{c["name"]}" not found'
            c['existing_cluster_id'] = ec.path
            c.pop('existing_cluster_name', None)
        # find the right notebook
        notebook_path = c.get('notebook_task', {}).get('notebook_path')
        if notebook_path:
            remote_notebook_path = self._workspace.find_notebook(
                notebook_path)
            assert remote_notebook_path is not None, \
                f'Notebook "{notebook_path}" referenced in job "{c["name"]}" not found'
            c['notebook_task']['notebook_path'] = remote_notebook_path
        c['name'] = self.remote_path(c['name'])
def _ls(self, path=None):
    query = f'?filter=userName+eq+{path}' if path else None
    users = json.loads(
        self._c.api.call(Endpoints.users_list, body={}, query=query).text)
    return {
        i['userName']: Item(path=i['id'], kind='user', content=i)
        for i in users.get('Resources', [])
    }
def _get_remote(self, remote_item: Item, overwrite=False):
    if overwrite or remote_item.content is None:
        response = self._c.api.call(Endpoints.workspace_export, body={
            'path': remote_item.path,
            'format': 'SOURCE'
        })
        remote_item.content = base64.b64decode(response.json()['content'])
def dbfs_ls(path) -> OrderedDict:
    _objects = OrderedDict()
    if path is not None:
        for cur_path, dirs, files in os_walk(path):
            for f in files:
                _objects[Local._common_dbfs_name(
                    op.join(cur_path, f), path)] = Item(
                        path=op.join(cur_path, f),
                        kind='dbfs file',
                        size=op.getsize(op.join(cur_path, f)),
                        is_dir=False)
            for d in dirs:
                _objects[Local._common_dbfs_name(
                    op.join(cur_path, d), path)] = Item(
                        path=op.join(cur_path, d),
                        kind='dbfs directory',
                        is_dir=True)
    return _objects
def _ls(self, path=None):
    jobs = json.loads(self._c.api.call(Endpoints.jobs_list, body={}).text)
    return {
        self.common_path(i['settings']['name']):
        Item(path=i['job_id'], kind='job', content=i['settings'])
        for i in jobs.get('jobs', [])
        if i['creator_user_name'] == self._c.conf.deploying_user_name
        and i['settings']['name'].startswith(self._c.conf.name_prefix)
    }
def workspace_ls(path) -> OrderedDict:
    _objects = OrderedDict()
    if path is not None:
        for cur_path, dirs, files in os_walk(path):
            for f in files:
                if op.splitext(f)[1] in NOTEBOOK_EXTENSIONS:
                    _objects[Local._common_name(
                        op.join(cur_path, f), path)] = Item(
                            path=op.join(cur_path, f),
                            kind='workspace notebook',
                            language=NOTEBOOK_EXTENSIONS[op.splitext(f)[1]],
                            is_dir=False)
            for d in dirs:
                _objects[Local._common_name(
                    op.join(cur_path, d), path)] = Item(
                        path=op.join(cur_path, d),
                        kind='workspace directory',
                        is_dir=True)
    return _objects
def _ls(self, path=None):
    clusters = json.loads(
        self._c.api.call(Endpoints.clusters_list, body={}).text)
    return {
        self.common_path(i['cluster_name']):
        Item(path=i['cluster_id'], kind='cluster', content=i)
        for i in clusters.get('clusters', [])
        if i['creator_user_name'] == self._c.conf.deploying_user_name
        and i['cluster_name'].startswith(self._c.conf.name_prefix)
    }
def _ls(self, path=None):
    instance_pools = json.loads(
        self._c.api.call(Endpoints.instance_pools_list, body={}).text)
    return {
        self.common_path(i['instance_pool_name']):
        Item(path=i['instance_pool_id'], kind='instance pool', content=i)
        for i in instance_pools.get('instance_pools', [])
        if i['default_tags']['DatabricksInstancePoolCreatorId'] ==
        self._c.conf.deploying_user_id
        and i['instance_pool_name'].startswith(self._c.conf.name_prefix)
    }
def get_local(self, local_item: Item, overwrite=False):
    if overwrite or local_item.content is None:
        local_item.content = Local.load_json(local_item.path)
        for attribute in self._c.conf.clusters.strip_attributes:
            local_item.content.pop(attribute, None)
        c = local_item.content
        # resolve the referenced instance pool to its remote id
        if c.get('instance_pool_name') and self._instance_pools:
            ip = self._instance_pools.get_single_item(
                c['instance_pool_name'])
            assert ip is not None, f'Instance pool "{c["instance_pool_name"]}" ' \
                f'referenced in cluster "{c["cluster_name"]}" not found'
            c['instance_pool_id'] = ip.path
            c.pop('instance_pool_name', None)
        c['cluster_name'] = self.remote_path(c['cluster_name'])
def files_ls(path, extensions=None, kind=None) -> OrderedDict:
    _files = OrderedDict()
    if path is not None:
        for cur_path, _, files in os_walk(path):
            for f in files:
                if extensions is None or op.splitext(f)[1] in extensions:
                    _files[Local._common_name(
                        op.join(cur_path, f), path)] = Item(
                            path=op.join(cur_path, f),
                            kind=kind,
                            size=op.getsize(op.join(cur_path, f)),
                            is_dir=False)
    return _files
def _ls(self, path=None):
    if path is None:
        path = self._target_path
    _objects = OrderedDict()
    for obj in self._c.api.call(Endpoints.dbfs_list, body={
            'path': path
    }).json().get('files', []):
        if obj['is_dir']:
            # recurse into sub-directories and merge their listings
            _objects.update(self._ls(obj['path']))
        _objects[self.common_path(obj['path'])] = Item(
            path=obj['path'],
            kind='dbfs directory' if obj['is_dir'] else 'dbfs file',
            is_dir=obj['is_dir'],
            size=obj['file_size'])
    return _objects
def _ls(self, path=None):
    if path is None:
        path = self._target_path
    _objects = OrderedDict()
    for obj in self._c.api.call(Endpoints.workspace_list, body={
            'path': path
    }).json().get('objects', []):
        if obj['object_type'] == 'DIRECTORY':
            # recurse into sub-directories and merge their listings
            _objects.update(self._ls(obj['path']))
        _objects[self.common_path(obj['path'])] = Item(
            path=obj['path'],
            kind=obj['object_type'].lower(),
            language=obj.get('language', ''),
            is_dir=obj['object_type'] == 'DIRECTORY')
    return _objects
def get_local(local_item: Item, overwrite=False):
    if overwrite or local_item.content is None:
        local_item.content = Local.load_binary(local_item.path)
def _get_remote(self, remote_item: Item, overwrite=False):
    if overwrite or remote_item.content is None:
        response = self._c.api.call(Endpoints.dbfs_read,
                                    body={'path': remote_item.path})
        remote_item.content = base64.b64decode(response.json()['content'])
def _update(self, local_item: Item, remote_item: Item):
    self._remote_items_stale = True
    self.get_local(local_item)
    local_item.content['cluster_id'] = remote_item.path
    return self._c.api.call(Endpoints.clusters_edit, body=local_item.content)