Example No. 1
def mkdirs_cli(api_client, dbfs_path):
    """
    Make directories in DBFS.

    Mkdirs will create directories along the path to the argument directory.
    """
    DbfsApi(api_client).mkdirs(dbfs_path)
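The CLI wrapper above delegates to DbfsApi.mkdirs, which takes a DbfsPath object rather than a plain string. A minimal sketch of calling it directly (the host and token values below are placeholders, not real credentials):

from databricks_cli.sdk.api_client import ApiClient
from databricks_cli.dbfs.api import DbfsApi
from databricks_cli.dbfs.dbfs_path import DbfsPath

# Placeholder credentials -- substitute your workspace URL and personal access token.
client = ApiClient(host="https://<workspace-url>", token="<personal-access-token>")

# mkdirs creates every missing directory along the path, like `mkdir -p`.
DbfsApi(client).mkdirs(DbfsPath("dbfs:/tmp/demo/nested/dir"))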
Example No. 2
def rm_cli(api_client, recursive, dbfs_path):
    """
    Remove files from DBFS.

    To remove a directory you must provide the --recursive flag.
    """
    DbfsApi(api_client).delete(dbfs_path, recursive)
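DbfsApi.delete mirrors the CLI flag: a directory is only removed when recursive is true. A short sketch, reusing the placeholder client from Example No. 1:

from databricks_cli.dbfs.api import DbfsApi
from databricks_cli.dbfs.dbfs_path import DbfsPath

dbfs = DbfsApi(client)  # placeholder ApiClient from Example No. 1
dbfs.delete(DbfsPath("dbfs:/tmp/demo/file.txt"), recursive=False)  # a single file
dbfs.delete(DbfsPath("dbfs:/tmp/demo"), recursive=True)            # a whole directory tree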
Example No. 3
def __init__(self, user, token, workspaceUrl):
    self.dbcli_apiclient = ApiClient(user,
                                     password=token,
                                     host=workspaceUrl,
                                     verify=True,
                                     command_name='Python Client')
    self.dbfs_api_client = DbfsApi(self.dbcli_apiclient)
Example No. 4
def deploy(client: ApiClient, job_conf: Dict[str, Any], task_args: Dict[str, Any]):
    dbfs_new_jar_name = job_conf['libraries'][0]['jar']
    logging.info("Submitting job with configuration %s and jar file %s" % (job_conf, dbfs_new_jar_name))

    dbfs_api = DbfsApi(client)

    dbfs_api.cp(recursive=False, overwrite=True, src=task_args["jar"], dst=dbfs_new_jar_name)

    job_data = client.perform_query('POST', '/jobs/create', data=job_conf, headers=None)

    logging.info("Job creation data %s" % job_data)

    if task_args["run_now"]:
        logging.info("Requested to launch job immediately")
        run_data = client.perform_query('POST', '/jobs/run-now', data=job_data, headers=None)
        logging.info("Job launched with run data: %s" % run_data)
        if task_args["trace"]:
            logging.info("Requested to trace the job status")
            run_finished = False
            while not run_finished:
                time.sleep(4)
                run_status = client.perform_query('GET', '/jobs/runs/get',
                                                  data={"run_id": run_data["run_id"]},
                                                  headers=None)
                logging.info(run_status)
                result_state = run_status["state"].get("result_state", None)
                if result_state:
                    run_finished = True
                    if result_state == "SUCCESS":
                        logging.info("Job successfully finished!")
                    else:
                        exception_text = "Job finished with result state %s. Please check run UI!" % result_state
                        raise Exception(exception_text)
    logging.info("All deployment actions successfully performed")
Example No. 5
def cp_cli(api_client, recursive, overwrite, src, dst):
    """
    Copy files to and from DBFS.

    Note that this function will fail if the src and dst are both on the local filesystem
    or if they are both DBFS paths.

    For non-recursive copies, if the dst is a directory, the file will be placed inside the
    directory. For example ``dbfs cp dbfs:/apple.txt .`` will create a file at ``./apple.txt``.

    For recursive copies, files inside of the src directory will be copied inside the dst directory
    with the same name. If the dst path does not exist, a directory will be created. For example
    ``dbfs cp -r dbfs:/foo foo`` will create a directory foo and place the files ``dbfs:/foo/a`` at
    ``foo/a``. If ``foo/a`` already exists, the file will not be overwritten unless the --overwrite
    flag is provided -- however, dbfs cp --recursive will continue to try and copy other files.
    """
    dbfs_api = DbfsApi(api_client)
    # Copy to DBFS in this case
    if not DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        if not os.path.exists(src):
            error_and_quit('The local file {} does not exist.'.format(src))
        if not recursive:
            if os.path.isdir(src):
                error_and_quit((
                    'The local file {} is a directory. You must provide --recursive'
                ).format(src))
            copy_to_dbfs_non_recursive(dbfs_api, src, DbfsPath(dst), overwrite)
        else:
            if not os.path.isdir(src):
                copy_to_dbfs_non_recursive(dbfs_api, src, DbfsPath(dst),
                                           overwrite)
                return
            copy_to_dbfs_recursive(dbfs_api, src, DbfsPath(dst), overwrite)
    # Copy from DBFS in this case
    elif DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        if not recursive:
            copy_from_dbfs_non_recursive(dbfs_api, DbfsPath(src), dst,
                                         overwrite)
        else:
            dbfs_path_src = DbfsPath(src)
            if not dbfs_api.get_status(dbfs_path_src).is_dir:
                copy_from_dbfs_non_recursive(dbfs_api, dbfs_path_src, dst,
                                             overwrite)
                return
            copy_from_dbfs_recursive(dbfs_api, dbfs_path_src, dst, overwrite)
    elif not DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        error_and_quit(
            'Both paths provided are from your local filesystem. '
            'To use this utility, one of the src or dst must be prefixed '
            'with dbfs:/')
    elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        error_and_quit(
            'Both paths provided are from the DBFS filesystem. '
            'To copy between the DBFS filesystem, you currently must copy the '
            'file from DBFS to your local filesystem and then back.')
    else:
        assert False, 'not reached'
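The branching above hinges on DbfsPath.is_valid, which tests whether a path carries the dbfs: scheme; that is how the function decides the copy direction. For instance:

from databricks_cli.dbfs.dbfs_path import DbfsPath

print(DbfsPath.is_valid("dbfs:/foo/a"))  # True  -> treated as a DBFS path
print(DbfsPath.is_valid("./foo/a"))      # False -> treated as a local path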
Example No. 6
import os

from databricks_cli.sdk.api_client import ApiClient
from databricks_cli.dbfs.api import DbfsApi


def main():
    # Read the workspace URL and access token from the environment.
    ENDPOINT = os.getenv('ENDPOINT')
    TOKEN = os.getenv('TOKEN')

    databricks_client = ApiClient(host=ENDPOINT, token=TOKEN)
    dbfs_client = DbfsApi(databricks_client)

    # expanduser resolves the leading "~"; DbfsApi.cp does not expand it.
    src_path = os.path.expanduser(
        "~/dev/deploy_whl_cluster_test/dist/test_package-0.0.1-py3-none-any.whl")
    # Assumed destination; the original snippet never defined dst_path.
    dst_path = "dbfs:/FileStore/whl/test_package-0.0.1-py3-none-any.whl"

    dbfs_client.cp(src=src_path, dst=dst_path, overwrite=True, recursive=False)
Example No. 7
def ls_cli(api_client, l, absolute, dbfs_path): #  NOQA
    """
    List files in DBFS.
    """
    if len(dbfs_path) == 0:
        dbfs_path = DbfsPath('dbfs:/')
    elif len(dbfs_path) == 1:
        dbfs_path = dbfs_path[0]
    else:
        error_and_quit('ls can take a maximum of one path.')
    files = DbfsApi(api_client).list_files(dbfs_path)
    table = tabulate([f.to_row(is_long_form=l, is_absolute=absolute) for f in files],
                     tablefmt='plain')
    click.echo(table)
Example No. 8
def prepare_for_operationalization(cluster_id, api_client, dbfs_path,
                                   overwrite, spark_version):
    """
    Installs appropriate versions of several libraries to support operationalization.

    Args:
        cluster_id (str): cluster_id representing the cluster to prepare for operationalization
        api_client (ApiClient): the ApiClient object used to authenticate to the workspace
        dbfs_path (str): the path on dbfs to upload libraries to
        overwrite (bool): whether to overwrite existing files on dbfs with new files of the same name
        spark_version (str): str version indicating which version of spark is installed on the databricks cluster

    Returns:
        A dictionary of libraries installed
    """
    print("Preparing for operationlization...")

    cosmosdb_jar_url = COSMOSDB_JAR_FILE_OPTIONS[spark_version]

    # download the cosmosdb jar
    local_jarname = os.path.basename(cosmosdb_jar_url)
    # only download if you need it:
    if overwrite or not os.path.exists(local_jarname):
        print("Downloading {}...".format(cosmosdb_jar_url))
        local_jarname, _ = urlretrieve(cosmosdb_jar_url, local_jarname)
    else:
        print("File {} already downloaded.".format(local_jarname))

    # upload jar to dbfs:
    upload_path = Path(dbfs_path, local_jarname).as_posix()
    print("Uploading CosmosDB driver to databricks at {}".format(upload_path))
    if dbfs_file_exists(api_client, upload_path) and overwrite:
        print("Overwriting file at {}".format(upload_path))
    DbfsApi(api_client).cp(recursive=False,
                           src=local_jarname,
                           dst=upload_path,
                           overwrite=overwrite)

    # setup the list of libraries to install:
    # jar library setup
    libs2install = [{"jar": upload_path}]
    # setup libraries to install:
    libs2install.extend([{"pypi": {"package": i}} for i in PYPI_O16N_LIBS])
    print(
        "Installing jar and pypi libraries required for operationalization...")
    LibrariesApi(api_client).install_libraries(cluster_id, libs2install)
    return libs2install
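COSMOSDB_JAR_FILE_OPTIONS and PYPI_O16N_LIBS are module-level constants in the surrounding script, so a call only needs the cluster, client, and target path. A hypothetical invocation (ids and paths are illustrative):

libs = prepare_for_operationalization(
    cluster_id="0000-000000-abcdefgh",  # illustrative cluster id
    api_client=client,                  # placeholder ApiClient from Example No. 1
    dbfs_path="dbfs:/FileStore/jars",
    overwrite=True,
    spark_version="2.4",                # must be a key of COSMOSDB_JAR_FILE_OPTIONS
)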
Example No. 9
def dbfs_file_exists(api_client, dbfs_path):
    """
    Checks whether a file exists on DBFS.

    Args:
        api_client (ApiClient object): Object used for authenticating to the workspace
        dbfs_path (str): Path to check

    Returns:
        True if the file exists on DBFS, False otherwise.
    """
    try:
        DbfsApi(api_client).list_files(dbfs_path=DbfsPath(dbfs_path))
        file_exists = True
    except Exception:
        file_exists = False
    return file_exists
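A typical guard built on this helper checks a path before uploading. A sketch with an illustrative path, reusing the placeholder client from Example No. 1:

jar_dst = "dbfs:/FileStore/jars/app.jar"
if dbfs_file_exists(client, jar_dst):
    print("{} already exists; pass overwrite=True to replace it.".format(jar_dst))
else:
    DbfsApi(client).cp(recursive=False, overwrite=False, src="app.jar", dst=jar_dst)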
Example No. 10
def cp_cli(api_client, recursive, overwrite, src, dst):
    """
    Copy files to and from DBFS.

    Note that this function will fail if the src and dst are both on the local filesystem.

    For non-recursive copies, if the dst is a directory, the file will be placed inside the
    directory. For example ``dbfs cp dbfs:/apple.txt .`` will create a file at ``./apple.txt``.

    For recursive copies, files inside of the src directory will be copied inside the dst directory
    with the same name. If the dst path does not exist, a directory will be created. For example
    ``dbfs cp -r dbfs:/foo foo`` will create a directory foo and place the files ``dbfs:/foo/a`` at
    ``foo/a``. If ``foo/a`` already exists, the file will not be overwritten unless the --overwrite
    flag is provided -- however, dbfs cp --recursive will continue to try and copy other files.
    """
    # DbfsApi.cp detects the direction and handles both local -> DBFS and DBFS -> local.
    DbfsApi(api_client).cp(recursive, overwrite, src, dst)
Example No. 11
def mv_cli(api_client, src, dst):
    """
    Moves a file between two DBFS paths.
    """
    DbfsApi(api_client).move(src, dst)
Example No. 12
    upload_path = Path(args.dbfs_path, args.eggname).as_posix()

    # Check if file exists to alert user.
    print("Uploading {} to databricks at {}".format(args.eggname, upload_path))
    if dbfs_file_exists(my_api_client, upload_path):
        if args.overwrite:
            print("Overwriting file at {}".format(upload_path))
        else:
            raise IOError("""
            {} already exists on databricks cluster. 
            This is likely an older version of the library.
            Please use the '--overwrite' flag to proceed.
            """.format(upload_path))

    DbfsApi(my_api_client).cp(recursive=False,
                              src=myegg,
                              dst=upload_path,
                              overwrite=args.overwrite)

    # steps below require the cluster to be running. Check status
    try:
        status = ClusterApi(my_api_client).get_cluster(args.cluster_id)
    except HTTPError as e:
        print(e)
        print(textwrap.dedent(CLUSTER_NOT_FOUND_MSG.format(args.cluster_id)))
        raise

    if status["state"] == "TERMINATED":
        print(
            textwrap.dedent(
                CLUSTER_NOT_RUNNING_MSG.format(args.cluster_id,
                                               status["state"])))
Example No. 13
def __init__(self, profile=None):
    api_client = get_api_client(profile)
    self.dbfs_client = DbfsApi(api_client)
    self.runs_client = RunsApi(api_client)
Example No. 14
def cat_cli(api_client, src):
    """
    Show the contents of a file. Does not work for directories.
    """
    DbfsApi(api_client).cat(src)
Example No. 15
def tgt_dbfs_api(tgt_api_client: ApiClient):
    return DbfsApi(tgt_api_client)
Example No. 16
def src_dbfs_api(src_api_client: ApiClient):
    return DbfsApi(src_api_client)
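Examples No. 15 and 16 read like pytest fixtures from a DBFS-to-DBFS migration test suite; in that setting each would carry the pytest.fixture decorator and the *_api_client arguments would themselves be fixtures. A minimal sketch under that assumption (workspace and token are placeholders):

import pytest
from databricks_cli.sdk.api_client import ApiClient
from databricks_cli.dbfs.api import DbfsApi

@pytest.fixture
def src_api_client() -> ApiClient:
    # Placeholder workspace; a real suite would read these from config or env vars.
    return ApiClient(host="https://<source-workspace>", token="<token>")

@pytest.fixture
def src_dbfs_api(src_api_client: ApiClient) -> DbfsApi:
    return DbfsApi(src_api_client)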
Example No. 17
def __init__(self, api_client):
    self.client = DeltaPipelinesService(api_client)
    self.dbfs_client = DbfsApi(api_client)
Example No. 18
def __init__(self, api_client):
    self.jobs_client = JobsApi(api_client)
    self.workspace_client = WorkspaceApi(api_client)
    self.dbfs_client = DbfsApi(api_client)