Example #1
    def __init__(
        self,
        database,
        sql_expr=None,
        table=None,
        schema=None,
        geometry=None,
        crs=None,
        api_key=None,
        civis_kwargs={},
        metadata={},
    ):
        """
        Create the Civis Source.

        Parameters
        ----------
        database: str
            The name of the database in the platform.
        sql_expr: str
            The SQL expression to pass to the database backend. Either this
            or table must be given.
        table: str
            The table name to pass to the database backend. Either this or
            sql_expr must be given.
        schema: str
            The schema for the table. Defaults to "public".
        geometry: str or list of str
            A column or list of columns that should be interpreted as geometries.
        crs: str or dict
            A coordinate reference string of the format that GeoPandas can understand.
            Only relevant if geometry columns are given.
        api_key: str
            An optional API key. If not given the env variable CIVIS_API_KEY
            will be used.
        civis_kwargs: dict
            Optional kwargs to pass to the civis.io functions.
        metadata: dict
            Additional metadata to attach to the source.
        """
        self._database = database
        self._table = table
        self._dbschema = schema
        self._sql_expr = sql_expr
        self._geom = [geometry] if isinstance(geometry, str) else geometry
        self._crs = crs
        self._client = civis.APIClient(api_key)
        self._civis_kwargs = dict(civis_kwargs)  # copy so the default dict isn't mutated below
        self._dataframe = None

        if crs and not geometry:
            warnings.warn("A CRS was provided but no geometry columns")

        # Only support reading with pandas
        self._civis_kwargs["use_pandas"] = True
        self._civis_kwargs["client"] = self._client

        # Enforce that exactly one of table or sql_expr is provided
        if bool(table) == bool(sql_expr):
            raise ValueError("Must provide a table OR a sql_expr")

        super(CivisSource, self).__init__(metadata=metadata)
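
A minimal usage sketch for the constructor above; the class name CivisSource comes from the super() call, and the read() method is assumed from the intake source interface rather than shown in the example:

# Hypothetical usage; the database and table names are placeholders.
source = CivisSource(
    database="My Redshift Cluster",
    table="events",
    schema="public",
)
df = source.read()  # assumed intake-style read(); expected to return a DataFrame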
Example #2
def get_most_recent_run(scriptid, client=None):
    """Get the most recent run of a container script.

    Parameters
    ----------
    scriptid : int
        The ID of the container script.
    client : None or civis.APIClient, optional
        A Civis API client to use. If not given, one will be
        instantiated.

    Returns
    -------
    runid : int
        The ID of the latest run.
    """
    client = client or civis.APIClient(resources='all')

    # First get all of the runs.
    runs = client.scripts.list_containers_runs(scriptid)

    if len(runs) > 0:
        # Get the most recent one.
        run_times = [
            dateutil.parser.parse(r['finished_at'] or r['started_at'])
            for r in runs
        ]
        run = runs[run_times.index(max(run_times))]
        return int(run['id'])
    else:
        return None
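
A brief usage sketch for get_most_recent_run; the script ID is a placeholder:

client = civis.APIClient()
latest_run_id = get_most_recent_run(12345, client=client)  # 12345 is a placeholder script ID
if latest_run_id is None:
    print("Script has no runs yet.")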
Example #3
def magic(line, cell=None):
    """Civis query magic.

    This magic works both as a cell magic (for table previews) and a
    line magic to query a table and return a DataFrame.
    """

    client = civis.APIClient()

    if cell is None:
        # Not using maxsplit kwarg b/c it is not compatible w/ Python 2
        database, sql = line.split(' ', 1)
        df = civis.io.read_civis_sql(
            sql, database.strip(), use_pandas=True, client=client)
        if len(df) == 0:
            df = None
    else:
        database = line.strip()
        sql = cell

        fut = civis.io.query_civis(
            sql, database, client=client, preview_rows=100)
        res = fut.result()
        if len(res['result_rows']) > 0:
            df = pd.DataFrame.from_records(
                res['result_rows'], columns=res['result_columns'])
        else:
            df = None

    return df
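
Called outside IPython, the function can be exercised directly; a hedged sketch with placeholder database and table names:

# Line-magic style: "<database> <sql>" in one string; returns a DataFrame (or None).
df = magic("MyDatabase SELECT COUNT(*) AS n FROM public.users")

# Cell-magic style: database on the line, SQL in the cell; previews up to 100 rows.
preview = magic("MyDatabase", cell="SELECT * FROM public.users")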
Example #4
def _print_script_runs(scriptid):
    """Print the runs for a script.

    Parameters
    ----------
    scriptid : int
        The container script to print the runs for.
    """
    client = civis.APIClient(resources='all')

    try:
        # First get all of the runs.
        runs = client.scripts.list_containers_runs(scriptid)

        fmt = "%- 25s %- 25s %- 25s %- 25s %s"
        head = fmt % ("run id", "started", "finished", "status", "error msg")
        print(head)
        for run in runs:
            print(fmt %
                  (run['id'], run['started_at'] if run['started_at'] else '-',
                   run['finished_at'] if run['finished_at'] else '-',
                   run['state'], run['error'] if run['error'] else '-'))
    except civis.base.CivisAPIError as e:
        print("Could not get script %s runs: %s.%s: %s" %
              (scriptid, e.__module__, e.__class__.__name__, e),
              file=sys.stderr)
        sys.exit(-1)
Example #5
def worker_func(func_file_id):
    # Have the output File expire in 7 days.
    expires_at = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not job_id or not run_id:
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    # Run the function.
    result = None
    try:
        func, remote_backend = _robust_pickle_download(
            func_file_id, client=client, n_retries=5, delay=0.5)

        _backend = _setup_remote_backend(remote_backend)

        # Gracefully nesting context managers is hard to do across Python
        # versions; this branching just works.
        if NO_SKLEARN:
            with _joblib_para_backend(_backend):
                result = func()
        else:
            # we are using the nested context managers to set the joblib
            # backend to the requested one in both copies of joblib, the
            # package and the copy shipped by sklearn at
            # `sklearn.externals.joblib`. joblib maintains the current
            # backend as global state in the package and thus there are
            # two backends to set when you have two copies of the package
            # in play.
            with _sklearn_para_backend(_backend):
                with _joblib_para_backend(_backend):
                    result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the exception in joblib's TransportableException
        # so that joblib can properly display the results.
        e_type, e_value, e_tb = sys.exc_info()
        text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
        result = TransportableException(text, e_type)
        raise
    finally:
        # Serialize the result and upload it to the Files API.
        if result is not None:
            # result may still be None if an error occurred before it was set.
            result_buffer = BytesIO()
            cloudpickle.dump(result, result_buffer, pickle.HIGHEST_PROTOCOL)
            result_buffer.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(job_id,
                                                                       run_id)
            output_file_id = _robust_file_to_civis(result_buffer, output_name,
                                                   n_retries=5, delay=0.5,
                                                   expires_at=expires_at,
                                                   client=client)
            client.scripts.post_containers_runs_outputs(job_id, run_id,
                                                        'File', output_file_id)
            print("Results output to file ID: {}".format(output_file_id))
Example #6
def notebooks_up(notebook_id, mem=None, cpu=None):
    """Start an existing notebook and open it in the browser."""
    client = civis.APIClient()
    kwargs = {'memory': mem, 'cpu': cpu}
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
    client.notebooks.patch(notebook_id, **kwargs)
    _notebooks_up(notebook_id)
    _notebooks_open(notebook_id)
Example #7
def jobs_follow_log(id):
    client = civis.APIClient()
    runs = client.jobs.list_runs(id, limit=1, order='id', order_dir='desc')
    if not runs:
        raise click.ClickException('No runs found for that job ID.')
    run_id = runs[0].id
    print('Run ID: ' + str(run_id))
    _jobs_follow_run_log(id, run_id)
Example #8
def _print_scripts(user_ids, state, hidden):
    """List scripts given a list of users.

    This function only lists scripts that are containers.

    Parameters
    ----------
    user_ids : list of ints
        List of user IDs to get jobs for.
        An empty list corresponds to all scripts
        visible to the user making the API call.
    state : str
        List scripts only in this state. Use None to
        get scripts in all states.
    hidden : bool
        If True, display hidden scripts in addition to non-hidden ones.
    """
    client = civis.APIClient(resources='all')

    # Get scripts.
    scripts = []

    def _append_scripts(_hidden):
        _scripts = client.scripts.list(
            limit=50,
            hidden=_hidden,
            status=state,
            order='updated_at',
            type='containers',
            author=','.join(str(u) for u in user_ids) if user_ids else None)
        for s in _scripts:
            scripts.append(s)

    _append_scripts(_hidden=False)
    if hidden:
        _append_scripts(_hidden=True)

    # Print them out.
    fmt_str = "%- 20s %- 20s %- 20s %- 25s %- 25s %- 25s %s"
    head = fmt_str % ("id", "author", "status", "created", "started",
                      "finished", "name")
    print(head)

    for s in scripts:
        if user_ids and s['author']['id'] not in user_ids:
            continue

        if s['last_run'] is not None:
            s_tme = s['last_run']['startedAt'] or '-'
            f_tme = s['last_run']['finishedAt'] or '-'
        else:
            s_tme = '-'
            f_tme = '-'
        c_tme = s['created_at'] or '-'

        print(fmt_str % (s['id'], s['author']['username'], s['state'], c_tme,
                         s_tme, f_tme, s['name']))
Example #9
def notebooks_down(notebook_id):
    """Shut down a running notebook."""
    client = civis.APIClient()
    nb = client.notebooks.get(notebook_id)
    state = nb['most_recent_deployment']['state']
    if state not in ['running', 'pending']:
        print('Notebook is in state "{}" and can\'t be stopped.'.format(state))
        return
    deployment_id = nb['most_recent_deployment']['deploymentId']
    client.notebooks.delete_deployments(notebook_id, deployment_id)
Example #10
def unshare_service(args):
    client = civis.APIClient()
    tokens = client.services.list_tokens(args.id)
    try:
        token = next(t for t in tokens if t["name"] == args.name)
        client.services.delete_tokens(args.id, token["id"])
        print(f"Successfully unshared {args.name}")
    except StopIteration:
        print(f"Could not find share token with the name {args.name}")
Example #11
def _robust_file_to_civis(buf, name, client=None, n_retries=5,
                          delay=0.0, **kwargs):
    """Upload the contents of an input file-like buffer

    Call :func:`~civis.io.file_to_civis`, and retry a specified
    number of times before giving up. This will abandon
    Civis files created for failed uploads. Those files may
    be partially filled; it's necessary to create new files
    to ensure that the contents are exactly as requested.

    .. note:: This function starts by calling ``.seek(0)`` on the
              buffer, and will do so before every retry.

    Parameters
    ----------
    buf : File
        File-like bytes object to send to a Civis File
    name : str
        Name of the new Civis File
    client : civis.APIClient, optional
    n_retries : int, optional
        Retry the upload this many times before raising an error.
    delay : float, optional
        If provided, wait this many seconds between retries.
    kwargs :
        Extra keyword arguments will be passed to ``io.file_to_civis``

    Returns
    -------
    int
        ID of the new Civis File

    See Also
    --------
    civis.io.file_to_civis
    """
    client = client or civis.APIClient()
    retry_exc = (requests.HTTPError,
                 requests.ConnectionError,
                 requests.ConnectTimeout)
    n_failed = 0
    while True:
        buf.seek(0)
        try:
            file_id = civis.io.file_to_civis(buf, name=name,
                                             client=client, **kwargs)
        except retry_exc as exc:
            if n_failed < n_retries:
                n_failed += 1
                log.debug("Upload failure %s due to %s; retrying.",
                          n_failed, str(exc))
                time.sleep(delay)
            else:
                raise
        else:
            return file_id
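
A short usage sketch, assuming CIVIS_API_KEY is set so the default client can be constructed:

from io import BytesIO

buf = BytesIO(b"col_a,col_b\n1,2\n")
file_id = _robust_file_to_civis(buf, "example.csv", n_retries=3, delay=1.0)
print("Uploaded Civis File ID:", file_id)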
Example #12
def _print_script_logs(scriptid, runid=None):
    """Print the logs and information for a script.

    Parameters
    ----------
    scriptid : int
        The script ID to print out.
    runid : int or None, optional
        An optional run ID. If None, then the most recent
        run will be used.
    """
    client = civis.APIClient(resources='all')

    try:
        # get the run details
        runid = runid or get_most_recent_run(scriptid, client=client)

        # Get rest of script details.
        deets = client.scripts.get_containers(scriptid)

        # Make them into pure dicts for dumping to yaml.
        deets = {
            k: dict(v) if (isinstance(v, civis.response.Response)
                           or isinstance(v, dict)) else v
            for k, v in deets.items()
        }

        # Put the name and id at the top so they are easy to see
        print('name:', deets['name'])
        print('id:', deets['id'])
        del deets['name']
        del deets['id']

        # Make the docker command more grep-able.
        print('docker_command:')
        for line in deets['docker_command'].split('\n'):
            line = line.rstrip()
            print('  ' + line)
        del deets['docker_command']

        # now dump the rest.
        print(yaml.dump(deets, default_flow_style=False).strip())

        if runid is not None:
            # Get and print the logs for this run
            resp = client.scripts.list_containers_runs_logs(scriptid, runid)
            print("log file:")
            for r in resp[::-1]:
                print("  [%s] %s" % (r['created_at'], r['message']))
        else:
            print('log file: -')
    except civis.base.CivisAPIError as e:
        print("Could not print script %s logs: %s.%s: %s" %
              (scriptid, e.__module__, e.__class__.__name__, e),
              file=sys.stderr)
        sys.exit(-1)
Example #13
def get_pword(login):
    if login == '':
        raise IndexError
    client = civis.APIClient(api_key=os.environ['CIVIS_API_KEY'])
    return civis.io.read_civis_sql(
        sql=
        f"SELECT pword FROM users.creds WHERE LOWER(login) = '{login.lower()}'",
        database='HRC',
        client=client,
        hidden=True)[1][0]
Example #14
def notebooks_new_cmd(language='python3', mem=None, cpu=None):
    """Create a new notebook and open it in the browser."""
    client = civis.APIClient()
    kwargs = {'memory': mem, 'cpu': cpu}
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
    new_nb = client.notebooks.post(language=language, **kwargs)
    print("Created new {language} notebook with ID {id} in Civis Platform"
          " (https://platform.civisanalytics.com/#/notebooks/{id})."
          .format(language=language, id=new_nb.id))
    _notebooks_up(new_nb.id)
    _notebooks_open(new_nb.id)
Example #15
def notebooks_download_cmd(notebook_id, path):
    """Download a notebook to a specified local path."""
    client = civis.APIClient()
    info = client.notebooks.get(notebook_id)
    response = requests.get(info['notebook_url'], stream=True)
    response.raise_for_status()
    chunk_size = 32 * 1024
    chunked = response.iter_content(chunk_size)
    with open(path, 'wb') as f:
        for lines in chunked:
            f.write(lines)
Example #16
    def __init__(self,
                 *args,
                 civis_api_key=None,
                 civis_api_key_env_var="CIVIS_API_KEY",
                 database=None,
                 schema=None,
                 existing_table_rows="append",
                 include_columns=None,
                 dummy_run=False,
                 block=False,
                 max_errors=0,
                 table=None,
                 via_staging_table=False,
                 columns=None,
                 staging_table=None,
                 remap=None,
                 recorded_tables=TimedDict(timeout=30),
                 **kwargs):
        self.civis_api_key = civis_api_key or os.environ[civis_api_key_env_var]
        self.include_columns = include_columns
        self.table = table
        self.dummy_run = dummy_run
        self.schema = schema
        self.max_errors = int(max_errors)
        self.existing_table_rows = existing_table_rows
        self.database = database
        self.via_staging_table = via_staging_table
        self.block = block
        self.remap = remap
        self.api_client = civis.APIClient()
        self.recorded_tables = recorded_tables

        self.columns = columns

        super(SendToCivis, self).__init__(**kwargs)

        if self.via_staging_table:
            self.staging_table = "_".join([
                table,
                "staging",
                hashlib.md5(bytes(str(random.random()),
                                  "ascii")).hexdigest()[:HASH_SUFFIX_LENGTH],
            ])
            self.log_info("staging table for: " + self.name + " " +
                          str(self.staging_table))
        else:
            self.staging_table = staging_table

        if not self.civis_api_key:
            raise Exception("Could not get a Civis API key.")

        self.monitor_futures_thread = threading.Thread(
            target=SendToCivis.monitor_futures, args=(self, ), daemon=True)
        self.monitor_futures_thread.start()
Example #17
def post_json_run_output(json_value_dict):
    client = civis.APIClient()
    json_value_object = client.json_values.post(
            json.dumps(json_value_dict),
            name='email_outputs'
            )
    client.scripts.post_python3_runs_outputs(
            os.environ['CIVIS_JOB_ID'],
            os.environ['CIVIS_RUN_ID'],
            'JSONValue',
            json_value_object.id
            )
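
A usage sketch; this assumes the code runs inside a Civis Platform Python script, where CIVIS_JOB_ID and CIVIS_RUN_ID are set:

post_json_run_output({"emails_sent": 42, "status": "ok"})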
Example #18
def share_service(args):
    client = civis.APIClient()
    service = client.services.get(args.id)
    try:
        response = client.services.post_tokens(args.id, args.name)
        url = f"{service['current_url']}/civis-platform-auth?token={response['token']}"
        print(f"Share service id {args.id} with the following URL: {url}")
    except civis.base.CivisAPIError as e:
        if "Name has already been taken" in str(e):
            print(f"The share name {args.name} is already in use. "
                  "Please choose another")
        else:
            raise e
Example #19
def _get(scriptid, runid=None, path=None):
    """Download job data for a script.

    During jobs, civis-compute automatically saves any outputs written
    to the directory given by the environment variable `${CIVIS_JOB_DATA}`.

    The `get` command downloads these outputs if they exist.

    The outputs are stored as a run output with the name

        civis_job_data_${CIVIS_JOB_ID}_${CIVIS_RUN_ID}

    Parameters
    ----------
    scriptid : int
        The container script ID.
    runid : int or None, optional
        The run ID to get outputs for. If None,
        the most recent run is used.
    path : str or None
        The path to download the data to. If None,
        the current working directory is used.
    """
    client = civis.APIClient(resources='all')

    try:
        runid = runid or get_most_recent_run(scriptid, client=client)
        outputs = client.scripts.list_containers_runs_outputs(scriptid, runid)

        for output in outputs:
            if (output['object_type'] == 'File' and
                    'civis_job_data_' in output['name']):

                if path:
                    oname = os.path.join(path, output['name'])
                else:
                    oname = output['name']

                print(oname)

                with open(oname, 'wb') as fp:
                    civis.io.civis_to_file(output['object_id'], fp)

                break
    except civis.base.CivisAPIError as e:
        print(
            "Could not get script %s outputs: %s.%s: %s" % (
                scriptid, e.__module__, e.__class__.__name__, e),
            file=sys.stderr)
        sys.exit(-1)
Example #20
def test_get_table_id(schema_tablename):
    """Check that get_table_id handles quoted schema.tablename correctly."""
    client = civis.APIClient(local_api_spec=TEST_SPEC, api_key='none')
    client.get_database_id = mock.Mock(return_value=123)

    mock_tables = mock.MagicMock()
    mock_tables.__getitem__.side_effect = {0: mock.Mock()}.__getitem__

    client.tables.list = mock.Mock(return_value=mock_tables)

    client.get_table_id(table=schema_tablename, database=123)

    client.tables.list.assert_called_once_with(database_id=123,
                                               schema='foo',
                                               name='bar')
Example #21
def _robust_pickle_download(output_file_id,
                            client=None,
                            n_retries=5,
                            delay=0.0):
    """Download and deserialize the result from output_file_id

    Retry network errors `n_retries` times with `delay` seconds between calls

    Parameters
    ----------
    output_file_id : int
        ID of the file to download
    client : civis.APIClient, optional
    n_retries : int, optional
        Retry the upload this many times before raising an error.
    delay : float, optional
        If provided, wait this many seconds between retries.

    Returns
    -------
    obj
        Any Python object; the result of calling ``cloudpickle.load`` on the
        downloaded file

    See Also
    --------
    cloudpickle.load
    """
    client = client or civis.APIClient()
    retry_exc = (requests.HTTPError, requests.ConnectionError,
                 requests.ConnectTimeout)
    n_failed = 0
    while True:
        buffer = BytesIO()
        try:
            civis.io.civis_to_file(output_file_id, buffer, client=client)
        except retry_exc as exc:
            buffer.close()
            if n_failed < n_retries:
                n_failed += 1
                log.debug("Download failure %s due to %s; retrying.", n_failed,
                          str(exc))
                time.sleep(delay)
            else:
                raise
        else:
            buffer.seek(0)
            return cloudpickle.load(buffer)
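
Usage sketch with a placeholder file ID:

obj = _robust_pickle_download(987654, n_retries=3, delay=1.0)  # 987654 is a placeholder file ID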
Example #22
def worker_func(func_file_id):
    # Have the output File expire in 7 days.
    expires_at = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not job_id or not run_id:
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    func_buffer = BytesIO()
    civis.io.civis_to_file(func_file_id, func_buffer)
    func_buffer.seek(0)
    func = joblib.load(func_buffer)

    # Run the function.
    result = None
    try:
        result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the exception in joblib's TransportableException
        # so that joblib can properly display the results.
        e_type, e_value, e_tb = sys.exc_info()
        text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
        result = TransportableException(text, e_type)
        raise
    finally:
        # Serialize the result and upload it to the Files API.
        # Note that if compress is 0, joblib will output multiple files.
        # compress=3 is a good compromise between space and read/write times
        # (https://github.com/joblib/joblib/blob/18f9b4ce95e8788cc0e9b5106fc22573d768c44b/joblib/numpy_pickle.py#L358).
        if result is not None:
            # result may still be None if an error occurred before it was set.
            result_buffer = BytesIO()
            joblib.dump(result, result_buffer, compress=3)
            result_buffer.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = civis.io.file_to_civis(result_buffer,
                                                    output_name,
                                                    expires_at=expires_at)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            print("Results output to file ID: {}".format(
                output_name, output_file_id))
Example #23
    def _init_civis_backend(self):
        """init the Civis API client and the executors"""
        self.using_template = (self.from_template_id is not None)

        if self.max_submit_retries < 0:
            raise ValueError(
                "max_submit_retries cannot be negative (value = %d)" %
                self.max_submit_retries)

        self.client = self.client or civis.APIClient()
        if self.from_template_id:
            self.executor = CustomScriptExecutor(self.from_template_id,
                                                 client=self.client,
                                                 **self.executor_kwargs)
        else:
            self.executor = _ContainerShellExecutor(client=self.client,
                                                    **self.executor_kwargs)
Example #24
def create_user(login, pword):
    client = civis.APIClient(api_key=os.environ['CIVIS_API_KEY'])
    try:
        civis.io.read_civis_sql(
            sql=
            f"SELECT login FROM users.creds WHERE LOWER(login) = '{login.lower()}'",
            database='HRC',
            client=client,
            hidden=True)[1][0]
        raise ValueError
    except civis.base.EmptyResultError:
        return civis.io.query_civis(
            sql=
            f"INSERT INTO users.creds VALUES ({login}, {hash_pword(pword)})",
            database='HRC',
            client=client,
            hidden=True)
Example #25
    def __init__(self, future, callback):
        self._future = future
        self._callback = callback
        self.result = None
        if hasattr(future, 'client'):
            self._client = future.client
        else:
            self._client = civis.APIClient()

        # Download results and trigger the next job as a callback
        # so that we don't have to wait for `get` to be called.
        # Note that the callback of a `concurrent.futures.Future`
        # (which self._future is a subclass of) is called with a
        # single argument, the Future itself.
        self._future.remote_func_output = None  # `get` reads results from here
        self._future.result_fetched = False  # Did we get the result?
        self._future.add_done_callback(
            self._make_fetch_callback(self._callback, self._client))
Example #26
def run_container(dropbox_path):
    client = civis.APIClient()
    script_id = client.scripts.post_containers(
        name=f'Invisible Institute Data Run {dropbox_path}',
        docker_image_name='civisanalytics/datascience-python',
        docker_image_tag='5.0.0',
        required_resources={
            'cpu': 256,
            'memory': 4096,
            'disk_space': 5,
        },
        repo_http_uri='https://github.com/invinst/chicago-police-data.git',
        repo_ref='master',
        docker_command=f'''cd app
pip install -r requirements.txt
python -m get_data.run --path_to_execute {dropbox_path}''',
        params=[{
            'allowed_values': [],
            'default': None,
            'description': None,
            'label': 'Dropbox Credential',
            'name': 'DROPBOX_OAUTH',
            'required': True,
            'type': 'credential_custom',
            'value': None
        }],
        arguments={'DROPBOX_OAUTH': 6644})['id']

    run_id = client.scripts.post_containers_runs(script_id)['id']

    LOG.info(f'Analyzing {dropbox_path} in container script {script_id}'
             f' at run {run_id}')

    future = civis.futures.CivisFuture(client.scripts.get_containers_runs,
                                       (script_id, run_id))

    try:
        result = future.result()
        state = result['state']
        LOG.info(f'Script {script_id} run {run_id} {state}')
    except CivisJobFailure:
        result = client.scripts.get_containers_runs(script_id, run_id)
        state = result['state']
        LOG.warning(f'Error: script {script_id} run {run_id} {state}')
Example #27
def open_postgres_catalog(api_key=None):
    """
    Top-level function to create a PostgreSQL CivisCatalog object.

    Parameters
    ----------
    api_key: Optional[str]
        An API key. If not provided, uses the environment variable CIVIS_API_KEY.

    Returns
    -------
    A CivisCatalog targeting PostgreSQL.
    """
    client = civis.APIClient(api_key)
    hosts = client.remote_hosts.list()
    try:
        db = next(h for h in hosts if h["type"] == POSTGRES_KIND)
    except StopIteration:
        raise RuntimeError("Unable to find PostgreSQL database")
    return CivisCatalog(db["name"], api_key=api_key)
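
A usage sketch, assuming CIVIS_API_KEY is set and that CivisCatalog behaves like an intake catalog:

catalog = open_postgres_catalog()
print(list(catalog))  # assumed intake-style iteration over entry names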
Example #28
def _cancel(scriptids):
    """Cancel running scripts.

    Parameters
    ----------
    scriptids : tuple of ints
        The IDs of the scripts to cancel.
    """
    errored = False
    client = civis.APIClient(resources='all')
    for scriptid in scriptids:
        try:
            client.scripts.post_cancel(scriptid)
        except civis.base.CivisAPIError as e:
            errored = True
            print("Could not cancel script %s: %s.%s: %s" %
                  (scriptid, e.__module__, e.__class__.__name__, e),
                  file=sys.stderr)

    if errored:
        sys.exit(-1)
Example #29
def test_get_storage_host_id():
    client = civis.APIClient(local_api_spec=TEST_SPEC, api_key='none')

    class StorageHost:
        def __init__(self, id, name):
            self.id = id
            self.name = name

        def __getitem__(self, key):
            return getattr(self, key)

    storage_hosts = [StorageHost(1234, 'test'), StorageHost(5678, 'othertest')]
    client.storage_hosts.list = mock.Mock(return_value=storage_hosts)

    assert client.get_storage_host_id('test') == 1234

    client.storage_hosts.list.assert_called_once_with()

    assert client.get_storage_host_id(4732) == 4732
    with pytest.raises(ValueError, match="Storage Host invalidname not found"):
        client.get_storage_host_id('invalidname')
Example #30
    def __init__(
        self,
        database,
        schema="public",
        api_key=None,
        civis_kwargs={},
        has_geometry_column_table=None,
        **kwargs,
    ):
        """
        Construct the Civis Schema.

        Parameters
        ----------
        database: str
            The name of the database.
        schema: str
            The schema to list (defaults to "public").
        api_key: str
            An optional API key. If not given the env variable CIVIS_API_KEY
            will be used.
        has_geometry_column_table: bool
            Whether the database has a "geometry_columns" table, which can be used
            to query for SRID information for a given column. Otherwise we try to
            infer based on whether it is a postgres database.
        civis_kwargs: dict
            Optional kwargs to pass to the sources.
        """
        self._civis_kwargs = civis_kwargs
        self._database = database
        self._api_key = api_key
        self._client = civis.APIClient(api_key)
        if has_geometry_column_table is not None:
            self._has_geom = has_geometry_column_table
        else:
            self._has_geom = "redshift" not in self._database.lower()
        self._dbschema = schema  # Don't shadow self._schema upstream
        kwargs["ttl"] = (kwargs.get("ttl")
                         or 100)  # Bump TTL so as not to load too often.
        super(CivisSchema, self).__init__(**kwargs)
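
A hedged construction sketch for the schema above; the database name is a placeholder, and CivisSchema is assumed to follow the intake catalog interface suggested by the super() call:

schema = CivisSchema(database="MyPostgresDB", schema="public")
print(list(schema))  # assumed intake-style iteration over table entries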