示例#1
0
def _import_bytes(buf, database, table, client, max_errors,
                  existing_table_rows, distkey, sortkey1, sortkey2, delimiter,
                  headers, credential_id, polling_interval, archive, hidden):
    """Upload ``buf`` through a file-import job and return a future for it.

    Parameters
    ----------
    buf
        Payload handed to ``requests.put`` as the body of the upload.
    database : str or int
        Passed to ``client.get_database_id`` to resolve the database ID.
    table : str
        Destination in ``"schema.table"`` form; split on the first ``"."``.
    client
        API client used to create, run and (optionally) archive the import.
    max_errors, existing_table_rows, distkey, sortkey1, sortkey2, hidden
        Forwarded unchanged to ``client.imports.post_files``.
    delimiter : str
        Key into ``DELIMITERS``; the mapped value is sent as
        ``column_delimiter``.
    headers
        Sent as ``first_row_is_header``.
    credential_id
        Credential to use. When falsy, ``client.default_credential`` is
        used instead.
    polling_interval
        Forwarded to :class:`CivisFuture`.
    archive : bool
        If true, the import job is archived once the future is done.

    Returns
    -------
    CivisFuture
        Future tracking the import run.

    Raises
    ------
    ValueError
        If ``table`` contains no ``"."`` or if ``delimiter`` is not a
        usable key of ``DELIMITERS``.

    Notes
    -----
    Errors raised by ``raise_for_status`` on the upload response or on
    the run request are propagated to the caller.
    """
    schema, table = table.split(".", 1)

    # Validate with a real exception, before any network traffic: an
    # ``assert`` disappears under ``python -O`` and would let
    # ``column_delimiter=None`` reach the API.
    delimiter = DELIMITERS.get(delimiter)
    if not delimiter:
        raise ValueError("delimiter must be one of {}"
                         .format(DELIMITERS.keys()))

    db_id = client.get_database_id(database)
    cred_id = credential_id or client.default_credential

    kwargs = dict(schema=schema, name=table, remote_host_id=db_id,
                  credential_id=cred_id, max_errors=max_errors,
                  existing_table_rows=existing_table_rows, distkey=distkey,
                  sortkey1=sortkey1, sortkey2=sortkey2,
                  column_delimiter=delimiter, first_row_is_header=headers,
                  hidden=hidden)

    # Create the import job, then push the bytes to its upload location.
    import_job = client.imports.post_files(**kwargs)
    put_response = requests.put(import_job.upload_uri, buf)
    put_response.raise_for_status()

    # Start the run only after the upload succeeded.
    run_job_result = client._session.post(import_job.run_uri)
    run_job_result.raise_for_status()
    run_info = run_job_result.json()
    fut = CivisFuture(client.imports.get_files_runs,
                      (run_info['importId'], run_info['id']),
                      polling_interval=polling_interval,
                      client=client,
                      poll_on_creation=False)
    if archive:

        def f(x):
            return client.imports.put_archive(import_job.id, True)

        fut.add_done_callback(f)
    return fut
示例#2
0
    def test_outputs_succeeded(self):
        """``outputs()`` equals the mocked ``jobs.list_runs_outputs`` value."""
        succeeded_poller = _create_poller_mock("succeeded")
        client = create_client_mock()
        wanted = [{'test': 'test_result'}]
        client.jobs.list_runs_outputs.return_value = wanted

        future = CivisFuture(succeeded_poller, (1, 2), client=client)
        actual = future.outputs()
        assert actual == wanted
    def test_outputs_succeeded(self):
        """``outputs()`` equals the mocked value after a 'succeeded' result."""
        poller = mock.Mock()
        succeeded = mock.Mock(state='succeeded')
        client = create_client_mock()
        wanted = [{'test': 'test_result'}]
        client.jobs.list_runs_outputs.return_value = wanted

        future = CivisFuture(poller, (1, 2), client=client)
        # Feed the API result in directly instead of relying on the poller.
        future._set_api_result(succeeded)
        actual = future.outputs()
        assert actual == wanted
示例#4
0
 def test_check_message(self, *mocks):
     """``_check_message`` is truthy for a 'succeeded' message (ids 1/20).

     The future is built with poller args ``(1, 20)``.
     """
     future = CivisFuture(lambda x: x, (1, 20))
     payload = {
         'object': {'id': 1},
         'run': {'id': 20, 'state': 'succeeded'},
     }
     accepted = future._check_message(payload)
     self.assertTrue(accepted)
示例#5
0
 def test_check_message_with_different_run_id(self, *mocks):
     """``_check_message`` is falsy when the message's object id is 2.

     The future is built with poller args ``(1, 20)``; the message carries
     object id 2 and run id 20.
     """
     future = CivisFuture(lambda x: x, (1, 20))
     payload = {
         'object': {'id': 2},
         'run': {'id': 20, 'state': 'succeeded'},
     }
     accepted = future._check_message(payload)
     self.assertFalse(accepted)
示例#6
0
 def test_check_message_when_job_is_running(self, *mocks):
     """``_check_message`` is falsy while the run state is 'running'."""
     future = CivisFuture(lambda x: x, (1, 20))
     payload = {
         'object': {'id': 1},
         'run': {'id': 20, 'state': 'running'},
     }
     accepted = future._check_message(payload)
     self.assertFalse(accepted)
示例#7
0
def run_template(id, arguments, JSONValue=False, client=None):
    """Run a template, wait for it to finish, and return its outputs.

    Parameters
    ----------
    id : int
        The template id to be run.
    arguments : dict
        Dictionary of arguments to be passed to the template.
    JSONValue : bool, optional
        If True, return the JSON output of the template.
        If False (the default), return the ids associated with the
        output results.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.

    Returns
    -------
    output : dict or None
        If ``JSONValue`` is False, a dictionary mapping each run output's
        name to its object id.
        If ``JSONValue`` is True, the first output whose ``object_type``
        is ``"JSONValue"``, converted to a ``dict``. When the run
        produced more than one such output, a warning is logged and the
        first one is still returned. When it produced none, a warning is
        logged and ``None`` is returned.
    """
    if client is None:
        client = APIClient()
    job = client.scripts.post_custom(id, arguments=arguments)
    run = client.scripts.post_custom_runs(job.id)
    fut = CivisFuture(client.scripts.get_custom_runs, (job.id, run.id),
                      client=client)
    # Block until the run finishes; only the outputs are used below.
    fut.result()
    outputs = client.scripts.list_custom_runs_outputs(job.id, run.id)

    if not JSONValue:
        return {o.name: o.object_id for o in outputs}

    json_output = [o.value for o in outputs if o.object_type == "JSONValue"]
    if not json_output:
        log.warning("No JSON output for template {}".format(id))
        return None
    if len(json_output) > 1:
        log.warning("More than 1 JSON output for template {}"
                    " -- returning only the first one.".format(id))
    # Note that the cast to a dict is to convert
    # an expected Response object.
    return dict(json_output[0])
示例#8
0
    def test_set_api_result_result_succeeded(self, mock_subscribe, mock_api):
        """A 'succeeded' API result finishes the future without polling."""
        pubnub = mock.Mock()
        pubnub.unsubscribe_all.return_value = None
        mock_subscribe.return_value = pubnub
        poller = mock.Mock()
        succeeded = mock.Mock(state='succeeded')

        future = CivisFuture(poller, (1, 2))
        future._set_api_result(succeeded)

        # The poller is never called and the subscription is dropped once.
        assert poller.call_count == 0
        assert pubnub.unsubscribe_all.call_count == 1
        assert future._state == 'FINISHED'
示例#9
0
def run_job(job_id, api_key=None, client=None, polling_interval=None):
    """Start a run of an existing job and return a future tracking it.

    Parameters
    ----------
    job_id : str or int
        The ID of the job.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    polling_interval : int or float, optional
        The number of seconds between API requests to check whether a
        result is ready.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.
    """
    api = APIClient(api_key=api_key) if client is None else client
    started = api.jobs.post_runs(job_id)
    poller_args = (job_id, started["id"])
    return CivisFuture(api.jobs.get_runs, poller_args, client=api,
                       polling_interval=polling_interval,
                       poll_on_creation=False)
示例#10
0
def query_civis(sql,
                database,
                api_key=None,
                client=None,
                credential_id=None,
                preview_rows=10,
                polling_interval=None,
                hidden=True):
    """Submit a SQL statement as a Civis query and return a future for it.

    Intended for statements that return nothing, or where a small preview
    of the result is enough. For queries returning many rows, see
    :func:`~civis.io.read_civis_sql`.

    Parameters
    ----------
    sql : str
        The SQL statement to execute.
    database : str or int
        The name or ID of the database.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    credential_id : str or int, optional
        The ID of the database credential. If ``None``, the default
        credential will be used.
    preview_rows : int, optional
        The maximum number of rows to return. No more than 100 rows can be
        returned at once.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.

    Examples
    --------
    >>> run = query_civis(sql="DELETE schema.table", database='database')
    >>> run.result()  # Wait for query to complete
    """
    api = APIClient(api_key=api_key) if client is None else client
    db_id = api.get_database_id(database)
    # Any falsy credential_id falls back to the client's default.
    credential = credential_id or api.default_credential
    query = api.queries.post(db_id, sql, preview_rows,
                             credential=credential, hidden=hidden)
    poller_args = (query.id, )
    return CivisFuture(api.queries.get, poller_args, polling_interval,
                       client=api, poll_on_creation=False)
示例#11
0
def run_job(job_id, api_key=None, client=None):
    """Start a run of an existing job and return a future tracking it.

    Parameters
    ----------
    job_id : str or int
        The ID of the job.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.
    """
    api = APIClient(api_key=api_key) if client is None else client
    started = api.jobs.post_runs(job_id)
    poller_args = (job_id, started['id'])
    return CivisFuture(api.jobs.get_runs, poller_args,
                       client=api, poll_on_creation=False)
示例#12
0
def test_future_job_id_run_id(poller_args, expected_job_id, expected_run_id):
    """``job_id`` and ``run_id`` of the future match the expected values."""
    succeeded_poller = _create_poller_mock("succeeded")
    client = create_client_mock()
    future = CivisFuture(poller=succeeded_poller,
                         poller_args=poller_args,
                         client=client)
    assert future.job_id == expected_job_id
    assert future.run_id == expected_run_id
示例#13
0
def test_future_job_id_run_id(poller_args, expected_job_id, expected_run_id):
    """``job_id`` and ``run_id`` of the future match the expected values."""
    client = create_client_mock()
    future = CivisFuture(poller=lambda x: x,
                         poller_args=poller_args,
                         client=client)
    assert future.job_id == expected_job_id
    assert future.run_id == expected_run_id
示例#14
0
    def test_polling_interval(self, *mocks):
        """An explicit ``polling_interval`` is exposed on the future."""
        clear_lru_cache()

        expected = 30
        poller_args = (1, 20)
        future = CivisFuture(lambda x: x, poller_args,
                             polling_interval=expected)
        assert future.polling_interval == expected

        clear_lru_cache()
示例#15
0
    def test_polling_interval(self, *mocks):
        """An explicit interval is kept and no ``_pubnub`` attribute is set."""
        # This tests the fallback to polling when channels is not available.
        # It uses a different api spec than the other tests so it
        # should clear the cached values before and after
        clear_lru_cache()

        expected = 30
        poller_args = (1, 20)
        future = CivisFuture(lambda x: x, poller_args,
                             polling_interval=expected)
        assert future.polling_interval == expected
        assert not hasattr(future, '_pubnub')

        clear_lru_cache()
    def test_set_api_result_failed(self, mock_subscribe, mock_api):
        """A 'failed' API result finishes the future and raises on access."""
        pubnub = mock.Mock()
        pubnub.unsubscribe_all.return_value = None
        mock_subscribe.return_value = pubnub
        poller = mock.Mock()
        failed = mock.Mock(state='failed')

        future = CivisFuture(poller, (1, 2))
        future._set_api_result(failed)

        assert pubnub.unsubscribe_all.call_count == 1
        assert future._state == 'FINISHED'
        # Both accessors must surface the job failure.
        with pytest.raises(CivisJobFailure):
            future.result()
        with pytest.raises(CivisJobFailure):
            future.outputs()
示例#17
0
    def test_set_api_result_failed(self, mock_api, m_sleep):
        """A future built on a 'failed' poller is finished and raises."""
        failed_poller = _create_poller_mock("failed")
        future = CivisFuture(failed_poller, (1, 2))

        assert future._state == 'FINISHED'
        # Both accessors must surface the job failure.
        with pytest.raises(CivisJobFailure):
            future.result()
        with pytest.raises(CivisJobFailure):
            future.outputs()
示例#18
0
def run_job(job_id, api_key=None):
    """Start a run of an existing job and return a future tracking it.

    Parameters
    ----------
    job_id : str or int
        The ID of the job.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.
    """
    api = APIClient(api_key=api_key, resources='all')
    started = api.jobs.post_runs(job_id)
    poller_args = (job_id, started['id'])
    return CivisFuture(api.jobs.get_runs, poller_args,
                       api_key=api_key, poll_on_creation=False)
示例#19
0
def civis_to_csv(filename,
                 sql,
                 database,
                 job_name=None,
                 api_key=None,
                 credential_id=None,
                 archive=False,
                 hidden=True,
                 polling_interval=None):
    """Run a SQL export in Civis and download the result to a local CSV.

    Parameters
    ----------
    filename : str
        Download exported data into this file.
    sql : str
        The SQL select string to be executed.
    database : str or int
        Export data from this database. Can be the database name or ID.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    credential_id : str or int, optional
        The ID of the database credential.  If ``None``, the default
        credential will be used.
    archive : bool, optional (deprecated)
        If ``True``, archive the import job as soon as it completes.
        Emits a ``FutureWarning``.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> fut = civis_to_csv("file.csv", sql, "my_database")
    >>> fut.result()  # Wait for job to complete

    See Also
    --------
    civis.io.read_civis : Read table contents into memory.
    civis.io.read_civis_sql : Read results of a SQL query into memory.
    """
    if archive:
        warnings.warn("`archive` is deprecated and will be removed in v2.0.0. "
                      "Use `hidden` instead.", FutureWarning)

    api = APIClient(api_key=api_key)
    script_id, run_id = _sql_script(api, sql, database, job_name,
                                    credential_id, hidden=hidden)
    future = CivisFuture(api.scripts.get_sql_runs, (script_id, run_id),
                         polling_interval=polling_interval,
                         api_key=api_key,
                         poll_on_creation=False)

    # The download callback is registered first, the optional archive
    # callback second.
    future.add_done_callback(
        _download_callback(script_id, run_id, api, filename))
    if archive:
        future.add_done_callback(
            lambda _: api.scripts.put_sql_archive(script_id, True))

    return future
示例#20
0
def civis_to_multifile_csv(sql, database, job_name=None, api_key=None,
                           client=None, credential_id=None,
                           include_header=True,
                           compression='none', delimiter='|',
                           unquoted=False, prefix=None,
                           polling_interval=None, hidden=True):
    """Unload the result of a SQL query and return its manifest.

    This function is intended for unloading large queries/tables from
    Redshift as it uses a 'PARALLEL ON' S3 unload. It returns a manifest
    similar to that of conventional S3 UNLOAD statements, except the CSV
    parts are accessible via both files endpoint IDs and presigned S3 urls.

    Parameters
    ----------
    sql : str
        The SQL select string to be executed.
    database : str or int
        Execute the query against this database. Can be the database name
        or ID.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    credential_id : str or int, optional
        The database credential ID.  If ``None``, the default credential
        will be used.
    include_header : bool, optional
        If ``True`` include a key in the returned dictionary containing a
        list of column names. Default: ``True``.
    compression : str, optional
        Type of compression to use, if any. One of ``'none'``, ``'zip'``,
        or ``'gzip'``. Default ``'none'``.
    delimiter : str, optional
        Which delimiter to use, if any. One of ``','``, ``'\\t'``, or
        ``'|'``. Default: ``'|'``.
    unquoted : bool, optional
        Whether or not to quote fields. Default: ``False``.
    prefix : str, optional
        A user specified filename prefix for the output file to have.
        Default: ``None``.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.

    Returns
    -------
    unload_manifest : dict
        A dictionary resembling an AWS manifest file. Has the following
        keys: ``'header'``, ``'query'``, ``'entries'``, representing the
        columns from the query, the query itself, and a list of
        dictionaries for each unloaded CSV part, each containing its file
        ``'id'``, ``'name'``, ``'size'``, and unsigned and signed S3 urls,
        ``'url'`` and ``'url_signed'``, respectively.

    Raises
    ------
    ValueError
        If ``delimiter`` is not a usable key of ``DELIMITERS``.
    EmptyResultError
        If the finished run reports no output.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.my_big_table"
    >>> database = "my_database"
    >>> delimiter = "|"
    >>> manifest = civis_to_multifile_csv(sql, database, delimiter=delimiter)
    >>> ids = [entry['id'] for entry in manifest['entries']]
    >>> buf = BytesIO()
    >>> civis_to_file(ids[0], buf)
    >>> buf.seek(0)
    >>> df = pd.read_csv(buf, delimiter=delimiter)

    See Also
    --------
    civis.APIClient.scripts.post_sql
    """
    # Validate with a real exception, before a client is built: an
    # ``assert`` disappears under ``python -O`` and would let
    # ``column_delimiter=None`` reach the API.
    delimiter = DELIMITERS.get(delimiter)
    if not delimiter:
        raise ValueError("delimiter must be one of {}"
                         .format(DELIMITERS.keys()))

    if client is None:
        client = APIClient(api_key=api_key, resources='all')

    csv_settings = dict(include_header=include_header,
                        compression=compression,
                        column_delimiter=delimiter,
                        unquoted=unquoted,
                        filename_prefix=prefix,
                        force_multifile=True)
    script_id, run_id = _sql_script(client, sql, database, job_name,
                                    credential_id, hidden,
                                    csv_settings=csv_settings)

    fut = CivisFuture(client.scripts.get_sql_runs, (script_id, run_id),
                      polling_interval=polling_interval, client=client,
                      poll_on_creation=False)
    # Block until the unload has finished; its first output is the manifest.
    outputs = fut.result()["output"]
    if not outputs:
        raise EmptyResultError("Unload query {} returned no manifest."
                               .format(script_id))

    # NOTE(review): ``client`` is not forwarded to ``civis_to_file`` here,
    # so the download presumably uses a default client -- confirm whether
    # ``civis_to_file`` accepts ``client=`` and pass it through if so.
    buf = io.BytesIO()
    civis_to_file(outputs[0]['file_id'], buf)
    txt = io.TextIOWrapper(buf, encoding='utf-8')
    txt.seek(0)
    unload_manifest = json.load(txt)

    return unload_manifest
示例#21
0
def civis_to_csv(filename, sql, database, job_name=None, api_key=None,
                 client=None, credential_id=None, include_header=True,
                 compression='none', delimiter=',', unquoted=False,
                 archive=False, hidden=True, polling_interval=None):
    """Run a SQL export in Civis and download the result to a local CSV.

    Parameters
    ----------
    filename : str
        Download exported data into this file.
    sql : str
        The SQL select string to be executed.
    database : str or int
        Export data from this database. Can be the database name or ID.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    credential_id : str or int, optional
        The ID of the database credential.  If ``None``, the default
        credential will be used.
    include_header : bool, optional
        If ``True``, the first line of the CSV will be headers.
        Default: ``True``.
    compression : str, optional
        Type of compression to use, if any. One of ``'none'``, ``'zip'``,
        or ``'gzip'``. Default ``'none'``.
    delimiter : str, optional
        Which delimiter to use, if any. One of ``','``, ``'\\t'``, or
        ``'|'``. Default: ``','``.
    unquoted : bool, optional
        Whether or not to quote fields. Default: ``False``.
    archive : bool, optional (deprecated)
        If ``True``, archive the import job as soon as it completes.
        Emits a ``FutureWarning``.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.

    Raises
    ------
    ValueError
        If ``delimiter`` is not a usable key of ``DELIMITERS``.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> fut = civis_to_csv("file.csv", sql, "my_database")
    >>> fut.result()  # Wait for job to complete

    See Also
    --------
    civis.io.read_civis : Read table contents into memory.
    civis.io.read_civis_sql : Read results of a SQL query into memory.
    """
    if archive:
        warnings.warn(
            "`archive` is deprecated and will be removed in v2.0.0. "
            "Use `hidden` instead.", FutureWarning)
    api = (APIClient(api_key=api_key, resources='all')
           if client is None else client)

    column_delimiter = DELIMITERS.get(delimiter)
    if not column_delimiter:
        raise ValueError("delimiter must be one of {}"
                         .format(DELIMITERS.keys()))

    # Single-file export: no filename prefix and no forced multifile.
    csv_settings = {
        'include_header': include_header,
        'compression': compression,
        'column_delimiter': column_delimiter,
        'unquoted': unquoted,
        'filename_prefix': None,
        'force_multifile': False,
    }
    script_id, run_id = _sql_script(api, sql, database, job_name,
                                    credential_id, hidden=hidden,
                                    csv_settings=csv_settings)
    future = CivisFuture(api.scripts.get_sql_runs, (script_id, run_id),
                         polling_interval=polling_interval, client=api,
                         poll_on_creation=False)

    # The download callback is registered first, the optional archive
    # callback second.
    future.add_done_callback(
        _download_callback(script_id, run_id, api, filename))
    if archive:
        future.add_done_callback(
            lambda _: api.scripts.put_sql_archive(script_id, True))

    return future
示例#22
0
def read_civis_sql(sql, database, use_pandas=False, job_name=None,
                   api_key=None, client=None, credential_id=None,
                   polling_interval=None, archive=False,
                   hidden=True, **kwargs):
    """Run a custom SQL string in Civis and load the result into memory.

    Parameters
    ----------
    sql : str
        The SQL select string to be executed.
    database : str or int
        Execute the query against this database. Can be the database name
        or ID.
    use_pandas : bool, optional
        If ``True``, return a :class:`pandas:pandas.DataFrame`. Otherwise,
        return a list of results from :func:`python:csv.reader`.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    credential_id : str or int, optional
        The database credential ID.  If ``None``, the default credential
        will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    archive : bool, optional (deprecated)
        If ``True``, archive the import job as soon as it completes.
        Emits a ``FutureWarning``.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.
    **kwargs : kwargs
        Extra keyword arguments are passed into
        :func:`pandas:pandas.read_csv` if `use_pandas` is ``True`` or
        passed into :func:`python:csv.reader` if `use_pandas` is
        ``False``.

    Returns
    -------
    data : :class:`pandas:pandas.DataFrame` or list
        A list of rows (with header as first row) if `use_pandas` is
        ``False``, otherwise a `pandas` `DataFrame`. Note that if
        `use_pandas` is ``False``, no parsing of types is performed and
        each row will be a list of strings.

    Raises
    ------
    ImportError
        If `use_pandas` is ``True`` and `pandas` is not installed.
    EmptyResultError
        If the finished run reports no output.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> df = read_civis_sql(sql, "my_database", use_pandas=True)
    >>> col_a = df["column_a"]

    >>> data = read_civis_sql(sql, "my_database")
    >>> columns = data.pop(0)
    >>> col_a_index = columns.index("column_a")
    >>> col_a = [row[col_a_index] for row in data]

    Notes
    -----
    This reads the data into memory.

    See Also
    --------
    civis.io.read_civis : Read directly into memory without SQL.
    civis.io.civis_to_csv : Write directly to a CSV file.
    """
    api = (APIClient(api_key=api_key, resources='all')
           if client is None else client)
    if use_pandas and NO_PANDAS:
        raise ImportError("use_pandas is True but pandas is not installed.")
    if archive:
        warnings.warn(
            "`archive` is deprecated and will be removed in v2.0.0. "
            "Use `hidden` instead.", FutureWarning)

    script_id, run_id = _sql_script(api, sql, database,
                                    job_name, credential_id,
                                    hidden=hidden)
    future = CivisFuture(api.scripts.get_sql_runs, (script_id, run_id),
                         polling_interval=polling_interval, client=api,
                         poll_on_creation=False)
    if archive:
        future.add_done_callback(
            lambda _: api.scripts.put_sql_archive(script_id, True))

    # Wait for completion, then re-read the run to get its output list.
    future.result()
    outputs = api.scripts.get_sql_runs(script_id, run_id)["output"]
    if not outputs:
        raise EmptyResultError("Query {} returned no output."
                               .format(script_id))

    url = outputs[0]["path"]
    if use_pandas:
        return pd.read_csv(url, **kwargs)

    response = requests.get(url)
    response.raise_for_status()
    rows = csv.reader(StringIO(response.text), **kwargs)
    return list(rows)
示例#23
0
 def test_poller_call_count_poll_on_creation_false(self, mock_api):
     """With ``poll_on_creation=False`` the poller is not called."""
     succeeded_poller = _create_poller_mock("succeeded")
     poller_args = (1, 2)
     CivisFuture(succeeded_poller, poller_args, poll_on_creation=False)
     assert succeeded_poller.call_count == 0
示例#24
0
 def test_overwrite_polling_interval_with_channels(self, *mocks):
     """Without an explicit interval, the long polling interval is used.

     The future is also expected to carry a ``_pubnub`` attribute.
     """
     poller_args = (1, 20)
     future = CivisFuture(lambda x: x, poller_args)
     assert future.polling_interval == _LONG_POLLING_INTERVAL
     assert hasattr(future, '_pubnub')
示例#25
0
 def test_explicit_polling_interval_with_channels(self, *mocks):
     """An explicit interval of 5 is kept; ``_pubnub`` is still set."""
     poller_args = (1, 20)
     future = CivisFuture(lambda x: x, poller_args, polling_interval=5)
     assert future.polling_interval == 5
     assert hasattr(future, '_pubnub')
示例#26
0
def read_civis_sql(sql,
                   database,
                   use_pandas=False,
                   job_name=None,
                   api_key=None,
                   client=None,
                   credential_id=None,
                   polling_interval=None,
                   archive=False,
                   hidden=True,
                   **kwargs):
    """Read data from Civis using a custom SQL string.

    The custom SQL string will be executed twice; once to attempt to
    retrieve headers and once to retrieve the data. This is done to
    use a more performant method for retrieving the data. The first
    execution of the custom SQL is controlled such that changes in
    state cannot occur (e.g., INSERT, UPDATE, DELETE, etc.).

    Parameters
    ----------
    sql : str
        The SQL select string to be executed.
    database : str or int
        Execute the query against this database. Can be the database name
        or ID.
    use_pandas : bool, optional
        If ``True``, return a :class:`pandas:pandas.DataFrame`. Otherwise,
        return a list of results from :func:`python:csv.reader`.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    credential_id : str or int, optional
        The database credential ID.  If ``None``, the default credential
        will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.
    archive : bool, optional (deprecated)
        If ``True``, archive the SQL script as soon as its run completes.
        Passing a true value emits a ``FutureWarning``.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.
    **kwargs : kwargs
        Extra keyword arguments are passed into
        :func:`pandas:pandas.read_csv` if `use_pandas` is ``True`` or
        passed into :func:`python:csv.reader` if `use_pandas` is
        ``False``. When headers were retrieved separately, ``escapechar``
        is forced to a backslash.

    Returns
    -------
    data : :class:`pandas:pandas.DataFrame` or list
        A list of rows (with header as first row) if `use_pandas` is
        ``False``, otherwise a `pandas` `DataFrame`. Note that if
        `use_pandas` is ``False``, no parsing of types is performed and
        each row will be a list of strings.

    Raises
    ------
    ImportError
        If `use_pandas` is ``True`` and `pandas` is not installed.
    EmptyResultError
        If the finished run reports no output.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> df = read_civis_sql(sql, "my_database", use_pandas=True)
    >>> col_a = df["column_a"]

    >>> data = read_civis_sql(sql, "my_database")
    >>> columns = data.pop(0)
    >>> col_a_index = columns.index("column_a")
    >>> col_a = [row[col_a_index] for row in data]

    Notes
    -----
    This reads the data into memory.

    See Also
    --------
    civis.io.read_civis : Read directly into memory without SQL.
    civis.io.civis_to_csv : Write directly to a CSV file.
    """
    if client is None:
        client = APIClient(api_key=api_key, resources='all')
    if use_pandas and NO_PANDAS:
        raise ImportError("use_pandas is True but pandas is not installed.")
    if archive:
        warnings.warn(
            "`archive` is deprecated and will be removed in v2.0.0. "
            "Use `hidden` instead.", FutureWarning)

    db_id = client.get_database_id(database)
    credential_id = credential_id or client.default_credential

    # determine if we can request headers separately; if we can then Platform
    # will perform a parallel unload which is significantly more performant
    # we start by assuming headers are requested
    ovrd_include_header, headers = _include_header(client, sql, True, db_id,
                                                   credential_id,
                                                   polling_interval)

    # if we retrieved headers then we are performing a parallel unload
    # in which case we need to specify backslash as the escapechar
    if headers is not None:
        kwargs['escapechar'] = '\\'

    csv_settings = dict(include_header=ovrd_include_header, compression='gzip')

    script_id, run_id = _sql_script(client,
                                    sql,
                                    db_id,
                                    job_name,
                                    credential_id,
                                    csv_settings=csv_settings,
                                    hidden=hidden)
    fut = CivisFuture(client.scripts.get_sql_runs, (script_id, run_id),
                      polling_interval=polling_interval,
                      client=client,
                      poll_on_creation=False)
    if archive:

        def f(x):
            return client.scripts.put_sql_archive(script_id, True)

        fut.add_done_callback(f)

    # Block until the run finishes; the output list is then re-fetched.
    fut.result()
    outputs = client.scripts.get_sql_runs(script_id, run_id)["output"]
    if not outputs:
        raise EmptyResultError(
            "Query {} returned no output.".format(script_id))

    url = outputs[0]["path"]
    file_id = outputs[0]["file_id"]
    log.debug('Exported results to Civis file %s (%s)',
              outputs[0]["output_name"], file_id)

    if use_pandas:
        # allows users to enter their own names parameter
        _kwargs = {'names': headers}
        _kwargs.update(kwargs)
        # the export is always requested gzip-compressed (see csv_settings)
        _kwargs['compression'] = 'gzip'

        data = pd.read_csv(url, **_kwargs)
    else:
        response = requests.get(url, stream=True)
        try:
            response.raise_for_status()

            with StringIO() as buf:
                # separately-retrieved headers are not part of the exported
                # file, so they are prepended as the first row
                if headers:
                    buf.write(','.join(headers) + '\n')
                _decompress_stream(response, buf, write_bytes=False)
                buf.seek(0)
                data = list(csv.reader(buf, **kwargs))
        finally:
            # A streamed response keeps its connection checked out until the
            # body is fully consumed or the response is closed; close it
            # explicitly so an error above cannot leak the connection.
            response.close()

    return data
示例#27
0
def civis_to_csv(filename,
                 sql,
                 database,
                 job_name=None,
                 api_key=None,
                 client=None,
                 credential_id=None,
                 include_header=True,
                 compression='none',
                 delimiter=',',
                 unquoted=False,
                 archive=False,
                 hidden=True,
                 polling_interval=None):
    """Export the results of a SQL query from Civis to a local CSV file.

    A SQL script run is started and a :class:`~civis.futures.CivisFuture`
    tracking it is returned immediately. A download callback for
    `filename` is attached to the future, so this function does not itself
    wait for the run to finish.

    Parameters
    ----------
    filename : str
        Download exported data into this file.
    sql : str
        The SQL select string to be executed.
    database : str or int
        Export data from this database. Can be the database name or ID.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    credential_id : str or int, optional
        The ID of the database credential.  If ``None``, the default
        credential will be used.
    include_header : bool, optional
        If ``True``, the first line of the CSV will be headers.
        Default: ``True``.
    compression : str, optional
        Type of compression to use, if any. One of ``'none'``, ``'zip'``, or
        ``'gzip'``. Default ``'none'``. ``'gzip'`` currently returns a file
        with no compression unless include_header is set to False. In a
        future release, a ``'gzip'`` compressed file will be returned for
        all cases.
    delimiter : str, optional
        Which delimiter to use, if any. One of ``','``, ``'\\t'``, or
        ``'|'``. Default: ``','``.
    unquoted : bool, optional
        Whether or not to quote fields. Default: ``False``.
    archive : bool, optional (deprecated)
        If ``True``, archive the SQL script as soon as its run completes.
        Passing a true value emits a ``FutureWarning``.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for query completion.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.

    Raises
    ------
    ValueError
        If `delimiter` is not one of the supported delimiters.

    Examples
    --------
    >>> sql = "SELECT * FROM schema.table"
    >>> fut = civis_to_csv("file.csv", sql, "my_database")
    >>> fut.result()  # Wait for job to complete

    See Also
    --------
    civis.io.read_civis : Read table contents into memory.
    civis.io.read_civis_sql : Read results of a SQL query into memory.
    """
    if archive:
        warnings.warn(
            "`archive` is deprecated and will be removed in v2.0.0. "
            "Use `hidden` instead.", FutureWarning)
    if client is None:
        client = APIClient(api_key=api_key, resources='all')

    database_id = client.get_database_id(database)
    cred_id = credential_id or client.default_credential

    # Known quirk kept on purpose to avoid a breaking change: asking for
    # gzip together with a header row is downgraded to 'none' for the
    # download step, so the local file ends up uncompressed.
    download_compression = compression
    if include_header and compression == 'gzip':
        download_compression = 'none'

    column_delimiter = DELIMITERS.get(delimiter)
    if not column_delimiter:
        raise ValueError("delimiter must be one of {}".format(
            DELIMITERS.keys()))

    # The export itself is always requested gzip-compressed to reduce I/O,
    # independent of the compression of the file written locally.
    export_settings = {
        'include_header': include_header,
        'compression': 'gzip',
        'column_delimiter': column_delimiter,
        'unquoted': unquoted,
        'filename_prefix': None,
        'force_multifile': False,
    }

    script_id, run_id = _sql_script(client, sql, database_id, job_name,
                                    cred_id, hidden=hidden,
                                    csv_settings=export_settings)
    fut = CivisFuture(client.scripts.get_sql_runs, (script_id, run_id),
                      polling_interval=polling_interval,
                      client=client,
                      poll_on_creation=False)

    # Parallel unload is not supported here (its output format differs and
    # would be a breaking change), so no separate header bytes are passed
    # to the download callback.
    fut.add_done_callback(
        _download_callback(script_id, run_id, filename, b'',
                           download_compression))

    if archive:

        def _archive_script(_finished):
            return client.scripts.put_sql_archive(script_id, True)

        fut.add_done_callback(_archive_script)

    return fut
示例#28
0
def civis_file_to_table(file_id,
                        database,
                        table,
                        client=None,
                        max_errors=None,
                        existing_table_rows="fail",
                        diststyle=None,
                        distkey=None,
                        sortkey1=None,
                        sortkey2=None,
                        delimiter=",",
                        headers=None,
                        credential_id=None,
                        polling_interval=None,
                        hidden=True):
    """Upload the contents of a Civis file to a Civis table.

    Parameters
    ----------
    file_id : int
        Civis file ID.
    database : str or int
        Upload data into this database. Can be the database name or ID.
    table : str
        The schema and table you want to upload to. E.g.,
        ``'scratch.table'``.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    max_errors : int, optional
        The maximum number of rows with errors to remove from the import
        before failing.
    existing_table_rows : str, optional
        The behaviour if a table with the requested name already exists.
        One of ``'fail'``, ``'truncate'``, ``'append'`` or ``'drop'``.
        Defaults to ``'fail'``.
    diststyle : str, optional
        The distribution style for the table.
        One of ``'even'``, ``'all'`` or ``'key'``.
    distkey : str, optional
        The column to use as the distkey for the table.
    sortkey1 : str, optional
        The column to use as the sortkey for the table.
    sortkey2 : str, optional
        The second column in a compound sortkey for the table.
    delimiter : string, optional
        The column delimiter. One of ``','``, ``'\\t'`` or ``'|'``.
    headers : bool, optional
        Whether or not the first row of the file should be treated as
        headers. The default, ``None``, attempts to autodetect whether
        or not the first row contains headers.
    credential_id : str or int, optional
        The ID of the database credential.  If ``None``, the default
        credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for job completion.
    hidden : bool, optional
        If ``True`` (the default), this job will not appear in the Civis UI.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.

    Raises
    ------
    ValueError
        If `table` does not contain a ``'.'`` separating schema and table
        name, or if `delimiter` is not one of the supported delimiters.

    Examples
    --------
    >>> file_id = 100
    >>> fut = civis.io.civis_file_to_table(file_id,
    ...                                    'my-database',
    ...                                    'scratch.my_data')
    >>> fut.result()
    """
    if client is None:
        client = APIClient(resources='all')

    # Validate the arguments up front, before any job is created remotely.
    try:
        schema, table = table.split(".", 1)
    except ValueError:
        # Same exception type as the bare unpacking error, clearer message.
        raise ValueError(
            "table must include a schema, e.g. 'scratch.table'; "
            "got {!r}".format(table))

    # Raise instead of ``assert``: assertions are stripped under ``-O`` and
    # would let an unsupported delimiter reach the API as ``None``.
    column_delimiter = DELIMITERS.get(delimiter)
    if not column_delimiter:
        raise ValueError("delimiter must be one of {}".format(
            DELIMITERS.keys()))

    db_id = client.get_database_id(database)
    cred_id = credential_id or client.default_credential

    destination = dict(remote_host_id=db_id, credential_id=cred_id)
    import_name = 'CSV import to {}.{}'.format(schema, table)
    import_job = client.imports.post(import_name,
                                     'AutoImport',
                                     is_outbound=False,
                                     destination=destination,
                                     hidden=hidden)

    options = dict(max_errors=max_errors,
                   existing_table_rows=existing_table_rows,
                   diststyle=diststyle,
                   distkey=distkey,
                   sortkey1=sortkey1,
                   sortkey2=sortkey2,
                   column_delimiter=column_delimiter,
                   first_row_is_header=headers)

    # Attach a single sync (file -> schema.table) to the new import job.
    client.imports.post_syncs(
        import_job.id,
        source=dict(file=dict(id=file_id)),
        destination=dict(database_table=dict(schema=schema, table=table)),
        advanced_options=options)

    run = client.jobs.post_runs(import_job.id)
    fut = CivisFuture(client.jobs.get_runs, (import_job.id, run['id']),
                      polling_interval=polling_interval,
                      client=client,
                      poll_on_creation=False)

    return fut
示例#29
0
def transfer_table(source_db, dest_db, source_table, dest_table,
                   job_name=None, api_key=None, client=None,
                   source_credential_id=None, dest_credential_id=None,
                   polling_interval=None, **advanced_options):
    """Copy a table from one database to another via a "Dbsync" import.

    An import job is created between the two databases, a single sync
    from `source_table` to `dest_table` is attached to it, and a run of
    the job is started.

    Parameters
    ----------
    source_db : str or int
        The name of the database where the source table is located.
        Optionally, could be the database ID.
    dest_db : str or int
        The name of the database where the table will be transferred.
        Optionally, could be the database ID.
    source_table : str
        Full name of the table to transfer, e.g., ``'schema.table'``.
    dest_table : str
        Full name of the table in the destination database, e.g.,
        ``'schema.table'``.
    job_name : str, optional
        A name to give the job. If omitted, a random job name will be
        used.
    api_key : DEPRECATED str, optional
        Your Civis API key. If not given, the :envvar:`CIVIS_API_KEY`
        environment variable will be used.
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.
    source_credential_id : str or int, optional
        Optional credential ID for the source database. If ``None``, the
        default credential will be used.
    dest_credential_id : str or int, optional
        Optional credential ID for the destination database. If ``None``,
        the default credential will be used.
    polling_interval : int or float, optional
        Number of seconds to wait between checks for job completion.
    **advanced_options : kwargs
        Extra keyword arguments will be passed to the import sync job. See
        :func:`~civis.resources._resources.Imports.post_syncs`.

    Returns
    -------
    results : :class:`~civis.futures.CivisFuture`
        A `CivisFuture` object.

    Examples
    --------
    >>> transfer_table(source_db='Cluster A', dest_db='Cluster B',
    ...                source_table='schma.tbl', dest_table='schma.tbl')
    """
    if client is None:
        client = APIClient(api_key=api_key)

    # Credentials are resolved first, then the job name, then each
    # database ID -- the client is queried in this fixed order.
    src_cred = source_credential_id or client.default_credential
    dst_cred = dest_credential_id or client.default_credential
    job_name = maybe_get_random_name(job_name)
    src_endpoint = dict(remote_host_id=client.get_database_id(source_db),
                        credential_id=src_cred)
    dst_endpoint = dict(remote_host_id=client.get_database_id(dest_db),
                        credential_id=dst_cred)

    import_job = client.imports.post(job_name, "Dbsync", True,
                                     source=src_endpoint,
                                     destination=dst_endpoint)
    job_id = import_job.id

    client.imports.post_syncs(id=job_id,
                              source=dict(path=source_table),
                              destination=dict(path=dest_table),
                              advanced_options=advanced_options)

    started_run = client.imports.post_runs(id=job_id)
    run_id = started_run.run_id
    log.debug('Started run %d of sync for import %d', run_id, job_id)

    return CivisFuture(client.imports.get_files_runs, (job_id, run_id),
                       polling_interval=polling_interval, client=client,
                       poll_on_creation=False)
示例#30
0
 def test_set_api_result_succeeded(self, mock_api):
     """A future built on a "succeeded" poller ends up ``'FINISHED'``."""
     succeeded_poller = _create_poller_mock("succeeded")
     fut = CivisFuture(succeeded_poller, (1, 2))
     assert fut._state == 'FINISHED'