Exemplo n.º 1
0
def test_token_refresh():
    """
    Check that a client seeded with an invalid access token ends up with a different one.

    The client is created with ``access_token="invalid"``, then used for a count and a
    full query on the rigs dataset. The number of records yielded by the query must match
    the reported count, and the client's access token must no longer be the invalid one.

    Also verifies that building a client without any credentials raises DAAuthException.

    :return:
    """
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
        access_token="invalid",
    )
    invalid_token = d2.access_token
    count = d2.count("rigs", deleteddate="null")
    query = d2.query("rigs", pagesize=10000, deleteddate="null")
    # Count lazily rather than materialising every record in a list
    assert sum(1 for _ in query) == count
    assert invalid_token != d2.access_token

    # Neg - test client with no credentials. The else branch makes the check
    # fail when no exception is raised instead of passing silently.
    try:
        DirectAccessV2(
            api_key=None, client_id=None, client_secret=None, log_level=LOG_LEVEL
        )
    except DAAuthException:
        pass
    else:
        raise AssertionError(
            "DirectAccessV2 did not raise DAAuthException for missing credentials"
        )
Exemplo n.º 2
0
def load(endpoint, **options):
    """
    A generic load function that will be called by each of the three processes.

    Queries the given endpoint and writes every record to ``<endpoint>.csv`` in the
    current working directory. The header row is taken from the keys of the first record.
    Progress is printed each time the number of written records is a multiple of the
    ``pagesize`` option (100000 when the option is not supplied).

    :param endpoint: the Direct Access API endpoint
    :param options: the query parameters to provide on the endpoint
    :return:
    """
    # Create a DirectAccessV2 client within the function, providing it our already existing access token
    # and thus avoiding unnecessary authentication calls
    client = DirectAccessV2(
        api_key=os.getenv('DIRECTACCESS_API_KEY'),
        client_id=os.getenv('DIRECTACCESS_CLIENT_ID'),
        client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET'),
        access_token=ACCESS_TOKEN
    )

    # The progress interval does not change while iterating, so look it up once
    progress_interval = options.get('pagesize', 100000)

    # Start at 0 so an empty result reports "Final count: 0" rather than "None"
    count = 0
    # newline='' lets the csv module control line endings itself; without it,
    # platforms that translate '\n' on write end up with blank lines between rows
    with open(endpoint + '.csv', mode='w', newline='') as f:
        writer = csv.writer(f)
        for count, row in enumerate(client.query(endpoint, **options), start=1):
            if count == 1:
                # First record: emit the header before any data
                writer.writerow(row.keys())
            writer.writerow(row.values())

            if count % progress_interval == 0:
                print('Wrote {} records for {}'.format(count, endpoint))

    print('Completed writing {}. Final count: {}'.format(endpoint, count))
Exemplo n.º 3
0
def test_ddl():
    """
    Check that the DDL returned for the rigs dataset starts with a CREATE statement.

    Also verifies that requesting DDL for an unsupported database raises DAQueryException.

    :return:
    """
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )
    ddl = d2.ddl("rigs", database="pg")
    # Round-trip through a temporary file and inspect only its first line
    with TemporaryFile(mode="w+") as f:
        f.write(ddl)
        f.seek(0)
        for line in f:
            assert line.split(" ")[0] == "CREATE"
            break

    # Neg - test ddl with invalid database parameter. The else branch makes the
    # check fail when no exception is raised instead of passing silently.
    try:
        d2.ddl("rigs", database="invalid")
    except DAQueryException:
        pass
    else:
        raise AssertionError(
            "ddl() did not raise DAQueryException for an invalid database"
        )
Exemplo n.º 4
0
def test_dataframe():
    """
    Load the rigs dataset into a DataFrame and check its index name and column dtypes.

    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
    )
    frame = client.to_dataframe("rigs", pagesize=10000, deleteddate="null")

    # The index must be named after the endpoint's "primary key"
    assert frame.index.name == "RigID"

    # Date columns are expected to be datetime64[ns]
    for column in ("CreatedDate", "DeletedDate", "SpudDate", "UpdatedDate"):
        assert is_datetime64_ns_dtype(getattr(frame, column))

    # Depth columns are expected to be Int64
    for column in ("PermitDepth", "FormationDepth"):
        assert is_int64_dtype(getattr(frame, column))

    # Coordinate columns are expected to be floats
    for column in ("RigLatitudeWGS84", "RigLongitudeWGS84"):
        assert is_float_dtype(getattr(frame, column))
Exemplo n.º 5
0
def test_csv():
    """
    Write Direct Access query results to CSV

    The rigs query is written to a CSV file inside a fresh temporary directory. The file
    must contain one row per record reported by ``count`` plus one header row. The
    temporary directory is removed afterwards, whether or not the assertions pass.

    :return:
    """
    import shutil

    tempdir = mkdtemp()
    try:
        path = os.path.join(tempdir, 'rigs.csv')
        d2 = DirectAccessV2(api_key=DIRECTACCESS_API_KEY,
                            client_id=DIRECTACCESS_CLIENT_ID,
                            client_secret=DIRECTACCESS_CLIENT_SECRET,
                            retries=5,
                            backoff_factor=10,
                            log_level=LOG_LEVEL,
                            access_token=DIRECTACCESS_ACCESS_TOKEN)

        dataset = 'rigs'
        options = dict(pagesize=10000, deleteddate='null')
        count = d2.count(dataset, **options)
        query = d2.query(dataset, **options)
        d2.to_csv(query,
                  path=path,
                  log_progress=True,
                  delimiter=',',
                  quoting=csv.QUOTE_MINIMAL)

        with open(path, mode='r') as f:
            reader = csv.reader(f)
            # Count rows lazily rather than building a list of every row
            row_count = sum(1 for _ in reader)
            # One extra row for the header
            assert row_count == (count + 1)
    finally:
        # mkdtemp() leaves cleanup to the caller; do not leak the directory
        shutil.rmtree(tempdir, ignore_errors=True)
Exemplo n.º 6
0
def set_token():
    """
    Ensure the DIRECTACCESS_TOKEN environment variable holds an access token.

    When the variable is unset or empty, a client is built from the credentials in the
    environment and its access token is stored in the variable. Otherwise nothing happens.

    :return:
    """
    if os.environ.get("DIRECTACCESS_TOKEN"):
        # A token is already available; nothing to do
        return

    client = DirectAccessV2(
        client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"),
        client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"),
        api_key=os.environ.get("DIRECTACCESS_API_KEY"),
    )
    os.environ["DIRECTACCESS_TOKEN"] = client.access_token
Exemplo n.º 7
0
def test_docs():
    """
    Request the docs for the well-origins dataset and check the type of the response.

    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )
    docs = client.docs("well-origins")
    # A falsy response is tolerated; anything else must be a list
    assert not docs or isinstance(docs, list)
Exemplo n.º 8
0
def test_v2_query():
    """
    Query the rigs dataset and check that at least one record comes back.

    Iteration stops after the first 1000 records.

    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )

    rows = client.query("rigs", pagesize=10000, deleteddate="null")
    records = []
    for row in rows:
        records.append(row)
        # Stop once 1000 records have been collected
        if len(records) == 1000:
            break
    assert records
Exemplo n.º 9
0
def test_count():
    """
    Check that counting the rigs dataset returns an integer.

    Also verifies that counting an invalid dataset raises DADatasetException.

    :return:
    """
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        access_token=DIRECTACCESS_TOKEN,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
    )
    count = d2.count("rigs", deleteddate="null")
    assert count is not None
    assert isinstance(count, int)

    # Neg - test count for invalid dataset. The else branch makes the check
    # fail when no exception is raised instead of passing silently.
    try:
        d2.count("invalid")
    except DADatasetException:
        pass
    else:
        raise AssertionError(
            "count() did not raise DADatasetException for an invalid dataset"
        )
Exemplo n.º 10
0
def test_multiple_processes():
    """
    Launch two child processes, one for rigs and one for permits.

    Both children receive the same access token. When no token is available in
    DIRECTACCESS_ACCESS_TOKEN, one is obtained from a new client and stored in the
    DIRECTACCESS_ACCESS_TOKEN environment variable.

    The test fails if either child exits with a non-zero exit code.

    :return:
    """
    if not DIRECTACCESS_ACCESS_TOKEN:
        access_token = DirectAccessV2(
            api_key=DIRECTACCESS_API_KEY,
            client_id=DIRECTACCESS_CLIENT_ID,
            client_secret=DIRECTACCESS_CLIENT_SECRET,
            retries=5,
            backoff_factor=10
        ).access_token
        os.environ['DIRECTACCESS_ACCESS_TOKEN'] = access_token
    else:
        access_token = DIRECTACCESS_ACCESS_TOKEN

    procs = [
        Process(
            target=query,
            kwargs=dict(
                endpoint=endpoint,
                access_token=access_token
            )
        )
        for endpoint in ('rigs', 'permits')
    ]

    # Start every child before joining any of them so they run concurrently
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    # An uncaught exception in a child (including a failed assert) is only
    # visible to the parent as a non-zero exit code, so check it explicitly
    for proc in procs:
        assert proc.exitcode == 0, 'Child process {} exited with code {}'.format(
            proc.name, proc.exitcode
        )
Exemplo n.º 11
0
def query(endpoint, access_token, **options):
    """
    Query method target for multiprocessing child processes.

    Builds a client with the supplied access token, runs the query and asserts that it
    yields at least one record.

    :param endpoint: a valid Direct Access API dataset endpoint
    :param access_token: a Direct Access API access token
    :param options: kwargs of valid query parameters for the dataset endpoint
    :return:
    """
    client = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=5,
        access_token=access_token,
        log_level=LOG_LEVEL,
    )

    resp = client.query(endpoint, **options)
    # Asserting on the iterator object itself is always true, so pull the first
    # record and check that instead
    first_record = next(resp, None)
    assert first_record is not None, 'No records returned for {}'.format(endpoint)
Exemplo n.º 12
0
When provided, an initial authentication request will not be made. We still provide our API Key, Client ID
and Client Secret to the class so that the access token can be refreshed if needed.

In the sample below, we simultaneously write three CSVs from the producing-entities, well-rollups and permits
API endpoints. Running the loads in parallel is much faster than running them sequentially.
"""
import os
import csv
from multiprocessing import Process

from directaccess import DirectAccessV2

# Build one client from the credentials in the environment and keep its access token,
# so the token can be handed to the clients created later on
ACCESS_TOKEN = DirectAccessV2(
    api_key=os.environ.get('DIRECTACCESS_API_KEY'),
    client_id=os.environ.get('DIRECTACCESS_CLIENT_ID'),
    client_secret=os.environ.get('DIRECTACCESS_CLIENT_SECRET')
).access_token


def load(endpoint, **options):
    """
    A generic load function that will be called by each of the three processes.

    :param endpoint: the Direct Access API endpoint
    :param options: the query parameters to provide on the endpoint
    :return:
    """
    # Create a DirectAccessV2 client within the function, providing it our already existing access token
    # and thus avoiding unnecessary authentication calls
    client = DirectAccessV2(
Exemplo n.º 13
0
"""

import os
import logging
from multiprocessing import Process

from directaccess import DirectAccessV2

# Credentials come from the environment; each is None when its variable is not set
DIRECTACCESS_API_KEY = os.getenv('DIRECTACCESS_API_KEY')
DIRECTACCESS_CLIENT_ID = os.getenv('DIRECTACCESS_CLIENT_ID')
DIRECTACCESS_CLIENT_SECRET = os.getenv('DIRECTACCESS_CLIENT_SECRET')

# When no access token is present in the environment, obtain one from a new client
# and store it in the environment
if not os.getenv('DIRECTACCESS_ACCESS_TOKEN'):
    access_token = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
    ).access_token
    os.environ['DIRECTACCESS_ACCESS_TOKEN'] = access_token
DIRECTACCESS_ACCESS_TOKEN = os.getenv('DIRECTACCESS_ACCESS_TOKEN')

# Log at DEBUG by default, but only at ERROR when CIRCLE_JOB is set in the environment
LOG_LEVEL = logging.ERROR if os.getenv('CIRCLE_JOB') else logging.DEBUG


def query(endpoint, access_token, **options):
    """
    Query method target for multiprocessing child processes.

    :param endpoint: a valid Direct Access API dataset endpoint
Exemplo n.º 14
0
def test_query():
    """
    Authenticate and query Direct Access API for docs, ddl, count and query methods

    Covers, in order: a V1 query, V2 docs, V2 DDL, V2 count, the negative cases for
    count and DDL, a V2 query, token refresh from an invalid access token, and
    construction of a client without credentials.

    :return:
    """
    # Test V1 query
    d1 = DirectAccessV1(
        api_key=DIRECTACCESS_API_KEY,
        log_level=LOG_LEVEL
    )
    query = d1.query('rigs', pagesize=1000)
    records = list()
    for i, row in enumerate(query, start=1):
        records.append(row)
        if i % 1000 == 0:
            break
    assert records

    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL
    )

    # Test docs
    docs = d2.docs('well-origins')
    if docs:
        assert isinstance(docs, list)

    # Test DDL
    ddl = d2.ddl('rigs', database='pg')
    with TemporaryFile(mode='w+') as f:
        f.write(ddl)
        f.seek(0)
        for line in f:
            assert line.split(' ')[0] == 'CREATE'
            break

    # Test count
    count = d2.count('rigs', deleteddate='null')
    assert count is not None
    assert isinstance(count, int)

    # Neg - test count for invalid dataset. The result is deliberately not bound
    # to `count`, which is still needed by the token refresh check below. The else
    # branch makes the check fail when no exception is raised.
    try:
        d2.count('invalid')
    except DADatasetException:
        pass
    else:
        raise AssertionError(
            'count() did not raise DADatasetException for an invalid dataset'
        )

    # Neg - test ddl with invalid database parameter
    try:
        d2.ddl('rigs', database='invalid')
    except DAQueryException:
        pass
    else:
        raise AssertionError(
            'ddl() did not raise DAQueryException for an invalid database'
        )

    # Test query
    query = d2.query('rigs', pagesize=10000, deleteddate='null')
    records = list()
    for i, row in enumerate(query, start=1):
        records.append(row)
        if i % 1000 == 0:
            break
    assert records

    # Test token refresh
    d2 = DirectAccessV2(
        api_key=DIRECTACCESS_API_KEY,
        client_id=DIRECTACCESS_CLIENT_ID,
        client_secret=DIRECTACCESS_CLIENT_SECRET,
        retries=5,
        backoff_factor=10,
        log_level=LOG_LEVEL,
        access_token='invalid'
    )
    invalid_token = d2.access_token
    query = d2.query('rigs', pagesize=10000, deleteddate='null')
    # Count lazily rather than materialising every record in a list
    assert sum(1 for _ in query) == count
    assert invalid_token != d2.access_token

    # Neg - test client with no credentials
    try:
        DirectAccessV2(
            api_key=None,
            client_id=None,
            client_secret=None,
            log_level=LOG_LEVEL
        )
    except DAAuthException:
        pass
    else:
        raise AssertionError(
            'DirectAccessV2 did not raise DAAuthException for missing credentials'
        )