Пример #1
0
def test_fetch_raises_for_non_2xx_status(mocker, mock_sender, requests_mock):
    """Expect ``HTTPError`` when the mocked endpoint answers with a 404."""
    # Patch out sleeping so the backoff retry delay does not slow the test.
    mocker.patch("time.sleep")

    url = 'http://test'
    response_headers = {'Server-Authorization': 'dummy', 'Content-Type': ''}
    requests_mock.get(url, headers=response_headers, status_code=404)

    with pytest.raises(HTTPError):
        common._hawk_api_request(
            url,
            credentials=FAKE_HAWK_CREDENTIALS,
            results_key="results",
            next_key="next",
        )
Пример #2
0
def test_fetch_raises_error_invalid_response(mock_sender, requests_mock):
    """Expect ``ValueError`` when the mocked JSON body has no ``results`` entry."""
    url = 'http://test'
    response_headers = {'Server-Authorization': 'dummy', 'Content-Type': ''}
    # The body deliberately carries only the pagination key.
    body_without_results = {'next': None}
    requests_mock.get(url, headers=response_headers, json=body_without_results)

    with pytest.raises(ValueError):
        common._hawk_api_request(
            url,
            credentials=FAKE_HAWK_CREDENTIALS,
            results_key="results",
            next_key="next",
        )
Пример #3
0
def test_hawk_api_request(mock_sender, requests_mock):
    """A response containing both ``next`` and ``results`` is accepted without raising."""
    requests_mock.get(
        'http://test',
        headers={
            'Server-Authorization': 'dummy',
            'Content-Type': ''
        },
        json={
            'next': None,
            'results': []
        },
    )
    common._hawk_api_request(
        'http://test',
        credentials=FAKE_HAWK_CREDENTIALS,
        results_key="results",
        next_key="next",
    )
    # Guard against the test passing vacuously: the mocked endpoint must
    # actually have been requested.
    assert requests_mock.called
Пример #4
0
def test_hawk_api_request_fail(mocker, requests_mock):
    """Expect ``HawkFail`` to propagate when the patched ``Sender`` rejects the response."""
    # Replace the Sender used by the module under test so that accepting the
    # server response raises HawkFail.
    sender_cls = mocker.patch('dataflow.operators.common.Sender')
    sender_cls().request_header = 'dummy'
    sender_cls().accept_response.side_effect = HawkFail

    url = 'http://test'
    response_headers = {'Server-Authorization': 'dummy', 'Content-Type': ''}
    response_body = {'next': None, 'results': []}
    requests_mock.get(url, headers=response_headers, json=response_body)

    with pytest.raises(HawkFail):
        common._hawk_api_request(
            url,
            credentials=FAKE_HAWK_CREDENTIALS,
            results_key="results",
            next_key="next",
        )
Пример #5
0
def get_table_config(**context):
    """Build the ``TableConfig`` for the table referenced by the task context.

    The source schema and table names are read from ``context``, the column
    definitions are fetched from the data store service's ``table-structure``
    endpoint, and each column is converted into a SQLAlchemy column.

    Args:
        **context: Task context, forwarded as-is to ``_get_schema_and_table``.

    Returns:
        A ``TableConfig`` whose ``field_mapping`` is a list of
        ``(column name, sa.Column)`` tuples, in the order the API returned
        the columns.

    Raises:
        ValueError: If a column's type (or the element type of an array
            column, marked by a trailing ``[]``) is not in the supported
            type mapping.
    """
    # Built once per call instead of once per column.
    sa_types_by_name = {
        'TEXT': sa.Text,
        'INTEGER': sa.Integer,
        'TIMESTAMP WITHOUT TIME ZONE': sa.DateTime,
        'DATE': sa.Date,
        'FLOAT': sa.Float,
        'BOOLEAN': sa.Boolean,
        'BIGINT': sa.BigInteger,
        'NUMERIC': sa.Numeric,
    }

    def to_sqlalchemy_type(column_name, data_type):
        """Return an ``sa.Column`` named ``column_name`` typed per ``data_type``."""
        # A trailing '[]' marks an array of the preceding element type.
        is_array = data_type.endswith('[]')
        element_type_name = data_type[:-2] if is_array else data_type
        # Keep the try body to the lookup only, so that an unrelated KeyError
        # is never misreported as an unsupported data type.
        try:
            sa_data_type = sa_types_by_name[element_type_name]
        except KeyError as err:
            raise ValueError(f'data type {data_type} not supported') from err
        if is_array:
            sa_data_type = sa.ARRAY(sa_data_type)
        return sa.Column(column_name, sa_data_type)

    schema_name, table_name = _get_schema_and_table(context)
    source_url = f'{DATA_STORE_SERVICE_BASE_URL}/api/v1/table-structure/{quote(schema_name)}/{quote(table_name)}'

    # transform schema and table in line with dataflow conventions
    # NOTE(review): only schema_name is passed here, yet a table name is
    # returned as well; confirm table_name is intentionally not an input.
    target_schema_name, target_table_name = _parse_schema(schema_name)

    columns = _hawk_api_request(
        url=source_url,
        credentials=DATA_STORE_SERVICE_HAWK_CREDENTIALS,
        results_key='columns',
        next_key=None,
    )['columns']
    field_mapping = [
        (column['name'], to_sqlalchemy_type(column['name'], column['type']))
        for column in columns
    ]
    return TableConfig(
        schema=target_schema_name,
        table_name=target_table_name,
        field_mapping=field_mapping,
    )