Exemplo n.º 1
0
def fetch_from_hawk_api(
    table_name: str,
    source_url: str,
    hawk_credentials: dict,
    results_key: str = "results",
    next_key: Optional[str] = "next",
    validate_response: Optional[bool] = True,
    force_http: Optional[bool] = False,
    **kwargs,
):
    """Page through a Hawk-authenticated endpoint, storing each page via ``S3Data``.

    Starting at ``source_url``, every response is fetched with
    ``_hawk_api_request``; the records found under ``results_key`` are written
    to a key named after the zero-padded page number (``0000000001.json``,
    ``0000000002.json``, ...). The URL of the following page is read from
    ``next_key``; fetching stops once that value is falsy, or after the first
    page when ``next_key`` itself is falsy.

    Args:
        table_name: First argument handed to ``S3Data``.
        source_url: URL of the first page to request.
        hawk_credentials: Forwarded to ``_hawk_api_request`` as ``credentials``.
        results_key: Key (resolved with ``get_nested_key``) holding the records
            of a page.
        next_key: Key (resolved with ``get_nested_key``) holding the next page
            URL; a falsy value disables pagination.
        validate_response: Forwarded unchanged to ``_hawk_api_request``.
        force_http: Forwarded unchanged to ``_hawk_api_request``.
        **kwargs: Must contain ``"ts_nodash"``, which is passed to ``S3Data``
            (presumably the Airflow task context -- confirm with callers).

    Raises:
        KeyError: If ``kwargs`` has no ``"ts_nodash"`` entry.
    """
    storage = S3Data(table_name, kwargs["ts_nodash"])
    fetched_so_far = 0
    page_number = 0
    next_url = source_url

    # The first request is always made, even for a falsy ``source_url``;
    # only subsequent iterations depend on the pagination link.
    while True:
        page_number += 1

        payload = _hawk_api_request(
            next_url,
            credentials=hawk_credentials,
            results_key=results_key,
            next_key=next_key,
            validate_response=validate_response,
            force_http=force_http,
        )

        page_records = get_nested_key(payload, results_key)
        storage.write_key(f"{page_number:010}.json", page_records)

        fetched_so_far = fetched_so_far + len(page_records)
        logger.info(f"Fetched {fetched_so_far} records")

        if not next_key:
            break

        next_url = get_nested_key(payload, next_key)
        if not next_url:
            break

    logger.info("Fetching from source completed")
Exemplo n.º 2
0
def _get_data_to_insert(field_mapping: SingleTableFieldMapping, record: Dict):
    """Build a ``{column name: value}`` dict for ``record`` from ``field_mapping``.

    ``field_mapping`` is iterated as ``(field, db_column)`` pairs. Pairs whose
    ``field`` is ``None`` are skipped; for the rest the value is looked up in
    ``record`` with ``get_nested_key``, passing ``not db_column.nullable`` as
    its third argument (presumably the "required" flag -- confirm against
    ``get_nested_key``).

    Args:
        field_mapping: Iterable of ``(field, db_column)`` pairs.
        record: Source record the values are read from.

    Returns:
        Dict keyed by ``db_column.name``.

    Raises:
        KeyError: Re-raised after logging a warning when the lookup fails.
    """
    row = {}
    try:
        for source_field, column in field_mapping:
            if source_field is None:
                continue
            # Resolve the column name before the value, mirroring the
            # key-then-value evaluation order of a dict comprehension.
            column_name = column.name
            row[column_name] = get_nested_key(record, source_field,
                                              not column.nullable)
    except KeyError:
        logger.warning(
            "Failed to load item %s, required field is missing",
            record.get('id', str(record)),
        )
        raise

    return row
Exemplo n.º 3
0
def _insert_related_records(
    conn: sa.engine.Connection,
    table_config: TableConfig,
    contexts: Tuple[Dict, ...],
):
    """Insert the related records nested under the innermost context, recursively.

    For each ``(key, related_table)`` pair in
    ``table_config.related_table_configs`` the records found under ``key`` in
    ``contexts[-1]`` are passed through ``related_table.transforms`` in order,
    inserted into ``related_table.temp_table`` and, when that related table has
    related tables of its own, used as the new innermost context for a
    recursive call.

    Args:
        conn: Connection used to execute the insert statements.
        table_config: Configuration whose related tables are processed.
        contexts: Chain of enclosing records; the last element is the record
            whose related records are inserted.
    """
    for key, child_config in table_config.related_table_configs:
        children = get_nested_key(contexts[-1], key)
        if not children:
            # Missing or empty collection: nothing to insert for this key.
            continue

        for child in children:
            for apply_transform in child_config.transforms:
                child = apply_transform(child, child_config.field_mapping,
                                        contexts)

            insert_statement = child_config.temp_table.insert()
            row_values = _get_data_to_insert(child_config.columns, child)
            conn.execute(insert_statement, **row_values)

            if not child_config.related_table_configs:
                continue

            # Descend with the transformed record appended as the new
            # innermost context.
            _insert_related_records(conn, child_config, contexts + (child, ))
Exemplo n.º 4
0
def fetch_from_api_endpoint(
    table_name: str,
    source_url: str,
    auth_token: Optional[str] = None,
    auth_token_builder: Optional[Callable] = None,
    results_key: Optional[str] = "results",
    next_key: Optional[str] = "next",
    auth_type: Optional[str] = "Token",
    extra_headers: Optional[Mapping] = None,
    **kwargs,
):
    """Page through a JSON API with ``requests``, storing each page via ``S3Data``.

    Every page is requested with an ``Authorization`` header built from
    ``auth_type`` and either ``auth_token`` or the value returned by
    ``auth_token_builder()`` (called once per request); ``extra_headers`` are
    merged on top. The page's records are written to a key named after the
    zero-padded page number, and the next URL is read from ``next_key`` until
    it is falsy.

    Args:
        table_name: First argument handed to ``S3Data``.
        source_url: URL of the first page to request.
        auth_token: Static token for the ``Authorization`` header.
        auth_token_builder: Zero-argument callable returning a token; invoked
            for every request.
        results_key: Key holding the records of a page; ``None`` stores the
            whole response body instead.
        next_key: Key holding the next page URL; a falsy value disables
            pagination.
        auth_type: Scheme prefix of the ``Authorization`` header value.
        extra_headers: Additional headers; they override generated ones on
            key collision.
        **kwargs: Must contain ``"ts_nodash"``, which is passed to ``S3Data``
            (presumably the Airflow task context -- confirm with callers).

    Raises:
        ValueError: If both ``auth_token`` and ``auth_token_builder`` are
            given, or if a truthy ``next_key`` / ``results_key`` is not a
            top-level member of the response.
        requests.exceptions.HTTPError: Re-raised after logging when the
            response status signals an error.
    """
    if not (auth_token is None or auth_token_builder is None):
        raise ValueError(
            "You can provide at most one of `auth_token` and `auth_token_builder`"
        )

    storage = S3Data(table_name, kwargs["ts_nodash"])
    fetched_so_far = 0
    page_number = 0
    next_url = source_url

    while True:
        page_number += 1

        # Headers are rebuilt for every request so that a token builder is
        # consulted each time.
        headers = {}
        if auth_token:
            headers["Authorization"] = f'{auth_type} {auth_token}'
        elif auth_token_builder:
            headers["Authorization"] = f'{auth_type} {auth_token_builder()}'

        if extra_headers:
            headers = {**headers, **extra_headers}

        response = requests.get(next_url, headers=headers)

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            logger.error(f"Request failed: {response.text}")
            raise

        payload = response.json()

        # Each configured key must be present at the top level of the payload.
        for expected_key in (next_key, results_key):
            if expected_key and expected_key not in payload:
                raise ValueError("Unexpected response structure")

        if results_key is None:
            page_records = payload
        else:
            page_records = get_nested_key(payload, results_key)

        storage.write_key(f"{page_number:010}.json", page_records)

        fetched_so_far = fetched_so_far + len(page_records)
        logger.info(f"Fetched {fetched_so_far} records")

        if not next_key:
            break

        next_url = get_nested_key(payload, next_key)
        if not next_url:
            break

    logger.info("Fetching from source completed")
Exemplo n.º 5
0
def test_nested_key(path, required, expected):
    """Check that ``utils.get_nested_key`` resolves ``path`` in a fixed fixture.

    ``path``, ``required`` and ``expected`` are supplied by the caller
    (presumably a ``pytest`` parametrisation not visible here).
    """
    fixture = {"a": {"b": {"c": 1}, "l": [2, 3, 4]}, "e": 5}
    actual = utils.get_nested_key(fixture, path, required)
    assert actual == expected