Exemplo n.º 1
0
def create_aliases(client, index, load_type, silent=False):
    """Put per-award-type query aliases and the single write alias on *index*.

    Args:
        client: Elasticsearch client used for the alias operations.
        index: name of the index receiving the aliases.
        load_type: "awards" selects the award alias settings; any other
            value selects the transaction alias settings.
        silent: when False, log each query-alias assignment.
    """
    # The alias prefix depends only on load_type, so resolve it once.
    is_awards = load_type == "awards"
    prefix = settings.ES_AWARDS_QUERY_ALIAS_PREFIX if is_awards else settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX

    for award_type, codes in INDEX_ALIASES_TO_AWARD_TYPES.items():
        alias = f"{prefix}-{award_type}"
        if silent is False:
            message = f"Putting alias '{alias}' on {index} with award codes {codes}"
            logger.info(format_log(message, process="ES Alias Put"))
        # Filtered alias: queries through it only see this grouping's type codes.
        put_alias(client, index, alias, {"filter": {"terms": {"type": codes}}})

    # ensure the new index is added to the alias used for incremental loads.
    # If the alias is on multiple indexes, the loads will fail!
    write_alias = settings.ES_AWARDS_WRITE_ALIAS if is_awards else settings.ES_TRANSACTIONS_WRITE_ALIAS
    logger.info(format_log(f"Putting alias '{write_alias}' on {index}", process="ES Alias Put"))
    put_alias(client, index, write_alias, {})
Exemplo n.º 2
0
def create_aliases(client, index, load_type, silent=False):
    """Attach award-type query aliases and the incremental-load write alias.

    ``load_type == "awards"`` selects the award alias settings; any other
    value selects the transaction settings. ``silent=False`` prints a line
    per query alias.
    """
    # Choose the query-alias prefix once; it is constant for the whole loop.
    awards = load_type == "awards"
    query_prefix = settings.ES_AWARDS_QUERY_ALIAS_PREFIX if awards else settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX

    for award_type, codes in INDEX_ALIASES_TO_AWARD_TYPES.items():
        alias_name = "{}-{}".format(query_prefix, award_type)
        if silent is False:
            printf({
                "msg": "Putting alias '{}' on {} with award codes {}".format(alias_name, index, codes),
                "job": None,
                "f": "ES Alias Put",
            })
        # Filtered alias restricted to this grouping's award type codes.
        put_alias(client, index, alias_name, {"filter": {"terms": {"type": codes}}})

    # ensure the new index is added to the alias used for incremental loads.
    # If the alias is on multiple indexes, the loads will fail!
    write_alias = settings.ES_AWARDS_WRITE_ALIAS if awards else settings.ES_TRANSACTIONS_WRITE_ALIAS
    printf({
        "msg": "Putting alias '{}' on {}".format(write_alias, index),
        "job": None,
        "f": "ES Alias Put"
    })
    put_alias(client, index, write_alias, {})
Exemplo n.º 3
0
def get_total_results(keyword):
    """Run a filtered keyword search and return the per-category agg buckets.

    Returns the ``types`` filters-aggregation buckets on success, or None
    when the cluster gives no response or the response shape is unexpected.
    """
    # One named filter per award-type category, each matching its type codes.
    aggs = A(
        "filters",
        filters={
            category: {"terms": {"type": types}}
            for category, types in INDEX_ALIASES_TO_AWARD_TYPES.items()
        },
    )
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keyword_search": [es_minimal_sanitize(keyword)]}
    )
    search = TransactionSearch().filter(filter_query)
    search.aggs.bucket("types", aggs)
    response = search.handle_execute()

    if response is None:
        logger.error("No Response")
        return None
    try:
        return response["aggregations"]["types"]["buckets"]
    except KeyError:
        # Falls through returning None implicitly, matching the no-response path.
        logger.error("Unexpected Response")
Exemplo n.º 4
0
def create_award_type_aliases(client, config):
    """Create one filtered query alias per award-type grouping on the configured index.

    ``config`` supplies ``query_alias_prefix``, ``index_name`` and ``verbose``.
    """
    for award_type, codes in INDEX_ALIASES_TO_AWARD_TYPES.items():
        alias = f"{config['query_alias_prefix']}-{award_type}"
        if config["verbose"]:
            msg = f"Putting alias '{alias}' on {config['index_name']} with award codes {codes}"
            logger.info(format_log(msg, action="ES Alias"))
        # The alias filter restricts queries to this grouping's type codes.
        put_alias(client, config["index_name"], alias, {"filter": {"terms": {"type": codes}}})
Exemplo n.º 5
0
def swap_aliases(client, index):
    """Make *index* the live transaction index: strip aliases everywhere else,
    re-point the per-award-type aliases at *index*, and apply the settings from
    ``es_settings.json``.

    Order matters here: the new index's stale aliases are dropped first, then
    old indexes are de-aliased and closed, and only then are the fresh aliases
    and settings applied.
    """
    client.indices.refresh(index)
    # Drop any aliases the new index already carries before re-pointing them.
    if client.indices.get_alias(index, "*"):
        printf({
            "msg": 'Removing old aliases for index "{}"'.format(index),
            "job": None,
            "f": "ES Alias Drop"
        })
        client.indices.delete_alias(index, "_all")

    alias_patterns = settings.TRANSACTIONS_INDEX_ROOT + "*"

    try:
        # Any index still holding a transaction alias is retired: de-aliased
        # and closed so only the new index serves queries.
        old_indices = client.indices.get_alias("*", alias_patterns).keys()
        for old_index in old_indices:
            client.indices.delete_alias(old_index, "_all")
            client.indices.close(old_index)
            printf({
                "msg": 'Removing aliases & closing "{}"'.format(old_index),
                "job": None,
                "f": "ES Alias Drop"
            })
    except Exception:
        # Best-effort: a missing alias is not fatal, just report and continue.
        printf({
            "msg": "ERROR: no aliases found for {}".format(alias_patterns),
            "f": "ES Alias Drop"
        })

    # Re-create the per-award-type aliases, all pointing at the new index.
    for award_type, award_type_codes in INDEX_ALIASES_TO_AWARD_TYPES.items():
        alias_name = "{}-{}".format(settings.TRANSACTIONS_INDEX_ROOT,
                                    award_type)
        printf({
            "msg":
            'Putting alias "{}" with award codes {}'.format(
                alias_name, award_type_codes),
            "job":
            "",
            "f":
            "ES Alias Put",
        })
        put_alias(client, index, alias_name, award_type_codes)

    # Apply the index settings checked into the repo (replica counts, etc.).
    es_settingsfile = os.path.join(settings.BASE_DIR,
                                   "usaspending_api/etl/es_settings.json")
    with open(es_settingsfile) as f:
        settings_dict = json.load(f)
    index_settings = settings_dict["settings"]["index"]

    # Snapshot the pre-change values so the log below can show old -> new.
    current_settings = client.indices.get(index)[index]["settings"]["index"]

    client.indices.put_settings(index_settings, index)
    client.indices.refresh(index)
    for setting, value in index_settings.items():
        message = 'Changing "{}" from {} to {}'.format(
            setting, current_settings.get(setting), value)
        printf({"msg": message, "job": None, "f": "ES Settings Put"})
def create_aliases(client, index, silent=False):
    """Put one transaction-root alias per award-type grouping on *index*.

    ``silent=False`` prints a line for each alias as it is applied.
    """
    root = settings.TRANSACTIONS_INDEX_ROOT
    for award_type, codes in INDEX_ALIASES_TO_AWARD_TYPES.items():
        alias_name = "{}-{}".format(root, award_type)
        if silent is False:
            payload = {
                "msg": 'Putting alias "{}" with award codes {}'.format(alias_name, codes),
                "job": "",
                "f": "ES Alias Put",
            }
            printf(payload)
        put_alias(client, index, alias_name, codes)
def spending_by_transaction_count(request_data):
    """Tally per-category totals for the request's keyword search.

    Returns a dict keyed by category ("directpayments" is renamed to
    "direct_payments"); returns None as soon as any category lookup fails.
    """
    keyword = request_data["filters"]["keywords"]
    counts = {}

    for category in INDEX_ALIASES_TO_AWARD_TYPES.keys():
        total = get_total_results(keyword, category)
        if total is None:
            # Surface the failure sentinel unchanged.
            return total
        key = "direct_payments" if category == "directpayments" else category
        counts[key] = total
    return counts
Exemplo n.º 8
0
def spending_by_transaction_count(request_data):
    """Report per-category doc counts for the request's keyword search.

    Returns a dict keyed by category ("directpayments" is renamed to
    "direct_payments"), or None when the aggregation query produced nothing.
    """
    keyword = request_data["filters"]["keywords"]
    counts = {}
    buckets = get_total_results(keyword)
    for category in INDEX_ALIASES_TO_AWARD_TYPES.keys():
        # Checked inside the loop so an empty category map still returns {}.
        if buckets is None:
            return buckets
        key = "direct_payments" if category == "directpayments" else category
        counts[key] = buckets[category]["doc_count"]
    return counts
def search_transactions(request_data, lower_limit, limit):
    """
    request_data: dictionary
    lower_limit: integer
    limit: integer

    if transaction_type_code not found, return results for contracts
    """

    keyword = request_data["filters"]["keywords"]
    fields = [TRANSACTIONS_SOURCE_LOOKUP[f] for f in request_data["fields"]]
    fields += ["award_id", "generated_unique_award_id"]
    sort_field = TRANSACTIONS_LOOKUP[request_data["sort"]]
    query = {
        "_source": fields,
        "from": lower_limit,
        "size": limit,
        "query": base_query(keyword),
        "sort": [{sort_field: {"order": request_data["order"]}}],
    }

    # The requested award type codes must exactly match one category's full set.
    requested_codes = sorted(request_data["filters"]["award_type_codes"])
    index_name = None
    for category, award_types in INDEX_ALIASES_TO_AWARD_TYPES.items():
        if sorted(award_types) == requested_codes:
            index_name = "{}-{}*".format(
                settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX, category)
            break
    if index_name is None:
        logger.exception(
            "Bad/Missing Award Types. Did not meet 100% of a category's types")
        return False, "Bad/Missing Award Types requested", None

    response = es_client_query(index=index_name, body=query, retries=10)
    if not response:
        return False, "There was an error connecting to the ElasticSearch cluster", None
    hits = response["hits"]
    return True, format_for_frontend(hits["hits"]), hits["total"]
Exemplo n.º 10
0
def get_download_ids(keyword, field, size=10000):
    """
    returns a generator that
    yields list of transaction ids in chunksize SIZE

    Note: this only works for fields in ES of integer type.
    """
    # Upper bound on passes, derived from the overall download query budget.
    max_passes = DOWNLOAD_QUERY_SIZE // size

    totals = get_total_results(keyword)
    if totals is None:
        logger.error(
            "Error retrieving total results. Max number of attempts reached")
        return
    grand_total = sum(
        totals[category]["doc_count"]
        for category in INDEX_ALIASES_TO_AWARD_TYPES.keys()
    )
    # At least one pass, but never more than the budget allows.
    needed_passes = (grand_total // size) + 1
    num_partitions = min(max(1, needed_passes), max_passes)

    for partition in range(num_partitions):
        filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
            {"keyword_search": [es_minimal_sanitize(keyword)]})
        search = TransactionSearch().filter(filter_query)
        # Partitioned terms aggregation: each pass returns a disjoint slice of keys.
        aggs = A(
            "terms",
            field=field,
            include={"partition": partition, "num_partitions": num_partitions},
            size=size,
            shard_size=size,
        )
        search.aggs.bucket("results", aggs)
        response = search.handle_execute()
        if response is None:
            raise Exception("Breaking generator, unable to reach cluster")
        yield [bucket["key"] for bucket in response["aggregations"]["results"]["buckets"]]