示例#1
0
def get_documents(params, size, index, scroll_id=None):
    """Fetch one page of documents from Elasticsearch via the scroll API.

    Without ``scroll_id`` a new scrolling search is opened on ``index`` with
    a bool query built from ``params``. With ``scroll_id`` the next page of
    that scroll is requested and no query is sent.

    Args:
        params: Search options. ``project_seq`` is read when a new search is
            started; ``channels``, ``start_date``/``end_date`` and
            ``datasets`` are optional.
        size: Page size sent with the initial search request.
        index: Name of the index to search.
        scroll_id: Scroll identifier from a previous response, or ``None``
            to start a new search.

    Returns:
        The decoded JSON response body.
    """
    query_obj = Query(params)

    if scroll_id:
        es_uri = "/_search/scroll"
        request = {"scroll": "1d", "scroll_id": scroll_id}
    else:
        es_uri = "/" + index + "/doc/_search?scroll=1d"

        # Always restrict to the project sequence.
        filters = [
            query_obj.get_project_seq_query(),
            query_obj.get_project_filter_query(params['project_seq']),
        ]

        # Target channels ('all' or an empty value adds no channel filter).
        if ("channels" in params and params["channels"]
                and params["channels"] != 'all'):
            filters.append(query_obj.get_channel_query())

        # Target period.
        if "start_date" in params and "end_date" in params:
            filters.append(query_obj.get_period_query())

        bool_query = {"filter": filters}

        # Include-keywords of the dataset(s).
        if "datasets" in params and params["datasets"]:
            bool_query["must"] = query_obj.get_dataset_query(
                params['project_seq'], params["datasets"])

        request = {"size": size, "query": {"bool": bool_query}}

    # Serialize once; the same payload is logged and sent.
    payload = json.dumps(request)
    logger.debug("[get_documents] Query >>> %s ", payload)

    es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60)
    try:
        es_conn.request("POST", es_uri, payload,
                        {"Content-type": "application/json"})
        result = es_conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        es_conn.close()

    response = json.loads(result)

    if 'hits' in response:
        total = response['hits']['total']
        if isinstance(total, dict):
            # Elasticsearch 7+ reports the total as an object
            # ({"value": N, "relation": ...}) instead of a plain integer.
            total = total['value']
        logger.debug("[get_documents] result['hits']['total'] >>> %d",
                     int(total))
    else:
        logger.debug("[get_documents] result ::: %s", result)

    return response
示例#2
0
def get_documents(params, size, index, scroll_id=None):
    """Fetch one page of documents from Elasticsearch via the scroll API.

    Without ``scroll_id`` a new scrolling search is opened on ``index`` with
    a bool/must query built from ``params``. With ``scroll_id`` the next page
    of that scroll is requested; no query is built because a scroll
    continuation request carries none.

    Args:
        params: Search options. ``channels``, ``start_date``/``end_date`` and
            ``datasets`` are optional; ``project_seq`` is read when
            ``datasets`` is given. Several datasets are separated by ``^``.
        size: Page size sent with the initial search request.
        index: Name of the index to search.
        scroll_id: Scroll identifier from a previous response, or ``None``
            to start a new search.

    Returns:
        The decoded JSON response body.
    """
    query_obj = Query(params)

    if scroll_id:
        es_uri = "/_search/scroll"
        request = {"scroll": "1d", "scroll_id": scroll_id}
    else:
        es_uri = "/" + index + "/doc/_search?scroll=1d"

        # Always restrict to the project sequence.
        must = [get_project_seq_query(params)]

        # Target channels ('all' or an empty value adds no channel clause).
        if ("channels" in params and params["channels"]
                and params["channels"] != 'all'):
            must.append(get_channel_query(params))

        # Target period.
        if "start_date" in params and "end_date" in params:
            must.append(get_period_query(params))

        # Include-keywords of the dataset(s).
        if "datasets" in params and params["datasets"]:
            datasets = params["datasets"].split("^")
            if len(datasets) > 1:
                # Several datasets: a document may match any one of them.
                should = [
                    query_obj.get_dataset_query(params['project_seq'], dataset)
                    for dataset in datasets
                ]
                must.append({"bool": {"should": should}})
            else:
                must.append(
                    query_obj.get_dataset_query(params['project_seq'],
                                                params["datasets"]))

        # TODO: extend the request per params["type_cd"] where needed, e.g.
        # CCT002 (social monitoring - document statistics) and
        # CCT003 (social monitoring - sentiment analysis).

        request = {"size": size, "query": {"bool": {"must": must}}}

    logger.debug("get_documents() ==> request : ")
    for key, value in request.items():
        logger.debug("\t%s : %s", key, value)

    es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60)
    try:
        es_conn.request("POST", es_uri, json.dumps(request),
                        {"Content-type": "application/json"})
        result = es_conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        es_conn.close()

    response = json.loads(result)

    if 'hits' in response:
        total = response['hits']['total']
        if isinstance(total, dict):
            # Elasticsearch 7+ reports the total as an object
            # ({"value": N, "relation": ...}) instead of a plain integer.
            total = total['value']
        logger.debug("[get_documents] result['hits']['total'] >>> %d",
                     int(total))
    else:
        logger.debug("[get_documents] result ::: %s", result)

    return response