Exemplo n.º 1
0
def search(query: str,
           highlight=True,
           offset=0,
           limit=10,
           language=None,
           restrict=None):
    query.strip()

    root_indexes = list(get_root_language_uids())
    tr_indexes = [language]

    extra_langs = language_id.smart_rank(query)
    for iso_code in extra_langs:
        if iso_code != language and iso_code not in root_indexes:
            tr_indexes.append(iso_code)

    if restrict == 'root-text':
        indexes = root_indexes
    elif restrict == 'translation':
        indexes = tr_indexes
    elif restrict is None:
        indexes = root_indexes + tr_indexes
    else:
        return None

    index_string = ','.join(get_available_indexes(indexes))

    fields = [
        "content",
        "content.*^0.5",
        "term^1.5",
        "term.*^0.5",
        "gloss^1.5",
        "lang^0.5",
        "author^0.5",
        "uid",
        "uid.division^0.7",
        "name^1.25",
        "name.*^0.75",
        "heading.title^0.5",
        "heading.title.plain^0.5",
        "heading.title.shingle^0.5",
    ]

    if regex.search(r'[:"~*]', query) or regex.search(r'AND|OR|NOT', query):
        query = query.replace('define:', 'term:')
        inner_query = {
            "query_string": {
                "fields": fields,
                "query": query,
                "use_dis_max": True
            }
        }
    else:
        inner_query = {
            "multi_match": {
                "type": "best_fields",
                "tie_breaker": 0.3,
                "fields": fields,
                "query": query,
            }
        }

    body = {
        "from":
        offset,
        "size":
        limit,
        "_source": [
            "uid",
            "lang",
            "name",
            "volpage",
            "gloss",
            "term",
            "heading",
            "is_root",
            "author",
            "author_uid",
            "author_short",
            "acronym",
        ],
        "timeout":
        "15s",
        "query": {
            "function_score": {
                "query":
                inner_query,
                "functions": [
                    {
                        "weight": "1.5",
                        "filter": {
                            "term": {
                                "lang": language
                            }
                        }
                    },
                    {
                        "field_value_factor": {
                            "field": "boost",
                            "factor": 1.0,
                            "missing": 1.0,
                        }
                    },
                    {
                        "weight": "0.25",
                        "filter": {
                            "type": {
                                "value": "definition"
                            }
                        }
                    },
                    {
                        "weight": "2",
                        "filter": {
                            "term": {
                                "uid": query.replace(' ', '').lower()
                            }
                        },
                    },
                    {
                        "weight": "1.0",
                        "filter": {
                            "term": {
                                "is_root": True
                            }
                        }
                    },
                ],
                "score_mode":
                "multiply",
            }
        },
    }

    if highlight:
        body["highlight"] = {
            "pre_tags": ["<strong class=\"highlight\">"],
            "post_tags": ["</strong>"],
            "order": "score",
            "require_field_match": False,
            "fields": {
                "content": {
                    "matched_fields":
                    ["content", "content.folded", "content.stemmed"],
                    "type":
                    "fvh",
                    "fragment_size":
                    100,
                    "number_of_fragments":
                    3,
                    "no_match_size":
                    250,
                }
            },
        }

    return es.search(index=index_string, body=body)
Exemplo n.º 2
0
def search(query: str, highlight=True, offset=0, limit=10,
           language=None, restrict=None):
    query.strip()
       
    root_indexes = list(get_root_language_uids())
    tr_indexes = [language]
    
    extra_langs = language_id.smart_rank(query)
    for iso_code in extra_langs:
        if iso_code != language and iso_code not in root_indexes:
            tr_indexes.append(iso_code)
    
    if restrict == 'root-text':
        indexes = root_indexes
    elif restrict == 'translation':
        indexes = tr_indexes
    elif restrict is None:
        indexes = root_indexes + tr_indexes
    else:
        return None

    index_string = ','.join(get_available_indexes(indexes))

    fields = ["content", "content.*^0.5",
              "term^1.5", "term.*^0.5",
              "gloss^1.5",
              "lang^0.5",
              "author^0.5",
              "uid", "uid.division^0.7",
              "name^1.25", "name.*^0.75",
              "heading.title^0.5",
              "heading.title.plain^0.5",
              "heading.title.shingle^0.5"]

    if regex.search(r'[:"~*]', query) or regex.search(r'AND|OR|NOT', query):
        query = query.replace('define:', 'term:')
        inner_query = {
            "query_string": {
                "fields": fields,
                "query": query,
                "use_dis_max": True
            }
        }
    else:
        inner_query = {
            "multi_match": {
                "type": "best_fields",
                "tie_breaker": 0.3,
                "fields": fields,
                "query": query
            }
        }

    body = {
        "from": offset,
        "size": limit,
        "_source": ["uid", "lang", "name", "volpage", "gloss", "term", "heading", "is_root", "author", "author_uid", "author_short", "acronym"],
        "timeout": "15s",
        "query": {
            "function_score": {
                "query": inner_query,
                "functions": [
                    {
                        "weight": "1.5",
                        "filter": {
                            "term": {
                                "lang": language
                            }
                        }
                    },
                    {
                        "field_value_factor": {
                            "field": "boost",
                            "factor": 1.0,
                            "missing": 1.0
                        }
                    },
                    {
                        "weight": "0.25",
                        "filter": {
                            "type": {
                                "value": "definition"
                            }
                        }
                    },
                    {
                        "weight": "2",
                        "filter": {
                            "term": {
                                "uid": query.replace(' ', '').lower()
                            }
                        }
                    },
                    {
                        "weight": "1.0",
                        "filter": {
                            "term": {
                                "is_root": True
                            }
                        }
                    }
                ],
                "score_mode": "multiply"
            }
        }
    }

    if highlight:
        body["highlight"] = {
            "pre_tags": ["<strong class=\"highlight\">"],
            "post_tags": ["</strong>"],
            "order": "score",
            "require_field_match": False,
            "fields": {
                "content": {
                    "matched_fields": ["content", "content.folded", "content.stemmed"],
                    "type": "fvh",
                    "fragment_size": 100,
                    "number_of_fragments": 3,
                    "no_match_size": 250
                }
            }
        }
    
    return es.search(index=index_string, body=body)
Exemplo n.º 3
0
def sutta_search(**kwargs):
    mode = kwargs.get("mode") or "wildcard"
    fields = {
        "name": {
            "mode": mode
        },
        "volpage": {
            "mode": mode,
            "fields": ["volpage", "volpage_extra"]
        },
        "acronym": {
            "mode": mode,
            "field": "uid"
        },
        "division": {
            "mode": mode,
            "fields": ["division", "subdivision"]
        },
        "lang": {
            "mode": "term"
        },
    }
    if "acronym" in kwargs:
        kwargs["acronym"] = kwargs["acronym"].lower().replace(' ', '')

    queries = []

    for field, params in fields.items():
        value = kwargs.get(field)
        if not value:
            continue
        if "fields" in params:
            sub_query = {
                "bool": {
                    "should": [{
                        params["mode"]: {
                            sub_field: {
                                "value": value.lower()
                            }
                        }
                    } for sub_field in params["fields"]]
                }
            }
            queries.append(sub_query)

        else:
            queries.append({
                params["mode"]: {
                    params.get("field", field): {
                        "value": value.lower()
                    }
                }
            })

    if not queries:
        return None

    body = {
        "size": int(kwargs.get("limit", 25)),
        "from": int(kwargs.get("offset", 0)),
        "query": {
            "bool": {
                "must": queries
            }
        },
        "sort": [{
            "_score": {
                "order": "desc"
            }
        }, {
            "ordering": {
                "order": "asc"
            }
        }],
    }

    return es.search(index="suttas", body=body)
Exemplo n.º 4
0
def sutta_search(**kwargs):
    mode = kwargs.get("mode") or "wildcard"
    fields = {
        "name": {"mode": mode},
        "volpage": {"mode": mode, "fields": ["volpage", "volpage_extra"]},
        "acronym": {"mode": mode, "field": "uid"},
        "division": {"mode": mode, "fields": ["division", "subdivision"]},
        "lang": {"mode": "term"}
    }
    if "acronym" in kwargs:
        kwargs["acronym"] = kwargs["acronym"].lower().replace(' ', '')

    queries = []

    for field, params in fields.items():
        value = kwargs.get(field)
        if not value:
            continue
        if "fields" in params:
            sub_query = {
                "bool": {
                    "should": [
                        {
                            params["mode"]: {
                                sub_field: {
                                    "value": value.lower()
                                }
                            }
                        }
                        for sub_field in params["fields"]
                    ]
                }
            }
            queries.append(sub_query)

        else:
            queries.append(
                {
                    params["mode"]: {
                        params.get("field", field): {
                            "value": value.lower()
                        }
                    }
                }
            )

    if not queries:
        return None

    body = {
        "size": int(kwargs.get("limit", 25)),
        "from": int(kwargs.get("offset", 0)),
        "query": {
            "bool": {
                "must": queries
            }
        },
        "sort": [
            {"_score": {"order": "desc"}},
            {"ordering": {"order": "asc"}}
        ]
    }

    return es.search(index="suttas", body=body)