Exemplo n.º 1
0
def search_related(phrases):
    '''
    Search for files related to the given phrases.

    Each phrase is an iterable of words. Words of one character or less are
    dropped and the remaining words of each phrase are sent as one query
    against the "idx_files" index. When every phrase keeps more than four
    words, two shorter queries (the first three and the last three words of
    the first phrase) are added as well.

    Returns the list produced by RunQueries(), or an empty list when
    RunQueries() returns a falsy value.
    '''
    config = current_app.config

    client = sphinxapi.SphinxClient()
    client.SetServer(config["SERVICE_SPHINX"], config["SERVICE_SPHINX_PORT"])
    client.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2)
    client.SetRankingMode(sphinxapi.SPH_RANK_SPH04)
    client.SetFieldWeights({"fn1":100})
    client.SetSelect("*, idiv(@weight,10000) as sw")
    client.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "w DESC, sw DESC, ls DESC")
    client.SetMaxQueryTime(config["SERVICE_SPHINX_MAX_QUERY_TIME"])
    client.SetLimits(0, 6, 6, 10000)
    client.SetFilter('bl', [0])
    source_ids = [int(source["_id"]) for source in filesdb.get_sources()]
    client.SetFilter('t', source_ids)

    # One query per phrase, built from the words longer than one character.
    word_lists = [
        [word for word in phrase if len(word) > 1]
        for phrase in phrases
    ]
    for kept in word_lists:
        client.AddQuery(" ".join(kept), "idx_files")

    # Add shorter searches when no phrase kept four words or fewer.
    if phrases and all(len(kept) > 4 for kept in word_lists):
        first = word_lists[0]
        client.AddQuery(" ".join(first[:3]), "idx_files")
        client.AddQuery(" ".join(first[-3:]), "idx_files")

    results = client.RunQueries() or []
    client.Close()
    return results
Exemplo n.º 2
0
def search_files(query, filters, page=1):
    '''
    Simple file search with filters.

    Args:
        query: query string sent to the "idx_files" index.
        filters: mapping of optional filter values. Recognised keys:
            "type"  - key of current_app.config["CONTENTS_CATEGORY"].
            "src"   - source groups passed to filesdb.get_sources(group=...).
            "size"  - bucket number >= 1; 1-3 select the range
                      1 .. 1048576 * 10 ** (n - 1), 4 or more passes
                      0 .. 104857600 with the exclude flag set.
            "brate" - bucket number 1-4.
            "year"  - bucket number 1-7.
        page: 1-based page number; ten results are returned per page.

    Returns:
        Whatever SphinxClient.Query() returns for the query.

    Bucket values that are not integers in the accepted range are logged
    and the corresponding filter is skipped. Previously such values raised
    ValueError/IndexError, or silently selected the last bucket (a value
    of "0" indexed the bucket tables at -1).
    '''
    def _bucket(name, upper=None):
        # Return filters[name] as an int >= 1 (and <= upper when given);
        # None when the filter is absent, empty or not a valid bucket.
        if name not in filters or not filters[name]:
            return None
        try:
            value = int(filters[name])
        except (TypeError, ValueError):
            value = 0
        if value < 1 or (upper is not None and value > upper):
            logging.warning("Wrong data for %s filter.", name,
                            extra={"q": query, "filters": filters})
            return None
        return value

    config = current_app.config

    sph = sphinxapi.SphinxClient()
    sph.SetServer(config["SERVICE_SPHINX"], config["SERVICE_SPHINX_PORT"])
    sph.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2)
    sph.SetRankingMode(sphinxapi.SPH_RANK_SPH04)
    sph.SetFieldWeights({"fn1":100})
    sph.SetSelect("*, idiv(@weight,10000) as sw")
    sph.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "w DESC, sw DESC, ls DESC")
    sph.SetMaxQueryTime(config["SERVICE_SPHINX_MAX_QUERY_TIME"])
    sph.SetLimits((page-1)*10, 10, 1000, 2000000)
    sph.ResetFilters()
    sph.SetFilter('bl', [0])

    # All the available search filters.
    if 'type' in filters and filters["type"] and filters["type"] in config["CONTENTS_CATEGORY"]:
        sph.SetFilter('ct', config["CONTENTS_CATEGORY"][filters["type"]])

    # Restrict to the requested source groups, or to every known source.
    if 'src' in filters and filters["src"]:
        sources = filesdb.get_sources(group=tuple(filters['src']))
    else:
        sources = filesdb.get_sources()
    sph.SetFilter('t', [int(i["_id"]) for i in sources])

    size = _bucket('size')
    if size is not None:
        if size < 4:
            sph.SetFilterRange('z', 1, 1048576*(10**(size-1)), False)
        else:
            sph.SetFilterRange('z', 0, 104857600, True)

    brate = _bucket('brate', 4)
    if brate is not None:
        sph.SetFilterRange('mab', 0, [127,191,255,319][brate-1], True)

    year = _bucket('year', 7)
    if year is not None:
        # Read the clock once so both bounds agree across a year change.
        current_year = datetime.utcnow().year
        lower = [0,60,70,80,90,100,current_year-1][year-1]
        upper = [59,69,79,89,99,109,current_year][year-1]
        sph.SetFilterRange('may', lower, upper)

    result = sph.Query(query, "idx_files")
    sph.Close()
    if result:
        if current_app.debug and result["warning"]:
            logging.warning(result["warning"])
        if result["error"]:
            logging.error(result["error"])
    return result
Exemplo n.º 3
0
def search_related(phrases):
    """
    Search for files related to the given phrases.

    If the last phrase is a member of EXTENSIONS it is dropped. The five
    longest remaining phrases are escaped and sent as separate queries
    against the "idx_files" index.

    Args:
        phrases: list of phrase strings. The list is copied; the caller's
            list is left untouched.

    Returns:
        The list produced by RunQueries(), or an empty list when there is
        nothing to search for or RunQueries() returns a falsy value.

    Warnings and errors reported by Sphinx are logged, not raised.
    """
    if not phrases:
        return []

    # Work on a copy so the caller's list is neither shortened nor reordered.
    candidates = list(phrases)
    if candidates[-1] in EXTENSIONS:
        candidates.pop()
    if not candidates:
        # Only an extension was given: nothing to query.
        return []
    candidates.sort(key=len, reverse=True)
    candidates = candidates[:5]

    sph = sphinxapi2.SphinxClient()
    sph.SetServer(current_app.config["SERVICE_SPHINX"], current_app.config["SERVICE_SPHINX_PORT"])
    sph.SetConnectTimeout(current_app.config["SERVICE_SPHINX_CONNECT_TIMEOUT"])
    sph.SetMatchMode(sphinxapi2.SPH_MATCH_EXTENDED2)
    sph.SetFieldWeights({"fn": 100, "md": 1})
    sph.SetRankingMode(sphinxapi2.SPH_RANK_EXPR, "sum((2.0*lcs/min_best_span_pos)*user_weight)")
    sph.SetSortMode(sphinxapi2.SPH_SORT_EXTENDED, "r DESC, r2 DESC, uri1 DESC")
    sph.SetMaxQueryTime(current_app.config["SERVICE_SPHINX_MAX_QUERY_TIME"])
    sph.SetLimits(0, 6, 6, 10000)
    sph.SetFilter("bl", [0])
    sph.SetFilter("s", [int(i["_id"]) for i in filesdb.get_sources()])

    for phrase in candidates:
        sph.AddQuery(sph.EscapeString(phrase), "idx_files")

    querys = sph.RunQueries() or []

    # Two separate lists: a chained "warn = error = []" would alias a single
    # list and report every warning as an error too (and vice versa).
    warn = []
    error = []
    if querys:
        for query_res in querys:
            if query_res["warning"]:
                warn.append(query_res["warning"])
            if query_res["error"]:
                error.append(query_res["error"])
    else:
        # No per-query results: fall back to the client-level messages.
        # These are read before Close() is called.
        warn = sph.GetLastWarning()
        error = sph.GetLastError()
    if warn:
        logging.warning(
            "Warning on a Sphinx response", extra={"method": "search_related", "q": candidates, "orig_msg": warn}
        )
    if error:
        logging.error(
            "Error on a Sphinx response", extra={"method": "search_related", "q": candidates, "orig_msg": error}
        )
    sph.Close()
    return querys
Exemplo n.º 4
0
def search_files(query, filters, page=1):
    """
    Simple file search with filters.

    Args:
        query: raw query string; it is escaped before being sent to the
            "idx_files" index.
        filters: mapping of optional filter values. Recognised keys:
            "type" - "|"-separated names looked up in CONTENTS_CATEGORY.
            "src"  - source groups passed to filesdb.get_sources(group=...);
                     when absent or empty, all known sources are used.
            "size" - either a bucket digit 1-4 or a "min,max" pair of
                     floats. Bounds are passed on the same scale as
                     log(bytes, 2) (presumably "z" stores log2 of the file
                     size - confirm against the index definition).
        page: 1-based page number; ten results are returned per page.

    Returns:
        Whatever SphinxClient.Query() returns for the query.

    Malformed filter values are logged and the filter is skipped. When
    Sphinx reports a warning or error, ``cache.cacheme`` is set to False.
    """
    sph = sphinxapi2.SphinxClient()
    sph.SetServer(current_app.config["SERVICE_SPHINX"], current_app.config["SERVICE_SPHINX_PORT"])
    sph.SetConnectTimeout(current_app.config["SERVICE_SPHINX_CONNECT_TIMEOUT"])
    sph.SetMatchMode(sphinxapi2.SPH_MATCH_EXTENDED2)
    sph.SetFieldWeights({"fn": 100, "md": 1})
    sph.SetSelect("*, %s" % normalize_weights)
    sph.SetRankingMode(sphinxapi2.SPH_RANK_EXPR, "sum((10.0*lcs+1.0/min_best_span_pos)*user_weight)")
    sph.SetSortMode(sphinxapi2.SPH_SORT_EXTENDED, "wr DESC, r2 DESC, uri1 DESC")
    sph.SetMaxQueryTime(current_app.config["SERVICE_SPHINX_MAX_QUERY_TIME"])
    sph.SetLimits((page - 1) * 10, 10, 1000, 2000000)
    sph.ResetFilters()
    sph.SetFilter("bl", [0])

    # All the available search filters.
    try:
        if "type" in filters and filters["type"]:
            content_types = [
                tl for t in filters["type"].split("|") if t in CONTENTS_CATEGORY for tl in CONTENTS_CATEGORY[t]
            ]
            sph.SetFilter("ct", content_types)
    except Exception:
        logging.warning("Wrong data for type filter.", extra={"q": query, "filters": filters})

    if "src" in filters and filters["src"]:
        try:
            source_ids = [int(i["_id"]) for i in filesdb.get_sources(group=tuple(filters["src"]))]
            sph.SetFilter("s", source_ids)
        except Exception:
            logging.warning("Wrong data for source filter.", extra={"q": query, "filters": filters})
    else:
        # No source filter requested: restrict to every known source. This
        # branch belongs to the "if"; attached to the "try" it only ran
        # after a successful source filter and never when "src" was absent.
        sph.SetFilter("s", [int(i["_id"]) for i in filesdb.get_sources()])

    if "size" in filters:
        try:
            size = filters["size"]
            if size.isdigit() and 0 < int(size) <= 4:
                bucket = int(size)
                if bucket < 4:
                    sph.SetFilterFloatRange("z", 1.0, log(1048576 * (10 ** (bucket - 1)), 2), False)
                else:
                    sph.SetFilterFloatRange("z", 0.0, log(104857600, 2), True)
            else:
                sizes = size.split(",")
                if len(sizes) == 2:
                    sph.SetFilterFloatRange("z", float(sizes[0]), float(sizes[1]), False)
        except Exception:
            logging.warning("Wrong data for size filter.", extra={"q": query, "filters": filters})

    # NOTE: the bitrate ("brate" -> attribute "mab") and year ("year" ->
    # attribute "may") range filters are currently disabled.

    query_res = sph.Query(sph.EscapeString(query), "idx_files")
    warn = error = None
    if query_res:
        if query_res["warning"]:
            warn = query_res["warning"]
        if query_res["error"]:
            error = query_res["error"]
    else:
        # No result: fall back to the client-level messages. These are read
        # before Close() is called.
        warn = sph.GetLastWarning()
        error = sph.GetLastError()
    if warn:
        logging.warning("Warning on a Sphinx response", extra={"method": "search_files", "q": query, "orig_msg": warn})
    if error:
        logging.error("Error on a Sphinx response", extra={"method": "search_files", "q": query, "orig_msg": error})

    if warn or error:
        cache.cacheme = False  # keep failed responses out of the cache
    sph.Close()

    return query_res
Exemplo n.º 5
0
def init_search_stats():
    """
    Rebuild the module-level ``normalize_weights`` select expression.

    The expression is a string assembled from per-source statistics read
    from ``searchd.proxy`` (``sources``, ``sources_rating_ra``,
    ``sources_rating_rc``, ``sources_rating_rd``) and from per-group
    weights applied to the sources returned by ``filesdb.get_sources``.
    It defines the aliases ``wr``, ``riclog``, ``rdev`` and ``ravg``.
    """
    global normalize_weights

    def _in_terms(values, neutral):
        # Group the keys of *values* by their value and render one
        # "IN(s,<keys>)*<value>" term per group, joined with "+". Groups
        # whose value lies within 1e-8 of *neutral* are left out.
        grouped = groupby(sorted(values, key=values.get), key=values.get)
        return "+".join(
            "IN(s,%s)*%f" % (",".join(keys), value)
            for value, keys in grouped
            if not -1e-8 < value - neutral < 1e-8
        )

    # Weight of each source, inherited from the group it belongs to.
    group_weights = {"w": 1, "s": 1, "t": 0.5, "e": 0.08, "g": 0.08}
    weight_by_source = {}
    for group, weight in group_weights.iteritems():
        for source in filesdb.get_sources(group=group):
            weight_by_source[int(source["_id"])] = weight
    weight_by_source[18] /= 1.8

    proxy = searchd.proxy

    iclogs = {}
    for s in proxy.sources.iterkeys():
        if s in proxy.sources_rating_ra:
            iclogs[str(s)] = weight_by_source.get(s, 1.0) / (1.0 + log(proxy.sources_rating_rc[s] + 1))

    avgs = {}
    for s in proxy.sources.iterkeys():
        if s in proxy.sources_rating_ra:
            avgs[str(s)] = proxy.sources_rating_ra[s]

    devs = {}
    for s in proxy.sources_rating_rd.iterkeys():
        deviation = proxy.sources_rating_rd[s] if s in proxy.sources_rating_rd else 1.0
        devs[str(s)] = deviation - 1.0

    avgs_vals = _in_terms(avgs, 0)
    devs_vals = _in_terms(devs, 0)
    iclog_vals = _in_terms(iclogs, 1)

    template = "@weight*(%(iclog)s)*(0.4+(if(r>-1,r-%(avg)s,0))/(1.0+%(dev)s)) as wr, %(iclog)s as riclog, %(dev)s as rdev, %(avg)s as ravg"
    normalize_weights = template % {"iclog": iclog_vals, "avg": avgs_vals, "dev": devs_vals}