示例#1
0
def get_search_requests(page=0, limit=100, sortModel=None, filterModel=None, distinct=False, onlyUser=None):
    """Return one page of stored searches together with the total row count.

    The searches are left-joined with the TV and movie ID caches so that each
    row also carries ``tvtitle`` / ``movietitle`` when the search identifier
    matches a cached entry.

    :param page: page number handed to ``paginate``
    :param limit: page size handed to ``paginate``
    :param sortModel: sort description passed on to ``extendQueryWithSorting``
        (falls back to newest-first via ``Search.time.desc()``)
    :param filterModel: filter description passed on to ``extendQueryWithFilter``
    :param distinct: when true, group by every selected search column except
        the timestamp
    :param onlyUser: when truthy, restrict the result to this username
    :return: dict with ``totalRequests`` (count) and ``searchRequests`` (list of row dicts)
    """
    # Columns the sort/filter helpers may address by their external name
    addressableColumns = {
        "time": Search.time,
        "query": Search.query,
        "category": Search.category,
        "access": Search.internal,
        "username": Search.username
    }

    # Everything that describes a search apart from when it happened; reused for grouping
    describingColumns = [Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username, Search.title, Search.author]
    selectedColumns = [Search.time] + describingColumns + [TvIdCache.title.alias("tvtitle"), MovieIdCache.title.alias("movietitle")]

    matchesTvCache = (
        ((Search.identifier_value == TvIdCache.tvdb) & (Search.identifier_key == "tvdbid")) |
        ((Search.identifier_value == TvIdCache.tvrage) & (Search.identifier_key == "rid"))
    )
    matchesMovieCache = (
        ((Search.identifier_value == MovieIdCache.imdb) & (Search.identifier_key == "imdbid")) |
        ((Search.identifier_value == MovieIdCache.tmdb) & (Search.identifier_key == "tmdbid"))
    )

    query = Search().select(*selectedColumns)
    query = query.join(TvIdCache, JOIN.LEFT_OUTER, on=matchesTvCache).join(MovieIdCache, JOIN.LEFT_OUTER, on=matchesMovieCache)

    query = extendQueryWithFilter(addressableColumns, filterModel, query)
    query = extendQueryWithSorting(addressableColumns, query, sortModel, Search.time.desc())

    if onlyUser:
        query = query.where(Search.username == onlyUser)
    if distinct:
        query = query.group_by(*describingColumns)

    # Count before paginating so the total covers all matching rows
    totalCount = query.count()
    pageRows = list(query.paginate(page, limit).dicts())

    return {"totalRequests": totalCount, "searchRequests": pageRows}
示例#2
0
def get_search_requests(page=0, limit=100, type=None):
    """Return one page of stored searches, newest first, plus the total count.

    Searches are left-joined with the TV and movie ID caches so rows carry
    ``tvtitle`` / ``movietitle`` where the identifier matches a cached entry.

    :param page: page number handed to ``paginate``
    :param limit: page size handed to ``paginate``
    :param type: ``"Internal"`` keeps only internal searches; ``None`` or
        ``"All"`` applies no filter; any other value keeps only non-internal
        searches
    :return: dict with ``totalRequests`` and ``searchRequests`` (list of row dicts)
    """
    matches_tv_cache = (
        ((Search.identifier_value == TvIdCache.tvdb) & (Search.identifier_key == "tvdbid")) |
        ((Search.identifier_value == TvIdCache.tvrage) & (Search.identifier_key == "rid"))
    )
    matches_movie_cache = (
        ((Search.identifier_value == MovieIdCache.imdb) & (Search.identifier_key == "imdbid")) |
        ((Search.identifier_value == MovieIdCache.tmdb) & (Search.identifier_key == "tmdbid"))
    )

    query = Search().select(
        Search.time, Search.internal, Search.query, Search.identifier_key,
        Search.identifier_value, Search.category, Search.season,
        Search.episode, Search.type, Search.username, Search.title,
        Search.author, TvIdCache.title.alias("tvtitle"),
        MovieIdCache.title.alias("movietitle"))
    query = query.join(TvIdCache, JOIN.LEFT_OUTER, on=matches_tv_cache)
    query = query.join(MovieIdCache, JOIN.LEFT_OUTER, on=matches_movie_cache)

    if type == "Internal":
        query = query.where(Search.internal)
    elif type is not None and type != "All":
        query = query.where(~Search.internal)

    # Count the filtered set before ordering and paginating
    total = query.count()
    newest_first = query.order_by(Search.time.desc())
    page_rows = list(newest_first.paginate(page, limit).dicts())

    return {"totalRequests": total, "searchRequests": page_rows}
示例#3
0
def getTimeBasedSearchStats(after, before):
    """Return search counts per weekday and per hour for searches strictly between ``after`` and ``before``.

    Each search time is converted to the local timezone before it is handed
    to ``calculcateTimeBasedStats``.

    :return: dict with the keys ``perDayOfWeek`` and ``perHourOfDay``
    """
    withinRange = (Search.time > after) & (Search.time < before)

    localTimes = []
    for row in Search().select(Search.time).where(withinRange):
        localTimes.append(arrow.get(row.time).to(tz.tzlocal()))

    byWeekday, byHour = calculcateTimeBasedStats(localTimes)
    return {"perDayOfWeek": byWeekday, "perHourOfDay": byHour}
示例#4
0
def getTimeBasedSearchStats():
    """Return search counts per weekday and per hour over all stored searches.

    :return: dict with the keys ``perDayOfWeek`` and ``perHourOfDay``
    """
    allTimes = []
    for row in Search().select(Search.time):
        allTimes.append(arrow.get(row.time))

    byWeekday, byHour = calculcateTimeBasedStats(allTimes)
    return {"perDayOfWeek": byWeekday, "perHourOfDay": byHour}
示例#5
0
文件: stats.py 项目: nzbis/nzbhydra
def get_search_requests(page=0, limit=100, type=None):
    """Return one page of stored searches, newest first, plus the total count.

    :param page: page number handed to ``paginate``
    :param limit: page size handed to ``paginate``
    :param type: ``"Internal"`` keeps only internal searches; ``None`` or
        ``"All"`` applies no filter; any other value keeps only non-internal
        searches
    :return: dict with ``totalRequests`` and ``searchRequests`` (list of row dicts)
    """
    selected = (Search.time, Search.internal, Search.query,
                Search.identifier_key, Search.identifier_value,
                Search.category, Search.season, Search.episode,
                Search.type, Search.username)
    query = Search().select(*selected)

    if type == "Internal":
        query = query.where(Search.internal)
    elif type is not None and type != "All":
        query = query.where(~Search.internal)

    # Count the filtered set before ordering and paginating
    total = query.count()
    newest_first = query.order_by(Search.time.desc())
    page_rows = list(newest_first.paginate(page, limit).dicts())

    return {"totalRequests": total, "searchRequests": page_rows}
示例#6
0
    def testIndexersApiLimits(self):
        """An indexer with hitLimit 3 is still picked after two recorded API accesses but not after a third."""
        config.settings.searching.generate_queries = []
        self.newznab1.hitLimit = 3
        self.newznab1.hitLimitResetTime = None
        config.settings.indexers = [self.newznab1]
        read_indexers_from_config()

        search_request = SearchRequest()
        pickedInitially = search.pick_indexers(search_request)
        self.assertEqual(1, len(pickedInitially))

        dbsearch = Search(internal=True, time=arrow.utcnow().datetime)
        dbsearch.save()
        indexer = Indexer().get(name="newznab1")

        def recordAccess(hoursAgo):
            # Store a successful search access that happened the given number of hours in the past
            accessTime = arrow.utcnow().replace(hours=-hoursAgo).datetime
            IndexerApiAccess(indexer=indexer, search=dbsearch, time=accessTime, type="search", url="", response_successful=True).save()

        # Two accesses, one and 12 hours ago: still below the limit
        recordAccess(1)
        recordAccess(12)
        self.assertEqual(1, len(search.pick_indexers(search_request)))

        # Another one 20 hours ago, so the limit should be reached
        recordAccess(20)
        self.assertEqual(0, len(search.pick_indexers(search_request)))
示例#7
0
def search(search_request):
    """Run a search against the picked indexers, caching results between paged calls.

    Steps, in order:

    1. Apply the configured default ``maxAge`` if the request has none.
    2. Evict cache entries not accessed within the last five minutes and
       delete stored search results older than ``keepSearchResultsForDays``.
    3. For a new search (unknown hash or offset 0) pick indexers, store a
       ``Search`` row and collect forbidden/required words from the global
       config, the category and ``--term`` / ``!term`` markers in the query.
       Otherwise reuse the cached entry.
    4. Query indexers that still have more results until enough results for
       ``offset + limit`` are cached (or all indexers are exhausted; with
       ``loadAll`` the loop only stops once indexers are exhausted).
    5. Group duplicates; internal searches keep all results tagged with a
       group hash, external searches keep one result per group.

    :param search_request: request object; its ``maxage``, ``category``,
        ``offset``, ``forbiddenWords``, ``requiredWords`` and ``query``
        attributes are modified in place
    :return: dict with the keys ``results``, ``indexer_infos``, ``dbsearchid``,
        ``total``, ``offset`` and ``rejected``
    """
    logger.info("Starting new search: %s" % search_request)
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = countOldSearchResults(keepFor)
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    else:
        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0, "rejected": SearchModule.getRejectedCountDict()}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}

        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode,
                          type=search_request.type, title=search_request.title, author=search_request.author, username=search_request.username)
        saveSearch(dbsearch)
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])

        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])

        if search_request.query:
            # Terms prefixed with "--" or "!" are treated as NOT terms and stripped from the query
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))

                search_request.forbiddenWords.extend(forbiddenWords)
        cache_entry["forbiddenWords"] = search_request.forbiddenWords
        cache_entry["requiredWords"] = search_request.requiredWords
        cache_entry["query"] = search_request.query

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        search_request.forbiddenWords = cache_entry["forbiddenWords"]
        search_request.requiredWords = cache_entry["requiredWords"]
        search_request.query = cache_entry["query"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    if search_request.loadAll:
        logger.debug("Requested to load all results. Will continue to search until all indexers are exhausted")
    while (len(cache_entry["results"]) < external_offset + limit or search_request.loadAll) and len(indexers_to_call) > 0:
        if len(cache_entry["results"]) < external_offset + limit:
            logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        elif search_request.loadAll:
            logger.debug("All results requested. Continuing to search.")
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]

        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []

        waslocked = False
        before = arrow.now()
        if databaseLock.locked():
            logger.debug("Database accesses locked by other search. Will wait for our turn.")
            waslocked = True
        databaseLock.acquire()
        # Release the lock even if storing results raises something unexpected;
        # otherwise every later search would block on acquire() forever.
        try:
            if waslocked:
                after = arrow.now()
                took = (after - before).seconds * 1000 + (after - before).microseconds / 1000
                logger.debug("Waited %dms for database lock" % took)
            for indexer, queries_execution_result in result["results"].items():
                with db.atomic():
                    logger.info("%s returned %d results" % (indexer, len(queries_execution_result.results)))
                    for indexerResult in queries_execution_result.results:
                        if indexerResult.title is None or indexerResult.link is None or indexerResult.indexerguid is None:
                            logger.info("Skipping result with missing data: %s" % indexerResult)
                            continue
                        try:
                            searchResultId = hashlib.sha1(str(indexer.indexer.id) + indexerResult.indexerguid).hexdigest()
                            tryGetOrCreateSearchResultDbEntry(searchResultId, indexer.indexer.id, indexerResult)
                            indexerResult.searchResultId = searchResultId
                            search_results.append(indexerResult)
                        except (IntegrityError, OperationalError) as e:
                            logger.error("Error while trying to save search result to database. Skipping it. Error: %s" % e)

                cache_entry["indexer_infos"][indexer].update(
                    {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known,
                     "indexer_search": queries_execution_result.indexerSearchEntry, "rejected": queries_execution_result.rejected, "processed_results": queries_execution_result.loaded_results})
                if queries_execution_result.has_more:
                    indexers_to_call.append(indexer)
                    logger.debug("%s still has more results so we could use it the next round" % indexer)

                if queries_execution_result.total_known:
                    if not cache_entry["indexer_infos"][indexer]["total_included"]:
                        cache_entry["total"] += queries_execution_result.total
                        logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                        cache_entry["indexer_infos"][indexer]["total_included"] = True
                elif queries_execution_result.has_more:
                    logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                    cache_entry["total"] += 100
                for rejectKey in cache_entry["rejected"].keys():
                    if rejectKey in cache_entry["indexer_infos"][indexer]["rejected"].keys():
                        cache_entry["rejected"][rejectKey] += cache_entry["indexer_infos"][indexer]["rejected"][rejectKey]
        finally:
            databaseLock.release()

        logger.debug("Searching for duplicates")
        numberResultsBeforeDuplicateRemoval = len(search_results)
        grouped_by_sameness, uniqueResultsPerIndexer = find_duplicates(search_results)
        allresults = []
        for group in grouped_by_sameness:
            if search_request.internal:
                for i in group:
                    # We give each group of results a unique value by which they can be identified later
                    i.hash = hash(group[0].details_link)
                    allresults.append(i)

            else:
                # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                group = sorted(group, key=lambda x: x.epoch, reverse=True)
                group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                allresults.append(group[0])
        search_results = allresults

        with databaseLock:
            for indexer, infos in cache_entry["indexer_infos"].items():
                if indexer.name in uniqueResultsPerIndexer.keys():  # If the search failed it isn't contained in the duplicates list
                    uniqueResultsCount = uniqueResultsPerIndexer[infos["indexer"]]
                    processedResults = infos["processed_results"]
                    # Guard against an empty result set so the log line can never raise ZeroDivisionError
                    if numberResultsBeforeDuplicateRemoval > 0:
                        uniqueShare = 100 * uniqueResultsCount / numberResultsBeforeDuplicateRemoval
                    else:
                        uniqueShare = 0
                    logger.debug("Indexer %s had a unique results share of %d%% (%d of %d total results were only provided by this indexer)" % (indexer.name, uniqueShare, uniqueResultsCount, numberResultsBeforeDuplicateRemoval))
                    infos["indexer_search"].uniqueResults = uniqueResultsCount
                    infos["indexer_search"].processedResults = processedResults
                    infos["indexer_search"].save()

        if not search_request.internal:
            countAfter = len(search_results)
            countRemoved = numberResultsBeforeDuplicateRemoval - countAfter
            logger.info("Removed %d duplicates from %d results" % (countRemoved, numberResultsBeforeDuplicateRemoval))

        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if len(indexers_to_call) == 0:
        logger.info("All indexers exhausted")
    elif len(cache_entry["results"]) >= external_offset + limit:
        logger.debug("Loaded a total of %d results which is enough for the %d requested. Stopping search." % (len(cache_entry["results"]), (external_offset + limit)))

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    for k, v in cache_entry["rejected"].items():
        if v > 0:
            logger.info("Rejected %d because: %s" % (v, k))
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset, "rejected": cache_entry["rejected"].items()}
示例#8
0
def search(internal, search_request):
    """Run a search against the picked indexers, caching results between paged calls.

    Cache entries not accessed within the last five minutes are evicted first.
    A new search (unknown hash or offset 0) picks indexers and creates a
    ``Search`` object; otherwise the cached entry is reused. Indexers that
    still have more results are queried until enough results for
    ``offset + limit`` are cached or no indexer has more.

    :param internal: whether the search was started internally; internal
        searches return all cached results from the offset on and keep
        duplicates (tagged with a group hash)
    :param search_request: request object; its ``offset`` and possibly its
        ``title`` are modified in place
    :return: dict with the keys ``results``, ``indexer_infos``, ``dbsearchid``,
        ``total`` and ``offset``
    """
    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)
    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys(
    ) or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug(
            "Didn't find this query in cache or want to do a new search")
        cache_entry = {
            "results": [],
            "indexer_infos": {},
            "total": 0,
            "last_access": arrow.utcnow(),
            "offset": 0
        }
        indexers_to_call, with_query_generation = pick_indexers(
            query_supplied=(search_request.query is not None
                            and search_request.query != ""),
            identifier_key=search_request.identifier_key,
            internal=internal,
            selected_indexers=search_request.indexers)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {
                "has_more": True,
                "search_request": search_request,
                "total_included": False
            }
        dbsearch = Search(internal=internal,
                          query=search_request.query,
                          category=search_request.category,
                          identifier_key=search_request.identifier_key,
                          identifier_value=search_request.identifier_value,
                          season=search_request.season,
                          episode=search_request.episode,
                          type=search_request.type,
                          username=request.authorization.username
                          if request.authorization is not None else None)
        cache_entry["dbsearch"] = dbsearch

        if with_query_generation and search_request.identifier_key and search_request.title is None:
            # Resolving the title is best effort: a failed lookup must not abort the search,
            # but it should leave a trace and must not swallow KeyboardInterrupt/SystemExit.
            try:
                search_request.title = infos.title_from_id(
                    search_request.identifier_key,
                    search_request.identifier_value)
            except Exception as e:
                logger.debug("Unable to find title for %s=%s: %s" %
                             (search_request.identifier_key,
                              search_request.identifier_value, e))
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [
            indexer for indexer, info in cache_entry["indexer_infos"].items()
            if info["has_more"]
        ]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

        logger.debug(
            "Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results."
        )
    while len(cache_entry["results"]) < external_offset + limit and len(
            indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" %
                     ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" %
                     search_request.offset)
        result = search_and_handle_db(
            dbsearch, {x: search_request
                       for x in indexers_to_call})
        search_results = []
        # Rebuilt below from the indexers that report more results
        indexers_to_call = []

        for indexer, queries_execution_result in result["results"].items():
            search_results.extend(queries_execution_result.results)
            logger.debug("%s returned %d results" %
                         (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update({
                "did_search":
                queries_execution_result.didsearch,
                "indexer":
                indexer.name,
                "search_request":
                search_request,
                "has_more":
                queries_execution_result.has_more,
                "total":
                queries_execution_result.total,
                "total_known":
                queries_execution_result.total_known,
                "indexer_search":
                queries_execution_result.indexerSearchEntry
            })
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug(
                    "%s still has more results so we could use it the next round"
                    % indexer)

            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug(
                        "%s reports %d total results. We'll include in the total this time only"
                        % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer][
                        "total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug(
                    "%s doesn't report an exact number of results so let's just add another 100 to the total"
                    % indexer)
                cache_entry["total"] += 100

        if internal or config.settings.searching.removeDuplicatesExternal:
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].guid)
                        allresults.append(i)

                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group,
                                   key=lambda x: x.indexerscore,
                                   reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" %
                            (countRemoved, countBefore))
        search_results = sorted(search_results,
                                key=lambda x: x.epoch,
                                reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if internal:
        logger.debug(
            "We have %d cached results and return them all because we search internally"
            % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(
            cache_entry["results"][external_offset:])
    else:
        logger.debug(
            "We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search"
            % (len(cache_entry["results"]), external_offset,
               external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(
            cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()

    return {
        "results": nzb_search_results,
        "indexer_infos": cache_entry["indexer_infos"],
        "dbsearchid": cache_entry["dbsearch"].id,
        "total": cache_entry["total"],
        "offset": external_offset
    }
示例#9
0
def _restrictions_apply(setting, internal):
    """Tell whether a word-restriction setting covers the current kind of search.

    :param setting: the configured value; compared against "both", "internal" and "external"
    :param internal: the request's ``internal`` flag
    :return: a truthy value if the restrictions should be applied, a falsy one otherwise
    """
    return setting == "both" or (setting == "internal" and internal) or (setting == "external" and not internal)


def _split_word_list(words):
    """Split a comma separated string into lower-cased, stripped words.

    Empty entries (as produced by "a,,b") are dropped before stripping, so an entry
    that consists only of whitespace is kept as an empty string.
    """
    return [x.lower().strip() for x in words.split(",") if x]


def search(search_request):
    """Run a search across the picked indexers and return one page of results.

    Results are collected in ``pseudo_cache`` under ``search_request.search_hash`` so that
    follow-up calls with a non-zero offset can continue where the previous call stopped.
    A call with offset 0 (or with an unknown hash) always starts a fresh search.

    Side effects visible in this function:
      * ``search_request`` is mutated (``maxage``, ``category``, ``forbiddenWords``,
        ``requiredWords``, ``query`` and ``offset`` may be overwritten).
      * Cache entries not accessed within the last five minutes are dropped from ``pseudo_cache``.
      * ``SearchResult`` rows older than the configured number of days are deleted and
        a ``SearchResult`` row is created or fetched for every returned result.

    :param search_request: the request object; its ``maxage``, ``limit``, ``offset``,
        ``search_hash``, ``category``, ``internal``, ``query``, ``identifier_key``,
        ``identifier_value``, ``season``, ``episode``, ``type`` and ``username``
        attributes are read here
    :return: dict with the keys ``results`` (deep copies of the cached results for the
        requested window, or everything from the offset on for internal searches),
        ``indexer_infos``, ``dbsearchid``, ``total`` and ``offset``
    """
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    # NOTE(review): replace() with a plural unit relies on the shifting behaviour of the arrow
    # release this project pins; newer arrow releases expect shift() instead - confirm before upgrading.
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    # Compute the cutoff once so that counting and deleting always use the same date
    oldestDateToKeep = datetime.date.today() - datetime.timedelta(days=keepFor)
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < oldestDateToKeep).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < oldestDateToKeep).execute()
    elif logger.isEnabledFor(logging.DEBUG):
        # Only pay for the extra count query if the message will actually be emitted
        logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    # Compare the converted offset: the raw attribute may not be an int (it is passed through int() above)
    if search_hash not in pseudo_cache or external_offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}

        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type,
                          username=search_request.username)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = _restrictions_apply(config.settings.searching.applyRestrictions, search_request.internal)
        applyRestrictionsCategory = _restrictions_apply(category.applyRestrictions, search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend(_split_word_list(config.settings.searching.forbiddenWords))
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend(_split_word_list(config.settings.searching.requiredWords))

        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend(_split_word_list(category.forbiddenWords))
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend(_split_word_list(category.requiredWords))

        if search_request.query:
            # Terms prefixed with "--" or "!" are treated as NOT terms: they are cut out of the query and added to the forbidden words
            notTermsPattern = r"[\s|\b](\-\-|!)(?P<term>\w+)"
            forbiddenWords = [str(x[1]) for x in re.findall(notTermsPattern, search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(notTermsPattern, "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))

                search_request.forbiddenWords.extend(forbiddenWords)

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        # Only ask the indexers again that reported more results the last time
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]

        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        # Rebuilt below from the indexers that report more results
        indexers_to_call = []

        for indexer, queries_execution_result in result["results"].items():
            #Drastically improves db access time but means that if one database write fails all fail. That's a risk we need to take
            with db.atomic():
                logger.debug("%s returned %d results. Writing them to database..." % (indexer, len(queries_execution_result.results)))
                # A distinct name is used here so the outer "result" dict is not shadowed while it is iterated
                for indexer_result in queries_execution_result.results:
                    if indexer_result.title is None or indexer_result.link is None or indexer_result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % indexer_result)
                        continue
                    searchResult, _ = SearchResult().create_or_get(indexer=indexer.indexer, guid=indexer_result.indexerguid, title=indexer_result.title, link=indexer_result.link, details=indexer_result.details_link)
                    indexer_result.searchResultId = searchResult.id
                    search_results.append(indexer_result)
                logger.debug("Written results results to database")

                cache_entry["indexer_infos"][indexer].update(
                    {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known,
                     "indexer_search": queries_execution_result.indexerSearchEntry})
                if queries_execution_result.has_more:
                    indexers_to_call.append(indexer)
                    logger.debug("%s still has more results so we could use it the next round" % indexer)

                if queries_execution_result.total_known:
                    # An exact total is only added once per indexer, otherwise every round would count it again
                    if not cache_entry["indexer_infos"][indexer]["total_included"]:
                        cache_entry["total"] += queries_execution_result.total
                        logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                        cache_entry["indexer_infos"][indexer]["total_included"] = True
                elif queries_execution_result.has_more:
                    logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                    cache_entry["total"] += 100

        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            logger.debug("Searching for duplicates")
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if search_request.internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].details_link)
                        allresults.append(i)

                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    # Deep copies are handed out so callers cannot modify the cached result objects
    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
示例#10
0
    def testThatDatabaseValuesAreStored(self):
        """Check which database rows a search with one working and one failing indexer leaves behind.

        Two mocked newznab indexers are prepared, the mock at index 1 is replaced by a
        catch-all response with status 500, and a search is run. Afterwards the stored
        Search, IndexerSearch, IndexerApiAccess and IndexerStatus rows are inspected.
        """
        with self.app.test_request_context('/'):
            with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
                # One list of items per indexer
                newznabItems = [
                    [mockbuilder.buildNewznabItem(title="title1", pubdate=arrow.get(1000).format("ddd, DD MMM YYYY HH:mm:ss Z"), size=1000, indexer_name="newznab1")],
                    [mockbuilder.buildNewznabItem(title="title2", pubdate=arrow.get(1000).format("ddd, DD MMM YYYY HH:mm:ss Z"), size=1000, indexer_name="newznab2")]
                ]

                self.prepareSearchMocks(rsps, indexerCount=len(newznabItems), newznabItems=newznabItems)
                # Make the second access unsuccessful
                # (drops the registered mock at index 1 and adds a catch-all GET mock answering with status 500)
                rsps._urls.pop(1)
                rsps.add(responses.GET, r".*",
                         body="an error message", status=500,
                         content_type='application/x-html')

                searchRequest = SearchRequest(type="search", query="aquery", category="acategory", identifier_key="imdbid", identifier_value="animdbid", season=1, episode=2, indexers="newznab1|newznab2")
                result = search.search(searchRequest)
                results = result["results"]
                # Only one of the two prepared items is expected to come back
                self.assertEqual(1, len(results))

                # The search itself; the category "acategory" is expected to be stored as "All"
                dbSearch = Search().get()
                self.assertEqual(True, dbSearch.internal)
                self.assertEqual("aquery", dbSearch.query)
                self.assertEqual("All", dbSearch.category)
                self.assertEqual("imdbid", dbSearch.identifier_key)
                self.assertEqual("animdbid", dbSearch.identifier_value)
                self.assertEqual("1", dbSearch.season)
                self.assertEqual("2", dbSearch.episode)
                self.assertEqual("search", dbSearch.type)
                # NOTE(review): the hour 18 assertions presumably rely on a frozen clock set up outside this method - confirm
                self.assertEqual(18, dbSearch.time.hour)

                # One IndexerSearch row per indexer, both linked to the search above
                indexerSearch1 = IndexerSearch.get(IndexerSearch.indexer == Indexer.get(Indexer.name == "newznab1"))
                self.assertEqual(indexerSearch1.search, dbSearch)
                self.assertEqual(18, indexerSearch1.time.hour)

                indexerSearch2 = IndexerSearch.get(IndexerSearch.indexer == Indexer.get(Indexer.name == "newznab2"))
                self.assertEqual(indexerSearch2.search, dbSearch)
                self.assertEqual(18, indexerSearch2.time.hour)

                # Sorted so the recorded URLs can be matched to the indexers by position
                calledUrls = sorted([x.request.url for x in rsps.calls])

                # API access of the first indexer: expected to be recorded as successful
                indexerApiAccess1 = IndexerApiAccess.get(IndexerApiAccess.indexer == Indexer.get(Indexer.name == "newznab1"))
                self.assertEqual(indexerSearch1, indexerApiAccess1.indexer_search)
                self.assertEqual(18, indexerApiAccess1.time.hour)
                self.assertEqual("search", indexerApiAccess1.type)
                self.assertEqual(calledUrls[0], indexerApiAccess1.url)
                self.assertTrue(indexerApiAccess1.response_successful)
                self.assertEqual(0, indexerApiAccess1.response_time)
                self.assertIsNone(indexerApiAccess1.error)

                # API access of the second indexer: expected to be recorded as failed, without a response time
                indexerApiAccess2 = IndexerApiAccess.get(IndexerApiAccess.indexer == Indexer.get(Indexer.name == "newznab2"))
                self.assertEqual(indexerSearch2, indexerApiAccess2.indexer_search)
                self.assertEqual(18, indexerApiAccess2.time.hour)
                self.assertEqual("search", indexerApiAccess2.type)
                self.assertEqual(calledUrls[1], indexerApiAccess2.url)
                self.assertFalse(indexerApiAccess2.response_successful)
                self.assertIsNone(indexerApiAccess2.response_time)
                self.assertTrue("Connection refused" in indexerApiAccess2.error)

                # The failure is also expected to show up in the status of the second indexer
                indexerStatus2 = IndexerStatus.get(IndexerStatus.indexer == Indexer.get(Indexer.name == "newznab2"))
                self.assertEqual(1, indexerStatus2.level)
                self.assertTrue("Connection refused" in indexerStatus2.reason)