def testIndexersApiLimits(self):
    config.settings.searching.generate_queries = []
    self.newznab1.hitLimit = 3
    self.newznab1.hitLimitResetTime = None
    config.settings.indexers = [self.newznab1]
    read_indexers_from_config()
    search_request = SearchRequest()
    indexers = search.pick_indexers(search_request)
    self.assertEqual(1, len(indexers))
    dbsearch = Search(internal=True, time=arrow.utcnow().datetime)
    dbsearch.save()
    indexer = Indexer().get(name="newznab1")

    # Two accesses, one and 12 hours ago
    IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-1).datetime, type="search", url="", response_successful=True).save()
    IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-12).datetime, type="search", url="", response_successful=True).save()
    self.assertEqual(1, len(search.pick_indexers(search_request)))

    # Another one 20 hours ago, so the limit of 3 should be reached
    IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-20).datetime, type="search", url="", response_successful=True).save()
    self.assertEqual(0, len(search.pick_indexers(search_request)))
def search(internal, search_request):
    # Drop cached searches that haven't been accessed for more than 5 minutes
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)
    limit = search_request.limit  # todo: use actual configured limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        indexers_to_call, with_query_generation = pick_indexers(query_supplied=True if search_request.query is not None and search_request.query != "" else False, identifier_key=search_request.identifier_key, internal=internal, selected_indexers=search_request.indexers)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=internal, query=search_request.query, category=search_request.category, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type)
        dbsearch.save()
        cache_entry["dbsearch"] = dbsearch
        if with_query_generation and search_request.identifier_key and search_request.title is None:
            search_request.title = infos.title_from_id(search_request.identifier_key, search_request.identifier_value)
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        search_results = []
        indexers_to_call = []
        for indexer, queries_execution_result in result["results"].items():
            search_results.extend(queries_execution_result.results)
            logger.debug("%s returned %d results" % (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update({"search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.dbentry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include it in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    # todo: perhaps move duplicate handling here. Would allow recognizing duplicates that were added later, for example when 100 results were already loaded and we then get 101-200 and 100 and 101 are duplicates
    # todo: then make it configurable whether duplicates are removed for API searches, internal searches, both or none. Would also mean that we return 100 actually different results; otherwise in the worst case we could for example return 50 originals and 50 duplicates
    if internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearch": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
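

# Minimal paging sketch illustrating the contract of search() above: the caller
# keeps the same SearchRequest (so its search_hash hits the pseudo_cache) and only
# increases the offset, reading "results" and "total" from the returned dict.
# Assumptions: SearchRequest() is constructible without arguments (as in the test
# above) and its "query", "limit" and "offset" attributes can be assigned directly;
# the helper name page_through_results is hypothetical and not part of the module.
def page_through_results(query, page_size=100):
    search_request = SearchRequest()
    search_request.query = query
    search_request.limit = page_size
    collected = []
    offset = 0
    while True:
        search_request.offset = offset
        response = search(internal=False, search_request=search_request)
        collected.extend(response["results"])
        # Stop when the reported total is exhausted or a page comes back empty
        if not response["results"] or offset + page_size >= response["total"]:
            break
        offset += page_size
    return collected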