Exemplo n.º 1
0
    def execute_queries(self, queries, searchRequest):
        """Execute all pending search queries against this indexer and collect the results.

        Queries are popped from the end of ``queries``; the list is consumed in place and
        any follow-up queries returned by the result parser (e.g. for the next result
        page) are appended to it. A query that was already executed is skipped.

        Neither the search entry nor the API access entry is saved here
        (``saveToDb=False`` is passed on and no ``save()`` is called); both objects are
        handed back in the result.

        :param queries: mutable list of hashable queries to execute
        :param searchRequest: passed through unchanged to ``process_query_result``
        :return: a ``QueriesExecutionResult``; ``didsearch`` is False when there was nothing to execute
        """
        if not queries:
            return QueriesExecutionResult(didsearch=False, results=[], indexerSearchEntry=None, indexerApiAccessEntry=None, indexerStatus=None, total=0, loaded_results=0, total_known=True, has_more=False)
        results = []
        executed_queries = set()
        psearch = IndexerSearch(indexer=self.indexer)
        papiaccess = IndexerApiAccess()
        indexerStatus = None
        total_results = 0
        # total_known / has_more always reflect the most recently parsed result page
        total_known = False
        has_more = False
        while queries:
            query = queries.pop()
            if query in executed_queries:
                # To make sure that in case an offset is reported wrong or we have a bug we don't get stuck in an endless loop
                continue

            try:
                request, papiaccess, indexerStatus = self.get_url_with_papi_access(query, "search", saveToDb=False)
                papiaccess.indexer_search = psearch

                executed_queries.add(query)

                if request is not None:
                    self.check_auth(request.text)
                    self.debug("Successfully loaded URL %s" % request.url)
                    try:
                        parsed_results = self.process_query_result(request.content, searchRequest)
                        results.extend(parsed_results.entries)  # Retrieve the processed results
                        queries.extend(parsed_results.queries)  # Add queries that were added as a result of the parsing, e.g. when the next result page should also be loaded
                        total_results += parsed_results.total
                        total_known = parsed_results.total_known
                        has_more = parsed_results.has_more

                        papiaccess.response_successful = True
                        self.handle_indexer_success(False)
                    except IndexerResultParsingException as e:
                        # Log with the parser's own message, then let the outer handler record the
                        # failure. Swallowing it here would leave the access entry with whatever
                        # success state it had before parsing and skip the failure handling.
                        self.error("Error while processing search results from indexer %s" % e)
                        raise
                    except Exception:
                        # Any other error during parsing is reported as a parsing failure
                        self.exception("Error while processing search results from indexer %s" % self)
                        raise IndexerResultParsingException("Error while parsing the results from indexer", self)
            except IndexerAuthException as e:
                self.error("Unable to authorize with %s: %s" % (e.search_module, e.message))
                papiaccess.error = "Authorization error :%s" % e.message
                self.handle_indexer_failure(reason="Authentication failed", disable_permanently=True)
                papiaccess.response_successful = False
            except IndexerAccessException as e:
                self.error("Unable to access %s: %s" % (e.search_module, e.message))
                papiaccess.error = "Access error: %s" % e.message
                self.handle_indexer_failure(reason="Access failed")
                papiaccess.response_successful = False
            except IndexerResultParsingException as e:
                # Store the message in ``error`` like every other handler does
                papiaccess.error = "Access error: %s" % e.message
                self.handle_indexer_failure(reason="Parsing results failed")
                papiaccess.response_successful = False
            except Exception as e:
                self.exception("An error error occurred while searching: %s", e)
                if papiaccess is not None:
                    papiaccess.error = "Unknown error :%s" % e
                    papiaccess.response_successful = False
            finally:
                # Runs after every attempted query: mirror the outcome of the latest API access
                # and the running result count onto the search entry.
                if papiaccess is not None:
                    psearch.successful = papiaccess.response_successful
                else:
                    self.error("Unable to save API response to database")
                psearch.resultsCount = total_results
        return QueriesExecutionResult(didsearch=True, results=results, indexerSearchEntry=psearch, indexerApiAccessEntry=papiaccess, indexerStatus=indexerStatus, total=total_results, loaded_results=len(results), total_known=total_known, has_more=has_more)
Exemplo n.º 2
0
    def execute_queries(self, queries, searchRequest):
        """Work through ``queries`` for this indexer and bundle everything that was found.

        The ``queries`` list is drained from its end and grows whenever the result
        parser returns follow-up queries (e.g. the next result page). Every distinct
        query is requested at most once.

        :param queries: mutable list of hashable queries to run
        :param searchRequest: forwarded as-is to ``process_query_result``
        :return: a ``QueriesExecutionResult`` describing the results, the search and
            API access entries, the indexer status and the rejection counts
        """
        if not len(queries):
            # Nothing to do: report that no search took place
            return QueriesExecutionResult(
                didsearch=False,
                results=[],
                indexerSearchEntry=None,
                indexerApiAccessEntry=None,
                indexerStatus=None,
                total=0,
                loaded_results=0,
                total_known=True,
                has_more=False,
                rejected=self.getRejectedCountDict())

        found = []
        already_run = set()
        total = 0
        total_known = False
        has_more = False
        status = None
        search_entry = IndexerSearch(indexer=self.indexer)
        api_access = IndexerApiAccess()
        rejected = self.getRejectedCountDict()

        while len(queries):
            current = queries.pop()
            if current in already_run:
                # Guards against an endless loop when an offset is reported wrong or we have a bug
                continue

            try:
                response, api_access, status = self.get_url_with_papi_access(current, "search", saveToDb=False)
                api_access.indexer_search = search_entry

                already_run.add(current)

                if response is None:
                    # No response to parse; the finally clause below still runs
                    continue

                if response.text == "":
                    raise IndexerResultParsingException("Indexer returned an empty page", self)
                self.check_auth(response.text)
                self.debug("Successfully loaded URL %s" % response.url)
                try:
                    parsed = self.process_query_result(response.content, searchRequest)
                    # Collect the parsed entries
                    found.extend(parsed.entries)
                    # Queue queries produced by the parsing, e.g. to load the next result page
                    queries.extend(parsed.queries)
                    total += parsed.total
                    total_known = parsed.total_known
                    has_more = parsed.has_more
                    rejected = parsed.rejected

                    api_access.response_successful = True
                    status = self.handle_indexer_success(False)
                except Exception:
                    # Whatever went wrong while parsing is reported as a parsing failure
                    self.exception("Error while processing search results from indexer %s" % self)
                    raise IndexerResultParsingException("Error while parsing the results from indexer", self)
            except IndexerAuthException as auth_error:
                api_access.error = "Authorization error :%s" % auth_error.message
                self.error(api_access.error)
                status = self.handle_indexer_failure(reason="Authentication failed", disable_permanently=True)
                api_access.response_successful = False
            except IndexerAccessException as access_error:
                api_access.error = "Access error: %s" % access_error.message
                self.error(api_access.error)
                status = self.handle_indexer_failure(reason="Access failed")
                api_access.response_successful = False
            except IndexerResultParsingException as parsing_error:
                api_access.error = "Access error: %s" % parsing_error.message
                self.error(api_access.error)
                status = self.handle_indexer_failure(reason="Parsing results failed")
                api_access.response_successful = False
            except Exception as unexpected:
                self.exception("An error error occurred while searching: %s", unexpected)
                if api_access is not None:
                    api_access.error = "Unknown error :%s" % unexpected
                    api_access.response_successful = False
            finally:
                # After every attempted query, mirror the latest outcome onto the search entry
                if api_access is None:
                    self.error("Unable to save API response to database")
                else:
                    search_entry.successful = api_access.response_successful
                search_entry.resultsCount = total

        return QueriesExecutionResult(
            didsearch=True,
            results=found,
            indexerSearchEntry=search_entry,
            indexerApiAccessEntry=api_access,
            indexerStatus=status,
            total=total,
            loaded_results=len(found),
            total_known=total_known,
            has_more=has_more,
            rejected=rejected)