Exemplo n.º 1
0
 def __init__(self, config, name):
     """Set up a PipelineService from a configuration object and a name.

     Stores ``config`` and ``name`` on the instance, creates a fresh
     SearchEngineManager, starts with no query logger and post-logging
     switched off, then delegates to ``_init_filters()``.
     """
     self.config, self.name = config, name
     self.searchEngineManager = SearchEngineManager()
     # Logging is off until a logger is attached to the instance.
     self.query_logger, self.postLogging = None, False
     self._init_filters()
Exemplo n.º 2
0
 def __init__(self, config, name):
   """Set up a PipelineService from a configuration object and a name.

   Stores ``config`` and ``name`` on the instance, creates a fresh
   SearchEngineManager, starts with no query logger and post-logging
   switched off, then delegates to ``_init_filters()``.
   """
   self.config, self.name = config, name
   self.searchEngineManager = SearchEngineManager()
   # Logging is off until a logger is attached to the instance.
   self.query_logger, self.postLogging = None, False
   self._init_filters()
Exemplo n.º 3
0
class PipelineService(object):
  """Models the configuration of a Pipeline and the search engines using it.

  A pipeline consists of query filters/modifiers and result
  filters/modifiers. The ``_run_query_filters``, ``_run_query_modifiers``,
  ``_run_result_filters`` and ``_run_result_modifiers`` hooks called below
  are not defined in this excerpt and are expected to be provided elsewhere.
  """

  def __init__(self, config, name):
    """Constructor for PipelineService.

    Parameters:

    * config: configuration object, stored as-is on the instance
    * name: name of this service, stored as-is on the instance
    """
    self.config = config
    self.name = name
    self.searchEngineManager = SearchEngineManager()
    self.query_logger = None  # No query logging until a logger is attached
    self.postLogging = False  # When true, the processed query is logged too
    self._init_filters()

  def _init_filters(self):
    """Reset all four pipeline stages to empty lists."""
    self.query_filters = []
    self.query_modifiers = []
    self.results_filters = []
    self.results_modifiers = []

  def searchSpecificEngine(self, query, searchEngineName, offset=0):
    """
    Search a specific search engine only, if it's currently stored by our 'SearchEngineManager', using the query and result pipeline as currently defined.

    Parameters:

    * query (puppy.model.Query): search query
    * searchEngineName (str): the name of the search engine to search; it is only searched if it's currently stored
    * offset (int): result offset

    Returns:

    * results_dict (dictionary of puppy.model.Response): the key being the name of the search engine and the value the response object; empty if the engine is not stored
    """
    results_dict = {}
    searchEngine = self.searchEngineManager.get_search_engine(searchEngineName) # None if doesn't exist

    # The query pipeline is only run when the engine is actually stored by the manager
    if searchEngine:
      query = self._runQueryPipeline(query)
      results_dict[searchEngineName] = self._getResults(query, offset, searchEngine)

    return results_dict

  def searchAll(self, query, offset=0):
    """
    Search all the search engines currently stored by our 'SearchEngineManager' using the query and result pipeline as currently defined.

    Parameters:

    * query (puppy.model.Query): search query
    * offset (int): result offset

    Returns:

    * results_dict (dictionary of puppy.model.Response): the key being the name of the search engine and the value the response object
    """
    results_dict = {}
    query = self._runQueryPipeline(query) # Only run this once, it's the same for each search engine

    # items() rather than iteritems() so the loop works on both Python 2 and Python 3
    for engine_name, engine in self.searchEngineManager.get_search_engines().items():
      results_dict[engine_name] = self._getResults(query, offset, engine) # I.e. run the defined result pipeline for this engine

    return results_dict

  def _runQueryPipeline(self, query):
    """ Run through the defined query pipeline and return either the processed query or raise an exception (QueryRejectionError) """
    # Log the query sent to the pipeline manager before any processing
    if self.query_logger:
      self.query_logger.log(query)

    # Run the query filter pipeline; its return value is not used here
    self._run_query_filters(query)

    # Run through the query modifier pipeline and get the modified query back
    query = self._run_query_modifiers(query)

    # Log the query after processing (if it's not been rejected) if postLogging is enabled
    if self.query_logger and self.postLogging:
      self.query_logger.log(query, processed=True) # Processed i.e. the query after going through the query pipeline

    return query # Return the modified query unless it was rejected

  def _getResults(self, query, offset, search_engine):
    """
    Query one search engine and run its results through the result pipeline.

    Parameters:

    * query: the (already processed) query passed to search_engine.search
    * offset (int): result offset passed to search_engine.search
    * search_engine: object exposing search(query, offset)

    Returns:

    * the response returned by the engine, with its 'entries' replaced by the filtered and modified results

    Raises:

    * SearchEngineError: if the engine's search call raises any Exception
    """
    try:  # Get the search results from the current search engine
      results = search_engine.search(query, offset)
    except Exception as e:  # 'as' form is valid on Python 2.6+ and Python 3
      # NOTE(review): "Test" looks like a placeholder argument - confirm what SearchEngineError expects here
      raise SearchEngineError("Test", e)

    # Run the result filter pipeline, then the result modifier pipeline
    search_results = self._run_result_filters(results.entries)
    search_results = self._run_result_modifiers(search_results)

    # Set the results entries to be the processed results then return them
    results.entries = search_results
    return results
Exemplo n.º 4
0
class PipelineService(object):
    """Models the configuration of a Pipeline and the search engines using it.

    A pipeline consists of query filters/modifiers and result
    filters/modifiers. The ``_run_query_filters``, ``_run_query_modifiers``,
    ``_run_result_filters`` and ``_run_result_modifiers`` hooks called below
    are not defined in this excerpt and are expected to be provided elsewhere.
    """

    def __init__(self, config, name):
        """Constructor for PipelineService.

        Parameters:

        * config: configuration object, stored as-is on the instance
        * name: name of this service, stored as-is on the instance
        """
        self.config = config
        self.name = name
        self.searchEngineManager = SearchEngineManager()
        self.query_logger = None  # No query logging until a logger is attached
        self.postLogging = False  # When true, the processed query is logged too
        self._init_filters()

    def _init_filters(self):
        """Reset all four pipeline stages to empty lists."""
        self.query_filters = []
        self.query_modifiers = []
        self.results_filters = []
        self.results_modifiers = []

    def searchSpecificEngine(self, query, searchEngineName, offset=0):
        """
        Search a specific search engine only, if it's currently stored by our 'SearchEngineManager', using the query and result pipeline as currently defined.

        Parameters:

        * query (puppy.model.Query): search query
        * searchEngineName (str): the name of the search engine to search; it is only searched if it's currently stored
        * offset (int): result offset

        Returns:

        * results_dict (dictionary of puppy.model.Response): the key being the name of the search engine and the value the response object; empty if the engine is not stored
        """
        results_dict = {}
        searchEngine = self.searchEngineManager.get_search_engine(
            searchEngineName)  # None if doesn't exist

        # The query pipeline is only run when the engine is actually stored by the manager
        if searchEngine:
            query = self._runQueryPipeline(query)
            results_dict[searchEngineName] = self._getResults(
                query, offset, searchEngine)

        return results_dict

    def searchAll(self, query, offset=0):
        """
        Search all the search engines currently stored by our 'SearchEngineManager' using the query and result pipeline as currently defined.

        Parameters:

        * query (puppy.model.Query): search query
        * offset (int): result offset

        Returns:

        * results_dict (dictionary of puppy.model.Response): the key being the name of the search engine and the value the response object
        """
        results_dict = {}
        # Only run this once, it's the same for each search engine
        query = self._runQueryPipeline(query)

        # items() rather than iteritems() so the loop works on both Python 2 and Python 3
        engines = self.searchEngineManager.get_search_engines()
        for engine_name, engine in engines.items():
            # I.e. run the defined result pipeline for this engine
            results_dict[engine_name] = self._getResults(
                query, offset, engine)

        return results_dict

    def _runQueryPipeline(self, query):
        """ Run through the defined query pipeline and return either the processed query or raise an exception (QueryRejectionError) """
        # Log the query sent to the pipeline manager before any processing
        if self.query_logger:
            self.query_logger.log(query)

        # Run the query filter pipeline; its return value is not used here
        self._run_query_filters(query)

        # Run through the query modifier pipeline and get the modified query back
        query = self._run_query_modifiers(query)

        # Log the query after processing (if it's not been rejected) if postLogging is enabled
        if self.query_logger and self.postLogging:
            # Processed i.e. the query after going through the query pipeline
            self.query_logger.log(query, processed=True)

        return query  # Return the modified query unless it was rejected

    def _getResults(self, query, offset, search_engine):
        """
        Query one search engine and run its results through the result pipeline.

        Parameters:

        * query: the (already processed) query passed to search_engine.search
        * offset (int): result offset passed to search_engine.search
        * search_engine: object exposing search(query, offset)

        Returns:

        * the response returned by the engine, with its 'entries' replaced by the filtered and modified results

        Raises:

        * SearchEngineError: if the engine's search call raises any Exception
        """
        try:  # Get the search results from the current search engine
            results = search_engine.search(query, offset)
        except Exception as e:  # 'as' form is valid on Python 2.6+ and Python 3
            # NOTE(review): "Test" looks like a placeholder argument - confirm what SearchEngineError expects here
            raise SearchEngineError("Test", e)

        # Run the result filter pipeline, then the result modifier pipeline
        search_results = self._run_result_filters(results.entries)
        search_results = self._run_result_modifiers(search_results)

        # Set the results entries to be the processed results then return them
        results.entries = search_results
        return results