def __init__(self, module_reference, use_cache=True,
             cache_name="wikipedia-cache"):
    """Set up the Wikipedia cooccurrence submodule.

    Args:
        module_reference: Forwarded unchanged to the parent constructor.
        use_cache: Stored on the instance as ``use_cache``.
        cache_name: Name handed to ``MongoDBCache`` for the backing store.
    """
    super().__init__(module_reference)
    self._name = "Wikipedia Cooccurrence"
    self._lang = "en"
    self.use_cache = use_cache
    backend = MongoDBCache(cache_name,
                           mongodb_location=DEFAULT_MONGODB_LOCATION)
    self.cache = backend
def __init__(self, module_reference, use_cache=True,
             cache_name="google-book-cache"):
    """Set up the Google Book submodule.

    Args:
        module_reference: Kept on the instance as ``_module_reference``.
        use_cache: Stored on the instance as ``use_cache``.
        cache_name: Name handed to ``MongoDBCache`` for the backing store.
    """
    super().__init__()
    self._name = "Google Book Submodule"
    self._module_reference = module_reference
    self.use_cache = use_cache
    # No in-memory snapshot yet; it starts out empty.
    self.internal_cache = None
    backend = MongoDBCache(cache_name,
                           mongodb_location=DEFAULT_MONGODB_LOCATION)
    self.cache = backend
def __init__(self, use_cache=True, cache_name="google-cache"):
    """Set up the Google Autocomplete querying system.

    Args:
        use_cache: Stored on the instance as ``use_cache``.
        cache_name: Name handed to ``MongoDBCache``; the resulting cache is
            passed to ``CachableQueryingSystem.__init__``.
    """
    backend = MongoDBCache(cache_name,
                           mongodb_location=DEFAULT_MONGODB_LOCATION)
    CachableQueryingSystem.__init__(self, backend)
    self._name = "Google Autocomplete"
    self.use_cache = use_cache
    # NOTE(review): presumably a delay in seconds between two queries --
    # confirm against the code that reads it.
    self.time_between_queries = 1.5
    self.default_number_suggestions = 10
    # Wall-clock timestamp taken at construction time.
    self.begin_time = time.time()
def _test_write_read_other(self):
    """Round-trip one entry through a cache at an explicit MongoDB location.

    The leading underscore keeps this out of default unittest discovery;
    ``"YOURLOCATION"` is a placeholder to replace before enabling it.

    The collection is dropped in a ``finally`` clause so a failing assertion
    does not leave the ``"testrw"`` collection populated, which would make
    the initial ``assertIsNone`` fail on the next run.
    """
    cache = MongoDBCache("testrw", mongodb_location="YOURLOCATION")
    try:
        # A fresh collection must not know the key yet.
        self.assertIsNone(cache.read_cache("elephant"))

        cache.write_cache("elephant", [("elephants are big", 0)])

        cache_value = cache.read_cache("elephant")
        self.assertIsNotNone(cache_value)
        # NOTE(review): the value written is a list of (text, score) pairs
        # but the expectation is a list of bare strings -- confirm that this
        # matches what read_cache returns for this backend.
        self.assertEqual(["elephants are big"], cache_value)
    finally:
        cache.delete_cache()
def __init__(self, module_reference, use_cache=True,
             cache_name="google-cache"):
    """Set up the Google Autocomplete submodule.

    Both base-class initialisers are invoked explicitly, in the same order
    as before: ``BrowserAutocompleteSubmodule`` first, then
    ``CachableQueryingSystem`` with a freshly built ``MongoDBCache``.

    Args:
        module_reference: Forwarded to ``BrowserAutocompleteSubmodule``.
        use_cache: Stored on the instance as ``use_cache``.
        cache_name: Name handed to ``MongoDBCache`` for the backing store.
    """
    BrowserAutocompleteSubmodule.__init__(self, module_reference)
    backend = MongoDBCache(cache_name,
                           mongodb_location=DEFAULT_MONGODB_LOCATION)
    CachableQueryingSystem.__init__(self, backend)

    # Submodule-specific settings, applied after the base initialisers.
    self._name = "Google Autocomplete"
    self.use_cache = use_cache
    self.default_number_suggestions = 10
    self.time_between_queries = 1.0
def __init__(self, module_reference, use_cache=True,
             cache_name="bing-cache", look_new=False):
    """Set up the Bing Autocomplete submodule.

    Both base-class initialisers are invoked explicitly, in the same order
    as before: ``BrowserAutocompleteSubmodule`` first, then
    ``CachableQueryingSystem`` with a freshly built ``MongoDBCache``.

    Args:
        module_reference: Forwarded to ``BrowserAutocompleteSubmodule``.
        use_cache: Stored on the instance as ``use_cache``.
        cache_name: Name handed to ``MongoDBCache`` for the backing store.
        look_new: Stored on the instance as ``look_new``.
    """
    BrowserAutocompleteSubmodule.__init__(self, module_reference)
    backend = MongoDBCache(cache_name,
                           mongodb_location=DEFAULT_MONGODB_LOCATION)
    CachableQueryingSystem.__init__(self, backend)

    # Submodule-specific settings, applied after the base initialisers.
    self._name = "Bing Autocomplete"
    self.use_cache = use_cache
    self.look_new = look_new
    self.default_number_suggestions = 8
    self.time_between_queries = 0.02
def test_creation(self):
    """Check the collection lifecycle as seen by two handles on one name.

    The collection must be absent before the first write, visible to both
    handles after it, and absent for both again once one handle deletes it.
    """
    first = MongoDBCache("test")
    self.assertFalse(first.exists_collection())

    # The first write is what makes the collection exist.
    first.write_cache("test", [])
    self.assertTrue(first.exists_collection())

    # A second handle on the same name sees the existing collection.
    second = MongoDBCache("test")
    self.assertTrue(second.exists_collection())

    # Deleting through one handle removes it for both.
    first.delete_cache()
    for handle in (first, second):
        self.assertFalse(handle.exists_collection())
def test_write_read(self):
    """Write one entry to the cache and read the same value back."""
    # Start from a clean slate: drop any leftover collection, then reopen.
    MongoDBCache("testrw").delete_cache()
    store = MongoDBCache("testrw")

    # Unknown keys read back as None.
    self.assertIsNone(store.read_cache("elephant"))

    store.write_cache("elephant", [["elephants are big", 0]])

    stored = store.read_cache("elephant")
    self.assertIsNotNone(stored)
    self.assertEqual([["elephants are big", 0]], stored)

    store.delete_cache()
class GoogleBookSubmodule(SubmoduleInterface):
    """Score generated facts by their hit count in Google Books.

    For every generated fact a query string is built with
    ``_get_query_from_fact`` and its ``totalItems`` count is looked up,
    first in the cache and then through the module-level ``service``
    client. Counts are normalised by the largest count seen in the batch
    and added to each fact's score.
    """

    def __init__(self, module_reference, use_cache=True,
                 cache_name="google-book-cache"):
        """Store the configuration and open the persistent cache.

        Args:
            module_reference: Kept as ``_module_reference`` and passed along
                when scores are added.
            use_cache: When false, ``read_cache`` always misses and
                ``write_cache`` does nothing.
            cache_name: Name handed to ``MongoDBCache``.
        """
        super().__init__()
        self._module_reference = module_reference
        self._name = "Google Book Submodule"
        self.use_cache = use_cache
        self.cache = MongoDBCache(cache_name,
                                  mongodb_location=DEFAULT_MONGODB_LOCATION)
        # Snapshot of the persistent cache, loaded lazily by read_cache().
        self.internal_cache = None

    def clean(self):
        """Run the parent cleanup and drop the in-memory cache snapshot."""
        super().clean()
        self.internal_cache = None

    def _setup_cache(self):
        """Load a tab-separated ``key<TAB>count`` file into ``self._cache``.

        Relies on the module-level names ``cache_dir`` and ``cache_file``.
        Nothing else in this class reads ``self._cache``.
        """
        self._cache = dict()
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        if os.path.isfile(cache_file):
            with open(cache_file) as f:
                for line in f:
                    line = line.strip().split("\t")
                    # Lines without exactly two fields are ignored.
                    if len(line) == 2:
                        self._cache[line[0]] = int(line[1])

    def read_cache(self, query):
        """Return the cached count for *query* as a float, or ``None``.

        The whole persistent cache is read once via ``read_all()`` and kept
        in ``internal_cache``; later lookups are served from that snapshot.
        """
        if not self.use_cache:
            return None
        if self.internal_cache is None:
            self.internal_cache = self.cache.read_all()
        if query in self.internal_cache:
            return float(self.internal_cache[query])
        return None

    def write_cache(self, query, total):
        """Persist *total* for *query* when caching is enabled."""
        if self.use_cache:
            self.cache.write_cache(query, total)

    def _get_occurrences_books(self, query, only_cache):
        """Return the Google Books count for *query*.

        Args:
            query: Query string sent to the volumes endpoint.
            only_cache: When true, never contact the API.

        Returns:
            The cached value if there is one; ``-1`` when the value is not
            cached and the API may not or cannot be used; otherwise
            ``totalItems`` from the response, or ``0`` when ``match_query``
            rejects the response.
        """
        cache_value = self.read_cache(query)
        if cache_value is not None:
            return cache_value
        if only_cache or service is None:
            return -1
        req = service.volumes().list(q=query, maxResults=1)
        response = req.execute()
        if match_query(query, response):
            total = response["totalItems"]
        else:
            total = 0
        self.write_cache(query, total)
        # Throttle to the configured request rate.
        time.sleep(1.0 / calls_per_seconds)
        return total

    def process(self, input_interface):
        """Add a normalised Google Books score to every generated fact.

        Each query is resolved once. The counts from that pass are reused
        for scoring: ``read_cache`` serves a snapshot that does not contain
        values written during this call, so resolving again would repeat
        every API request and its rate-limit sleep.

        Args:
            input_interface: Object providing ``get_generated_facts()``.

        Returns:
            The same *input_interface*; its facts are updated in place.
        """
        logging.info("Start the verification using google book")
        global service
        if service is None:
            try:
                service = apiclient.discovery.build('books', 'v1',
                                                    developerKey=api_key)
            except Exception as exception:
                logging.warning("When initializing Google Book: %s",
                                exception)
                service = None
        if service is None:
            logging.info("No service found for Google Book")
            only_cache = True
        else:
            only_cache = False

        # Pass 1: resolve each query and track the largest count.
        occurrences_by_query = {}
        maxi = 0
        for generated_fact in input_interface.get_generated_facts():
            query = _get_query_from_fact(generated_fact)
            if occurrences_by_query.get(query, -1) != -1:
                # Same query already resolved for an earlier fact.
                continue
            occurrences = -1
            try:
                occurrences = self._get_occurrences_books(query, only_cache)
            except Exception as exception0:
                logging.warning("%s", exception0)
                # After a failure, stop contacting the API for this batch.
                only_cache = True
            occurrences_by_query[query] = occurrences
            maxi = max(maxi, occurrences)
        if maxi == 0:
            # Avoid dividing by zero when nothing was found.
            maxi = 1

        # Pass 2: score from the counts gathered above; -1 means unknown.
        for generated_fact in input_interface.get_generated_facts():
            query = _get_query_from_fact(generated_fact)
            occurrences = occurrences_by_query.get(query, -1)
            if occurrences != -1:
                generated_fact.get_score().add_score(
                    occurrences / maxi, self._module_reference, self)
        return input_interface
class WikipediaCooccurrenceSubmodule(ContentComparator):
    """Content comparator that uses a Wikipedia page as reference text."""

    def __init__(self, module_reference, use_cache=True,
                 cache_name="wikipedia-cache"):
        """Store the configuration and open the persistent cache.

        Args:
            module_reference: Forwarded unchanged to the parent constructor.
            use_cache: When false, cache reads always miss and writes are
                skipped.
            cache_name: Name handed to ``MongoDBCache``.
        """
        super().__init__(module_reference)
        self._name = "Wikipedia Cooccurrence"
        self._lang = "en"
        self.use_cache = use_cache
        self.cache = MongoDBCache(cache_name,
                                  mongodb_location=DEFAULT_MONGODB_LOCATION)

    def _get_wikipedia_page_content(self, name):
        """Return the text of the Wikipedia page best matching *name*.

        A cached value wins. Otherwise only the first search result is
        tried. A disambiguation error is followed through its first option
        at most twice, with parentheses stripped from the title on the
        second follow-up. A missing page is logged. Whatever was obtained,
        possibly ``None``, is written to the cache and returned.
        """
        cached = self.read_cache(name)
        if cached is not None:
            return cached

        content = None
        results = wikipedia.search(name)
        # For now, we only consider the first result
        if results:
            title = results[0]
            # Attempt 0 is the search hit; attempts 1 and 2 follow the
            # first option of the previous disambiguation page.
            for attempt in range(3):
                try:
                    content = wikipedia.page(title).content
                    break
                except wikipedia.exceptions.PageError:
                    logging.warning("Wikipedia page not found: " + name)
                    break
                except wikipedia.DisambiguationError as ambiguity:
                    # Not clear how often it happens
                    if attempt == 2 or not ambiguity.options:
                        break
                    title = ambiguity.options[0]
                    if attempt == 1:
                        title = title.replace("(", "").replace(")", "")

        self.write_cache(name, content)
        return content

    def write_cache(self, wikipedia_page, content):
        """Store *content* under a key derived from *wikipedia_page*."""
        if not self.use_cache:
            return
        # Spaces and slashes in the title both become underscores.
        key = wikipedia_page.translate(str.maketrans(" /", "__"))
        self.cache.write_cache(key, content)

    def read_cache(self, wikipedia_page):
        """Return the cached content for *wikipedia_page*, or ``None``."""
        if not self.use_cache:
            return None
        # Spaces and slashes in the title both become underscores.
        key = wikipedia_page.translate(str.maketrans(" /", "__"))
        return self.cache.read_cache(key)

    def get_contents(self, subject):
        """Return a one-element list holding the page content for *subject*."""
        page_text = self._get_wikipedia_page_content(subject)
        return [page_text]

    def setup_processing(self, input_interface):
        """Select the configured language in the ``wikipedia`` module."""
        wikipedia.set_lang(self._lang)