def search_diffbot_cache(term):
    """Search Diffbot's GLOBAL-INDEX collection for the double-quoted ``term``.

    Each returned object that carries text is run through a non-executing
    ``PageRequest`` (``run=False``) whose ``extract_sentences`` is applied to
    that text, and is then converted into a result dict.

    Args:
        term: The search term. It is wrapped in double quotes before being
            sent as the ``query`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``url``,
        ``search_provider`` (always ``'diffbot'``), ``author``, ``date``
        (``parse_date(...).isoformat()``), ``doc``, ``sentences`` and
        ``variants``. The list is empty when the response has no objects;
        in that case a diagnostic line is printed.
    """
    response = requests.get(
        'http://api.diffbot.com/v3/search',
        params={
            'token': config.credentials.diffbot,
            # requests URL-encodes ``params`` values itself; escaping the
            # query beforehand would encode it twice (``"`` -> ``%2522``).
            'query': '"{}"'.format(term),
            'col': 'GLOBAL-INDEX',
        },
    ).json()

    objects = response.get('objects')
    if not objects:
        if response.get('error'):
            # ``.get`` keeps this diagnostic path from raising when the
            # payload has an error message but no error code.
            print("Response Error '{}' (code: {})".format(
                response['error'], response.get('errorCode')))
        else:
            print("NO RESULTS")
        return []

    results = []
    for entry in objects:
        text = entry.get('text')
        if not text:
            # Objects without text yield no sentences; skip them.
            continue
        page_url = entry.get('pageUrl')
        pr = PageRequest(page_url, term, run=False)
        pr.extract_sentences(text)
        results.append({
            "title": entry.get('title'),
            "url": page_url,
            'search_provider': 'diffbot',
            "author": entry.get('author'),
            "date": parse_date(entry.get('date', '')).isoformat(),
            "doc": text,
            "sentences": pr.sentences,
            "variants": list(pr.variants),
        })
    return results
def search_diffbot_cache(term):
    """Search Diffbot's GLOBAL-INDEX collection for the double-quoted ``term``.

    Each returned object that carries text is run through a non-executing
    ``PageRequest`` (``run=False``) whose ``extract_sentences`` is applied to
    that text, and is then converted into a result dict.

    Args:
        term: The search term. It is wrapped in double quotes before being
            sent as the ``query`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``url``,
        ``search_provider`` (always ``'diffbot'``), ``author``, ``date``
        (``parse_date(...).isoformat()``), ``doc``, ``sentences`` and
        ``variants``. The list is empty when the response has no objects;
        in that case a diagnostic line is printed.
    """
    response = requests.get(
        'http://api.diffbot.com/v3/search',
        params={
            'token': config.credentials.diffbot,
            # requests URL-encodes ``params`` values itself; escaping the
            # query beforehand would encode it twice (``"`` -> ``%2522``).
            'query': '"{}"'.format(term),
            'col': 'GLOBAL-INDEX',
        },
    ).json()

    objects = response.get('objects')
    if not objects:
        if response.get('error'):
            # ``.get`` keeps this diagnostic path from raising when the
            # payload has an error message but no error code.
            print("Response Error '{}' (code: {})".format(
                response['error'], response.get('errorCode')))
        else:
            print("NO RESULTS")
        return []

    results = []
    for entry in objects:
        text = entry.get('text')
        if not text:
            # Objects without text yield no sentences; skip them.
            continue
        page_url = entry.get('pageUrl')
        pr = PageRequest(page_url, term, run=False)
        pr.extract_sentences(text)
        results.append({
            "title": entry.get('title'),
            "url": page_url,
            'search_provider': 'diffbot',
            "author": entry.get('author'),
            "date": parse_date(entry.get('date', '')).isoformat(),
            "doc": text,
            "sentences": pr.sentences,
            "variants": list(pr.variants),
        })
    return results
def search_duckduckgo(term):
    """Query the DuckDuckGo JSON API for ``term`` and collect its answers.

    Up to two entries are produced: one from the response's ``Abstract`` and
    one from its ``Definition``, each only when that field is non-empty. The
    text of each is passed to ``extract_sentences`` on a non-executing
    ``PageRequest`` (``run=False``).

    Args:
        term: The search term.

    Returns:
        A list of dicts with the keys ``title``, ``url``,
        ``search_provider`` (always ``'duckduckgo'``), ``author`` and
        ``date`` (both ``None``), ``source``, ``doc``, ``sentences`` and
        ``variants``. The list is empty when the request or JSON decoding
        fails, or when the response's ``AbstractSource`` is not listed in
        ``config.duckduckgo_sources``.
    """
    result = []
    try:
        # Let requests build the query string so the term is URL-encoded;
        # formatting it into the URL breaks on '&', '#', '+' or spaces.
        req = requests.get(
            'http://api.duckduckgo.com/',
            params={'q': term, 'format': 'json'},
        ).json()
    except (requests.RequestException, ValueError) as err:
        # Best-effort lookup: transport and decoding failures give no
        # results, but KeyboardInterrupt/SystemExit are no longer swallowed.
        log.warning("DuckDuckGo request for '{}' failed: {}".format(term, err))
        return result

    # ``.get`` treats a missing AbstractSource like an unlisted one.
    if req.get('AbstractSource') not in config.duckduckgo_sources:
        return result

    if req.get('Abstract'):
        pr = PageRequest(req['AbstractURL'], term, run=False)
        pr.extract_sentences(req['Abstract'])
        result.append({
            'title': req['Heading'],
            'url': req['AbstractURL'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'source': req['AbstractSource'],
            'doc': req['Abstract'],
            "sentences": pr.sentences,
            "variants": list(pr.variants),
        })

    if req.get('Definition'):
        pr = PageRequest(req['DefinitionURL'], term, run=False)
        pr.extract_sentences(req['Definition'])
        result.append({
            'title': req['Heading'],
            'url': req['DefinitionURL'],
            'source': req['DefinitionSource'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'doc': req['Definition'],
            "sentences": pr.sentences,
            "variants": list(pr.variants),
        })

    log.info("Searching DuckDuckGo for '{}' returned {} results".format(
        term, len(result)))
    return result
def search_duckduckgo(term):
    """Query the DuckDuckGo JSON API for ``term`` and collect its answers.

    Up to two entries are produced: one from the response's ``Abstract`` and
    one from its ``Definition``, each only when that field is non-empty. The
    text of each is passed to ``extract_sentences`` on a non-executing
    ``PageRequest`` (``run=False``).

    Args:
        term: The search term.

    Returns:
        A list of dicts with the keys ``title``, ``url``,
        ``search_provider`` (always ``'duckduckgo'``), ``author`` and
        ``date`` (both ``None``), ``source``, ``doc``, ``sentences`` and
        ``variants``. The list is empty when the request or JSON decoding
        fails, or when the response's ``AbstractSource`` is not listed in
        ``config.duckduckgo_sources``.
    """
    result = []
    try:
        # Let requests build the query string so the term is URL-encoded;
        # formatting it into the URL breaks on '&', '#', '+' or spaces.
        req = requests.get(
            'http://api.duckduckgo.com/',
            params={'q': term, 'format': 'json'},
        ).json()
    except (requests.RequestException, ValueError) as err:
        # Best-effort lookup: transport and decoding failures give no
        # results, but KeyboardInterrupt/SystemExit are no longer swallowed.
        log.warning("DuckDuckGo request for '{}' failed: {}".format(term, err))
        return result

    # ``.get`` treats a missing AbstractSource like an unlisted one.
    if req.get('AbstractSource') not in config.duckduckgo_sources:
        return result

    if req.get('Abstract'):
        pr = PageRequest(req['AbstractURL'], term, run=False)
        pr.extract_sentences(req['Abstract'])
        result.append({
            'title': req['Heading'],
            'url': req['AbstractURL'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'source': req['AbstractSource'],
            'doc': req['Abstract'],
            "sentences": pr.sentences,
            "variants": list(pr.variants),
        })

    if req.get('Definition'):
        pr = PageRequest(req['DefinitionURL'], term, run=False)
        pr.extract_sentences(req['Definition'])
        result.append({
            'title': req['Heading'],
            'url': req['DefinitionURL'],
            'source': req['DefinitionSource'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'doc': req['Definition'],
            "sentences": pr.sentences,
            "variants": list(pr.variants),
        })

    log.info("Searching DuckDuckGo for '{}' returned {} results".format(
        term, len(result)))
    return result