def get_data(dc, timeout=120, clear_cache=False):
    """Fetch the ``/results`` JSON document for one datacenter.

    Args:
        dc: Mapping with ``name``, ``url`` and ``port`` keys, plus optional
            ``user`` and ``password`` keys used for HTTP basic auth.
        timeout: Timeout in seconds handed to ``requests.get``.
        clear_cache: When true, clear the installed cache before requesting.

    Returns:
        The decoded JSON body, or ``None`` when the request failed or the
        response evaluated as falsy (requests treats error statuses as falsy).
    """
    print("Retrieving data for datacenter: {0}".format(dc['name']))
    # Configure our cache, create one DB per DC.
    requests_cache.install_cache(dc["name"], backend='sqlite',
                                 old_data_on_error=True)
    if clear_cache:
        requests_cache.clear()
    url = 'http://{0}:{1}/results'.format(dc['url'], dc['port'])
    data = None
    r = None
    now = time.ctime(int(time.time()))
    try:
        # Both keys must be present; the previous test
        # ("'user' and 'password' in dc") only looked at 'password'.
        if 'user' in dc and 'password' in dc:
            r = requests.get(url, auth=(dc['user'], dc['password']),
                             timeout=timeout)
        else:
            r = requests.get(url, timeout=timeout)
        print("Time: {0} / Used Cache: {1}".format(now, r.from_cache))
        r.raise_for_status()
    except Exception as ex:
        print("Got exception while retrieving data for dc: {0} ex: {1}".format(
            dc, str(ex)))
    finally:
        if r:
            data = r.json()
            r.close()
        else:
            print("Got no data while making API call to {0} ".format(dc))
            # An error response still holds a connection; release it.
            if r is not None:
                r.close()
    print("Data Retrieval for datacenter {0} complete".format(dc['name']))
    return data
def get_proxies():
    """Ask the configured ``PROXY_URL`` service for an HTTP proxy address.

    Returns:
        dict: ``{"http": "http://<address>"}`` when the service answered OK
        with text containing ``:``; otherwise an empty dict. On any failure
        the requests cache is cleared so a bad answer is not reused.
    """
    proxies = dict()
    try:
        response = requests.get(current_app.config["PROXY_URL"])
        current_app.logger.info(u"获取代理IP: " + response.text)
        if response.ok and (':' in response.text):
            proxies["http"] = "http://" + response.text
        else:
            requests_cache.clear()
    except Exception:
        # Best effort: log and fall back to "no proxy". Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate,
        # which the old ``return`` inside ``finally`` silently discarded.
        current_app.logger.error(traceback.format_exc())
        requests_cache.clear()
    return proxies
def test_should_fail_if_cache_is_not_turned_off_and_on_properly(self):
    """Verify that reset_cache(None) disables caching and reset_cache(3600) restores it."""
    albums_url = "http://ben-major.co.uk/labs/top40/api/albums"
    httpretty.register_uri(
        httpretty.GET,
        albums_url,
        body=request_send_file,
        content_type='text/json',
        status=200
    )

    #: Start from an empty cache so no earlier read can satisfy a request
    requests_cache.clear()

    #: With the cache switched off the response must not carry cache metadata
    self.top40.reset_cache(None)
    uncached = requests.get(albums_url)
    expect(uncached).to(not_(have_property("from_cache")))

    #: Switch the cache back on; the first read only primes it
    self.top40.reset_cache(3600)
    priming = requests.get(albums_url)
    expect(priming.from_cache).to(be(False))

    #: The second read must now be served from the cache
    repeated = requests.get(albums_url)
    expect(repeated).to(have_property("from_cache"))
    expect(repeated.from_cache).to(be(True))
def get_request(url, params, use_cache=False):
    """GET ``url`` with ``params`` and return the decoded JSON body.

    The whole requests cache is cleared first unless ``use_cache`` is true.
    """
    if not use_cache:
        requests_cache.clear()
    response = requests.get(url, params=params)
    print(f"From cache: {response.from_cache}, {response.url}")
    # Decoded JSON: a python object (a list of dictionaries in this case)
    decoded = response.json()
    return decoded
def get_proxies():
    """Ask the configured ``PROXY_URL`` service for an HTTP proxy address.

    Returns:
        dict: ``{"http": "http://<address>"}`` when the service answered OK
        with text containing ``:``; otherwise an empty dict. On any failure
        the requests cache is cleared so a bad answer is not reused.
    """
    # Bound up front: previously a failing request left ``proxies`` unassigned
    # and the ``return`` in ``finally`` raised UnboundLocalError.
    proxies = dict()
    try:
        response = requests.get(current_app.config["PROXY_URL"])
        current_app.logger.info(
            str(response.status_code) + "\t" + response.text)
        if response.ok and ":" in response.text:
            proxies = dict(http="http://" + response.text)
        else:
            requests_cache.clear()
    except Exception:
        # Best effort: fall back to "no proxy" but let KeyboardInterrupt /
        # SystemExit through (a bare except would have swallowed them).
        requests_cache.clear()
        current_app.logger.info(traceback.format_exc())
    return proxies
def get_articles(url):
    """Yield article dicts found in the ``msgList`` JSON embedded in a page.

    The page is fetched with ``retrieve`` until the ``msgList = ...;`` marker
    is present; each miss is logged, followed by a 3 second pause and a cache
    clear. Entries with a ``content_url`` are enriched through
    ``get_content`` before being yielded.
    """
    while True:
        page = retrieve(url)
        found = re.search("msgList = (.*);", page)
        if found is not None:
            payload = found.group(1)
            break
        # Marker missing: log, wait and drop the cached page before retrying.
        current_app.logger.critical(u"请输入验证码")
        time.sleep(3)
        requests_cache.clear()

    for entry in json.loads(payload)["list"]:
        info = entry["app_msg_ext_info"]
        if info["content_url"]:
            content, read_num, post_date = get_content(info)
            info["content"] = content
            info["read_num"] = read_num
            info["post_date"] = post_date
            yield info
def ls(self, verbose=True, update=False):
    """
    List the available datasets.

    Every ``<a>`` element on ``self.base_url`` whose ``href`` contains
    ``.zip`` is stored in ``self.datasets``. Nothing is collected when the
    response is not OK.

    :param verbose: Print the datasets if true
    :param update: Refresh the cache if true
    """
    self.datasets = []
    if update:
        requests_cache.clear()
    response = requests.get(self.base_url)
    if not response.ok:
        return
    soup = BeautifulSoup(response.content, "html.parser")
    for link in soup.find_all("a"):
        href = link.get("href")
        # Anchors without an href yield None; skip them instead of raising
        # TypeError on the ``in`` test.
        if href and ".zip" in href:
            self.datasets.append(href)
            if verbose:
                print(href)
def get_pokedex(
        cache: bool = True,
        processes: int = DEFAULT_PROCESSES) -> Dict[Pokemon, List[int]]:
    """
    Build the pokedex mapping.

    Returns a dictionary where the keys are all the pokemons and the values
    are the list of related pokemons. When ``cache`` is false the requests
    cache is cleared before the page is downloaded; ``processes`` is the size
    of the worker pool used to parse the info cards.
    """
    if cache:
        print("Using cache")
    else:
        print("Removing cache")
        requests_cache.clear()

    page = get_throttled(POKEDEX_URL)
    cards = BeautifulSoup(page.text, 'html.parser').find_all(class_="infocard")

    # Prune the tree because multiprocessing needs to pickle the data,
    # and passing the original elements cause RecursionError
    pruned_cards = map(prune_bs_tree, cards)

    with multiprocessing.Pool(processes) as workers:
        pairs = workers.map(parse_infocard, pruned_cards)
    return dict(pairs)
def malshare_update(filename, suffix=""):
    """Update a ClamAV database with MalShare signatures.

    The date of the last stored entry is read from the tail of ``filename``
    (16 days ago is assumed for an empty file) and signatures from the day
    after it are appended.

    Args:
        filename: Path of the database file; it is locked while updating.
        suffix: Passed through to ``malshare_by_dates``.

    Returns:
        ``False`` when the database already ends today or yesterday,
        ``True`` after new signatures were appended.
    """
    with portalocker.Lock(filename, "ab+") as db_file:
        db_file.seek(0)
        if db_file.readline():
            # Only the tail is needed. Clamp the offset at 0: seeking 200
            # bytes back from the end raises on files shorter than that.
            db_file.seek(0, os.SEEK_END)
            db_file.seek(max(db_file.tell() - 200, 0))
            last_line = db_file.readlines()[-1]
            if not last_line.endswith(b"\n"):
                db_file.write(b"\n")
            old_date = _date_from_db(last_line.decode())
        else:
            old_date = date.today() - timedelta(days=16)
        if old_date in (date.today(), date.today() - timedelta(days=1)):
            return False
        if DEBUG:
            sys.stderr.write(" ".join([
                "Update ",
                str(old_date),
                str(date.today() - timedelta(days=1)), "\n"
            ]))
        db_file.write(
            malshare_by_dates(stop=old_date + timedelta(days=1),
                              suffix=suffix,
                              silent=True).encode())
        if REQUESTS_CACHE:
            requests_cache.clear()
        return True
def removeTempImages():
    """List the ``temp`` directory beside this module, then clear the rate cache.

    NOTE(review): the directory listing is not used by the code visible here.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    temp_dir = "{}/temp".format(module_dir)
    files = os.listdir(temp_dir)

    # Clear the rate cache
    requests_cache.clear()
    print ("Rate cache was cleared.")
def __init__(self, acs, tz):
    """Store ``acs`` and ``tz`` and start with an empty 24-hour requests cache."""
    self.acs, self.tz = acs, tz

    # Install sqlite cache for celestrak with a 24 hour duration
    # Good enough for celestrak and other data. Cache disabled when appropriate
    one_day_in_seconds = 24 * 60 * 60
    requests_cache.install_cache('teeminus10_cache',
                                 expire_after=one_day_in_seconds)
    requests_cache.clear()
def load_annotations(data_folder):
    """Yield one PubMed document per unique id listed in the LitCovid TSV.

    The first ``.tsv`` entry found in ``data_folder`` is read, its 32-line
    header is skipped and the first column of each remaining row is taken as
    a PMID. Documents are fetched through a throttled cached session, and
    documents whose ``_id`` was already yielded are skipped.

    Args:
        data_folder: Directory containing the LitCovid ``.tsv`` export.

    Raises:
        Exception: If no ``.tsv`` file exists in ``data_folder``.
    """
    contents = os.listdir(path=data_folder)
    tsv_names = [name for name in contents if '.tsv' in name]
    if not tsv_names:
        raise Exception(f"No .tsv found in {contents}")
    # os.listdir returns bare names, so resolve against the folder; opening
    # the bare name only worked when the cwd happened to be data_folder.
    infile = os.path.join(data_folder, tsv_names[0])

    with open(infile, 'r') as litcovid_tsv:
        tsv_reader = csv.reader(litcovid_tsv, delimiter='\t')
        for _ in range(32):
            # A default keeps a short file from raising StopIteration, which
            # becomes RuntimeError inside a generator.
            next(tsv_reader, None)
        # Blank rows parse as empty lists; skip them.
        pmids = [line[0] for line in tsv_reader if line]

    doc_id_set = set()
    requests_cache.install_cache('litcovid_cache')
    requests_cache.clear()
    s = requests_cache.CachedSession()
    s.hooks = {'response': throttle}
    logging.debug("requests_cache: %s",
                  requests_cache.get_cache().responses.filename)
    for i, pmid in enumerate(pmids, start=1):
        # NCBI eutils API limits requests to 10/sec
        if i % 100 == 0:
            logging.info("litcovid.parser.load_annotations progress %s", i)
        doc = getPubMedDataFor(pmid, session=s)
        if doc['_id'] not in doc_id_set:
            yield doc
        doc_id_set.add(doc['_id'])
    remove_expired(s)
def _clear_cache(self):
    ''' Empty the globally installed requests cache, if there is one. '''
    try:
        requests_cache.clear()
    except AttributeError:
        # Raised when requests_cache is not enabled.
        message = "requests_cache is not enabled. Nothing to clear."
        print(message)
def requests_cache_test():
    """Fetch the same URL twice with an empty redis-backed cache, printing ``from_cache`` each time."""
    thirty_days = timedelta(days=30)
    requests_cache.install_cache(backend='redis', expire_after=thirty_days)
    requests_cache.clear()
    url = 'http://example.python-scraping.com/view/United-Kingdom-239'
    for _ in range(2):
        resp = requests.get(url)
        print(resp.from_cache)
def __init__(self, acs, tz):
    """Remember ``acs`` and ``tz``, then install and empty the 24-hour cache."""
    self.acs = acs
    self.tz = tz

    # Install sqlite cache for celestrak with a 24 hour duration
    # Good enough for celestrak and other data. Cache disabled when appropriate
    cache_options = {'expire_after': 24 * 60 * 60}
    requests_cache.install_cache('teeminus10_cache', **cache_options)
    requests_cache.clear()
def test_cached_response():
    """``retrieve_event.cache`` is true after a first call and false again once the cache is cleared."""
    event_path = '/events-with-subscriptions/' + EVENT_ID + '/'
    request = RequestFactory().get(event_path)

    retrieve_event(request)
    assert_true(retrieve_event.cache)

    requests_cache.clear()

    retrieve_event(request)
    assert_false(retrieve_event.cache)
def load(self):
    """Run the Reddit search and return the ``children`` list of its ``data`` object.

    The requests cache is cleared when the status code is not 200. Missing
    ``data`` or ``children`` keys give an empty list.
    """
    response = requests.get(self.REDDIT_SEARCH, params=self.QUERY)
    if response.status_code != 200:
        requests_cache.clear()
    listing = response.json().get('data', {})
    return listing.get('children', [])
def test_get_from_cache(self):
    """The first lookup is fetched fresh; an identical second lookup comes from the cache."""
    country_info = CountryInfo()
    requests_cache.install_cache(cache_name='testing-cache',
                                 backend='sqlite',
                                 expire_after=5)
    first, second = [country_info.get_info('egypt') for _ in range(2)]
    # Empty the cache before asserting.
    requests_cache.clear()
    self.assertEqual(first['from cache'], False)
    self.assertEqual(second['from cache'], True)
def flush(verbose):
    """Flushes the contents of the cache.

    Installs and clears the ``leech`` cache, then runs ``VACUUM`` on
    ``leech.sqlite``.
    """
    configure_logging(verbose)

    requests_cache.install_cache('leech')
    requests_cache.clear()

    database = sqlite3.connect('leech.sqlite')
    database.execute("VACUUM")
    database.close()

    logger.info("Flushed cache")
def get_url_data(serviceurl, params=None):
    """Retrieve JSON data from a URL, retrying on request errors.

    Credentials embedded in the URL (``user:password@host``) are stripped
    from the requested URL and sent as basic auth instead. An empty answer
    served from the cache triggers one cache clear and a fresh request.

    :param serviceurl: url to retrieve data
    :param params: http://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
    :return: json url_data
    :raises ValueError: if the response decodes to an empty value
    :raises requests.exceptions.RequestException: after 3 failed attempts
    """
    max_retry = 3
    purl = parse_url(serviceurl)

    auth = None
    if purl.auth:
        # Split on the first ':' only: passwords may contain ':' and a bare
        # user name must not raise IndexError.
        username, _, password = purl.auth.partition(':')
        auth = (username, password)

    # Add url like http://host
    burl = '{}://{}'.format(purl.scheme, purl.host)
    if purl.port:
        # Add port like: http://host:8080
        burl += ':{}'.format(purl.port)
    if purl.request_uri:
        # Add path and query like: http://host:8080/path/uri?query
        burl += '{}'.format(purl.request_uri)

    def fetch():
        # Support https without verification of certificate.
        # NOTE(review): ``timeout`` is not defined in this function; it is
        # presumably a module-level setting - confirm.
        # auth stays None without credentials; (None, None) would send a
        # bogus Authorization header.
        return requests.get(burl, verify=False, params=params,
                            timeout=timeout, auth=auth)

    cnt = 0
    while cnt < max_retry:
        try:
            req = fetch()
            data = req.json()
            if not data and getattr(req, 'from_cache', False):
                # Clear cache to retry again
                requests_cache.clear()
                req = fetch()
                data = req.json()
            if data:
                return data
            # Raise instead of looping: an empty fresh response would
            # otherwise be re-requested forever.
            raise ValueError('No data from response')
        except requests.exceptions.RequestException:
            cnt += 1
            if cnt >= max_retry:
                raise
            # Exponential backoff: 1s, 2s, ...
            time.sleep(2 ** (cnt - 1))
def create_xml():
    """Generate the XML feed files described by ``settings.XML_CONFIG_FILENAME``.

    Steps: clear the requests cache, rebuild ``utils.default_shippings`` from
    the shop's shipping zones, fetch tax rates, then for each configured
    file and language group collect products (with variations when the
    ``variation`` type is configured) and write them with ``write_xml``.
    Configuration entries lacking ``languages`` or ``types`` are skipped.
    """
    requests_cache.clear()
    api = Api(settings.WOO_HOST,
              settings.WOO_CONSUMER_KEY,
              settings.WOO_CONSUMER_SECRET,
              console_logs=False)
    # Context manager so the config file handle is closed deterministically.
    with open(settings.XML_CONFIG_FILENAME) as config_handle:
        config = json.load(config_handle)

    print("\033[95m[Feed XML] Getting shipping methods...\033[0m")
    utils.default_shippings.clear()
    utils.default_tax_rates.clear()
    shipping_zones = api.get_shipping_zones()
    for zone in shipping_zones:
        zone_locations = api.get_shipping_zone_locations(
            shipping_zone_id=zone.id)
        zone_methods = api.get_shipping_zone_methods(shipping_zone_id=zone.id)
        for location in zone_locations:
            for method in zone_methods:
                utils.default_shippings.append(
                    utils.get_shipping_method(method, location))

    print("\033[95m[Feed XML] Getting tax rates...\033[0m")
    # NOTE(review): the return value is not used below; the call is kept
    # because it may populate utils.default_tax_rates - confirm.
    get_tax_rates(api)

    for config_file_name, config_file in config.items():
        config_file_languages = config_file.get('languages')
        config_file_types = config_file.get('types')
        if not (config_file_languages and config_file_types):
            continue
        for language_file, languages_in_file in config_file_languages.items():
            # A single language may be given as a bare value.
            if not isinstance(languages_in_file, list):
                languages_in_file = [languages_in_file]
            elements = []
            for language in languages_in_file:
                print((
                    "\033[95m[Feed XML] Getting products in language '%s'...\033[0m"
                ) % language)
                if 'variation' in config_file_types:
                    elements.extend(
                        get_products_and_variations(api, language))
                else:
                    elements.extend(get_products(api, language))
            config_file_path = config_file_name.split('/')
            config_file_directory = ('/').join(['feeds'] +
                                               config_file_path[:-1])
            print((
                "\033[95m[Feed XML] Generating '%s/%s_%s_%s.xml'...\033[0m"
            ) % (config_file_directory, settings.XML_FEED_FILENAME,
                 language_file, config_file_path[-1]))
            selected_products = filter(
                lambda product: product.type in config_file_types.keys(),
                elements)
            write_xml(selected_products, language_file, config_file_path,
                      config_file_types)
def generate_csl_items(args, citekeys_df):
    """
    Generate CSL (citeproc) items for standard_citekeys in citekeys_df.
    Writes references.json to disk and logs warnings for potential problems.

    Manual references take precedence over retrieved metadata. ``raw:``
    citekeys without a manual reference are recorded as failures without
    attempting retrieval. Returns the list of CSL items written.
    """
    # Read manual references (overrides) in JSON CSL
    manual_refs = load_manual_references(args.manual_references_paths)

    requests_cache.install_cache(args.requests_cache_path,
                                 include_get_headers=True)
    cache = requests_cache.get_cache()
    if args.clear_requests_cache:
        logging.info('Clearing requests-cache')
        requests_cache.clear()
    logging.info(
        f'requests-cache starting with {len(cache.responses)} cached responses'
    )

    csl_items = list()
    failures = list()
    for standard_citekey in citekeys_df.standard_citekey.unique():
        if standard_citekey in manual_refs:
            csl_items.append(manual_refs[standard_citekey])
            continue
        if standard_citekey.startswith('raw:'):
            logging.error(
                f'CSL JSON Data with a standard_citekey of {standard_citekey!r} not found in manual-references.json. '
                'Metadata must be provided for raw citekeys.')
            failures.append(standard_citekey)
            # Previously this fell through to the retrieval below, recording
            # the same citekey as a failure a second time.
            continue
        try:
            csl_item = citekey_to_csl_item(standard_citekey)
            csl_items.append(csl_item)
        except Exception:
            logging.exception(
                f'Citeproc retrieval failure for {standard_citekey!r}')
            failures.append(standard_citekey)

    logging.info(
        f'requests-cache finished with {len(cache.responses)} cached responses'
    )
    requests_cache.uninstall_cache()

    if failures:
        message = 'CSL JSON Data retrieval failed for the following standardized citation keys:\n{}'.format(
            '\n'.join(failures))
        logging.error(message)

    # Write JSON CSL bibliography for Pandoc.
    with args.references_path.open('w', encoding='utf-8') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
def _config_cache(self, cache_settings: dict):
    ''' Install the requests cache described by ``cache_settings``.

    Keys read: ``path`` (directory; ``~`` and environment variables are
    expanded), ``filename``, ``max_age_in_seconds`` and ``refresh_cache``.
    The directory is created when missing. The cache is cleared when
    ``refresh_cache`` is truthy and the ``.sqlite`` file exists.
    '''
    cache_dir = os.path.expandvars(cache_settings['path'])
    cache_dir = os.path.expanduser(cache_dir)
    cache_file = os.path.join(cache_dir, cache_settings['filename'])

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    max_age = cache_settings['max_age_in_seconds']
    requests_cache.install_cache(cache_file, expire_after=max_age)

    if not cache_settings['refresh_cache']:
        return
    if os.path.isfile("{}.sqlite".format(cache_file)):
        requests_cache.clear()
def populate_top_pages():
    """
    Refresh the stored chartbeat data for every locally stored host.

    Intended to run every hour: the requests cache is cleared, then each
    host's ``top_pages`` and ``updated_at`` are updated from a chartbeat
    request, and the session is committed.
    """
    requests_cache.clear()
    for host in Host.query.all():
        response = chartbeat_request(host.api_key.api_key, host.host_name)
        host.top_pages = response.json()
        host.updated_at = datetime.datetime.utcnow()
    db.session.commit()
def sync(self):
    """Find workflow attributes, create a new template pipeline on TEMPLATE

    Rebuilds the TEMPLATE branch contents and commits them. When changes
    were made and ``self.make_pr`` is set, the branches are pushed and a
    pull request is opened. The target directory is reset in every case.

    Raises:
        PullRequestException: If ``GITHUB_AUTH_TOKEN`` is unset, the GitHub
            username or repo name is unknown, or a push / PR step fails.
    """
    # Clear requests_cache so that we don't get stale API responses
    requests_cache.clear()

    log.info("Pipeline directory: {}".format(self.pipeline_dir))
    if self.from_branch:
        log.info("Using branch '{}' to fetch workflow variables".format(
            self.from_branch))
    if self.make_pr:
        log.info("Will attempt to automatically create a pull request")

    self.inspect_sync_dir()
    self.get_wf_config()
    self.checkout_template_branch()
    self.delete_template_branch_files()
    self.make_template_pipeline()
    self.commit_template_changes()

    # Push and make a pull request if we've been asked to
    if self.made_changes and self.make_pr:
        try:
            # Check that we have an API auth token
            if os.environ.get("GITHUB_AUTH_TOKEN", "") == "":
                raise PullRequestException("GITHUB_AUTH_TOKEN not set!")

            # Check that we know the github username and repo name.
            # Both are needed, so a single missing value is an error
            # (the old ``and`` only fired when both were missing).
            if self.gh_username is None or self.gh_repo is None:
                raise PullRequestException(
                    "Could not find GitHub username and repo name")

            self.push_template_branch()
            self.create_merge_base_branch()
            self.push_merge_branch()
            self.make_pull_request()
            self.close_open_template_merge_prs()
        except PullRequestException:
            self.reset_target_dir()
            # Re-raise the original exception to keep its traceback.
            raise

    self.reset_target_dir()

    if not self.made_changes:
        log.info("No changes made to TEMPLATE - sync complete")
    elif not self.make_pr:
        log.info(
            "Now try to merge the updates in to your pipeline:\n  cd {}\n  git merge TEMPLATE"
            .format(self.pipeline_dir))
def retrieve(url, headers=None):
    """Fetch ``url`` through a proxy, retrying until it succeeds.

    Each attempt uses a fresh random user agent and the proxy from
    ``get_proxies``. An attempt succeeds when the response is OK and was not
    redirected to a different URL. After a failed attempt the cache is
    cleared and the next attempt starts 3 seconds later.

    Args:
        url: Address to fetch.
        headers: Optional extra request headers; the mapping is not modified.

    Returns:
        The response object when a ``referer`` header was supplied,
        otherwise the response text.
    """
    current_app.logger.info(inspect.stack()[1][3] + " retrieve " + url)
    # Work on a copy so the caller's dict does not gain a user-agent entry.
    headers = dict(headers) if headers else dict()
    while True:
        headers["user-agent"] = UserAgent().random
        try:
            response = requests.get(url,
                                    headers=headers,
                                    proxies=get_proxies(),
                                    timeout=3)
            current_app.logger.info(
                str(response.status_code) + "\t" + response.reason)
            if response.ok and (url == response.request.url):
                if "referer" in headers:
                    return response
                return response.text
            current_app.logger.info(response.request.url)
        except Exception:
            # Exception rather than a bare except, so Ctrl-C / SystemExit
            # can still stop this otherwise endless loop.
            current_app.logger.info(traceback.format_exc())
        requests_cache.clear()
        time.sleep(3)
def enable_cache(self, expire_after=300, drop_cache=False):
    """Install the global requests cache and return ``self`` for chaining.

    ``expire_after`` is the expiry in seconds (``None`` is logged as never
    expiring); ``drop_cache`` clears the cache after installing it.
    """
    if expire_after is not None:
        message = f'Setting up response cache to expire after {expire_after} seconds'
    else:
        message = 'Setting up response cache to never expire (infinite caching)'
    log.info(message)

    requests_cache.install_cache(expire_after=expire_after)

    if drop_cache:
        log.info('Dropping cache')
        requests_cache.clear()
    return self
def generate_csl_items(args, citation_df):
    """
    Generate CSL (citeproc) items for standard_citations in citation_df.
    Writes references.json to disk and logs warnings for potential problems.

    Manual references take precedence over retrieved metadata; citations
    whose retrieval raises are logged and collected as failures. Returns the
    list of CSL items written.
    """
    # Manual references (overrides) in JSON CSL
    manual_refs = read_manual_references(args.manual_references_path)

    requests_cache.install_cache(args.requests_cache_path,
                                 include_get_headers=True)
    cache = requests_cache.get_cache()
    if args.clear_requests_cache:
        logging.info('Clearing requests-cache')
        requests_cache.clear()
    logging.info(
        f'requests-cache starting with {len(cache.responses)} cached responses'
    )

    csl_items, failures = [], []
    for citation in citation_df.standard_citation.unique():
        if citation in manual_refs:
            csl_items.append(manual_refs[citation])
        else:
            try:
                csl_items.append(citation_to_citeproc(citation))
            except Exception:
                logging.exception(f'Citeproc retrieval failure for {citation}')
                failures.append(citation)

    logging.info(
        f'requests-cache finished with {len(cache.responses)} cached responses'
    )
    requests_cache.uninstall_cache()

    if failures:
        failed_lines = '\n'.join(failures)
        logging.error('Citeproc retrieval failed for:\n{}'.format(failed_lines))

    # JSON CSL bibliography for Pandoc.
    with args.references_path.open('w') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
def _get_fahrplans(self):
    """Download the ``schedule`` object of every URL in ``self.urls`` into ``self.fahrplans``.

    The cache is cleared first when ``self.update_cache`` is set. URLs whose
    body is not valid JSON are reported and skipped. The program exits when
    no Fahrplan could be loaded.
    """
    if self.update_cache:
        requests_cache.clear()

    self.fahrplans = []
    for url in self.urls:
        try:
            schedule = requests.get(url).json()["schedule"]
        except JSONDecodeError as err:
            print(
                f"{Colour.FAIL}Problem downloading the Fahrplan {url}. Check your internet connection.{Colour.ENDC}"  # noqa: E501
            )
            print(err)
        else:
            self.fahrplans.append(schedule)

    if self.fahrplans:
        return
    print(
        f"{Colour.FAIL}Fahrplan empty. Something is wrong with your urls. Exiting.{Colour.ENDC}"  # noqa: E501
    )
    sys.exit()
def test_caching_works(self):
    """The first ``get_bodies`` call is slow (uncached); the next four are fast."""
    import time

    started = time.time()
    requests_cache.clear()
    for attempt in range(5):
        EDSMInterface.get_bodies("Deciat")
        elapsed = time.time() - started
        if attempt == 0:
            assert elapsed > 0.1  # Typically takes > 0.5s
        else:
            assert elapsed < 0.1  # Typically ~ 0.002 with the cache installed
        started = time.time()
def get_rhymes(words, use_cache=False):
    """Return, for each word in ``words``, a list of up to three rhyming words.

    ``words`` is split on whitespace after stripping leading and trailing
    periods. Unless ``use_cache`` is true the requests cache is cleared
    before every lookup.
    """
    baseurl = "https://api.datamuse.com/words"
    query = {"max": "3"}  # get at most n results
    rhymes = []
    for word in words.strip('.').split():
        if not use_cache:
            requests_cache.clear()
        query["rel_rhy"] = word
        # Decoded JSON: a list of dictionaries in this case
        matches = requests.get(baseurl, params=query).json()
        rhymes.append([match['word'] for match in matches])
    return rhymes
def clear_cache(deep=False):
    """Remove cached data from disk.

    Deletes every ``.pkl`` file in ``CACHE_PATH``. Useful for a fresh start,
    to reclaim disk space, or after a major update when stale data
    structures could cause conflicts. Use sparingly.

    The cache needs to be enabled first, so that the cache path is known.

    Args:
        deep (=False, optional): If true, the http cache is cleared as well.
    """
    pickled = [name for name in os.listdir(CACHE_PATH)
               if name.endswith('.pkl')]
    for name in pickled:
        os.remove(os.path.join(CACHE_PATH, name))
    if deep:
        requests_cache.clear()
def send_single_request(self, path):
    """Submit one throttled GET for ``path`` and return the response content.

    A response containing a rate-limit message is treated like a connection
    error. After each such error the cache is cleared and the request is
    repeated 60 seconds later; the error is re-raised once 10 retries have
    been used.
    """
    query_link = self.generate_request(const.LINK + path)
    request = requests.Request(method='GET', url=query_link)
    retries = 0
    while True:
        try:
            payload = self.bt.submit(request).response.content
            rate_limited = ('To many requests' in payload
                            or 'minimum delay' in payload)
            if rate_limited:
                raise requests.exceptions.ConnectionError
            return payload
        except requests.exceptions.ConnectionError:
            requests_cache.clear()
            if retries >= 10:
                raise
            print('Retrying Request')
            retries += 1
            time.sleep(60)
def do_tests_for(backend):
    """Hit the cache from 10 threads, 20 rounds over, and check every URL was stored."""
    requests_cache.configure(CACHE_NAME, backend)
    requests_cache.clear()
    n_threads = 10
    url = 'http://httpbin.org/get'

    def do_requests(url, params):
        # Repeated requests exercise both writing to and reading from the cache
        for _ in range(10):
            requests.get(url, params=params)

    for _ in range(20):  # stress test
        workers = []
        for index in range(n_threads):
            workers.append(
                Thread(target=do_requests, args=(url, {'param': index})))
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        for index in range(n_threads):
            cached_url = '%s?param=%s' % (url, index)
            self.assert_(requests_cache.has_url(cached_url))
def get_new_alerts():
    """BROADCASTING - Alerts that appeared within the last minute.

    Clears the requests cache, fetches the world state and builds an
    announcement for each alert activated less than 60 seconds ago. Alerts
    with no item reward (or whose item reward contains '内融核心') are only
    announced when their counted-item reward mentions one of the broadcast
    resources.

    Returns:
        str: The concatenated announcements; ``''`` when the world state
        cannot be fetched or nothing qualifies.
    """
    try:
        requests_cache.clear()
        ws = get_worldstate()
    except Exception:
        # Best effort: a failed fetch means nothing to broadcast.
        # Exception (not BaseException) keeps Ctrl-C / SystemExit working.
        return ''

    # Counted-item rewards that are always worth broadcasting.
    bc_counteditems = ['泥炭萃取物', '库狛', '库娃', '虚空光体']
    alert_text = ''
    for alert in ws['Alerts']:
        # Timestamps are in milliseconds.
        activation = time.time() - \
            float(alert['Activation']['$date']['$numberLong']) / 1000
        if not 0 < activation < 60:
            continue

        mission = alert['MissionInfo']
        reward = mission['missionReward']
        expiry = float(
            alert['Expiry']['$date']['$numberLong']) / 1000 - time.time()
        req_archwing = 'Archwing' if 'archwingRequired' in mission else ''
        rew_credits = reward['credits']

        rew_items = ''
        if 'items' in reward:
            item_name = reward['items'][0]
            try:
                rew_items = ' - ' + data_dict['L'][item_name.lower()]['value']
            except Exception:
                # No usable translation: fall back to the raw item name.
                rew_items = ' - ' + item_name

        rew_counteditems = ''
        if 'countedItems' in reward:
            counted = reward['countedItems'][0]
            rew_counteditems = ' - ' + str(counted['ItemCount']) \
                + ' x ' + str(data_dict['L'][counted['ItemType'].lower()]['value'])

        if rew_items == '' or '内融核心' in rew_items:
            if not any(item in rew_counteditems for item in bc_counteditems):
                # Skip only this alert. The old ``break`` also dropped every
                # later alert, making the result depend on feed order.
                continue

        alert_text += '新警报任务!\n\n地点:' + data_dict['S'][mission['location']]['value'] + ' | ' + req_archwing + data_dict['M'][mission['missionType']]['value'] \
            + '\n等级:' + str(mission['minEnemyLevel']) + '-' + str(mission['maxEnemyLevel']) \
            + '\n奖励:' + str(rew_credits) + ' CR' + rew_items + rew_counteditems\
            + '\n时限:' + s2h(expiry)
    return alert_text
def parse():
    """Download and analyse the article at the ``url`` query parameter.

    ``clear_cache=1`` in the query string clears the requests cache first.
    Returns a JSON response with the article fields, or a JSON error object
    when anything in the download/parse pipeline raises.
    """
    args = request.args
    url = args.get('url')
    if args.get('clear_cache') == '1':
        requests_cache.clear()

    try:
        article = Article(url, keep_article_html=True)
        article.download()
        article.parse()
        article.nlp()

        authors = ", ".join(article.authors)
        # Host part of the source URL: text after "//" up to the next "/".
        source_url = article.source_url
        host = source_url[source_url.find("//") + 2:].split("/")[0]
        payload = {
            "author": authors,
            "source": host,
            "title": article.title,
            "image": article.top_image,
            "url": article.url,
            "publishedAt": article.publish_date,
            "html": article.article_html,
            "text": article.text,
            "summary": article.summary,
            "keywords": article.keywords,
        }
        return jsonify(payload)
    except Exception as e:
        print(e)
        description = "'%s' parsing went wrong with error: '%s'" % (url, str(e))
        return jsonify({'error': True, 'description': description})
def run():
    """Process the relationship queue forever, pausing longer as activity builds.

    The pause is 5-60 s for the first 50 consecutive users, 30-180 s up to
    100, and 600-1800 s beyond that. A pause above 900 s resets the
    counter. The cache is cleared after each pause. Any error is printed,
    followed by a 300 s pause.
    """
    liveness = 0
    while True:
        try:
            liveness += 1
            uid = db.get_next_queue()
            print('Get user[%s]\' relationship'%uid)
            get_relation(uid)

            if liveness <= 50:
                low, high = 5, 60
            elif liveness <= 100:
                low, high = 30, 180
            else:
                low, high = 600, 1800
            delay = random.randint(low, high)

            # Only the longest tier can exceed 900 seconds.
            if delay > 900:
                liveness = 0
            _sleep(delay)
            requests_cache.clear()
        except Exception as e:
            print('Error:%s'%e)
            _sleep(300)
def send_multi_request(self, paths):
    """Submit throttled GET requests for ``paths`` and return their contents.

    Rate-limited responses are re-queued; the cache is cleared and the
    queue is resubmitted 60 seconds later, at most 10 times.

    Args:
        paths: Iterable of API paths appended to ``const.LINK``.

    Returns:
        list: Response contents. Items that needed a retry come after the
        ones that completed earlier.

    Raises:
        Exception: If the service reports a lockout period.
        requests.exceptions.ConnectionError: After 10 unsuccessful retries.
    """
    request_queue = []
    for path in paths:
        query_link = self.generate_request(const.LINK + path)
        request_queue.append(requests.Request(method='GET', url=query_link))

    # Kept across retries so responses that already succeeded are not lost.
    complete_response = []
    i = 0
    while True:
        try:
            thottled_requests = self.bt.multi_submit(request_queue)
            responses = [tr.response for tr in thottled_requests]
            request_queue = []
            for response in responses:
                if 'To many requests' in response.content or 'minimum delay' in response.content:
                    # Re-queue a real Request object. The bare URL string
                    # used before could not be resubmitted.
                    request_queue.append(
                        requests.Request(method='GET', url=response.url))
                    print(response.content)
                elif 'you are now in a lockout period' in response.content:
                    raise Exception(response.content)
                else:
                    complete_response.append(response.content)
            if len(request_queue) > 0:
                raise requests.exceptions.ConnectionError
            return complete_response
        except AttributeError:
            for request in request_queue:
                # Two spaces reproduce the historical ``print a, b`` output.
                print('Error with request:  {0}'.format(request))
            raise
        except requests.exceptions.ConnectionError:
            requests_cache.clear()
            if i >= 10:
                raise
            print('Retrying Request')
            i += 1
            time.sleep(60)
def setUp(self):
    """Configure the cache under test and empty it before each test."""
    cache_options = dict(backend=CACHE_BACKEND, fast_save=FAST_SAVE)
    requests_cache.configure(CACHE_NAME, **cache_options)
    requests_cache.clear()
def setUp(self):
    """Create the Top40 client with a one-hour cache and empty the cache."""
    one_hour = 3600
    self.top40 = top40.Top40(cache_duration=one_hour)

    #: Clear the cache, otherwise if a Python 2 test is run after a Python 3 test, then
    #: incorrect pickle format errors can occur
    requests_cache.clear()
info[line['arch']]['md5'] = line['md5'] info[line['arch']]['key-offset'] = line['offset'] for line in data['url-offsets']: if line['arch'] not in info: raise VersionError('{} not in key-offsets.'.format(line['arch'])) if line['md5'] != info[line['arch']]['md5']: raise VersionError('MD5s for {} do not match.'.format(line['arch'])) info[line['arch']]['url-offset'] = line['offset'] return info try: info = retrieve_version_info() except VersionError as e: if config['debug']: print('Version missing from cache. Clearing and retrieving again ...') requests_cache.clear() try: info = retrieve_version_info() except VersionError as e: if config['debug']: print('ERROR: Version is missing from wiki ({}).'.format(str(e).rstrip('.')), file=sys.stderr) else: print('ERROR: Version is missing from wiki.', file=sys.stderr) sys.exit(1) if 'versions' not in config: config['versions'] = {} config['versions'][args.version] = info with open('config.json', 'w') as fp: json.dump(config, fp, indent=2) print(json.dumps(info, indent=2))
def cache_clear():
    """Empty the requests cache and reset ``CACHE.urls``; returns ``'OK'``."""
    requests_cache.clear()
    CACHE.urls = dict()
    return 'OK'
def main():
    ''' Our main application

    Parses the command line, builds the list of IP addresses (``--ip``,
    ``--infile``, ``--wan``), sets up the optional requests cache and checks
    each address against every configured feed using worker threads.
    Exits with status 1 on an invalid format, missing input, no feeds or no
    IP addresses.
    '''
    parser = op("usage ipblisted.py --ip [ip]")
    parser.add_option('--proxy', action="store", dest="proxy", help="Useful for when behind a proxy")
    parser.add_option('--proxy_user', action="store", dest="proxy_user")
    parser.add_option('--proxy_pass', action="store", dest="proxy_pass")
    parser.add_option('--good', default=False, action="store_true", dest="show_good", help="Displays lists that the IP did NOT show up on.")
    parser.add_option('--skip-dnsbl', default=False, action="store_true", dest="skip_dnsbl", help="Skips the checking DNS Blacklists")
    parser.add_option('--skip-bl', default=False, action="store_true", dest="skip_bl", help="Skips the checking of text based blacklists")
    parser.add_option('--no-cache', default=False, action="store_true", dest="no_cache", help="This will prevent caching of text based blacklists")
    parser.add_option('--clear-cache', default=False, action="store_true", dest="clear_cache", help="This will clear the existing cache")
    parser.add_option('--cache-timeout', default=60*60*12, action="store", dest="cache_timeout", help="Number of seconds before cache results are to expire (Default: 12 hours)")
    parser.add_option('--threads', default=5, action="store", dest="threads", help="Sets the number of feed search threads")
    parser.add_option('--infile', default=None, action="store", dest="infile", help="A newline separated list of IP addresses")
    parser.add_option('--ip', action="store", dest="ip")
    parser.add_option('-w','--wan', action="store_true", dest="wan", default=False, help="Will add your WAN ip to the list of IP addresses being checked.")
    parser.add_option('-f', '--format', action="store", dest="format", help="Set the output format for an outfile", default="csv")
    parser.add_option('-o', '--outfile', action="store", dest="outfile", help="Where to write the results", default=None)
    (options, args) = parser.parse_args()

    if options.format:
        allowed_formats = ['csv', 'xls', 'xlsx', 'txt']
        if not options.format in allowed_formats:
            cprint("[!] Invalid format \"{}\".  Please select a valid format {}".format(options.format, ', '.join(allowed_formats)), RED)
            sys.exit(1)

    if options.outfile:
        print("[*] Results will be saved to {} in {} format".format(options.outfile, options.format))

    # Check if the user supplied an IP address or IP block
    if options.ip is None and options.infile is None and options.wan is False:
        print("[!] You must supply an IP address, the WAN flag or a file containing IP addresses.")
        sys.exit(1)

    # Set our list of IPs to an empty list
    ips = []

    # Load up the IP in the --ip flag
    if options.ip:
        if '\\' in options.ip or '/' in options.ip:
            cprint("[!] Detected CIDR notation, adding all IP addresses in this range", BLUE)
            for ip in IPSet([options.ip]):
                ips += [str(ip)]
        else:
            # A single address is a one-element comma list, so this covers
            # both cases. Empty entries (trailing commas) are dropped.
            ips += [ip for ip in options.ip.split(',') if ip != '']

    # If the user supplied a file load these as well
    if options.infile:
        # ``open`` in a with-block replaces the Python-2-only ``file()``
        # call and closes the handle.
        with open(options.infile) as infile:
            ips += [ip for ip in infile.read().split('\n') if ip != '']

    if options.wan:
        ip = wan_ip()
        if ip:
            ips += [ip]
        else:
            cprint("[!] There was an issue trying to gather the WAN IP address.", RED)

    # Check if the user set their credentials when using a proxy
    if options.proxy:
        if options.proxy_user is None or options.proxy_pass is None:
            cprint("[!] Warning, no proxy credentials supplied. Authenticated proxies may not work.", BLUE)
        else:
            options.proxy_pass = urllib.quote(options.proxy_pass)

    # Initialize a queue for the feeds to go in
    fq = Queue()

    # Load in all the feeds from the feed configuration file
    feeds = load_feeds({"skip_bl": options.skip_bl, "skip_dnsbl": options.skip_dnsbl})

    # Establish the requests cache
    if not options.no_cache:
        requests_cache.install_cache('ipblisted', expire_after=int(options.cache_timeout))

    # If the user wants to manually clear the cache, do it now
    if options.clear_cache:
        requests_cache.clear()

    # If there are no feeds set, just exit the program
    if len(feeds) == 0:
        cprint("[!] No feeds were defined, please define them in feeds.json or don't skip them all.", RED)
        sys.exit(1)

    # Final check to make sure we actually have a list of IP addresses to check
    if len(ips) == 0:
        cprint("[!] No IP addresses were listed to check.  Please check your syntax and try again.", RED)
        # Stop here, like the other fatal checks above.
        sys.exit(1)

    # Command-line values arrive as strings; convert once, as is already
    # done for --cache-timeout.
    thread_count = int(options.threads)

    # Loop through each IP and find it
    print("[*] Checking {} IP addresses against {} lists".format(len(ips), len(feeds)))
    for ip in ips:
        print("[*] Searching Blacklist feeds for IP {ip}".format(ip=ip))

        # Build the feed requests queue
        oq = Queue()

        # Create a queue of all the feeds we want to check
        for feed in feeds:
            fq.put(feed)
        qsize = fq.qsize()

        # Start up our threads and start checking the feeds
        threads = [FeedThread(ip, options, fq, oq) for _ in range(thread_count)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        # Set the number of lists we have found to 0
        find_count = 0

        # Go through each feed and see if we find the IP or block
        results = list(oq.queue)

        if options.outfile:
            convert_results(results, ip, options.outfile)

        # Print out if the IP was found in any of the feeds
        for result in results:
            output = "[*] {name}: {found}".format(**result)
            if result["found"] == "Found":
                find_count += 1
                cprint(output, RED)
                continue
            if options.show_good:
                cprint(output)

        if find_count == 0:
            cprint("[*] Not found on any defined lists.", GREEN)
        else:
            cprint("[*] Found on {}/{} lists.".format(find_count, qsize), RED)
        print("[-]")
def tearDown(self):
    """Clear the cache managed by ``requests_cache``."""
    requests_cache.clear()
def clear_cache(self):
    """Clear the ``requests_cache`` cache (if one is named) and download again.

    ``requests_cache.clear()`` is only called when ``self.cache_name`` is
    truthy.

    NOTE(review): the original indentation of this block was lost, so it
    is not certain whether ``self.download()`` belongs inside the
    ``if self.cache_name`` guard or after it -- confirm against the
    upstream file before relying on this layout.
    """
    if self.cache_name:
        requests_cache.clear()
    self.download()
def clear_cache(self):
    """Clear the cache managed by ``requests_cache``."""
    requests_cache.clear()
def request(
    self,
    uri=None,
    method="GET",
    data=None,
    params=None,
    decode_json=True,
    headers=None,
    stream=False,
):
    """Perform an HTTP request.

    ``GET`` requests that come back with a 500-504 status are retried (up
    to six attempts in total) with an increasing delay. ``POST`` and
    ``DELETE`` requests clear the whole ``requests_cache`` cache afterwards
    when ``self.is_cache_enabled()`` is true.

    Parameters
    ----------
    uri : str
        The URI to use for the request. Defaults to ``self.uri``. A
        ``.json`` suffix is always appended.
    method : str
        The HTTP method to use for the request: ``GET``, ``POST`` or
        ``DELETE``.
    data : dict
        Any data to send as part of a post request body; it is JSON
        encoded before sending.
    params : dict
        Query string parameters. The caller's object is never modified.
    decode_json : bool
        Decode response or not.
    headers : dict
        The HTTP request headers. They are copied and the ``User-Agent``
        header is set on the copy; the caller's dict is never modified.
    stream: bool
        If response is streamed.

    Returns
    -------
    object
        The response object itself when ``stream`` is true; otherwise the
        decoded JSON body when ``decode_json`` is true, else the raw
        response content.

    Raises
    ------
    ValueError
        If ``method`` is not one of ``GET``, ``POST`` or ``DELETE``.
    requests.exceptions.HTTPError
        Propagated from ``raise_for_status()`` after the response headers
        have been logged.
    """
    self.last_response = None

    # Fail early with a clear message instead of crashing later on a
    # ``None`` response.
    if method not in ("GET", "POST", "DELETE"):
        raise ValueError("Unsupported HTTP method: {!r}".format(method))

    if uri is None:
        uri = self.uri
    uri = "{}.json".format(uri)

    # Work on private copies so neither the caller's objects nor a shared
    # default are ever mutated.
    headers = dict(headers) if headers else {}
    headers["User-Agent"] = self.USER_AGENT
    if params is None:
        params = {}

    resp = None
    if method == "GET":
        max_attempts = 6
        for attempt in range(max_attempts):
            # Retries carry an ``acapi_retry`` counter; it lives only in a
            # per-attempt copy so it cannot stick around in ``params``.
            query = dict(params, acapi_retry=attempt) if attempt else params
            resp = self.session.get(
                uri, auth=self.auth, headers=headers, params=query, stream=stream
            )
            if not 500 <= resp.status_code < 505:
                # No need to retry if not a server error type.
                break
            if attempt + 1 < max_attempts:
                # Back off before the next attempt; there is nothing to
                # wait for after the final one.
                time.sleep(((attempt + 1) ** 2.0) / 10)
    else:
        if method == "POST":
            resp = self.session.post(
                uri,
                auth=self.auth,
                headers=headers,
                params=params,
                data=json.dumps(data),
            )
        else:
            resp = self.session.delete(
                uri, auth=self.auth, headers=headers, params=params
            )
        # This is a sledgehammer but fine grained invalidation is messy.
        if self.is_cache_enabled():
            requests_cache.clear()

    if getattr(resp, "from_cache", False):
        LOGGER.info("%s %s returned from cache", method, uri)

    self.last_response = resp

    try:
        resp.raise_for_status()
    except requests.exceptions.HTTPError as exp:
        LOGGER.info(
            "Failed request response headers: \n%s",
            pformat(exp.response.headers, indent=2),
        )
        raise

    if stream:
        return resp
    if decode_json:
        return resp.json()
    return resp.content
def setUp(self):
    """Configure ``requests_cache`` and start from an empty cache.

    The cache is configured with ``CACHE_NAME`` and the ``CACHE_BACKEND``
    backend first, and is cleared afterwards.
    """
    requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND)
    requests_cache.clear()
def clear_cache(self):
    """Clear the cache managed by ``requests_cache``.

    ``requests_cache`` is imported inside the method, so the package is
    only needed when this method is actually called.
    """
    from requests_cache import clear as _purge_cache

    _purge_cache()
def request(self, uri=None, method='GET', data=None, params=None, decode_json=True):
    """Perform an HTTP request.

    ``GET`` requests that come back with a 500-504 status are retried (up
    to six attempts in total) with an increasing delay. ``POST`` and
    ``DELETE`` requests clear the whole ``requests_cache`` cache afterwards.
    Requests are authenticated with ``self.auth``.

    Parameters
    ----------
    uri : str
        The URI to use for the request. Defaults to ``self.uri``. A
        ``.json`` suffix is always appended.
    method : str
        The HTTP method to use for the request: ``GET``, ``POST`` or
        ``DELETE``.
    data : dict
        Any data to send as part of a post request body; it is JSON
        encoded before sending.
    params : dict
        Query string parameters. The caller's object is never modified.
    decode_json : bool
        Decode the response body as JSON or return the raw content.

    Returns
    -------
    object
        The decoded JSON body when ``decode_json`` is true, otherwise the
        raw response content.

    Raises
    ------
    ValueError
        If ``method`` is not one of ``GET``, ``POST`` or ``DELETE``.
    requests.exceptions.HTTPError
        Propagated from ``raise_for_status()`` after the response headers
        have been logged.
    """
    self.last_response = None

    # Fail early with a clear message instead of hitting an unbound
    # ``resp`` further down.
    if method not in ('GET', 'POST', 'DELETE'):
        raise ValueError('Unsupported HTTP method: {!r}'.format(method))

    if uri is None:
        uri = self.uri
    uri = '{}.json'.format(uri)

    headers = {'User-Agent': self.USER_AGENT}
    if params is None:
        params = {}

    if method == 'GET':
        max_attempts = 6
        for attempt in range(max_attempts):
            # Retries carry an ``acapi_retry`` counter; it lives only in a
            # per-attempt copy so it cannot stick around in ``params``.
            query = dict(params, acapi_retry=attempt) if attempt else params
            resp = requests.get(uri, auth=self.auth, headers=headers,
                                params=query)
            if not 500 <= resp.status_code < 505:
                # No need to retry if not a server error type.
                break
            if attempt + 1 < max_attempts:
                # Back off before the next attempt; there is nothing to
                # wait for after the final one.
                time.sleep(((attempt + 1) ** 2.0) / 10)
    else:
        if method == 'POST':
            resp = requests.post(uri, auth=self.auth, headers=headers,
                                 params=params, data=json.dumps(data))
        else:
            resp = requests.delete(uri, auth=self.auth, headers=headers,
                                   params=params)
        # This is a sledgehammer but fine grained invalidation is messy.
        requests_cache.clear()

    if getattr(resp, 'from_cache', False):
        LOGGER.info("%s %s returned from cache", method, uri)

    self.last_response = resp

    # raise_for_status() raises by itself for 4xx/5xx responses and returns
    # None otherwise, so its result must not be re-raised: doing so turned
    # every non-200 success (201, 202, 204, ...) into a TypeError.
    try:
        resp.raise_for_status()
    except requests.exceptions.HTTPError as exp:
        LOGGER.info("Failed request response headers: \n%s",
                    pformat(exp.response.headers, indent=2))
        raise

    if decode_json:
        return resp.json()
    return resp.content
def clear_cache(self):
    """Clear the globally installed cache.

    This is a best-effort operation: any ordinary error raised while
    clearing is logged at debug level and otherwise ignored, so the method
    always returns ``None``. ``KeyboardInterrupt`` and ``SystemExit`` are
    deliberately not swallowed.
    """
    try:
        requests_cache.clear()
    except Exception:
        # Imported here so the block does not depend on a module-level
        # ``logging`` import being present in the file.
        import logging

        logging.getLogger(__name__).debug(
            "Could not clear the requests cache", exc_info=True
        )