async def get_from_backend(address_components):
    cache = Cache(TAMULookup.name)
    result = cache.get(address_components)
    is_cached = bool(result)
    if is_cached:
        request_count_cached.labels(TAMULookup.name).inc()
    else:
        result = tamu_geocode_address(address_components)
        cache.save(address_components, result)  # TODO: do this in the background
        request_count.labels(TAMULookup.name).inc()
    point = Point((Decimal(result["Longitude"]), Decimal(result["Latitude"])))
    feature = Feature(
        geometry=point,
        properties={
            "service": TAMULookup.name,
            "quality": result["NAACCRGISCoordinateQualityCode"],
            "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
            "cached": is_cached,  # TODO: should this be a timestamp?
        },
    )
    return feature
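# A minimal usage sketch for the TAMU lookup above. The exact shape of
# `address_components` is an assumption here; the real keys depend on what
# tamu_geocode_address expects (street/city/state/zip are hypothetical).
import asyncio

async def example_tamu_lookup():
    address_components = {  # hypothetical keys
        "street": "400 Bizzell St",
        "city": "College Station",
        "state": "TX",
        "zip": "77843",
    }
    feature = await get_from_backend(address_components)
    print(feature)

# asyncio.run(example_tamu_lookup())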
def post(self, request, *args, **kwargs):
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    header = request.META.get('HTTP_AUTHORIZATION')
    if isinstance(header, str):
        header = header.encode(HTTP_HEADER_ENCODING)
    try:
        tag, token = header.split()
        cache_token = Cache.get(serializer.data["username"])
        if cache_token is not None and str(cache_token).strip() == token.decode().strip():
            Cache.delete(serializer.data["username"])
            try:
                user_obj = User.objects.get(username=serializer.data["username"])
                models.Log(username=user_obj.username, event=0,
                           content="注销登陆成功!").save()
            except User.DoesNotExist:
                logs.print(
                    "error",
                    "{user}不存在,无法记录注销日志".format(user=serializer.data["username"]))
    except ValueError:
        pass
    except AttributeError:
        return Response(status=status.HTTP_400_BAD_REQUEST)
    return Response({"code": 200, "msg": "注销成功!"}, status=status.HTTP_200_OK)
def active(self, request, *args, **kwargs):
    try:
        username = request.data["username"]
        is_active = request.data["active"]
    except KeyError as key:
        return Response({"code": 801, "msg": "缺少:{key}参数!".format(key=key)})
    try:
        user_obj = User.objects.get(username=username)
        user_obj.is_active = is_active
        user_obj.save()
    except User.DoesNotExist:
        return Response({"code": 805, "msg": "用户不存在!"})
    if is_active:
        msg = "用户激活完成!"
    else:
        msg = "用户禁用完成!"
    Cache.delete(username)  # drop the cached token so the user has to log in again
    models.Log(username=str(request.user), event=3,
               content="{username}{msg}".format(username=username, msg=msg)).save()
    return Response({"code": 200, "msg": msg})
def __init__(self):
    self.config = Configuration()
    self.cache = Cache()
    self.cards = []
    self.name = ""
    self.combos = []
    self.api = GameApi(self.config)
    self.savePath = self.config.paths.savePath
class MQReader(BaseReader):
    suffix = 'rt'

    @staticmethod
    def decode(args):
        raise NotImplementedError()

    @staticmethod
    def get_value(msg):
        raise NotImplementedError()

    @staticmethod
    def get_token(msg):
        raise NotImplementedError()

    def __init__(self, cache_key='', is_bootstrap=True, is_resume=True):
        """Read data from an MQ.

        :param cache_key: cache key
        :param is_bootstrap: whether to read the full data set first
        :param is_resume: whether to resume from the last checkpoint
        """
        self.is_bootstrap = is_bootstrap
        self.is_resume = is_resume
        self._cache_key = cache_key
        self._cache_resume_token = None
        self.resume_token = {}
        if self.is_resume:
            key = f'{cache_key}:{self.suffix}'
            # read the checkpoint from the external cache
            self._cache_resume_token = Cache(key, **REDIS_CONFIG)
            self.resume_token = self.get_resume_token()

    def get_resume_token(self):
        resume_token = self._cache_resume_token.get_all() if self._cache_resume_token else {}
        return dict(map(self.decode, resume_token.items()))

    def save_resume_token(self):
        if self._cache_resume_token and self.resume_token:
            logger.info('Save resume_token')
            self._cache_resume_token.setm(self.resume_token)

    def read(self):
        raise NotImplementedError()

    def commit(self, token):
        if self.is_resume:
            self.resume_token.update(token)

    def disconnect(self, *args, **kwargs):
        raise NotImplementedError()

    def stop(self):
        self.save_resume_token()
        logger.info('Reader closed')
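# A minimal sketch of what a concrete MQReader subclass could look like. The
# `consumer` object, its message fields ("payload", "topic", "offset"), and the
# checkpoint shape are assumptions for illustration, not part of the class above.
class ExampleMQReader(MQReader):
    def __init__(self, consumer, cache_key='', **kwargs):
        super().__init__(cache_key=cache_key, **kwargs)
        self.consumer = consumer  # any iterable of dict-like messages (hypothetical)

    @staticmethod
    def decode(args):
        # cache entries come back as (key, value) pairs; normalise bytes keys
        key, value = args
        return (key.decode() if isinstance(key, bytes) else key, value)

    @staticmethod
    def get_value(msg):
        return msg["payload"]  # hypothetical message field

    @staticmethod
    def get_token(msg):
        return {msg["topic"]: msg["offset"]}  # hypothetical checkpoint shape

    def read(self):
        for msg in self.consumer:
            yield self.get_value(msg)
            self.commit(self.get_token(msg))  # remember how far we have read

    def disconnect(self, *args, **kwargs):
        close = getattr(self.consumer, "close", None)
        if callable(close):
            close()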
def __init__(self):
    cache = Cache()
    status = cache.key_status("/stats")
    if status is None:
        self.update(cache)
    else:
        if status == Cache.STALE:
            # refresh stale stats in the background; serve the cached copy for now
            t = threading.Thread(target=self.update)
            t.start()
        (self.distro_count, self.package_count,
         self.upstream_count, self.release_count) = cache.get("/stats")
def initialise_simulation(self, simulation):
    self.simulation = simulation
    Cache.get_cache().flush()
    self.configurations.load_configuration_for_simulation(simulation)
    self.configurations.configurations_dict = deepcopy(self.configurations.configurations_dict)
    segment_table_fixture_path = join(segment_table_dir_path, simulation.segment_table_file_name)
    SegmentTable.load(segment_table_fixture_path)
def __init__(self, adapter, **kwargs):
    self.adapter = adapter
    "The dataset adapter serving the raw data."
    self.queue = []
    "A queue for the elements still to be served this epoch."
    self.split_limits = {'train': None, 'test': None, 'val': None}
    "Optional limits for the number of elements in train, test, and val sets."
    self.noshuffle = False
    "If set, epoch elements are not shuffled."
    self.current_minibatch = None
    "The current minibatch index in the epoch."
    self.current_phase = None
    "The current phase (train, test or val)."
    self.minibatch_size = None
    "The amount of elements per minibatch."
    self.cache = Cache(enabled=kwargs.pop('caching', True), gpu=kwargs.pop('gpu_caching', True))
    "The cache used by the data function calls. By default, caches everything in GPU memory."
    self.data_function = None
    """
    Function that serves the input and target data for a given minibatch element from a given adapter.
    The minibatch dimension should already be added - they are concatenated along the first dimension.
    This function should handle any desired caching itself, using the passed cache.
    Input: adapter, element [, cache]
    Output: (input, target) tuple
    Both input and target should be a tuple
    """
    self._logger = None
    "Logger to handle output."
    self.center_crop_size = None
    "Used by the patch-based data servers to crop the center view."
    self.refinement_experiment = None
    self.nr_neighbours = 4
    self.restricted_nr_views = 1
    "Used by some data loader functions"
    self.__dict__.update(kwargs)
    if self.refinement_experiment is not None:
        self.refinement_experiment = experiment_handler.ExperimentHandler.load_experiment_from_file(
            self.refinement_experiment)
def __init__(self, package=None, distro=None):
    c = Cache()
    self.releases = []
    con = db.connect(host=HOST, user=USER, password=PASSWORD, database=DB)
    cur = con.cursor()
    if package is not None:
        cur.execute("SELECT id FROM packages WHERE name = %s", (package,))
        package_id = cur.fetchone()[0]
    else:
        package_id = None
    if distro is not None:
        cur.execute("SELECT id FROM distros WHERE name = %s", (distro,))
        row = cur.fetchone()
        if row is None:
            print("Unknown distro: " + distro)
            raise UnknownDistroError(distro)
        distro_id = row[0]
    else:
        distro_id = None
    cached = False
    if package is None and distro is None:
        key = "/upstream/latest"
        query = "SELECT packages.name, ureleases.version, MIN(ureleases.released) FROM packages, ureleases WHERE packages.id = ureleases.package_id GROUP BY packages.name, ureleases.version HAVING MIN(ureleases.released) >= current_timestamp - interval '1 day' ORDER BY MIN(ureleases.released) DESC, packages.name ASC"
        query_args = []
    elif package is None and distro is not None:
        key = "/distro/%s/latest" % distro
        query = "SELECT packages.name, dreleases.version, dreleases.revision, MIN(dreleases.released) FROM packages, dreleases, repos, distros WHERE packages.id = dreleases.package_id AND repos.id = dreleases.repo_id AND distros.id = repos.distro_id AND distros.name = %s GROUP BY packages.name, dreleases.version, dreleases.revision HAVING MIN(dreleases.released) >= current_timestamp - interval '1 day' ORDER BY MIN(dreleases.released) DESC, packages.name ASC"
        query_args = (distro,)
    elif package is not None and distro is None:
        key = "/pkg/%s/latest" % package
        query = "SELECT packages.name, ureleases.version, MIN(ureleases.released) FROM packages, ureleases WHERE packages.id = ureleases.package_id AND packages.name = %s GROUP BY packages.name, ureleases.version HAVING MIN(ureleases.released) >= current_timestamp - interval '1 day' ORDER BY MIN(ureleases.released) DESC"
        query_args = (package,)
    else:
        key = "/distro/%s/pkg/%s/latest" % (distro, package)
        query = "SELECT packages.name, dreleases.version, dreleases.revision, MIN(dreleases.released) FROM packages, dreleases, repos, distros WHERE packages.id = dreleases.package_id AND repos.id = dreleases.repo_id AND distros.id = repos.distro_id AND distros.name = %s AND packages.name = %s GROUP BY packages.name, dreleases.version, dreleases.revision HAVING MIN(dreleases.released) >= current_timestamp - interval '1 day' ORDER BY MIN(dreleases.released) DESC"
        query_args = (distro, package)
    now = datetime.datetime.now()
    day = datetime.timedelta(1)
    status = c.key_status(key)
    if status is not None:
        # serve the cached releases; refresh in the background if the entry is stale
        self.releases = c.get(key)
        if status == Cache.STALE:
            t = threading.Thread(target=self.update,
                                 args=(key, query, query_args, package_id, distro_id))
            t.start()
    else:
        self.update(key, query, query_args, package_id, distro_id)
    self.today = len(self.releases)
def __init__(self, branch="current"):
    self.branch = branch
    cache = Cache()
    status = cache.key_status("/distro_ranks/" + self.branch)
    if status is None:
        self.update(cache)
    else:
        if status == Cache.STALE:
            # refresh stale rankings in the background; serve the cached copy for now
            t = threading.Thread(target=self.update)
            t.start()
        self.distros, = cache.get("/distro_ranks/" + self.branch)
def cache_test():
    key = request.args.get("key")
    Cache.set(key, "val")
    Cache.set("name", key)
    name = Cache.get("name")
    Cache.delete("name")
    Cache.set("age", 12)
    return jsonify({"name": name})
def __init__(self, command_prefix: str, intents: discord.Intents, **kwargs):
    super().__init__(command_prefix=command_prefix, intents=intents, **kwargs)
    self.logger = set_logger()
    self.verification_queue = dict()
    self.event_queue = Queue()
    self.obj_cache = Cache()
    self.running = True
    self.default_invite = \
        "https://discord.com/api/oauth2/authorize?client_id=767842408758771742&permissions=51200&scope=bot"
    self.reddit = self.create_reddit_connection()
    self.load_data()
def __init__(self, bot):
    self.bot = GenericEvent.bot = bot
    self.bot_command = ['/bot']
    self.pluggables = {
        "allmessages": [],
        "call": [],
        "membership": [],
        "message": [],
        "rename": [],
        "history": [],
        "sending": [],
        "typing": [],
        "watermark": [],
    }
    # timeout for messages to be received for reprocessing: 6 hours
    receive_timeout = 60 * 60 * 6
    self._reprocessors = Cache(receive_timeout, increase_on_access=False)
    self._contexts = Cache(receive_timeout, increase_on_access=False)
    self._image_ids = Cache(receive_timeout, increase_on_access=False)
    self._executables = Cache(receive_timeout, increase_on_access=False)
def parse(self, response, **kwargs):
    res = response.body
    soup = BeautifulSoup(res, "lxml")
    tbody = soup.find("tbody")
    tr_list = tbody.find_all("tr")
    # if not tr_list:
    #     self.crawler.engine.close_spider(self)
    for tr in tr_list:
        td_list = tr.find_all("td")
        ip_info = IPItem()
        ip_info["host"] = td_list[0].text.strip()
        ip_info["port"] = td_list[1].text.strip()
        if Cache.is_exist("{}:{}".format(ip_info["host"], ip_info["port"])):
            continue
        ip_info["proxy_type"] = 0
        ip_info["anonymity_type"] = 1
        ip_info["region"] = FieldParser.get_region(td_list[2].text.strip())
        try:
            yield ip_info
        except Exception as exc:
            self.logger.error("【程序异常】{}".format(exc))
def __init__(self):
    super().__init__(command_prefix=_prefix_callable,
                     owner_id=394859035209498626,
                     case_insensitive=True)
    self.restrictions = ["create", "edit", ""]
    self.reactions = {"arrows": ["◀", "▶"],
                      "toggle": ["⏏"],
                      "ticks": ["<:greenTick:600735269993578496>", "<:redTick:600735269792120977>"],
                      "boolean": ["<:greenTick:600735269993578496>", "<:redTick:600735269792120977>"],
                      "thumbs": ["👍", "👎"],
                      "cancel": ["<:redTick:600735269792120977>"],
                      "pager": ["⏪", "⏩"],
                      "superlike": "🔥"}
    self.translate = {"greenTick": 1, "redTick": 0}
    # Simple "cache" for some data
    self.data = {}
    self.cache = Cache()
    # Prefix class for fetching cached prefixes
    self.prefixes = Prefix(self)
    # Loads the images into memory
    self.images = ImageCache("images")
    # Removes the help command
    self.remove_command("help")
    # Loads the extensions (cogs)
    for ext in extensions:
        try:
            self.load_extension(ext)
        except Exception:
            print(f'Failed to load extension {ext}.', file=sys.stderr)
            traceback.print_exc()
        else:
            print(f'Successfully loaded {ext}')
    # Can be used to measure statistics
    self.stats = {}
    # Can be used to prevent people from using commands multiple times
    self.restricted = {}
    # Loads the fonts into memory
    self.fonts = FontCache("fonts")
    # Loads data from json files as a directory
    self.json = JsonCache()
    # Creates session for API calls
    self.session = aiohttp.ClientSession(loop=self.loop)
    # Starts background tasks:
    # Starts the latency timer
    self.loop.create_task(self.latency_timer())
    # Creates redis database class
    # self.db = Rdb(config.redis_path, self.loop)
    print("Tinker successfully initialized.")
def parse(self, response, **kwargs):
    res = response.body
    soup = BeautifulSoup(res, "lxml")
    table = soup.find_all("table")[2]
    tr_list = table.find_all("tr")[1:]
    if not tr_list and self.page > 2400:
        self.crawler.engine.close_spider(self)
    for tr in tr_list:
        ip_info = IPItem()
        ip_info["host"] = tr.contents[0].text
        ip_info["port"] = tr.contents[1].text
        if Cache.is_exist("{}:{}".format(ip_info["host"], ip_info["port"])):
            continue
        ip_info["proxy_type"] = 0
        ip_info["anonymity_type"] = FieldParser.get_anonymity_type(tr.contents[3].text)
        ip_info["region"] = FieldParser.get_region(tr.contents[2].text)
        try:
            yield ip_info
        except Exception as exc:
            self.logger.error("【程序异常】{}".format(exc))
def update(self, key, query, query_args, package_id, distro_id, cache=None):
    if cache is None:
        cache = Cache()
    con = db.connect(host=HOST, user=USER, password=PASSWORD, database=DB)
    cur = con.cursor()
    cur.execute(query, query_args)
    tmp = []
    for row in cur:
        tmp.append(row)
    self.releases = tmp
    print(tmp)
    cache.put(key, tmp, [(package_id, distro_id)])
def crawl(mod):
    cache = Cache()
    repos = mod.get_repos(test)
    i = 0
    for repo in repos:
        print(str(i) + "/" + str(len(repos)), repo)
        s = time.perf_counter()
        if not last:
            repo.last_crawl = None
        last_crawl, rels = mod.crawl_repo(repo)
        total_new = downstream.add_releases(repo, rels, test, cache)
        if total_new > 0:
            # new downstream releases invalidate the cache entries for this distro
            cache.evict([(None, repo.distro_id)])
        downstream.set_last_crawl(repo, last_crawl, test)
        print("\t" + str(total_new), "new releases", "\t\t", time.perf_counter() - s, "secs")
        i += 1
def authenticate(self, request):
    header = self.get_header(request)
    if header is None:
        return None
    raw_token = self.get_raw_token(header)
    if raw_token is None:
        return None
    validated_token = self.get_validated_token(raw_token)
    if validated_token is None:
        return None
    try:
        user_id = validated_token[api_settings.USER_ID_CLAIM]
    except KeyError:
        return None
    try:
        user = User.objects.get(**{api_settings.USER_ID_FIELD: user_id})
    except User.DoesNotExist:
        return None
    if not user.is_active:
        return None
    # the token is only valid while it matches the copy stored in the cache;
    # deleting the cached token (e.g. on logout or deactivation) revokes it
    cache_token = Cache.get(user.username)
    if cache_token is None or str(cache_token).strip() != raw_token.decode().strip():
        return None
    return self.get_user(validated_token)
def crawl(test):
    print("subversion")
    cache = Cache()
    last_crawl = upstream.last_crawl(source_id)
    rels = get_releases(last_crawl)
    count, max_date = upstream.add_releases(source_id, rels, test, cache)
    print("\t" + str(count), "new releases")
    upstream.set_last_crawl(source_id, max_date, test)
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.cache = Cache.get_cache()
    # one single-slot queue per island for migrating individuals
    self.migration_queues = [Queue(maxsize=1) for _ in range(self.total_num_islands)]
def post(self, request, *args, **kwargs):
    serializer = self.get_serializer(data=request.data)
    try:
        serializer.is_valid(raise_exception=True)
    except TokenError as e:
        raise InvalidToken(e.args[0])
    data = {
        "display_name": serializer.validated_data["user_display"],
        "username": serializer.validated_data["user"],
        "Token": serializer.validated_data["access"],
        "UserType": serializer.validated_data["user_type"],
        "active": serializer.validated_data["user_type"],
        "is_superuser": serializer.validated_data["is_superuser"],
    }
    # store the issued access token so it can be checked (and revoked) later
    Cache.set(request.data["username"], serializer.validated_data["access"])
    return Response(data, status=status.HTTP_200_OK)
def crawl(test):
    cache = Cache()
    sources = explore_module.get_explore_targets()
    for target in sources:
        print(target[1])
        rels = explore(*target[2:])
        count, max_date = explore_module.add_releases(source_id, target[0], rels, test, cache)
        print("\t" + str(count), "new releases")
        explore_module.set_last_crawl(target[0], max_date, test)
def run(self):
    Cache.get_cache().flush()
    if self.resume:
        self.resume_simulation()
    else:
        self.processes = self.init_processes()
        for p in self.processes:
            p.start()
            self.logger.info('Started process {}'.format(p.name))
    self.maintain_island_processes()
    self.collect_all_island_results()
    for p in self.processes:
        self.logger.info('Joining process {}'.format(p.name))
        p.join()
        p.terminate()
    return self.best_hypothesis
def update(self, cache=None):
    if cache is None:
        cache = Cache()
    con = db.connect(host=HOST, user=USER, password=PASSWORD, database=DB)
    cur = con.cursor()
    cur.execute("SELECT COUNT(*) FROM distros;")
    self.distro_count = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM packages;")
    self.package_count = cur.fetchone()[0]
    cur.execute("SELECT COUNT(DISTINCT package_id) FROM ureleases")
    self.upstream_count = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM (SELECT DISTINCT package_id, version, revision FROM releases) t")
    self.release_count = cur.fetchone()[0]
    con.close()
    cache.put("/stats",
              (self.distro_count, self.package_count, self.upstream_count, self.release_count),
              [(None, None)])
def __init__(self, tables, cache_key='', is_bootstrap=True, is_resume=True):
    """Read data from a DB.

    :param tables: list of tables to read
    :param cache_key: cache key
    :param is_bootstrap: whether to run a full initial query
    :param is_resume: whether to resume from the last checkpoint
    """
    self.tables = tables
    self.is_bootstrap = is_bootstrap
    self.is_resume = is_resume
    self._cache_key = cache_key
    # resume_token records how far the realtime change log has been read
    self._cache_resume_token = None
    self.resume_token = {}
    self._rt = {}
    # records the timestamp each table has been read up to
    self._cache_timestamps = None
    self.timestamps = {}
    self._ts = {}
    if self.is_resume:
        key_rt = f'{cache_key}:{self.suffix_rt}'
        # read the checkpoint from the external cache
        self._cache_resume_token = Cache(key_rt, **REDIS_CONFIG)
        self.resume_token = self._rt = self.get_resume_token()

        key_ts = f'{cache_key}:{self.suffix_ts}'
        # read the per-table timestamps from the external cache
        self._cache_timestamps = Cache(key_ts, **REDIS_CONFIG)
        self.timestamps = self._ts = self.get_timestamps()
    # tables that are new to this run
    self.new_tables = set(self.tables) - set(self.timestamps.keys())
    # tables that were already being tracked
    self.old_tables = set(self.tables) & set(self.timestamps.keys())
    # tables whose incremental changes need to be followed
    self.inc_tables = self.new_tables | self.old_tables
def process_item(item, spider):
    sql = "insert into api_ip (host, port, proxy_type, anonymity_type, region) values (%s, %s, %s, %s, %s)"
    try:
        cursor.execute(sql, [
            item.get("host"),
            item.get("port"),
            item.get("proxy_type"),
            item.get("anonymity_type"),
            item.get("region"),
        ])
        conn.commit()
        ip_title = "{}:{}".format(item["host"], item["port"])
        # remember the proxy so the spiders can skip it next time
        Cache.set(ip_title)
        spider.logger.warning("【导入成功】{}".format(ip_title))
    except Exception as exc:
        spider.logger.error("【导入失败】{}".format(exc))
    return item
def update(self, cache=None):
    if cache is None:
        cache = Cache()
    pkgs = groups.get_group("twenty")
    upstream = [history.PackageHistory(pkg) for pkg in pkgs]
    distros = downstream.list_distros()
    distros = [history.DistroHistory(d, upstream, self.branch) for d in distros]
    results = []
    for distro in distros:
        current_obs = distro.get_obsoletion_timeline()[-1]
        current_obs_count = distro.get_obsoletion_count_timeline()[-1]
        current_lag = distro.get_lag_timeline()[-1]
        results.append({"name": distro.name,
                        "codename": distro.codename,
                        "obs": current_obs,
                        "count": current_obs_count,
                        "lag": current_lag})
    self.distros = results
    self.distros.sort(key=lambda x: x["obs"])
    cache.put("/distro_ranks/" + self.branch, (self.distros,), [(None, None)])
async def get_from_backend(address_components):
    cache = Cache(OSMLookup.name)
    result = cache.get(address_components)
    is_cached = bool(result)
    if is_cached:
        request_count_cached.labels(OSMLookup.name).inc()
    else:
        result = osm_geocode_address(address_components)
        cache.save(address_components, result)  # TODO: do this in the background
        request_count.labels(OSMLookup.name).inc()
    point = Point((Decimal(result["lon"]), Decimal(result["lat"])))
    feature = Feature(
        geometry=point,
        properties={
            "service": OSMLookup.name,
            "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
            "cached": is_cached,  # TODO: should this be a timestamp?
        },
    )
    return feature
def change(self, request, *args, **kwargs):
    try:
        username = request.data["username"]
        password = request.data["password"]
    except KeyError as key:
        return Response({"code": 801, "msg": "缺少:{key}参数!".format(key=key)})
    if str(request.user) != username:
        return Response({"code": 204, "msg": "密码修改失败!"})
    try:
        user_obj = User.objects.get(username=username)
        user_obj.set_password(password)
        user_obj.save()
    except User.DoesNotExist:
        return Response({"code": 805, "msg": "用户不存在!"})
    Cache.delete(username)  # drop the cached token so the old credentials stop working
    msg = "密码修改完成!"
    models.Log(username=str(request.user), event=3,
               content="{username}{msg}".format(username=username, msg=msg)).save()
    return Response({"code": 200, "msg": "密码修改完成!"})
def crawl(test=False):
    cache = Cache()
    sources = sf_module.get_sf_targets()
    all_rels = []
    total_new = 0
    for target in sources:
        print(target[1])
        rels = get_releases(*target[2:])
        all_rels += rels
        count, max_date = sf_module.add_releases(source_id, target[0], rels, test, cache)
        total_new += count
        print("\t" + str(count), "new releases")
        sf_module.set_last_crawl(target[0], max_date, test)
    return (total_new, all_rels)
def __init__(self, simulation, migration_coordinator, result_queue,
             island_number, simulation_total_islands, max_generations,
             simulation_total_generations, initial_generation=0,
             initial_population=None):
    self.logger = Logger.get_logger()
    self.island_number = island_number
    self.simulation_total_islands = simulation_total_islands
    self.island_name = '{}_{}'.format(ga_config.PROCESS_NAME_PREFIX, self.island_number)
    self.init_random_seed()
    self.cache = Cache.get_cache()
    self.initial_generation = initial_generation
    self.generation = initial_generation
    self.max_generations = max_generations
    self.simulation_total_generations = simulation_total_generations
    self.simulation = simulation
    self.migration_coordinator = migration_coordinator
    self.result_queue = result_queue
    self._population = None
    if initial_population:
        self.population = initial_population
        self.invalidate_all_population_fitness()
    self.var_and, self.crossover_rate, self.mutation_rate = self.init_crossover_mutation_rates()
    self.target_hypothesis, self.target_hypothesis_energy = self.init_target_hypothesis()
    self.new_individuals_in_generation = 0
    self.elite = None
    self.stats, self.hall_of_fame = self.init_stats()
    self.logbook = self.init_logbook()
    self._non_inf_fitness = None
    self.toolbox = self.init_toolbox()
async def setup_hook(self) -> None:
    self.session: ClientSession = ClientSession()
    self.cache: Cache = Cache(self)
    self.pool: asyncpg.Pool = await asyncpg.create_pool(**self.settings["postgresql"])
    self.topgg: DBLClient = DBLClient(self, self.api["TopGG"],
                                      autopost_interval=None,
                                      session=self.session)
    self.topgg_webhook: WebhookManager = WebhookManager(self).dbl_webhook(
        "/dbl", self.api["TopGGWH"])
    self.gist: asyncgist.Client = asyncgist.Client(self.api["GitHub"], self.session)
    self.sr: sr_api.Client = sr_api.Client()
    self.dagpi: asyncdagpi.Client = asyncdagpi.Client(self.api["DagpiAPI"],
                                                      session=self.session)
    self.myst: mystbin.Client = mystbin.Client(session=self.session)
    self.loop.create_task(self.cache.populate_cache())
    self.loop.create_task(self.load_extensions())
    self.loop.create_task(self.start_nodes())
    self.loop.create_task(self.find_restart_message())
    self.topgg_webhook.run(8025)
def post(self):
    try:
        Cache.delete(self.get_argument('exp'), exp=True)
        self.flash(1)
    except Exception:
        self.flash(0)
total_start = time.perf_counter()  # time.perf_counter() replaces the removed time.clock()
stats = []
for d in downstream_targets:
    s = time.perf_counter()
    try:
        stats.append((d, crawl(DISTROS[d])))
    except Exception:
        print("error from distro:", d)
        print(traceback.format_exc())
    gc.collect()
    print(time.perf_counter() - s, "distro seconds")
for u in upstream_targets:
    s = time.perf_counter()
    try:
        stats.append((u, UPSTREAM[u].crawl(test)))
    except Exception:
        print("error from upstream:", u)
        print(traceback.format_exc())
    gc.collect()
    print(time.perf_counter() - s, "upstream seconds")
cache = Cache()
# a full crawl may have touched anything, so evict every cached entry
cache.evict([(None, None)])
print(time.perf_counter() - total_start, "seconds total")
save_to = open("crawl_stats/" + str(int(time.time())) + ".pickle", "wb")
pickle.dump(stats, save_to)
save_to.close()
def entry(self, sign, size=1, life=10, swap=False):
    # throttle login attempts: remember that this sign tried recently
    sign = 'entry@' + sign
    data = Cache.obtain(sign)
    if swap or not data:
        Cache.upsert(sign, size, life)
    return data
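# A hedged usage sketch for the throttle above. It assumes `entry` lives on a
# hypothetical auth helper (`limiter`) and that Cache.obtain returns a falsy value
# once the entry's lifetime has expired, so a truthy result means a recent attempt.
def may_attempt_login(limiter, username):
    # entry() records this attempt (kept for `life` seconds) and returns the
    # previous record, if any; only allow the login when nothing recent exists
    recent = limiter.entry(username, size=1, life=60)
    return not recent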