def post(self, *args, **kwargs):
    method = self.get_argument("method", "POST")
    if method == "DELETE":
        self.delete(*args, **kwargs)
    elif method == "OPTIONS":
        self.options(*args, **kwargs)
    else:
        if config.get_config("requires_tos") and \
                self.get_argument("accepted_tos", "false") != "true":
            self.set_status(403)
            self.finish()
            return
        timer = Timer("Kernel handler for %s" % self.get_argument("notebook", uuid.uuid4()))
        proto = self.request.protocol.replace("http", "ws", 1)
        host = self.request.host
        ws_url = "%s://%s/" % (proto, host)
        km = self.application.km
        logger.info("Starting session: %s" % timer)
        timeout = self.get_argument("timeout", None)
        if timeout is not None:
            timeout = float(timeout)
            if math.isnan(timeout) or timeout < 0:
                timeout = None
        kernel_id = yield gen.Task(km.new_session_async,
                                   referer=self.request.headers.get('Referer', ''),
                                   remote_ip=self.request.remote_ip,
                                   timeout=timeout)
        data = {"ws_url": ws_url, "id": kernel_id}
        self.write(self.permissions(data))
        self.finish()
def restoredb(pg_env, pg_restore_binary, database_dump_path, dump_name):
    env = os.environ.copy()
    env.update(pg_env)
    answer = raw_input("This command will restore this dump into database %s. "
                       "Continue? (y)es, (N)o? " % env['PGDATABASE'])
    if answer != 'y':
        logger.info("Aborting!")
        return
    db_dump_file_name = os.path.join(database_dump_path, dump_name)
    if not os.path.isfile(db_dump_file_name):
        logger.error("file %s does not exist" % db_dump_file_name)
        return
    logger.debug("Restoring %s" % db_dump_file_name)
    cmd = (pg_restore_binary, "-d", env['PGDATABASE'], "-O", "-x", db_dump_file_name)
    logger.trace("Executing %s" % str(cmd))
    proc = subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = proc.communicate()
    if stderr != '':
        logger.error("An error occurred while calling pg_restore: %s" % stderr)
        return
def createImage(self, user, image, lxc, description="Nothing", imagenum=10):
    fspath = self.NFS_PREFIX + "/local/volume/" + lxc
    imgpath = self.imgpath + "private/" + user + "/"
    #tmppath = self.NFS_PREFIX + "/local/tmpimg/"
    #tmpimage = str(random.randint(0,10000000)) + ".tz"
    if not os.path.exists(imgpath + image) and os.path.exists(imgpath):
        cur_imagenum = 0
        for filename in os.listdir(imgpath):
            if os.path.isdir(imgpath + filename):
                cur_imagenum += 1
        if cur_imagenum >= int(imagenum):
            return [False, "image number limit exceeded"]
    #sys_run("mkdir -p %s" % tmppath, True)
    sys_run("mkdir -p %s" % imgpath, True)
    try:
        sys_run("tar -cvf %s -C %s ." % (imgpath + image + ".tz", self.dealpath(fspath)), True)
    except Exception as e:
        logger.error(e)
    #try:
    #    sys_run("cp %s %s" % (tmppath+tmpimage, imgpath+image+".tz"), True)
    #    sys_run("rsync -a --delete --exclude=lost+found/ --exclude=root/nfs/ --exclude=dev/ --exclude=mnt/ --exclude=tmp/ --exclude=media/ --exclude=proc/ --exclude=sys/ %s/ %s/" % (self.dealpath(fspath), imgpath+image), True)
    #except Exception as e:
    #    logger.error(e)
    #sys_run("rm -f %s" % tmppath+tmpimage, True)
    #sys_run("rm -f %s" % (imgpath+"."+image+"_docklet_share"), True)
    self.updateinfo(imgpath, image, description)
    logger.info("image:%s from LXC:%s created successfully" % (image, lxc))
    return [True, "create image success"]
def stop_heartbeat():
    try:
        if monitor_enable:
            params = {'inst': instance_index}
            urllib2.urlopen(stop_hb_url, data=urllib.urlencode(params))
    except urllib2.URLError:
        logger.info('Cannot connect to monitor')
def sensitivitybreak(A):
    from log import logger
    sn = [0, 0]
    flag = 0
    cnt = 0
    if A >= 2 and A < 65536:
        if A == 2:
            sn = [2, 1]
            return sn
        elif 2 < A <= 512:
            sn = [A, 1]
            return sn
        else:
            while True:
                for i in range(A // 255 + 1, int(math.sqrt(A))):
                    if i > 1:
                        if A % i:
                            pass
                        else:
                            sn[0] = max(i, A // i)
                            sn[1] = min(i, A // i)
                            return sn
                cnt = cnt + 1
                if cnt % 2:
                    flag = flag + 1
                    A += flag
                else:
                    flag = flag + 1
                    A -= flag
    else:
        logger.info("Invalid digitalsensitivity number!")
def run(self):
    # Ok create the thread
    nb_threads = self.daemon_thread_pool_size
    # Keep a list of our running threads
    threads = []
    logger.info('Using a %d http pool size', nb_threads)
    while True:
        # We must not run too much threads, so we will loop until
        # we got at least one free slot available
        free_slots = 0
        while free_slots <= 0:
            to_del = [t for t in threads if not t.is_alive()]
            _ = [t.join() for t in to_del]
            for t in to_del:
                threads.remove(t)
            free_slots = nb_threads - len(threads)
            if free_slots <= 0:
                time.sleep(0.01)
        socks = self.get_sockets()
        # Blocking for 0.1 s max here
        ins = self.get_socks_activity(socks, 0.1)
        if len(ins) == 0:
            # trivial case: no fd activity
            continue
        # If we got activity, Go for a new thread!
        for sock in socks:
            if sock in ins:
                # GO!
                t = threading.Thread(None, target=self.handle_one_request_thread,
                                     name='http-request', args=(sock,))
                # We don't want to hang the master thread just because this one is still alive
                t.daemon = True
                t.start()
                threads.append(t)
def __init__(self, host, port, http_backend, use_ssl, ca_cert, ssl_key,
             ssl_cert, hard_ssl_name_check, daemon_thread_pool_size):
    self.port = port
    self.host = host
    # Port = 0 means "I don't want HTTP server"
    if self.port == 0:
        return
    self.use_ssl = use_ssl
    self.registered_fun = {}
    self.registered_fun_names = []
    self.registered_fun_defaults = {}
    protocol = 'http'
    if use_ssl:
        protocol = 'https'
    self.uri = '%s://%s:%s' % (protocol, self.host, self.port)
    logger.info("Opening HTTP socket at %s", self.uri)
    # Hack the BaseHTTPServer so only IP will be looked by wsgiref, and not names
    __import__('BaseHTTPServer').BaseHTTPRequestHandler.address_string = \
        lambda x: x.client_address[0]
    if http_backend == 'cherrypy' or http_backend == 'auto' and cheery_wsgiserver:
        self.srv = CherryPyBackend(host, port, use_ssl, ca_cert, ssl_key, ssl_cert,
                                   hard_ssl_name_check, daemon_thread_pool_size)
    else:
        self.srv = WSGIREFBackend(host, port, use_ssl, ca_cert, ssl_key, ssl_cert,
                                  hard_ssl_name_check, daemon_thread_pool_size)
    self.lock = threading.RLock()
def run(self, handler):
    daemon_thread_pool_size = self.options['daemon_thread_pool_size']
    from wsgiref.simple_server import WSGIRequestHandler
    LoggerHandler = WSGIRequestHandler
    if self.quiet:
        class QuietHandler(WSGIRequestHandler):
            def log_request(*args, **kw):
                pass
        LoggerHandler = QuietHandler
    srv = simple_server.make_server(self.host, self.port, handler,
                                    handler_class=LoggerHandler)
    logger.info('Initializing a wsgiref backend with %d threads', daemon_thread_pool_size)
    use_ssl = self.options['use_ssl']
    ca_cert = self.options['ca_cert']
    ssl_cert = self.options['ssl_cert']
    ssl_key = self.options['ssl_key']
    if use_ssl:
        if not ssl:
            logger.error("Missing python-openssl library, "
                         "please install it to open a https backend")
            raise Exception("Missing python-openssl library, "
                            "please install it to open a https backend")
        srv.socket = ssl.wrap_socket(srv.socket, keyfile=ssl_key,
                                     certfile=ssl_cert, server_side=True)
    return srv
def vnodes_monitor(cur_user, user, form, con_id, issue):
    global G_clustername
    logger.info("handle request: monitor/vnodes")
    res = {}
    fetcher = monitor.Container_Fetcher(con_id)
    if issue == 'cpu_use':
        res['cpu_use'] = fetcher.get_cpu_use()
    elif issue == 'mem_use':
        res['mem_use'] = fetcher.get_mem_use()
    elif issue == 'disk_use':
        res['disk_use'] = fetcher.get_disk_use()
    elif issue == 'basic_info':
        res['basic_info'] = fetcher.get_basic_info()
    elif issue == 'owner':
        names = con_id.split('-')
        result = G_usermgr.query(username=names[0], cur_user=cur_user)
        if result['success'] == 'false':
            res['username'] = ""
            res['truename'] = ""
        else:
            res['username'] = result['data']['username']
            res['truename'] = result['data']['truename']
    else:
        res = "Unsupported Method!"
    return json.dumps({'success': 'true', 'monitor': res})
def send_mail_thread(mailto_list, subject, msg):
    if send_mail(mailto_list, subject, msg):
        logger.info("send mail success.")
    else:
        logger.error("send mail fail.")
        logger.error(subject)
        logger.error(msg)
def start(self):
    self.etcd.setkey("machines/runnodes/" + self.addr, "work")
    self.thread_sendheartbeat = threading.Thread(target=self.sendheartbeat)
    self.thread_sendheartbeat.start()
    # start serving for rpc
    logger.info("begins to work")
    self.rpcserver.serve_forever()
def __init__(self):
    with open(os.path.join(os.path.dirname(__file__), "api_config"), 'r') as yaml_file:
        self._config = yaml.load(yaml_file)
    for key in config_keys:
        setattr(self, key, self._config.get(key))
    logger.info('API configuration read')
def init_allocations():
    global machine_allocation_dict
    global allocations_list
    global node_manager
    global usages_list
    global machine_usage_dict
    logger.info("init allocations:")
    machines = node_manager.get_allnodes()
    for machine in machines:
        allocation = AllocationOfMachine()
        allocation.machineid = machine
        allocation.resources = 2
        allocation.reliable_resources_allocation_summary = 0
        allocation.reliable_allocations = []
        allocation.restricted_allocations = []
        machine_allocation_dict[machine] = allocation
        bisect.insort(allocations_list, allocation)
        usage_of_machine = {}
        usage_of_machine['machineid'] = machine
        usage_of_machine['cpu_utilization'] = 0.1
        usages_list.append(usage_of_machine)
        machine_usage_dict[machine] = 0.1
def _reload_nginx(self):
    logger.info('reload nginx start')
    if not DEBUG:
        subprocess.call("nginx -t && nginx -s reload", shell=True)
    else:
        logger.debug('fake reload nginx')
    logger.info('reload nginx finish')
def run(self):
    assert self.storage is not None
    try:
        self.crawl()
    except Exception as e:
        logger.info('error when crawl: %s' % self.user)
        logger.exception(e)
def on_message(self, message):
    prefix, json_message = message.split(",", 1)
    kernel_id = prefix.split("/", 1)[0]
    message = jsonapi.loads(json_message)
    logger.debug("KernelConnection.on_message: %s", message)
    application = self.session.handler.application
    if kernel_id == "complete":
        if message["header"]["msg_type"] in ("complete_request", "object_info_request"):
            application.completer.registerRequest(self, message)
        return
    try:
        if kernel_id not in self.channels:
            # handler may be None in certain circumstances (it seems to only be set
            # in GET requests, not POST requests, so even using it here may
            # only work with JSONP because of a race condition)
            kernel_info = application.km.kernel_info(kernel_id)
            self.kernel_info = {'remote_ip': kernel_info['remote_ip'],
                                'referer': kernel_info['referer'],
                                'timeout': kernel_info['timeout']}
        if message["header"]["msg_type"] == "execute_request":
            stats_logger.info(StatsMessage(
                kernel_id=kernel_id,
                remote_ip=self.kernel_info['remote_ip'],
                referer=self.kernel_info['referer'],
                code=message["content"]["code"],
                execute_type='request'))
        if kernel_id not in self.channels:
            self.channels[kernel_id] = SockJSChannelsHandler(self.send)
            self.channels[kernel_id].open(application, kernel_id)
        self.channels[kernel_id].on_message(json_message)
    except KeyError:
        # Ignore messages to nonexistent or killed kernels.
        logger.info("%s message sent to nonexistent kernel: %s"
                    % (message["header"]["msg_type"], kernel_id))
def mount_container(self, lxc_name):
    logger.info("mount container:%s" % lxc_name)
    [success, status] = self.container_status(lxc_name)
    if not success:
        return [False, status]
    self.imgmgr.checkFS(lxc_name)
    return [True, "mount success"]
def change_pa(db, gno, status, stc):
    sql = 'update pas SET status=%d, stc=%s where gno="%s"' % (status, stc, gno)
    logger.info(sql)
    db.execute(sql)
    warning = {"gno": gno, "status": status, "stc": stc}
    userlist = back_user(int(gno[0]))
    handlers.qhandler.send_message_ws(userlist, warning)
def post(self, *args, **kwargs):
    method = self.get_argument("method", "POST")
    if method == "DELETE":
        self.delete(*args, **kwargs)
    elif method == "OPTIONS":
        self.options(*args, **kwargs)
    else:
        if config.get_config("requires_tos") and \
                self.get_argument("accepted_tos", "false") != "true":
            self.set_status(403)
            self.finish()
            return
        logger.info('starting kernel for session '
                    + self.get_argument('CellSessionID', '(no ID)'))
        proto = self.request.protocol.replace("http", "ws", 1)
        host = self.request.host
        ws_url = "%s://%s/" % (proto, host)
        km = self.application.km
        timeout = self.get_argument("timeout", None)
        if timeout is not None:
            timeout = float(timeout)
            if math.isnan(timeout) or timeout < 0:
                timeout = None
        kernel_id = yield tornado.gen.Task(
            km.new_session_async,
            referer=self.request.headers.get('Referer', ''),
            remote_ip=self.request.remote_ip,
            timeout=timeout)
        data = {"ws_url": ws_url, "id": kernel_id}
        self.set_header("Jupyter-Kernel-ID", kernel_id)
        self.write(self.permissions(data))
        self.finish()
def basic_app_remove(self):
    logger.info("remove basic app : %s " % self.appname)
    remove_results = {}
    remove_success_results = {}
    remove_failed_results = {}
    remove_missed_results = {}
    try:
        app_spec = self.app_spec
        for pg_spec in app_spec.PodGroups:
            remove_r = self.podgroup_remove(pg_spec.Name)
            if remove_r.status_code < 400:
                remove_success_results[pg_spec.Name] = remove_r
            elif remove_r.status_code == 404:
                remove_missed_results[pg_spec.Name] = remove_r
            else:
                remove_failed_results[pg_spec.Name] = remove_r
        # use dependency_remove api of Deployd for deleting proc with
        # portal type
        for dp_spec in app_spec.Portals:
            remove_r = self.dependency_remove(dp_spec.Name)
            if remove_r.status_code < 400:
                remove_success_results[dp_spec.Name] = remove_r
            elif remove_r.status_code == 404:
                remove_missed_results[dp_spec.Name] = remove_r
            else:
                remove_failed_results[dp_spec.Name] = remove_r
    except Exception as e:
        logger.warning("failed when trying to remove app %s: %s" % (self.appname, str(e)))
def delete_cluster(self, clustername, username, user_info):
    [status, vcluster] = self.get_vcluster(clustername, username)
    if not status:
        return [False, "cluster not found"]
    if vcluster.status == 'running':
        return [False, "cluster is still running, you need to stop it and then delete"]
    ips = []
    for container in vcluster.containers:
        worker = self.nodemgr.ip_to_rpc(container.host)
        if worker is None:
            return [False, "The worker can't be found or has been stopped."]
        worker.delete_container(container.containername)
        db.session.delete(container)
        ips.append(container.ip)
    logger.info("delete vcluster and release vcluster ips")
    self.networkmgr.release_userips(username, ips)
    self.networkmgr.printpools()
    #os.remove(self.fspath+"/global/users/"+username+"/clusters/"+clustername)
    for bh in vcluster.billing_history:
        db.session.delete(bh)
    db.session.delete(vcluster)
    db.session.commit()
    os.remove(self.fspath + "/global/users/" + username + "/hosts/" + str(vcluster.clusterid) + ".hosts")
    groupname = json.loads(user_info)["data"]["group"]
    uid = json.loads(user_info)["data"]["id"]
    [status, clusters] = self.list_clusters(username)
    if len(clusters) == 0:
        self.networkmgr.del_user(username)
        self.networkmgr.del_usrgwbr(username, uid, self.nodemgr)
        #logger.info("vlanid release triggered")
    return [True, "cluster delete"]
def __init__(self, db_path):
    directory = SimpleFSDirectory(File(db_path))
    reader = DirectoryReader.open(directory)
    self.searcher = IndexSearcher(reader)
    self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    logger.info("Loaded DB from %s with %d documents: ",
                db_path, reader.numDocs())
def get_instances_id():
    logger.info('getting id from every instance...')
    cursor = _db.get_all_instance_ids()
    id_list = []
    for doc in cursor:
        id_list.append(doc['_id'])
    return Response(to_json(id_list), mimetype=_JSON_MIME)
def scaleout_cluster(cur_user, user, form):
    global G_usermgr
    global G_vclustermgr
    clustername = form.get('clustername', None)
    logger.info("scaleout: %s" % form)
    if clustername == None:
        return json.dumps({'success': 'false', 'message': 'clustername is null'})
    logger.info("handle request : scale out %s" % clustername)
    image = {}
    image['name'] = form.get("imagename", None)
    image['type'] = form.get("imagetype", None)
    image['owner'] = form.get("imageowner", None)
    user_info = G_usermgr.selfQuery(cur_user=cur_user)
    user_info = json.dumps(user_info)
    setting = {
        'cpu': form.get('cpuSetting'),
        'memory': form.get('memorySetting'),
        'disk': form.get('diskSetting')
    }
    [status, result] = G_usermgr.usageInc(cur_user=cur_user, modification=setting)
    if not status:
        return json.dumps({'success': 'false', 'action': 'scale out', 'message': result})
    [status, result] = G_vclustermgr.scale_out_cluster(clustername, user, image, user_info, setting)
    if status:
        return json.dumps({'success': 'true', 'action': 'scale out', 'message': result})
    else:
        G_usermgr.usageRecover(cur_user=cur_user, modification=setting)
        return json.dumps({'success': 'false', 'action': 'scale out', 'message': result})
def delete_cluster(self, clustername, username, user_info):
    [status, info] = self.get_clusterinfo(clustername, username)
    if not status:
        return [False, "cluster not found"]
    if info['status'] == 'running':
        return [False, "cluster is still running, you need to stop it and then delete"]
    ips = []
    for container in info['containers']:
        worker = self.nodemgr.ip_to_rpc(container['host'])
        # after release resources
        dscheduler.after_release(container['containername'])
        worker.delete_container(container['containername'])
        ips.append(container['ip'])
    logger.info("delete vcluster and release vcluster ips")
    self.networkmgr.release_userips(username, ips)
    self.networkmgr.printpools()
    os.remove(self.fspath + "/global/users/" + username + "/clusters/" + clustername)
    os.remove(self.fspath + "/global/users/" + username + "/hosts/" + str(info['clusterid']) + ".hosts")
    groupname = json.loads(user_info)["data"]["group"]
    [status, clusters] = self.list_clusters(username)
    if len(clusters) == 0:
        self.networkmgr.del_user(username, isshared=True if str(groupname) == "fundation" else False)
        logger.info("vlanid release triggered")
    return [True, "cluster delete"]
def delete_cluster(self, clustername, username, user_info):
    [status, info] = self.get_clusterinfo(clustername, username)
    if not status:
        return [False, "cluster not found"]
    if info['status'] == 'running':
        return [False, "cluster is still running, you need to stop it and then delete"]
    ips = []
    for container in info['containers']:
        worker = xmlrpc.client.ServerProxy("http://%s:%s" % (container['host'], env.getenv("WORKER_PORT")))
        if worker is None:
            return [False, "The worker can't be found or has been stopped."]
        worker.delete_container(container['containername'])
        ips.append(container['ip'])
    logger.info("delete vcluster and release vcluster ips")
    self.networkmgr.release_userips(username, ips)
    self.networkmgr.printpools()
    os.remove(self.fspath + "/global/users/" + username + "/clusters/" + clustername)
    os.remove(self.fspath + "/global/users/" + username + "/hosts/" + str(info['clusterid']) + ".hosts")
    groupname = json.loads(user_info)["data"]["group"]
    uid = json.loads(user_info)["data"]["id"]
    [status, clusters] = self.list_clusters(username)
    if len(clusters) == 0:
        self.networkmgr.del_user(username)
        self.networkmgr.del_usrgwbr(username, uid, self.nodemgr)
        #logger.info("vlanid release triggered")
    return [True, "cluster delete"]
def get_ranks(dataset, system):
    """
    Return the rank of the first correct answer returned by the system.
    """
    results = []
    oracle = CachedOracleSystem(dataset)
    all_expressions = set()
    for _, expressions in oracle.queries.values():
        all_expressions |= set(expressions)
    # all_expression_sets = [expressions for expressions in oracle.queries.values()]
    # all_possible_expressions = reduce(set.__or__, all_expression_sets)
    worst_possible_rank = len(all_expressions)
    logger.info("Number of possible expressions: %d", worst_possible_rank)
    for query, target_entities in dataset:
        logger.debug("Evaluating query %r", query)
        system_expressions = system.get_best_expressions(query)
        _, oracle_expressions = oracle.get_best_results_and_expressions(query)
        found_rank = get_rank(system_expressions, oracle_expressions, worst_possible_rank)
        logger.debug("Found rank: %r", found_rank)
        results.append({'query': query, 'target': target_entities, 'rank': found_rank})
    return results
def _setup_classpath_model(self):
    classpath = []
    if self.runtime_version < 5:
        # put model lib into classpath
        model_lib = os.path.join(
            self._conf['m2ee']['app_base'],
            'model',
            'lib'
        )
        if os.path.isdir(model_lib):
            # put all jars into classpath
            classpath.append(os.path.join(model_lib, 'userlib', '*'))
            # put all directories as themselves into classpath
            classpath.extend(
                [os.path.join(model_lib, name)
                 for name in os.listdir(model_lib)
                 if os.path.isdir(os.path.join(model_lib, name))])
        else:
            logger.info("No current unpacked application model is available. "
                        "Use the unpack command to unpack a mendix deployment "
                        "archive from %s" % self._conf['m2ee']['model_upload_path'])
    return classpath
def cb(reply):
    if reply["type"] == "error":
        pass
    else:
        logger.info("Ended kernel %s", kernel_id)
        del self._kernels[kernel_id]
        del self._comps[comp_id]["kernels"][kernel_id]
def useless_procs_remove(self, origin_procs):
    remove_results = {}
    remove_success_results = {}
    remove_failed_results = {}
    remove_missed_results = {}
    current_pgs = ["%s.%s.%s" % (self.appname, p.type.name, p.name)
                   for p in self.lain_config.procs.values()]
    try:
        for proc in origin_procs:
            pg_name = "%s.%s.%s" % (self.appname, proc.type.name, proc.name)
            if pg_name in current_pgs:
                continue
            logger.info("remove useless proc %s of app : %s " % (pg_name, self.appname))
            remove_r = self.podgroup_remove(pg_name) if proc.type != ProcType.portal else \
                self.dependency_remove(pg_name)
            if remove_r.status_code < 400:
                remove_success_results[pg_name] = remove_r
            elif remove_r.status_code == 404:
                remove_missed_results[pg_name] = remove_r
            else:
                remove_failed_results[pg_name] = remove_r
    except Exception as e:
        logger.warning("failed when trying to remove useless proc of app %s: %s" % (self.appname, str(e)))
def flagcheck(): msg = {'status': 0, 'msg': '提交成功'} lastround = Flags.query.order_by(Flags.rounds.desc()).first() # .rounds #print(lastround) #print('lastround',lastround) if lastround: lastround = lastround.rounds else: msg['status'] = -1 msg['msg'] = '比赛尚未开始' return json.dumps(msg, ensure_ascii=False) token = request.args.get('token') try: flag = request.form['flag'] except: msg['status'] = -1 msg['msg'] = '提交格式不正确' return json.dumps(msg, ensure_ascii=False) print(token, flag) attackteam = Teams.query.filter(Teams.token == token).first() print(attackteam) if attackteam: attackteamid = attackteam.id else: msg['status'] = -1 msg['msg'] = 'TOKEN 错误' return json.dumps(msg, ensure_ascii=False) #print(attackteamid) defenseteam = Flags.query.filter(Flags.rounds == lastround, Flags.flag == flag).first() print('rounds', lastround) print('flag', flag) #for i in Flags.query.filter(Flags.rounds == lastround).all(): # print(i.flag) if defenseteam: defenseteamid = defenseteam.teamid defenseteam = Teams.query.filter(Teams.id == defenseteamid).first() else: msg['status'] = -1 msg['msg'] = 'FLAG 错误' return json.dumps(msg, ensure_ascii=False) if defenseteamid == attackteamid: msg['status'] = -1 msg['msg'] = '你不能攻击自己的队伍' return json.dumps(msg, ensure_ascii=False) roundcheck = Round.query.filter(Round.defenseteamid == defenseteamid, Round.attackteamid == attackteamid, Round.rounds == lastround).first() if roundcheck: msg['status'] = -1 msg['msg'] = '你已经攻击了该的队伍' return json.dumps(msg, ensure_ascii=False) roundcheck2 = Round.query.filter(Round.score == 200, Round.defenseteamid == defenseteamid, Round.rounds == lastround).first() if roundcheck2: msg['status'] = -1 msg['msg'] = '该队伍Flag已经被提交' return json.dumps(msg, ensure_ascii=False) #print(defenseteamid) #msg = 'rounds {} attackteamid {} defenseteamid {}'.format(lastround,attackteamid,defenseteamid) msg['status'] = 1 msg['msg'] = '提交成功,({})成功攻击了 {}'.format(attackteam.name, defenseteam.name) rd = Round(attackteamid, defenseteamid, lastround, '{} 攻击了 {}'.format(attackteam.name, defenseteam.name)) db.session.add(rd) db.session.commit() # 这里是后加的.通过队伍Flag已经被提交,就不得分的机制,直接更新分数200 #Round.query.filter(Round.score==0).update({Round.score : 200}) #db.session.commit() logger.info('{} 成功攻击了 {}'.format(attackteam.name, defenseteam.name)) return json.dumps(msg, ensure_ascii=False)
def scale_out_cluster(self, clustername, username, image, user_info): if not self.is_cluster(clustername, username): return [False, "cluster:%s not found" % clustername] workers = self.nodemgr.get_rpcs() if (len(workers) == 0): logger.warning("no workers to start containers, scale out failed") return [False, "no workers are running"] image_json = json.dumps(image) [status, result] = self.networkmgr.acquire_userips_cidr(username) gateway = self.networkmgr.get_usergw(username) vlanid = self.networkmgr.get_uservlanid(username) self.networkmgr.printpools() if not status: return [False, result] ip = result[0] [status, clusterinfo] = self.get_clusterinfo(clustername, username) clusterid = clusterinfo['clusterid'] clusterpath = self.fspath + "/global/users/" + username + "/clusters/" + clustername hostpath = self.fspath + "/global/users/" + username + "/hosts/" + str( clusterid) + ".hosts" cid = clusterinfo['nextcid'] onework = workers[random.randint(0, len(workers) - 1)] lxc_name = username + "-" + str(clusterid) + "-" + str(cid) hostname = "host-" + str(cid) [success, message] = onework.create_container(lxc_name, username, user_info, clustername, clusterid, str(cid), hostname, ip, gateway, str(vlanid), image_json) if success is False: logger.info("create container failed, so scale out failed") return [False, message] if clusterinfo['status'] == "running": onework.start_container(lxc_name) onework.start_services(lxc_name, ["ssh"]) # TODO: need fix logger.info("scale out success") hostfile = open(hostpath, 'a') hostfile.write( ip.split("/")[0] + "\t" + hostname + "\t" + hostname + "." + clustername + "\n") hostfile.close() clusterinfo['nextcid'] = int(clusterinfo['nextcid']) + 1 clusterinfo['size'] = int(clusterinfo['size']) + 1 clusterinfo['containers'].append({ 'containername': lxc_name, 'hostname': hostname, 'ip': ip, 'host': self.nodemgr.rpc_to_ip(onework), 'image': image['name'], 'lastsave': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") }) clusterfile = open(clusterpath, 'w') clusterfile.write(json.dumps(clusterinfo)) clusterfile.close() return [True, clusterinfo]
def prepare_chrome(self, login_type): logger.info( color("fg_bold_cyan") + "正在初始化chrome driver,用以进行【{}】相关操作".format(login_type)) caps = DesiredCapabilities().CHROME # caps["pageLoadStrategy"] = "normal" # Waits for full page load caps["pageLoadStrategy"] = "none" # Do not wait for full page load options = Options() if not self.cfg._debug_show_chrome_logs: options.add_experimental_option("excludeSwitches", ["enable-logging"]) if self.cfg.run_in_headless_mode: logger.warning("已配置使用headless模式运行chrome") options.headless = True inited = False try: if not self.cfg.force_use_portable_chrome: # 如果未强制使用便携版chrome,则首先尝试使用系统安装的chrome self.driver = webdriver.Chrome( executable_path=self.chrome_driver_executable_path, desired_capabilities=caps, options=options) logger.info("使用自带chrome") inited = True except: pass if not inited: # 如果找不到,则尝试使用打包的便携版chrome # 先判定本地是否有便携版压缩包,若无则提示去网盘下载 if not os.path.isfile(self.chrome_binary_7z): msg = ( "当前电脑未发现合适版本chrome版本,且当前目录无便携版chrome的压缩包({zip_name})\n" "请在稍后打开的网盘页面中下载[{zip_name}],并放到小助手的exe所在目录(注意:是把这个压缩包原原本本地放到这个目录里,而不是解压后再放过来!!!),然后重新打开程序~\n" "如果之前版本已经下载过这个文件,可以直接去之前版本复制过来~不需要再下载一次~\n").format( zip_name=os.path.basename(self.chrome_binary_7z)) win32api.MessageBox(0, msg, "出错啦", win32con.MB_ICONERROR) webbrowser.open(get_netdisk_addr(self.cfg)) os.system("PAUSE") exit(-1) # 先判断便携版chrome是否已解压 if not os.path.isdir(self.chrome_binary_directory): logger.info("自动解压便携版chrome到当前目录") subprocess.call([ self.bandizip_executable_path, "x", "-target:auto", self.chrome_binary_7z ]) # 然后使用本地的chrome来初始化driver对象 options.binary_location = self.chrome_binary_location # you may need some other options options.add_argument('--no-sandbox') options.add_argument('--no-default-browser-check') options.add_argument('--no-first-run') self.driver = webdriver.Chrome( executable_path=self.chrome_driver_executable_path, desired_capabilities=caps, options=options) logger.info("使用便携版chrome") self.cookies = self.driver.get_cookies()
def _login_common(self, login_type, switch_to_login_frame_fn, assert_login_finished_fn, login_action_fn=None, need_human_operate=True): """ 通用登录逻辑,并返回登陆后的cookie中包含的uin、skey数据 :rtype: LoginResult """ switch_to_login_frame_fn() logger.info("等待#loginframe#ptlogin_iframe#switcher_plogin加载完毕") WebDriverWait(self.driver, self.cfg.login.load_login_iframe_timeout).until( expected_conditions.visibility_of_element_located( (By.ID, 'switcher_plogin'))) if need_human_operate: logger.info("请在{}s内完成{}操作".format(self.cfg.login.login_timeout, login_type)) # 实际登录的逻辑,不同方式的处理不同,这里调用外部传入的函数 logger.info("开始{}流程".format(login_type)) if login_action_fn is not None: login_action_fn() logger.info("等待登录完成(也就是#loginIframe#login登录框消失)") WebDriverWait(self.driver, self.cfg.login.login_timeout).until( expected_conditions.invisibility_of_element_located( (By.ID, "login"))) logger.info("回到主iframe") self.driver.switch_to.default_content() assert_login_finished_fn() logger.info("登录完成") self.cookies = self.driver.get_cookies() if self.login_mode == self.login_mode_normal: # 普通登录额外获取腾讯视频的vqq_vuserid logger.info("转到qq视频界面,从而可以获取vuserid,用于腾讯视频的蚊子腿") self.driver.get( "https://film.qq.com/film/p/topic/dnf922/index.html") for i in range(5): vuserid = self.driver.get_cookie('vuserid') if vuserid is not None: break time.sleep(1) self.add_cookies(self.driver.get_cookies()) return
def assert_login_finished_fn():
    logger.info("请等待#logined的div可见,则说明已经登录完成了...")
    WebDriverWait(self.driver, self.cfg.login.login_finished_timeout).until(
        expected_conditions.visibility_of_element_located(
            (By.ID, "logined")))
def switch_to_login_frame_fn():
    logger.info("打开活动界面")
    self.driver.get("https://guanjia.qq.com/act/cop/202010dnf/")

    logger.info("浏览器设为1936x1056")
    self.driver.set_window_size(1936, 1056)

    logger.info("等待登录按钮#dologin出来,确保加载完成")
    WebDriverWait(self.driver, self.cfg.login.load_page_timeout).until(
        expected_conditions.visibility_of_element_located(
            (By.ID, "dologin")))

    logger.info("点击登录按钮")
    self.driver.find_element(By.ID, "dologin").click()

    logger.info("等待#login_ifr显示出来并切换")
    WebDriverWait(self.driver, self.cfg.login.load_login_iframe_timeout).until(
        expected_conditions.visibility_of_element_located(
            (By.ID, "login_ifr")))
    loginIframe = self.driver.find_element_by_id("login_ifr")
    self.driver.switch_to.frame(loginIframe)

    logger.info("等待#login_ifr#ptlogin_iframe加载完毕并切换")
    WebDriverWait(self.driver, self.cfg.login.load_login_iframe_timeout).until(
        expected_conditions.visibility_of_element_located(
            (By.ID, "ptlogin_iframe")))
    ptlogin_iframe = self.driver.find_element_by_id("ptlogin_iframe")
    self.driver.switch_to.frame(ptlogin_iframe)
def package(dir_src, dir_all_release, release_dir_name, release_7z_name, dir_github_action_artifact): old_cwd = os.getcwd() show_head_line(f"开始打包 {release_dir_name} 所需内容", color("bold_yellow")) # 确保发布根目录存在 if not os.path.isdir(dir_all_release): os.mkdir(dir_all_release) # 并清空当前的发布版本目录 dir_current_release = os.path.realpath( os.path.join(dir_all_release, release_dir_name)) shutil.rmtree(dir_current_release, ignore_errors=True) os.mkdir(dir_current_release) logger.info( color("bold_yellow") + f"将部分内容从 {dir_src} 复制到 {dir_current_release} ") # 需要复制的文件与目录 files_to_copy = [] # 基于正则确定初始复制范围 reg_wantted_file = r'.*\.(toml|md|txt|png|jpg|docx|url)$' for file in os.listdir('.'): if not re.search(reg_wantted_file, file, flags=re.IGNORECASE): continue files_to_copy.append(file) # 额外补充一些文件和目录 files_to_copy.extend([ "config.example.toml", "DNF蚊子腿小助手.exe", "DNF蚊子腿小助手配置工具.exe", "DNF蚊子腿小助手配置文件.bat", "使用教程", "付费指引", "相关信息", "utils", ]) # 按顺序复制 files_to_copy = sorted(files_to_copy) # 复制文件与目录过去 for filename in files_to_copy: source = os.path.join(dir_src, filename) destination = os.path.join(dir_current_release, filename) if os.path.isdir(filename): logger.info(f"拷贝目录 {filename}") shutil.copytree(source, destination) else: logger.info(f"拷贝文件 {filename}") shutil.copyfile(source, destination) logger.info(color("bold_yellow") + "移动部分文件的位置和名称") files_to_move = [ ("utils/auto_updater.exe", "utils/auto_updater_latest.exe"), ("CHANGELOG.MD", "相关信息/CHANGELOG.MD"), ("README.MD", "相关信息/README.MD"), ] for src_file, dst_file in files_to_move: src_file = os.path.join(dir_current_release, src_file) dst_file = os.path.join(dir_current_release, dst_file) logger.info(f"移动{src_file}到{dst_file}") shutil.move(src_file, dst_file) logger.info(color("bold_yellow") + "清除一些无需发布的内容") dir_to_filenames_need_remove = { ".": [ "requirements.txt", ], "utils": [ "logs", ".db", ".cached", ".first_run", ".log.filename", "buy_auto_updater_users.txt", "user_monthly_pay_info.txt", "notices.txt", f"chrome_portable_{QQLogin.chrome_major_version}.7z", f"chrome_portable_{QQLogin.chrome_major_version}", ], } for dir_path, filenames in dir_to_filenames_need_remove.items(): for filename in filenames: filepath = os.path.join(dir_current_release, f"{dir_path}/{filename}") if not os.path.exists(filepath): continue if os.path.isdir(filepath): logger.info(f"移除目录 {filepath}") shutil.rmtree(filepath, ignore_errors=True) else: logger.info(f"移除文件 {filepath}") os.remove(filepath) # 压缩打包 os.chdir(dir_all_release) logger.info(color("bold_yellow") + "开始压缩打包") compress_dir_with_bandizip(release_dir_name, release_7z_name, dir_src) # 额外备份一份最新的供github action 使用 shutil.copyfile(release_7z_name, os.path.join(dir_github_action_artifact, 'djc_helper.7z')) os.chdir(old_cwd)
def show_end_time(end_time, time_fmt="%Y-%m-%d %H:%M:%S"):
    # end_time = "2021-02-23 00:00:00"
    remaining_time = get_remaining_time(end_time, time_fmt)
    logger.info(color("bold_black") + f"活动的结束时间为{end_time},剩余时间为{remaining_time}")
"5": "PAN-BOL", "6": "Libre", "7": "CC", "8": "Juntos", } if __name__ == "__main__": from multiprocessing import cpu_count import threading votes = 0 url = "https://yoparticipo.voto/" votes_for = 2 # 1 = Creemos, 2 = ADN, 3 = MAS, ........ cores = cpu_count() logger.info(f"cpu cores: {cores}") while True: driver = get_driver(headless=True) ## mayor numero de cores, mayor rapidez de ejecucion. threads solo pueden ejecutarse ## cuando hay 2 o mas nucleos en la cpu if cores > 1: threading.Thread(target=run, kwargs={ "driver": driver, "url": url, "opt": votes_for }).start() else: run(driver=driver, url=url, opt=votes_for) votes += 1
def get_leader(self):
    # If the key is not yet set to expire, there is no point retrying now.
    # This is bound to lead to thundering herd problem. However, it's still
    # better than hitting the DB more often.
    logger.debug('time to refresh: %s', self.time_to_refresh)
    if datetime.datetime.now() < self.time_to_refresh:
        logger.debug('Returning leadership from cache. %s', self.current_leader)
        return self.current_leader
    logger.debug('Checking redis for leadership')
    # Get a handle to Redis
    r = redis.Redis(connection_pool=self.redis_pool)
    succeeded = False
    decoded_process_lock_value = ''
    retries = 1
    time_to_refresh_delta = 0
    while not succeeded:
        # conditionally write only if doesn't already exist. If we succeed, we will be
        # the leader, else we must find out who the leader is.
        is_leader = r.set(self.process_lock_key,
                          json.dumps(self.process_lock_value.__dict__),
                          ex=self.process_lock_ttl, nx=True)
        time_to_refresh_delta = datetime.timedelta(seconds=self.process_lock_ttl)
        logger.debug('Succeeded in writing leadership record = ' + str(is_leader))
        if not is_leader:
            # If set didn't succeed, read the value and decode it.
            process_lock_value_string = r.get(self.process_lock_key)
            decoded_process_lock_value = unmarshal_json.decode_json_dump(process_lock_value_string)
            # If this process is the leader, update the expiry of the record in the database,
            # else update this process's time when it must refresh the leadership record.
            if decoded_process_lock_value.__dict__ == self.process_lock_value.__dict__:
                is_leader = True
                r.expire(self.process_lock_key, self.process_lock_ttl)
            else:
                ttl = r.ttl(self.process_lock_key)
                # Sometimes r.ttl returns a type that's not compatible with datetime.timedelta.
                # This if block resolves that issue.
                if not ttl:
                    ttl = 0
                time_to_refresh_delta = datetime.timedelta(seconds=int(ttl))
        if is_leader or isinstance(decoded_process_lock_value, ProcessLockValueType):
            succeeded = True
            retries = 1
        # If we failed to determine leadership, back-off and retry.
        if not succeeded:
            logger.info('Did not succeed in getting a leader. Will retry')
            retries *= 2
            time.sleep(self.retry_interval_ms * retries / 1000)
    # Now that we have determined the leader, update the local parameters.
    self.time_to_refresh = datetime.datetime.now() + time_to_refresh_delta
    if is_leader:
        self.current_leader = self.process_lock_value
    else:
        self.current_leader = decoded_process_lock_value
    return self.current_leader
def loginFectiva(browser,settings,account,pwd): FLAG_LOGIN = False while not FLAG_LOGIN: try: browser.get("http://library-admin.anu.edu.au/tools/factiva-redirect") if GATEWAY == 'OUTSIDE': logger.info("Gateway: OUTSIDE") wait = WebDriverWait(browser, 5) anuID = wait.until(EC.presence_of_element_located((By.ID, 'requester'))) anuID.send_keys(account) password = browser.find_element_by_id('requesteremail') password.send_keys(pwd) browser.get_screenshot_as_file("logs/pre-login.png") password.send_keys(Keys.RETURN) elif GATEWAY == 'ANULIB': logger.info("Gateway: ANULIB") else: logger.error("Please Set Cookie Gateway!") logger.info('Start login fectiva...') wait = WebDriverWait(browser, 40) browser.get_screenshot_as_file("logs/login.png") btn = wait.until(EC.presence_of_element_located((By.ID, 'btnSearchBottom'))) #select searching date dr = Select(browser.find_element_by_name('dr')) dr.select_by_visible_text('Enter date range...') #start date frd = browser.find_element_by_id('frd') frd.send_keys(settings['startDate']['frd']) frm = browser.find_element_by_id('frm') frm.send_keys(settings['startDate']['frm']) fry = browser.find_element_by_id('fry') fry.send_keys(settings['startDate']['fry']) #Enddate tod = browser.find_element_by_id('tod') tod.send_keys(settings['endDate']['tod']) tom = browser.find_element_by_id('tom') tom.send_keys(settings['endDate']['tom']) toy = browser.find_element_by_id('toy') toy.send_keys(settings['endDate']['toy']) filter = Select(browser.find_element_by_name('isrd')) filter.select_by_visible_text('Off') browser.execute_script('document.getElementById("ftx").value="{}";doLinkSubmit("../ha/default.aspx");'.format(settings['term'])) headlineFrame = wait.until(EC.presence_of_element_located((By.ID, 'headlineFrame'))) browser.get_screenshot_as_file("logs/search.png") FLAG_LOGIN = True except NoSuchElementException: logger.error('No Element during login') browser.close() except ElementNotVisibleException: logger.error('Not Visible during login') # browser.close() except TimeoutException: logger.error('Timeout during login') browser.get_screenshot_as_file("logs/Timeout.png") #browser.close() list_cookies = browser.get_cookies() cookies=dict() for item in list_cookies: cookies[item['name']] = item['value'] return json.dumps(cookies)
parser.add_argument("-i", "--esindex", help="Name of index to store to.", default=ESINDEX) parser.add_argument("-t", "--estype", help="Type of index to store to.", default=ESTYPE) parser.add_argument("-m", "--msdsummaryfile", help="MSD summary file.") parser.add_argument("-d", "--msddirectory", help="MSD directory structure.") parser.add_argument("-f", "--force", help="Force writing in existing ES index.", default=False, action="store_true") args = parser.parse_args() # Setup elasticsearch eshelper = Eshelper(args.eshost, args.esport, args.esindex, args.estype) force_index = bool(args.force) eshelper.check_host_reachable() eshelper.check_index_safe(force_index) # Setup track generator if args.msdsummaryfile: logger.info("Load summary file {}".format(args.msdsummaryfile)) track_gen = TrackGeneratorFromSummary() track_gen.load(args.msdsummaryfile) elif args.msddirectory: logger.info("Use directory {}".format(args.msddirectory)) track_gen = TrackGeneratorFromDirectory() track_gen.load(args.msddirectory) else: logger.error("-m or -d must be given as a parameter") sys.exit(1) track_gen.check() # Setup ingestor ingestor = Ingestor(eshelper, track_gen) ingestor.ingest()
def resetCheckPoint():
    checkpoint = {'Dowjones': 0, 'Publication': 0, 'Website': 0, 'Blog': 0}
    saveCheckPoint(checkpoint)
    logger.info('Reset checkpoint')
def emptydb(config):
    if not config.allow_destroy_db():
        logger.error("Refusing to do a destructive database operation "
                     "because the allow_destroy_db configuration option "
                     "is set to false.")
        return False
    env = os.environ.copy()
    env.update(config.get_pg_environment())
    logger.info("Removing all tables...")
    # get list of drop table commands
    cmd = (
        config.get_psql_binary(), "-t", "-c",
        "SELECT 'DROP TABLE ' || n.nspname || '.\"' || c.relname || '\" CASCADE;' "
        "FROM pg_catalog.pg_class AS c LEFT JOIN pg_catalog.pg_namespace AS n "
        "ON n.oid = c.relnamespace WHERE relkind = 'r' AND n.nspname NOT IN "
        "('pg_catalog', 'pg_toast') AND pg_catalog.pg_table_is_visible(c.oid)")
    logger.trace("Executing %s, creating pipe for stdout,stderr" % str(cmd))
    proc1 = subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = proc1.communicate()
    if stderr != '':
        logger.error("An error occurred while calling psql: %s" % stderr)
        return False
    stdin = stdout
    cmd = (config.get_psql_binary(), )
    logger.trace("Piping stdout,stderr to %s" % str(cmd))
    proc2 = subprocess.Popen(cmd, env=env, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = proc2.communicate(stdin)
    if stderr != '':
        logger.error("An error occurred while calling psql: %s" % stderr)
        return False
    logger.info("Removing all sequences...")
    # get list of drop sequence commands
    cmd = (config.get_psql_binary(), "-t", "-c",
           "SELECT 'DROP SEQUENCE ' || n.nspname || '.\"' || c.relname || '\" "
           "CASCADE;' FROM pg_catalog.pg_class AS c LEFT JOIN "
           "pg_catalog.pg_namespace AS n ON n.oid = c.relnamespace WHERE "
           "relkind = 'S' AND n.nspname NOT IN ('pg_catalog', 'pg_toast') AND "
           "pg_catalog.pg_table_is_visible(c.oid)")
    logger.trace("Executing %s, creating pipe for stdout,stderr" % str(cmd))
    proc1 = subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = proc1.communicate()
    if stderr != '':
        logger.error("An error occurred while calling psql: %s" % stderr)
        return False
    stdin = stdout
    cmd = (config.get_psql_binary(), )
    logger.trace("Piping stdout,stderr to %s" % str(cmd))
    proc2 = subprocess.Popen(cmd, env=env, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = proc2.communicate(stdin)
    if stderr != '':
        logger.error("An error occurred while calling psql: %s" % stderr)
        return False
    return True
        checkpoint[source] = currentPage
        saveCheckPoint(checkpoint)
        btn_nextpage = browser.find_element_by_xpath('//a[@class="nextItem"]')
        btn_nextpage.click()
        wait.until(EC.text_to_be_present_in_element(
            (By.XPATH, '//div[@id="headlines"]/table/tbody/tr[@class="headline"][1]/td[@class="count"]'),
            '{}.'.format(nextPageStart)))

settings = loadSettings()
resetCheckPoint()
browser = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
loginFectiva(browser, settings, '', '')
timeSplit, status = getOverview(browser, settings)
logger.info('Time split:{}'.format(timeSplit))
browser.close()
for (start, end) in timeSplit:
    logger.info('Now start from {} to {}.'.format(str(start), str(end)))
    settings['startDate']['frd'] = start.day
    settings['startDate']['frm'] = start.month
    settings['startDate']['fry'] = start.year
    settings['endDate']['tod'] = end.day
    settings['endDate']['tom'] = end.month
    settings['endDate']['toy'] = end.year
    while True:
        crawled = status['crawled_pages']
        checkpoint = loadCheckPoint()
        browser = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
def crawlFectiva(browser,checkpoint,status): #select = Select(browser.find_element_by_name('hso')) #select.select_by_visible_text('Sort by: Oldest first') for source in ['Blog','Website','Dowjones','Publication']: logger.info('Start crawling articles from: {}...'.format(source)) articlesOfChannel = browser.find_element_by_xpath('//span[@data-channel="{}"][1]/a/span[@class="hitsCount"]'.format(source)).text.replace(',','') articlesOfChannel = int(re.search('\((.*)\)',articlesOfChannel).group(1)) if articlesOfChannel ==0: logger.info('No articles in {} channel.'.format(source)) logger.info('End of Source from {}'.format(source)) continue dataChannel = browser.find_element_by_xpath('//span[@data-channel="{}"]'.format(source)) dataChannel.click() wait = WebDriverWait(browser, 10) btn = wait.until(EC.presence_of_element_located((By.XPATH, '//span[@class="tabOn"][@data-channel="{}"]'.format(source)))) #Compute the total pages we need to download currentPage,totalPages,nextPageStart,totalArticles,articlesInThisPage = getStatus(browser) #Load checkpoint checkpoint = loadCheckPoint() logger.info('Total pages:{} , currentAt:{} , checkPointAt:{}'.format(totalPages,currentPage,checkpoint[source])) while currentPage != totalPages or checkpoint[source]!=currentPage or totalPages == 0: logger.info('Total pages:{} , currentAt:{} , checkPointAt:{}'.format(totalPages,currentPage,checkpoint[source])) for i in range(abs(checkpoint[source] - currentPage)): #Compute the total pages we need to download currentPage,totalPages,nextPageStart,totalArticles,articlesInThisPage = getStatus(browser) logger.info('Skip Page To checkPoint...Total pages:{} , currentAt:{} , checkPointAt:{}'.format(totalPages,currentPage,checkpoint[source])) btn_nextpage = browser.find_element_by_xpath('//a[@class="nextItem"]') btn_nextpage.click() wait.until(EC.text_to_be_present_in_element((By.XPATH, '//div[@id="headlines"]/table/tbody/tr[@class="headline"][1]/td[@class="count"]'), '{}.'.format(nextPageStart) )) status['crawled_pages'] += 100 #Compute the total pages we need to download currentPage,totalPages,nextPageStart,totalArticles,articlesInThisPage = getStatus(browser) for id in range(1,articlesInThisPage + 1): status['crawled_pages'] +=1 updateProgress(min(status['crawled_pages']/float(status['totalArticles']),0.99)) headline,date,author,documentID,documentType = getArticleInfo(browser,id,source) if checkItemExist(documentID): logger.info('{:.1%} item {} exist in database skip to next one.'.format(status['crawled_pages']/float(status['totalArticles']),id)) continue if documentType == 'Factiva Licensed Content': logger.debug('id:{}, documentID:{}, Headline:{}, date:{}, author:{} '.format(id,documentID,headline.text,date,author)) headline.click() logger.debug('waiting content response') wait.until(EC.text_to_be_present_in_element((By.XPATH, '//div[@id="artHdr1"]/span[1]'), 'Article {}'.format(currentPage * 100 + id) )) logger.debug('get content response') articleHtml = browser.find_element_by_xpath('//div[@class="article enArticle"]') title =headline.text content = articleHtml.get_attribute('innerHTML') date = parse(date).strftime('%Y-%m-%d') crawldate = parse(str(datetime.now())).strftime('%Y-%m-%d') url = '' likelihood = processItem(documentID,title,author,content,date,crawldate,url,source) logger.info('{:.1%} [DOC] Get {} of {} in page {}.Totally {} pages {} articles, likelihood: {:.2}'.format(status['crawled_pages']/float(status['totalArticles']),id,articlesInThisPage, currentPage,totalPages,totalArticles,likelihood)) sleep(2) if 
documentType == 'HTML': browser.set_page_load_timeout(6) try: headline.click() title = headline.text window_main = browser.window_handles[0] window_download = browser.window_handles[-1] browser.switch_to_window(window_download) sleep(4) url = browser.current_url logger.debug('Try to get html page source') content = browser.page_source logger.debug('Get website success.') except: url = browser.current_url content = "<h1><a href='{}'>baidu</a></h1>".format(url) logger.warning('No response from {} of page {} title:{}, url:{}'.format(documentID,currentPage,title,url)) logger.debug('id:{}, documentID:{}, Headline:{}, date:{}, author:{} '.format(id,documentID,title,date,author)) date = parse(date).strftime('%Y-%m-%d') crawldate = parse(str(datetime.now())).strftime('%Y-%m-%d') likelihood = processItem(documentID,title,author,content,date,crawldate,url,source) logger.info('{:.1%} [HTM]Get {} of 100 in page {}.Totally {} pages {} articles, likelihood: {:.2}'.format(status['crawled_pages']/float(status['totalArticles']),id, currentPage,totalPages,totalArticles,likelihood)) try: browser.execute_script('window.close();') except: logger.debug('Close tab error') browser.switch_to_window(window_main) #sometimes recaptcha occurs here #view next page if currentPage == totalPages: logger.info('End of Source from {}'.format(source)) break checkpoint[source] = currentPage saveCheckPoint(checkpoint) btn_nextpage = browser.find_element_by_xpath('//a[@class="nextItem"]') btn_nextpage.click() wait.until(EC.text_to_be_present_in_element((By.XPATH, '//div[@id="headlines"]/table/tbody/tr[@class="headline"][1]/td[@class="count"]'), '{}.'.format(nextPageStart) ))
def pull(self): logger.info("pulling from branch %s", self.source) return f"Branch = {self.source}"
def saveCheckPoint(checkpoint):
    f = open('checkpoint/checkpoint.json', 'w')
    json.dump(checkpoint, f)
    f.close()
    logger.info('Save checkpoint at {}'.format(checkpoint))
# -*- coding:utf-8 -*-
import sys

from kafkaMonitor import KafkaMonitor
from log import logger

if __name__ == '__main__':
    logger.info('--------------------------开始运行--------------------------')
    try:
        logger.info('执行操作:执行性能监控')
        interval, time = '1', '1'
        if len(sys.argv) == 2:
            interval = sys.argv[1]
        elif len(sys.argv) == 3:
            interval, time = sys.argv[1], sys.argv[2]
        exclustion = []
        if len(sys.argv) > 3:
            interval, time = sys.argv[1], sys.argv[2]
            exclustion = sys.argv[3:]
        if interval != '1':
            if not interval.isdigit():
                logger.error('采样时间间隔只能为数字')
                exit()
        if time != '1':
            temp = time.replace('+', '').replace(' ', '')
            if not temp.isdigit():
                print('采样持续时间只能为数字\算术表达式')
def add_worker(): logger.info(f"adding worker") sched.add_worker() return jsonify( status='OK' )
def main():
    create_purchases_file(PURCHASES_FILE)
    purchases = load_purchases(PURCHASES_FILE)
    stats = PurchasesStats(purchases).process()
    logger.info("Results: %s", stats)
def pull(self): logger.info("pulling from tag %s", self.source) print(self.__class__._attributes) return f"Tag = {self.source}"
import pika
import os
import time
import json
import requests
import datetime

from log import logger

jobs_api_host = os.getenv('JOB_API_HOST')
logger.info('Job API host: {}'.format(jobs_api_host))

rabbitmq_url = os.getenv('AMQP_URL')
logger.info('Connecting to RabbitMQ at {}'.format(rabbitmq_url))


def update_job(job_id, body):
    response = requests.patch(jobs_api_host + '/jobs/' + job_id,
                              headers={'Content-Type': 'application/json'},
                              json=body)
    logger.debug('API responded with ' + str(response.status_code))


def callback(body):
    if body is None:
        return
    data = json.loads(body)
    job_id = str(data['id'])
    logger.debug('Received job [{}]'.format(job_id))
    update_job(job_id, {
def save_cache(self): logger.info("Saving entity score cache") with open(settings.ENTITY_SCORE_CACHE_PATH, 'w') as cache_file: json.dump(self.entity_scores, cache_file)
def build_graph(opts, iterations_per_step=1, is_training=True): train_graph = tf.Graph() with train_graph.as_default(): bert_config = bert_ipu.BertConfig.from_dict( opts, config=bert_ipu.BertConfig(vocab_size=None)) bert_config.dtype = tf.float32 if opts[ "precision"] == '32' else tf.float16 placeholders = dict() learning_rate = None opts['version_2_with_negative'] = False train_iterator = ipu_infeed_queue.IPUInfeedQueue( data_loader.load(opts, is_training=is_training)) outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue() # building networks with pipeline if not should_be_pipeline_when_inference(opts): def bert_net(): return build_infer_network_without_pipeline( train_iterator, outfeed_queue, iterations_per_step, bert_config=bert_config, opts=opts) else: def bert_net(): return build_network(train_iterator, outfeed_queue, iterations_per_step, bert_config, opts, learning_rate, is_training) with ipu_scope('/device:IPU:0'): embedded = opts["embedded_runtime"] if embedded and is_training: raise ValueError( "embedded_runtime is only to be used for inference.") train = ipu.ipu_compiler.compile(bert_net, []) if not embedded else None exec_path = None compile_op = None poplar_exec_filepath = get_exec_path( opts['seq_length'], opts['micro_batch_size'], opts['device_mapping'], should_be_pipeline_when_inference(opts)) exec_path = os.path.join(poplar_exec_filepath) compile_op = application_compile_op.experimental_application_compile_op( bert_net, output_path=exec_path, freeze_variables=True) outfeed = outfeed_queue.dequeue() restore = tf.train.Saver(var_list=tf.global_variables()) ipu.utils.move_variable_initialization_to_cpu() train_init = tf.global_variables_initializer() tvars = tf.trainable_variables() # Calculate the number of required IPU""" num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas']) # The number of acquired IPUs must be the power of 2. 
if num_ipus & (num_ipus - 1) != 0: num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2))) ipu_config = get_config( fp_exceptions=opts["fp_exceptions"], enable_recomputation=opts["enable_recomputation"], disable_graph_outlining=False, num_required_ipus=num_ipus, enable_stochastic_rounding=opts['stochastic_rounding'], max_cross_replica_sum_buffer_size=opts[ 'max_cross_replica_sum_buffer_size'], max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'], scheduler_selection='CLUSTERING', compile_only=False, ipu_id=None, partials_type=opts["partials_type"], available_memory_proportion=opts['available_memory_proportion']) ipu_config.configure_ipu_system() train_sess = tf.Session(graph=train_graph) _ = train_sess.run(train_init, []) # ----------------- # Checkpoints restore and save init_checkpoint_path = opts['init_checkpoint'] logger.info(f"At the checkpoint location {init_checkpoint_path}") if init_checkpoint_path: logger.info("Loading checkpoint...") if os.path.isfile(init_checkpoint_path): init_checkpoint_path = os.path.splitext(init_checkpoint_path)[0] logger.info(f"checkpoint path: {init_checkpoint_path}") (assignment_map, initialized_variable_names ) = bert_ipu.get_assignment_map_from_checkpoint( tvars, init_checkpoint_path) for var in tvars: if var.name in initialized_variable_names: mark = "*" else: mark = " " logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape, var.dtype.name) reader = tf.train.NewCheckpointReader(init_checkpoint_path) load_vars = reader.get_variable_to_shape_map() saver_restore = tf.train.Saver(assignment_map) saver_restore.restore(train_sess, init_checkpoint_path) # ----------------- if compile_op is not None: logger.info( f"Compiling and saving Poplar executable to {poplar_exec_filepath}" ) _ = train_sess.run(compile_op, []) else: exec_path = None return GraphOps(train_graph, train_sess, train_init, [train], placeholders, train_iterator, outfeed, restore, tvars, exec_path), ipu_config
def get(self, shopping_id, db_session):
    logger.info(LogMsg.START)
    data = dict(filter=dict(shopping_key=shopping_id))
    return super(PaymentController, self).get_by_data(data, db_session)
def run_time(opts, dataset_list=None): bs = opts['micro_batch_size'] seq_length = opts['seq_length'] poplar_exec_filepath = get_exec_path( opts['seq_length'], opts['micro_batch_size'], opts['device_mapping'], should_be_pipeline_when_inference(opts)) logger.info( f"POPLAR FILE PATH FOR EXEC: {poplar_exec_filepath}<------------------------------------------" ) inputs = [] engine_name = "my_engine" ctx = embedded_runtime.embedded_runtime_start(poplar_exec_filepath, inputs, engine_name, timeout=1000) input_ids = tf.placeholder(tf.int32, (bs, seq_length)) input_mask = tf.placeholder(tf.int32, (bs, seq_length)) segment_ids = tf.placeholder(tf.int32, (bs, seq_length)) unique_ids = tf.placeholder(tf.int32, (bs, )) placeholders = [input_ids, input_mask, segment_ids, unique_ids] durations = [] master_durations = [] test_results = [] durations = [] call_result = embedded_runtime.embedded_runtime_call(placeholders, ctx) thread_queue = Queue() ipu.config.reset_ipu_configuration() gc.collect() all_results = [] if opts['generated_data']: number_of_steps = int(opts['num_iter']) else: number_of_steps = len(dataset_list[0]) // bs with tf.Session() as sess: logger.debug(f"Number of threads: {opts['num_inference_thread']}") logger.debug(f"Data Type: {opts['generated_data']}") if opts['generated_data']: feed_dict = synthetic_feed_dict(placeholders, opts['micro_batch_size'], opts['seq_length']) else: feed_dict = parse_feed_dict(placeholders, dataset_list, bs, seq_length, i=0) def runner(feed_dict, session): for step in range(number_of_steps): if opts['generated_data']: feed_dict = synthetic_feed_dict(placeholders, opts['micro_batch_size'], opts['seq_length']) else: feed_dict = parse_feed_dict(placeholders, dataset_list, bs, seq_length, i=step) start = time.time() ans = session.run(call_result, feed_dict=feed_dict) stop = time.time() if step % int(opts['steps_per_logs']) == 0: logger.info( f"{step}/{number_of_steps}\t\t[{100*float(step/number_of_steps):.2f}%]" ) durations.append((start, stop)) master_durations.append(stop - start) if opts['do_predict']: all_results.extend([ squad_results.RawResult( this_unique_id, this_start_logit.astype(np.float64), this_end_logit.astype(np.float64)) for this_unique_id, this_start_logit, this_end_logit in zip(ans[0], ans[1], ans[2]) ]) thread_queue.put(durations, timeout=10) thp = [ Thread(target=runner, args=(feed_dict, sess)) for _ in range(opts['num_inference_thread']) ] start_time = time.time() for idx, _thread in enumerate(thp): _thread.start() logger.info(f"Thread {idx} started.") for idx, _thread in enumerate(thp): _thread.join() logger.info(f"Thread {idx} join.") total_dur = time.time() - start_time durations_from_th = [] while not thread_queue.empty(): durations_from_th += thread_queue.get() latencies = [y - x for x, y in durations_from_th] latency = np.mean(latencies) latency_99 = np.percentile(latencies, 99) latency_99_9 = np.percentile(latencies, 99.9) min_start = min([x for x, _ in durations_from_th]) max_stop = max([y for _, y in durations_from_th]) tput = bs * opts['num_inference_thread'] * number_of_steps / ( max_stop - min_start) test_results = { 'batch size': bs, 'latency': latency, 'latency_99': latency_99, 'latency_99_9': latency_99_9, 'throughput': tput } logger.info(test_results) return all_results
def main_thread(self):
    """
    Monitors the jobs in current_jobs, updates their statuses,
    and puts their tasks in queues to be processed by other threads
    """
    signal.signal(signal.SIGTERM, self.sigterm_handler)
    try:
        last_saved = None
        while not self.shutdown.is_set():
            # Iterate backwards so we can delete jobs
            for job in reversed(self.jobs):
                if job.status == Status.INIT:
                    def start_this_job(job):
                        if isinstance(job, ModelJob):
                            if job.dataset.status == Status.DONE:
                                job.status = Status.RUN
                            elif job.dataset.status in [Status.ABORT, Status.ERROR]:
                                job.abort()
                            else:
                                job.status = Status.WAIT
                        else:
                            job.status = Status.RUN
                    if config_option('level') == 'test':
                        start_this_job(job)
                    else:
                        # Delay start by one second for initial page load
                        gevent.spawn_later(1, start_this_job, job)

                if job.status == Status.WAIT:
                    if isinstance(job, ModelJob):
                        if job.dataset.status == Status.DONE:
                            job.status = Status.RUN
                        elif job.dataset.status in [Status.ABORT, Status.ERROR]:
                            job.abort()
                    else:
                        job.status = Status.RUN

                if job.status == Status.RUN:
                    alldone = True
                    for task in job.tasks:
                        if task.status == Status.INIT:
                            alldone = False
                            if task.ready_to_queue():
                                logger.debug('%s task queued.' % task.name(), job_id=job.id())
                                task.status = Status.WAIT
                                if isinstance(task, dataset_tasks.ParseFolderTask):
                                    self.split_queue.put((job, task))
                                elif isinstance(task, dataset_tasks.CreateDbTask):
                                    self.create_queue.put((job, task))
                                elif isinstance(task, model_tasks.TrainTask):
                                    self.train_queue.put((job, task))
                                else:
                                    logger.error('Task type %s not recognized' % type(task).__name__, job_id=job.id())
                                    task.exception = Exception('Task type not recognized')
                                    task.status = Status.ERROR
                        elif task.status == Status.WAIT or task.status == Status.RUN:
                            alldone = False
                        elif task.status == Status.DONE:
                            pass
                        elif task.status == Status.ABORT:
                            pass
                        elif task.status == Status.ERROR:
                            job.status = Status.ERROR
                            alldone = False
                            break
                        else:
                            logger.warning('Unrecognized task status: "%s"', task.status, job_id=job.id())
                    if alldone:
                        job.status = Status.DONE
                        logger.info('Job complete.', job_id=job.id())
                        job.save()

            # save running jobs every 15 seconds
            if not last_saved or time.time() - last_saved > 15:
                for job in self.jobs:
                    if job.status.is_running():
                        job.save()
                last_saved = time.time()
            time.sleep(utils.wait_time())
    except KeyboardInterrupt:
        pass

    # Shutdown
    for job in self.jobs:
        job.abort()
        job.save()
    self.running = False
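# The scheduler above fans tasks out to a per-type queue so dedicated worker threads
# can consume them. A minimal, self-contained sketch of that dispatch pattern; the
# task classes and queue mapping here are illustrative stand-ins, not the real task
# classes or queues from the code above.
import queue

class ParseFolderTask: ...
class CreateDbTask: ...
class TrainTask: ...

QUEUES = {
    ParseFolderTask: queue.Queue(),
    CreateDbTask: queue.Queue(),
    TrainTask: queue.Queue(),
}

def enqueue(job, task):
    """Route (job, task) to the queue registered for the task's type."""
    for task_type, q in QUEUES.items():
        if isinstance(task, task_type):
            q.put((job, task))
            return
    raise TypeError('Task type %s not recognized' % type(task).__name__)

enqueue(job=None, task=TrainTask())   # lands on the TrainTask queue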
                       default=1000,
                       help="Number of iterations to run inference for.")
    group.add_argument('--num-inference-thread',
                       action='store',
                       default=2,
                       help="Number of threads to use.")
    return parser


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.ERROR)

    opts = make_global_options([add_squad_options])
    set_defaults(opts)
    opts['num_inference_thread'] = int(opts['num_inference_thread'])

    poplar_options = os.getenv('POPLAR_ENGINE_OPTIONS', 'unset')
    logger.info(f"Poplar options: {poplar_options}")
    logger.info("Command line: " + ' '.join(sys.argv))
    logger.info("Options:\n" + json.dumps(OrderedDict(sorted(opts.items())), indent=1))

    set_poplar_engine_options(execution_profile=opts['execution_profile'],
                              memory_profile=opts['memory_profile'],
                              profile_dir=str(opts['profile_dir']),
                              sync_replicas_independently=False,
                              synthetic_data=opts['synthetic_data'],
                              tensorflow_progress_bar=opts['progress_bar'])

    predict_loop(opts)
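# POPLAR_ENGINE_OPTIONS (logged above) is read from the environment as a JSON string.
# A minimal sketch of setting it before launching the script; the specific option
# names below are an assumption about the auto-report options in recent Poplar SDKs
# and should be checked against the SDK in use.
import json
import os

os.environ.setdefault(
    'POPLAR_ENGINE_OPTIONS',
    json.dumps({
        "autoReport.all": "true",            # assumed option: emit profiling reports
        "autoReport.directory": "./profile"  # assumed option: where reports are written
    }))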
def predict_loop(opts):
    dataset_list = None
    if not opts['generated_data']:
        eval_examples = squad_data.read_squad_examples(opts["predict_file"],
                                                       opts,
                                                       is_training=False)

        tfrecord_dir = opts['tfrecord_dir']
        if not os.path.exists(tfrecord_dir):
            os.makedirs(tfrecord_dir)

        eval_writer = squad_data.FeatureWriter(filename=os.path.join(
            tfrecord_dir, "eval.tf_record"),
                                               is_training=False)
        eval_features = []
        tokenizer = tokenization.FullTokenizer(
            vocab_file=opts['vocab_file'], do_lower_case=opts['do_lower_case'])

        def append_feature(feature):
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        # Create eval.tf_record
        num_features = squad_data.convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=opts["seq_length"],
            doc_stride=opts["doc_stride"],
            max_query_length=opts["max_query_length"],
            is_training=False,
            output_fn=append_feature)
        eval_writer.close()

        squad_dataset = data_loader.load(opts, is_training=False)
        squad_dataset = squad_dataset.make_one_shot_iterator()

        _input_mask_array = []
        _segment_ids_array = []
        _input_ids_array = []
        _unique_ids_array = []

        # Call `get_next()` once outside the loop to create the TensorFlow operations once.
        with tf.Session() as sess:
            next_element = squad_dataset.get_next()
            is_data = True
            while is_data:
                try:
                    output = sess.run(next_element)
                    _input_mask_array.extend(output['input_mask'])
                    _segment_ids_array.extend(output['segment_ids'])
                    _input_ids_array.extend(output['input_ids'])
                    _unique_ids_array.extend(output['unique_ids'])
                except tf.errors.OutOfRangeError:
                    print("End of evaluation dataset.")
                    is_data = False

        dataset_list = [
            _input_ids_array, _input_mask_array, _segment_ids_array,
            _unique_ids_array
        ]

    iterations_per_step = 1
    predict, ipu_config = build_graph(opts, iterations_per_step, is_training=False)

    if predict.exec_path is not None:
        all_results = run_time(opts, dataset_list)

        if opts['do_predict'] is True:
            logger.info("Writing out the predictions:")
            output_dir = opts['output_dir']
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            output_prediction_file = os.path.join(output_dir, "predictions.json")
            output_nbest_file = os.path.join(output_dir, "best_predictions.json")
            output_null_log_odds_file = os.path.join(output_dir, "null_odds.json")
            eval_features = eval_features[:num_features]
            squad_results.write_predictions(
                eval_examples, eval_features, all_results, opts["n_best_size"],
                opts["max_answer_length"], opts["do_lower_case"],
                output_prediction_file, output_nbest_file,
                output_null_log_odds_file, opts["version_2_with_negative"],
                opts["null_score_diff_threshold"], opts["verbose_logging"])

        predict.session.close()
        if opts['do_evaluation']:
            evaluate_squad(output_prediction_file, opts)
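# The `append_feature` callback above is a small "tee": every converted feature is
# kept in memory (needed later to map raw results back to SQuAD examples) and is
# also streamed straight to the TFRecord writer, so features are produced only once.
# A minimal standalone sketch of that pattern; the writer class here is a stand-in,
# not the real squad_data.FeatureWriter API.
class _CountingWriter:
    def __init__(self):
        self.num_written = 0

    def process_feature(self, feature):
        # A real writer would serialise `feature` to a TFRecord file here.
        self.num_written += 1

features_in_memory = []
writer = _CountingWriter()

def tee_feature(feature):
    features_in_memory.append(feature)   # keep for post-processing
    writer.process_feature(feature)      # stream to disk

for f in ({"id": 1}, {"id": 2}):
    tee_feature(f)
assert writer.num_written == len(features_in_memory) == 2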