Example #1
 def post(self, *args, **kwargs):
     method = self.get_argument("method", "POST")
     if method == "DELETE":
         self.delete(*args, **kwargs)
     elif method == "OPTIONS":
         self.options(*args, **kwargs)
     else:
         if config.get_config("requires_tos") and \
                 self.get_argument("accepted_tos", "false") != "true":
             self.set_status(403)
             self.finish()
             return
         timer = Timer("Kernel handler for %s"%self.get_argument("notebook", uuid.uuid4()))
         proto = self.request.protocol.replace("http", "ws", 1)
         host = self.request.host
         ws_url = "%s://%s/" % (proto, host)
         km = self.application.km
         logger.info("Starting session: %s"%timer)
         timeout = self.get_argument("timeout", None)
         if timeout is not None:
             timeout = float(timeout)
             if math.isnan(timeout) or timeout<0:
                 timeout = None
         kernel_id = yield gen.Task(km.new_session_async,
                                    referer = self.request.headers.get('Referer',''),
                                    remote_ip = self.request.remote_ip,
                                    timeout = timeout)
         data = {"ws_url": ws_url, "id": kernel_id}
         self.write(self.permissions(data))
         self.finish()
Example #2
def restoredb(pg_env, pg_restore_binary, database_dump_path, dump_name):

    env = os.environ.copy()
    env.update(pg_env)

    answer = raw_input("This command will restore this dump into database %s. "
                       "Continue? (y)es, (N)o? " % env['PGDATABASE'])
    if answer != 'y':
        logger.info("Aborting!")
        return

    db_dump_file_name = os.path.join(database_dump_path, dump_name)
    if not os.path.isfile(db_dump_file_name):
        logger.error("file %s does not exist: " % db_dump_file_name)
        return

    logger.debug("Restoring %s" % db_dump_file_name)
    cmd = (pg_restore_binary, "-d", env['PGDATABASE'], "-O", "-x",
           db_dump_file_name)
    logger.trace("Executing %s" % str(cmd))
    proc = subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    (stdout, stderr) = proc.communicate()

    if stderr != '':
        logger.error("An error occured while calling pg_restore: %s " % stderr)
        return
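
A hedged usage sketch for the function above; the environment values and paths below are placeholders, not taken from the original project:

# Hypothetical call site for restoredb(); adjust env values and paths as needed.
pg_env = {"PGDATABASE": "mydb", "PGHOST": "localhost", "PGUSER": "postgres"}
restoredb(pg_env, "/usr/bin/pg_restore", "/var/backups/mydb", "mydb_2020-01-01.dump")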
Example #3
    def createImage(self, user, image, lxc, description="Nothing", imagenum=10):
        fspath = self.NFS_PREFIX + "/local/volume/" + lxc
        imgpath = self.imgpath + "private/" + user + "/"
        #tmppath = self.NFS_PREFIX + "/local/tmpimg/"
        #tmpimage = str(random.randint(0,10000000)) + ".tz"

        if not os.path.exists(imgpath+image) and os.path.exists(imgpath):
            cur_imagenum = 0
            for filename in os.listdir(imgpath):
                if os.path.isdir(imgpath+filename):
                    cur_imagenum += 1
            if cur_imagenum >= int(imagenum):
                return [False,"image number limit exceeded"]
        #sys_run("mkdir -p %s" % tmppath, True)
        sys_run("mkdir -p %s" % imgpath,True)
        try:
            sys_run("tar -cvf %s -C %s ." % (imgpath+image+".tz",self.dealpath(fspath)), True)
        except Exception as e:
            logger.error(e)
        #try: 
            #sys_run("cp %s %s" % (tmppath+tmpimage, imgpath+image+".tz"), True)
            #sys_run("rsync -a --delete --exclude=lost+found/ --exclude=root/nfs/ --exclude=dev/ --exclude=mnt/ --exclude=tmp/ --exclude=media/ --exclude=proc/ --exclude=sys/ %s/ %s/" % (self.dealpath(fspath),imgpath+image),True)
        #except Exception as e:
        #    logger.error(e)
        #sys_run("rm -f %s" % tmppath+tmpimage, True)    
        #sys_run("rm -f %s" % (imgpath+"."+image+"_docklet_share"),True)
        self.updateinfo(imgpath,image,description)
        logger.info("image:%s from LXC:%s create success" % (image,lxc))
        return [True, "create image success"]
Example #4
def stop_heartbeat():
    try:
        if monitor_enable:
            params = {'inst': instance_index}
            urllib2.urlopen(stop_hb_url, data=urllib.urlencode(params))
    except urllib2.URLError:
        logger.info('Cannot connect to monitor')
Example #5
def sensitivitybreak(A):
    import math
    from log import logger
    sn = [0, 0]
    flag = 0
    cnt = 0
    if A >= 2 and A < 65536:
        if A == 2:
            sn = [2, 1]
            return sn
        elif 2 < A <= 512:
            sn = [A, 1]
            return sn
        else:
            while True:
                # use integer division so this also works on Python 3
                for i in range(A // 255 + 1, int(math.sqrt(A))):
                    if i > 1:
                        if A % i:
                            pass
                        else:
                            sn[0] = max(i, A // i)
                            sn[1] = min(i, A // i)
                            return sn

                cnt = cnt + 1
                if (cnt % 2):
                    flag = flag + 1
                    A += flag
                else:
                    flag = flag + 1
                    A -= flag
    else:
        logger.info("Invalid digitalsensitivity number!")
Example #6
    def run(self):
        # Ok create the thread
        nb_threads = self.daemon_thread_pool_size
        # Keep a list of our running threads
        threads = []
        logger.info('Using a %d http pool size', nb_threads)
        while True:
            # We must not run too many threads, so we will loop until
            # we get at least one free slot available
            free_slots = 0
            while free_slots <= 0:
                to_del = [t for t in threads if not t.is_alive()]
                _ = [t.join() for t in to_del]
                for t in to_del:
                    threads.remove(t)
                free_slots = nb_threads - len(threads)
                if free_slots <= 0:
                    time.sleep(0.01)

            socks = self.get_sockets()
            # Blocking for 0.1 s max here
            ins = self.get_socks_activity(socks, 0.1)
            if len(ins) == 0:  # trivial case: no fd activity:
                continue
            # If we got activity, Go for a new thread!
            for sock in socks:
                if sock in ins:
                    # GO!
                    t = threading.Thread(None, target=self.handle_one_request_thread, name='http-request', args=(sock,))
                    # We don't want to hang the master thread just because this one is still alive
                    t.daemon = True
                    t.start()
                    threads.append(t)
Example #7
        def __init__(self, host, port, http_backend, use_ssl, ca_cert, ssl_key, ssl_cert, hard_ssl_name_check, daemon_thread_pool_size):
            self.port = port
            self.host = host
            # Port = 0 means "I don't want HTTP server"
            if self.port == 0:
                return

            self.use_ssl = use_ssl
            
            self.registered_fun = {}
            self.registered_fun_names = []
            self.registered_fun_defaults = {}

            protocol = 'http'
            if use_ssl:
                protocol = 'https'
            self.uri = '%s://%s:%s' % (protocol, self.host, self.port)
            logger.info("Opening HTTP socket at %s", self.uri)

            # Hack BaseHTTPServer so that wsgiref only looks up the IP, not the hostname
            __import__('BaseHTTPServer').BaseHTTPRequestHandler.address_string = lambda x:x.client_address[0]

            if http_backend == 'cherrypy' or http_backend == 'auto' and cheery_wsgiserver:
                self.srv = CherryPyBackend(host, port, use_ssl, ca_cert, ssl_key, ssl_cert, hard_ssl_name_check, daemon_thread_pool_size)
            else:
                self.srv = WSGIREFBackend(host, port, use_ssl, ca_cert, ssl_key, ssl_cert, hard_ssl_name_check, daemon_thread_pool_size)

            self.lock = threading.RLock()
Example #8
    def run(self, handler):
        daemon_thread_pool_size = self.options['daemon_thread_pool_size']
        from wsgiref.simple_server import WSGIRequestHandler
        LoggerHandler = WSGIRequestHandler
        if self.quiet:
            class QuietHandler(WSGIRequestHandler):
                def log_request(*args, **kw):
                    pass
            LoggerHandler = QuietHandler

        srv = simple_server.make_server(self.host, self.port, handler, handler_class=LoggerHandler)
        logger.info('Initializing a wsgiref backend with %d threads', daemon_thread_pool_size)
        use_ssl = self.options['use_ssl']
        ca_cert = self.options['ca_cert']
        ssl_cert = self.options['ssl_cert']
        ssl_key = self.options['ssl_key']

        if use_ssl:
            if not ssl:
                logger.error("Missing python-openssl librairy,"
                             "please install it to open a https backend")
                raise Exception("Missing python-openssl librairy, "
                                "please install it to open a https backend")
            srv.socket = ssl.wrap_socket(srv.socket,
                                         keyfile=ssl_key, certfile=ssl_cert, server_side=True)
        return srv
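
Note that ssl.wrap_socket was removed in Python 3.12; on current Python the same step would roughly use an SSLContext instead. A sketch reusing the ssl_key/ssl_cert variables above (not the original project's code):

# Modern replacement for the ssl.wrap_socket call above (sketch only).
import ssl

context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
context.load_cert_chain(certfile=ssl_cert, keyfile=ssl_key)
srv.socket = context.wrap_socket(srv.socket, server_side=True)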
Example #9
def vnodes_monitor(cur_user, user, form, con_id, issue):
    global G_clustername
    logger.info("handle request: monitor/vnodes")
    res = {}
    fetcher = monitor.Container_Fetcher(con_id)
    if issue == 'cpu_use':
        res['cpu_use'] = fetcher.get_cpu_use()
    elif issue == 'mem_use':
        res['mem_use'] = fetcher.get_mem_use()
    elif issue == 'disk_use':
        res['disk_use'] = fetcher.get_disk_use()
    elif issue == 'basic_info':
        res['basic_info'] = fetcher.get_basic_info()
    elif issue == 'owner':
        names = con_id.split('-')
        result = G_usermgr.query(username = names[0], cur_user = cur_user)
        if result['success'] == 'false':
            res['username'] = ""
            res['truename'] = ""
        else:
            res['username'] = result['data']['username']
            res['truename'] = result['data']['truename']
    else:
        res = "Unspported Method!"
    return json.dumps({'success':'true', 'monitor':res})
Example #10
def send_mail_thread(mailto_list, subject, msg):
    if send_mail(mailto_list, subject, msg):
        logger.info("send mail success.") 
    else:
        logger.error("send mail fail.")
        logger.error(subject)
        logger.error(msg)
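
A minimal, hypothetical call site (send_mail and the addresses below are assumptions); the point of the wrapper is to run the send in a background thread so the caller is not blocked by SMTP latency:

import threading

t = threading.Thread(
    target=send_mail_thread,
    args=(["ops@example.com"], "nightly report", "all jobs finished"),
)
t.daemon = True  # do not keep the process alive just for the mail
t.start()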
Example #11
 def start(self):
     self.etcd.setkey("machines/runnodes/"+self.addr, "work")
     self.thread_sendheartbeat = threading.Thread(target=self.sendheartbeat)
     self.thread_sendheartbeat.start()
     # start serving for rpc
     logger.info ("begins to work")
     self.rpcserver.serve_forever()
Example #12
 def __init__(self):
     with open(os.path.join(os.path.dirname(__file__), "api_config"), 'r') as yaml_file:
         # safe_load avoids executing arbitrary tags and works with current PyYAML
         self._config = yaml.safe_load(yaml_file)
     for key in config_keys:
         setattr(self, key, self._config.get(key))

     logger.info('API configuration read')
Example #13
def init_allocations():
    global machine_allocation_dict
    global allocations_list
    global node_manager
    global usages_list
    global machine_usage_dict
    logger.info("init allocations:")

    machines = node_manager.get_allnodes()
    for machine in machines:
        allocation = AllocationOfMachine()
        allocation.machineid = machine
        allocation.resources = 2
        allocation.reliable_resources_allocation_summary = 0
        allocation.reliable_allocations = []
        allocation.restricted_allocations = []
        
        machine_allocation_dict[machine] = allocation
        bisect.insort(allocations_list, allocation)

        usage_of_machine = {}
        usage_of_machine['machineid'] = machine
        usage_of_machine['cpu_utilization'] = 0.1

        usages_list.append(usage_of_machine)
        machine_usage_dict[machine] = 0.1
Example #14
 def _reload_nginx(self):
     logger.info('reload nginx start')
     if not DEBUG:
         subprocess.call("nginx -t && nginx -s reload", shell=True)
     else:
         logger.debug('fake reload nginx')
     logger.info('reload nginx finish')
Example #15
 def run(self):
     assert self.storage is not None
     try:
         self.crawl()
     except Exception as e:
         logger.info('error when crawl: %s' % self.user)
         logger.exception(e)
Example #16
 def on_message(self, message):
     prefix, json_message = message.split(",", 1)
     kernel_id = prefix.split("/", 1)[0]
     message = jsonapi.loads(json_message)
     logger.debug("KernelConnection.on_message: %s", message)
     application = self.session.handler.application
     if kernel_id == "complete":
         if message["header"]["msg_type"] in ("complete_request",
                                              "object_info_request"):
             application.completer.registerRequest(self, message)
         return
     try:
         if kernel_id not in self.channels:
             # handler may be None in certain circumstances (it seems to only be set
             # in GET requests, not POST requests, so even using it here may
             # only work with JSONP because of a race condition)
             kernel_info = application.km.kernel_info(kernel_id)
             self.kernel_info = {'remote_ip': kernel_info['remote_ip'],
                                 'referer': kernel_info['referer'],
                                 'timeout': kernel_info['timeout']}
         if message["header"]["msg_type"] == "execute_request":
             stats_logger.info(StatsMessage(
                 kernel_id=kernel_id,
                 remote_ip=self.kernel_info['remote_ip'],
                 referer=self.kernel_info['referer'],
                 code=message["content"]["code"],
                 execute_type='request'))
         if kernel_id not in self.channels:
             self.channels[kernel_id] = SockJSChannelsHandler(self.send)
             self.channels[kernel_id].open(application, kernel_id)
         self.channels[kernel_id].on_message(json_message)
     except KeyError:
         # Ignore messages to nonexistent or killed kernels.
         logger.info("%s message sent to nonexistent kernel: %s" %
                     (message["header"]["msg_type"], kernel_id))
Example #17
 def mount_container(self,lxc_name):
     logger.info ("mount container:%s" % lxc_name)
     [success, status] = self.container_status(lxc_name)
     if not success:
         return [False, status]
     self.imgmgr.checkFS(lxc_name)
     return [True, "mount success"]
Example #18
def change_pa(db, gno, status, stc):
    sql = 'update pas SET status=%d, stc=%s where gno="%s"' % (status, stc, gno)
    logger.info(sql)
    db.execute(sql)
    warning = {"gno":gno, "status": status, "stc":stc}
    userlist = back_user(int(gno[0]))
    handlers.qhandler.send_message_ws(userlist, warning)
Example #19
 def post(self, *args, **kwargs):
     method = self.get_argument("method", "POST")
     if method == "DELETE":
         self.delete(*args, **kwargs)
     elif method == "OPTIONS":
         self.options(*args, **kwargs)
     else:
         if config.get_config("requires_tos") and \
                 self.get_argument("accepted_tos", "false") != "true":
             self.set_status(403)
             self.finish()
             return
         logger.info('starting kernel for session '
                      + self.get_argument('CellSessionID', '(no ID)'))
         proto = self.request.protocol.replace("http", "ws", 1)
         host = self.request.host
         ws_url = "%s://%s/" % (proto, host)
         km = self.application.km
         timeout = self.get_argument("timeout", None)
         if timeout is not None:
             timeout = float(timeout)
             if math.isnan(timeout) or timeout<0:
                 timeout = None
         kernel_id = yield tornado.gen.Task(
            km.new_session_async,
            referer=self.request.headers.get('Referer', ''),
            remote_ip=self.request.remote_ip,
            timeout=timeout)
         data = {"ws_url": ws_url, "id": kernel_id}
         self.set_header("Jupyter-Kernel-ID", kernel_id)
         self.write(self.permissions(data))
         self.finish()
Example #20
 def basic_app_remove(self):
     logger.info("remove basic app : %s " % self.appname)
     remove_results = {}
     remove_success_results = {}
     remove_failed_results = {}
     remove_missed_results = {}
     try:
         app_spec = self.app_spec
         for pg_spec in app_spec.PodGroups:
             remove_r = self.podgroup_remove(pg_spec.Name)
             if remove_r.status_code < 400:
                 remove_success_results[pg_spec.Name] = remove_r
             elif remove_r.status_code == 404:
                 remove_missed_results[pg_spec.Name] = remove_r
             else:
                 remove_failed_results[pg_spec.Name] = remove_r
         # use dependency_remove api of Deployd for deleting proc with
         # portal type
         for dp_spec in app_spec.Portals:
             remove_r = self.dependency_remove(dp_spec.Name)
             if remove_r.status_code < 400:
                 remove_success_results[dp_spec.Name] = remove_r
             elif remove_r.status_code == 404:
                 remove_missed_results[dp_spec.Name] = remove_r
             else:
                 remove_failed_results[dp_spec.Name] = remove_r
     except Exception as e:
         logger.warning("failed when trying to remove app %s: %s" %
                        (self.appname, str(e)))
Example #21
    def delete_cluster(self, clustername, username, user_info):
        [status, vcluster] = self.get_vcluster(clustername, username)
        if not status:
            return [False, "cluster not found"]
        if vcluster.status =='running':
            return [False, "cluster is still running, you need to stop it and then delete"]
        ips = []
        for container in vcluster.containers:
            worker = self.nodemgr.ip_to_rpc(container.host)
            if worker is None:
                return [False, "The worker can't be found or has been stopped."]
            worker.delete_container(container.containername)
            db.session.delete(container)
            ips.append(container.ip)
        logger.info("delete vcluster and release vcluster ips")
        self.networkmgr.release_userips(username, ips)
        self.networkmgr.printpools()
        #os.remove(self.fspath+"/global/users/"+username+"/clusters/"+clustername)
        for bh in vcluster.billing_history:
            db.session.delete(bh)
        db.session.delete(vcluster)
        db.session.commit()
        os.remove(self.fspath+"/global/users/"+username+"/hosts/"+str(vcluster.clusterid)+".hosts")

        groupname = json.loads(user_info)["data"]["group"]
        uid = json.loads(user_info)["data"]["id"]
        [status, clusters] = self.list_clusters(username)
        if len(clusters) == 0:
            self.networkmgr.del_user(username)
            self.networkmgr.del_usrgwbr(username, uid, self.nodemgr)
            #logger.info("vlanid release triggered")

        return [True, "cluster delete"]
Example #22
 def __init__(self, db_path):
     directory = SimpleFSDirectory(File(db_path))
     reader = DirectoryReader.open(directory)
     self.searcher = IndexSearcher(reader)
     self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
     logger.info("Loaded DB from %s with %d documents: ",
                 db_path, reader.numDocs())
Example #23
def get_instances_id():
    logger.info('getting id from every instance...')
    cursor = _db.get_all_instance_ids()
    id_list = []
    for doc in cursor:
        id_list.append(doc['_id'])
    return Response(to_json(id_list), mimetype=_JSON_MIME)
Example #24
def scaleout_cluster(cur_user, user, form):
    global G_usermgr
    global G_vclustermgr
    clustername = form.get('clustername', None)
    logger.info ("scaleout: %s" % form)
    if clustername is None:
        return json.dumps({'success':'false', 'message':'clustername is null'})
    logger.info("handle request : scale out %s" % clustername)
    image = {}
    image['name'] = form.get("imagename", None)
    image['type'] = form.get("imagetype", None)
    image['owner'] = form.get("imageowner", None)
    user_info = G_usermgr.selfQuery(cur_user = cur_user)
    user_info = json.dumps(user_info)
    setting = {
            'cpu': form.get('cpuSetting'),
            'memory': form.get('memorySetting'),
            'disk': form.get('diskSetting')
            }
    [status, result] = G_usermgr.usageInc(cur_user = cur_user, modification = setting)
    if not status:
        return json.dumps({'success':'false', 'action':'scale out', 'message': result})
    [status, result] = G_vclustermgr.scale_out_cluster(clustername, user, image, user_info, setting)
    if status:
        return json.dumps({'success':'true', 'action':'scale out', 'message':result})
    else:
        G_usermgr.usageRecover(cur_user = cur_user, modification = setting)
        return json.dumps({'success':'false', 'action':'scale out', 'message':result})
Example #25
    def delete_cluster(self, clustername, username, user_info):
        [status, info] = self.get_clusterinfo(clustername, username)
        if not status:
            return [False, "cluster not found"]
        if info['status']=='running':
            return [False, "cluster is still running, you need to stop it and then delete"]
        ips = []
        for container in info['containers']:
            worker = self.nodemgr.ip_to_rpc(container['host'])

            # after release resources
            dscheduler.after_release(container['containername'])
            
            worker.delete_container(container['containername'])
            ips.append(container['ip'])
            
        logger.info("delete vcluster and release vcluster ips")
        self.networkmgr.release_userips(username, ips)
        self.networkmgr.printpools()
        os.remove(self.fspath+"/global/users/"+username+"/clusters/"+clustername)
        os.remove(self.fspath+"/global/users/"+username+"/hosts/"+str(info['clusterid'])+".hosts")
        
        groupname = json.loads(user_info)["data"]["group"]
        [status, clusters] = self.list_clusters(username)
        if len(clusters) == 0:
            self.networkmgr.del_user(username, isshared = True if str(groupname) == "fundation" else False)
            logger.info("vlanid release triggered")
        
        return [True, "cluster delete"]
Example #26
    def delete_cluster(self, clustername, username, user_info):
        [status, info] = self.get_clusterinfo(clustername, username)
        if not status:
            return [False, "cluster not found"]
        if info['status']=='running':
            return [False, "cluster is still running, you need to stop it and then delete"]
        ips = []
        for container in info['containers']:
            worker = xmlrpc.client.ServerProxy("http://%s:%s" % (container['host'], env.getenv("WORKER_PORT")))
            if worker is None:
                return [False, "The worker can't be found or has been stopped."]
            worker.delete_container(container['containername'])
            ips.append(container['ip'])
        logger.info("delete vcluster and release vcluster ips")
        self.networkmgr.release_userips(username, ips)
        self.networkmgr.printpools()
        os.remove(self.fspath+"/global/users/"+username+"/clusters/"+clustername)
        os.remove(self.fspath+"/global/users/"+username+"/hosts/"+str(info['clusterid'])+".hosts")

        groupname = json.loads(user_info)["data"]["group"]
        uid = json.loads(user_info)["data"]["id"]
        [status, clusters] = self.list_clusters(username)
        if len(clusters) == 0:
            self.networkmgr.del_user(username)
            self.networkmgr.del_usrgwbr(username, uid, self.nodemgr)
            #logger.info("vlanid release triggered")

        return [True, "cluster delete"]
Example #27
def get_ranks(dataset, system):
    """
    Return the rank of the first correct answer returned by the
    system.
    """

    results = []
    oracle = CachedOracleSystem(dataset)
    all_expressions = set()
    for _, expressions in oracle.queries.values():
        all_expressions |= set(expressions)
    # all_expression_sets = [expressions for expressions in oracle.queries.values()]
    # all_possible_expressions = reduce(set.__or__, all_expression_sets)
    worst_possible_rank = len(all_expressions)
    logger.info("Number of possible expressions: %d", worst_possible_rank)
    for query, target_entities in dataset:
        logger.debug("Evaluating query %r", query)
        system_expressions = system.get_best_expressions(query)
        _, oracle_expressions = oracle.get_best_results_and_expressions(query)
        found_rank = get_rank(system_expressions, oracle_expressions, worst_possible_rank)
        logger.debug("Found rank: %r", found_rank)
        results.append({'query': query,
                        'target': target_entities,
                        'rank': found_rank})
    return results
Example #28
    def _setup_classpath_model(self):

        classpath = []

        if self.runtime_version < 5:
            # put model lib into classpath
            model_lib = os.path.join(
                self._conf['m2ee']['app_base'],
                'model',
                'lib'
            )
            if os.path.isdir(model_lib):
                # put all jars into classpath
                classpath.append(os.path.join(model_lib, 'userlib', '*'))
                # put all directories as themselves into classpath
                classpath.extend(
                    [os.path.join(model_lib, name)
                        for name in os.listdir(model_lib)
                        if os.path.isdir(os.path.join(model_lib, name))
                     ])
            else:
                logger.info("No current unpacked application model is available. "
                            "Use the unpack command to unpack a mendix deployment "
                            "archive from %s" % self._conf['m2ee']['model_upload_path'])

        return classpath
Example #29
 def cb(reply):
     if (reply["type"] == "error"):
         pass
     else:
         logger.info("Ended kernel %s", kernel_id)
         del self._kernels[kernel_id]
         del self._comps[comp_id]["kernels"][kernel_id]
Example #30
    def useless_procs_remove(self, origin_procs):
        remove_results = {}
        remove_success_results = {}
        remove_failed_results = {}
        remove_missed_results = {}

        current_pgs = ["%s.%s.%s" % (self.appname, p.type.name, p.name)
                       for p in self.lain_config.procs.values()]
        try:
            for proc in origin_procs:
                pg_name = "%s.%s.%s" % (
                    self.appname, proc.type.name, proc.name)
                if pg_name in current_pgs:
                    continue

                logger.info("remove useless proc %s of app : %s " %
                            (pg_name, self.appname))
                remove_r = self.podgroup_remove(pg_name) if proc.type != ProcType.portal else \
                    self.dependency_remove(pg_name)
                if remove_r.status_code < 400:
                    remove_success_results[pg_name] = remove_r
                elif remove_r.status_code == 404:
                    remove_missed_results[pg_name] = remove_r
                else:
                    remove_failed_results[pg_name] = remove_r
        except Exception as e:
            logger.warning("failed when trying to remove useless proc of app %s: %s" %
                           (self.appname, str(e)))
Example #31
def flagcheck():
    msg = {'status': 0, 'msg': '提交成功'}

    lastround = Flags.query.order_by(Flags.rounds.desc()).first()  # .rounds
    #print(lastround)
    #print('lastround',lastround)

    if lastround:
        lastround = lastround.rounds
    else:
        msg['status'] = -1
        msg['msg'] = '比赛尚未开始'
        return json.dumps(msg, ensure_ascii=False)
    token = request.args.get('token')

    try:
        flag = request.form['flag']
    except:
        msg['status'] = -1
        msg['msg'] = '提交格式不正确'
        return json.dumps(msg, ensure_ascii=False)

    print(token, flag)

    attackteam = Teams.query.filter(Teams.token == token).first()

    print(attackteam)

    if attackteam:
        attackteamid = attackteam.id
    else:
        msg['status'] = -1
        msg['msg'] = 'TOKEN 错误'
        return json.dumps(msg, ensure_ascii=False)

    #print(attackteamid)

    defenseteam = Flags.query.filter(Flags.rounds == lastround,
                                     Flags.flag == flag).first()
    print('rounds', lastround)
    print('flag', flag)

    #for i in Flags.query.filter(Flags.rounds == lastround).all():
    #    print(i.flag)

    if defenseteam:
        defenseteamid = defenseteam.teamid
        defenseteam = Teams.query.filter(Teams.id == defenseteamid).first()
    else:
        msg['status'] = -1
        msg['msg'] = 'FLAG 错误'
        return json.dumps(msg, ensure_ascii=False)

    if defenseteamid == attackteamid:
        msg['status'] = -1
        msg['msg'] = '你不能攻击自己的队伍'
        return json.dumps(msg, ensure_ascii=False)

    roundcheck = Round.query.filter(Round.defenseteamid == defenseteamid,
                                    Round.attackteamid == attackteamid,
                                    Round.rounds == lastround).first()

    if roundcheck:
        msg['status'] = -1
        msg['msg'] = '你已经攻击了该的队伍'
        return json.dumps(msg, ensure_ascii=False)

    roundcheck2 = Round.query.filter(Round.score == 200,
                                     Round.defenseteamid == defenseteamid,
                                     Round.rounds == lastround).first()
    if roundcheck2:
        msg['status'] = -1
        msg['msg'] = '该队伍Flag已经被提交'
        return json.dumps(msg, ensure_ascii=False)
    #print(defenseteamid)
    #msg = 'rounds {} attackteamid {} defenseteamid {}'.format(lastround,attackteamid,defenseteamid)
    msg['status'] = 1
    msg['msg'] = '提交成功,({})成功攻击了 {}'.format(attackteam.name, defenseteam.name)
    rd = Round(attackteamid, defenseteamid, lastround,
               '{} 攻击了 {}'.format(attackteam.name, defenseteam.name))
    db.session.add(rd)
    db.session.commit()
    # Added later: with the rule that an already-submitted flag no longer scores, update the score directly to 200
    #Round.query.filter(Round.score==0).update({Round.score : 200})
    #db.session.commit()
    logger.info('{} 成功攻击了 {}'.format(attackteam.name, defenseteam.name))
    return json.dumps(msg, ensure_ascii=False)
Example #32
 def scale_out_cluster(self, clustername, username, image, user_info):
     if not self.is_cluster(clustername, username):
         return [False, "cluster:%s not found" % clustername]
     workers = self.nodemgr.get_rpcs()
     if (len(workers) == 0):
         logger.warning("no workers to start containers, scale out failed")
         return [False, "no workers are running"]
     image_json = json.dumps(image)
     [status, result] = self.networkmgr.acquire_userips_cidr(username)
     gateway = self.networkmgr.get_usergw(username)
     vlanid = self.networkmgr.get_uservlanid(username)
     self.networkmgr.printpools()
     if not status:
         return [False, result]
     ip = result[0]
     [status, clusterinfo] = self.get_clusterinfo(clustername, username)
     clusterid = clusterinfo['clusterid']
     clusterpath = self.fspath + "/global/users/" + username + "/clusters/" + clustername
     hostpath = self.fspath + "/global/users/" + username + "/hosts/" + str(
         clusterid) + ".hosts"
     cid = clusterinfo['nextcid']
     onework = workers[random.randint(0, len(workers) - 1)]
     lxc_name = username + "-" + str(clusterid) + "-" + str(cid)
     hostname = "host-" + str(cid)
     [success,
      message] = onework.create_container(lxc_name, username,
                                          user_info, clustername, clusterid,
                                          str(cid), hostname, ip, gateway,
                                          str(vlanid), image_json)
     if success is False:
         logger.info("create container failed, so scale out failed")
         return [False, message]
     if clusterinfo['status'] == "running":
         onework.start_container(lxc_name)
     onework.start_services(lxc_name, ["ssh"])  # TODO: need fix
     logger.info("scale out success")
     hostfile = open(hostpath, 'a')
     hostfile.write(
         ip.split("/")[0] + "\t" + hostname + "\t" + hostname + "." +
         clustername + "\n")
     hostfile.close()
     clusterinfo['nextcid'] = int(clusterinfo['nextcid']) + 1
     clusterinfo['size'] = int(clusterinfo['size']) + 1
     clusterinfo['containers'].append({
         'containername': lxc_name,
         'hostname': hostname,
         'ip': ip,
         'host': self.nodemgr.rpc_to_ip(onework),
         'image': image['name'],
         'lastsave': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     })
     clusterfile = open(clusterpath, 'w')
     clusterfile.write(json.dumps(clusterinfo))
     clusterfile.close()
     return [True, clusterinfo]
Example #33
    def prepare_chrome(self, login_type):
        logger.info(
            color("fg_bold_cyan") +
            "正在初始化chrome driver,用以进行【{}】相关操作".format(login_type))
        caps = DesiredCapabilities().CHROME
        # caps["pageLoadStrategy"] = "normal"  #  Waits for full page load
        caps["pageLoadStrategy"] = "none"  # Do not wait for full page load

        options = Options()
        if not self.cfg._debug_show_chrome_logs:
            options.add_experimental_option("excludeSwitches",
                                            ["enable-logging"])
        if self.cfg.run_in_headless_mode:
            logger.warning("已配置使用headless模式运行chrome")
            options.headless = True

        inited = False

        try:
            if not self.cfg.force_use_portable_chrome:
                # If portable chrome is not forced, first try the system-installed chrome
                self.driver = webdriver.Chrome(
                    executable_path=self.chrome_driver_executable_path,
                    desired_capabilities=caps,
                    options=options)
                logger.info("使用自带chrome")
                inited = True
        except:
            pass

        if not inited:
            # If none was found, fall back to the bundled portable chrome
            # First check whether the portable archive exists locally; if not, prompt the user to download it from the netdisk
            if not os.path.isfile(self.chrome_binary_7z):
                msg = (
                    "当前电脑未发现合适版本chrome版本,且当前目录无便携版chrome的压缩包({zip_name})\n"
                    "请在稍后打开的网盘页面中下载[{zip_name}],并放到小助手的exe所在目录(注意:是把这个压缩包原原本本地放到这个目录里,而不是解压后再放过来!!!),然后重新打开程序~\n"
                    "如果之前版本已经下载过这个文件,可以直接去之前版本复制过来~不需要再下载一次~\n").format(
                        zip_name=os.path.basename(self.chrome_binary_7z))
                win32api.MessageBox(0, msg, "出错啦", win32con.MB_ICONERROR)
                webbrowser.open(get_netdisk_addr(self.cfg))
                os.system("PAUSE")
                exit(-1)

            # Check whether the portable chrome has already been extracted
            if not os.path.isdir(self.chrome_binary_directory):
                logger.info("自动解压便携版chrome到当前目录")
                subprocess.call([
                    self.bandizip_executable_path, "x", "-target:auto",
                    self.chrome_binary_7z
                ])

            # Then initialize the driver object with the local chrome
            options.binary_location = self.chrome_binary_location
            # you may need some other options
            options.add_argument('--no-sandbox')
            options.add_argument('--no-default-browser-check')
            options.add_argument('--no-first-run')
            self.driver = webdriver.Chrome(
                executable_path=self.chrome_driver_executable_path,
                desired_capabilities=caps,
                options=options)
            logger.info("使用便携版chrome")

        self.cookies = self.driver.get_cookies()
Example #34
    def _login_common(self,
                      login_type,
                      switch_to_login_frame_fn,
                      assert_login_finished_fn,
                      login_action_fn=None,
                      need_human_operate=True):
        """
        Common login logic; returns the uin and skey data contained in the post-login cookies.
        :rtype: LoginResult
        """
        switch_to_login_frame_fn()

        logger.info("等待#loginframe#ptlogin_iframe#switcher_plogin加载完毕")
        WebDriverWait(self.driver,
                      self.cfg.login.load_login_iframe_timeout).until(
                          expected_conditions.visibility_of_element_located(
                              (By.ID, 'switcher_plogin')))

        if need_human_operate:
            logger.info("请在{}s内完成{}操作".format(self.cfg.login.login_timeout,
                                              login_type))

        # The actual login logic differs per login type, so the externally supplied callback is invoked here
        logger.info("开始{}流程".format(login_type))
        if login_action_fn is not None:
            login_action_fn()

        logger.info("等待登录完成(也就是#loginIframe#login登录框消失)")
        WebDriverWait(self.driver, self.cfg.login.login_timeout).until(
            expected_conditions.invisibility_of_element_located(
                (By.ID, "login")))

        logger.info("回到主iframe")
        self.driver.switch_to.default_content()

        assert_login_finished_fn()

        logger.info("登录完成")

        self.cookies = self.driver.get_cookies()

        if self.login_mode == self.login_mode_normal:
            # For normal login, additionally fetch Tencent Video's vqq_vuserid
            logger.info("转到qq视频界面,从而可以获取vuserid,用于腾讯视频的蚊子腿")
            self.driver.get(
                "https://film.qq.com/film/p/topic/dnf922/index.html")
            for i in range(5):
                vuserid = self.driver.get_cookie('vuserid')
                if vuserid is not None:
                    break
                time.sleep(1)
            self.add_cookies(self.driver.get_cookies())

        return
Example #35
 def assert_login_finished_fn():
     logger.info("请等待#logined的div可见,则说明已经登录完成了...")
     WebDriverWait(
         self.driver, self.cfg.login.login_finished_timeout).until(
             expected_conditions.visibility_of_element_located(
                 (By.ID, "logined")))
Example #36
        def switch_to_login_frame_fn():
            logger.info("打开活动界面")
            self.driver.get("https://guanjia.qq.com/act/cop/202010dnf/")

            logger.info("浏览器设为1936x1056")
            self.driver.set_window_size(1936, 1056)

            logger.info("等待登录按钮#dologin出来,确保加载完成")
            WebDriverWait(self.driver, self.cfg.login.load_page_timeout).until(
                expected_conditions.visibility_of_element_located(
                    (By.ID, "dologin")))

            logger.info("点击登录按钮")
            self.driver.find_element(By.ID, "dologin").click()

            logger.info("等待#login_ifr显示出来并切换")
            WebDriverWait(
                self.driver, self.cfg.login.load_login_iframe_timeout).until(
                    expected_conditions.visibility_of_element_located(
                        (By.ID, "login_ifr")))
            loginIframe = self.driver.find_element_by_id("login_ifr")
            self.driver.switch_to.frame(loginIframe)

            logger.info("等待#login_ifr#ptlogin_iframe加载完毕并切换")
            WebDriverWait(
                self.driver, self.cfg.login.load_login_iframe_timeout).until(
                    expected_conditions.visibility_of_element_located(
                        (By.ID, "ptlogin_iframe")))
            ptlogin_iframe = self.driver.find_element_by_id("ptlogin_iframe")
            self.driver.switch_to.frame(ptlogin_iframe)
Example #37
def package(dir_src, dir_all_release, release_dir_name, release_7z_name,
            dir_github_action_artifact):
    old_cwd = os.getcwd()

    show_head_line(f"开始打包 {release_dir_name} 所需内容", color("bold_yellow"))

    # Make sure the release root directory exists
    if not os.path.isdir(dir_all_release):
        os.mkdir(dir_all_release)
    # and clear out the current release version directory
    dir_current_release = os.path.realpath(
        os.path.join(dir_all_release, release_dir_name))
    shutil.rmtree(dir_current_release, ignore_errors=True)
    os.mkdir(dir_current_release)

    logger.info(
        color("bold_yellow") + f"将部分内容从 {dir_src} 复制到 {dir_current_release} ")
    # Files and directories that need to be copied
    files_to_copy = []
    # Use a regex to determine the initial copy set
    reg_wantted_file = r'.*\.(toml|md|txt|png|jpg|docx|url)$'
    for file in os.listdir('.'):
        if not re.search(reg_wantted_file, file, flags=re.IGNORECASE):
            continue
        files_to_copy.append(file)
    # Add some extra files and directories
    files_to_copy.extend([
        "config.example.toml",
        "DNF蚊子腿小助手.exe",
        "DNF蚊子腿小助手配置工具.exe",
        "DNF蚊子腿小助手配置文件.bat",
        "使用教程",
        "付费指引",
        "相关信息",
        "utils",
    ])
    # Copy in sorted order
    files_to_copy = sorted(files_to_copy)
    # Copy the files and directories over
    for filename in files_to_copy:
        source = os.path.join(dir_src, filename)
        destination = os.path.join(dir_current_release, filename)
        if os.path.isdir(filename):
            logger.info(f"拷贝目录 {filename}")
            shutil.copytree(source, destination)
        else:
            logger.info(f"拷贝文件 {filename}")
            shutil.copyfile(source, destination)

    logger.info(color("bold_yellow") + "移动部分文件的位置和名称")
    files_to_move = [
        ("utils/auto_updater.exe", "utils/auto_updater_latest.exe"),
        ("CHANGELOG.MD", "相关信息/CHANGELOG.MD"),
        ("README.MD", "相关信息/README.MD"),
    ]
    for src_file, dst_file in files_to_move:
        src_file = os.path.join(dir_current_release, src_file)
        dst_file = os.path.join(dir_current_release, dst_file)

        logger.info(f"移动{src_file}到{dst_file}")
        shutil.move(src_file, dst_file)

    logger.info(color("bold_yellow") + "清除一些无需发布的内容")
    dir_to_filenames_need_remove = {
        ".": [
            "requirements.txt",
        ],
        "utils": [
            "logs",
            ".db",
            ".cached",
            ".first_run",
            ".log.filename",
            "buy_auto_updater_users.txt",
            "user_monthly_pay_info.txt",
            "notices.txt",
            f"chrome_portable_{QQLogin.chrome_major_version}.7z",
            f"chrome_portable_{QQLogin.chrome_major_version}",
        ],
    }
    for dir_path, filenames in dir_to_filenames_need_remove.items():
        for filename in filenames:
            filepath = os.path.join(dir_current_release,
                                    f"{dir_path}/{filename}")
            if not os.path.exists(filepath):
                continue

            if os.path.isdir(filepath):
                logger.info(f"移除目录 {filepath}")
                shutil.rmtree(filepath, ignore_errors=True)
            else:
                logger.info(f"移除文件 {filepath}")
                os.remove(filepath)

    # Compress and package
    os.chdir(dir_all_release)
    logger.info(color("bold_yellow") + "开始压缩打包")
    compress_dir_with_bandizip(release_dir_name, release_7z_name, dir_src)

    # Keep an extra copy of the latest archive for the GitHub Action artifact
    shutil.copyfile(release_7z_name,
                    os.path.join(dir_github_action_artifact, 'djc_helper.7z'))

    os.chdir(old_cwd)
Example #38
def show_end_time(end_time, time_fmt="%Y-%m-%d %H:%M:%S"):
    # end_time = "2021-02-23 00:00:00"
    remaining_time = get_remaining_time(end_time, time_fmt)
    logger.info(
        color("bold_black") + f"活动的结束时间为{end_time},剩余时间为{remaining_time}")
Example #39
    "5": "PAN-BOL",
    "6": "Libre",
    "7": "CC",
    "8": "Juntos",
}

if __name__ == "__main__":
    from multiprocessing import cpu_count
    import threading

    votes = 0
    url = "https://yoparticipo.voto/"
    votes_for = 2  # 1 = Creemos, 2 = ADN, 3 = MAS, ........

    cores = cpu_count()
    logger.info(f"cpu cores: {cores}")
    while True:
        driver = get_driver(headless=True)
        ## more CPU cores means faster execution; threads can only run in parallel
        ## when the CPU has 2 or more cores
        if cores > 1:
            threading.Thread(target=run,
                             kwargs={
                                 "driver": driver,
                                 "url": url,
                                 "opt": votes_for
                             }).start()

        else:
            run(driver=driver, url=url, opt=votes_for)
        votes += 1
Example #40
    def get_leader(self):
        # If the key is not yet set to expire, there is no point retrying now.
        # This is bound to lead to thundering herd problem. However, it's still
        # better than hitting the DB more often.
        logger.debug('time to refresh: %s', self.time_to_refresh)
        if datetime.datetime.now() < self.time_to_refresh:
            logger.debug('Returning leadership from cache. %s',
                         self.current_leader)
            return self.current_leader

        logger.debug('Checking redis for leadership')

        # Get a handle to Redis
        r = redis.Redis(connection_pool=self.redis_pool)

        succeeded = False
        decoded_process_lock_value = ''
        retries = 1

        time_to_refresh_delta = 0

        while not succeeded:
            # Conditionally write only if the key doesn't already exist. If we succeed,
            # we will be the leader; otherwise we must find out who the leader is.
            is_leader = r.set(self.process_lock_key,
                              json.dumps(self.process_lock_value.__dict__),
                              ex=self.process_lock_ttl,
                              nx=True)
            time_to_refresh_delta = datetime.timedelta(
                seconds=self.process_lock_ttl)
            logger.debug('Succeeded in writing leadership record = ' +
                         str(is_leader))

            if not is_leader:
                # If set didn't succeed, read the value and decode it.
                process_lock_value_string = r.get(self.process_lock_key)
                decoded_process_lock_value = unmarshal_json.decode_json_dump(
                    process_lock_value_string)

                # If this process is the leader, update the expiry of the record in the database;
                # else update the time when this process must refresh the leadership record.
                if decoded_process_lock_value.__dict__ == self.process_lock_value.__dict__:
                    is_leader = True
                    r.expire(self.process_lock_key, self.process_lock_ttl)
                else:
                    ttl = r.ttl(self.process_lock_key)
                    # Sometimes r.ttl returns a type that's not compatible with datetime.timedelta.
                    # This if block resolves that issue.
                    if not ttl:
                        ttl = 0
                    time_to_refresh_delta = datetime.timedelta(
                        seconds=int(ttl))

            if is_leader or isinstance(decoded_process_lock_value,
                                       ProcessLockValueType):
                succeeded = True
                retries = 1

            # If we failed to determine leadership, back-off and retry.
            if not succeeded:
                logger.info('Did not succeed in getting a leader. Will retry')
                retries *= 2
                time.sleep(self.retry_interval_ms * retries / 1000)

        # Now that we have determined the leader, update the local parameters.
        self.time_to_refresh = datetime.datetime.now() + time_to_refresh_delta

        if is_leader:
            self.current_leader = self.process_lock_value
        else:
            self.current_leader = decoded_process_lock_value

        return self.current_leader
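
The leader election above boils down to the Redis "SET key value NX EX ttl" primitive. A minimal standalone sketch of just that primitive with redis-py (the key name and identity payload are illustrative, not taken from the class above):

import json
import redis

r = redis.Redis()
me = {"host": "worker-1", "pid": 1234}  # hypothetical identity payload

# Atomically claim leadership only if nobody holds the key (nx), for 30 seconds (ex).
became_leader = r.set("leader-lock", json.dumps(me), nx=True, ex=30)
if became_leader:
    print("I am the leader; refresh the TTL before it expires to keep the role")
else:
    print("Current leader:", r.get("leader-lock"))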
Example #41
def loginFectiva(browser,settings,account,pwd):
    FLAG_LOGIN = False
    while not FLAG_LOGIN:
        try:
            
            browser.get("http://library-admin.anu.edu.au/tools/factiva-redirect")
            
            if GATEWAY == 'OUTSIDE':
                logger.info("Gateway: OUTSIDE")
                wait = WebDriverWait(browser, 5)
                anuID = wait.until(EC.presence_of_element_located((By.ID, 'requester')))
                anuID.send_keys(account)
                password = browser.find_element_by_id('requesteremail')
                password.send_keys(pwd)
                browser.get_screenshot_as_file("logs/pre-login.png")
                password.send_keys(Keys.RETURN)
            elif GATEWAY == 'ANULIB':
                logger.info("Gateway: ANULIB")
            else:
                logger.error("Please Set Cookie Gateway!")
            logger.info('Start login fectiva...')
            wait = WebDriverWait(browser, 40)
            browser.get_screenshot_as_file("logs/login.png")
            btn = wait.until(EC.presence_of_element_located((By.ID, 'btnSearchBottom')))
            #select searching date
            dr = Select(browser.find_element_by_name('dr'))
            dr.select_by_visible_text('Enter date range...')
            #start date
            frd = browser.find_element_by_id('frd')
            frd.send_keys(settings['startDate']['frd'])
            frm = browser.find_element_by_id('frm')
            frm.send_keys(settings['startDate']['frm'])
            fry = browser.find_element_by_id('fry')
            fry.send_keys(settings['startDate']['fry'])
            #Enddate
            tod = browser.find_element_by_id('tod')
            tod.send_keys(settings['endDate']['tod'])
            tom = browser.find_element_by_id('tom')
            tom.send_keys(settings['endDate']['tom'])
            toy = browser.find_element_by_id('toy')
            toy.send_keys(settings['endDate']['toy'])
            filter = Select(browser.find_element_by_name('isrd'))
            filter.select_by_visible_text('Off')
            browser.execute_script('document.getElementById("ftx").value="{}";doLinkSubmit("../ha/default.aspx");'.format(settings['term']))
            headlineFrame = wait.until(EC.presence_of_element_located((By.ID, 'headlineFrame')))
            browser.get_screenshot_as_file("logs/search.png")
            FLAG_LOGIN = True
        except NoSuchElementException:
            logger.error('No Element during login')
            browser.close()
        except ElementNotVisibleException:
            logger.error('Not Visible during login')
#            browser.close()
        except TimeoutException:
            logger.error('Timeout during login')
            browser.get_screenshot_as_file("logs/Timeout.png")
            #browser.close()
    list_cookies = browser.get_cookies()
    cookies=dict()
    for item in list_cookies:
        cookies[item['name']] = item['value']

    return json.dumps(cookies)
Example #42
    parser.add_argument("-i", "--esindex", help="Name of index to store to.", default=ESINDEX)
    parser.add_argument("-t", "--estype", help="Type of index to store to.", default=ESTYPE)
    parser.add_argument("-m", "--msdsummaryfile", help="MSD summary file.")
    parser.add_argument("-d", "--msddirectory", help="MSD directory structure.")
    parser.add_argument("-f", "--force", help="Force writing in existing ES index.", default=False, action="store_true")
    args = parser.parse_args()

    # Setup elasticsearch
    eshelper = Eshelper(args.eshost, args.esport, args.esindex, args.estype)
    force_index = bool(args.force)
    eshelper.check_host_reachable()
    eshelper.check_index_safe(force_index)

    # Setup track generator
    if args.msdsummaryfile:
        logger.info("Load summary file {}".format(args.msdsummaryfile))
        track_gen = TrackGeneratorFromSummary()
        track_gen.load(args.msdsummaryfile)
    elif args.msddirectory:
        logger.info("Use directory {}".format(args.msddirectory))
        track_gen = TrackGeneratorFromDirectory()
        track_gen.load(args.msddirectory)
    else:
        logger.error("-m or -d must be given as a parameter")
        sys.exit(1)
    track_gen.check()

    # Setup ingestor
    ingestor = Ingestor(eshelper, track_gen)

    ingestor.ingest()
Example #43
def resetCheckPoint():
    checkpoint = {'Dowjones':0, 'Publication':0, 'Website': 0,'Blog':0}
    saveCheckPoint(checkpoint)
    logger.info('Reset checkpoint')
Example #44
def emptydb(config):

    if not config.allow_destroy_db():
        logger.error("Refusing to do a destructive database operation "
                     "because the allow_destroy_db configuration option "
                     "is set to false.")
        return False

    env = os.environ.copy()
    env.update(config.get_pg_environment())

    logger.info("Removing all tables...")
    # get list of drop table commands
    cmd = (
        config.get_psql_binary(), "-t", "-c",
        "SELECT 'DROP TABLE ' || n.nspname || '.\"' || c.relname || '\" CASCADE;' "
        "FROM pg_catalog.pg_class AS c LEFT JOIN pg_catalog.pg_namespace AS n "
        "ON n.oid = c.relnamespace WHERE relkind = 'r' AND n.nspname NOT IN "
        "('pg_catalog', 'pg_toast') AND pg_catalog.pg_table_is_visible(c.oid)")
    logger.trace("Executing %s, creating pipe for stdout,stderr" % str(cmd))
    proc1 = subprocess.Popen(cmd,
                             env=env,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    (stdout, stderr) = proc1.communicate()

    if stderr != '':
        logger.error("An error occured while calling psql: %s" % stderr)
        return False

    stdin = stdout
    cmd = (config.get_psql_binary(), )
    logger.trace("Piping stdout,stderr to %s" % str(cmd))
    proc2 = subprocess.Popen(cmd,
                             env=env,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    (stdout, stderr) = proc2.communicate(stdin)

    if stderr != '':
        logger.error("An error occured while calling psql: %s" % stderr)
        return False

    logger.info("Removing all sequences...")
    # get list of drop sequence commands
    cmd = (config.get_psql_binary(), "-t", "-c",
           "SELECT 'DROP SEQUENCE ' || n.nspname || '.\"' || c.relname || '\" "
           "CASCADE;' FROM pg_catalog.pg_class AS c LEFT JOIN "
           "pg_catalog.pg_namespace AS n ON n.oid = c.relnamespace WHERE "
           "relkind = 'S' AND n.nspname NOT IN ('pg_catalog', 'pg_toast') AND "
           "pg_catalog.pg_table_is_visible(c.oid)")
    logger.trace("Executing %s, creating pipe for stdout,stderr" % str(cmd))
    proc1 = subprocess.Popen(cmd,
                             env=env,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    (stdout, stderr) = proc1.communicate()

    if stderr != '':
        logger.error("An error occured while calling psql: %s" % stderr)
        return False

    stdin = stdout
    cmd = (config.get_psql_binary(), )
    logger.trace("Piping stdout,stderr to %s" % str(cmd))
    proc2 = subprocess.Popen(cmd,
                             env=env,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    (stdout, stderr) = proc2.communicate(stdin)

    if stderr != '':
        logger.error("An error occured while calling psql: %s" % stderr)
        return False

    return True
Example #45
            
            checkpoint[source] = currentPage
            saveCheckPoint(checkpoint)

            btn_nextpage = browser.find_element_by_xpath('//a[@class="nextItem"]')
            btn_nextpage.click()
            wait.until(EC.text_to_be_present_in_element((By.XPATH, '//div[@id="headlines"]/table/tbody/tr[@class="headline"][1]/td[@class="count"]'), '{}.'.format(nextPageStart) ))


settings = loadSettings()
resetCheckPoint()
browser = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
loginFectiva(browser,settings,'','')

timeSplit,status = getOverview(browser,settings)
logger.info('Time split:{}'.format(timeSplit))
browser.close()
for (start, end) in timeSplit:
    logger.info('Now start from {} to {}.'.format(str(start), str(end)))
    settings['startDate']['frd'] = start.day
    settings['startDate']['frm'] = start.month
    settings['startDate']['fry'] = start.year
    settings['endDate']['tod'] = end.day
    settings['endDate']['tom'] = end.month
    settings['endDate']['toy'] = end.year

    while True:
        crawled = status['crawled_pages']
        checkpoint = loadCheckPoint()
        browser = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
Exemplo n.º 46
0
def crawlFectiva(browser,checkpoint,status):

#select = Select(browser.find_element_by_name('hso'))
#select.select_by_visible_text('Sort by: Oldest first')
    for source in ['Blog','Website','Dowjones','Publication']:
        logger.info('Start crawling articles from: {}...'.format(source))        
        articlesOfChannel = browser.find_element_by_xpath('//span[@data-channel="{}"][1]/a/span[@class="hitsCount"]'.format(source)).text.replace(',','')
        articlesOfChannel = int(re.search('\((.*)\)',articlesOfChannel).group(1)) 
        if articlesOfChannel == 0:
            logger.info('No articles in {} channel.'.format(source))
            logger.info('End of Source from {}'.format(source))
            continue
        
        dataChannel = browser.find_element_by_xpath('//span[@data-channel="{}"]'.format(source))
        dataChannel.click()
        wait = WebDriverWait(browser, 10)
        btn = wait.until(EC.presence_of_element_located((By.XPATH, '//span[@class="tabOn"][@data-channel="{}"]'.format(source))))
        #Compute the total pages we need to download
        currentPage,totalPages,nextPageStart,totalArticles,articlesInThisPage = getStatus(browser)
        #Load checkpoint
        checkpoint = loadCheckPoint()
        logger.info('Total pages:{} , currentAt:{} , checkPointAt:{}'.format(totalPages,currentPage,checkpoint[source]))
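        # Loop until currentPage has reached both totalPages and the page
        # recorded in the checkpoint; the totalPages == 0 check presumably
        # covers channels whose page count has not been determined yet.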
        while currentPage != totalPages or checkpoint[source]!=currentPage or totalPages == 0:
            logger.info('Total pages:{} , currentAt:{} , checkPointAt:{}'.format(totalPages,currentPage,checkpoint[source]))
            for i in range(abs(checkpoint[source] - currentPage)):
                #Compute the total pages we need to download
                currentPage,totalPages,nextPageStart,totalArticles,articlesInThisPage = getStatus(browser)
                logger.info('Skip Page To checkPoint...Total pages:{} , currentAt:{} , checkPointAt:{}'.format(totalPages,currentPage,checkpoint[source]))
                btn_nextpage = browser.find_element_by_xpath('//a[@class="nextItem"]')
                btn_nextpage.click()
                wait.until(EC.text_to_be_present_in_element((By.XPATH, '//div[@id="headlines"]/table/tbody/tr[@class="headline"][1]/td[@class="count"]'), '{}.'.format(nextPageStart) ))
                status['crawled_pages'] += 100

            #Compute the total pages we need to download
            currentPage,totalPages,nextPageStart,totalArticles,articlesInThisPage = getStatus(browser)

            for id in range(1,articlesInThisPage + 1):
                status['crawled_pages'] += 1
                updateProgress(min(status['crawled_pages']/float(status['totalArticles']), 0.99))
                headline,date,author,documentID,documentType = getArticleInfo(browser,id,source)
                if checkItemExist(documentID):
                    logger.info('{:.1%} item {} exist in database skip to next one.'.format(status['crawled_pages']/float(status['totalArticles']),id))
                    continue     
                if documentType == 'Factiva Licensed Content':
                    
                    logger.debug('id:{}, documentID:{}, Headline:{}, date:{}, author:{} '.format(id,documentID,headline.text,date,author))
                    headline.click()
                    logger.debug('waiting content response')
                    wait.until(EC.text_to_be_present_in_element((By.XPATH, '//div[@id="artHdr1"]/span[1]'), 'Article {}'.format(currentPage * 100 + id) ))
                    logger.debug('get content response')
                    articleHtml = browser.find_element_by_xpath('//div[@class="article enArticle"]')
                    title =headline.text
                    content = articleHtml.get_attribute('innerHTML')
                    
                    date = parse(date).strftime('%Y-%m-%d')
                    crawldate = parse(str(datetime.now())).strftime('%Y-%m-%d')
                    url = ''
                    likelihood = processItem(documentID,title,author,content,date,crawldate,url,source)
                    logger.info('{:.1%} [DOC] Get {} of {} in page {}.Totally {} pages {} articles, likelihood: {:.2}'.format(status['crawled_pages']/float(status['totalArticles']),id,articlesInThisPage, currentPage,totalPages,totalArticles,likelihood))
                    sleep(2)
                if documentType == 'HTML':
                    browser.set_page_load_timeout(6)
                    try:
                        headline.click()
                        title = headline.text
                        window_main = browser.window_handles[0]
                        window_download = browser.window_handles[-1]
                        browser.switch_to_window(window_download)
                        sleep(4)
                        url = browser.current_url
                        logger.debug('Try to get html page source')
                        content = browser.page_source
                        logger.debug('Get website success.')
                    except:
                        url = browser.current_url
                        content = "<h1><a href='{}'>baidu</a></h1>".format(url)
                        logger.warning('No response from {} of page {} title:{}, url:{}'.format(documentID,currentPage,title,url))
                        
                    logger.debug('id:{}, documentID:{}, Headline:{}, date:{}, author:{} '.format(id,documentID,title,date,author))
                    date = parse(date).strftime('%Y-%m-%d')
                    crawldate = parse(str(datetime.now())).strftime('%Y-%m-%d')
                    likelihood = processItem(documentID,title,author,content,date,crawldate,url,source)
                    logger.info('{:.1%} [HTM]Get {} of 100 in page {}.Totally {} pages {} articles, likelihood: {:.2}'.format(status['crawled_pages']/float(status['totalArticles']),id, currentPage,totalPages,totalArticles,likelihood))
                    try:
                        browser.execute_script('window.close();')     
                    except:
                        logger.debug('Close tab error')               
                    browser.switch_to_window(window_main)
                    
                    

         
            # sometimes recaptcha occurs here
            # view next page
            if currentPage == totalPages:
                logger.info('End of Source from {}'.format(source))
                break
            
            checkpoint[source] = currentPage
            saveCheckPoint(checkpoint)

            btn_nextpage = browser.find_element_by_xpath('//a[@class="nextItem"]')
            btn_nextpage.click()
            wait.until(EC.text_to_be_present_in_element((By.XPATH, '//div[@id="headlines"]/table/tbody/tr[@class="headline"][1]/td[@class="count"]'), '{}.'.format(nextPageStart) ))
Exemplo n.º 47
0
 def pull(self):
     logger.info("pulling from branch %s", self.source)
     return f"Branch = {self.source}"
Exemplo n.º 48
0
def saveCheckPoint(checkpoint):
    with open('checkpoint/checkpoint.json', 'w') as f:
        json.dump(checkpoint, f)
    logger.info('Save checkpoint at {}'.format(checkpoint))
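
The matching loader is not shown in this snippet; a minimal sketch, assuming the same JSON file written by saveCheckPoint above (the fallback to an empty dict is an assumption):

def loadCheckPoint():
    # Assumed counterpart to saveCheckPoint: read the JSON dict back,
    # falling back to an empty dict if no checkpoint has been written yet.
    try:
        with open('checkpoint/checkpoint.json') as f:
            checkpoint = json.load(f)
    except (IOError, ValueError):
        checkpoint = {}
    logger.info('Load checkpoint: {}'.format(checkpoint))
    return checkpoint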
Exemplo n.º 49
0
# -*- coding:utf-8 -*-

import sys

from kafkaMonitor import KafkaMonitor
from log import logger

if __name__ == '__main__':
    logger.info('-------------------------- Start running --------------------------')
    try:
        logger.info('Operation: run performance monitoring')
        interval, time = '1', '1'
        if len(sys.argv) == 2:
            interval = sys.argv[1]
        elif len(sys.argv) == 3:
            interval, time = sys.argv[1], sys.argv[2]

        exclustion = []
        if len(sys.argv) > 3:
            interval, time = sys.argv[1], sys.argv[2]
            exclustion = sys.argv[3:]

        if interval != '1':
            if not interval.isdigit():
                logger.error('The sampling interval must be a number')
                exit()

        if time != '1':
            temp = time.replace('+', '').replace(' ', '')
            if not temp.isdigit():
                print('The sampling duration must be a number or an arithmetic expression')
Exemplo n.º 50
0
def add_worker():
    logger.info(f"adding worker")
    sched.add_worker()
    return jsonify(
        status='OK'
    )
Exemplo n.º 51
0
def main():
    create_purchases_file(PURCHASES_FILE)
    purchases = load_purchases(PURCHASES_FILE)
    stats = PurchasesStats(purchases).process()
    logger.info("Results: %s", stats)
Exemplo n.º 52
0
 def pull(self):
     logger.info("pulling from tag %s", self.source)
     print(self.__class__._attributes)
     return f"Tag = {self.source}"
Exemplo n.º 53
0
import pika
import os
import time
import json
import requests
import datetime
from log import logger

jobs_api_host = os.getenv('JOB_API_HOST')
logger.info('Job API host: {}'.format(jobs_api_host))

rabbitmq_url = os.getenv('AMQP_URL')
logger.info('Connecting to RabbitMQ at {}'.format(rabbitmq_url))


def update_job(job_id, body):
    response = requests.patch(jobs_api_host + '/jobs/' + job_id,
                              headers={'Content-Type': 'application/json'},
                              json=body)
    logger.debug('API responded with ' + str(response.status_code))


def callback(body):
    if body is None:
        return

    data = json.loads(body)
    job_id = str(data['id'])
    logger.debug('Received job [{}]'.format(job_id))

    update_job(job_id, {
Exemplo n.º 54
0
 def save_cache(self):
     logger.info("Saving entity score cache")
     with open(settings.ENTITY_SCORE_CACHE_PATH, 'w') as cache_file:
         json.dump(self.entity_scores, cache_file)
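
A matching loader is not part of this snippet; a possible sketch, assuming the same JSON layout and that os is imported in the module:

 def load_cache(self):
     # Hypothetical counterpart to save_cache: restore the score cache if the
     # file exists, otherwise start from an empty dict (an assumption).
     logger.info("Loading entity score cache")
     if os.path.exists(settings.ENTITY_SCORE_CACHE_PATH):
         with open(settings.ENTITY_SCORE_CACHE_PATH) as cache_file:
             self.entity_scores = json.load(cache_file)
     else:
         self.entity_scores = {}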
Exemplo n.º 55
0
def build_graph(opts, iterations_per_step=1, is_training=True):

    train_graph = tf.Graph()
    with train_graph.as_default():
        bert_config = bert_ipu.BertConfig.from_dict(
            opts, config=bert_ipu.BertConfig(vocab_size=None))
        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16
        placeholders = dict()

        learning_rate = None
        opts['version_2_with_negative'] = False
        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        # building networks with pipeline
        if not should_be_pipeline_when_inference(opts):

            def bert_net():
                return build_infer_network_without_pipeline(
                    train_iterator,
                    outfeed_queue,
                    iterations_per_step,
                    bert_config=bert_config,
                    opts=opts)
        else:

            def bert_net():
                return build_network(train_iterator, outfeed_queue,
                                     iterations_per_step, bert_config, opts,
                                     learning_rate, is_training)

        with ipu_scope('/device:IPU:0'):
            embedded = opts["embedded_runtime"]

            if embedded and is_training:
                raise ValueError(
                    "embedded_runtime is only to be used for inference.")

            train = ipu.ipu_compiler.compile(bert_net,
                                             []) if not embedded else None

        exec_path = None
        compile_op = None
        poplar_exec_filepath = get_exec_path(
            opts['seq_length'], opts['micro_batch_size'],
            opts['device_mapping'], should_be_pipeline_when_inference(opts))
        exec_path = os.path.join(poplar_exec_filepath)
        compile_op = application_compile_op.experimental_application_compile_op(
            bert_net, output_path=exec_path, freeze_variables=True)

        outfeed = outfeed_queue.dequeue()

        restore = tf.train.Saver(var_list=tf.global_variables())

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()

    # Calculate the number of required IPUs
    num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas'])
    # The number of acquired IPUs must be a power of 2.
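    # e.g. max(device_mapping) == 5 with 2 replicas requests 12 IPUs,
    # which is rounded up to 16 below (illustrative values).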
    if num_ipus & (num_ipus - 1) != 0:
        num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2)))
    ipu_config = get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection='CLUSTERING',
        compile_only=False,
        ipu_id=None,
        partials_type=opts["partials_type"],
        available_memory_proportion=opts['available_memory_proportion'])

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)
    _ = train_sess.run(train_init, [])
    # -----------------
    # Checkpoint restore and save
    init_checkpoint_path = opts['init_checkpoint']
    logger.info(f"At the checkpoint location {init_checkpoint_path}")
    if init_checkpoint_path:
        logger.info("Loading checkpoint...")
        if os.path.isfile(init_checkpoint_path):
            init_checkpoint_path = os.path.splitext(init_checkpoint_path)[0]
            logger.info(f"checkpoint path: {init_checkpoint_path}")

        (assignment_map, initialized_variable_names
         ) = bert_ipu.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint_path)

        for var in tvars:
            if var.name in initialized_variable_names:
                mark = "*"
            else:
                mark = " "
            logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape,
                        var.dtype.name)

        reader = tf.train.NewCheckpointReader(init_checkpoint_path)
        load_vars = reader.get_variable_to_shape_map()

        saver_restore = tf.train.Saver(assignment_map)
        saver_restore.restore(train_sess, init_checkpoint_path)
    # -----------------
    if compile_op is not None:
        logger.info(
            f"Compiling and saving Poplar executable to {poplar_exec_filepath}"
        )
        _ = train_sess.run(compile_op, [])
    else:
        exec_path = None
    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, restore, tvars,
                    exec_path), ipu_config
Exemplo n.º 56
0
 def get(self, shopping_id, db_session):
     logger.info(LogMsg.START)
     data = dict(filter=dict(shopping_key=shopping_id))
     return super(PaymentController, self).get_by_data(data, db_session)
Exemplo n.º 57
0
def run_time(opts, dataset_list=None):
    bs = opts['micro_batch_size']
    seq_length = opts['seq_length']
    poplar_exec_filepath = get_exec_path(
        opts['seq_length'], opts['micro_batch_size'], opts['device_mapping'],
        should_be_pipeline_when_inference(opts))
    logger.info(f"Poplar executable path: {poplar_exec_filepath}")
    inputs = []
    engine_name = "my_engine"
    ctx = embedded_runtime.embedded_runtime_start(poplar_exec_filepath,
                                                  inputs,
                                                  engine_name,
                                                  timeout=1000)
    input_ids = tf.placeholder(tf.int32, (bs, seq_length))
    input_mask = tf.placeholder(tf.int32, (bs, seq_length))
    segment_ids = tf.placeholder(tf.int32, (bs, seq_length))
    unique_ids = tf.placeholder(tf.int32, (bs, ))
    placeholders = [input_ids, input_mask, segment_ids, unique_ids]
    durations = []
    master_durations = []
    test_results = []
    call_result = embedded_runtime.embedded_runtime_call(placeholders, ctx)
    thread_queue = Queue()
    ipu.config.reset_ipu_configuration()
    gc.collect()
    all_results = []
    if opts['generated_data']:
        number_of_steps = int(opts['num_iter'])
    else:
        number_of_steps = len(dataset_list[0]) // bs

    with tf.Session() as sess:
        logger.debug(f"Number of threads: {opts['num_inference_thread']}")
        logger.debug(f"Data Type: {opts['generated_data']}")
        if opts['generated_data']:
            feed_dict = synthetic_feed_dict(placeholders,
                                            opts['micro_batch_size'],
                                            opts['seq_length'])
        else:
            feed_dict = parse_feed_dict(placeholders,
                                        dataset_list,
                                        bs,
                                        seq_length,
                                        i=0)

        def runner(feed_dict, session):
            for step in range(number_of_steps):
                if opts['generated_data']:
                    feed_dict = synthetic_feed_dict(placeholders,
                                                    opts['micro_batch_size'],
                                                    opts['seq_length'])
                else:
                    feed_dict = parse_feed_dict(placeholders,
                                                dataset_list,
                                                bs,
                                                seq_length,
                                                i=step)

                start = time.time()
                ans = session.run(call_result, feed_dict=feed_dict)
                stop = time.time()
                if step % int(opts['steps_per_logs']) == 0:
                    logger.info(
                        f"{step}/{number_of_steps}\t\t[{100*float(step/number_of_steps):.2f}%]"
                    )

                durations.append((start, stop))
                master_durations.append(stop - start)

                if opts['do_predict']:
                    all_results.extend([
                        squad_results.RawResult(
                            this_unique_id,
                            this_start_logit.astype(np.float64),
                            this_end_logit.astype(np.float64))
                        for this_unique_id, this_start_logit, this_end_logit in
                        zip(ans[0], ans[1], ans[2])
                    ])
            thread_queue.put(durations, timeout=10)

        thp = [
            Thread(target=runner, args=(feed_dict, sess))
            for _ in range(opts['num_inference_thread'])
        ]
        start_time = time.time()
        for idx, _thread in enumerate(thp):
            _thread.start()
            logger.info(f"Thread {idx} started.")

        for idx, _thread in enumerate(thp):
            _thread.join()
            logger.info(f"Thread {idx} join.")

        total_dur = time.time() - start_time

        durations_from_th = []
        while not thread_queue.empty():
            durations_from_th += thread_queue.get()

        latencies = [y - x for x, y in durations_from_th]
        latency = np.mean(latencies)
        latency_99 = np.percentile(latencies, 99)
        latency_99_9 = np.percentile(latencies, 99.9)
        min_start = min([x for x, _ in durations_from_th])
        max_stop = max([y for _, y in durations_from_th])
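        # Throughput counts every sample from every thread over the wall-clock
        # window between the earliest start and the latest stop timestamps.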
        tput = bs * opts['num_inference_thread'] * number_of_steps / (
            max_stop - min_start)

        test_results = {
            'batch size': bs,
            'latency': latency,
            'latency_99': latency_99,
            'latency_99_9': latency_99_9,
            'throughput': tput
        }
        logger.info(test_results)
    return all_results
Exemplo n.º 58
0
    def main_thread(self):
        """
        Monitors the jobs in current_jobs, updates their statuses,
        and puts their tasks in queues to be processed by other threads
        """
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        try:
            last_saved = None
            while not self.shutdown.is_set():
                # Iterate backwards so we can delete jobs
                for job in reversed(self.jobs):
                    if job.status == Status.INIT:
                        def start_this_job(job):
                            if isinstance(job, ModelJob):
                                if job.dataset.status == Status.DONE:
                                    job.status = Status.RUN
                                elif job.dataset.status in [Status.ABORT, Status.ERROR]:
                                    job.abort()
                                else:
                                    job.status = Status.WAIT
                            else:
                                job.status = Status.RUN
                        if config_option('level') == 'test':
                            start_this_job(job)
                        else:
                            # Delay start by one second for initial page load
                            gevent.spawn_later(1, start_this_job, job)

                    if job.status == Status.WAIT:
                        if isinstance(job, ModelJob):
                            if job.dataset.status == Status.DONE:
                                job.status = Status.RUN
                            elif job.dataset.status in [Status.ABORT, Status.ERROR]:
                                job.abort()
                        else:
                            job.status = Status.RUN

                    if job.status == Status.RUN:
                        alldone = True
                        for task in job.tasks:
                            if task.status == Status.INIT:
                                alldone = False
                                if task.ready_to_queue():
                                    logger.debug('%s task queued.' % task.name(), job_id=job.id())
                                    task.status = Status.WAIT
                                    if isinstance(task, dataset_tasks.ParseFolderTask):
                                        self.split_queue.put( (job, task) )
                                    elif isinstance(task, dataset_tasks.CreateDbTask):
                                        self.create_queue.put( (job, task) )
                                    elif isinstance(task, model_tasks.TrainTask):
                                        self.train_queue.put( (job, task) )
                                    else:
                                        logger.error('Task type %s not recognized' % type(task).__name__, job_id=job.id())
                                        task.exception = Exception('Task type not recognized')
                                        task.status = Status.ERROR
                            elif task.status == Status.WAIT or task.status == Status.RUN:
                                alldone = False
                            elif task.status == Status.DONE:
                                pass
                            elif task.status == Status.ABORT:
                                pass
                            elif task.status == Status.ERROR:
                                job.status = Status.ERROR
                                alldone = False
                                break
                            else:
                                logger.warning('Unrecognized task status: "%s"', task.status, job_id=job.id())
                        if alldone:
                            job.status = Status.DONE
                            logger.info('Job complete.', job_id=job.id())
                            job.save()

                # save running jobs every 15 seconds
                if not last_saved or time.time()-last_saved > 15:
                    for job in self.jobs:
                        if job.status.is_running():
                            job.save()
                    last_saved = time.time()

                time.sleep(utils.wait_time())
        except KeyboardInterrupt:
            pass

        # Shutdown
        for job in self.jobs:
            job.abort()
            job.save()
        self.running = False
Exemplo n.º 59
0
                       default=1000,
                       help="Number of iterations to run inference for.")
    group.add_argument('--num-inference-thread',
                       action='store',
                       default=2,
                       help="Number of threads to use.")
    return parser


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.ERROR)

    opts = make_global_options([add_squad_options])

    set_defaults(opts)
    opts['num_inference_thread'] = int(opts['num_inference_thread'])
    poplar_options = os.getenv('POPLAR_ENGINE_OPTIONS', 'unset')

    logger.info(f"Poplar options: {poplar_options}")
    logger.info("Command line: " + ' '.join(sys.argv))
    logger.info("Options:\n" +
                json.dumps(OrderedDict(sorted(opts.items())), indent=1))

    set_poplar_engine_options(execution_profile=opts['execution_profile'],
                              memory_profile=opts['memory_profile'],
                              profile_dir=str(opts['profile_dir']),
                              sync_replicas_independently=False,
                              synthetic_data=opts['synthetic_data'],
                              tensorflow_progress_bar=opts['progress_bar'])
    predict_loop(opts)
Exemplo n.º 60
0
def predict_loop(opts):
    dataset_list = None
    if not opts['generated_data']:
        eval_examples = squad_data.read_squad_examples(opts["predict_file"],
                                                       opts,
                                                       is_training=False)
        tfrecord_dir = opts['tfrecord_dir']
        if not os.path.exists(tfrecord_dir):
            os.makedirs(tfrecord_dir)

        eval_writer = squad_data.FeatureWriter(filename=os.path.join(
            tfrecord_dir, "eval.tf_record"),
                                               is_training=False)
        eval_features = []

        tokenizer = tokenization.FullTokenizer(
            vocab_file=opts['vocab_file'], do_lower_case=opts['do_lower_case'])

        def append_feature(feature):
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        # Create eval.tfrecord
        num_features = squad_data.convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=opts["seq_length"],
            doc_stride=opts["doc_stride"],
            max_query_length=opts["max_query_length"],
            is_training=False,
            output_fn=append_feature)

        eval_writer.close()

        squad_dataset = data_loader.load(opts, is_training=False)
        squad_dataset = squad_dataset.make_one_shot_iterator()
        _input_mask_array = []
        _segment_ids_array = []
        _input_ids_array = []
        _unique_ids_array = []

        # Call `get_next()` once outside the loop to create the TensorFlow operations once.
        with tf.Session() as sess:
            next_element = squad_dataset.get_next()
            is_data = True
            while is_data:
                try:
                    output = sess.run(next_element)
                    _input_mask_array.extend(output['input_mask'])
                    _segment_ids_array.extend(output['segment_ids'])
                    _input_ids_array.extend(output['input_ids'])
                    _unique_ids_array.extend(output['unique_ids'])
                except tf.errors.OutOfRangeError:
                    print("end of training dataset")
                    is_data = False

        dataset_list = [
            _input_ids_array, _input_mask_array, _segment_ids_array,
            _unique_ids_array
        ]

    iterations_per_step = 1
    predict, ipu_config = build_graph(opts,
                                      iterations_per_step,
                                      is_training=False)

    if predict.exec_path is not None:
        all_results = run_time(opts, dataset_list)
        if opts['do_predict'] is True:
            logger.info(f"Writing out the predictions:")
            output_dir = opts['output_dir']
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            output_prediction_file = os.path.join(output_dir,
                                                  "predictions.json")
            output_nbest_file = os.path.join(output_dir,
                                             "best_predictions.json")
            output_null_log_odds_file = os.path.join(output_dir,
                                                     "null_odds.json")
            eval_features = eval_features[:num_features]
            squad_results.write_predictions(
                eval_examples, eval_features, all_results, opts["n_best_size"],
                opts["max_answer_length"], opts["do_lower_case"],
                output_prediction_file, output_nbest_file,
                output_null_log_odds_file, opts["version_2_with_negative"],
                opts["null_score_diff_threshold"], opts["verbose_logging"])

            predict.session.close()

            if opts['do_evaluation']:
                evaluate_squad(output_prediction_file, opts)