예제 #1
0
    def on_recieve(self, msg, src, dst, paras='', paras_to_save='', index=0):
        """
        Start the reaction sub-procedure for a received message.

        This keyword starts a sub-procedure used as the reaction to a
        received message; the sub-procedure ends when the next on_recieve
        is met.  (The keyword name keeps the historical 'recieve' spelling
        because callers depend on it.)

        The action set for a certain message is only valid for one test case.
        """

        # Lazily create the per-link message buffers on first use.
        if self.msg_buff is None:
            self._init_msg_buff()

        ne_link = self._topology.link(src, dst)

        if ne_link is None:
            logger.warn('No link between NEs to send message. '
                        '%s<-%s: %s' % (dst, src, msg))
            return

        # A reaction only makes sense for traffic flowing dut -> simulator.
        if not (ne_link.simulator == dst and
                ne_link.dut == src):
            logger.warn('Not a message from dut to simulator. '
                        '%s<-%s: %s' % (dst, src, msg))
            return

        msg_str = self._compose_on_recv(msg, paras, paras_to_save, index)
        msg_buff = self._get_msg_buff(ne_link.names[0])
        msg_buff.append(msg_str)
        # NOTE(review): the log text says "send" — looks copy-pasted from
        # send(); left unchanged to keep behavior byte-identical. Verify.
        logger.info('"send %s" added to buffer of %s' %
                    (msg, ne_link.names[0]))
예제 #2
0
 def check(self, expr, info=''):
     """Register *expr* (with an optional *info* note) on the check list."""
     # Make sure the message buffers exist before touching the check list.
     if self.msg_buff is None:
         self._init_msg_buff()

     self.chk_lst.check(expr, info)
     logger.info('"Check" added to check list')
예제 #3
0
    def send(self, msg, src, dst, paras='', paras_to_save='', delay=0):
        """
        Composing a message sending from simulator to dut.

        The composed message string is appended to the buffer of the
        link's first NE.  The first send issued before any on_recieve is
        recorded as the trigger NE for the test case.
        """

        # Lazily create the per-link message buffers on first use.
        if self.msg_buff is None:
            self._init_msg_buff()

        ne_link = self._topology.link(src, dst)

        if ne_link is None:
            logger.warn('No link between NEs to send message. '
                        '%s->%s: %s' % (src, dst, msg))
            return

        # Sending is only valid in the simulator -> dut direction.
        if not (ne_link.simulator == src and ne_link.dut == dst):
            logger.warn('Not a message from simulator to dut. '
                        '%s->%s: %s' % (src, dst, msg))
            return

        msg_str = self._compose_send(msg, paras, paras_to_save, delay)
        msg_buff = self._get_msg_buff(ne_link.names[0])
        msg_buff.append(msg_str)
        logger.info('"send %s" added to buffer of %s' %
                    (msg, ne_link.names[0]))

        # The first send with no preceding on_recieve defines the trigger
        # NE that kicks the test case off.
        if self.trigger is None and not self._on_recv_found:
            self.trigger = msg_buff.ne_name
            logger.info('Found trigger: %s' % src)
예제 #4
0
    def _dpkg_install(self, update_dir=None):
        """Install an update or install a new package.

        If an update directory is given, it must be made sure that
        the directory contains all of the dependencies for ALL of the
        packages inside the directory.

        @param update_dir: directory whose .deb packages are installed
            recursively; when falsy, nothing is installed.
        @return: tuple of ('true'/'false', error message).
        """

        if not update_dir:
            logger.info(
                'debhandler.py/_dpkg_install: No directory provided.')
            # Bug fix: the original concatenated None into the message
            # ('Update dir: ' + update_dir), raising TypeError instead of
            # returning the failure tuple.
            return 'false', 'Update dir: {0}'.format(update_dir)

        install_command = ['dpkg', '-i', '-R', update_dir]

        try:
            result, err = self.utilcmds.run_command(install_command)

            # dpkg wrote to stderr: treat as failure.
            if err:
                raise Exception(err)

            logger.info(
                ('debhandler.py/_dpkg_install: '
                 'Installed all packages in: ') + update_dir)

            return 'true', ''

        except Exception as e:
            logger.error(
                ('debhandler.py/_dpkg_install: '
                 'Failed to install packages in: ') + update_dir)

            return 'false', str(e)
예제 #5
0
    def run(self):
        """Synchronise device information between RS and AirWatch for this
        account: fetch from RS, fetch from AirWatch, sync back to RS, then
        mark devices missing from AirWatch as unenrolled."""
        logger.info('Devices\' information sync with airwatch started at %s under account %d!' % (str(time.time()), self.aw_account))
        try:
            logger.info('Step 1: Get device information from RS.')
            rs_retval = self._get_devinfo_from_rs()
            logger.debug('Account %d includes %d device needed sync with airwatch!' % (self.aw_account, rs_retval))
            if rs_retval <= 0:
                # report
                return  # exit, no work to do

            logger.info('Step 2: Get device information from airwatch.')
            aw_retval = self._get_devinfo_from_aw(self.devinfo_original_queue, self.devinfo_refresh_queue)
            logger.debug('Account %d, get %d device information from airwatch!' % (self.aw_account, aw_retval))
            if (aw_retval != rs_retval):
                # Devices absent from AirWatch must later be flagged as
                # unenrolled (collected in self.m_defect_devs).
                logger.warning('Account %d, Original device number (%d) and refresh device number (%d) NOT match!' % (self.aw_account, rs_retval, aw_retval))

            logger.info('Step 3: Sync device information to RS.')
            sync_retval = self._sync_devinfo()
            logger.debug('Account %d, sync %d device information with RS finished!' % (self.aw_account, sync_retval))
            if (aw_retval != sync_retval):
                # A mismatch here indicates some RS updates failed.
                logger.warning('Account %d, Refresh device number (%d) and Sync device number (%d) NOT match!' % (self.aw_account, aw_retval, sync_retval))

            # Step 4: Update the defect devices to be unenroll.
            if self.m_defect_devs:
                defect_cnt = len(self.m_defect_devs)
                logger.info('Step 4: Set %d devices to be "unenroll" status!' % defect_cnt)
                ok_cnt = self._unenroll_dev_status(self.m_defect_devs)
                if defect_cnt != ok_cnt:
                    logger.warning('Account %d, Set %d devices to be "unenroll status failed!"' % (self.aw_account, (defect_cnt - ok_cnt)))
            # Step 5: Report
        except Exception, e:
            # Python 2 syntax; any failure is logged and swallowed so the
            # sync thread exits cleanly.
            logger.error(repr(e))
예제 #6
0
    def _compute_detailed_scores(self, item_ids, query_item_ids=None, max_terms=20):
        """Return a per-item breakdown of scores against the query vector.

        For every id in *item_ids* an object with ``total_score`` and the
        top *max_terms* (feature, weight) pairs is produced; unknown ids
        yield an empty zero-score entry.
        """
        # No explicit query: fall back to the previously queried items.
        if query_item_ids is None:
            query_item_ids = self.item_ids

        # Recompute the query vector when it is missing or stale.
        if not hasattr(self, 'q') or query_item_ids != self.item_ids:
            if not self.is_valid_query(query_item_ids):
                return []
            logger.info('Computing the query vector ...')
            self._make_query_vector()

        scores = []
        for item_id in item_ids:
            if item_id not in self.item_id_to_index:
                scores.append(utils._O(total_score=0, scores=[]))
                continue

            row = self.X[self.item_id_to_index[item_id]]
            nz_indices = row.indices

            feat_names = (self.index_to_feat[i] for i in nz_indices)

            weights = self.q.transpose()[nz_indices]
            weights = scipy.asarray(weights).flatten()

            # Rank by weight (then feature name) descending, keep the top.
            ranked = sorted(zip(feat_names, weights),
                            key=lambda pair: (pair[1], pair[0]), reverse=True)
            scores.append(utils._O(total_score=weights.sum(),
                                   scores=ranked[0:max_terms]))

        return scores
예제 #7
0
 def _reset_notify(self):
     """Push the current device type to every running US_DEVICE_END service."""
     from service_mgr.lib.service.service_main import ServiceMgr
     services = ServiceMgr().get_run_services(US_DEVICE_END)
     # Idiom fix: the original used a list comprehension purely for its
     # side effects, building a throwaway list; a plain loop states the
     # intent and wastes no memory.
     for service in services:
         device_type_notify(service.control_rpc, self.get_device_type())
     logger.info("DeviceTypeMgr _reset_notify services:%s, device_type:%s"
                 % ([service.id for service in services],
                    self.get_device_type()))
예제 #8
0
 def top_phrases(self, msg):
     """Reply with the five most reproduced phrases, formatted as Markdown."""
     logger.info("%s: sending top phrases to user %s" % (msg.text, user_log_string(msg)))
     lines = ["Top phrases (use '/say ID' to choose phrase):"]
     for audio_id, count in self.dataset.top_phrases.most_common(5):
         title = self.dataset.get_audio_data(audio_id)['title']
         lines.append("*[%2d]*  _%s_ (%d reproductions)" % (audio_id, title, count))
     self.reply_to(msg, "\n".join(lines), parse_mode="Markdown")
예제 #9
0
 def send_phrase(self, msg, audio_id):
     """Send the phrase with index *audio_id* to the chat of *msg*;
     out-of-range ids fall back to a random phrase."""
     if 0 <= audio_id < len(self.dataset.data_keys):
         audio, duration = self.dataset.get_phrase(audio_id)
         logger.info("%s: sending phrase #%d to user %s" % (msg.text, audio_id, user_log_string(msg)))
         self.send_audio(msg.chat.id, audio, duration)
     else:
         # Bug fix(review): the original called `bot.send_random_phrase`,
         # but no `bot` is in scope here; sibling calls use `self`.
         # Confirm no module-level `bot` exists before relying on this.
         self.send_random_phrase(msg)
def main():
    """Set up a test branch, merge, test, and optionally build; always
    clean up and restore the starting directory.

    Exits early after merging when --merge-only is given.  Failures
    (other than Ctrl-C) are logged rather than propagated so the cleanup
    in ``finally`` still runs and reports the test branch.
    """
    rel_info = ReleaseInfo()
    start_directory = os.getcwd()
    parse_args(rel_info)
    try:
        setup_test_branch(rel_info)
        merge_branches(rel_info)
        test_tag_doc(rel_info)

        if rel_info.args.merge_only:
            return

        build_platform(rel_info)
    except KeyboardInterrupt:
        pass
    except Exception:
        # Bug fix: the bare ``except:`` also swallowed SystemExit and
        # GeneratorExit; catch Exception so real interpreter exits pass.
        logger.exception("[FAILED] Please check your changes. "
                         "You can not pass this checker unless your branch "
                         "can be merged without conflicts.")
        logger.info("Note: all repos are in test branch %s" % rel_info.test_branch)
    else:
        logger.info(
            "[PASS] Your changes can be successfully merged and build.")
    finally:
        cleanup(rel_info)
        os.chdir(start_directory)
예제 #11
0
    def do_access(self, resource, method_name='POST', data=None, headers=None):
        """Request *resource* with the specified HTTP method.

        Parameters:
            resource: an absolute path which supplied by server.
            method_name: HTTP METHOD, can be 'GET', 'POST', 'PUT' or
                'DELETE', the default is 'POST'
            data: the data should transfer to server.
            headers: HTTP headers, should in dict form.
        Returns the response object with ``content`` decoded according to
        its content type (LDIF parsed, JSON loaded) or, on error, replaced
        by an error dict.
        """
        boundary = templates.generate_boundary()
        http_headers = self._generate_headers(boundary)
        if headers:
            http_headers.update(headers)
        if data:
            body = self._generate_post_body(data, boundary)
        else:
            # No body: drop the multipart Content-Type header entirely.
            body = ""
            http_headers.pop('Content-Type')

        logger.info('metanate request body: %s', body)
        # Bug fix: the caller's method_name was ignored — the request was
        # always issued as 'POST' despite the documented parameter.
        r = super(self.__class__, self).do_access(resource, method_name=method_name,
                                                  data=body, headers=http_headers)

        if r.code == 200:
            if not self.session_id:
                self.session_id = r.session_id
                logger.info('HTTP response header X-Schemus-Session is: %s', self.session_id)
            if r.content_type == "text/ldif":
                r.content = templates.parse_ldif(r.content)
            # Bug fix: the original compared the body (r.content) with
            # "application/json" instead of the content type.
            elif r.content_type == "application/json":
                r.content = json.loads(r.content)
        else:
            logger.error('metanate request: %s error occurred: %s: %s', resource, r.code, r.content)
            r.content = {'ErrorCode': r.code, 'Message': r.reason, 'Detail': r.content}

        return r
    def process_result(self, result, live_server, token):
        """
        Munge the result from a client and add the data into our repository.

        Accepts a map/shuffle result only while its token is outstanding:
        shard data is recorded under the data lock, then the token is
        consumed.  Results arriving for an already-consumed token are
        discarded (map shard files deleted) so a previous generation's
        work cannot pollute the current one.
        """
        # Record the data from our transaction in the Tokens
        # Ensures that duplicated work is not accepted after the
        # token has been consumed (to prevent previous generation's
        # evals from affecting the current gen)
        if not result.has_key('FAILED'):
            if token in self.Tokens.keys():
                self.data_lock.acquire()
                if token[0] == 'map':
                    # A map result carries (shard path, shuffle key set).
                    (shard, shuffle_keys) = result.values()[0]
                    self.shuffle_keys.update(shuffle_keys)
                    logger.info('Have %d shuffle keys (only alpha)' % (len(self.shuffle_keys)))
                    self.map_result_shards.append(shard)
                elif token[0] == 'shuffle':
                    self.shuffle_result_shards.append(result.values()[0])
                self.data_lock.release()

                # Consume the token if we've performed enough Evals
                # NOTE(review): live_servers / idle_servers are not defined
                # in this method — presumably module globals; verify.
                self.print_complete(token, self.Tokens, live_server, live_servers, idle_servers)
                del self.Tokens[token]
            else: # Delete extraneous data
                if token[0] == 'map':
                    (shard, shuffle_keys) = result.values()[0]
                    os.remove(shard)
예제 #13
0
    def _untar_files(self, directory):
        """ Scans a directory for any tar files and 'untars' them. Scans
        recursively just in case there's tars within tars. Deletes tar files
        when done.

        @param directory: Directory to be scanned.
        @return: Nothing
        """

        tars = glob.glob(os.path.join(directory, '*.tar*'))

        if not tars:
            return

        import tarfile

        try:
            for tar_file in tars:
                # SECURITY NOTE(review): extractall() on an untrusted
                # archive can write outside *directory* via '../' members;
                # verify the archives' origin or sanitize member paths.
                with tarfile.open(tar_file) as tar:
                    tar.extractall(path=directory)
                os.remove(tar_file)

            # Recurse to pick up tars that were nested inside the ones
            # just extracted.
            self._untar_files(directory)

        except (OSError, tarfile.TarError) as e:
            # Bug fix: tarfile raises TarError (e.g. ReadError) for a
            # corrupt archive, which the original OSError clause missed;
            # the with-statement also guarantees the file is closed.
            logger.info("Could not extract tarball.")
            logger.exception(e)
예제 #14
0
파일: vm.py 프로젝트: koder-ua/tiny_cloud
    def start_net(self, name):
        """Start the libvirt network *name*, defining it first if needed.

        If the network is declared in this cloud's config, connect to the
        hypervisor URL for its type; otherwise use the default connection.
        A network unknown to libvirt is created on the fly from the local
        definition; a name unknown to both raises CloudError.
        """
        logger.info("Start network " + name)

        if name in self.networks:
            conn = libvirt.open(self.urls[self.networks[name].htype])
        else:
            conn = libvirt.open(self.def_connection)

        try:
            net = conn.networkLookupByName(name)
            if not net.isActive():
                logger.debug("Network registered in libvirt - start it")
                net.create()
            else:
                logger.debug("Network already active")

        except libvirt.libvirtError:
            # libvirt does not know this network; fall back to our own
            # definition and create it from generated XML.
            try:
                logger.debug("No such network in libvirt")
                net = self.networks[name]
            except KeyError:
                msg = "Can't found network {0!r}".format(name)
                logger.error(msg)
                raise CloudError(msg)

            # Build the libvirt network XML: bridge, IP and DHCP range.
            xml = xmlbuilder.XMLBuilder('network')
            xml.name(name)
            xml.bridge(name=net.bridge)
            with xml.ip(address=net.ip, netmask=net.netmask):
                xml.dhcp.range(start=net.ip1, end=net.ip2)

            logger.debug("Create network")
            conn.networkCreateXML(str(xml))
예제 #15
0
    def finish(self, link):
        """
        Mark the stop point of the test case.

        At least one stop point must exist in every test case.  The test
        tool sends a stop signal to the dispatcher, which then notifies
        all other tools to stop; once everything has stopped the
        dispatcher forwards the test reports for verification.
        """
        if self.msg_buff is None:
            self._init_msg_buff()

        src, dst = link.split('--')
        ne_link = self._topology.link(src, dst)

        if ne_link is None:
            logger.warn('No link between NEs to send message. '
                        '%s--%s: Stop' % (src, dst))
            return

        # The link must be assigned in one direction or the other.
        forward = ne_link.simulator == src and ne_link.dut == dst
        backward = ne_link.simulator == dst and ne_link.dut == src
        if not (forward or backward):
            logger.warn('Finish on unassigned link. '
                        '%s--%s: Stop' % (src, dst))
            return

        msg_str = self._compose_finish()
        buff_name = ne_link.names[0]
        self._get_msg_buff(buff_name).append(msg_str)

        logger.info('"Finish" added to buffer of %s' % buff_name)
예제 #16
0
    def user_insert(self, user_name, password, jid, jid_pwd, type, device_type="", des=""):
        """
        Insert a user record into the coordinator store.

        :param user_name: user name
        :param password: user password
        :param jid: user jid
        :param jid_pwd: password for the jid
        :param type: user type (must not be None)
        :param device_type: device type
        :param des: description
        :return: {"result": <error code>}
        """
        # Reject the call when any mandatory field is missing.
        if not user_name\
                or not password\
                or not jid\
                or not jid_pwd\
                or type is None:
            return {'result': error_code.ERROR_PARAMS_ERROR}

        user_info = {"user_name": user_name,
                     "password": password,
                     "des": des,
                     "jid": jid,
                     "jid_pwd": jid_pwd,
                     "device_type": device_type,
                     "type": type,
                     "create_time": datetime.datetime.now()}

        g_RedisCoordinator.set(user_name, user_info)

        # Security fix: the original logged `password` and `jid_pwd` in
        # plaintext; credentials must never be written to the log.
        logger.info("RpcHandler::user_insert, user_name:%s, jid:%s, device_type:%s, type:%s, des:%s"
                    % (user_name, jid, device_type, type, des))
        return {'result': error_code.ERROR_SUCCESS}
예제 #17
0
    def __create_distro(self, compose, distro_file):
        """Create a cobbler-style distro description file for *compose*.

        Skips creation when *distro_file* already exists on the target.

        @param compose: compose name, used to derive the tree URL.
        @param distro_file: path of the distro file to write.
        """
        rhel_version = self.get_rhel_version(compose)
        # Bug fix: the original RHEL 6/7 branches mixed tabs and spaces,
        # which is an indentation (syntax) error; 6 and 7 used the same
        # URL layout, so they are merged here.
        if rhel_version == 5:
            compose_url = "http://download.englab.nay.redhat.com/pub/rhel/rel-eng/%s/tree-x86_64/" % compose
        elif rhel_version in (6, 7):
            compose_url = "http://download.englab.nay.redhat.com/pub/rhel/rel-eng/%s/compose/Server/x86_64/os/" % compose
        else:
            # Robustness: fail explicitly instead of hitting a NameError
            # on compose_url below for unexpected versions.
            raise ValueError("Unsupported RHEL version: %r" % rhel_version)
        if not self.__check_file_exist(distro_file):
            # Write the distro file on the remote side via a heredoc.
            cmd = ('cat <<EOF > %s\n'
                '[General]\n'
                'arch : x86_64\n'
                'breed : redhat\n'
                'comment :\n'
                '\n'
                'kernel : %simages/pxeboot/vmlinuz\n'
                'initrd : %simages/pxeboot/initrd.img\n'
                'kernel_options : biosdevname=0 reboot=pci\n'
                'kernel_options_post :\n'
                'ks_meta :\n'
                'mgmt_classes :\n'
                '\n'
                'os_version : rhel%s\n'
                'redhat_management_key :\n'
                'redhat_management_server :\n'
                'template_files :\n'
                'EOF' % (distro_file, compose_url, compose_url, rhel_version)
                )
            logger.info("Created distro file: %s" % distro_file)
            self.run(cmd)
        else:
            logger.info("Distro file: %s already existed ..." % distro_file)
예제 #18
0
파일: main.py 프로젝트: rsummers11/CADLab
def init_platform():
    """Load config.yml/default.yml into the module-level config objects
    and initialise GPU visibility, distributed training and RNG seeds."""
    config_file = cfg_from_file('config.yml')
    default_file = cfg_from_file('default.yml')
    logger.info(pprint.pformat(default_file))
    logger.info(pprint.pformat(config_file))

    # Overlay the file contents onto the global config/default objects.
    merge_a_into_b(config_file, config)
    merge_a_into_b(default_file, default)
    default.best_model_path = ''

    # An empty string means "no GPU requested".
    if default.gpu == '':
        default.gpu = None
    if default.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = default.gpu

    # More than one worker implies distributed training.
    default.distributed = default.world_size > 1
    if default.distributed:
        dist.init_process_group(backend=default.dist_backend, init_method=default.dist_url,
                                world_size=default.world_size)

    # e.g. "30,60" -> [30, 60]: epochs at which the LR is stepped.
    default.lr_epoch = [int(ep) for ep in default.lr_step.split(',')]

    if default.seed is not None:
        # Seed every RNG for reproducibility; cudnn.deterministic trades
        # speed for repeatable convolution results.
        seed = default.seed
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
예제 #19
0
 def _create_indexes(self, ids, fts):
     """Build forward and reverse lookup tables from the id/feature maps.

     *ids* maps item id -> row index and *fts* maps feature -> column
     index; both are inverted here for fast reverse lookups.
     """
     logger.info("Creating indices ...")
     self.item_id_to_index = dict(ids)
     self.index_to_item_id = {idx: item_id for item_id, idx in ids.iteritems()}
     self.index_to_feat = {idx: feat for feat, idx in fts.iteritems()}
     self.no_items = len(ids)
     self.no_features = len(fts)
예제 #20
0
    def wait(self, link, tm):
        """
        Wait on a link for the time given by 'tm'.

        'tm' is the waiting time in milliseconds.  (NOTE(review): the
        original docstring said both "seconds" and "millisecond";
        milliseconds kept here — confirm against _compose_wait.)
        """
        if self.msg_buff is None:
            self._init_msg_buff()

        src, dst = link.split('--')
        ne_link = self._topology.link(src, dst)

        if ne_link is None:
            logger.warn('No link between NEs to send message. '
                        '%s->%s: Wait' % (src, dst))
            return

        if ((not (ne_link.simulator == src and ne_link.dut == dst)) and
            (not (ne_link.simulator == dst and ne_link.dut == src))):
            # Bug fix: this branch logged "Stop on unassigned link ...
            # %s-%s: Stop", copy-pasted from finish(); it now names the
            # actual keyword.
            logger.warn('Wait on unassigned link. '
                        '%s--%s: Wait' % (src, dst))
            return

        msg_str = self._compose_wait(tm)
        msg_buff = self._get_msg_buff(ne_link.names[0])
        msg_buff.append(msg_str)

        logger.info('"wait %s" added to buffer %s' %
                    (tm, ne_link.names[0]))
예제 #21
0
    def uninstall_application(self, uninstall_data):
        """ Uninstalls applications in the /Applications directory. """

        # Defaults reported back when the bundle cannot be removed.
        success = 'false'
        error = 'Failed to uninstall application.'
        restart = 'false'

        app_bundle = uninstall_data.name + ".app"
        installed_bundles = os.listdir('/Applications')

        if app_bundle in installed_bundles:
            uninstaller = Uninstaller()
            success, error = uninstaller.remove(uninstall_data.name)
        else:
            error = ("{0} is not an app bundle. Currently only app bundles are"
                     " uninstallable.".format(uninstall_data.name))

        logger.info('Done attempting to uninstall app.')

        return UninstallResult(success, error, restart)
예제 #22
0
    def retrieve_data(self, link,
                      result, operation, paras='',
                      start_message='', end_message=''):
        """Queue a data-retrieval command on *link* ('src--dst').

        *operation* names the retrieval to perform and *result* where to
        store it; the composed command is appended to the buffer of the
        link's first NE.
        """

        # Lazily create the per-link message buffers on first use.
        if self.msg_buff is None:
            self._init_msg_buff()

        src, dst = link.split('--')

        ne_link = self._topology.link(src, dst)

        if ne_link is None:
            logger.warn('No link between NEs to send message. '
                        '%s--%s: Retrieve %s' % (src, dst, operation))
            return

        # The link must be assigned in one direction or the other.
        if ((not (ne_link.simulator == src and ne_link.dut == dst)) and
            (not (ne_link.simulator == dst and ne_link.dut == src))):
            logger.warn('Retrieve data on unassigned link. '
                        '%s--%s: Retrieve %s' % (src, dst, operation))
            return

        msg_str = self._compose_retrieve_data(result, operation, paras,
                                              start_message, end_message)
        msg_buff = self._get_msg_buff(ne_link.names[0])
        msg_buff.append(msg_str)

        logger.info('"Retrieve" added to buffer of %s' %
                    ne_link.names[0])
예제 #23
0
    def _apt_purge(self, package_name):
        """Purge *package_name* via apt-get and report whether anything
        was removed.

        Returns a ('true'/'false', message) tuple in the same style as
        the other package operations.
        """
        purge_command = [self.APT_GET_EXE, 'purge', '-y', package_name]

        try:
            result, err = self.utilcmds.run_command(purge_command)
            if err:
                raise Exception(err)

            # apt-get prints "... N to remove ..."; pull N out of it.
            purged = 0
            match = re.search('\\d+ to remove', result)
            if match:
                purged = int(match.group().split(' ')[0])

            if purged > 0:
                logger.debug(
                    'Successfuly removed {0} packages.'.format(purged)
                )
                return 'true', ''

            logger.info('No packages were removed.')
            return 'false', 'No packages removed.'

        except Exception as e:
            logger.error('Problem while uninstalling package: ' + package_name)
            logger.exception(e)
            return 'false', str(e)
    def run(self):
        """IRC bot main loop: wait until unpaused, then keep a connection
        to freenode alive, retrying on failure, until stopped."""
        # Block while the owning processor is paused.
        while self.processor.shared.paused():
            time.sleep(1)

        self.ircname = self.host + ' ' + self.getname()
        logger.info("joining IRC")

        while not self.processor.shared.stopped():
            client = irc.client.IRC()
            try:
                c = client.server().connect('irc.freenode.net', 6667, self.nick, self.password, ircname=self.ircname)
            except irc.client.ServerConnectionError:
                # Transient network failure: back off and retry.
                logger.error('irc', exc_info=True)
                time.sleep(10)
                continue

            # Wire up all protocol event handlers before processing.
            c.add_global_handler("welcome", self.on_connect)
            c.add_global_handler("join", self.on_join)
            c.add_global_handler("quit", self.on_quit)
            c.add_global_handler("kick", self.on_kick)
            c.add_global_handler("whoreply", self.on_who)
            c.add_global_handler("namreply", self.on_name)
            c.add_global_handler("disconnect", self.on_disconnect)
            c.set_keepalive(60)

            self.connection = c
            try:
                # Runs until the connection drops or an error escapes.
                client.process_forever()
            except BaseException as e:
                # NOTE(review): BaseException also swallows KeyboardInterrupt
                # and SystemExit — presumably intentional to keep the bot
                # alive; verify.
                logger.error('irc', exc_info=True)
                time.sleep(10)
                continue

        logger.info("quitting IRC")
예제 #25
0
파일: vpn_profile.py 프로젝트: lls3018/mdmi
    def add_vpn_ondemandrules(self, node, vpn_bypass_except):
        """Attach the local-network bypass exception to the OnDemandRules
        plist node.

        *vpn_bypass_except* carries an 'attributes' dict holding
        LocalNetworkAccessKey/LocalNetworkAccessValue lists; the element
        following the matching key gets the exception value added.
        """
        if node is None:
            logger.info('OnDemandRules NOT found!')
            return

        attributes = None
        if vpn_bypass_except and isinstance(vpn_bypass_except, dict):
            attributes = vpn_bypass_except.get('attributes')

        # Pull the key name and its value out of the attribute lists.
        attr_key = None
        attr_key_val = None
        if attributes and isinstance(attributes, dict):
            for name, values in attributes.items():
                if name == 'LocalNetworkAccessKey' and values:
                    attr_key = values[0]
                if name == 'LocalNetworkAccessValue' and values:
                    attr_key_val = values[0]

        # Each child of the plist array is a dict of alternating key/value
        # elements; the item right after the matching key is patched.
        key_is_found = False
        for element in node.getchildren():
            for item in element:
                if item.text == attr_key:
                    key_is_found = True
                    continue
                if key_is_found:
                    self.add_vpnexception(item, attr_key_val)
                    key_is_found = False
                    break
예제 #26
0
 def run(self):
     """Dispatch loop: wake on the event (or every 5 s), pull task batches
     from the global queue and hand them to the handler pool until the
     stop event is set."""
     logger.info('dispatch thread is running...')
     while not self.__stop_event.isSet():
         try:
             self.__event.wait(timeout=5)
         except Exception:
             pass
         else:
             if self.__cur_tasks:
                 # Still busy: keep the event set while tasks keep running.
                 if self._run_tasks():
                     self.__event.set()
                 else:
                     self.__event.clear()
             else:
                 # Fetch the next batch; when there is nothing to do, trim
                 # idle handlers and go back to waiting.
                 self.__cur_tasks = g_task_dict.get()
                 if not self.__cur_tasks:
                     self.__task_handler_pool.kill_idle_handlers()
                     self.__event.clear()
                     continue
                 if self._run_tasks():
                     self.__event.set()
                 else:
                     self.__event.clear()
         finally:
             pass
     logger.info('dispatch thread %s stopped', self.name)
예제 #27
0
    def stop(self):
        """Signal the dispatch thread to exit and tear down its handlers."""
        logger.info('stopping dispatch thread %s...', self.name)
        self.__stop_event.set()
        # Wake the dispatcher immediately so it observes the stop flag
        # instead of waiting out the 5 s event timeout.
        #if not self.__busy.isSet():
        self.__event.set()

        self.__task_handler_pool.kill_all_handlers()
예제 #28
0
def main():
    """
    The function gets all the user callback objects whose notify times are between the
    current time and 1 minute from now. For each callback object it calls the function notification_callback,
    each call is spawned as separate thread. This way, all users get concurrent notifications and if there
    are a large number of notifications we won't be taking up more than 1 minute to complete the execution.
    All the spawned threads are joined with this function, so that it waits until they complete their
    execution.
    @params: None
    @output: None
    """
    now = datetime.datetime.now()
    now_time = datetime.time(now.hour, now.minute, now.second)
    
    xmins_frm_now = now + datetime.timedelta(seconds = 60)
    xmins_frm_now = datetime.time(xmins_frm_now.hour, xmins_frm_now.minute, xmins_frm_now.second)

    today = datetime.date.today()
    days = {0:'mon', 1:'tue', 2:'wed', 3:'thu', 4:'fri', 5:'sat', 6:'sun'}
    today = days[today.weekday()]
        
    user_callbacks = models.UserCallback.objects.filter(notify_time__gte=now_time, notify_time__lt=xmins_frm_now)
    logger.info("number of user callbacks to process: %d" % user_callbacks.count())

    threads = []
    for user_callback in user_callbacks:
        thread = threading.Thread(target = notification_callback, args = (user_callback, today,))
        threads.append(thread)
        try:
            thread.start()
        except Exception, e:
            logger.error("error while running user callback: %s" % e)
예제 #29
0
        def wechat_recv_param_wapper(self, *args, **kwargs):
            """Decorator wrapper for WeChat callbacks: verify the request
            signature, decrypt the XML body into kwargs['body'], then call
            the wrapped handler.  (token/encodingAESKey/appid/fun come from
            the enclosing decorator scope.)"""
            if not signature_checker(token,
                                     kwargs['timestamp'],
                                     kwargs['nonce'],
                                     kwargs['signature']):
                logger.error("wechat_recv_wapper, failed, access_token:%s args:%s, kwargs:%s" %
                             (token, args, kwargs))
                return "-1"

            # No body means this is only a URL-verification request; return
            # the echostr immediately.
            if not self.request.body:
                return kwargs.get("echostr", "0")

            # Decrypt and parse the message content.
            crypt = WXBizMsgCrypt(token, encodingAESKey, appid)
            ret, xml_body = crypt.DecryptMsg(self.request.body,
                                               kwargs['msg_signature'],
                                               kwargs['timestamp'],
                                               kwargs['nonce'])
            # NOTE(review): assert is stripped under ``python -O``; consider
            # raising explicitly on decryption failure.
            assert ret == WXBizMsgCrypt_OK

            order_dic_body = xmltodict.parse(xml_body)
            dic_body = dict(order_dic_body["xml"])
            kwargs['body'] = dic_body

            logger.info("%s:wechat_recv_wapper args:%s kwargs:%s" % (fun.__name__, args, kwargs))
            fun(self, *args, **kwargs)
            return kwargs.get("echostr", "0")
예제 #30
0
파일: rest_access.py 프로젝트: lls3018/mdmi
    def parse_result(self, res):
        """Build a RestResult from an httplib-style response object.

        Splits the Content-Type header into media type and charset, reads
        the body (honouring Content-Length when present), and records the
        Last-Modified / X-Schemus-Session headers.
        """
        retcode = res.status
        result = RestResult(retcode, res.reason)

        content_type = res.getheader('Content-Type')
        # Bug fix: the original tested content_type.find(";"), which is -1
        # (truthy!) when no ';' exists and 0 (falsy) when ';' comes first.
        if content_type and ';' in content_type:
            types = content_type.split(";")
            for t in types:
                t = t.strip()
                if t.startswith('charset'):
                    result.charset = t
                else:
                    result.content_type = t
        else:
            result.content_type = content_type

        # Bug fix: the header is 'Content-Length'; the original asked for
        # 'Content_Length', which never matches, so the read-until-length
        # loop below was dead code.
        content_length = res.getheader('Content-Length')
        if content_length:
            result.content_length = int(content_length)
            result.content = res.read()
            # Keep reading until the announced length has arrived.
            while len(result.content) < result.content_length:
                result.content += res.read()
        else:
            result.content = res.read()
            result.content_length = len(result.content)

        last_modified = res.getheader('Last-Modified')
        if last_modified:
            logger.info('HTTP response from %s, Last-Modified header is: %s', self.host, last_modified)
            result.last_modified = last_modified

        session_id = res.getheader('X-Schemus-Session')
        if session_id:
            logger.info('HTTP response from %s, X-Schemus-Session header is: %s', self.host, session_id)
            result.session_id = session_id

        return result
예제 #31
0
def cancel_private(update, context):
    """Abort the current private-chat conversation.

    Clears the per-chat state, replies with the configured cancel text
    and terminates the conversation handler.
    """
    context.chat_data.clear()
    cancel_text = context.bot_data.get("config").get("CANCEL_PRIVATE")
    update.message.reply_text(cancel_text)
    logger.info(f"Private: Cancel")
    return ConversationHandler.END
예제 #32
0
파일: envwrapper.py 프로젝트: archhhh/Orca
    def get_state(self, evaluation=False):
        """Poll shared memory for a fresh environment state and build the
        agent observation.

        Spins (sleeping 10 ms per poll) until the SysV shared-memory
        segment contains a line whose request id differs from the last
        one seen, parses the whitespace-separated floats, optionally
        normalizes them, and derives the reward and state vector.

        Parameters
        ----------
        evaluation : bool
            When True, the normalizer statistics are NOT updated with
            the new sample (it is still used to normalize).

        Returns
        -------
        tuple
            ``(state, d, reward, True)`` on success, where ``state`` is
            the feature vector and ``d`` the raw first field (delay);
            ``(state, 0.0, reward, False)`` when the parsed line does not
            have the expected ``input_dim`` length.
        """
        succeed = False
        error_cnt = 0
        while (1):
            # Read value from shared memory
            try:
                memory_value = self.shrmem_r.read()

            except sysv_ipc.ExistentialError:
                # The writer removed the segment: shut down cleanly.
                print("No shared memory Now, python ends gracefully :)")
                logger.info("No shared memory Now, python ends gracefully :)")
                sys.exit(0)

            memory_value = memory_value.decode('unicode_escape')

            # The writer NUL-terminates the payload; ignore anything after.
            i = memory_value.find('\0')

            if i != -1:

                memory_value = memory_value[:i]
                # Parse the whitespace-separated float fields.
                readstate = np.fromstring(memory_value, dtype=float, sep=' ')
                try:
                    # First field is the request id used to detect fresh data.
                    rid = readstate[0]
                except:
                    rid = self.prev_rid
                    sleep(0.01)
                    continue
                try:
                    # Remaining fields form the raw observation vector.
                    s0 = readstate[1:]
                except:
                    print("s0 waring")
                    sleep(0.01)
                    continue

                if rid != self.prev_rid:
                    succeed = True
                    break
                else:
                    # Same id as last time: nothing new, keep polling.
                    wwwwww = ""

            error_cnt = error_cnt + 1
            if error_cnt > 24000:
                # Too many empty polls without fresh data: give up.
                error_cnt = 0
                print(
                    "After 3 min, We didn't get any state from the server. Actor "
                    + str(self.config.task) + " is going down down down ...\n")
                sys.exit(0)

            sleep(0.01)

        error_cnt = 0
        if succeed == False:
            raise ValueError('read Nothing new from shrmem for a long time')
        reward = 0
        state = np.zeros(1)
        w = s0
        if len(s0) == (self.params.dict['input_dim']):
            # Unpack the named fields of the raw (pre-normalization) line.
            d = s0[0]
            thr = s0[1]
            samples = s0[2]
            delta_t = s0[3]
            target_ = s0[4]
            cwnd = s0[5]
            pacing_rate = s0[6]
            loss_rate = s0[7]
            srtt_ms = s0[8]
            snd_ssthresh = s0[9]
            packets_out = s0[10]
            retrans_out = s0[11]
            max_packets_out = s0[12]
            mss = s0[13]
            min_rtt = s0[14]

            self.local_counter += 1

            if self.use_normalizer == True:
                # Only training updates the running statistics.
                if evaluation != True:
                    self.normalizer.observe(s0)
                s0 = self.normalizer.normalize(s0)
                min_ = self.normalizer.stats()
            else:
                # No normalizer: zero offsets so the *_min values equal s0.
                min_ = s0 - s0

            # Offset-corrected (value - min) versions of each field.
            d_n = s0[0] - min_[0]
            thr_n = s0[1]
            thr_n_min = s0[1] - min_[1]
            samples_n = s0[2]
            samples_n_min = s0[2] - min_[2]
            delta_t_n = s0[3]
            delta_t_n_min = s0[3] - min_[3]

            cwnd_n_min = s0[5] - min_[5]
            pacing_rate_n_min = s0[6] - min_[6]
            loss_rate_n_min = s0[7] - min_[7]
            srtt_ms_min = s0[8] - min_[8]
            snd_ssthresh_min = s0[9] - min_[9]
            packets_out_min = s0[10] - min_[10]
            retrans_out_min = s0[11] - min_[11]
            max_packets_out_min = s0[12] - min_[12]
            mss_min = mss - min_[13]
            min_rtt_min = min_rtt - min_[14]

            if self.use_normalizer == False:
                thr_n = thr_n
                thr_n_min = thr_n_min
                samples_n_min = samples_n_min
                cwnd_n_min = cwnd_n_min
                loss_rate_n_min = loss_rate_n_min
                d_n = d_n
            # Track the running extrema used for scaling below.
            if self.max_bw < thr_n_min:
                self.max_bw = thr_n_min
            if self.max_cwnd < cwnd_n_min:
                self.max_cwnd = cwnd_n_min
            if self.max_smp < samples_n_min:
                self.max_smp = samples_n_min
            if self.min_del > d_n:
                self.min_del = d_n

            ################# Transfer all of the vars. to Rate/Max(Rate) space
            #cwnd_bytes= cwnd_n_min*mss_min
            #cwnd_n_min=(cwnd_bytes*1000)/srtt_ms_min
            #snd_ssthresh_min=(snd_ssthresh_min*mss_min*1000)/srtt_ms_min
            #packets_out_min=(packets_out_min*mss_min*1000)/srtt_ms_min
            #retrans_out_min=(retrans_out_min*mss_min*1000)/srtt_ms_min
            #max_packets_out_min=(max_packets_out_min*mss_min*1000)/srtt_ms_min
            #inflight_bytes=(packets_out-samples)*mss_min*1000

            # Delay metric in (0, 1]: penalize srtt above margin * min_rtt.
            if min_rtt_min * (
                    self.params.dict['delay_margin_coef']) < srtt_ms_min:
                delay_metric = (
                    min_rtt_min *
                    (self.params.dict['delay_margin_coef'])) / srtt_ms_min
            else:
                delay_metric = 1

            # Reward: loss-penalized throughput, scaled by the running max
            # bandwidth and weighted by the delay metric.
            reward = (thr_n_min -
                      5 * loss_rate_n_min) / self.max_bw * delay_metric

            if self.max_bw != 0:
                state[0] = thr_n_min / self.max_bw
                # Pacing-rate ratio clipped at 10 to bound the feature.
                tmp = pacing_rate_n_min / self.max_bw
                if tmp > 10:
                    tmp = 10
                state = np.append(state, [tmp])
                state = np.append(state, [5 * loss_rate_n_min / self.max_bw])
            else:
                state[0] = 0
                state = np.append(state, [0])
                state = np.append(state, [0])
            state = np.append(state, [samples / cwnd])
            state = np.append(state, [delta_t_n])
            state = np.append(state, [min_rtt_min / srtt_ms_min])
            state = np.append(state, [delay_metric])

            self.prev_rid = rid
            return state, d, reward, True
        else:
            return state, 0.0, reward, False
예제 #33
0
    def hicup_alig_filt(self, params, genome_digest, genome_index, genome_loc,
                        fastq1, fastq2, outdir_tar):
        """
        Align the HiC reads against a reference genome with HiCUP,
        filter them, and pack the output folder into a tar archive.

        Parameters
        ----------
        params: list
            extra command line arguments passed straight to hicup
        genome_digest: str
            location of the digested genome
        genome_index: str
            tarred bowtie2 index archive to unpack
        genome_loc: str
            location/prefix of the genome indexed with bowtie2
        fastq1: str
            location of the first fastq file
        fastq2: str
            location of the second fastq file
        outdir_tar: str
            path of the tar archive to produce

        Returns
        -------
        Bool
            True on success, False if packing the results failed
        """
        # Working folder named after the tar archive (without extension).
        folder = os.path.split(outdir_tar)[0]+"/"+ \
                os.path.split(outdir_tar)[1].split(".")[0]

        if os.path.isdir(folder) is False:
            os.mkdir(folder)

        # The six files that make up a bowtie2 index.
        index_files = {
            "1.bt2": genome_loc + ".1.bt2",
            "2.bt2": genome_loc + ".2.bt2",
            "3.bt2": genome_loc + ".3.bt2",
            "4.bt2": genome_loc + ".4.bt2",
            "rev.1.bt2": genome_loc + ".rev.1.bt2",
            "rev.2.bt2": genome_loc + ".rev.2.bt2"
        }

        logger.progress("Untar Index: " + genome_loc + ", " + genome_index)
        self.untar_index(genome_loc, genome_index, index_files["1.bt2"],
                         index_files["2.bt2"], index_files["3.bt2"],
                         index_files["4.bt2"], index_files["rev.1.bt2"],
                         index_files["rev.2.bt2"])

        hicup_args = [
            "hicup", "--index", genome_loc, "--digest", genome_digest, fastq1,
            fastq2
        ]

        hicup_args = hicup_args + params + [
            "--bowtie2", "/home/compss/bin/bowtie2", "--outdir", folder
        ]

        logger.info("arguments for hicup:" + " ".join(hicup_args))

        try:
            # NOTE(review): shell=True with a joined string is kept for
            # compatibility, but is unsafe if any path contains shell
            # metacharacters; consider passing the list with shell=False.
            process = subprocess.Popen(" ".join(hicup_args),
                                       shell=True,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            # BUG FIX: wait() with both streams PIPEd can deadlock once the
            # OS pipe buffer fills; communicate() drains them while waiting.
            process.communicate()

            logger.info("TARING output folder")

            tar_file = outdir_tar
            archive_name = os.path.split(outdir_tar)[1].split(".")[0]

            onlyfiles = [
                f for f in os.listdir(folder)
                if os.path.isfile(os.path.join(folder, f))
            ]

            # Context manager guarantees the archive is closed (and flushed)
            # even if adding a member raises.
            with tarfile.open(tar_file, "w") as tar:
                for tmp_file in onlyfiles:
                    tar.add(os.path.join(folder, tmp_file),
                            arcname=os.path.join(archive_name, tmp_file))

                    os.remove(os.path.join(folder, tmp_file))

            shutil.rmtree(folder)

            # tar_file already equals outdir_tar; kept as a no-op for
            # backward compatibility with the original flow.
            shutil.move(tar_file, outdir_tar)

            # Clean up the unpacked bowtie2 index files.
            for indexed_file in index_files:
                os.remove(index_files[indexed_file])

            return True

        except IOError:
            return False
예제 #34
0
 def add_task(self, task_obj):
     """Enqueue *task_obj* on the worker queue, logging its id and command."""
     logger.info("putting to queue: %s - %s", task_obj.id, task_obj.cmd)
     work_queue = self.task_queue
     work_queue.put(task_obj)
    def _train(self):
        """ Main actor learner loop for advantage actor critic learning.

        Repeatedly runs episodes until the shared global step budget is
        exhausted. Each episode alternates between rolling out up to
        ``max_local_steps`` transitions (augmenting rewards with a
        density-model exploration bonus) and applying an A2C/GAE update
        from the collected trajectory to the shared parameters.
        """
        logger.debug("Actor {} resuming at Step {}".format(
            self.actor_id, self.global_step.value()))

        # Rolling window of recent exploration bonuses, used for logging.
        bonuses = deque(maxlen=100)
        while (self.global_step.value() < self.max_global_steps):
            # Sync local learning net with shared mem
            s = self.emulator.get_initial_state()
            self.reset_hidden_state()
            self.local_episode += 1
            episode_over = False
            total_episode_reward = 0.0
            episode_start_step = self.local_step

            while not episode_over:
                # Pull the latest shared parameters before each rollout.
                self.sync_net_with_shared_memory(self.local_network,
                                                 self.learning_vars)
                self.save_vars()

                # Per-rollout trajectory buffers.
                rewards = list()
                states = list()
                actions = list()
                values = list()
                local_step_start = self.local_step
                self.set_local_lstm_state()

                # Collect up to max_local_steps transitions (or episode end).
                while self.local_step - local_step_start < self.max_local_steps and not episode_over:
                    # Choose next action and execute it
                    a, readout_v_t, readout_pi_t = self.choose_next_action(s)
                    new_s, reward, episode_over = self.emulator.next(a)
                    total_episode_reward += reward

                    # Update density model
                    current_frame = new_s[..., -1]
                    bonus = self.density_model.update(current_frame)
                    bonuses.append(bonus)

                    # Periodic diagnostics from the master actor only.
                    if self.is_master() and (self.local_step % 400 == 0):
                        bonus_array = np.array(bonuses)
                        logger.debug(
                            'π_a={:.4f} / V={:.4f} / Mean Bonus={:.4f} / Max Bonus={:.4f}'
                            .format(readout_pi_t[a.argmax()], readout_v_t,
                                    bonus_array.mean(), bonus_array.max()))

                    # Rescale or clip immediate reward
                    reward = self.rescale_reward(
                        self.rescale_reward(reward) + bonus)
                    rewards.append(reward)
                    states.append(s)
                    actions.append(a)
                    values.append(readout_v_t)

                    s = new_s
                    self.local_step += 1

                    global_step, _ = self.global_step.increment()
                    # Periodically persist the density model; reload it when
                    # another actor has flagged an update for this actor.
                    if global_step % self.density_model_update_steps == 0:
                        self.write_density_model()
                    if self.density_model_update_flags.updated[
                            self.actor_id] == 1:
                        self.read_density_model()
                        self.density_model_update_flags.updated[
                            self.actor_id] = 0

                # Bootstrap from the value of the last state, then compute
                # GAE advantages and n-step targets for the rollout.
                next_val = self.bootstrap_value(new_s, episode_over)
                advantages = self.compute_gae(rewards, values, next_val)
                targets = self.compute_targets(rewards, next_val)
                # Compute gradients on the local policy/V network and apply them to shared memory
                entropy = self.apply_update(states, actions, targets,
                                            advantages)

            # End-of-episode throughput and reward logging.
            elapsed_time = time.time() - self.start_time
            steps_per_sec = self.global_step.value() / elapsed_time
            perf = "{:.0f}".format(steps_per_sec)
            logger.info(
                "T{} / EPISODE {} / STEP {}k / REWARD {} / {} STEPS/s".format(
                    self.actor_id, self.local_episode,
                    self.global_step.value() / 1000, total_episode_reward,
                    perf))

            self.log_summary(total_episode_reward,
                             np.array(values).mean(), entropy)
예제 #36
0
def load_and_cache_examples(args, task_name, tokenizer, data_type="train"):
    """Load examples for *task_name* and return them as a TensorDataset.

    Features are cached on disk; the cache file name encodes the data
    split, model name and max sequence length. In distributed training
    only the first process builds the cache while the others wait at a
    barrier and then load it.

    Parameters
    ----------
    args: namespace with data_dir, model_name_or_path, model_type,
        train_max_seq_length, eval_max_seq_length, local_rank and
        overwrite_cache.
    task_name: key into the ``processors`` registry.
    tokenizer: tokenizer providing cls/sep/pad tokens.
    data_type: "train", "dev" or "test".
    """
    # BUG FIX: the original referenced an undefined name ``evaluate``
    # (NameError in distributed runs); derive it from the data split.
    evaluate = data_type != "train"
    if args.local_rank not in [-1, 0] and not evaluate:
        torch.distributed.barrier()

    processor = processors[task_name]()
    # load data features from cache or datafile
    cached_features_file = os.path.join(
        args.data_dir, "cache_crf-{}_{}_{}_{}".format(
            data_type,
            list(filter(None, args.model_name_or_path.split("/"))).pop(),
            str(args.train_max_seq_length if data_type ==
                "train" else args.eval_max_seq_length), str(task_name)))

    if os.path.exists(cached_features_file) and not args.overwrite_cache:
        logger.info("Loading features from cache file %s",
                    cached_features_file)
        features = torch.load(cached_features_file)
    else:
        logger.info("Creating features from dateset file at %s",
                    cached_features_file)
        label_list = processor.get_labels()
        if data_type == "train":
            examples = processor.get_train_examples(args.data_dir)
        elif data_type == "dev":
            examples = processor.get_dev_examples(args.data_dir)
        else:
            examples = processor.get_test_examples(args.data_dir)

        features = convert_examples_to_features(
            examples=examples,
            tokenizer=tokenizer,
            label_list=label_list,
            max_seq_length=args.train_max_seq_length
            if data_type == "train" else args.eval_max_seq_length,
            # xlnet puts [CLS] at the end and pads on the left.
            cls_token_at_end=bool(args.model_type in ["xlnet"]),
            pad_on_left=bool(args.model_type in ["xlnet"]),
            cls_token=tokenizer.cls_token,
            cls_token_segment_id=2 if args.model_type in ["xlnet"] else 0,
            sep_token=tokenizer.sep_token,
            pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token
                                                       ])[0],
            pad_token_segment_id=4 if args.model_type in ["xlnet"] else 0,
        )
        if args.local_rank in [-1, 0]:
            logger.info("save features into cached file %s",
                        cached_features_file)
            torch.save(features, cached_features_file)
    # Let the waiting processes proceed now that the cache exists.
    if args.local_rank == 0 and not evaluate:
        torch.distributed.barrier()

    # Convert to tensors and build the dataset.
    all_input_ids = torch.tensor([f.input_ids for f in features],
                                 dtype=torch.long)
    all_input_masks = torch.tensor([f.input_mask for f in features],
                                   dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_id for f in features],
                                   dtype=torch.long)
    all_label_ids = torch.tensor([f.label_ids for f in features],
                                 dtype=torch.long)
    all_lens = torch.tensor([f.input_len for f in features], dtype=torch.long)
    dataset = TensorDataset(all_input_ids, all_input_masks, all_segment_ids,
                            all_lens, all_label_ids)

    return dataset
예제 #37
0
def _tb_load_manifest(file_path,
                      num_labels=31,
                      name_labels=None,
                      name_paths=None,
                      mode='single',
                      ext_data=False,
                      fl_balance=False,
                      r_seed=-1):
    """Load a dataset manifest CSV into a trimmed entries DataFrame.

    Reads the manifest at ``file_path``, applies the internal "clean set"
    filtering when ``ext_data`` is False (bad-age/bad-quality rows are
    dropped, optional label balancing, scaled age/sex columns), and
    returns a DataFrame of [path(s), (age, sex,) labels].

    Parameters
    ----------
    file_path : path-like (must support ``.exists()``) to the manifest CSV.
    num_labels : number of label columns taken from the tail of the
        DataFrame when ``name_labels`` is None.
    name_labels : explicit list of label column names, or None.
    name_paths : explicit path column name(s), or None for the defaults.
    mode : 'single'/'extd' (one path column plus age/sex) or 'double'
        (two path columns); anything else raises RuntimeError.
    ext_data : True when loading an external dataset (skips the clean-set
        branch and only derives scaled age/sex).
    fl_balance : when True, mark surplus negatives per label as -1
        ("don't care") to cap the negative:positive ratio.
    r_seed : if not -1, resample 1000 rows with this random seed.

    Raises
    ------
    RuntimeError if the manifest is missing or ``mode`` is unknown.

    NOTE(review): relies on module-level globals ``Clean_Neg``,
    ``Clean_Neg_list``, ``MGH_DATA_BASE`` and ``label_name`` that are not
    visible here -- verify they are defined before calling.
    """
    if not file_path.exists():
        logger.error(f"manifest file {file_path} not found.")
        raise RuntimeError

    logger.debug(f"loading dataset manifest {file_path} ...")
    # Missing cells are treated as label value 0.
    df = pd.read_csv(str(file_path)).fillna(0)

    if (not ext_data) and (True):  # using the clean-set
        # cleanset
        if True:
            ## MGH validation set
            # Keep only rows not flagged for bad age or bad image quality.
            df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)]
            if r_seed != -1:
                df = df.sample(n=1000, replace=True, random_state=r_seed)

            # Disabled branch: restrict to five major findings.
            if (False):
                df = df.loc[
                    (df['Hilar/mediastinum>Cardiomegaly>.'] == 1)
                    |
                    (df['Lung density>Increased lung density>Atelectasis'] == 1
                     )
                    |
                    (df['Lung density>Increased lung density>Pulmonary edema']
                     == 1)
                    |
                    (df['Lung density>Increased lung density>pneumonia'] == 1)
                    | (df['Pleura>Pleural effusion>.'] == 1)]
                df.reset_index(drop=True, inplace=True)

            ## MGH testset
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][0:250]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][250:500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][500:750]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][750:]

            ## CheXpert trainset
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][0:250]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][250:500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][500:750]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][750:1000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1000:1250]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1250:1500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1500:1750]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1750:2000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2000:2250]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2250:2500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2500:2750]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2750:3000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][3000:3250]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][3250:3500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][3500:3750]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][3750:4000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][4000:4250]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][4250:4500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][4500:]

            ## NIH trainset
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][0:500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][500:1000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1000:1500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1500:2000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2000:2500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2500:3000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][3000:3500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][3500:4000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][4000:]

            ## MIMIC trainset
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][0:500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][500:1000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1000:1500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][1500:2000]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2000:2500]
            #df = df.loc[(df['bad_age'] == 0) & (df['bad_quality'] == 0)][2500:]

            if (Clean_Neg):
                #hilar area special care
                # Temporarily mark all negatives as -2, then restore to 0
                # only the accessions listed in each clean-negative file.
                for cl_feature, cl_file in Clean_Neg_list:
                    df_case = pd.read_csv(MGH_DATA_BASE.joinpath(
                        'clean_nagative_data_v5_deblank/' + cl_file),
                                          names=['ACC'])
                    #df_case = pd.read_csv(MGH_DATA_BASE.joinpath('clean_nagative_data_v5/'+cl_file))
                    #df_case = pd.read_csv(MGH_DATA_BASE.joinpath('clean_negative_data_v5_321/'+cl_file), names=['ACC'])
                    df[f'{cl_feature}'] = df[f'{cl_feature}'].replace(0, -2)
                    #df.loc[df.AccessionNumber.isin(df_case.ACC), f'{cl_feature}'] = 0
                    df.loc[(df.AccessionNumber.isin(df_case.ACC)) &
                           (df[f'{cl_feature}'] == -2), f'{cl_feature}'] = 0

            if (fl_balance):
                # Per label: if positives are rarer than 1:ratio_th, keep
                # only ratio_th negatives per positive and mark the rest -1.
                for k, feature in enumerate(label_name):
                    num_p = df.loc[(df[f'{feature}'] == 1)].shape[0]
                    num_n = df.loc[(df[f'{feature}'] == 0)].shape[0]
                    ratio_pn = num_p / num_n
                    ratio_th = 5
                    if (ratio_pn < (1.0 / ratio_th)):
                        df[f'{feature}'] = df[f'{feature}'].replace(0, -1)
                        df_n = df.loc[(df[f'{feature}'] == -1)].sample(
                            n=(num_p * ratio_th), random_state=2020)
                        df[f'{feature}'].loc[df['AccessionNumber'].isin(
                            df_n['AccessionNumber'])] = 0

                        pos = df[f'{feature}'].loc[df[f'{feature}'] ==
                                                   1].shape[0]
                        neg = df[f'{feature}'].loc[df[f'{feature}'] ==
                                                   0].shape[0]
                        dontcare = df[f'{feature}'].loc[df[f'{feature}'] ==
                                                        -1].shape[0]

                        logger.info(
                            f'[{k:02d}-{feature}] pos: {pos}, neg: {neg}, dont-care: {dontcare}'
                        )

                # Drop rows that are "don't care" for every label.
                if name_labels == None:
                    df = df[~(df.iloc[:, -(num_labels + 1):-1] == -1).all(1)]
                else:
                    df = df[~(df[name_labels] == -1).all(1)]
                df.reset_index(drop=True, inplace=True)

            if (Clean_Neg):
                # Remaining -2 markers become -1 ("don't care").
                for cl_feature, cl_file in Clean_Neg_list:
                    df[f'{cl_feature}'] = df[f'{cl_feature}'].replace(-2, -1)

            # Disabled branch: dump per-label statistics and exit.
            if False:
                for k, feature in enumerate(label_name):
                    num_p = df.loc[(df[f'{feature}'] == 1)].shape[0]
                    num_n = df.loc[(df[f'{feature}'] == 0)].shape[0]
                    num_i = df.loc[(df[f'{feature}'] == -1)].shape[0]

                    print(
                        f'{feature}-{num_p}-{num_p/df.shape[0]}-{num_i}-{num_i/df.shape[0]}-{num_n}-{num_n/df.shape[0]}'
                    )
                exit(-1)

            if (True):  # in order to add clinical information to network
                # Scale sex to +/- weight_gender and age into a weighted,
                # centered range derived from [min_age, max_age].
                df['ScaledSex'] = df.sex.replace(0, -1)
                weight_gender = 10
                weight_age = 100
                min_age = 11.0
                max_age = 100.0
                #df.PatientAge = (df.PatientAge-min(df.PatientAge))/(max(df.PatientAge)-min(df.PatientAge))
                df['ScaledAge'] = (df.PatientAge - min_age) / (max_age -
                                                               min_age)
                df.ScaledAge = weight_age * (df.ScaledAge - 0.5)
                df['ScaledSex'] = weight_gender * df.ScaledSex

            df.reset_index(drop=True, inplace=True)

    else:
        # External data: only derive the scaled age/sex columns; fall back
        # to zeros when the expected columns are missing.
        try:
            df['ScaledSex'] = df.sex.replace(0, -1)
            weight_gender = 10
            weight_age = 100
            min_age = 11.0
            max_age = 117.0
            #df.PatientAge = (df.PatientAge-min(df.PatientAge))/(max(df.PatientAge)-min(df.PatientAge))
            df['ScaledAge'] = (df.PatientAge - min_age) / (max_age - min_age)
            df.ScaledAge = weight_age * (df.ScaledAge - 0.5)
            df['ScaledSex'] = weight_gender * df.ScaledSex
        except:
            df['ScaledAge'] = 0
            df['ScaledSex'] = 0

    # Assemble the output columns according to the requested mode.
    if (mode == 'single') | (mode == 'extd'):
        LABELS = df.columns[-(num_labels +
                              1):-1] if name_labels == None else name_labels
        labels = df[LABELS].astype(int)
        paths = df['PATH'] if name_paths == None else df[name_paths]
        ages = df['ScaledAge'].astype(float)
        genders = df['ScaledSex'].astype(float)
        df_tmp = pd.concat([paths, ages, genders, labels], axis=1)
    elif mode == 'double':
        LABELS = df.columns[-(num_labels +
                              2):-2] if name_labels == None else name_labels
        labels = df[LABELS].astype(int)
        paths = df[df.columns[-2:]] if name_paths == None else df[name_paths]
        df_tmp = pd.concat([paths, labels], axis=1)
    else:
        raise RuntimeError

    entries = df_tmp

    logger.debug(f"{len(entries)} entries are loaded.")
    return entries
예제 #38
0
    def __init__(self, params: dict, dataset: LmSeqsDataset,
                 token_probs: torch.tensor, student: nn.Module,
                 teacher: nn.Module):
        """Set up the knowledge-distillation trainer.

        Builds the (optionally grouped/distributed) dataloader, the loss
        weights and loss functions, the AdamW optimizer with a linear
        warmup schedule, and optional fp16 (apex) and multi-GPU wrappers.

        Parameters
        ----------
        params : dict-like namespace of training hyper-parameters
            (dump_path, multi_gpu, fp16, n_gpu, batch_size, alpha_*, mlm,
            temperature, learning_rate, local_rank, is_master, ...).
        dataset : LmSeqsDataset of tokenized sequences.
        token_probs : per-token masking probabilities (used when mlm).
        student : model being trained.
        teacher : frozen model being distilled from.
        """
        logger.info('Initializing Distiller')
        self.params = params
        self.dump_path = params.dump_path
        self.multi_gpu = params.multi_gpu
        self.fp16 = params.fp16

        self.student = student
        self.teacher = teacher

        self.student_config = student.config
        self.vocab_size = student.config.vocab_size

        # Distributed runs shard the dataset; single-GPU runs shuffle it.
        if params.n_gpu <= 1:
            sampler = RandomSampler(dataset)
        else:
            sampler = DistributedSampler(dataset)

        # Optionally batch sequences of similar length together to reduce
        # padding waste.
        if params.group_by_size:
            groups = create_lengths_groups(lengths=dataset.lengths,
                                           k=params.max_model_input_size)
            sampler = GroupedBatchSampler(sampler=sampler,
                                          group_ids=groups,
                                          batch_size=params.batch_size)
        else:
            sampler = BatchSampler(sampler=sampler,
                                   batch_size=params.batch_size,
                                   drop_last=False)

        self.dataloader = DataLoader(dataset=dataset,
                                     batch_sampler=sampler,
                                     collate_fn=dataset.batch_sequences)

        # Softmax temperature for the distillation (CE) loss.
        self.temperature = params.temperature
        assert self.temperature > 0.

        # Relative weights of the individual loss terms.
        self.alpha_ce = params.alpha_ce
        self.alpha_mlm = params.alpha_mlm
        self.alpha_clm = params.alpha_clm
        self.alpha_mse = params.alpha_mse
        self.alpha_cos = params.alpha_cos

        # Masked-LM vs causal-LM objective for the language-model step.
        self.mlm = params.mlm
        if self.mlm:
            logger.info(f'Using MLM loss for LM step.')
            self.mlm_mask_prop = params.mlm_mask_prop
            assert 0.0 <= self.mlm_mask_prop <= 1.0
            assert params.word_mask + params.word_keep + params.word_rand == 1.0
            # Probabilities of mask / keep / random replacement for
            # selected tokens.
            self.pred_probs = torch.FloatTensor(
                [params.word_mask, params.word_keep, params.word_rand])
            self.pred_probs = self.pred_probs.to(
                f'cuda:{params.local_rank}'
            ) if params.n_gpu > 0 else self.pred_probs
            self.token_probs = token_probs.to(
                f'cuda:{params.local_rank}'
            ) if params.n_gpu > 0 else token_probs
            if self.fp16:
                self.pred_probs = self.pred_probs.half()
                self.token_probs = self.token_probs.half()
        else:
            logger.info(f'Using CLM loss for LM step.')

        # Bookkeeping counters for epochs / iterations / last losses.
        self.epoch = 0
        self.n_iter = 0
        self.n_total_iter = 0
        self.n_sequences_epoch = 0
        self.total_loss_epoch = 0
        self.last_loss = 0
        self.last_loss_ce = 0
        self.last_loss_mlm = 0
        self.last_loss_clm = 0
        if self.alpha_mse > 0.: self.last_loss_mse = 0
        if self.alpha_cos > 0.: self.last_loss_cos = 0
        self.last_log = 0

        # Loss functions; MSE/cosine losses only when their weights are on.
        self.ce_loss_fct = nn.KLDivLoss(reduction='batchmean')
        self.lm_loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
        if self.alpha_mse > 0.:
            self.mse_loss_fct = nn.MSELoss(reduction='sum')
        if self.alpha_cos > 0.:
            self.cosine_loss_fct = nn.CosineEmbeddingLoss(reduction='mean')

        logger.info('--- Initializing model optimizer')
        assert params.gradient_accumulation_steps >= 1
        self.num_steps_epoch = len(self.dataloader)
        num_train_optimization_steps = int(
            self.num_steps_epoch / params.gradient_accumulation_steps *
            params.n_epoch) + 1

        # Exclude biases and LayerNorm weights from weight decay.
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in student.named_parameters()
                if not any(nd in n for nd in no_decay) and p.requires_grad
            ],
            'weight_decay':
            params.weight_decay
        }, {
            'params': [
                p for n, p in student.named_parameters()
                if any(nd in n for nd in no_decay) and p.requires_grad
            ],
            'weight_decay':
            0.0
        }]
        logger.info(
            "------ Number of trainable parameters (student): %i" % sum([
                p.numel() for p in self.student.parameters() if p.requires_grad
            ]))
        logger.info("------ Number of parameters (student): %i" %
                    sum([p.numel() for p in self.student.parameters()]))
        self.optimizer = AdamW(optimizer_grouped_parameters,
                               lr=params.learning_rate,
                               eps=params.adam_epsilon,
                               betas=(0.9, 0.98))

        # Linear warmup followed by linear decay over all training steps.
        warmup_steps = math.ceil(num_train_optimization_steps *
                                 params.warmup_prop)
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=num_train_optimization_steps)

        # Optional mixed-precision training via apex.
        if self.fp16:
            try:
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )
            logger.info(
                f"Using fp16 training: {self.params.fp16_opt_level} level")
            self.student, self.optimizer = amp.initialize(
                self.student,
                self.optimizer,
                opt_level=self.params.fp16_opt_level)
            self.teacher = self.teacher.half()

        # Wrap the student for distributed training (apex variant for fp16).
        if self.multi_gpu:
            if self.fp16:
                from apex.parallel import DistributedDataParallel
                logger.info(
                    "Using apex.parallel.DistributedDataParallel for distributed training."
                )
                self.student = DistributedDataParallel(self.student)
            else:
                from torch.nn.parallel import DistributedDataParallel
                logger.info(
                    "Using nn.parallel.DistributedDataParallel for distributed training."
                )
                self.student = DistributedDataParallel(
                    self.student,
                    device_ids=[params.local_rank],
                    output_device=params.local_rank,
                    find_unused_parameters=True)

        # Only the master process writes tensorboard summaries.
        self.is_master = params.is_master
        if self.is_master:
            logger.info('--- Initializing Tensorboard')
            self.tensorboard = SummaryWriter(
                log_dir=os.path.join(self.dump_path, 'log', 'train'))
            self.tensorboard.add_text(tag='config/training',
                                      text_string=str(self.params),
                                      global_step=0)
            self.tensorboard.add_text(tag='config/student',
                                      text_string=str(self.student_config),
                                      global_step=0)
예제 #39
0
def startGame():
    '''
    Launch the game via its Steam install path, run LOOP_TIMES benchmark
    passes of random character control, then quit the game with Alt+F4.

    Returns:
        0 if the launcher could not be opened after 10 attempts, otherwise
        the win32api.ShellExecute result of the successful launch.
    '''
    exeFile = r'{STEAM_DIRECTORY}//{GAME_DIRECTORY}//GlimpseGame//Binaries//Win64//{GAME_EXECUTOR}'.format(
        STEAM_DIRECTORY=STEAM_DIRECTORY,
        GAME_DIRECTORY=GAME_DIRECTORY,
        GAME_EXECUTOR=GAME_EXECUTOR)

    ## Start game launcher
    # - return 0 and end the whole process, if failed
    # - otherwise, keep running the process
    # NOTE(review): pywin32's ShellExecute normally raises on failure rather
    # than returning a falsy value, so the `not launch_result` branch may be
    # unreachable — confirm against the pywin32 docs.
    tries = 10
    launch_result = 0
    while tries != 0:
        logger.info("Opening Game Launcher")
        launch_result = win32api.ShellExecute(1, 'open', exeFile, '', '', 1)
        if tries == 1 and not launch_result:
            # Out of retries: capture a screenshot for later diagnosis.
            screenShootName = utils.screen.saveScreenShoot(
                GAME_NAME, "OpenLauncherFailed")
            logger.error(
                'Opening Game Launcher Failed! Screenshoot Created: %s' %
                screenShootName)
            print(
                "****** Failed to open Game Launcher!!! Process stopped ******\n"
            )
            return 0
        if launch_result:
            logger.info("Open Game Launcher Succeed")
            print("Open Game Launcher Succeed!!")
            break
        else:
            tries -= 1
            time.sleep(1)

    time.sleep(10)

    logger.info(_TAB + 'Waiting for game to start')
    ## Give the game 60 sec to start (previous comment said 25 sec but the
    ## sleep below is 60)
    print("Waiting for game to start...")
    time.sleep(60)

    ####################################################################################
    # Start Game
    loop = LOOP_TIMES
    while (loop != 0):
        time.sleep(5)

        # Click repeatedly to get past the "press button to start" screen
        clicks = 10
        while (clicks != 0):
            time.sleep(0.5)
            clicks -= 1
            utils.input.clickLeft(960, 540)

        time.sleep(20)

        # Confirm the menus with ENTER via TinyTask macros
        utils.keyboardUtils.callTinyTask("enter")
        time.sleep(10)
        utils.keyboardUtils.callTinyTask("enter")

        logger.info(_TAB + 'Starting Testing')
        print("Start Testing...")

        ## Perform random character control for 5 min
        utils.keyboardUtils.randomCharacterControl(300)

        if loop == -1:
            # A counter of -1 runs a single pass and stops here (a negative
            # counter decremented below would otherwise never reach 0).
            break

        else:
            loop -= 1
        logger.info('Loop times remained: %s' % loop)
        print("Loop times remained: %s\n" % loop)

    logger.info(_TAB + 'All Loop Finished')
    print("Finished!")
    ####################################################################################

    # Quit Game
    time.sleep(10)
    utils.input.key_alt_f4()

    return launch_result
예제 #40
0
def normalize_message(message):
    """Rewrite a Telegram message as a Markdown string.

    Walks message.entities in order, replacing each entity span with its
    Markdown-friendly equivalent (links, mentions, bold/italic/code, ...)
    while keeping the plain text between entities unchanged.
    """
    # Work on the UTF-16LE encoding so entity offsets/lengths (expressed in
    # UTF-16 code units) apply safely even when emojis are present. Each
    # code unit is two bytes, hence the doubled indices below.
    encoded = message.text.encode('utf-16le')

    pieces = []
    prev_end = 0  # end (in code units) of the previously handled entity

    for entity in message.entities:
        start = entity.offset
        stop = entity.offset + entity.length

        # Decode just this entity's span back to text.
        span = encoded[2 * start:2 * stop].decode('utf-16le')

        if entity.type == 'text_link':
            # Hidden link: keep the display text, point it at entity.url
            rendered = '[{1}]({0})'.format(entity.url, span)
        elif entity.type == 'url':
            # Plain link
            rendered = '[{0}]({0})'.format(span)
        elif entity.type == 'email':
            rendered = '[{0}](mailto:{0})'.format(span)
        elif entity.type == 'mention':
            # '@name' -> linked mention; strip the leading '@' for the URL
            rendered = '[@{0}](https://t.me/{0})'.format(span[1:])
        elif entity.type == 'bold':
            rendered = '*{}*'.format(span)
        elif entity.type == 'italic':
            rendered = '_{}_'.format(span)
        elif entity.type == 'code':
            rendered = '`{}`'.format(span)
        elif entity.type == 'pre':
            rendered = '```{}```'.format(span)
        else:
            # Unknown entity kinds pass through as plain text.
            logger.info('Unrecognized entity of type {0}, offset {1} and length {2} detected. '\
                        'Adding as text'.format(entity.type, entity.offset, entity.length))
            rendered = span

        # Plain text since the previous entity, then the rewritten entity.
        pieces.append(encoded[2 * prev_end:2 * start].decode('utf-16le'))
        pieces.append(rendered)
        prev_end = stop

    # Trailing text after the last entity (or the whole message if none).
    pieces.append(encoded[2 * prev_end:].decode('utf-16le'))
    return ''.join(pieces)
예제 #41
0
    def train(self):
        """ Main actor learner loop for advantage actor critic learning.

        Repeatedly syncs the local network with shared memory, rolls out up
        to `max_local_steps` action *sequences* in the emulator, computes
        n-step discounted returns and advantages, and applies the resulting
        gradients to the shared parameters. Runs until the shared global
        step counter reaches `max_global_steps`.
        """
        logger.debug("Actor {} resuming at Step {}".format(
            self.actor_id, self.global_step.value()))

        s = self.emulator.get_initial_state()
        total_episode_reward = 0

        # Rollout buffers accumulated until an update is applied.
        s_batch = []
        a_batch = []
        y_batch = []
        adv_batch = []
        seq_len_batch = []

        reset_game = False
        episode_over = False
        start_time = time.time()
        steps_at_last_reward = self.local_step

        while (self.global_step.value() < self.max_global_steps):
            # Sync local learning net with shared mem
            self.sync_net_with_shared_memory(self.local_network,
                                             self.learning_vars)
            self.save_vars()

            local_step_start = self.local_step

            rewards = []
            states = []
            actions = []
            values = []
            seq_lengths = []

            # Roll out until the episode ends or max_local_steps action
            # sequences have been collected.
            while not (episode_over or (self.local_step - local_step_start
                                        == self.max_local_steps)):

                # Choose next action and execute it
                action_sequence, readout_v_t = self.sample_action_sequence(s)
                # if self.is_master() and (self.local_step % 100 == 0):
                #     logger.debug("pi={}, V={}".format(readout_pi_t, readout_v_t))

                acc_reward = 0.0
                length = 0

                # Execute the sampled sequence one action at a time; the
                # extra action index (== self.num_actions) acts as an
                # end-of-sequence token and stops execution early.
                for action in action_sequence:
                    length += 1
                    a = np.argmax(action)
                    if a == self.num_actions or episode_over:
                        break

                    new_s, reward, episode_over = self.emulator.next(
                        action[:self.num_actions])
                    acc_reward += reward

                # The whole sequence earns a single accumulated reward.
                reward = acc_reward
                if reward != 0.0:
                    steps_at_last_reward = self.local_step

                total_episode_reward += reward
                # Rescale or clip immediate reward
                reward = self.rescale_reward(reward)

                rewards.append(reward)
                seq_lengths.append(length)
                states.append(s)
                actions.append(action_sequence)
                values.append(readout_v_t)

                s = new_s
                self.local_step += 1
                self.global_step.increment()

                if self.local_step % 1000 == 0:
                    pass
                    # Pdb().set_trace()

            # Calculate the value offered by critic in the new state.
            if episode_over:
                R = 0
            else:
                R = self.session.run(
                    self.local_network.output_layer_v,
                    feed_dict={self.local_network.input_ph: [new_s]})[0][0]

            # Walk the rollout backwards computing n-step discounted returns
            # (y_batch) and advantages (return minus critic value).
            sel_actions = []
            for i in reversed(range(len(states))):
                R = rewards[i] + self.gamma * R

                y_batch.append(R)
                a_batch.append(actions[i])
                s_batch.append(states[i])
                adv_batch.append(R - values[i])
                seq_len_batch.append(seq_lengths[i])

                sel_actions.append(np.argmax(actions[i]))

            # Zero-pad every action sequence to the longest in the batch so
            # they stack into one (batch, max_len, num_actions+1) tensor.
            padded_output_sequences = np.array([
                np.vstack([
                    seq[:length, :],
                    np.zeros(
                        (max(seq_len_batch) - length, self.num_actions + 1))
                ]) for length, seq in zip(seq_len_batch, a_batch)
            ])

            # Decoder inputs: a <GO> token (one-hot at index num_actions)
            # followed by the outputs shifted right by one position.
            go_input = np.zeros((len(s_batch), 1, self.num_actions + 1))
            go_input[:, :, self.num_actions] = 1
            padded_input_sequences = np.hstack(
                [go_input, padded_output_sequences[:, :-1, :]])

            print('Sequence lengths:', seq_lengths)
            print('Actions:', [np.argmax(a) for a in a_batch[0]])

            allowed_actions = np.ones(
                (len(s_batch), max(seq_len_batch), self.num_actions + 1))
            allowed_actions[:, 0,
                            -1] = 0  #empty sequence is not a valid action

            feed_dict = {
                self.local_network.input_ph:
                s_batch,
                self.local_network.critic_target_ph:
                y_batch,
                self.local_network.adv_actor_ph:
                adv_batch,
                self.local_network.decoder_initial_state:
                np.zeros((len(s_batch),
                          self.local_network.decoder_hidden_state_size * 2)),
                self.local_network.action_inputs:
                padded_input_sequences,
                self.local_network.action_outputs:
                padded_output_sequences,
                self.local_network.allowed_actions:
                allowed_actions,
                self.local_network.use_fixed_action:
                True,
                self.local_network.decoder_seq_lengths:
                seq_lengths,
                self.local_network.temperature:
                1.0,
            }
            entropy, grads = self.session.run(
                [
                    self.local_network.entropy,
                    # self.local_network.adv_critic,
                    # self.local_network.output_layer_v,
                    self.local_network.get_gradients
                ],
                feed_dict=feed_dict)

            print('Entropy:', entropy)  #, 'Adv:', advantage #, 'Value:', value
            self.apply_gradients_to_shared_memory_vars(grads)

            # Clear the accumulated batch after each update.
            s_batch = []
            a_batch = []
            y_batch = []
            adv_batch = []
            seq_len_batch = []

            # prevent the agent from getting stuck: force an episode end if
            # no reward was seen for 5000 local steps or all lives are gone.
            if (self.local_step - steps_at_last_reward > 5000
                    or (self.emulator.get_lives() == 0
                        and self.emulator.game not in ONE_LIFE_GAMES)):

                steps_at_last_reward = self.local_step
                episode_over = True
                reset_game = True

            # Start a new game on reaching terminal state
            if episode_over:
                elapsed_time = time.time() - start_time
                global_t = self.global_step.value()
                steps_per_sec = global_t / elapsed_time
                perf = "{:.0f}".format(steps_per_sec)
                logger.info(
                    "T{} / STEP {} / REWARD {} / {} STEPS/s, Actions {}".
                    format(self.actor_id, global_t, total_episode_reward, perf,
                           sel_actions))
                self.vis.plot_current_errors(global_t, total_episode_reward)
                self.log_summary(total_episode_reward, entropy)

                episode_over = False
                total_episode_reward = 0
                steps_at_last_reward = self.local_step

                if reset_game or self.emulator.game in ONE_LIFE_GAMES:
                    s = self.emulator.get_initial_state()
                    reset_game = False
예제 #42
0
                                 args=(i, cls.url, sleep)))
            workers[-1].start()
        for th in workers:
            th.join()

    @classmethod
    def test_async_sleep_url(cls, n, sleep=1):
        """Fire n concurrent GETs at cls.async_url, then wait for them all."""
        threads = []
        for idx in range(n):
            worker = threading.Thread(target=cls.one_request,
                                      args=(idx, cls.async_url, sleep))
            worker.start()
            threads.append(worker)
        # Block until every request thread has finished.
        for worker in threads:
            worker.join()

    @classmethod
    def one_request(cls, tid, url, sleep):
        """Issue one GET with the sleep duration as the 'ts' query param."""
        response = requests.get(url=url, params={'ts': sleep})
        logger.info('thread: {}, response: {}'.format(tid, response.json()))


if __name__ == '__main__':
    import time

    # Benchmark the blocking endpoint, then the async one, 5 threads each.
    tic = time.time()
    MultiThreadsClient.test_sleep_url(5, 1)
    logger.info('sleep url: {}'.format(time.time() - tic))
    tic = time.time()
    MultiThreadsClient.test_async_sleep_url(5, 1)
    # Fix: label the second timing with the endpoint it actually measures
    # (previously a copy-pasted 'sleep url' label).
    logger.info('async sleep url: {}'.format(time.time() - tic))
예제 #43
0
def predict(args, model, tokenizer, prefix=""):
    """Predict entities on the test split and write the results to disk.

    Runs the model example-by-example (batch size 1), decodes tag sequences
    with the model's CRF head, and writes one JSON record per example to
    <output_dir>/<prefix>/test_prediction.json. For the 'cluener' task it
    additionally converts the predictions into the competition submission
    format (test_submit.json).

    Args:
        args: run configuration (output_dir, device, model_type, id2label,
            markup, task_name, data_dir, local_rank, ...).
        model: trained token-classification model exposing a `crf` attribute.
        tokenizer: tokenizer used to build the cached test dataset.
        prefix: optional sub-directory name for the output files.
    """
    pred_output_dir = args.output_dir
    if not os.path.exists(pred_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(pred_output_dir)
    test_dataset = load_and_cache_examples(args,
                                           args.task_name,
                                           tokenizer,
                                           data_type='test')
    # Note that DistributedSampler samples randomly
    test_sampler = SequentialSampler(
        test_dataset) if args.local_rank == -1 else DistributedSampler(
            test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=test_sampler,
                                 batch_size=1,
                                 collate_fn=collate_fn)
    # Eval!
    logger.info("***** Running prediction %s *****", prefix)
    logger.info("  Num examples = %d", len(test_dataset))
    logger.info("  Batch size = %d", 1)
    results = []
    output_predict_file = os.path.join(pred_output_dir, prefix,
                                       "test_prediction.json")
    pbar = ProgressBar(n_total=len(test_dataloader), desc="Predicting")

    # Unwrap DataParallel so model.crf is reachable below.
    if isinstance(model, nn.DataParallel):
        model = model.module
    for step, batch in enumerate(test_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": None,
                'input_lens': batch[4]
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don"t use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            logits = outputs[0]
            tags = model.crf.decode(logits, inputs['attention_mask'])
            tags = tags.squeeze(0).cpu().numpy().tolist()
        # Drop the special tokens at both ends of the decoded sequence.
        preds = tags[0][1:-1]  # [CLS]XXXX[SEP]
        label_entities = get_entities(preds, args.id2label, args.markup)
        json_d = {}
        json_d['id'] = step
        json_d['tag_seq'] = " ".join([args.id2label[x] for x in preds])
        json_d['entities'] = label_entities
        results.append(json_d)
        pbar(step)
    logger.info("\n")
    with open(output_predict_file, "w") as writer:
        for record in results:
            writer.write(json.dumps(record) + '\n')
    if args.task_name == 'cluener':
        # Convert span predictions into the CLUENER submission schema:
        # {id, label: {tag: {entity_text: [[start, end], ...]}}}
        output_submit_file = os.path.join(pred_output_dir, prefix,
                                          "test_submit.json")
        test_text = []
        with open(os.path.join(args.data_dir, "test.json"), 'r') as fr:
            for line in fr:
                test_text.append(json.loads(line))
        test_submit = []
        for x, y in zip(test_text, results):
            json_d = {}
            json_d['id'] = x['id']
            json_d['label'] = {}
            entities = y['entities']
            words = list(x['text'])
            if len(entities) != 0:
                for subject in entities:
                    tag = subject[0]
                    start = subject[1]
                    end = subject[2]
                    word = "".join(words[start:end + 1])
                    if tag in json_d['label']:
                        if word in json_d['label'][tag]:
                            json_d['label'][tag][word].append([start, end])
                        else:
                            json_d['label'][tag][word] = [[start, end]]
                    else:
                        json_d['label'][tag] = {}
                        json_d['label'][tag][word] = [[start, end]]
            test_submit.append(json_d)
        json_to_text(output_submit_file, test_submit)
예제 #44
0
 def one_request(cls, tid, url, sleep):
     """Issue a single GET against *url*, passing the sleep via 'ts' param.

     NOTE(review): duplicate of MultiThreadsClient.one_request above; the
     one-space indentation suggests this fragment was pasted without its
     enclosing class — confirm whether it belongs here at all.
     """
     rs = requests.get(url=url, params={'ts': sleep})
     logger.info('thread: {}, response: {}'.format(tid, rs.json()))
예제 #45
0
def main():
    """CLI entry point: configure environment, then train/evaluate/predict.

    Parses command-line arguments, prepares the output directory and log
    file, sets up CUDA / distributed training, seeds RNGs, loads the
    pretrained config/tokenizer/model for the selected NER task, and runs
    training, evaluation and prediction according to the do_* flags.
    """
    args = get_argparse().parse_args()

    # Create the output path (one subdirectory per model type).
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # NOTE(review): no path separator is inserted before the model type, so
    # this extends the last path component rather than nesting — confirm
    # this is intentional.
    args.output_dir = args.output_dir + "{}".format(args.model_type)
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    time_ = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())

    # Set up the per-run log file.
    init_logger(log_file=args.output_dir +
                f"/{args.model_type}-{args.task_name}-{time_}.log")

    # Refuse to clobber a non-empty output directory unless explicitly
    # allowed. NOTE(review): init_logger above may already have created the
    # log file inside output_dir, which would make this check always fire —
    # consider running this check before initializing the logger.
    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir
    ) and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # setup cuda, gpu & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:
        # Distributed mode: each process owns exactly one GPU.
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    logger.warning(
        "Process rank: %s,device: %s ,n_gpus: %s,distributed training: %s,16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # set seed
    seed_everyting(args.seed)

    # prepare ner task
    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % (args.task_name))

    processor = processors[args.task_name]()

    label_list = processor.get_labels()

    args.id2label = {i: label for i, label in enumerate(label_list)}
    args.label2id = {label: i for i, label in enumerate(label_list)}

    num_labels = len(label_list)

    # Load pretrained model and tokenizer. Only the first process downloads
    # the model; the barrier makes the others wait until it is cached.
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()
    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        cache_dir=args.cache_dir if args.cache_dir else None)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None)
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None)

    if args.local_rank == 0:
        torch.distributed.barrier()

    model.to(args.device)
    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                args.task_name,
                                                tokenizer,
                                                data_type="train")
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info("global_step = %s,average loss= %s", global_step, tr_loss)

    # Saving best-practice: if you use default names for the model, you can
    # reload it using from_pretrained. Only the main process saves.
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint in %s", args.output_dir)

        # Fix: unwrap DataParallel/DDP via hasattr. The original
        # `model is hasattr(model, "module")` compared the model to a bool,
        # so the wrapper was never unwrapped before saving.
        model_to_save = (model.module
                         if hasattr(model, "module") else model)
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_vocabulary(args.output_dir)
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

    #  Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(
                logging.WARNING)

        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                "/")[-1] if checkpoint.find("checkpoint") != -1 else ""
            model = model_class.from_pretrained(checkpoint, config=config)
            model.to(args.device)
            result = evaluate(args, model, tokenizer, prefix=prefix)

            if global_step:
                result = {
                    "{}_{}".format(global_step, k): v
                    for k, v in result.items()
                }
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            for key in sorted(results.keys()):
                writer.write("{} = {}\n".format(key, str(results[key])))

    # predict
    if args.do_predict and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]

        if args.predict_checkpoints > 0:
            # Restrict prediction to the single requested checkpoint.
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARNING)
            checkpoints = [
                x for x in checkpoints
                if x.split("-")[-1] == str(args.predict_checkpoints)
            ]

        # Fix: this loop was nested inside the predict_checkpoints > 0
        # branch, so --do_predict alone (predict_checkpoints == 0) never
        # actually ran prediction on the default checkpoint.
        for checkpoint in checkpoints:
            prefix = checkpoint.split(
                "/")[-1] if checkpoint.find("checkpoint") != -1 else ""
            model = model_class.from_pretrained(checkpoint, config=config)
            model.to(args.device)
            predict(args, model, tokenizer, prefix)
예제 #46
0
def test(rank, args, shared_model, gl_step_cnt):
    """A3C evaluation worker: plays episodes greedily and logs results.

    Runs forever. At the start of each episode the latest weights are
    copied from `shared_model`; actions are then chosen greedily (argmax of
    the policy output). After each episode the reward, length and global
    steps/second are logged and sent to TensorBoard, followed by a 60s
    pause before the next episode.

    Args:
        rank: worker index used to offset the RNG / environment seed.
        args: run configuration (env_name, seed, max_episode_length, ...).
        shared_model: shared ActorCritic whose weights are evaluated.
        gl_step_cnt: shared global step counter (read for logging only).
    """
    torch.manual_seed(args.seed + rank)

    env = create_atari_env(args.env_name)
    env.seed(args.seed + rank)

    model = ActorCritic(env.observation_space.shape[0], env.action_space)

    model.eval()

    state = env.reset()
    state = torch.from_numpy(state)
    reward_sum = 0
    done = True

    start_time = time.time()

    local_episode_num = 0

    # a quick hack to prevent the agent from stucking
    actions = deque(maxlen=100)
    episode_length = 0
    while True:
        episode_length += 1
        # Sync with the shared model at every episode start.
        # NOTE(review): Variable/volatile are pre-0.4 PyTorch APIs
        # (superseded by torch.no_grad()); kept as-is for compatibility.
        if done:
            model.load_state_dict(shared_model.state_dict())
            cx = Variable(torch.zeros(1, 256), volatile=True)
            hx = Variable(torch.zeros(1, 256), volatile=True)
        else:
            # Carry the recurrent (hx, cx) state across steps in an episode.
            cx = Variable(cx.data, volatile=True)
            hx = Variable(hx.data, volatile=True)

        value, logit, (hx, cx) = model((Variable(state.unsqueeze(0),
                                                 volatile=True), (hx, cx)))
        prob = F.softmax(logit)
        # Greedy action: no sampling during evaluation.
        action = prob.max(1)[1].data.numpy()

        state, reward, done, _ = env.step(action[0, 0])
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        # a quick hack to prevent the agent from stucking: if one action
        # fills the whole 100-step window, force the episode to end.
        actions.append(action[0, 0])
        if actions.count(actions[0]) == actions.maxlen:
            done = True

        if done:
            passed_time = time.time() - start_time
            local_episode_num += 1
            global_step_count = gl_step_cnt.get_value()

            logger.info("Time {}, episode reward {}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss", time.gmtime(passed_time)),
                reward_sum, episode_length))
            tb.log_value('steps_second', global_step_count / passed_time,
                         global_step_count)
            tb.log_value('reward', reward_sum, global_step_count)

            # Reset episode bookkeeping and throttle evaluation frequency.
            reward_sum = 0
            episode_length = 0
            actions.clear()
            state = env.reset()
            time.sleep(60)

        state = torch.from_numpy(state)
예제 #47
0
    def train(self, sess, summary_writer, data_A, data_B):
        """Run the CycleGAN training loop.

        Trains for (num_initial_iter + num_decay_iter) epochs over the
        smaller of the two datasets: a constant learning rate for the first
        phase, then a linear decay toward zero. Discriminators are fed
        fakes drawn from a 50-image history queue. Every `_log_step` steps
        a summary is written and the progress-bar description is updated.

        Args:
            sess: TensorFlow session running the graph.
            summary_writer: writer used for TensorBoard summaries.
            data_A: list of images from domain A.
            data_B: list of images from domain B.
        """
        logger.info('Start training.')
        logger.info('  {} images from A'.format(len(data_A)))
        logger.info('  {} images from B'.format(len(data_B)))

        # One epoch covers as many full batches as the smaller dataset allows.
        data_size = min(len(data_A), len(data_B))
        num_batch = data_size // self._batch_size
        epoch_length = num_batch * self._batch_size

        num_initial_iter = 10
        num_decay_iter = 10
        lr = lr_initial = 0.0002
        lr_decay = lr_initial / num_decay_iter

        # Queues of previously generated fakes fed to the discriminators.
        history_a = HistoryQueue(shape=self._image_shape, size=50)
        history_b = HistoryQueue(shape=self._image_shape, size=50)

        initial_step = sess.run(self.global_step)
        num_global_step = (num_initial_iter + num_decay_iter) * epoch_length
        t = trange(initial_step,
                   num_global_step,
                   total=num_global_step,
                   initial=initial_step)

        for step in t:
            #TODO: resume training with global_step
            epoch = step // epoch_length
            iter = step % epoch_length

            # Linear learning-rate decay after the initial constant phase.
            if epoch > num_initial_iter:
                lr = max(0.0,
                         lr_initial - (epoch - num_initial_iter) * lr_decay)

            # Reshuffle both datasets at the start of each epoch.
            if iter == 0:
                random.shuffle(data_A)
                random.shuffle(data_B)

            image_a = np.stack(data_A[iter * self._batch_size:(iter + 1) *
                                      self._batch_size])
            image_b = np.stack(data_B[iter * self._batch_size:(iter + 1) *
                                      self._batch_size])
            # Generate fakes, then mix them through the history queues.
            fake_a, fake_b = sess.run([self.image_ba, self.image_ab],
                                      feed_dict={
                                          self.image_a: image_a,
                                          self.image_b: image_b,
                                          self.is_train: True
                                      })
            fake_a = history_a.query(fake_a)
            fake_b = history_b.query(fake_b)

            # Joint update of both discriminators and both generators.
            fetches = [
                self.loss_D_a, self.loss_D_b, self.loss_G_ab, self.loss_G_ba,
                self.loss_cycle, self.optimizer_D_a, self.optimizer_D_b,
                self.optimizer_G_ab, self.optimizer_G_ba
            ]
            if step % self._log_step == 0:
                fetches += [self.summary_op]

            fetched = sess.run(fetches,
                               feed_dict={
                                   self.image_a: image_a,
                                   self.image_b: image_b,
                                   self.is_train: True,
                                   self.lr: lr,
                                   self.history_fake_a: fake_a,
                                   self.history_fake_b: fake_b
                               })

            if step % self._log_step == 0:
                summary_writer.add_summary(fetched[-1], step)
                summary_writer.flush()
                t.set_description(
                    'Loss: D_a({:.3f}) D_b({:.3f}) G_ab({:.3f}) G_ba({:.3f}) cycle({:.3f})'
                    .format(fetched[0], fetched[1], fetched[2], fetched[3],
                            fetched[4]))
def train(model_class):
    """Train neural network for a number of steps.

    Builds the training graph (plus a variable-sharing evaluation graph
    when ``FLAGS.eval_step > 0``), starts a ``tf.train.Supervisor``-managed
    session and loops until the input pipeline is exhausted
    (``OutOfRangeError``) or the supervisor requests a stop.  Console
    logging, summary writing, checkpointing and evaluation are triggered
    every ``FLAGS.print_step`` / ``summary_step`` / ``checkpoint_step`` /
    ``eval_step`` steps respectively.

    Args:
        model_class: class building the model graph; must accept an
            ``is_train`` keyword and expose ``train_step(sess)`` returning
            ``(step, loss_value, top_k)`` (and ``eval_once(sess)`` when
            evaluation is enabled).
    """
    logger.info("\nstart training...")
    with tf.Graph().as_default():
        # build computing graph
        with tf.variable_scope("model", reuse=None):
            model_train = model_class(is_train=True)
        if FLAGS.eval_step > 0:
            # reuse=True so the eval model shares the training variables
            with tf.variable_scope("model", reuse=True):
                model_eval = model_class(is_train=False)

        saver = tf.train.Saver(tf.all_variables(),
                               max_to_keep=FLAGS.num_checkpoints)
        # save_summaries_secs=0 / save_model_secs=0 disable the
        # Supervisor's automatic background saving; it is done
        # explicitly inside the loop below.
        sv = tf.train.Supervisor(logdir=RESULT_DIR,
                                 saver=saver,
                                 save_summaries_secs=0,
                                 save_model_secs=0)

        logger.newline()
        logger.info("start building Graph (This might take a while)")
        # Start running operations on the Graph.
        sess = sv.prepare_or_wait_for_session(config=tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement))

        logger.newline()
        logger.info("start training...")
        try:
            while not sv.should_stop():
                start_time = time.time()
                step, loss_value, top_k = model_train.train_step(sess)
                duration = time.time() - start_time

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                # print current state
                if step % FLAGS.print_step == 0:
                    num_examples_per_step = FLAGS.batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    # fraction of correct top-k predictions in this batch
                    precision = np.sum(top_k) / FLAGS.batch_size

                    format_str = (
                        'step %d, loss = %.2f, precision = %.2f (%.1f '
                        'examples/sec; %.3f sec/batch)')
                    logger.info(format_str % (step, loss_value, precision,
                                              examples_per_sec, sec_per_batch))

                # save summary
                if step % FLAGS.summary_step == 0:
                    summary_str = sess.run(sv.summary_op)
                    sv.summary_writer.add_summary(summary_str, step)
                    logger.info("step: {}, wrote summaries.".format(step))

                # Save the model checkpoint periodically and eval on test set.
                if FLAGS.checkpoint_step > 0 and step % FLAGS.checkpoint_step == 0:
                    saver_path = sv.saver.save(sess,
                                               CHECKPOINT_PATH,
                                               global_step=step)
                    logger.newline(2)
                    logger.info("Saved model checkpoint to {}\n\n".format(
                        saver_path))

                if FLAGS.eval_step > 0 and step % FLAGS.eval_step == 0:
                    logger.newline(2)
                    logger.info("evaluating current model:")
                    precision = model_eval.eval_once(sess)
                    logger.info('%s: precision @ 1 = %.3f' %
                                (time.strftime("%c"), precision))

                    # Append the eval precision as an extra scalar on top
                    # of the regular summaries before writing.
                    summary = tf.Summary()
                    summary.ParseFromString(sess.run(sv.summary_op))
                    summary.value.add(tag='precision @ 1',
                                      simple_value=precision)
                    sv.summary_writer.add_summary(summary, step)
                    logger.info("write eval summary\n\n")

                # sleep for test use
                if FLAGS.sleep > 0:
                    logger.info("sleep {} second...".format(FLAGS.sleep))
                    time.sleep(FLAGS.sleep)
        except tf.errors.OutOfRangeError:
            # Input queue exhausted: this is the normal end of training.
            logger.info("sv checkpoint saved path: " + sv.save_path)
            logger.info("Done~\n\n")
        finally:
            sv.request_stop()
        sv.wait_for_stop()
        sess.close()
예제 #49
0
    def run(self):
        """Accept and service TCP/SSL client sessions with a poll() loop.

        Binds a listening socket on ``self.host``/``self.port`` using the
        first working address family, then multiplexes the listening
        socket and all client sessions with ``select.poll`` until
        ``self.shared`` reports stopped.  Per session it drives a
        non-blocking SSL handshake, reads and dispatches client commands,
        flushes queued responses, and garbage-collects idle sessions.

        Fix vs. original: ``self.fd_to_session`` is snapshot with
        ``list(...)`` wherever ``stop_session`` may pop entries while we
        iterate — iterating a live dict view during mutation raises
        ``RuntimeError`` on Python 3.
        """

        for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, socket.SOCK_STREAM):
            af, socktype, proto, cannonname, sa = res
            try:
                sock = socket.socket(af, socktype, proto)
                sock.setblocking(0)
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            except socket.error:
                sock = None
                continue
            try:
                sock.bind(sa)
                sock.listen(5)
            except socket.error:
                sock.close()
                sock = None
                continue
            break
        host = sa[0]
        if af == socket.AF_INET6:
            host = "[%s]" % host
        if sock is None:
            print_log( "could not open " + ("SSL" if self.use_ssl else "TCP") + " socket on %s:%d" % (host, self.port))
            return
        print_log( ("SSL" if self.use_ssl else "TCP") + " server started on %s:%d" % (host, self.port))

        sock_fd = sock.fileno()
        poller = select.poll()
        poller.register(sock)

        def stop_session(fd):
            """Unregister *fd* from the poller and close its session."""
            try:
                # unregister before we close s 
                poller.unregister(fd)
            except BaseException as e:
                logger.error('unregister error:' + str(e))
            session = self.fd_to_session.pop(fd)
            # this will close the socket
            session.stop()

        def check_do_handshake(session):
            """Advance a non-blocking SSL handshake; no-op once complete."""
            if session.handshake:
                return
            try:
                session._connection.do_handshake()
            except ssl.SSLError as err:
                if err.args[0] == ssl.SSL_ERROR_WANT_READ:
                    return
                elif err.args[0] == ssl.SSL_ERROR_WANT_WRITE:
                    poller.modify(session.raw_connection, READ_WRITE)
                    return
                else:
                    raise BaseException(str(err))
            poller.modify(session.raw_connection, READ_ONLY)
            session.handshake = True

        redo = []

        while not self.shared.stopped():

            if self.shared.paused():
                # Snapshot: stop_session() mutates fd_to_session while we
                # iterate over it.
                sessions = list(self.fd_to_session.keys())
                if sessions:
                    logger.info("closing %d sessions"%len(sessions))
                for fd in sessions:
                    stop_session(fd)
                time.sleep(1)
                continue

            if redo:
                events = redo
                redo = []
            else:
                now = time.time()
                # Snapshot: stop_session() below pops from fd_to_session.
                for fd, session in list(self.fd_to_session.items()):
                    if now - session.time > 0.01 and session.message:
                        cmd = session.parse_message()
                        if not cmd: 
                            break
                        if cmd == 'quit':
                            data = False
                            break
                        session.time = now
                        self.handle_command(cmd, session)

                    # check sessions that need to write
                    if session.need_write:
                        poller.modify(session.raw_connection, READ_WRITE)
                        session.need_write = False
                    # collect garbage
                    if now - session.time > session.timeout:
                        stop_session(fd)

                events = poller.poll(TIMEOUT)

            for fd, flag in events:
                # open new session
                if fd == sock_fd:
                    if flag & (select.POLLIN | select.POLLPRI):
                        try:
                            connection, address = sock.accept()
                            session = TcpSession(self.dispatcher, connection, address, 
                                                 use_ssl=self.use_ssl, ssl_certfile=self.ssl_certfile, ssl_keyfile=self.ssl_keyfile)
                        except BaseException as e:
                            # NOTE(review): if accept() itself raised,
                            # 'connection'/'address' are unbound here —
                            # TODO confirm and guard.
                            logger.error("cannot start TCP session" + str(e) + ' ' + repr(address))
                            connection.close()
                            continue
                        connection = session._connection
                        connection.setblocking(False)
                        self.fd_to_session[connection.fileno()] = session
                        poller.register(connection, READ_ONLY)
                    continue
                # existing session
                session = self.fd_to_session[fd]
                s = session._connection
                # non-blocking handshake
                try:
                    check_do_handshake(session)
                except BaseException as e:
                    #logger.error('handshake failure:' + str(e) + ' ' + repr(session.address))
                    stop_session(fd)
                    continue
                # anti DOS
                now = time.time()
                if now - session.time < 0.01:
                    continue
                # handle inputs
                if flag & (select.POLLIN | select.POLLPRI):
                    try:
                        data = s.recv(self.buffer_size)
                    except ssl.SSLError as x:
                        if x.args[0] == ssl.SSL_ERROR_WANT_READ: 
                            pass
                        elif x.args[0] == ssl.SSL_ERROR_SSL: 
                            pass
                        else:
                            logger.error('SSL recv error:'+ repr(x))
                        continue 
                    except socket.error as x:
                        # 104 = ECONNRESET: common client disconnect, not logged
                        if x.args[0] != 104:
                            logger.error('recv error: ' + repr(x) +' %d'%fd)
                        stop_session(fd)
                        continue
                    except ValueError as e:
                        logger.error('recv error: ' + str(e) +' %d'%fd)
                        stop_session(fd)
                        continue
                    if data:
                        session.message += data
                        # a full buffer suggests more pending data: poll again
                        if len(data) == self.buffer_size:
                            redo.append((fd, flag))
                        
                    if not data:
                        # empty read means the peer closed the connection
                        stop_session(fd)
                        continue

                elif flag & select.POLLHUP:
                    print_log('client hung up', session.address)
                    stop_session(fd)

                elif flag & select.POLLOUT:
                    # Socket is ready to send data, if there is any to send.
                    if session.retry_msg:
                        next_msg = session.retry_msg
                    else:
                        try:
                            next_msg = session.response_queue.get_nowait()
                        except queue.Empty:
                            # No messages waiting so stop checking for writability.
                            poller.modify(s, READ_ONLY)
                            continue
                    try:
                        sent = s.send(next_msg)
                    except socket.error as x:
                        logger.error("send error:" + str(x))
                        stop_session(fd)
                        continue
                    # keep whatever the kernel did not accept for retry
                    session.retry_msg = next_msg[sent:]

                elif flag & select.POLLERR:
                    print_log('handling exceptional condition for', session.address)
                    stop_session(fd)

                elif flag & select.POLLNVAL:
                    print_log('invalid request', session.address)
                    stop_session(fd)


        print_log('TCP thread terminating', self.shared.stopped())
예제 #50
0
def print_result(message_data, result):
    """Log the outcome of a processed message.

    Args:
        message_data: mapping that carries a ``message_id`` key.
        result: the value to report for that message.
    """
    outcome = f"The result of message {message_data['message_id']} was {result}."
    logger.info(outcome)
예제 #51
0
    def train(self, sess, summary_writer, data_A, data_B):
        """Run the full training loop over the paired datasets.

        Trains with a fixed learning rate for ``num_initial_iter`` epochs,
        then decays it linearly over ``num_decay_iter`` epochs.  Every
        ``self._log_step`` steps a sample translation is written to
        ``results/`` and summaries are flushed.

        Args:
            sess: TensorFlow session with variables already initialized.
            summary_writer: ``tf.summary.FileWriter`` for training summaries.
            data_A: images of domain A; assumed indexable by an integer
                array (numpy-style fancy indexing) — TODO confirm.
            data_B: images of domain B, same length as ``data_A``.
        """
        logger.info('Start training.')
        logger.info('  {} images from A'.format(len(data_A)))
        logger.info('  {} images from B'.format(len(data_B)))

        assert len(data_A) == len(data_B), \
            'Data size mismatch dataA(%d) dataB(%d)' % (len(data_A), len(data_B))
        data_size = len(data_A)
        num_batch = data_size // self._batch_size
        epoch_length = num_batch * self._batch_size

        # Training schedule: constant LR, then linear decay to zero.
        num_initial_iter = 8
        num_decay_iter = 2
        lr = lr_initial = 0.0002
        lr_decay = lr_initial / num_decay_iter

        # Resume from the persisted global step if one exists.
        initial_step = sess.run(self.global_step)
        num_global_step = (num_initial_iter + num_decay_iter) * epoch_length
        t = trange(initial_step, num_global_step,
                   total=num_global_step, initial=initial_step)

        for step in t:
            #TODO: resume training with global_step
            epoch = step // epoch_length
            iter = step % epoch_length

            if epoch > num_initial_iter:
                lr = max(0.0, lr_initial - (epoch - num_initial_iter) * lr_decay)

            # if iter == 0:
            #     #data = zip(data_A, data_B)
            #     random.shuffle(data_A)
            #     random.shuffle(data_B)
            #     #data_A, data_B = zip(*data)

            # Sample a random (possibly repeating) mini-batch each step.
            ran_choice = np.random.choice(len(data_A), self._batch_size)
            image_a = data_A[ran_choice]
            image_b = data_B[ran_choice]
            sample_z = np.random.normal(size=(self._batch_size, self._latent_dim))

            # One joint update of discriminator, generator and encoder.
            fetches = [self.loss, self.optimizer_D,
                       self.optimizer_G, self.optimizer_E]
            if step % self._log_step == 0:
                fetches += [self.summary_op]

            fetched = sess.run(fetches, feed_dict={self.image_a: image_a,
                                                   self.image_b: image_b,
                                                   self.is_train: True,
                                                   self.lr: lr,
                                                   self.z: sample_z})

            if step % self._log_step == 0:
                # Generate one A->B sample with a fresh latent code and
                # save it for visual inspection.
                z = np.random.normal(size=(self._batch_size, self._latent_dim))
                image_ab = sess.run(self.image_ab, feed_dict={self.image_a: image_a,
                                                            self.z: z,
                                                            self.is_train: False})
                imsave('results/r_{}.jpg'.format(step), np.squeeze(image_ab[0:1], axis=0))

                summary_writer.add_summary(fetched[-1], step)
                summary_writer.flush()
                t.set_description('Loss({:.3f})'.format(fetched[0]))
예제 #52
0
 def refresh(self):
     """
     Reload the page currently displayed by the driver.
     """
     logger.info("Refreshing the page")
     driver = self.driver
     driver.refresh()
예제 #53
0
import routes
from config import conf
from flask import Flask
from utils import logger

# Module-level WSGI application instance.
app = Flask(__name__)

if __name__ == '__main__':
    logger.info("Server is starting...")
    # Register handlers on the app (presumably CORS support and the
    # route table; see the routes module — TODO confirm).
    routes.cross_domain(app)
    routes.settle(app)
    logger.info(app.url_map)
    # Development server, bound to localhost on the configured port.
    app.run(host='127.0.0.1', port=conf.PORT, debug=False)
예제 #54
0
def load_embedding(path):
    """Load the embedding matrix stored as ``embedding.txt`` under *path*.

    A zero row is appended as the padding vector, and the matrix is cast
    to float32.

    Args:
        path: directory containing ``embedding.txt`` (space-delimited).

    Returns:
        float32 numpy array of shape (vocab + 1, dim).
    """
    matrix = np.loadtxt(join(path, 'embedding.txt'), delimiter=' ')
    padding_row = np.zeros(matrix.shape[1]).reshape(1, -1)
    matrix = np.concatenate([matrix, padding_row], axis=0).astype(np.float32)
    logger.info("Embedding dim: %d, %d" % (matrix.shape[0], matrix.shape[1]))
    return matrix
예제 #55
0
 def get_url(self):
     """
     Return the URL of the page the driver currently displays.
     """
     logger.info("Getting the current url")
     current_url = self.driver.current_url
     return current_url
예제 #56
0
def train(args, train_dataset, model, tokenizer):
    """Fine-tune a BERT+CRF tagger on *train_dataset*.

    Builds the dataloader, grouped optimizer (separate learning rates for
    the BERT encoder vs. the CRF/classifier heads, with weight decay
    disabled on bias/LayerNorm), a linear-warmup schedule, and runs the
    training loop with gradient accumulation, optional fp16 (apex) and
    multi-GPU/distributed wrapping.  Periodically evaluates and saves
    checkpoints per ``args.logging_steps`` / ``args.save_steps``.

    Fixes vs. original: ``split["-"]`` → ``split("-")`` when parsing the
    resume step; epoch/t_total bookkeeping uses optimization steps per
    epoch (batches) instead of dataset examples; ``optimizer.step()`` now
    runs before ``scheduler.step()``.

    Args:
        args: namespace of training hyperparameters and device settings.
        train_dataset: dataset consumed via ``DataLoader`` + ``collate_fn``.
        model: model exposing ``bert``, ``crf`` and ``classifier`` submodules.
        tokenizer: tokenizer, saved alongside checkpoints.

    Returns:
        Tuple ``(global_step, average training loss per step)``.
    """
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)

    # One optimization step consumes gradient_accumulation_steps batches.
    steps_per_epoch = max(
        1, len(train_dataloader) // args.gradient_accumulation_steps)
    if args.max_steps > 0:
        t_total = args.max_steps
        # BUG FIX: epochs derive from optimization steps per epoch, not
        # from the number of dataset examples.
        args.num_train_epochs = args.max_steps // steps_per_epoch + 1
    else:
        t_total = steps_per_epoch * args.num_train_epochs

    # prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]

    def _param_groups(named_params, lr):
        """Split parameters into a weight-decay and a no-decay group at *lr*."""
        return [{
            "params": [
                p for n, p in named_params
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
            "lr": lr,
        }, {
            "params": [
                p for n, p in named_params
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
            "lr": lr,
        }]

    # Encoder uses the base LR; CRF and classifier use the CRF LR.
    optimizer_group_parameters = (
        _param_groups(list(model.bert.named_parameters()), args.learning_rate)
        + _param_groups(list(model.crf.named_parameters()),
                        args.crf_learning_rate)
        + _param_groups(list(model.classifier.named_parameters()),
                        args.crf_learning_rate))

    args.warmup_steps = int(t_total * args.warmup_proportion)
    optimizer = AdamW(optimizer_group_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # check if saved optimizer or scheduler states exist and restore them
    if os.path.isfile(os.path.join(
            args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt")):
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
    global_step = 0
    steps_trained_in_current_epoch = 0

    # check for continuing training from a checkpoint
    if os.path.exists(args.model_name_or_path
                      ) and "checkpoint" in args.model_name_or_path:
        # BUG FIX: the original subscripted the method (`split["-"]`),
        # which raises TypeError; it must be called.
        global_step = int(args.model_name_or_path.split("-")[-1].split("/")[0])
        epochs_trained = global_step // steps_per_epoch
        steps_trained_in_current_epoch = global_step % steps_per_epoch
        logger.info(
            "Continuing training from checkpoint, will skip to saved global_step"
        )
        logger.info("Continuing training from epoch %d", epochs_trained)
        logger.info("Continuing training from global step %d", global_step)
        logger.info("will skip the first %d steps in the first epoch",
                    steps_trained_in_current_epoch)

    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    seed_everyting(args.seed)

    for _ in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_dataloader), desc="Training")
        for step, batch in enumerate(train_dataloader):
            # skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3],
                "input_lens": batch[4]
            }
            if args.model_type != "distilbert":
                # distilbert takes no token_type_ids; only bert/xlnet use them
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            loss = outputs[0]

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean to average on multi-gpu parallel training

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            pbar(step, {"loss": loss.item()})

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)

                # BUG FIX: optimizer.step() must precede scheduler.step();
                # the reverse order skips the first scheduled learning rate.
                optimizer.step()
                scheduler.step()
                model.zero_grad()

                global_step += 1
                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    print(" ")
                    if args.local_rank == -1:
                        # Only evaluate when single GPU otherwise metrics may not average well
                        evaluate(args, model, tokenizer)
                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = (
                        model.module if hasattr(model, "module") else model
                    )  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    torch.save(args,
                               os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to %s", output_dir)
                    tokenizer.save_vocabulary(output_dir)
                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))
                    logger.info("Saving optimizer and scheduler states to %s",
                                output_dir)
        logger.info("\n")
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
    # guard against division by zero when no optimization step ran
    return global_step, tr_loss / max(1, global_step)
예제 #57
0
def newmem(update, context):
    """Challenge every non-bot newcomer to the chat with a quiz.

    Admin-invited members are ignored.  Each newcomer is muted, shown a
    randomly chosen challenge question with shuffled answer buttons plus
    admin pass/kick buttons, and timed jobs are scheduled to kick the
    user and clean up the messages if no answer arrives within the
    configured TIME.

    Args:
        update: incoming Telegram update carrying the new-members message.
        context: handler context providing bot, bot_data config and job queue.
    """
    message = update.message
    chat = message.chat
    # Skip the challenge when the inviter is a chat admin (or SUPER_ADMIN).
    if message.from_user.id in get_chat_admins(
            context.bot, chat.id,
            context.bot_data.get("config").get("SUPER_ADMIN")):
        return
    for user in message.new_chat_members:
        if user.is_bot:
            continue
        # Pick one challenge entry at random (cryptographic RNG).
        num = SystemRandom().randint(
            0,
            len(context.bot_data.get("config").get("CHALLENGE")) - 1)
        flag = context.bot_data.get("config").get("CHALLENGE")[num]
        # Mute the newcomer until they answer correctly.
        if context.bot.restrict_chat_member(
                chat_id=chat.id,
                user_id=user.id,
                permissions=ChatPermissions(can_send_messages=False),
        ):
            logger.info(
                f"New member: Successfully restricted user {user.id} at group {chat.id}"
            )
        else:
            logger.warning(
                f"New member: No enough permissions to restrict user {user.id} at group {chat.id}"
            )
        # NOTE(review): button labels read flag["WRONG"]/["ANSWER"] while
        # callback data reads flag['wrong']/['answer'] — confirm the config
        # really provides both key casings.
        buttons = [[
            InlineKeyboardButton(
                flag.get("WRONG")[t],
                callback_data=
                f"challenge|{user.id}|{num}|{flag.get('wrong')[t]}",
            )
        ] for t in range(len(flag.get("WRONG")))]
        buttons.append([
            InlineKeyboardButton(
                flag.get("ANSWER"),
                callback_data=f"challenge|{user.id}|{num}|{flag.get('answer')}",
            )
        ])
        # Shuffle so the correct answer is not always in the same position.
        SystemRandom().shuffle(buttons)
        buttons.append([
            InlineKeyboardButton(
                context.bot_data.get("config").get("PASS_BTN"),
                callback_data=f"admin|pass|{user.id}",
            ),
            InlineKeyboardButton(
                context.bot_data.get("config").get("KICK_BTN"),
                callback_data=f"admin|kick|{user.id}",
            ),
        ])
        question_message = message.reply_text(
            context.bot_data.get("config").get("GREET").format(
                question=flag.get("QUESTION"),
                time=context.bot_data.get("config").get("TIME"),
            ),
            reply_markup=InlineKeyboardMarkup(buttons),
            parse_mode=ParseMode.MARKDOWN,
        )
        # Deadline jobs: kick the user and delete both the join message
        # and the question message once TIME expires.
        context.job_queue.run_once(
            kick_queue,
            context.bot_data.get("config").get("TIME"),
            context={
                "chat_id": chat.id,
                "user_id": user.id,
            },
            name=f"{chat.id}|{user.id}|kick",
        )
        context.job_queue.run_once(
            clean_queue,
            context.bot_data.get("config").get("TIME"),
            context={
                "chat_id": chat.id,
                "user_id": user.id,
                "message_id": message.message_id,
            },
            name=f"{chat.id}|{user.id}|clean_join",
        )
        context.job_queue.run_once(
            clean_queue,
            context.bot_data.get("config").get("TIME"),
            context={
                "chat_id": chat.id,
                "user_id": user.id,
                "message_id": question_message.message_id,
            },
            name=f"{chat.id}|{user.id}|clean_question",
        )
예제 #58
0
def evaluate(args, model, tokenizer, prefix=""):
    """Evaluate *model* on the dev split and log entity-level metrics.

    Runs the model over the dev dataloader, decodes tag sequences with the
    CRF, accumulates them into a ``SeqEntityScore`` and logs overall and
    per-entity precision/recall/F1 plus the average eval loss.

    Fix vs. original: the loss/label accumulation block appeared twice per
    batch — double-counting ``eval_loss``/``nb_eval_steps`` and then
    crashing on the second ``tags.squeeze(...)`` call (``tags`` is already
    a plain list at that point).  The duplicate has been removed.

    Args:
        args: namespace of eval hyperparameters and device settings.
        model: trained model (unwrapped from ``nn.DataParallel`` if needed).
        tokenizer: tokenizer used by ``load_and_cache_examples``.
        prefix: label included in the log headers.

    Returns:
        Dict of metric name → value, including ``'loss'``.
    """
    metric = SeqEntityScore(args.id2label, markup=args.markup)
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    eval_dataset = load_and_cache_examples(args,
                                           args.task_name,
                                           tokenizer,
                                           data_type="dev")

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)

    # note distributedSampler samples randomly
    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)

    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=collate_fn)
    # Eval!
    logger.info("***** Running evaluation %s *****", prefix)
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    pbar = ProgressBar(n_total=len(eval_dataloader), desc="Evaluating")
    if isinstance(model, nn.DataParallel):
        model = model.module
    for step, batch in enumerate(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3],
                'input_lens': batch[4]
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don"t use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            tags = model.crf.decode(logits, inputs['attention_mask'])
        if args.n_gpu > 1:
            tmp_eval_loss = tmp_eval_loss.mean(
            )  # mean() to average on multi-gpu parallel evaluating
        eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        out_label_ids = inputs['labels'].cpu().numpy().tolist()
        input_lens = inputs['input_lens'].cpu().numpy().tolist()
        tags = tags.squeeze(0).cpu().numpy().tolist()
        for i, label in enumerate(out_label_ids):
            temp_1 = []
            temp_2 = []
            for j, m in enumerate(label):
                if j == 0:
                    # skip the first token (presumably [CLS])
                    continue
                elif j == input_lens[i] - 1:
                    # last real token reached: flush this sequence
                    metric.update(pred_paths=[temp_2], label_paths=[temp_1])
                    break
                else:
                    temp_1.append(args.id2label[out_label_ids[i][j]])
                    temp_2.append(args.id2label[tags[i][j]])
        pbar(step)
    logger.info("\n")
    eval_loss = eval_loss / nb_eval_steps
    eval_info, entity_info = metric.result()
    results = {f'{key}': value for key, value in eval_info.items()}
    results['loss'] = eval_loss
    logger.info("***** Eval results %s *****", prefix)
    info = "-".join(
        [f' {key}: {value:.4f} ' for key, value in results.items()])
    logger.info(info)
    logger.info("***** Entity results %s *****", prefix)
    for key in sorted(entity_info.keys()):
        logger.info("******* %s results ********" % key)
        info = "-".join([
            f' {key}: {value:.4f} ' for key, value in entity_info[key].items()
        ])
        logger.info(info)
    return results
예제 #59
0
def log_info(logger, config):
    """Log the hyper-parameter/configuration list for a training run.

    :param logger: a ``logging.Logger``-like object (only ``info`` is used)
    :param config: configuration object exposing ``DATASET``, ``TRAIN``,
        ``HASH_BIT``, ``alpha``, ``beta``, ``lamb``, ``mu``, ``BATCH_SIZE``,
        ``LR_IMG`` and ``LR_TXT`` attributes
    """
    logger.info('--- Configs List---')
    # Fixed typo in log label: 'Dadaset' -> 'Dataset'.
    logger.info('--- Dataset:{}'.format(config.DATASET))
    logger.info('--- Train:{}'.format(config.TRAIN))
    logger.info('--- Bit:{}'.format(config.HASH_BIT))
    logger.info('--- Alpha:{}'.format(config.alpha))
    logger.info('--- Beta:{}'.format(config.beta))
    logger.info('--- Lambda:{}'.format(config.lamb))
    logger.info('--- Mu:{}'.format(config.mu))
    logger.info('--- Batch:{}'.format(config.BATCH_SIZE))
    logger.info('--- Lr_IMG:{}'.format(config.LR_IMG))
    logger.info('--- Lr_TXT:{}'.format(config.LR_TXT))
def test_synchronisation_mongo_postgresql(verbose=True):
    """Print synchronization status between Mongo and Postgres databases.

    For every mapped collection, compares the MongoDB document count with
    the row count of the corresponding PostgreSQL table (schema 'synchro')
    and logs any mismatch.

    :param verbose: when True, log per-collection warnings and mismatch rows
    :return: True when every mapped collection is in sync, False otherwise
    """
    output_str = "  {0:25} {1:25} {2:>20}  {3:>20}"
    synchronization = True
    client = pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT)
    try:
        for postgres_url, mapping_name in zip(POSTGRES_URLS, MAPPINGS_NAMES):

            # Keep a handle on the connection so both cursor and connection
            # can be closed (the original leaked both on every iteration).
            conn = psycopg2.connect(postgres_url)
            cur = conn.cursor()
            try:
                cur.execute(
                    "SELECT table_name FROM information_schema.tables WHERE table_schema = 'synchro';"
                )
                table_list = [t[0] for t in cur]

                with open('/home/data/' + mapping_name, 'r') as mapping_file:
                    mapping = json.load(mapping_file)

                for mongo_database_name, db_mapping in mapping.items():
                    mongodb = client[mongo_database_name]
                    # NOTE(review): collection_names()/count() are deprecated in
                    # modern pymongo; kept for compatibility with the pinned driver.
                    for collection_name in mongodb.collection_names():

                        # Do not test synchronization for unmapped collections
                        if collection_name not in db_mapping:
                            if verbose:
                                logger.warning(
                                    "    collection '{}' present in MongoDB database '{}' is not mapped"
                                    .format(collection_name, mongo_database_name))
                            continue

                        namespace = mongo_database_name + '.' + collection_name
                        if 'namespaces' in CONFIG and not CONFIG['namespaces'].get(
                                namespace, True):
                            if verbose:
                                logger.warning(
                                    "    collection '{}' is ignored in doc_manager namespaces configuration."
                                    .format(collection_name))
                            continue

                        # MongoDB count
                        mongo_count = mongodb[collection_name].count()

                        # PostgreSQL count — table_name is interpolated into the
                        # query; presumably to_sql_identifier sanitizes it (TODO confirm).
                        table_name = to_sql_identifier(collection_name)
                        query = "SELECT count(*) FROM {}".format(table_name)
                        if table_name not in table_list:
                            psql_count = 'not defined'
                        else:
                            cur.execute(query)
                            psql_count = cur.fetchone()[0]

                        # Any mismatch (including a missing table, whose count is
                        # the sentinel string) marks the databases out of sync.
                        if mongo_count != psql_count:
                            if synchronization:
                                synchronization = False
                                if verbose:
                                    logger.info(
                                        "\nPostgreSQL is not synchronized with MongoDB on the following collections:"
                                    )
                                    logger.info(
                                        output_str.format("Database", "Collection",
                                                          "MongoDB_count",
                                                          "PostgreSQL_count"))
                            if verbose:
                                logger.info(
                                    output_str.format(mongo_database_name,
                                                      collection_name, mongo_count,
                                                      psql_count))
            finally:
                cur.close()
                conn.close()
    finally:
        client.close()

    if synchronization and verbose:
        # Fixed garbled message ("synchronized on MongoDB on dabasases and
        # collection definded").
        logger.info(
            "\nPostgreSQL is synchronized with MongoDB on databases and collections defined in the mapping."
        )
    return synchronization