def run(self):
    if __debug__:
        logging_info = {
            'mod': 'ReceiveFirstPhaseCommitMessageAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start receive first phase commit message ' + str(self.event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)

    act_event = self.local_endpoint._act_event_map.get_event(self.event_uuid)
    if act_event is not None:
        if self.successful:
            act_event.receive_successful_first_phase_commit_msg(
                self.event_uuid, self.msg_originator_endpoint_uuid,
                self.children_event_endpoint_uuids)
        else:
            act_event.receive_unsuccessful_first_phase_commit_msg(
                self.event_uuid, self.msg_originator_endpoint_uuid)

    if __debug__:
        log_msg = 'End receive first phase commit message ' + str(self.event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)
def run(self):
    if __debug__:
        logging_info = {
            'mod': 'ReceiveSubscriberAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start receive subscriber for ' + str(self.event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)

    evt = self.local_endpoint._act_event_map.get_event(self.event_uuid)
    if evt is None:
        # The event was already backed out or committed.  No need to keep
        # forwarding info about it.
        return

    if self.removed:
        evt.notify_removed_subscriber(
            self.subscriber_event_uuid, self.host_uuid, self.resource_uuid)
    else:
        evt.notify_additional_subscriber(
            self.subscriber_event_uuid, self.host_uuid, self.resource_uuid)

    if __debug__:
        log_msg = 'End receive subscriber for ' + evt.str_uuid
        util.get_logger().info(log_msg, extra=logging_info)
def run(self):
    if __debug__:
        logging_info = {
            'mod': 'ReceiveEndpointCallAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start receive endpoint call action ' + str(self.event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)

    act_evt_map = self.local_endpoint._act_event_map
    act_event = act_evt_map.get_or_create_endpoint_called_event(
        self.endpoint_making_call, self.event_uuid, self.result_queue)

    import waldoVariableStore
    evt_ctx = waldoExecutingEvent._ExecutingEventContext(
        self.local_endpoint._global_var_store,
        # Should not have any sequence-local data from an endpoint call.
        waldoVariableStore._VariableStore(self.local_endpoint._host_uuid))

    # The receiving endpoint must know that this call was an endpoint call so
    # that it can make deep copies of all non-external arguments (including
    # lists, maps, and user structs).
    evt_ctx.set_from_endpoint_true()

    exec_event = waldoExecutingEvent._ExecutingEvent(
        self.to_exec, act_event, evt_ctx, self.result_queue, *self.args)
    exec_event.start()

    if __debug__:
        log_msg = 'End receive endpoint call action ' + act_event.str_uuid
        util.get_logger().info(log_msg, extra=logging_info)
def __init__(self, *args, **kwargs):
    super(Gauge, self).__init__(*args, **kwargs)
    sysprefix = get_sys_prefix()
    self.config_file = os.getenv(
        'GAUGE_CONFIG', sysprefix + '/etc/ryu/faucet/gauge.yaml')
    self.exc_logfile = os.getenv(
        'GAUGE_EXCEPTION_LOG', sysprefix + '/var/log/ryu/faucet/gauge_exception.log')
    self.logfile = os.getenv(
        'GAUGE_LOG', sysprefix + '/var/log/ryu/faucet/gauge.log')

    # Set up logging
    self.logger = get_logger(self.logname, self.logfile, logging.DEBUG, 0)
    # Set up separate logging for exceptions
    self.exc_logger = get_logger(self.exc_logname, self.exc_logfile, logging.CRITICAL, 1)

    # Set the signal handler for reloading the config file
    signal.signal(signal.SIGHUP, self.signal_handler)

    # Dict of watchers/handlers, indexed by dp_id and then by name
    self.watchers = {}
    confs = watcher_parser(self.config_file, self.logname)
    for conf in confs:
        watcher = watcher_factory(conf)(conf, self.logname)
        self.watchers.setdefault(watcher.dp.dp_id, {})
        self.watchers[watcher.dp.dp_id][watcher.conf.type] = watcher

    # Create dpset object for querying Ryu's DPSet application
    self.dpset = kwargs['dpset']
def service(self):
    if __debug__:
        logging_info = {
            'mod': 'ReceiveRequestBackoutAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start receive request backout action ' + str(self.uuid)
        util.get_logger().info(log_msg, extra=logging_info)

    evt = self.local_endpoint._act_event_map.get_and_remove_event(self.uuid)
    if evt is None:
        # Can happen, for instance, if there are loops in the endpoint call
        # graph: we might receive more than one request to back out an event,
        # but the first backout has already removed the active event from the
        # active event map.
        return

    skip_partner = False
    if self.requesting_endpoint == util.PARTNER_ENDPOINT_SENTINEL:
        skip_partner = True

    # FIXME: should probably be in a separate thread
    evt.forward_backout_request_and_backout_self(skip_partner)

    if __debug__:
        log_msg = 'End receive request backout action ' + evt.str_uuid
        util.get_logger().info(log_msg, extra=logging_info)
def run(self):
    if __debug__:
        logging_info = {
            'mod': 'ReceiveRequestCommitAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start receive request commit action ' + str(self.event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)

    evt = self.local_endpoint._act_event_map.get_event(self.event_uuid)
    if evt is None:
        # Can happen if a commit is requested and then backed out.  Consider
        # the call graph a ---> b ---> c: a asks for a commit, which b
        # forwards to c; c waits on the active event map lock before
        # receiving the commit request.  Meanwhile a tells b to back out,
        # b forwards the backout request to c, and if c reads the backout
        # before the commit request we end up here.  Just ignore the request.
        pass
    else:
        evt.forward_commit_request_and_try_holding_commit_on_myself(self.from_partner)

    if __debug__:
        log_msg = 'End receive request commit action ' + str(self.event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)
def __init__(self, *args, **kwargs):
    super(Faucet, self).__init__(*args, **kwargs)

    # There doesn't seem to be a sensible way to pass command-line options
    # into Ryu apps, so the FAUCET_CONFIG environment variable is used
    # instead; if it is not set, the config file defaults to faucet.yaml.
    sysprefix = get_sys_prefix()
    self.config_file = os.getenv(
        'FAUCET_CONFIG', sysprefix + '/etc/ryu/faucet/faucet.yaml')
    self.logfile = os.getenv(
        'FAUCET_LOG', sysprefix + '/var/log/ryu/faucet/faucet.log')
    self.exc_logfile = os.getenv(
        'FAUCET_EXCEPTION_LOG', sysprefix + '/var/log/ryu/faucet/faucet_exception.log')

    # Set the signal handler for reloading the config file
    signal.signal(signal.SIGHUP, self.signal_handler)

    # Create dpset object for querying Ryu's DPSet application
    self.dpset = kwargs['dpset']

    # Set up logging
    self.logger = get_logger(self.logname, self.logfile, logging.DEBUG, 0)
    # Set up separate logging for exceptions
    self.exc_logger = get_logger(self.exc_logname, self.exc_logfile, logging.DEBUG, 1)

    # Set up a valve object for each datapath
    self.valves = {}
    self.config_hashes, valve_dps = dp_parser(self.config_file, self.logname)
    for valve_dp in valve_dps:
        # pylint: disable=no-member
        valve = valve_factory(valve_dp)
        if valve is None:
            self.logger.error('Hardware type not supported for DP: %s', valve_dp.name)
        else:
            self.valves[valve_dp.dp_id] = valve(valve_dp, self.logname)

    self.gateway_resolve_request_thread = hub.spawn(self.gateway_resolve_request)
    self.host_expire_request_thread = hub.spawn(self.host_expire_request)

    self.dp_bgp_speakers = {}
    self._reset_bgp()

    # Register to the API
    api = kwargs['faucet_api']
    api._register(self)
    self.send_event_to_observers(EventFaucetAPIRegistered())
def set_logging_level(level):
    '''
    Programmers can set the logging level they desire.

    Note: mostly used internally for compiler development.

    Args:
        level (int): See Python's built-in logging module.  Options include
            logging.CRITICAL, logging.INFO, logging.DEBUG, etc.
    '''
    util.get_logger().setLevel(level)
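# Example usage (a minimal sketch; how the enclosing module is imported is not
# shown above, so the function is called directly here):
import logging

set_logging_level(logging.DEBUG)  # verbose output while debugging the compiler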
def run(self):
    '''
    Event loop: keep reading actions off the queue and servicing them.
    '''
    while True:
        service_action = self.threadsafe_queue.get()
        if __debug__:
            util.get_logger().debug(
                'Servicing action. Remaining queue size: %s' %
                str(self.threadsafe_queue.qsize()),
                extra=self.logging_info)
        service_action.service()
def run(self):
    if __debug__:
        logging_info = {
            'mod': 'ReceivePartnerReadyAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start receive ready action'
        util.get_logger().info(log_msg, extra=logging_info)

    self.local_endpoint._other_side_ready()

    if __debug__:
        log_msg = 'End receive ready action'
        util.get_logger().info(log_msg, extra=logging_info)
def loop_error_item(self, loggers=['validation']):
    for meta, unit_datas in self._raw:
        error_datas = []
        for row_data in unit_datas:
            error_count = 0
            for logger in loggers:
                errors = get_logger(logger, row_data)
                error_count += len(errors) if errors else 0
                for col_data in itertools.chain(row_data['raw'],
                                                row_data['eval'],
                                                row_data['extend']):
                    errors = get_logger(logger, col_data)
                    error_count += len(errors) if errors else 0
            if error_count > 0:
                error_datas.append(row_data)
        if len(error_datas) > 0:
            yield meta, error_datas
def str_item_error(self, row_data, loggers=['validation'], html=False):
    link_str = '<br>' if html else '\n'
    msg = u'Error details:'
    for logger in loggers:
        msg += link_str
        msg += u'--- error type: {0} ---'.format(logger)
        errors = get_logger(logger, row_data)
        if errors and len(errors) > 0:
            msg += link_str
            msg += ';'.join(errors)
        for col_data in itertools.chain(row_data['raw'],
                                        row_data['eval'],
                                        row_data['extend']):
            errors = get_logger(logger, col_data)
            if errors and len(errors) > 0:
                msg += link_str
                msg += '%s: %s' % (col_data['key'], ';'.join(errors))
    return msg
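# Note: in loop_error_item() and str_item_error() above, get_logger(name, data) is not
# the logging helper used elsewhere in this collection; it returns the list of
# validation messages a named checker attached to a row or column dict.  A minimal
# sketch under that assumption (the 'loggers' key layout is hypothetical):
def get_logger(logger_name, data):
    # Return the message list recorded for logger_name, or None if nothing was logged.
    return data.get('loggers', {}).get(logger_name)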
def router():
    _logger = get_logger(__name__)
    if request.form.get("token") == os.environ.get("SLACK_WEBHOOK_SECRET"):
        # Get info from the incoming request
        channel_id = request.form.get("channel_id")
        user = request.form.get("user_name")
        message = request.form.get("text")
        _logger.info("Incoming message from {0} on {1}: {2}".format(
            channel_id, user, message))

        # Parse and route
        try:
            response = parse_message(message, user)
        except Exception as e:
            response = fail(e, user)

        slack_client = SlackClient(os.environ.get("SLACK_TOKEN"))
        slack_client.api_call(
            "chat.postMessage",
            channel=channel_id,
            username='******',
            icon_emoji=':sausage:',
            **response)
    return Response(), 200
def __init__(self, name, address, port, pubsub, key):
    self.name = name
    self.port = port
    self.address = address
    self.pubsub = pubsub
    self.key = key
    self.logger = util.get_logger(
        "%s.%s" % (self.__module__, self.__class__.__name__))
def run(self):
    if __debug__:
        logging_info = {
            'mod': 'ReceivePeeredModifiedMsgAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start receive peered mod msg ' + str(self.msg.event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)

    event_uuid = self.msg.event_uuid
    event = self.local_endpoint._act_event_map.get_or_create_partner_event(event_uuid)
    event.generate_partner_modified_peered_response(self.msg)

    if __debug__:
        log_msg = 'End receive peered mod msg ' + event.str_uuid
        util.get_logger().info(log_msg, extra=logging_info)
def __init__(self, port):
    ReliableChatServerSocket.__init__(self, port)
    self.msg_acks = {}    # hashcode -> [clients]
    self.sent_msgs = {}   # who has been sent what?
    self.all_msgs = {}    # hashcode -> msg
    self.identity = {}    # socket_ptr -> name
    self.logger = get_logger(self)
def __init__(self):
    self.nodes = dict()
    self.clients_pubsub = PubSub(self,
                                 pub_port=settings.CLIENT_SUB,
                                 sub_port=settings.CLIENT_PUB,
                                 broadcast=False)
    self.nodes_pubsub = PubSub(self,
                               pub_port=settings.NODE_SUB,
                               sub_port=settings.NODE_PUB,
                               parse_message=self.parse_message)
    self.logger = util.get_logger(
        "%s.%s" % (self.__module__, self.__class__.__name__))
    Logger(self)
    self.run()
def __init__(self, user_name, server_loc):
    self.logger = get_logger(self)
    super(ReliableChatClient, self).__init__(*server_loc)
    self.user_name = user_name
    self.msg_stack = []        # (timestamp, msg), kept sorted
    self.acked_messages = {}
    self.queue_lock = threading.Lock()
    self.connected = False
def __init__(self, name, *args, **kwargs):
    self.name = name
    self.initializing = True
    if LIVE:
        self.interface_kit = InterfaceKit()
    self.manager = PubSub(self,
                          pub_port=settings.NODE_PUB,
                          sub_port=settings.NODE_SUB,
                          sub_filter=self.name)
    self.logger = util.get_logger(
        "%s.%s" % (self.__module__, self.__class__.__name__))
    self.initialize()
    self.run()
def __init__(self):
    self._logger = util.get_logger(__name__, log_file=self._log_file)
    self._init_devices()
    # Not sure if this is the right place for the screen saver object, but
    # instantiating it inside busted() would create a new one every time
    # busted() is triggered.
    self._screen_saver = Screen_Saver()
def __init__(self, input, output, min, max, state, current_state):
    self.input = input
    self.min = min
    self.max = max
    self.output = output
    self.state = state
    self.current_state = current_state
    self.logger = util.get_logger(
        "%s.%s" % (self.__module__, self.__class__.__name__))
def __init__(self, index, display, interface, change=2, data_rate=64):
    self.index = index
    self.display = display
    self.id = util.slugify(display)
    self.change = change
    self.data_rate = data_rate
    self.interface = interface
    self.logger = util.get_logger(
        "%s.%s" % (self.__module__, self.__class__.__name__))
def echo_error(self, loggers=['validation']):
    error_total = 0
    for meta, error_items in self._raw:
        print(self.str_meta(meta))
        for row_data in error_items:
            error_count = 0
            for logger in loggers:
                errors = get_logger(logger, row_data)
                error_count += len(errors) if errors else 0
                for col_data in itertools.chain(row_data['raw'],
                                                row_data['eval'],
                                                row_data['extend']):
                    errors = get_logger(logger, col_data)
                    error_count += len(errors) if errors else 0
            if error_count > 0:
                error_total += error_count
                print(self.str_item(row_data))
                print(self.str_item_error(row_data, loggers=loggers))
    print(u'Number of error records: {0}'.format(error_total))
def _setup_logging():
    '''
    Internal function.  Not to be used by the programmer.
    '''
    DEFAULT_LOG_FILENAME = 'log.txt'
    DEFAULT_LOGGING_LEVEL = logging.CRITICAL

    format_ = (
        '%(levelname)s : %(asctime)s.%(msecs)f: %(endpoint_string)s : %(mod)s \n %(message)s')
    # '%(levelname)s : %(asctime)s.%(msecs).03d: %(endpoint_string)s : %(mod)s \n %(message)s'

    logging.basicConfig(
        format=format_,
        filename=DEFAULT_LOG_FILENAME,
        level=DEFAULT_LOGGING_LEVEL,
        datefmt='%I:%M:%S')

    util.get_logger().critical(
        '***** New *****', extra={'mod': 'NEW', 'endpoint_string': 'NEW'})
    util.lock_log('***** New *****')
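# The snippets above call util.get_logger() with extra={'mod': ..., 'endpoint_string': ...},
# matching the format string configured in _setup_logging().  A minimal sketch of what such
# a helper might look like, assuming it simply wraps the standard library (the logger name
# is hypothetical; the real util module may differ):
import logging

def get_logger():
    # Return the shared module-level logger; callers supply 'mod' and
    # 'endpoint_string' through extra= so the format above resolves.
    return logging.getLogger('waldo')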
def ofchannel_log(self, ofmsgs):
    if self.dp is not None:
        if self.dp.ofchannel_log is not None:
            self.ofchannel_logger = util.get_logger(
                self.dp.ofchannel_log, self.dp.ofchannel_log, logging.DEBUG, 0)
            for ofmsg in ofmsgs:
                self.ofchannel_logger.debug(ofmsg)
def main_test():
    import util
    test_urls = {
        'dd3322be6b143c6a842acdb4bb5e9f60': 'http://localhost/w/dl/20140728233100.ts',
        # '0d851220f47e7aed4615aebbd5cd2c7a': 'http://localhost/w/dl/test.jpg'
    }
    log = util.get_logger()
    ttttt(1, test_urls, log)
    ttttt(3, test_urls, log)
    ttttt(4, test_urls, log)
def ws_test(log=None):
    if log is None:
        import util
        log = util.get_logger()

    ws = WorkShop(tmin=5, tmax=20, log=log)
    i = 0
    total = 0
    tasks = []
    try:
        ws.serve()
        while True:
            task = TaskTest(randint(0, 10), name='T_%05d' % i, log=log)
            task.makeSubWorks()
            assert task.subWorks is not None
            # wk.cancel()
            ws.addTask(task)
            tasks.append(task)
            i += 1
            total += 1
            log.error(' ||||||||||||||| tasks = %d', ws.currTaskSize)
            if i < 190:
                _sleep(0.3)
            else:
                _sleep(0.6)
            if i > 200:
                break
    except Exception as e:
        log.exception(e)
        raise
    finally:
        # _sleep(1)
        ws.setToStop()
        ws.join()

    canceled_total = unknow_total = archived_total = err_total = 0
    for task in tasks:
        log.error('[%s] status=%d', task.name, task.status)
        if task.isArchived():
            archived_total += 1
        elif task.isError():
            err_total += 1
        elif task.status == 3:
            canceled_total += 1
        else:
            unknow_total += 1
        # if task.isArchived() == task.isError():
        #     _sleep(0.3)
        #     for wk in task.subworks:
        #         print wk.status

    log.error('TASK: total=%d, exec=%d, arc=%d, canc=%d, err=%d, un=%d, clean=%d',
              total, TaskTest.EXEC_TOTAL, archived_total, canceled_total,
              err_total, unknow_total, TaskTest.CLEANUP)
    log.error('WORK: total=%d, exec=%d', WorkTest.TOTAL, WorkTest.EXEC_TOTAL)
    assert unknow_total == 0
    assert TaskTest.CLEANUP == total
    assert archived_total + err_total + canceled_total == TaskTest.TOTAL
def __init__(self, input, output, state, set_point, update=60, check=30,
             P=2.0, I=0.0, D=1.0, Derivator=0, Integrator=0,
             Integrator_max=500, Integrator_min=-500):
    self.input = input
    self.output = output
    self.state = state
    self.set_point = set_point
    self.pid = pid.PID(3.0, 0.4, 1.2)
    self.pid.setPoint(set_point)
    self.logger = util.get_logger(
        "%s.%s" % (self.__module__, self.__class__.__name__))
    self.update = update
    self.check = check
    gevent.spawn(self.run)
def run(self):
    event_uuid = self.partner_request_block_msg.event_uuid
    if __debug__:
        logging_info = {
            'mod': 'ReceivePartnerMesssageRequestSequenceBlockAction',
            'endpoint_string': self.local_endpoint._endpoint_uuid_str
        }
        log_msg = 'Start sequence action for ' + str(event_uuid)
        util.get_logger().info(log_msg, extra=logging_info)

    evt = self.local_endpoint._act_event_map.get_or_create_partner_event(event_uuid)
    evt.recv_partner_sequence_call_msg(self.partner_request_block_msg)

    if __debug__:
        log_msg = 'End sequence action for ' + evt.str_uuid
        util.get_logger().info(log_msg, extra=logging_info)
def ofchannel_log(self, ofmsgs):
    """Log OpenFlow messages in text format to the debugging log."""
    if self.dp is not None:
        if self.dp.ofchannel_log is not None:
            self.ofchannel_logger = util.get_logger(
                self.dp.ofchannel_log, self.dp.ofchannel_log, logging.DEBUG, 0)
            for ofmsg in ofmsgs:
                self.ofchannel_logger.debug(ofmsg)
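# Faucet and Gauge above call get_logger(logname, logfile, level, propagate) to build
# file-backed loggers.  A hedged sketch of such a helper, consistent with those call
# sites but not necessarily the project's real implementation:
import logging

def get_logger(logname, logfile, loglevel, propagate):
    logger = logging.getLogger(logname)
    handler = logging.FileHandler(logfile)
    handler.setFormatter(
        logging.Formatter('%(asctime)s %(name)-6s %(levelname)-8s %(message)s'))
    logger.addHandler(handler)
    logger.propagate = propagate
    logger.setLevel(loglevel)
    return logger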
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as sched
import torch.utils.data as data
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor


if __name__ == '__main__':
    args = get_test_args()

    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, subdir='test')
    log = util.get_logger(args.save_dir, args.name)
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')

    # Load the checkpoint if given as a parameter
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model = util.load_model(args.load_path)
    else:
        # Get model
        log.info('Building model...')
        model = util.get_model_class(args.model)(args)
if __name__ == '__main__':
    # Preview mode: if disabled, matching files are deleted directly.
    preview = False
    # Directory of POJO .java files to check.  Only POJOs are supported;
    # do not casually try to check other kinds of classes.
    dto_path = '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-model/src/main'
    # Directories whose .java and .xml files are searched for references to the POJOs.
    target_paths = [
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-model/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-dao/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-business/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/basics-promotion-service/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/back-promotion-web/src/main'
    ]
    logger = util.get_logger('find_class_used')
    logger.debug('Starting run')

    class_list = []
    files = util.list_file(dto_path, '.java')
    for i in files:
        class_list.append(ClassFile(i))
    logger.debug('Found %s files whose references need to be located' % class_list.__len__())

    files = util.list_file(paths=target_paths, patterns=['.java', '.xml'])
    for i in files:
        logger.info('Searching in file %s' % i)
        with open(i, 'r') as f:
            content = f.read()
            for j in class_list:
                logger.debug('Looking up class: %s' % j.class_name)
                # First, the simplest check: is the class imported by its
                # fully qualified name?
import aiohttp
import asyncio
import redis
import json

import util

logger = util.get_logger("api")

CACHE_MCAP_RESULT_KEY = 'beatrice_cmccache'

BINANCE_URL = 'https://www.binance.com/api/v3/ticker/price?symbol=NANOBTC'
KUCOIN_URL = 'https://api.kucoin.com/v1/open/tick?symbol=NANO-BTC'
NANEX_URL = 'https://nanex.co/api/public/ticker/btcnano'
CMC_URL = 'https://api.coinmarketcap.com/v2/ticker/1567/'
CMC_BTC_URL = 'https://api.coinmarketcap.com/v2/ticker/1/'
BANANO_URL = 'https://api.creeper.banano.cc/ticker'


async def json_get(reqUrl):
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(reqUrl, timeout=10) as resp:
                jsonResp = await resp.json()
                return jsonResp
    except BaseException:
        return None


async def get_banano_price():
    response = await json_get(BANANO_URL)
    if response is not None:
# -*- encoding: utf-8 -*-
# Created on 2016-07-26 11:04:34
import datetime
from abc import ABCMeta, abstractmethod
from multiprocessing import Process

from peewee import SQL

import model
import util
from kuaidi100 import Kuaidi100ComponentImpl
from mojo_qq import MoJoQQComponentImpl
from mojo_wx import MoJoWXComponentImpl
from package_tracking_repo import PackageTrackingRepoComponentImpl

logger = util.get_logger("PackageTracking")


class PackageTrackingComponent:
    def __init__(self):
        pass

    __metaclass__ = ABCMeta

    @abstractmethod
    def sub_pkg_trk_msg(self, suber_account, suber_nike_name, group_name,
                        group_no, sub_type, sub_source, tracking_no):
        pass

    @abstractmethod
    def qry_pkg_trk_msg(self,
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    jsb_file_loc = "./data/jsb_processed.pkl"

    # ingest training/validation/test data from disk
    data = pickle.load(open(jsb_file_loc, "rb"))
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(np.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d" %
        (N_train_data, np.mean(training_seq_lengths), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        y = np.repeat(x, n_eval_samples, axis=0)
        return y

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences), val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences), test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta1, args.beta2),
                   "clip_norm": args.clip_norm, "lrd": args.lr_decay,
                   "weight_decay": args.weight_decay}
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = SVI(dmm.model, dmm.guide, adam, Trace_ELBO())

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size, N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = elbo.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                         mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()
        # compute the validation and test loss n_samples many times
        val_nll = elbo.evaluate_loss(val_batch, val_batch_reversed, val_batch_mask,
                                     val_seq_lengths) / np.sum(val_seq_lengths)
        test_nll = elbo.evaluate_loss(test_batch, test_batch_reversed, test_batch_mask,
                                      test_seq_lengths) / np.sum(test_seq_lengths)
        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = np.arange(N_train_data)
        np.random.shuffle(shuffled_indices)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d]  %.4f  %.4f" % (epoch, val_nll, test_nll))
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    word_vectors_char = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  word_vectors_char=word_vectors_char,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric, log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                   num_workers=args.num_workers, collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False,
                                 num_workers=args.num_workers, collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file, args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file,
                                   step=step, split='dev', num_visuals=args.num_visuals)
#!/usr/bin/env python
# Copyright 2016-2017, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Report for how many of our fallback directories are unreachable.
"""

import time
import traceback

import stem.descriptor.remote
import stem.directory

import util

log = util.get_logger('fallback_directories')

NOTIFICATION_THRESHOLD = 50  # send notice if this percentage of fallbacks are unusable

TO_ADDRESSES = ['*****@*****.**', '*****@*****.**', '*****@*****.**']
EMAIL_SUBJECT = 'Fallback Directory Summary (%i/%i, %i%%)'

EMAIL_BODY = """\
%i/%i (%i%%) fallback directories have become slow or unresponsive...

"""

downloader = stem.descriptor.remote.DescriptorDownloader(timeout=30)
import zerorpc

from .config import CONNURL
from .message import Message
import threading
from util import get_logger
from .state import *
from .excutor import Excutor

logger = get_logger(__name__, 'e:/{}.log'.format(__name__))


class Commanager:
    def __init__(self, meg: Message, timeout=3):
        self.meg = meg
        self.client = zerorpc.Client()
        self.event = threading.Event()
        self.timeout = timeout
        self.state = WAITNG
        self.excutor = Excutor()

    def sendmeg(self):
        try:
            self.event.clear()
            self.client.connect(CONNURL)
            self.client.send(self.meg.reg())
            while not self.event.wait(self.timeout):
                self.client.send(self.meg.heart_beat())
                if self.state == WAITNG:
                    task_item = self.client.get_task(self.meg.uuid)
                    if task_item:
                        task_id, script, time_out = task_item
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vec = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    if args.name == 'baseline':
        model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
    elif args.name == 'charembeddings':
        model = BiDAFChar(word_vectors=word_vectors, char_vec=char_vec, word_len=16,
                          hidden_size=args.hidden_size, drop_prob=args.drop_prob)
    elif args.name == 'charembeddings2':
        model = BiDAFChar2(word_vectors=word_vectors, char_vec=char_vec, word_len=16,
                           hidden_size=args.hidden_size, drop_prob=args.drop_prob)
    elif args.name == 'qanet':
        model = QANet(word_vectors=word_vectors, char_vec=char_vec, word_len=16,
                      emb_size=args.hidden_size, drop_prob=args.drop_prob,
                      enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train,
                      DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos,
                      total_prob=args.total_drop, final_prob=args.final_prob)
    elif args.name == 'qanet2':
        model = QANet2(word_vectors=word_vectors, char_vec=char_vec, word_len=16,
                       emb_size=args.hidden_size, drop_prob=args.drop_prob,
                       enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train,
                       DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos,
                       rel=args.rel_att, total_prob=args.total_drop, final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    elif args.name == 'qanet3':
        model = QANet3(word_vectors=word_vectors, char_vec=char_vec, word_len=16,
                       emb_size=args.hidden_size, drop_prob=args.drop_prob,
                       enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train,
                       DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos,
                       rel=args.rel_att, total_prob=args.total_drop, final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    elif args.name == 'qanet4':
        model = QANet4(word_vectors=word_vectors, char_vec=char_vec, word_len=16,
                       emb_size=args.hidden_size, drop_prob=args.drop_prob,
                       enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train,
                       DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos,
                       rel=args.rel_att, total_prob=args.total_drop, final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    else:
        raise ValueError('Wrong model name')

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric, log=log)

    # Get optimizer and scheduler
    if args.name == 'qanet':
        optimizer = optim.Adam(model.parameters(), args.lr, betas=(0.8, 0.999),
                               weight_decay=3 * 1e-7, eps=1e-7)
        scheduler = warmup(optimizer, 1, 2000)
    elif args.opt == 'adam':
        if args.grad_cent:
            optimizer = AdamWGC(model.parameters(), args.lr, betas=(0.9, 0.999),
                                weight_decay=3 * 1e-7, eps=1e-7, use_gc=True)
        else:
            optimizer = AdamW(model.parameters(), args.lr, betas=(0.8, 0.999),
                              weight_decay=3 * 1e-7, eps=1e-7)
        scheduler = warmup(optimizer, 1, 2000)
    elif args.opt == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=3 * 1e-7)
        scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    elif args.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(), args.lr, weight_decay=3 * 1e-7)
        scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                   num_workers=args.num_workers, collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False,
                                 num_workers=args.num_workers, collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    i = 0
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()
                i += 1
                loss /= args.acc_step

                # Backward
                loss.backward()
                if i % args.acc_step == 0:
                    nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                    optimizer.step()
                    scheduler.step(i // args.acc_step)
                    ema(model, i // args.acc_step)
                    optimizer.zero_grad()

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0 and i % args.acc_step == 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file, args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file,
                                   step=step, split='dev', num_visuals=args.num_visuals)
            bars_cdf, color='black', width=bar_width, edgecolor='white', label='C-DF')
    plt.ylabel('Amount of resources', fontsize=16)
    plt.xticks([r + bar_width for r in range(len(bars_ca))],
               ['(1) Relations', '(2) Types'], fontsize=16)
    plt.yticks(fontsize=14)
    plt.legend(fontsize=15)
    ax = plt.gca()
    ax.yaxis.grid()
    plt.savefig(util.get_results_file('results.graphs.dbpedia_unknown_resources'),
                bbox_inches='tight')


# --- START SCRIPT ---
if __name__ == '__main__':
    now = datetime.datetime.now()
    util.get_logger().info('Started graph generation.')
    generate_graphs()
    duration = (datetime.datetime.now() - now).seconds // 60
    util.get_logger().info(f'Finished graph generation after {duration} minutes.')
import gzip
import glob
import os
import multiprocessing
import json
import pickle
import argparse
from functools import partial

import numpy as np
from tqdm import tqdm
from pytorch_pretrained_bert.tokenization import BertTokenizer

from util import get_logger

logger = get_logger('.', 'setup')


def get_setup_args():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--bert_model", default="bert-base-uncased", type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
             "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
             "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--is_eval", default=False, type=bool,
                        help="whether to downsample the candidates")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sqlite3
from sqlite3 import Error
from datetime import datetime

from util import get_logger

logger = get_logger("cdn")


class DBController:
    db_path = "single_domain.db"

    def __init__(self):
        super(DBController, self).__init__()
        self.setup_db()

    def setup_db(self):
        self.create_table()

    def _execute_query(self, sql, values=None):
        try:
            conn = self.get_connection()
            cur = conn.cursor()
            if not values:
                cur.execute(sql)
            else:
                cur.execute(sql, values)
            conn.commit()
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric, log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SegmentSQuAD(args.train_record_file, args.use_squad_v2)
    # train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                   num_workers=args.num_workers, collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False,
                                 num_workers=args.num_workers, collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()
                y1, y2 = y1.to(device), y2.to(device)

                # Forward
                loss = 0
                for i in range(batch_size):
                    max_p_sum = 0
                    max_p_sum_idx = 0
                    for j in range(cw_idxs.size(1)):
                        # Deal with the case when all the words in the window are padded words
                        if cw_idxs[i, j].sum().item() == 0:
                            continue
                        log_p1_j, log_p2_j = model(cw_idxs[i, j].unsqueeze(0),
                                                   qw_idxs[i].unsqueeze(0))
                        max_log_p1_j = torch.max(log_p1_j.detach())
                        max_log_p2_j = torch.max(log_p2_j.detach())
                        max_p_sum_idx = j if (max_log_p1_j + max_log_p2_j) > max_p_sum else max_p_sum_idx
                        max_p_sum = max_log_p1_j + max_log_p2_j if (max_log_p1_j + max_log_p2_j) > max_p_sum else max_p_sum
                    log_p1_max, log_p2_max = model(cw_idxs[i, max_p_sum_idx].unsqueeze(0),
                                                   qw_idxs[i].unsqueeze(0))
                    # Adjust label to the window case
                    if max_p_sum_idx * train_dataset.stride + torch.argmax(log_p1_max).item() == y1[i].item():
                        loss += F.nll_loss(log_p1_max, torch.argmax(log_p1_max).unsqueeze(0))
                    else:
                        loss += F.nll_loss(log_p1_max, torch.argmin(log_p1_max).unsqueeze(0))
                    if max_p_sum_idx * train_dataset.stride + torch.argmax(log_p2_max).item() == y2[i].item():
                        loss += F.nll_loss(log_p2_max, torch.argmax(log_p2_max).unsqueeze(0))
                    else:
                        loss += F.nll_loss(log_p2_max, torch.argmin(log_p2_max).unsqueeze(0))
                loss_val = loss.item()

                # # Forward
                # log_p1, log_p2 = model(cw_idxs, qw_idxs)
                # loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                # loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file, args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file,
                                   step=step, split='dev', num_visuals=args.num_visuals)
def main():
    # define parser and arguments
    args = get_train_test_args()
    util.set_seed(args.seed)

    model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased")
    tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

    '''###'''
    # if args.reinit_pooler:
    #     encoder_temp = getattr(model, "distilbert")  # Equivalent to model.distilbert
    #     encoder_temp.pooler.dense.weight.data.normal_(mean=0.0, std=encoder_temp.config.initializer_range)
    #     encoder_temp.pooler.dense.bias.data.zero_()  # The change of encoder_temp would affect the model
    #     for p in encoder_temp.pooler.parameters():
    #         p.requires_grad = True

    if args.reinit_layers > 0:
        import torch.nn as nn
        from transformers.models.distilbert.modeling_distilbert import MultiHeadSelfAttention, FFN

        # model_distilbert = getattr(model, "distilbert")  # model.distilbert; change of model_distilbert affects model!
        # Reinitialize the last few layers
        for layer in model.distilbert.transformer.layer[-args.reinit_layers:]:
            for module in layer.modules():
                # print(module)
                model.distilbert._init_weights(module)  # Equivalent to the approach below
                # if isinstance(module, nn.modules.linear.Linear):  # Original form for nn.Linear
                #     # model.config.initializer_range == model.distilbert.config.initializer_range => True
                #     module.weight.data.normal_(mean=0.0, std=model.distilbert.config.initializer_range)
                #     if module.bias is not None:
                #         module.bias.data.zero_()
                # elif isinstance(module, nn.modules.normalization.LayerNorm):
                #     module.weight.data.fill_(1.0)
                #     module.bias.data.zero_()
                # elif isinstance(module, FFN):
                #     for param in [module.lin1, module.lin2]:
                #         param.weight.data.normal_(mean=0.0, std=model.distilbert.config.initializer_range)
                #         if param.bias is not None:
                #             param.bias.data.zero_()
                # elif isinstance(module, MultiHeadSelfAttention):
                #     for param in [module.q_lin, module.k_lin, module.v_lin, module.out_lin]:
                #         param.data.weight.normal_(mean=0.0, std=model.distilbert.config.initializer_range)
                #         if param.bias is not None:
                #             param.bias.data.zero_()

    if args.do_train:
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        args.save_dir = util.get_save_dir(args.save_dir, args.run_name)
        log = util.get_logger(args.save_dir, 'log_train')
        log.info(f'Args: {json.dumps(vars(args), indent=4, sort_keys=True)}')
        log.info("Preparing Training Data...")

        args.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        trainer = Trainer(args, log)
        train_dataset, _ = get_dataset(args, args.train_datasets, args.train_dir, tokenizer, 'train')
        log.info("Preparing Validation Data...")
        val_dataset, val_dict = get_dataset(args, args.train_datasets, args.val_dir, tokenizer, 'val')
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  sampler=RandomSampler(train_dataset))  # For SQuAD: 50537/16 ~ 3159 batches
        val_loader = DataLoader(val_dataset, batch_size=args.batch_size,
                                sampler=SequentialSampler(val_dataset))
        best_scores = trainer.train(model, train_loader, val_loader, val_dict)

    if args.do_eval:
        args.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        split_name = 'test' if 'test' in args.eval_dir else 'validation'
        log = util.get_logger(args.save_dir, f'log_{split_name}')
        trainer = Trainer(args, log)
        checkpoint_path = os.path.join(args.save_dir, 'checkpoint')
        model = DistilBertForQuestionAnswering.from_pretrained(checkpoint_path)  # Trained model
        model.to(args.device)
        eval_dataset, eval_dict = get_dataset(args, args.eval_datasets, args.eval_dir,
                                              tokenizer, split_name)
        eval_loader = DataLoader(eval_dataset, batch_size=args.batch_size,
                                 sampler=SequentialSampler(eval_dataset))
        eval_preds, eval_scores = trainer.evaluate(model, eval_loader, eval_dict,
                                                   return_preds=True, split=split_name)
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in eval_scores.items())
        log.info(f'Eval {results_str}')

        # Write submission file
        sub_path = os.path.join(args.save_dir, split_name + '_' + args.sub_file)
        log.info(f'Writing submission file to {sub_path}...')
        with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
            csv_writer = csv.writer(csv_fh, delimiter=',')
            csv_writer.writerow(['Id', 'Predicted'])
            for uuid in sorted(eval_preds):
                csv_writer.writerow([uuid, eval_preds[uuid]])
import os
from enum import Enum, auto
from typing import Dict, List

from flask import (Blueprint, flash, redirect, render_template, request,
                   session, url_for)
from lxml import etree

from util import cxml, get_logger, xslt

from .settings import settings

logger = get_logger(__name__)


class VarResolvedSource(Enum):
    NONE = auto()
    SETTINGS = auto()
    SESSION = auto()
    FORM = auto()


class TemplateVarSpec:
    def __init__(
            self,
            name: str,
            sync_session=True,
            from_settings=True,
            subst_xpath='',
            help=''
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    if args.model_name == 'sketchy':
        model = SketchyReader(word_vectors=word_vectors, char_vectors=char_vectors,
                              hidden_size=args.hidden_size,
                              char_embed_drop_prob=args.char_embed_drop_prob,
                              num_heads=args.num_heads,
                              drop_prob=args.drop_prob)  # SKETCHY
    elif args.model_name == 'intensive':
        model = IntensiveReader(word_vectors=word_vectors, char_vectors=char_vectors,
                                num_heads=args.num_heads,
                                char_embed_drop_prob=args.char_embed_drop_prob,
                                hidden_size=args.hidden_size,
                                drop_prob=args.drop_prob)  # INTENSIVE
    elif args.model_name == 'retro':
        model = RetroQANet(word_vectors=word_vectors, char_vectors=char_vectors,
                           hidden_size=args.hidden_size, num_heads=args.num_heads,
                           char_embed_drop_prob=args.char_embed_drop_prob,
                           intensive_path=args.load_path_i,
                           sketchy_path=args.load_path_s,
                           gpu_ids=args.gpu_ids,
                           drop_prob=args.drop_prob)  # Outer

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Set up losses
    bceLoss = nn.BCELoss()

    # Get saver
    saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric, log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd)
    if args.optim == "adam":
        optimizer = optim.Adam(model.parameters(), 0.001, betas=(0.8, 0.999),
                               eps=1e-7, weight_decay=3e-7)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                   num_workers=args.num_workers, collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False,
                                 num_workers=args.num_workers, collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        counter = 0
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                counter += 1

                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                y1, y2 = y1.to(device), y2.to(device)
                if args.model_name == 'sketchy':
                    yi = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    loss = bceLoss(yi, torch.where(y1 == 0, 0, 1).type(torch.FloatTensor))
                elif args.model_name == 'intensive':
                    yi, log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    # if counter % 100 == 0:
                    #     print(torch.max(log_p1.exp(), dim=1)[0])
                    #     print(torch.max(log_p2.exp(), dim=1)[0])
                    # weights = torch.ones(log_p1.shape[1])
                    # weights[0] = 2 / (log_p1.shape[1])
                    # nll_loss = nn.NLLLoss(weight=weights.to(device='cuda:0'))
                    # gt_0 = torch.zeros(yi.shape[0]).to(device)
                    # gt_1 = torch.ones(yi.shape[0]).to(device)
                    loss = args.alpha_1 * bceLoss(yi, torch.where(y1 == 0, 0, 1).type(torch.FloatTensor)) + \
                        args.alpha_2 * (F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2))
                    # loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                elif args.model_name == 'retro':
                    log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                else:
                    raise ValueError('invalid --model_name, sketchy or intensive required')
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/' + args.model_name, loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file, args.max_ans_len,
                                                  args.use_squad_v2,
                                                  model_name=args.model_name,
                                                  a1=args.alpha_1, a2=args.alpha_2)
                    saver.save(step, model, results[args.metric_name], device,
                               model_name=args.model_name)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file,
                                   step=step, split='dev', num_visuals=args.num_visuals)
import sys                 # needed by the ImportError handler below
import numpy as np
import pandas as pd        # needed by get_plot_months below

# user defined
import util
import config

# from datetime import datetime, date
from dateutil.relativedelta import relativedelta

try:
    import xlrd
except ImportError:
    print("Please install the following module: 'xlrd'")
    sys.exit(-1)

kpilog = util.get_logger(config.autokpi["logname"])


#-----------------------------------------------
# Create structure to hold plot months
# - returns DataFrame structure
#-----------------------------------------------
def get_plot_months(start_dt, end_dt):
    months = util.get_kpi_months(start_dt, end_dt)

    # get corresponding fyq for each month
    fyq = util.get_month_fyq(months)

    # df = months_df.to_frame()
    months_df = pd.DataFrame(months, columns=["Months"])
from datetime import datetime import pytz import boto from boto.s3.bucket import Bucket import re import os import arrow from util import get_logger from snakebite.client import Client log = get_logger('inviso-monitor') job_pattern = re.compile('job_[0-9]+_[0-9]+', re.IGNORECASE) EPOCH = datetime(1970, 1, 1, tzinfo=pytz.UTC) class Cluster: def __init__(self, id, name, host, port, namenode, namenode_port, history_server): self.id = id self.name = name self.host = host self.port = port self.namenode = namenode self.namenode_port = namenode_port self.history_server = history_server class Monitor(object):
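Cluster is just a named bundle of the endpoints a Monitor needs to reach; a quick instantiation example (hostnames and ports are placeholders):

cluster = Cluster(
    id='c1',
    name='example-cluster',
    host='jobtracker.example.com',
    port=50030,
    namenode='namenode.example.com',
    namenode_port=8020,
    history_server='historyserver.example.com:19888',
)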
# load the example image and convert it to grayscale import numpy as np import pytesseract import util from bot_config import BotConfig from common import WINDOW_WIDTH, WINDOW_HEIGHT from imagesearch import region_grabber, imagesearcharea, imagesearcharea_v2, region_grabber_v2 from util import click_image class ImageNotFoundException(Exception): pass IMAGE_FOLDER = BotConfig().get_property("General", "image_folder") logger = util.get_logger() # TODO test import emu_manager hwnd = emu_manager.get_instance( int(BotConfig().get_property("Emulator", "use_device"))) def find_text(text, x1, y1, x2, y2): import emu_manager hwnd = emu_manager.get_instance( int(BotConfig().get_property("Emulator", "use_device"))) image = region_grabber_v2((x1, y1, x2, y2), hwnd) # image.save('testarea.png') # useful for debugging purposes, this will save the captured region as "testarea.png" image = np.array(image)
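find_text presumably goes on to preprocess the grabbed region and OCR it, as the opening comment ("convert it to grayscale") suggests. A self-contained sketch of that grayscale-then-Tesseract pattern (the use of OpenCV here is my assumption; only pytesseract and numpy appear in the imports above):

import cv2
import numpy as np
import pytesseract

def ocr_region(pil_image):
    """Convert a grabbed PIL image to grayscale and run Tesseract over it."""
    arr = np.array(pil_image)                     # H x W x 3 RGB array
    gray = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)  # grayscale usually OCRs more reliably
    return pytesseract.image_to_string(gray)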
import util import stem.descriptor import stem.descriptor.remote import stem.directory EMAIL_SUBJECT = 'Unable to retrieve tor descriptors' EMAIL_BODY = """\ Unable to retrieve the present %s... source: %s time: %s error: %s """ log = util.get_logger('descriptor_checker') util.log_stem_debugging('descriptor_checker') def main(): # retrieve the server and extrainfo descriptors from any authority targets = [ ('server descriptors', '/tor/server/all.z'), ('extrainfo descriptors', '/tor/extra/all.z'), ] for descriptor_type, resource in targets: log.debug("Downloading %s..." % descriptor_type) query = stem.descriptor.remote.Query(
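Each (descriptor_type, resource) pair is handed to stem's remote Query; a minimal sketch of the fetch-or-report flow it appears to be building (Query and run() are stem's API; the timeout value and error handling here are assumptions):

import stem.descriptor.remote

def fetch(descriptor_type, resource):
    query = stem.descriptor.remote.Query(resource, timeout=60)
    try:
        # run() blocks until the download finishes, raising if every endpoint fails
        return list(query.run())
    except Exception as exc:
        print('Unable to retrieve the %s: %s' % (descriptor_type, exc))
        return []

server_descriptors = fetch('server descriptors', '/tor/server/all.z')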
def main(): args = get_bert_args() assert not (args.do_output and args.do_train), 'Don\'t output and train at the same time!' if args.do_output: sub_dir_prefix = 'output' elif args.do_train: sub_dir_prefix = 'train' else: sub_dir_prefix = 'test' args.save_dir = util.get_save_dir(args.save_dir, args.name, sub_dir_prefix) args.output_dir = args.save_dir global logger logger = util.get_logger(args.save_dir, args.name) if args.doc_stride >= args.max_seq_length - args.max_query_length: logger.warning( "WARNING - You've set a doc stride which may be superior to the document length in some " "examples. This could result in errors when building features from the examples. Please reduce the doc " "stride or increase the maximum length to ensure the features are correctly built." ) if not args.evaluate_during_saving and args.save_best_only: raise ValueError("No best result without evaluation during saving") # Use util.get_save_dir, comment this for now # if ( # os.path.exists(args.output_dir) # and os.listdir(args.output_dir) # and args.do_train # and not args.overwrite_output_dir # ): # raise ValueError( # "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format( # args.output_dir # ) # ) # Setup distant debugging if needed if args.server_ip and args.server_port: # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script import ptvsd print("Waiting for debugger attach") ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) ptvsd.wait_for_attach() # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") args.n_gpu = torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) torch.distributed.init_process_group(backend="nccl") args.n_gpu = 1 args.device = device # Setup logging # logging.basicConfig( # format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", # datefmt="%m/%d/%Y %H:%M:%S", # level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN, # ) logger.warning( "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16, ) # Set seed set_seed(args) # Load pretrained model and tokenizer if args.local_rank not in [-1, 0]: # Make sure only the first process in distributed training will download model & vocab torch.distributed.barrier() args.model_type = args.model_type.lower() config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] config = config_class.from_pretrained( args.config_name if args.config_name else args.model_name_or_path, cache_dir=args.cache_dir if args.cache_dir else None, ) tokenizer = tokenizer_class.from_pretrained( args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case, cache_dir=args.cache_dir if args.cache_dir else None, ) # model = model_class.from_pretrained( # args.model_name_or_path, # from_tf=bool(".ckpt" in args.model_name_or_path), # config=config, # cache_dir=args.cache_dir if args.cache_dir else None, # ) # model = BertQA(config_class, model_class, model_type=args.model_name_or_path, do_cls=True) if args.local_rank == 0: # Make sure only the first process in distributed training will download model 
& vocab torch.distributed.barrier() model.to(args.device) logger.info("Training/evaluation parameters %s", args) # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set. # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will # remove the need for this code, but it is still valid. if args.fp16: try: import apex apex.amp.register_half_function(torch, "einsum") except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use fp16 training." ) # Training if args.do_train: train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False) global_step, tr_loss = train(args, train_dataset, model, tokenizer) logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) # Save the trained model and the tokenizer if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0): # Create output directory if needed if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) logger.info("Saving model checkpoint to %s", args.output_dir) # Save a trained model, configuration and tokenizer using `save_pretrained()`. # They can then be reloaded using `from_pretrained()` # Take care of distributed/parallel training model_to_save = model.module if hasattr(model, "module") else model # model_to_save.save_pretrained(output_dir) # BertQA is not a PreTrainedModel class torch.save(model_to_save, os.path.join(args.output_dir, 'pytorch_model.bin')) # save entire model tokenizer.save_pretrained(args.output_dir) # save tokenizer config.save_pretrained(args.output_dir) # save config # Good practice: save your training arguments together with the trained model torch.save(args, os.path.join(args.output_dir, "training_args.bin")) # Load a trained model and vocabulary that you have fine-tuned # model = model_class.from_pretrained(args.output_dir) # BertQA is not a PreTrainedModel class model = torch.load(os.path.join(args.output_dir, 'pytorch_model.bin')) tokenizer = tokenizer_class.from_pretrained( args.output_dir, do_lower_case=args.do_lower_case) model.to(args.device) # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory results = {} if args.do_eval and args.local_rank in [-1, 0]: if args.do_train: logger.info( "Loading checkpoints saved during training for evaluation") checkpoints = [args.output_dir] if args.eval_all_checkpoints: checkpoints = list( os.path.dirname(c) for c in sorted( glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))) # logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN) # Reduce model loading logs else: logger.info("Loading checkpoint %s for evaluation", args.model_name_or_path) checkpoints = [args.eval_dir] logger.info("Evaluate the following checkpoints: %s", checkpoints) for checkpoint in checkpoints: # Reload the model global_step = checkpoint.split( "-")[-1] if len(checkpoints) > 1 else "" # model = model_class.from_pretrained(checkpoint) # BertQA is not a PreTrainedModel class model = torch.load(os.path.join(checkpoint, 'pytorch_model.bin')) model.to(args.device) # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step, save_dir=args.output_dir, save_log_path=os.path.join( checkpoint, 'eval_result.json')) result = dict( (k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items()) results.update(result) logger.info( 
f'Convert format and Writing submission file to directory {args.output_dir}...' ) util.convert_submission_format_and_save( args.output_dir, prediction_file_path=os.path.join(args.output_dir, 'predictions_.json')) logger.info("Results: {}".format(results)) if args.do_output and args.local_rank in [-1, 0]: assert not args.do_train and not args.do_eval logger.info("Loading checkpoint %s for output", args.model_name_or_path) checkpoints = [args.eval_dir] logger.info("Output the following checkpoints: %s", checkpoints) for checkpoint in checkpoints: # Reload the model global_step = checkpoint.split( "-")[-1] if len(checkpoints) > 1 else "" model = torch.load(os.path.join(checkpoint, 'pytorch_model.bin')) model.to(args.device) generate_model_outputs(args, model, tokenizer, is_dev=True, prefix=global_step, save_dir=args.output_dir) return results
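Because BertQA is not a PreTrainedModel subclass, the script saves and reloads the whole module object with torch.save / torch.load instead of save_pretrained / from_pretrained. A small standalone sketch of that round trip (directory names are placeholders):

import os
import torch

def save_checkpoint(model, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    # Unwrap DataParallel/DistributedDataParallel before saving
    model_to_save = model.module if hasattr(model, "module") else model
    torch.save(model_to_save, os.path.join(output_dir, "pytorch_model.bin"))

def load_checkpoint(checkpoint_dir, device):
    # torch.load restores the full module object, so the class definition must be
    # importable; from_pretrained would only restore weights and config
    model = torch.load(os.path.join(checkpoint_dir, "pytorch_model.bin"),
                       map_location=device)
    return model.to(device)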
import pandas as pd import numpy as np from util import timeclass, timeit, get_logger import torch VERBOSITY_LEVEL = 'INFO' LOGGER = get_logger(VERBOSITY_LEVEL, __file__) @timeit def sparse_dot(matrix_a, matrix_b): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') return torch.mm( torch.Tensor(matrix_a).to(device), torch.Tensor(matrix_b).to(device)).cpu().numpy() class Feat: def __init__(self): pass @timeclass('Feat') def fit_transform(self, table, drop_sum_columns): degree_columns = self.degree(table) degree_bins_columns = self.degree_bins(table) neighbor_columns = self.get_neighbor(table) bin_2_neighbor_mean_degree_bins_columns = self.bin_2_neighbor_mean_degree_bins( table) gnn_append = [
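Despite its name, sparse_dot above is a dense matrix multiply that simply stages both operands on the GPU when one is available and copies the product back to NumPy; a quick usage example:

import numpy as np

a = np.random.rand(4, 8).astype(np.float32)
b = np.random.rand(8, 3).astype(np.float32)

c = sparse_dot(a, b)                      # (4, 3) NumPy array
assert np.allclose(c, a @ b, atol=1e-4)   # matches the plain CPU result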
def __init__(self, algor_params, other_params): super(KMeansEvaluate, self).__init__(algor_params, other_params) self.log = get_logger('KMeansEvaluate')
import os import sys import argparse import glob from pathlib import Path path_this = os.path.dirname(os.path.abspath(__file__)) sys.path.append(path_this) from util import get_logger from notebook import Notebook logger = get_logger("convert") def convert_file(arg): nb = Notebook(arg['input_file']) nb.export(arg['output_file'], img_to=arg['media_folder']) def convert_folder(arg): # First, we need to reconstruct the hierarchy of the input folder # under the output folder. We do that by taking the input path and # subtracting it from each note's actual path. tot_base_part = len(Path(arg['input_file']).parts) for note in glob.iglob(os.path.join(arg['input_file'], '**/*.ipynb'), recursive=True): logger.debug("Processing {}".format(note)) # $note is also a path, but joined with the $input_file # get the note path with the base stripped, preserving only the hierarchy
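The loop above strips the input folder's path components from each note's path so the same hierarchy can be recreated under the output folder. A small sketch of that parts-slicing idea (the output file extension and function name are made up for illustration):

import os
from pathlib import Path

def output_path_for(note_path, input_root, output_root):
    """Map <input_root>/a/b/note.ipynb to <output_root>/a/b/note.md, preserving the hierarchy."""
    tot_base_part = len(Path(input_root).parts)
    relative = Path(note_path).parts[tot_base_part:]   # drop the input-root prefix
    return Path(output_root).joinpath(*relative).with_suffix(".md")

print(output_path_for("notes/work/todo.ipynb", "notes", "out"))  # out/work/todo.md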
def __init__(self, conf): self.conf = conf self.device = torch.device(f"cuda:{conf.gpu_id}") self.log = get_logger() torch.set_printoptions(precision=8) if conf.runid: conf.rundir = mkdir(conf.outdir / conf.runid) if not conf.rundir: conf.rundir = next_rundir(conf.outdir, log=self.log) self.rundir = conf.rundir dump_args(conf, conf.rundir / "conf.json") set_random_seed(conf.random_seed) if self.conf.use_bert: assert self.conf.lang in Bert.supported_langs, self.conf.lang self.bert = Bert(self.conf.bert_model_name, device=self.device) else: self.bert = None self.data = load_dataset(conf, conf.lang, bert=self.bert) _data = [self.data] for d in _data: self.log.info( f"{len(d.train_loader)} batches | bs {conf.batch_size}") self.model = self.get_model() self.optimizer = get_optim(conf, self.model) optimum = "min" if conf.lr_scheduler == "plateau": self.lr_scheduler = ReduceLROnPlateau(self.optimizer, factor=0.1, patience=2, mode=optimum, verbose=True) elif conf.lr_scheduler: raise ValueError("Unknown lr_scheduler: " + conf.lr_scheduler) self.losses = LossTrackers.from_names("loss", log=self.log) if (self.main_lang_data.tag == "ner" or self.conf.dataset.startswith("sr3de")): if self.data.is_multilingual: self.sentence_texts = { split_name: self.main_lang_data.token_texts(split_name) for split_name in ["dev", "test"] } self.conll_score = { lang: ConllScore(tag_enc=self.main_lang_data.tag_enc) for lang in self.data.dev } self.score = { lang: Score("f1", save_model=False, log=self.log, score_func=self.conll_score[lang], add_mode="append") for lang in self.data.dev } self.avg_score = Score("avg_f1", log=self.log, score_func="dummy", add_mode="append") else: self.sentence_texts = { split_name: self.main_lang_data.token_texts(split_name) [:conf.max_eval_inst] for split_name in ["dev", "test"] } self.conll_score = ConllScore( tag_enc=self.main_lang_data.tag_enc) self.score = Score("f1", log=self.log, score_func=self.conll_score, add_mode="append") else: if self.data.is_multilingual: self.score = { lang: Score("acc", log=self.log) for lang in self.data.dev } self.avg_score = Score("avg_acc", log=self.log, score_func="dummy", add_mode="append") else: self.score = Score("acc", log=self.log) if conf.early_stop > 0: score_optimum = ("max" if (self.conf.dataset.startswith("wikiannmulti") or self.data.is_multilingual) else self.score.optimum) self.early_stop = EarlyStopping( score_optimum, min_delta=conf.early_stop_min_delta, patience=conf.early_stop) else: self.early_stop = None self.epoch = 0
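The ReduceLROnPlateau scheduler configured above is stepped with the monitored metric rather than the epoch number; a self-contained toy example with the same factor/patience settings and dummy dev losses shows when the learning rate actually drops:

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=2, mode="min")

dev_losses = [0.90, 0.85, 0.84, 0.84, 0.84, 0.84]   # stops improving after epoch 2
for epoch, dev_loss in enumerate(dev_losses):
    scheduler.step(dev_loss)                         # pass the metric, not the epoch
    print(epoch, optimizer.param_groups[0]["lr"])    # LR drops to 1e-4 once patience runs out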
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ __all__ = ("service_map", ) from util import get_logger, conf from .device import Device import rgbxy import datetime import requests logger = get_logger(__name__.split(".", 1)[-1]) converter_pool = dict() def get_gamut(model_id): # https://developers.meethue.com/develop/hue-api/supported-devices/ if model_id in ("LCT001", "LCT007", "LCT002", "LCT003", "LLM001"): return rgbxy.GamutB elif model_id in ("LCT010", "LCT014", "LCT015", "LCT016", "LCT011", "LLC020", "LST002", "LCT012", "LCT024"): return rgbxy.GamutC elif model_id in ("LLC010", "LLC006", "LST001", "LLC011", "LLC012", "LLC005", "LLC007", "LLC014"): return rgbxy.GamutA else:
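The gamut looked up here is what an rgbxy converter needs to translate RGB values into the CIE xy coordinates the Hue bridge expects, and converter_pool above suggests one cached converter per model. A sketch of that caching pattern (pooling by model id is my assumption; rgbxy.Converter and rgb_to_xy are the library's API):

import rgbxy

def get_converter(model_id):
    # Cache one converter per model id, keyed by the bulb's colour gamut
    if model_id not in converter_pool:
        converter_pool[model_id] = rgbxy.Converter(get_gamut(model_id))
    return converter_pool[model_id]

x, y = get_converter("LCT015").rgb_to_xy(255, 0, 0)   # full red on a GamutC bulb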
def main(args): # Set up logging and devices args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) device, args.gpu_ids = util.get_available_devices() log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True))) args.batch_size *= max(1, len(args.gpu_ids)) # Set random seed log.info('Using random seed {}...'.format(args.seed)) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings # Args: word_vectors: word vector tensor of dimension [vocab_size * wemb_dim] log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vectors = util.torch_from_json(args.char_emb_file) # Get Model log.info('Building Model...') model = QANet(word_vectors, char_vectors, args.para_limit, args.ques_limit, args.f_model, num_head=args.num_head, train_cemb = (not args.pretrained_char)) model = nn.DataParallel(model, args.gpu_ids) if args.load_path: log.info('Loading checkpoint from {}...'.format(args.load_path)) model, step = util.load_model(model, args.load_path, args.gpu_ids) else: step = 0 model = model.to(device) model.train() ema = util.EMA(model, args.ema_decay) # Get saver saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler parameters = filter(lambda p: p.requires_grad, model.parameters()) optimizer = optim.Adam( params=parameters, lr=args.lr, betas=(args.beta1, args.beta2), eps=1e-8, weight_decay=3e-7) cr = 1.0 / math.log(args.lr_warm_up_num) scheduler = optim.lr_scheduler.LambdaLR( optimizer, lr_lambda=lambda ee: cr * math.log(ee + 1) if ee < args.lr_warm_up_num else 1) loss_f = torch.nn.CrossEntropyLoss() # Get data loader log.info('Building dataset...') train_dataset = SQuAD(args.train_record_file, args.use_squad_v2) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn) dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2) dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Train log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) while epoch != args.num_epochs: epoch += 1 log.info('Starting epoch {}...'.format(epoch)) with torch.enable_grad(), \ tqdm(total=len(train_loader.dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) cc_idxs = cc_idxs.to(device) qc_idxs = qc_idxs.to(device) batch_size = cw_idxs.size(0) optimizer.zero_grad() # Forward log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs) y1, y2 = y1.to(device), y2.to(device) loss = torch.mean(loss_f(log_p1, y1) + loss_f(log_p2, y2)) loss_val = loss.item() # Backward loss.backward() nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(step // batch_size) ema(model, step // batch_size) # Log info step += batch_size progress_bar.update(batch_size) progress_bar.set_postfix(epoch=epoch, NLL=loss_val) tbx.add_scalar('train/NLL', loss_val, step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step) steps_till_eval -= batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate 
and save checkpoint log.info('Evaluating at step {}...'.format(step)) ema.assign(model) results, pred_dict = evaluate(model, dev_loader, device, args.dev_eval_file, args.max_ans_len, args.use_squad_v2) saver.save(step, model, results[args.metric_name], device) ema.resume(model) # Log to console results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items()) log.info('Dev {}'.format(results_str)) # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar('dev/{}'.format(k), v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
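The LambdaLR warm-up used in this training script multiplies the base learning rate by cr * log(step + 1) until lr_warm_up_num scheduler steps have passed, after which the factor stays at 1. A tiny worked example of that curve:

import math

lr, warm_up_num = 0.001, 1000
cr = 1.0 / math.log(warm_up_num)
lr_lambda = lambda ee: cr * math.log(ee + 1) if ee < warm_up_num else 1

for step in [1, 10, 100, 999, 5000]:
    # factor grows logarithmically: ~0.10, 0.35, 0.67, 1.00, then stays at 1.00
    print(step, round(lr * lr_lambda(step), 6))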
def main(args): # Set up logging args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False) log = util.get_logger(args.save_dir, args.name) log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') device, gpu_ids = util.get_available_devices() args.batch_size *= max(1, len(gpu_ids)) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) # Get model log.info('Building model...') model = ModelClass(embeddings=word_vectors, hidden_size=args.hidden_size) model = nn.DataParallel(model, gpu_ids) log.info(f'Loading checkpoint from {args.load_path}...') model = util.load_model(model, args.load_path, gpu_ids, return_step=False) model = model.to(device) model.eval() # Get data loader log.info('Building dataset...') record_file = vars(args)[f'{args.split}_record_file'] dataset = SQuAD(record_file, args.use_squad_v2) data_loader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Evaluate log.info(f'Evaluating on {args.split} split...') nll_meter = util.AverageMeter() pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission eval_file = vars(args)[f'{args.split}_eval_file'] with open(eval_file, 'r') as fh: gold_dict = json_load(fh) with torch.no_grad(), \ tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # Forward log_p1, log_p2 = model(cw_idxs, qw_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) nll_meter.update(loss.item(), batch_size) # Get F1 and EM scores p1, p2 = log_p1.exp(), log_p2.exp() starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2) # Log info progress_bar.update(batch_size) if args.split != 'test': # No labels for the test set, so NLL would be invalid progress_bar.set_postfix(NLL=nll_meter.avg) idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), args.use_squad_v2) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) # Log results (except for test set, since it does not come with labels) if args.split != 'test': results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2) results_list = [('NLL', nll_meter.avg), ('F1', results['F1']), ('EM', results['EM'])] if args.use_squad_v2: results_list.append(('AvNA', results['AvNA'])) results = OrderedDict(results_list) # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'{args.split.title()} {results_str}') # Log to TensorBoard tbx = SummaryWriter(args.save_dir) util.visualize(tbx, pred_dict=pred_dict, eval_path=eval_file, step=0, split=args.split, num_visuals=args.num_visuals) # Write submission file sub_path = join(args.save_dir, args.split + '_' + args.sub_file) log.info(f'Writing submission file to {sub_path}...') with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh: csv_writer = csv.writer(csv_fh, delimiter=',') csv_writer.writerow(['Id', 'Predicted']) for uuid in sorted(sub_dict): csv_writer.writerow([uuid, sub_dict[uuid]])
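util.discretize converts the start/end probability vectors into concrete answer spans; conceptually it picks, for each example, the (start, end) pair with the highest joint probability subject to start <= end and a maximum span length. An illustrative sketch of that selection (not the project's actual implementation):

import torch

def pick_span(p1, p2, max_ans_len=15):
    """Pick argmax over p1[i] * p2[j] with i <= j and j - i < max_ans_len."""
    joint = p1.unsqueeze(2) * p2.unsqueeze(1)                # (batch, c_len, c_len)
    joint = torch.triu(joint)                                # keep start <= end
    joint = joint - torch.triu(joint, diagonal=max_ans_len)  # cap the span length
    c_len = joint.size(1)
    flat_idx = joint.view(joint.size(0), -1).argmax(dim=1)
    return flat_idx // c_len, flat_idx % c_len               # start, end indices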
# How many words a message must contain LAST_MSG_RAIN_WORDS = 3 # (Seconds) How long a user must wait between tiprandom TIP_RANDOM_WAIT = 10 # (Seconds) How long a user must wait between tipfavorites TIP_FAVORITES_WAIT = 150 db = PooledPostgresqlExtDatabase(settings.database, user=settings.database_user, password=settings.database_password, host='localhost', port=5432, max_connections=16) logger = util.get_logger("db") ### User Stuff @db.connection_context() def get_accounts(): u = User.select(User.wallet_address) accts = [] for a in u: accts.append(a.wallet_address) return accts @db.connection_context() def get_user_by_id(user_id, user_name=None): try:
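Each query function is wrapped in @db.connection_context(), which checks a pooled connection out for the duration of the call. The get_user_by_id body is truncated above, so rather than guess at it, here is a separate, purely illustrative helper using the same pattern:

@db.connection_context()
def count_users():
    # Runs inside its own pooled connection, like get_accounts above
    return User.select().count()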
def main(): # define parser and arguments args = get_train_test_args() util.set_seed(args.seed) # model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased") model = DomainQA(args.num_classes, args.hidden_size, args.num_layers, args.dropout, args.dis_lambda, args.concat, args.anneal) tokenizer = DistilBertTokenizerFast.from_pretrained( 'distilbert-base-uncased') if args.do_train: if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) args.save_dir = util.get_save_dir(args.save_dir, args.run_name) log = util.get_logger(args.save_dir, 'log_train') log.info(f'Args: {json.dumps(vars(args), indent=4, sort_keys=True)}') log.info("Preparing Training Data...") args.device = torch.device( 'cuda') if torch.cuda.is_available() else torch.device('cpu') if args.load_weights != '': args.load_weights = os.path.join(args.load_weights, 'checkpoint', model.WEIGHTS_NAME) model.load_state_dict(torch.load(args.load_weights)) if args.load_distilbert_weights != '': args.load_distilbert_weights = os.path.join( args.load_distilbert_weights, 'checkpoint', model.WEIGHTS_NAME) model.distilbert.load_state_dict( torch.load(args.load_distilbert_weights)) print('loaded pretrained distilbert weights from', args.load_distilbert_weights) #target_data_dir, target_dataset, tokenizer, split_name, source_data_dir = None, source_dataset = None train_dataset, _ = get_train_dataset(args, \ args.target_train_dir,\ args.target_train_datasets,\ tokenizer, 'train', \ source_data_dir=args.source_train_dir, \ source_dataset=args.source_train_datasets) log.info("Preparing Validation Data...") val_dataset, val_dict = get_dataset(args, \ args.eval_datasets,\ args.eval_dir,\ tokenizer, 'val') train_loader = DataLoader(train_dataset, batch_size=args.batch_size, sampler=RandomSampler(train_dataset)) val_loader = DataLoader(val_dataset, batch_size=args.batch_size, sampler=SequentialSampler(val_dataset)) # warm up if args.max_steps > 0: args.t_total = args.max_steps # Total number of training updates args.num_epochs = args.max_steps // ( len(train_loader) // args.gradient_accumulation_steps) + 1 else: args.t_total = len( train_loader ) // args.gradient_accumulation_steps * args.num_epochs # self.gradient_accumulation_steps = 1 if args.warmup_ratio > 0: assert args.warmup_steps == 0 args.warmup_steps = int(args.warmup_ratio * args.t_total) trainer = Trainer(args, log, model) best_scores = trainer.train(train_loader, val_loader, val_dict) if args.do_eval: args.device = torch.device( 'cuda') if torch.cuda.is_available() else torch.device('cpu') split_name = 'test' if 'test' in args.eval_dir else 'validation' log = util.get_logger(args.save_dir, f'log_{split_name}') trainer = Trainer(args, log, model) config_path = os.path.join(args.save_dir, 'checkpoint', 'config.json') checkpoint_path = os.path.join(args.save_dir, 'checkpoint', model.WEIGHTS_NAME) model.load_state_dict(torch.load(checkpoint_path)) model.to(args.device) eval_dataset, eval_dict = get_dataset(args, args.eval_datasets, args.eval_dir, tokenizer, split_name) eval_loader = DataLoader(eval_dataset, batch_size=args.batch_size, sampler=SequentialSampler(eval_dataset)) eval_preds, eval_scores = trainer.evaluate(eval_loader, eval_dict, return_preds=True, split=split_name) results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in eval_scores.items()) log.info(f'Eval {results_str}') # Write submission file sub_path = os.path.join(args.save_dir, split_name + '_' + args.sub_file) log.info(f'Writing submission file to {sub_path}...') with open(sub_path, 'w', 
newline='', encoding='utf-8') as csv_fh: csv_writer = csv.writer(csv_fh, delimiter=',') csv_writer.writerow(['Id', 'Predicted']) for uuid in sorted(eval_preds): csv_writer.writerow([uuid, eval_preds[uuid]])
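The warm-up bookkeeping in the training branch derives t_total from the loader length, gradient accumulation, and epoch count, then takes warmup_steps as a fraction of it; a quick worked example with made-up sizes:

# Made-up sizes, purely to illustrate the warm-up arithmetic used above
num_batches = 1200                    # len(train_loader)
gradient_accumulation_steps = 4
num_epochs = 3
warmup_ratio = 0.06

t_total = num_batches // gradient_accumulation_steps * num_epochs   # 900 optimizer updates
warmup_steps = int(warmup_ratio * t_total)                          # 54 warm-up updates
print(t_total, warmup_steps)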