def handle_event(event: dict, channel: str, channel_id: str, message: str,
                 sc: SlackClient, logger: logging.Logger) -> None:
    """Send the welcome message when a user joins the watched channel.

    :param event: Raw RTM event dictionary.
    :param channel: Human-readable channel name, used only in log output.
    :param channel_id: ID of the channel being watched; join events for
        any other channel are ignored.
    :param message: Welcome text; every ``{user}`` placeholder is replaced
        by a Slack mention of the joining user.
    :param sc: Client whose ``rtm_send_message`` delivers the message.
    :param logger: Logger for debug/info/error output.
    """
    logger.debug(f"Event received:\n{pformat(event)}")

    user = event.get('user')
    if event.get('subtype') not in ('group_join', 'channel_join') or not user:
        return

    # We will use the event's channel ID to send a response and refer to
    # users by their display_name in accordance with new guidelines.
    # https://api.slack.com/changelog/2017-09-the-one-about-usernames
    event_channel_id = event.get('channel')
    if event_channel_id != channel_id:
        return

    # Join events do not always carry a profile; fall back to the user ID
    # instead of failing on ``None.get``.
    user_profile = event.get('user_profile') or {}
    username = user_profile.get('display_name') or user
    welcome = message.replace('{user}', f"<@{user}>")

    try:
        sc.rtm_send_message(event_channel_id, welcome)
        logger.info(f"Welcomed {username} to #{channel}")
    except AttributeError:
        # NOTE(review): presumably raised by the client when it has no
        # live connection -- confirm against the slackclient version used.
        logger.error(f"Couldn't send message to #{channel}")
class ExpDateCSVParser(object):
    """Read (expansion name, release date) rows from a CSV file and store
       each date on the matching expansion in the database."""

    # pylint: disable-msg=R0913
    # we may need all these arguments for some files
    def __init__(self, oLogHandler):
        self.oLogHandler = oLogHandler
        self.oLogger = Logger('exp date parser')
        if oLogHandler is not None:
            self.oLogger.addHandler(oLogHandler)

    def parse(self, fIn):
        """Apply every ``name,YYYYMMDD`` row of the CSV file fIn."""
        aRows = [aRow for aRow in csv.reader(fIn)]
        # Progress-aware handlers are told up front how many rows follow
        if hasattr(self.oLogHandler, 'set_total'):
            self.oLogHandler.set_total(len(aRows))
        for sExp, sDate in aRows:
            try:
                oExp = IExpansion(sExp)
            except SQLObjectNotFound:
                # This error is non-fatal - the user may not have imported
                # the extra card lists, so we can legitimately encounter
                # expansions here which aren't in the database
                self.oLogger.info('Skipped Expansion: %s' % sExp)
            else:
                oExp.releasedate = datetime.datetime.strptime(
                    sDate, "%Y%m%d").date()
                oExp.syncUpdate()
                self.oLogger.info('Added Expansion: %s' % sExp)
def repair(
    self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
) -> bool:
    """Move the corrupt inode data aside and write an empty replacement.

    The existing data file is renamed into ``<fsck_dir>/broken_inodes``
    under the inode number, then an empty directory or empty file
    (depending on ``self.expected_type``) is written through the overlay.
    Always returns True.
    """
    # TODO: It would be nice to try and get the contents of the
    # file/directory at this location in the current commit, rather than
    # just writing out an empty file or directory
    backup_dir = fsck_dir / "broken_inodes"
    backup_dir.mkdir(exist_ok=True)
    inode_data_path = Path(overlay.get_path(self.inode.inode_number))
    inode_backup_path = backup_dir / str(self.inode.inode_number)

    # Only the log text and the overlay writer differ between the cases.
    if self.expected_type == InodeType.DIR:
        notice = (
            f"replacing corrupt directory inode {self.compute_path()!r} "
            "with an empty directory"
        )
        write_replacement = overlay.write_empty_dir
    else:
        notice = (
            f"replacing corrupt file inode {self.compute_path()!r} "
            "with an empty file"
        )
        write_replacement = overlay.write_empty_file

    log.info(notice)
    os.rename(inode_data_path, inode_backup_path)
    write_replacement(self.inode.inode_number)
    return True
def spinner(text: str, logger: Logger, quiet=False, debug=False):
    '''Announce a long running step and report how it ended.

    Generator with a single ``yield``: the caller's work runs at the
    yield point. An exception thrown into the generator there is logged
    with its traceback and not re-raised.

    :param text: Message to output
    :param logger: Logger to capture the error if it occurs
    :param quiet: If ``True``, messages will be hidden
    :param debug: If ``True``, show full tracebacks
    '''
    # pylint: disable=broad-except
    verbose = not quiet
    try:
        logger.info(text)
        if verbose:
            print(text)
        yield
        if verbose:
            print('Done\n')
    except Exception as error:
        trace = format_exc()
        logger.error(trace)
        if verbose:
            print(trace if debug else str(error))
def execute(self, compile_base_path: str, timeout: Optional[int], logger: Logger):
    """Run the detector jar on this version and record the outcome.

    Sets ``self.result`` (success, error or timeout), ``self.message``
    on failure and ``self.runtime`` in every case, then calls
    ``self.save()``.

    :param compile_base_path: passed to ``self.version.get_compile``
    :param timeout: forwarded to ``Shell.exec``; also used in the
        timeout log message
    :param logger: receives error and runtime messages
    """
    java_call = ["java"] + self.detector.java_options + ["-jar", _quote(self.detector.jar_path)]
    compile_info = self.version.get_compile(compile_base_path)
    command = " ".join(java_call + self._get_detector_arguments(compile_info))

    start = time.time()
    try:
        Shell.exec(command, logger=logger, timeout=timeout)
        self.result = Result.success
    except CommandFailedError as error:
        logger.error("Detector failed: %s", error)
        self.result = Result.error
        message = str(error)
        lines = message.splitlines()
        if len(lines) > 5000:
            # Keep the first 500 and the last 4500 lines of the output.
            message = "\n".join(lines[:500] + lines[-4500:])
        self.message = message
    except TimeoutError:
        logger.error("Detector took longer than the maximum of %s seconds", timeout)
        self.result = Result.timeout
    finally:
        self.runtime = time.time() - start
        logger.info("Run took {0:.2f} seconds.".format(self.runtime))
        self.save()
def extract_files(src_path, extracted_files, logger:logging.Logger):
    """Sort the not-yet-extracted images in src_path by orientation.

    Entries for which ``already_extracted`` is true, or that cannot be
    opened as an image, are skipped. Returns ``(landscape, portrait)``:
    the names of the 1920x1080 and of the 1080x1920 images.
    """
    logger.info('Extracting new files...')
    landscape = []
    portrait = []
    for filename in os.listdir(src_path):
        if already_extracted(filename, extracted_files, logger):
            continue
        # Anything that fails to open is not an image
        try:
            image = Image.open(src_path + filename)
        except OSError:
            continue
        width, height = image.size
        image.close()
        if (width, height) == (1920, 1080):
            landscape.append(filename)
        elif (width, height) == (1080, 1920):
            portrait.append(filename)
    return landscape, portrait
def get_extracted_files(dst_path, logger: logging.Logger):
    """Return the ``.jpg`` names in dst_path followed by those in its
    ``dups/`` sub-directory.

    dst_path must support the ``/`` operator (it is joined with
    ``'dups/'`` that way).
    """
    logger.info('Getting list of already extracted files...')

    def jpg_names(folder):
        return [name for name in os.listdir(folder) if name.endswith('.jpg')]

    return jpg_names(dst_path) + jpg_names(dst_path / 'dups/')
class Spy(MailService):
    """
    A spy that logs all mail correspondence passing through its hands.
    It only watches MailMessage objects and writes the following
    messages to the logger.

    If "Austin Powers" is given as the sender or the recipient, a
    message with level WARN must be logged:
        Detected target mail correspondence: from {from} to {to} "{message}"
    Otherwise a message with level INFO must be logged:
        Usual correspondence: from {from} to {to}
    """

    def __init__(self):
        self.__logger = Logger("logger")

    def process_mail(self, mail: Mail):
        # Anything that is not a MailMessage passes through unlogged.
        if not isinstance(mail, MailMessage):
            return mail

        source = mail.get_source()
        destination = mail.get_destination()
        message = mail.get_message()
        involves_target = source == banned_address or destination == banned_address
        if involves_target:
            self.__logger.warning(
                'Detected target mail correspondence: from {0} to {1} "{2}"'.format(
                    source, destination, message))
        else:
            self.__logger.info(
                'Usual correspondence: from {0} to {1}'.format(source, destination))
        return mail
def copy_unique_filepairs(difference_matrix, src_path, list_land, list_port,
                          img_path, logger: logging.Logger):
    """Copy landscape/portrait pairs, most similar pair first.

    Repeatedly picks the smallest remaining entry of difference_matrix,
    copies the two files it refers to into img_path as
    ``<land>-land-<port>.jpg`` and ``<land>-port-<port>.jpg``, and removes
    that row and column. Stops when either list runs out.

    :param difference_matrix: 2-D array, rows indexed like list_land and
        columns like list_port
    :param src_path: source directory prefix, concatenated with the names
    :param list_land: landscape file names; consumed in place
    :param list_port: portrait file names; consumed in place
    :param img_path: destination prefix, concatenated with the new names
    :param logger: receives progress messages
    """
    logger.info('Copying new unique files...')
    cnt = 0
    # argmin on an empty matrix raises, so stop once either side is
    # exhausted (not only the landscape side).
    while list_land and list_port:
        arg_land, arg_port = np.unravel_index(
            np.argmin(difference_matrix), (len(list_land), len(list_port)))
        land = list_land[arg_land]
        port = list_port[arg_port]
        logger.debug(' {0}'.format(land))
        logger.debug(' {0}'.format(port))
        logger.debug(' {0}'.format(land + 'XXXX' + port + '.jpg'))
        shutil.copyfile(src_path + land,
                        img_path + land + '-land-' + port + '.jpg')
        shutil.copyfile(src_path + port,
                        img_path + land + '-port-' + port + '.jpg')
        cnt += 1
        del list_land[arg_land], list_port[arg_port]
        difference_matrix = np.delete(difference_matrix, arg_land, 0)
        difference_matrix = np.delete(difference_matrix, arg_port, 1)
    logger.debug('{0} files copied'.format(cnt))
def run(self):
    """Worker loop: fetch every host taken from ``self.queue``, forever.

    For each host, up to 100000 bytes are read and the elapsed time is
    logged and added to the module-level counters. A failed request is
    logged and the worker carries on; the queue item is marked done in
    every case so that ``queue.join()`` cannot hang on a failure.
    """
    global logger
    global totalFetchTime
    global totalRequestsCompleted
    while True:
        # grabs host from queue
        host = self.queue.get()
        threadId = threading.current_thread().name
        try:
            # fetches the url and reads at most 100000 bytes of the page
            beginTime = time.time()
            url = urllib2.urlopen(host)
            try:
                x = url.read(100000)
            finally:
                url.close()
            if not x:
                Logger.warn(logger, "[%s] No data for %s" % (threadId, host))
            elapsedTime = time.time() - beginTime
            Logger.info(logger, "Request for %s executed in %s" % (host, elapsedTime))
            totalRequestsCompleted += 1
            totalFetchTime += elapsedTime
        except Exception as exc:
            # Top-level boundary of the worker: report and keep serving
            # the queue instead of letting the thread die.
            Logger.error(logger, "[%s] Request for %s failed: %s" % (threadId, host, exc))
        finally:
            # signals to queue job is done
            self.queue.task_done()
def check_os(logger: logging.Logger):
    """Raise OSError unless ``sys.platform`` is ``'win32'`` and
    ``platform.release()`` is ``'10'``."""
    logger.info('Checking Windows 10...')
    is_windows_10 = sys.platform == 'win32' and platform.release() == '10'
    if not is_windows_10:
        raise OSError('This system is not Windows 10. Exit.')
def start_daemon(judge_logger: logging.Logger):
    """
    Run the judge main loop in the current process, guarded by a pid file.

    The pid file path is ``judge_config.RUN['pid_file']`` relative to the
    current working directory. If the file already exists the function
    reports that a daemon is running and exits. Otherwise it writes the
    pid, redirects stdin to the null device, runs ``main_loop`` and
    removes the pid file afterwards, even when ``main_loop`` raises.

    :param judge_logger: logger for status and error messages
    :return: None; on normal completion the process ends via ``sys.exit(0)``
    """
    pid_file_path = os.path.join(os.getcwd(), judge_config.RUN['pid_file'])

    # NOTE: the process is not detached (no double fork, setsid or umask)
    # and stdout/stderr are left untouched; only stdin is redirected below.

    if os.path.exists(pid_file_path):
        print('Judged daemon has being running.')
        judge_logger.error('Judged daemon has being running.')
        sys.exit(0)

    try:
        pid_dir = os.path.dirname(pid_file_path)
        if not os.path.exists(pid_dir):
            os.makedirs(pid_dir)
        with open(pid_file_path, mode='w+') as pid_file:
            pid_file.write('%d' % os.getpid())
        print('Judge daemon(pid=%d) start successfully.' % os.getpid())
        judge_logger.info('Judge daemon(pid=%d) start successfully.' % os.getpid())
    except OSError as e:
        # Best effort, as before: the daemon still runs without a pid
        # file, but the failure is now logged as well as printed.
        print(e)
        judge_logger.error(e)

    # redirect stdin to the null device
    sys.stdout.flush()
    sys.stderr.flush()
    with open(os.devnull, 'r') as si:
        os.dup2(si.fileno(), sys.stdin.fileno())

    try:
        main_loop(judge_logger)
    finally:
        try:
            os.remove(pid_file_path)
        except OSError as e:
            judge_logger.error(e)
    sys.exit(0)
def copy_database(oOrigConn, oDestConnn, oLogHandler=None): """Copy the database, with no attempts to upgrade. This is a straight copy, with no provision for funky stuff Compatability of database structures is assumed, but not checked. """ # Not checking versions probably should be fixed # Copy tables needed before we can copy AbstractCard flush_cache() oVer = DatabaseVersion() oVer.expire_cache() oLogger = Logger('copy DB') if oLogHandler: oLogger.addHandler(oLogHandler) if hasattr(oLogHandler, 'set_total'): iTotal = 14 + AbstractCard.select(connection=oOrigConn).count() + \ PhysicalCard.select(connection=oOrigConn).count() + \ PhysicalCardSet.select(connection=oOrigConn).count() oLogHandler.set_total(iTotal) bRes = True aMessages = [] oTrans = oDestConnn.transaction() aToCopy = [ (copy_rarity, 'Rarity table', False), (copy_expansion, 'Expansion table', False), (copy_discipline, 'Discipline table', False), (copy_clan, 'Clan table', False), (copy_creed, 'Creed table', False), (copy_virtue, 'Virtue table', False), (copy_card_type, 'CardType table', False), (copy_ruling, 'Ruling table', False), (copy_discipline_pair, 'DisciplinePair table', False), (copy_rarity_pair, 'RarityPair table', False), (copy_sect, 'Sect table', False), (copy_title, 'Title table', False), (copy_artist, 'Artist table', False), (copy_keyword, 'Keyword table', False), (copy_abstract_card, 'AbstractCard table', True), (copy_physical_card, 'PhysicalCard table', True), (copy_physical_card_set, 'PhysicalCardSet table', True), ] for fCopy, sName, bPassLogger in aToCopy: try: if bRes: if bPassLogger: fCopy(oOrigConn, oTrans, oLogger) else: fCopy(oOrigConn, oTrans) except SQLObjectNotFound, oExp: bRes = False aMessages.append('Unable to copy %s: Aborting with error: %s' % (sName, oExp)) else: oTrans.commit() oTrans.cache.clear() if not bPassLogger: oLogger.info('%s copied' % sName)
def run_tests(
    test_files,
    run_tool: "func(file, result_file)->(rc, out, err)",
    check_answer: "func(test_file, result_file, rc, out, err)->(rc, out, err)",
    stop_on_error,
    logger: Logger,
    output_folder=None,
):
    """
    Run the tool on every test file and check each answer.

    For each test a ``.log`` file with the tool and checker output and a
    ``.model`` result file are written into the output directory.

    :param test_files: iterable of test files
    :param run_tool: called with (test, result_file)
    :param check_answer: called with the test, the result file and the
        tool's (rc, out, err); a non-zero rc marks the test as failed
    :param stop_on_error: if true, stop after the first failed test
    :param logger: receives progress and result messages
    :param output_folder: if not None, intermediate results are saved there.
        Files in that folder will be overwritten.
    :return: True if no test failed
    """
    if output_folder:
        output_dir = output_folder
        makedirs(output_dir, exist_ok=True)
    else:
        output_dir = get_tmp_dir_name()
    logger.info("using " + output_dir + " as the temporal folder")

    failed_tests = list()
    for test in test_files:
        logger.info("testing {test}..".format(test=test))

        log_path = _generate_name(output_dir, test) + ".log"
        result_file = _generate_name(output_dir, test) + ".model"
        # The log file is closed after every test, also when the tool or
        # the checker raises.
        with open(log_path, "w") as log_stream:
            r_rc, r_out, r_err = run_tool(test, result_file)
            logger.debug(rc_out_err_to_str(r_rc, r_out, r_err))
            print(rc_out_err_to_str(r_rc, r_out, r_err), file=log_stream)

            c_rc, c_out, c_err = check_answer(test, result_file, r_rc, r_out, r_err)
            logger.debug(rc_out_err_to_str(c_rc, c_out, c_err))
            print(rc_out_err_to_str(c_rc, c_out, c_err), file=log_stream)

        if c_rc != 0:
            logger.info("    FAILED")
            failed_tests.append(test)
            if stop_on_error:
                break

    if failed_tests:
        logger.info(
            "The following tests failed: %s \n%s",
            "".join("\n    " + t for t in failed_tests),
            "See logs in " + output_dir,
        )
    else:
        logger.info("ALL TESTS PASSED")

    # A temporary directory is only kept when there is something to inspect
    if not output_folder and not failed_tests:
        shutil.rmtree(output_dir)

    return not failed_tests
def write_combos():
    """Append every "x y" name combination to ``namelist.txt``.

    ``x`` comes from ``names(0, 'names.txt')`` and ``y`` from
    ``names(1, 'names.txt')``. Combinations longer than 17 characters are
    additionally reported in ``flog.log``.
    """
    logger = Logger('name', 20)  # 20 is the numeric value of logging.INFO
    handler = FileHandler('flog.log')
    logger.addHandler(handler)
    try:
        with open('namelist.txt', 'a') as fileobject:
            combos = ("{} {}".format(x, y)
                      for x in names(0, 'names.txt')
                      for y in names(1, 'names.txt'))
            for name in combos:
                if len(name) > 17:
                    logger.info('{} is {} characters long'.format(name, len(name)))
                fileobject.write('{}\n'.format(name))
    finally:
        # Release flog.log; the handler was left open before.
        logger.removeHandler(handler)
        handler.close()
async def send_dumplings_from_queue_to_hub(
        kitchen_name: str,
        hub: str,
        dumpling_queue: multiprocessing.Queue,
        kitchen_info: dict,
        log: logging.Logger,
):
    """
    Forwards dumplings from the dumpling queue to ``nd-hub`` over a
    websocket.

    Returns after logging if the hub cannot be reached. Otherwise the
    kitchen info is sent once as JSON, followed by every dumpling taken
    from the queue, until the task is cancelled or the connection fails.

    :param kitchen_name: The name of the kitchen.
    :param hub: The address where ``nd-hub`` is receiving dumplings.
    :param dumpling_queue: Queue to grab dumplings from.
    :param kitchen_info: Dict describing the kitchen.
    :param log: Logger.
    """
    hub_uri = 'ws://{0}'.format(hub)
    log.info(
        "{0}: Connecting to the dumpling hub at {1}".format(kitchen_name, hub_uri)
    )

    try:
        connection = await websockets.connect(hub_uri)
    except OSError as connect_error:
        log.error(
            "{0}: There was a problem with the dumpling hub connection. "
            "Is nd-hub available?".format(kitchen_name))
        log.error("{0}: {1}".format(kitchen_name, connect_error))
        return

    try:
        # The hub learns about this kitchen before any dumpling arrives.
        await connection.send(json.dumps(kitchen_info))

        # NOTE(review): get() is not awaited, so with a blocking queue (as
        # the annotation suggests) the event loop waits here - confirm
        # this is intended.
        while True:
            await connection.send(dumpling_queue.get())
    except asyncio.CancelledError:
        log.warning(
            "{0}: Connection to dumpling hub cancelled; closing...".format(
                kitchen_name))
        try:
            await connection.close(*ND_CLOSE_MSGS['conn_cancelled'])
        except websockets.exceptions.InvalidState:
            pass
    except websockets.exceptions.ConnectionClosed as closed:
        log.warning("{0}: Lost connection to dumpling hub: {1}".format(
            kitchen_name, closed))
    except OSError as send_error:
        log.exception(
            "{0}: Error talking to dumpling hub: {1}".format(
                kitchen_name, send_error)
        )
def attempt_database_upgrade(oLogHandler=None):
    """Try to upgrade the database by way of an in-memory sqlite copy.

       Returns True when both the memory copy and the final copy report
       success, False otherwise.
       """
    oTempConn = connectionForURI("sqlite:///:memory:")
    oLogger = Logger('attempt upgrade')
    if oLogHandler:
        oLogger.addHandler(oLogHandler)

    # Step 1: current database -> memory
    (bOK, aMessages) = create_memory_copy(oTempConn, oLogHandler)
    if not bOK:
        oLogger.error("Unable to create memory copy. Database not upgraded.")
        if len(aMessages) > 0:
            oLogger.error("Errors reported %s", aMessages)
        return False
    oLogger.info("Copied database to memory, performing upgrade.")
    if len(aMessages) > 0:
        oLogger.info("Messages reported: %s", aMessages)

    # Step 2: memory -> final database
    (bOK, aMessages) = create_final_copy(oTempConn, oLogHandler)
    if not bOK:
        oLogger.critical("Unable to perform upgrade.")
        if len(aMessages) > 0:
            oLogger.error("Errors reported: %s", aMessages)
        oLogger.critical("!!YOUR DATABASE MAY BE CORRUPTED!!")
        return False
    oLogger.info("Everything seems to have gone OK")
    if len(aMessages) > 0:
        oLogger.info("Messages reported %s", aMessages)
    return True
def fetch_data(oFile, oOutFile=None, sHash=None, oLogHandler=None,
        fErrorHandler=None):
    """Read everything from a file-like object (WwFile, urlopen or file).

       When oFile reports a Content-Length, the data is read in blocks of
       10000 and progress is logged through oLogHandler. The data is
       written to oOutFile if given, otherwise collected in memory. A
       urllib2.URLError is passed to fErrorHandler if one is supplied and
       re-raised otherwise.
       """
    # NOTE(review): as visible here, the collected sData is never returned
    # and sHash is unused - confirm against the callers.
    try:
        sLength = None
        if hasattr(oFile, 'info') and callable(oFile.info):
            sLength = oFile.info().getheader('Content-Length')

        if not sLength:
            # Just try and download
            if oOutFile:
                oOutFile.write(oFile.read())
                sData = None
            else:
                sData = oFile.read()
        else:
            oLogger = Logger('Sutekh data fetcher')
            if oLogHandler is not None:
                oLogger.addHandler(oLogHandler)
            aData = []
            iLength = int(sLength)
            if hasattr(oLogHandler, 'set_total'):
                # We promote to next integer, as we emit a signal
                # for any left over bits
                oLogHandler.set_total((iLength + 9999) // 10000)
            iTotal = 0
            while True:
                sInf = oFile.read(10000)
                if not sInf:
                    break
                iTotal += len(sInf)
                oLogger.info('%d downloaded', iTotal)
                if oOutFile:
                    oOutFile.write(sInf)
                else:
                    aData.append(sInf)
            sData = None if oOutFile else ''.join(aData)
    except urllib2.URLError as oExp:
        if not fErrorHandler:
            raise
        fErrorHandler(oExp)
        sData = None
def copy_to_new_abstract_card_db(oOrigConn, oNewConn, oCardLookup,
        oLogHandler=None):
    """Copy the card sets to a new Physical Card and Abstract Card List.

       Given an existing database, and a new database created from
       a new cardlist, copy the CardSets, going via CardSetHolders, so we
       can adapt to changed names, etc.

       sqlhub.processConnection is switched to oOrigConn and then to
       oNewConn while copying and is restored to its previous value on
       the way out, also when an error is raised.

       Returns (True, []).
       """
    # pylint: disable-msg=R0914
    # we need a lot of variables here
    aPhysCardSets = []
    oOldConn = sqlhub.processConnection
    sqlhub.processConnection = oOrigConn
    try:
        # Copy Physical card sets
        oLogger = Logger('copy to new abstract card DB')
        if oLogHandler:
            oLogger.addHandler(oLogHandler)
            if hasattr(oLogHandler, 'set_total'):
                iTotal = 1 + PhysicalCardSet.select(
                        connection=oOrigConn).count()
                oLogHandler.set_total(iTotal)
        aSets = list(PhysicalCardSet.select(connection=oOrigConn))
        aDone = []
        # Ensure we only process a set after its parent.
        # NOTE(review): a set whose parent never turns up in aSets keeps
        # this loop running forever - confirm that cannot happen.
        while True:
            aToDo = []
            for oSet in aSets:
                if oSet.parent is None or oSet.parent in aDone:
                    aPhysCardSets.append(
                            make_card_set_holder(oSet, oOrigConn))
                    aDone.append(oSet)
                else:
                    aToDo.append(oSet)
            if not aToDo:
                break
            aSets = aToDo
        # Save the current mapping
        oLogger.info('Memory copies made')
        # Create the cardsets from the holders
        dLookupCache = {}
        sqlhub.processConnection = oNewConn
        for oSet in aPhysCardSets:
            # create_pcs will manage transactions for us
            oSet.create_pcs(oCardLookup, dLookupCache)
            oLogger.info('Physical Card Set: %s', oSet.name)
            sqlhub.processConnection.cache.clear()
    finally:
        # Never leave the global connection pointing at a temporary one
        sqlhub.processConnection = oOldConn
    return (True, [])
def parse_ltl(par_text:str, logger:Logger) -> dict:
    """ Parse the spec text and return {section:data}, see sections in syntax_desc """
    #TODO: current version of parser is very restrictive: it allows only the specs of the form:
    # Forall (i,j..) ass_i_j -> (Forall(k) gua_k * Forall(l,m) gua_l_m)
    # it is impossible to have:
    # (Forall(i) a_i  ->  Forall(k) g_k) * (Forall(i,j) a_i_j  ->  Forall(i) g_i)
    # what we can have is:
    # (Forall(i,j,k) ((a_i -> g_i)) * (Forall(i,j) a_i_j -> g_i)
    logger.info('parsing input spec..')
    parsed_sections = par_parser.parse(par_text, lexer=par_lexer)

    #TODO: check unknown signals
    return dict(parsed_sections)
def build_differencematrix(src_path, list_land, list_port, logger: logging.Logger):
    """Build the landscape-versus-portrait difference matrix.

    Each landscape image is cropped to its middle 608 columns and each
    portrait image is resized to 608x1080 (width x height). The entry for
    a pair is the sum of squared pixel differences divided by 608 * 1080.

    :param src_path: directory prefix, concatenated with the file names
    :param list_land: landscape image file names (matrix rows)
    :param list_port: portrait image file names (matrix columns)
    :param logger: receives a progress message
    :return: numpy array with one row per landscape image
    """
    logger.info('Building difference matrix...')
    difference_matrix = []
    for land in list_land:
        with Image.open(src_path + land) as image:
            im_land = np.array(image)
        # Signed 64-bit values: on the raw unsigned 8-bit pixel data the
        # subtraction and the square below wrap around.
        im_land = im_land[:, (960-304):(960+304), :].astype(np.int64)
        difference_row = []
        for port in list_port:
            with Image.open(src_path + port) as image:
                # Pillow takes (width, height). Bilinear resampling is what
                # scipy.misc.imresize used by default before it was removed.
                im_port = np.array(image.resize((608, 1080), Image.BILINEAR))
            difference_row.append(
                np.sum((im_land - im_port) ** 2) / (608 * 1080))
        difference_matrix.append(difference_row)
    return np.array(difference_matrix)
def repair(
    self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
) -> bool:
    """Write an empty directory or empty file for the missing child inode.

    The kind is chosen from ``self.child.mode``. Always returns True.
    """
    # TODO: It would be nice to try and get the contents of the
    # file/directory at this location in the current commit, rather than
    # just writing out an empty file or directory
    if stat.S_ISDIR(self.child.mode):
        notice = (
            f"replacing missing directory {self.compute_path()!r} with an "
            "empty directory"
        )
        write_placeholder = overlay.write_empty_dir
    else:
        notice = f"replacing missing file {self.compute_path()!r} with an empty file"
        write_placeholder = overlay.write_empty_file

    log.info(notice)
    write_placeholder(self.child.inode_number)
    return True
def run(sc: SlackClient, channel: str, message: str, retries: int,
        logger: logging.Logger) -> None:
    """Connect to Slack and welcome users joining ``channel``, forever.

    Events are read in a loop and handed to ``handle_event``. When the
    connection is lost a reconnect is attempted; failed attempts wait
    with an exponentially growing delay, and the process exits once
    ``retries`` attempts in a row were needed without a successful read
    in between.

    :param sc: Slack RTM client
    :param channel: name of the channel to watch
    :param message: welcome message template passed to ``handle_event``
    :param retries: number of reconnect attempts tolerated in a row
    :param logger: receives status messages
    """
    if not sc.rtm_connect():
        sys.exit(bail('fatal', 'red', "Couldn't connect to Slack"))

    logger.info("Connected to Slack")

    channel_id = find_channel_id(channel, sc)
    logger.debug(f"Found channel ID {channel_id} for #{channel}")
    logger.info(f"Listening for joins in #{channel}")

    retry_count = 0
    backoff = 0.5

    while True:
        try:
            # Handle dem events!
            for event in sc.rtm_read():
                handle_event(event, channel, channel_id, message, sc, logger)

            # Reset exponential backoff retry strategy every time we
            # successfully loop. Failure would have happened in rtm_read()
            retry_count = 0
            time.sleep(0.5)

        # This is necessary to handle an error caused by a bug in Slack's
        # Python client. For more information see
        # https://github.com/slackhq/python-slackclient/issues/127
        #
        # The TimeoutError could be more elegantly resolved by making a PR
        # to the websocket-client library and letting them coerce that
        # exception to a WebSocketTimeoutException.
        except (websocket.WebSocketConnectionClosedException, TimeoutError):
            logger.error("Lost connection to Slack, reconnecting...")
            if sc.rtm_connect():
                logger.info("Reconnected to Slack")
            else:
                logger.info("Failed to reconnect to Slack")
                if retry_count >= retries:
                    sys.exit(bail(
                        'fatal',
                        'red',
                        "Too many failed reconnect attempts, shutting down")
                    )
                # 0.5s, 1s, 2s, ... - the delay used to be a constant
                # (backoff ** 2) / 4 seconds despite the name.
                time.sleep(backoff * 2 ** retry_count)
            retry_count += 1
def setup_localisations(logger: logging.Logger):
    """Setup gettext localisations.

    The language is the default locale's, unless a ``lang=<code>``
    command line argument overrides it; ``en`` is used when neither
    yields a code. The first ``../i18n/<code>.mo`` file found for the
    expanded language codes is loaded, otherwise untranslated text is
    used. ``_``, ``gettext`` and ``ngettext`` are installed into builtins.
    """
    import gettext
    import locale

    # Get the 'en_US' style language code
    lang_code = locale.getdefaultlocale()[0]

    # Allow overriding through command line.
    for arg in sys.argv[1:]:
        if arg.casefold().startswith('lang='):
            lang_code = arg[5:]
            break

    # No locale configured (or an empty override): expanding None fails.
    if not lang_code:
        lang_code = 'en'

    # Expands single code to parent categories.
    expanded_langs = gettext._expand_lang(lang_code)

    # NOTE(review): these calls use {}-style placeholders, which the
    # standard logging.Logger does not substitute - presumably a project
    # logger with str.format support is passed in; confirm.
    logger.info('Language: {!r}', lang_code)
    logger.debug('Language codes: {!r}', expanded_langs)

    for lang in expanded_langs:
        try:
            # The catalogue is parsed on construction, so the file can be
            # closed straight away.
            with open('../i18n/{}.mo'.format(lang), 'rb') as mo_file:
                trans = gettext.GNUTranslations(mo_file)
        except FileNotFoundError:
            continue
        break
    else:
        # No translations, fallback to English.
        # That's fine if the user's language is actually English.
        if 'en' not in expanded_langs:
            logger.warning(
                "Can't find translation for codes: {!r}!",
                expanded_langs,
            )
        trans = gettext.NullTranslations()
    # Add these functions to builtins, plus _=gettext
    trans.install(['gettext', 'ngettext'])
def begin(filename=None, failopen=False):
    """Take the lock by writing this process's pid into a lockfile.

    Without a filename, ``/tmp/<LOGNAME>-<script name>.lock`` is used. An
    existing lockfile whose pid still answers ``os.getpgid`` aborts the
    program with exit status 1. A stale lockfile also aborts when
    failopen is true; otherwise it is removed, as is one with unreadable
    contents. The path is stored in the module-level ``lockfile``.
    """
    global lockfile

    if not filename:
        unique = os.environ['LOGNAME']
        cmd = os.path.basename(sys.argv[0])
        filename = "/tmp/%s-%s.lock" % (unique, cmd)

    if os.path.exists(filename):
        log.warn("Lockfile found!")
        with open(filename, "r") as existing:
            try:
                pid = int(existing.read())
            except ValueError:
                pid = None

        if pid:
            try:
                os.getpgid(pid)
            except OSError:
                if failopen:
                    log.fatal("Aborting until stale lockfile is investigated: {path}".format(path=filename))
                    sys.exit(1)
                log.error("Lockfile is stale.")
            else:
                log.error("Aborting! Previous process ({pid}) is still alive. Remove lockfile manually if in error: {path}".format(pid=pid, path=filename))
                sys.exit(1)
        else:
            log.error("Invalid lockfile contents.")

        log.info("Removing old lockfile.")
        os.unlink(filename)

    with open(filename, "w") as fresh:
        fresh.write(str(os.getpid()))

    lockfile = filename
def initialize(img_path, logger: logging.Logger):
    """Create img_path and its ``dups/`` sub-directory if they are missing.

    img_path must support the ``/`` operator.
    """
    logger.info('Initializing directories...')

    def ensure_directory(directory):
        if os.path.isdir(directory):
            return
        logger.info('{0} does not exists. Creating it...'.format(directory))
        os.mkdir(directory)

    ensure_directory(img_path)
    ensure_directory(img_path / 'dups/')
def repair(
    self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
) -> bool:
    """Extract all orphan inodes into ``<fsck_dir>/lost+found``.

    Each orphan directory and file is extracted through the overlay
    (with ``remove=True``) to an entry named after its inode number;
    files are extracted as regular files with mode 0o644. Always returns
    True.
    """
    lost_n_found = fsck_dir / "lost+found"
    lost_n_found.mkdir(exist_ok=True)
    log.info(f"moving orphan inodes to {lost_n_found}")

    for orphan_dir in self.orphan_directories:
        number = orphan_dir.inode_number
        log.info(
            f"moving contents of orphan directory {number} to lost+found"
        )
        overlay.extract_dir(number, lost_n_found / str(number), remove=True)

    file_mode = stat.S_IFREG | 0o644
    for orphan_file in self.orphan_files:
        number = orphan_file.inode_number
        log.info(f"moving orphan file {number} to lost+found")
        overlay.extract_file(
            number, lost_n_found / str(number), file_mode, remove=True
        )

    return True
class BaseFeature(abc.ABC):
    """Base class for a feature built from the BigQuery train table.

    Subclasses implement ``import_columns`` and ``make_features``;
    ``run`` then reads the data, builds the features, stores them as a
    feather file and uploads that file to GCS.
    """

    # When true, features are passed through reduce_mem_usage before saving
    save_memory: bool = True

    def __init__(self, debugging: bool = False, **kwargs) -> None:
        """
        :param debugging: limit the query to 10000 rows and use the
            ``features_debug`` bucket directory
        :param kwargs: accepted and ignored here
        """
        super().__init__()
        self.name = self.__class__.__name__
        self.debugging = debugging
        self._logger = Logger(self.__class__.__name__)
        handler = StreamHandler()
        fmt = Formatter("%(asctime)s - %(levelname)s - %(message)s")
        handler.setFormatter(fmt)
        handler.setLevel(INFO)
        self._logger.addHandler(handler)
        self.GCS_BUCKET_NAME = GCS_BUCKET_NAME
        self.PROJECT_ID = PROJECT_ID
        self.train_table = f"`{PROJECT_ID}.riiid.train`"

    @abc.abstractmethod
    def import_columns(self) -> List[str]:
        """Specify the columns needed to build this feature."""
        ...

    @abc.abstractmethod
    def make_features(
        self, df_train_input: pd.DataFrame,
    ) -> pd.DataFrame:
        """Convert the DataFrame of raw data fetched from BigQuery into
        features.

        The result must have as many rows as the input.
        """
        ...

    @classmethod
    def add_feature_specific_arguments(cls, parser: argparse.ArgumentParser):
        """Hook for subclasses to add their own command line arguments."""
        return

    @classmethod
    def main(cls):
        """Command line entry point: parse the arguments, build the
        feature instance and run it."""
        import logging

        logging.basicConfig(level=logging.INFO)
        parser = argparse.ArgumentParser()
        parser.add_argument("--debug", action="store_true")
        cls.add_feature_specific_arguments(parser)
        args = parser.parse_args()
        instance = cls(debugging=args.debug, **vars(args))
        instance.run()

    def run(self):
        """Read the data from BigQuery, transform it and upload the
        result to GCS - just run this, no further setup is needed.
        """
        self._logger.info(f"Running with debugging={self.debugging}")
        with tempfile.TemporaryDirectory() as tempdir:
            train_path = os.path.join(tempdir, f"{self.name}_training.ftr")
            self.read_and_save_features(
                self.train_table, train_path,
            )
            self._upload_to_gs([train_path])

    def read_and_save_features(
        self, train_table_name: str, train_output_path: str,
    ) -> None:
        """Build the features for a table and save them as a feather file.

        Column names are prefixed with ``<class name>_``.
        """
        df_train_input = self._read_from_bigquery(train_table_name)
        df_train_features = self.make_features(df_train_input)
        assert (
            df_train_input.shape[0] == df_train_features.shape[0]
        ), "generated train features is not compatible with the table"
        df_train_features.columns = f"{self.name}_" + df_train_features.columns

        if self.save_memory:
            self._logger.info("Reduce memory size - train data")
            df_train_features = reduce_mem_usage(df_train_features)

        self._logger.info(f"Saving features to {train_output_path}")
        df_train_features.to_feather(train_output_path)

    def _read_from_bigquery(self, table_name: str) -> pd.DataFrame:
        """Query the import columns of the rows with content_type_id = 0."""
        self._logger.info(f"Reading from {table_name}")
        query = """
            select {}
            from {}
            where content_type_id = 0
        """.format(
            ", ".join(self.import_columns()), table_name
        )
        if self.debugging:
            query += " limit 10000"

        bqclient = bigquery.Client(project=PROJECT_ID)
        bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient()
        df = (
            bqclient.query(query)
            .result()
            .to_dataframe(bqstorage_client=bqstorageclient)
        )
        return df

    def _bucket_dir_name(self) -> str:
        """Return the bucket directory for the current debugging mode."""
        return "features_debug" if self.debugging else "features"

    def _upload_to_gs(self, files: List[str]):
        """Upload the given local files into the feature bucket directory."""
        client = storage.Client(project=PROJECT_ID)
        bucket = client.get_bucket(GCS_BUCKET_NAME)
        bucket_dir_name = self._bucket_dir_name()

        for filename in files:
            basename = os.path.basename(filename)
            blob = storage.Blob(os.path.join(bucket_dir_name, basename), bucket)
            self._logger.info(f"Uploading {basename} to {blob.path}")
            blob.upload_from_filename(filename)

    def _download_from_gs(self, feather_file_name: str) -> pd.DataFrame:
        """Load a feature file (feather format) stored on GCS."""
        client = storage.Client(project=PROJECT_ID)
        bucket = client.get_bucket(GCS_BUCKET_NAME)

        blob = storage.Blob(
            os.path.join(self._bucket_dir_name(), feather_file_name), bucket
        )
        # Announced through the logger before the transfer starts, in
        # line with the upload path.
        self._logger.info(f"Downloading {feather_file_name} from {blob.path}")
        content = blob.download_as_string()
        df = pd.read_feather(BytesIO(content))
        return df
async def sync(self,
               args: Namespace,
               logger: logging.Logger,
               chain: AsyncChainAPI,
               base_db: AtomicDatabaseAPI,
               peer_pool: BasePeerPool,
               event_bus: EndpointAPI) -> None:
    """Do no syncing: only log that the node runs without sync, together
    with the value of ``self.get_sync_mode()``."""
    sync_mode = self.get_sync_mode()
    logger.info("Node running without sync (--sync-mode=%s)", sync_mode)
class Logger:
    """Logger class

    Forward log messages to a wrapped logger and keep a list of all
    entries from level info upwards.
    """

    LEVEL_DEBUG = 'debug'
    LEVEL_INFO = 'info'
    LEVEL_WARNING = 'warning'
    LEVEL_ERROR = 'error'
    LEVEL_CRITICAL = 'critical'

    def __init__(self, logger=None):
        """Constructor

        :param Logger logger: Logger to forward to; a new
            "Config Generator" log is created if omitted
        """
        self.logger = logger if logger else Log("Config Generator")
        self.logs = []

    def clear(self):
        """Start a new, empty list of log entries."""
        self.logs = []

    def log_entries(self):
        """Return the collected log entries."""
        return self.logs

    def _forward_and_collect(self, msg, level):
        """Send msg to the wrapped logger at level, then collect it.

        The level names equal the wrapped logger's method names.
        """
        getattr(self.logger, level)(msg)
        self.add_log_entry(msg, level)

    def debug(self, msg):
        """Show debug log entry.

        :param str msg: Log message
        """
        # debug entries are forwarded only, never collected
        self.logger.debug(msg)

    def info(self, msg):
        """Add info log entry.

        :param str msg: Log message
        """
        self._forward_and_collect(msg, self.LEVEL_INFO)

    def warning(self, msg):
        """Add warning log entry.

        :param str msg: Log message
        """
        self._forward_and_collect(msg, self.LEVEL_WARNING)

    def warn(self, msg):
        """Alias of warning().

        :param str msg: Log message
        """
        self.warning(msg)

    def error(self, msg):
        """Add error log entry.

        :param str msg: Log message
        """
        self._forward_and_collect(msg, self.LEVEL_ERROR)

    def critical(self, msg):
        """Add critical log entry.

        :param str msg: Log message
        """
        self._forward_and_collect(msg, self.LEVEL_CRITICAL)

    def add_log_entry(self, msg, level):
        """Append log entry with level.

        :param str msg: Log message
        :param str level: Log level
        """
        self.logs.append({'msg': msg, 'level': level})
def search_nb_of_multi_target_trees_to_use(
        n_tree_rules_to_generate: int,
        prepared_data_list: List[PreparedDataForTargetSet],
        min_support: float,
        max_depth: int,
        logger: Logger,
        seed: Optional[int] = None,
) -> Tuple[Optional[List[Tuple[PreparedDataForTargetSet, RandomForestClassifier]]], TimeDiffSec]:
    """Grow the forests until their leaves add up to the requested rules.

    One random forest is fitted per prepared data set, all with the same
    number of trees. While the total number of leaf nodes stays below
    n_tree_rules_to_generate, the number of trees is raised (by a step
    that itself grows by one each round) and all forests are learned
    again.

    :param n_tree_rules_to_generate: leaf-node total to reach
    :param prepared_data_list: one entry per forest to learn
    :param min_support: passed on as ``min_samples_leaf``
    :param max_depth: maximal tree depth
    :param logger: receives progress messages
    :param seed: passed on as ``random_state``
    :return: the (prepared data, fitted forest) pairs of the last round
        and the time in seconds spent fitting in that round
    """
    # --- estimate the nb of trees to use -------------------------------------------
    max_n_rules_in_tree = 2 ** max_depth
    min_n_trees_to_use = n_tree_rules_to_generate / max_n_rules_in_tree
    nb_of_rfs_to_use = len(prepared_data_list)
    nb_of_trees_to_use: int = math.ceil(min_n_trees_to_use / nb_of_rfs_to_use)
    logger.info(
        f"INITIAL ESTIMATE: use {nb_of_rfs_to_use} RFs of each {nb_of_trees_to_use} trees "
        f"for about {min_n_trees_to_use} trees in total")

    step_size = 1
    while True:
        n_rules = 0
        rf_list = []
        learning_time_s: TimeDiffSec = 0.0

        for prepared_data in prepared_data_list:
            fit_start_s = time.time()
            forest: RandomForestClassifier = RandomForestClassifier(
                n_estimators=nb_of_trees_to_use,
                random_state=seed,
                min_samples_leaf=min_support,
                max_depth=max_depth)
            # --- Learn a random forest given the current number of trees ---
            forest.fit(
                prepared_data.df_one_hot_encoded_descriptive_attributes,
                prepared_data.df_one_hot_encoded_target_attributes)
            learning_time_s += time.time() - fit_start_s

            # --- Add its total number of leaves ---
            n_rules += sum(
                get_nb_of_leaf_nodes(tree_clf) for tree_clf in forest.estimators_)
            rf_list.append((prepared_data, forest))

        if n_rules >= n_tree_rules_to_generate:
            break

        logger.info(
            f'Learned {len(rf_list)} RFs with each {nb_of_trees_to_use} trees'
            f'--> {n_rules} rules '
            f' < {n_tree_rules_to_generate} (goal)) '
            f'--> INcreasing current step size {step_size} with 1')
        step_size += 1
        nb_of_trees_to_use += step_size

    logger.info(
        f'FINISHED search for tree rules: {len(rf_list)} RFs with each {nb_of_trees_to_use} trees'
        f'--> {n_rules} rules '
        f' > {n_tree_rules_to_generate} (goal)) ')

    return rf_list, learning_time_s
def plotClock(dfClk: pd.DataFrame, dRtk: dict, logger: logging.Logger, showplot: bool = False):
    """
    plotClock plots the clock offset for every GNSS system that has values.

    One sub-plot is drawn per system out of GAL, GPS, OTH and GLO whose column
    in ``dfClk`` contains at least one truthy value. The figure is saved as
    ``<rtkPosFile without extension>-CLK.png`` in the ``png`` sub-directory of
    ``dRtk['info']['dir']``.

    :param dfClk: dataframe with a ``DT`` column and one column per GNSS system
    :param dRtk: dictionary providing ``['info']['dir']`` and ``['info']['rtkPosFile']``
    :param logger: logger receiving progress messages
    :param showplot: show the figure (blocking) when True, otherwise close it
    """
    cFuncName = colored(os.path.basename(__file__), 'yellow') + ' - ' + colored(
        sys._getframe().f_code.co_name, 'green')

    # set up the plot
    plt.style.use('ggplot')
    colors = ['blue', 'red', 'green', 'black']

    amc.logDataframeInfo(df=dfClk, dfName='dfClk', callerName=cFuncName, logger=logger)

    # find out for which system we have clk offset values
    GNSSSysts = [gnss for gnss in ['GAL', 'GPS', 'OTH', 'GLO'] if dfClk[gnss].any()]
    logger.info('{func:s}: Clock available for GNSS systems {syst:s}'.format(
        func=cFuncName, syst=' '.join(GNSSSysts)))

    if not GNSSSysts:
        # plt.subplots() rejects nrows=0, so there is nothing we can draw
        logger.warning('{func:s}: no clock offsets available, nothing plotted'.format(
            func=cFuncName))
        return

    # create the plot area
    fig, axis = plt.subplots(nrows=len(GNSSSysts), ncols=1, figsize=(24.0, 20.0))

    # the time span is the same for every sub-plot, so determine the ticks once
    dtFormat = plot_utils.determine_datetime_ticks(
        startDT=dfClk['DT'].iloc[0], endDT=dfClk['DT'].iloc[-1])

    for i, GNSSsyst in enumerate(GNSSSysts):
        logger.info('{func:s}: plotting clock offset for {syst:s}'.format(
            func=cFuncName, syst=GNSSsyst))

        # get the axis to draw to (subplots returns a bare Axes for a single row)
        if len(GNSSSysts) == 1:
            ax = axis
        else:
            ax = axis[i]

        # create the plot for this GNSS system
        dfClk.plot(ax=ax, x='DT', y=GNSSsyst, marker='.', linestyle='', color=colors[i])

        # create the ticks for the time axis
        if dtFormat['minutes']:
            ax.xaxis.set_major_locator(
                dates.MinuteLocator(byminute=[0, 15, 30, 45], interval=1))
        else:
            ax.xaxis.set_major_locator(
                dates.HourLocator(interval=dtFormat['hourInterval']))
        ax.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))  # hours and minutes

        ax.xaxis.set_minor_locator(dates.DayLocator(interval=1))  # every day
        ax.xaxis.set_minor_formatter(dates.DateFormatter('\n%d-%m-%Y'))

        ax.xaxis.set_tick_params(rotation=0)
        for tick in ax.xaxis.get_major_ticks():
            tick.label1.set_horizontalalignment('center')

        # name the axis
        ax.set_ylabel('{syst:s} Clock Offset [ns]'.format(syst=GNSSsyst),
                      fontsize='large', color=colors[i])
        ax.set_xlabel('Time', fontsize='large')

        # title of sub-plot; fontsize belongs to set_title, not to str.format
        ax.set_title(
            'Clock offset relative to {syst:s} @ {date:s}'.format(
                syst=GNSSsyst, date=dfClk['DT'].iloc[0].strftime('%d %b %Y')),
            fontsize='large')

    # save the plot in subdir png of GNSSSystem
    amutils.mkdir_p(os.path.join(dRtk['info']['dir'], 'png'))
    pngName = os.path.join(
        dRtk['info']['dir'], 'png',
        os.path.splitext(dRtk['info']['rtkPosFile'])[0] + '-CLK.png')
    fig.savefig(pngName, dpi=fig.dpi)

    logger.info('{func:s}: created plot {plot:s}'.format(
        func=cFuncName, plot=colored(pngName, 'green')))

    if showplot:
        plt.show(block=True)
    else:
        plt.close(fig)
def setup_localisations(logger: logging.Logger):
    """Setup gettext localisations.

    The language is taken from the default locale and can be overridden with
    a ``lang=<code>`` command line argument. The first ``../i18n/<code>.mo``
    file found for the expanded language codes is installed; when none
    exists, null translations are installed instead. The translation
    functions are added to builtins.

    :param logger: logger receiving the messages.
        NOTE(review): the calls below pass ``{!r}`` templates with positional
        arguments, so this presumably is a str.format-style logger rather
        than a plain stdlib one - confirm against the caller.
    """
    from srctools.property_parser import PROP_FLAGS_DEFAULT
    import gettext
    import locale

    # Get the 'en_US' style language code
    lang_code = locale.getdefaultlocale()[0]

    # Allow overriding through command line.
    for arg in sys.argv[1:]:
        if arg.casefold().startswith('lang='):
            lang_code = arg[5:]
            break

    if lang_code is None:
        # The locale could not be determined; expanding None would raise,
        # so fall back to the English defaults.
        lang_code = 'en'

    # Expands single code to parent categories.
    expanded_langs = gettext._expand_lang(lang_code)

    logger.info('Language: {!r}', lang_code)
    logger.debug('Language codes: {!r}', expanded_langs)

    # Add these to Property's default flags, so config files can also
    # be localised.
    for lang in expanded_langs:
        PROP_FLAGS_DEFAULT['lang_' + lang] = True

    for lang in expanded_langs:
        try:
            file = open('../i18n/{}.mo'.format(lang), 'rb')
        except FileNotFoundError:
            continue
        # The catalogue is parsed by the constructor, so the file can be
        # closed as soon as the translations object exists.
        with file:
            trans = gettext.GNUTranslations(file)
        break
    else:
        # No translations, fallback to English.
        # That's fine if the user's language is actually English.
        if 'en' not in expanded_langs:
            logger.warning(
                "Can't find translation for codes: {!r}!",
                expanded_langs,
            )
        trans = gettext.NullTranslations()

    # Add these functions to builtins, plus _=gettext
    trans.install(['gettext', 'ngettext'])

    # Some lang-specific overrides..
    if trans.gettext('__LANG_USE_SANS_SERIF__') == 'YES':
        # For Japanese/Chinese, we want a 'sans-serif' / gothic font
        # style.
        try:
            from tkinter import font
        except ImportError:
            return
        font_names = [
            'TkDefaultFont',
            'TkHeadingFont',
            'TkTooltipFont',
            'TkMenuFont',
            'TkTextFont',
            'TkCaptionFont',
            'TkSmallCaptionFont',
            'TkIconFont',
            # Note - not fixed-width...
        ]
        for font_name in font_names:
            font.nametofont(font_name).configure(family='sans-serif')
def __call__(self, logger: logging.Logger, *args: Any, **kwds: Any) -> Any:
    """
    Watch ``self.location`` and report the sync progress found in ``debug.log``.

    For every change set whose first entry is a file named ``debug.log``, the
    last line of that file is split on spaces and every ``key=value`` token is
    collected. The ``progress`` value is logged at the 20/40/60/80% marks and
    the process exits once it equals 1.0.

    :param logger: logger receiving the status messages
    """
    # progress (rounded to two decimals) -> message logged when it is reached
    milestones = (
        (0.2, "##### Sync in 20%"),
        (0.4, "##### Sync in 40%"),
        (0.6, "##### Sync in 60%"),
        (0.8, "##### Sync in 80%"),
    )

    logger.info("##### Start search for change in backend status")
    for changes in watch(self.location):
        logger.info("##### Change detected")
        changed_path = list(changes)[0][1]
        if changed_path.split('/')[-1] != 'debug.log':
            continue

        with open(changed_path, 'r') as f:
            lines = f.readlines()
        if not lines:
            # The log may be empty right after creation or rotation.
            continue
        text = lines[-1]

        pairs = (token.split("=") for token in text.split(" "))
        dct = {pair[0]: pair[1] for pair in pairs if len(pair) == 2}
        logger.info(dct)
        try:
            progress = float(dct["progress"])
            rounded = round(progress, 2)
            for mark, message in milestones:
                if rounded == mark:
                    logger.info(message)
            if progress == 1.0:
                logger.info("##### Sync done. Exiting")
                # SystemExit is not an Exception, so it is not swallowed below.
                sys.exit()
        except Exception as e:
            logger.info("##### Watcher exception: {}".format(str(e)))
def _log_event(config: Dict[str, Any], logger: Logger, event: Dict[str, Any], what: str, who: str, addinfo: str) -> None: if config['debug_rules']: logger.info("Event %d: %s/%s/%s - %s" % (event["id"], what, who, addinfo, event["text"]))
def notify_suppliers_of_framework_application_event(
    data_api_client: DataAPIClient,
    notify_client: DMNotifyClient,
    notify_template_id: str,
    framework_slug: str,
    stage: str,
    dry_run: bool,
    logger: Logger,
    run_id: Optional[UUID] = None,
) -> int:
    """
    Email every active user of every supplier on a framework.

    A fresh run id is generated when none is given; passing one marks the run
    as a resumption, in which case the recent-sends cache is not used. In a
    dry run nothing is sent, it is only logged whether each notification
    would go out.

    :return: the number of emails that failed to send
    :raises EmailTemplateError: re-raised immediately, as the remaining sends
        are not attempted after a template error
    """
    run_is_new = not run_id
    run_id = run_id or uuid4()
    logger.info(
        f"{'Starting' if run_is_new else 'Resuming'} run id {{run_id}}",
        extra={"run_id": str(run_id)})

    framework = data_api_client.get_framework(framework_slug)["frameworks"]
    framework_context = {
        "framework_name": framework["name"],
        "updates_url": f"{get_web_url_from_stage(stage)}/suppliers/frameworks/{framework['slug']}/updates",
        "framework_dashboard_url": f"{get_web_url_from_stage(stage)}/suppliers/frameworks/{framework['slug']}/",
        "clarification_questions_closed": "no" if framework["clarificationQuestionsOpen"] else "yes",
        **_formatted_dates_from_framework(framework),
    }

    failure_count = 0
    suppliers = data_api_client.find_framework_suppliers_iter(framework_slug)
    for supplier_framework in suppliers:
        supplier_users = data_api_client.find_users_iter(
            supplier_id=supplier_framework["supplierId"])
        for user in supplier_users:
            if not user["active"]:
                continue

            email_address = user["emailAddress"]
            # generating ref separately so we can exclude certain parameters from the context dict
            reference_context = {
                "framework_slug": framework["slug"],
                "run_id": str(run_id),
            }
            notify_ref = notify_client.get_reference(
                email_address, notify_template_id, reference_context)

            if dry_run:
                # Use the sent references cache unless we're re-running the script following a failure
                already_sent = notify_client.has_been_sent(
                    notify_ref, use_recent_cache=run_is_new)
                if already_sent:
                    logger.debug(
                        "[DRY RUN] Would NOT send notification to {email_hash} (already sent)",
                        extra={"email_hash": hash_string(email_address)},
                    )
                else:
                    logger.info(
                        "[DRY RUN] Would send notification to {email_hash}",
                        extra={"email_hash": hash_string(email_address)},
                    )
                continue

            try:
                # Use the sent references cache unless we're re-running the script following a failure
                notify_client.send_email(
                    email_address,
                    notify_template_id,
                    framework_context,
                    allow_resend=False,
                    reference=notify_ref,
                    use_recent_cache=run_is_new,
                )
            except EmailError as e:
                failure_count += 1
                logger.error(
                    "Failed sending to {email_hash}: {e}",
                    extra={
                        "email_hash": hash_string(email_address),
                        "e": str(e),
                    },
                )
                if isinstance(e, EmailTemplateError):
                    raise  # do not try to continue

    return failure_count
class Learner:
    """
    Any participant in the "learning loop" - a class inheriting from
    this one has the ability, synchronously or asynchronously,
    to learn about nodes in the network, verify some essential
    details about them, and store information about them for later use.
    """

    _SHORT_LEARNING_DELAY = 5
    _LONG_LEARNING_DELAY = 90
    LEARNING_TIMEOUT = 10
    _ROUNDS_WITHOUT_NODES_AFTER_WHICH_TO_SLOW_DOWN = 10

    # Set to the failure by _crash_gracefully(); defined here so that the
    # blocking helpers can read it before any crash has happened.
    _crashed = False

    # For Keeps
    __DEFAULT_NODE_STORAGE = InMemoryNodeStorage
    __DEFAULT_MIDDLEWARE_CLASS = RestMiddleware

    class NotEnoughTeachers(RuntimeError):
        pass

    class UnresponsiveTeacher(ConnectionError):
        pass

    # NOTE(review): the default middleware is instantiated once, when the class
    # is defined, and therefore shared by every Learner that does not pass its own.
    def __init__(self,
                 network_middleware: RestMiddleware = __DEFAULT_MIDDLEWARE_CLASS(),
                 start_learning_now: bool = False,
                 learn_on_same_thread: bool = False,
                 known_nodes: tuple = None,
                 seed_nodes: Tuple[tuple] = None,
                 known_certificates_dir: str = None,
                 node_storage=None,
                 save_metadata: bool = False,
                 abort_on_learning_error: bool = False
                 ) -> None:
        """
        Set up the learner state, remember ``known_nodes`` and optionally
        start the learning loop.

        :raises ValueError: when ``save_metadata`` is requested while the node
            storage is ``constants.NO_STORAGE_AVAILIBLE``
        """
        self.log = Logger("characters")  # type: Logger

        self.network_middleware = network_middleware
        self.save_metadata = save_metadata
        self.start_learning_now = start_learning_now
        self.learn_on_same_thread = learn_on_same_thread

        self._abort_on_learning_error = abort_on_learning_error
        self._learning_listeners = defaultdict(list)
        self._node_ids_to_learn_about_immediately = set()

        self.known_certificates_dir = known_certificates_dir or TemporaryDirectory("nucypher-tmp-certs-").name
        self.__known_nodes = FleetState()

        self.done_seeding = False

        # Read
        if node_storage is None:
            node_storage = self.__DEFAULT_NODE_STORAGE(federated_only=self.federated_only,  # TODO: remove federated_only
                                                       character_class=self.__class__)

        self.node_storage = node_storage
        if save_metadata and node_storage is constants.NO_STORAGE_AVAILIBLE:
            raise ValueError("Cannot save nodes without a configured node storage")

        known_nodes = known_nodes or tuple()
        self.unresponsive_startup_nodes = list()  # TODO: Attempt to use these again later
        for node in known_nodes:
            try:
                # TODO: Need to test this better - do we ever init an Ursula-Learner with Node Storage?
                self.remember_node(node, update_fleet_state=False)
            except self.UnresponsiveTeacher:
                self.unresponsive_startup_nodes.append(node)

        self.teacher_nodes = deque()
        self._current_teacher_node = None  # type: Teacher
        self._learning_task = task.LoopingCall(self.keep_learning_about_nodes)
        self._learning_round = 0  # type: int
        self._rounds_without_new_nodes = 0  # type: int
        self._seed_nodes = seed_nodes or []
        self.unresponsive_seed_nodes = set()

        if self.start_learning_now:
            self.start_learning_loop(now=self.learn_on_same_thread)

    @property
    def known_nodes(self):
        """The fleet state holding every node this learner has remembered."""
        return self.__known_nodes

    def load_seednodes(self, read_storages: bool = True, retry_attempts: int = 3, retry_rate: int = 2, timeout=3):
        """
        Engage known nodes from storages and pre-fetch hardcoded seednode certificates for node learning.

        Does nothing when seeding has already been done. ``retry_attempts`` is
        only used in the warning message and ``retry_rate`` is not used at all.
        """
        if self.done_seeding:
            self.log.debug("Already done seeding; won't try again.")
            return

        def __attempt_seednode_learning(seednode_metadata, current_attempt=1):
            from nucypher.characters.lawful import Ursula
            self.log.debug(
                "Seeding from: {}|{}:{}".format(seednode_metadata.checksum_address,
                                                seednode_metadata.rest_host,
                                                seednode_metadata.rest_port))

            seed_node = Ursula.from_seednode_metadata(seednode_metadata=seednode_metadata,
                                                      network_middleware=self.network_middleware,
                                                      certificates_directory=self.known_certificates_dir,
                                                      timeout=timeout,
                                                      federated_only=self.federated_only)  # TODO: 466
            if seed_node is False:
                self.unresponsive_seed_nodes.add(seednode_metadata)
            else:
                self.unresponsive_seed_nodes.discard(seednode_metadata)
                self.remember_node(seed_node)

        for seednode_metadata in self._seed_nodes:
            __attempt_seednode_learning(seednode_metadata=seednode_metadata)

        if not self.unresponsive_seed_nodes:
            self.log.info("Finished learning about all seednodes.")

        self.done_seeding = True

        if read_storages is True:
            self.read_nodes_from_storage()

        if not self.known_nodes:
            self.log.warn("No seednodes were available after {} attempts".format(retry_attempts))
            # TODO: Need some actual logic here for situation with no seed nodes (ie, maybe try again much later)

    def read_nodes_from_storage(self) -> None:
        """Remember every node held by the node storage."""
        stored_nodes = self.node_storage.all(federated_only=self.federated_only)  # TODO: 466
        for node in stored_nodes:
            self.remember_node(node)

    def sorted_nodes(self):
        """Return the known nodes ordered by their checksum public address."""
        nodes_to_consider = list(self.known_nodes.values())
        return sorted(nodes_to_consider, key=lambda n: n.checksum_public_address)

    def remember_node(self, node, force_verification_check=False, update_fleet_state=True):
        """
        Verify ``node`` and add it to the known nodes.

        :return: True when the node was stored; False when it is this learner
            itself, is not newer than the known representation, or could not
            be verified (TLS error, no response).
        """
        if node == self:  # No need to remember self.
            return False

        # First, determine if this is an outdated representation of an already known node.
        with suppress(KeyError):
            already_known_node = self.known_nodes[node.checksum_public_address]
            if not node.timestamp > already_known_node.timestamp:
                self.log.debug("Skipping already known node {}".format(already_known_node))
                # This node is already known. We can safely return.
                return False

        node.save_certificate_to_disk(directory=self.known_certificates_dir, force=True)  # TODO: Verify before force?
        certificate_filepath = node.get_certificate_filepath(certificates_dir=self.known_certificates_dir)
        try:
            node.verify_node(force=force_verification_check,
                             network_middleware=self.network_middleware,
                             accept_federated_only=self.federated_only,  # TODO: 466
                             certificate_filepath=certificate_filepath)
        except SSLError:
            # TODO: Bucket this node as having bad TLS info - maybe it's an update that hasn't fully propagated?
            return False
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout):
            self.log.info("No Response while trying to verify node {}|{}".format(node.rest_interface, node))
            # TODO: Bucket this node as "ghost" or something: somebody else knows about it, but we can't get to it.
            return False

        listeners = self._learning_listeners.pop(node.checksum_public_address, tuple())
        address = node.checksum_public_address

        self.__known_nodes[address] = node

        if self.save_metadata:
            self.write_node_metadata(node=node)

        self.log.info("Remembering {}, popping {} listeners.".format(node.checksum_public_address, len(listeners)))
        for listener in listeners:
            listener.add(address)
        self._node_ids_to_learn_about_immediately.discard(address)

        if update_fleet_state:
            self.update_fleet_state()

        return True

    def update_fleet_state(self):
        """Recompute the checksum and update time of the known-nodes fleet state."""
        # TODO: Probably not mutate these foreign attrs - ideally maybe move quite a bit of this method up to FleetState (maybe in __setitem__).
        self.known_nodes.checksum = keccak_digest(b"".join(bytes(n) for n in self.sorted_nodes())).hex()
        self.known_nodes.updated = maya.now()

    def start_learning_loop(self, now=False):
        """
        Start the looping learning task unless it is already running.

        :param now: when true, seed nodes are loaded and one learning round is
            run synchronously before the loop starts; otherwise seeding is
            deferred to a thread.
        :return: False when the loop already runs, else the learning deferred
        """
        if self._learning_task.running:
            return False
        elif now:
            self.load_seednodes()
            self._learning_task()  # Unhandled error might happen here. TODO: Call this in a safer place.
            self.learning_deferred = self._learning_task.start(interval=self._SHORT_LEARNING_DELAY)
            self.learning_deferred.addErrback(self.handle_learning_errors)
            return self.learning_deferred
        else:
            seeder_deferred = deferToThread(self.load_seednodes)
            learner_deferred = self._learning_task.start(interval=self._SHORT_LEARNING_DELAY, now=now)
            seeder_deferred.addErrback(self.handle_learning_errors)
            learner_deferred.addErrback(self.handle_learning_errors)
            self.learning_deferred = defer.DeferredList([seeder_deferred, learner_deferred])
            return self.learning_deferred

    def stop_learning_loop(self):
        """
        Only for tests at this point. Maybe some day for graceful shutdowns.
        """

    def handle_learning_errors(self, *args, **kwargs):
        """Errback of the learning deferreds: crash or log, then restart the loop if it stopped."""
        failure = args[0]
        if self._abort_on_learning_error:
            self.log.critical("Unhandled error during node learning. Attempting graceful crash.")
            reactor.callFromThread(self._crash_gracefully, failure=failure)
        else:
            self.log.warn("Unhandled error during node learning: {}".format(failure.getTraceback()))
            if not self._learning_task.running:
                self.start_learning_loop()  # TODO: Consider a single entry point for this with more elegant pause and unpause.

    def _crash_gracefully(self, failure=None):
        """
        A facility for crashing more gracefully in the event that an exception
        is unhandled in a different thread, especially inside a loop like the learning loop.
        """
        self._crashed = failure
        failure.raiseException()
        # TODO: We don't actually have checksum_public_address at this level - maybe only Characters can crash gracefully :-)
        self.log.critical("{} crashed with {}".format(self.checksum_public_address, failure))

    def shuffled_known_nodes(self):
        """Return the known nodes as a list in random order."""
        nodes_we_know_about = list(self.__known_nodes.values())
        random.shuffle(nodes_we_know_about)
        self.log.info("Shuffled {} known nodes".format(len(nodes_we_know_about)))
        return nodes_we_know_about

    def select_teacher_nodes(self):
        """
        Queue all known nodes, shuffled, as teachers.

        :raises NotEnoughTeachers: when no node is known
        """
        nodes_we_know_about = self.shuffled_known_nodes()

        if not nodes_we_know_about:
            raise self.NotEnoughTeachers("Need some nodes to start learning from.")

        self.teacher_nodes.extend(nodes_we_know_about)

    def cycle_teacher_node(self):
        """
        Make the next queued teacher the current one.

        :raises NotEnoughTeachers: when no teacher can be selected
        """
        # To ensure that all the best teachers are available, first let's make sure
        # that we have connected to all the seed nodes.
        if self.unresponsive_seed_nodes:
            self.log.info("Still have unresponsive seed nodes; trying again to connect.")
            self.load_seednodes()  # Ideally, this is async and singular.

        if not self.teacher_nodes:
            self.select_teacher_nodes()
        try:
            self._current_teacher_node = self.teacher_nodes.pop()
        except IndexError:
            error = "Not enough nodes to select a good teacher, Check your network connection then node configuration"
            raise self.NotEnoughTeachers(error)
        self.log.info("Cycled teachers; New teacher is {}".format(self._current_teacher_node.checksum_public_address))

    def current_teacher_node(self, cycle=False):
        """Return the current teacher, cycling first when asked to or when there is none yet."""
        if cycle:
            self.cycle_teacher_node()

        if not self._current_teacher_node:
            self.cycle_teacher_node()

        teacher = self._current_teacher_node

        return teacher

    def learn_about_nodes_now(self, force=False):
        """Run a learning round immediately; with ``force`` the loop is started if it is not running."""
        if self._learning_task.running:
            self._learning_task.reset()
            self._learning_task()
        elif not force:
            self.log.warn(
                "Learning loop isn't started; can't learn about nodes now. You can override this with force=True.")
        elif force:
            self.log.info("Learning loop wasn't started; forcing start now.")
            self._learning_task.start(self._SHORT_LEARNING_DELAY, now=True)

    def keep_learning_about_nodes(self):
        """
        Continually learn about new nodes.
        """
        self.learn_from_teacher_node(eager=False)  # TODO: Allow the user to set eagerness?

    def learn_about_specific_nodes(self, canonical_addresses: Set):
        """Mark ``canonical_addresses`` as wanted immediately and trigger a learning round."""
        self._node_ids_to_learn_about_immediately.update(canonical_addresses)  # hmmmm
        self.learn_about_nodes_now()

    # TODO: Dehydrate these next two methods.

    def block_until_number_of_known_nodes_is(self,
                                             number_of_nodes_to_know: int,
                                             timeout: int = 10,
                                             learn_on_this_thread: bool = False):
        """
        Block until at least ``number_of_nodes_to_know`` nodes are known.

        :return: True once enough nodes are known
        :raises NotEnoughTeachers: when ``timeout`` seconds pass first
        """
        start = maya.now()
        starting_round = self._learning_round

        while True:
            rounds_undertaken = self._learning_round - starting_round
            if len(self.__known_nodes) >= number_of_nodes_to_know:
                if rounds_undertaken:
                    self.log.info("Learned about enough nodes after {} rounds.".format(rounds_undertaken))
                return True

            if not self._learning_task.running:
                self.log.warn("Blocking to learn about nodes, but learning loop isn't running.")
            if learn_on_this_thread:
                try:
                    self.learn_from_teacher_node(eager=True)
                except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
                    # TODO: Even this "same thread" logic can be done off the main thread.
                    self.log.warn("Teacher was unreachable. No good way to handle this on the main thread.")

            # Give up once the timeout has elapsed, otherwise poll again shortly.
            if (maya.now() - start).seconds > timeout:
                if not self._learning_task.running:
                    raise self.NotEnoughTeachers("Learning loop is not running. Start it with start_learning().")
                else:
                    raise self.NotEnoughTeachers("After {} seconds and {} rounds, didn't find {} nodes".format(
                        timeout, rounds_undertaken, number_of_nodes_to_know))
            else:
                time.sleep(.1)

    def block_until_specific_nodes_are_known(self,
                                             canonical_addresses: Set,
                                             timeout=LEARNING_TIMEOUT,
                                             allow_missing=0,
                                             learn_on_this_thread=False):
        """
        Block until every address in ``canonical_addresses`` is known.

        :return: True when all addresses are known; False when the timeout
            passed with at most ``allow_missing`` addresses unknown; the stored
            failure when the learner has crashed.
        :raises NotEnoughTeachers: when the timeout passes with too many
            addresses still unknown
        """
        start = maya.now()
        starting_round = self._learning_round

        while True:
            if self._crashed:
                return self._crashed
            rounds_undertaken = self._learning_round - starting_round
            if canonical_addresses.issubset(self.__known_nodes):
                if rounds_undertaken:
                    self.log.info("Learned about all nodes after {} rounds.".format(rounds_undertaken))
                return True

            if not self._learning_task.running:
                self.log.warn("Blocking to learn about nodes, but learning loop isn't running.")
            if learn_on_this_thread:
                self.learn_from_teacher_node(eager=True)

            if (maya.now() - start).seconds > timeout:

                still_unknown = canonical_addresses.difference(self.__known_nodes)

                if len(still_unknown) <= allow_missing:
                    return False
                elif not self._learning_task.running:
                    raise self.NotEnoughTeachers("The learning loop is not running. Start it with start_learning().")
                else:
                    raise self.NotEnoughTeachers(
                        "After {} seconds and {} rounds, didn't find these {} nodes: {}".format(
                            timeout, rounds_undertaken, len(still_unknown), still_unknown))
            else:
                time.sleep(.1)

    def _adjust_learning(self, node_list):
        """
        Takes a list of new nodes, adjusts learning accordingly.

        Currently, simply slows down learning loop when no new nodes have been discovered in a while.
        TODO: Do other important things - scrub, bucket, etc.
        """
        if node_list:
            self._rounds_without_new_nodes = 0
            self._learning_task.interval = self._SHORT_LEARNING_DELAY
        else:
            self._rounds_without_new_nodes += 1
            if self._rounds_without_new_nodes > self._ROUNDS_WITHOUT_NODES_AFTER_WHICH_TO_SLOW_DOWN:
                self.log.info("After {} rounds with no new nodes, it's time to slow down to {} seconds.".format(
                    self._ROUNDS_WITHOUT_NODES_AFTER_WHICH_TO_SLOW_DOWN,
                    self._LONG_LEARNING_DELAY))
                self._learning_task.interval = self._LONG_LEARNING_DELAY

    def _push_certain_newly_discovered_nodes_here(self, queue_to_push, node_addresses):
        """
        If any node_addresses are discovered, push them to queue_to_push.
        """
        for node_address in node_addresses:
            self.log.info("Adding listener for {}".format(node_address))
            self._learning_listeners[node_address].append(queue_to_push)

    def network_bootstrap(self, node_list: list) -> None:
        # NOTE(review): learn_about_nodes_now() accepts only ``force`` and
        # returns nothing, so this call looks stale - confirm before use.
        for node_addr, port in node_list:
            new_nodes = self.learn_about_nodes_now(node_addr, port)
            self.__known_nodes.update(new_nodes)

    def get_nodes_by_ids(self, node_ids):
        """
        Return the known node for the first id in ``node_ids``.

        :raises NotImplementedError: when that first id is not known
        """
        for node_id in node_ids:
            try:
                # Scenario 1: We already know about this node.
                return self.__known_nodes[node_id]
            except KeyError:
                raise NotImplementedError
        # Scenario 2: We don't know about this node, but a nearby node does.
        # TODO: Build a concurrent pool of lookups here.

        # Scenario 3: We don't know about this node, and neither does our friend.

    def write_node_metadata(self, node, serializer=bytes) -> str:
        """Save ``node`` to the node storage; ``serializer`` is not used."""
        return self.node_storage.save(node=node)

    def learn_from_teacher_node(self, eager=True):
        """
        Sends a request to node_url to find out about known nodes.

        :param eager: verify every received node when true, otherwise only
            validate its metadata
        :return: the list of newly remembered nodes, or None when no teacher
            was available or the teacher did not respond
        :raises RuntimeError: when the teacher answers with a status other than 200
        """
        self._learning_round += 1

        try:
            current_teacher = self.current_teacher_node()
        except self.NotEnoughTeachers as e:
            self.log.warn("Can't learn right now: {}".format(e.args[0]))
            return

        rest_url = current_teacher.rest_interface  # TODO: Name this..?

        # TODO: Do we really want to try to learn about all these nodes instantly?
        # Hearing this traffic might give insight to an attacker.
        if VerifiableNode in self.__class__.__bases__:
            announce_nodes = [self]
        else:
            announce_nodes = None

        unresponsive_nodes = set()
        try:
            # TODO: Streamline path generation
            certificate_filepath = current_teacher.get_certificate_filepath(
                certificates_dir=self.known_certificates_dir)
            response = self.network_middleware.get_nodes_via_rest(url=rest_url,
                                                                  nodes_i_need=self._node_ids_to_learn_about_immediately,
                                                                  announce_nodes=announce_nodes,
                                                                  certificate_filepath=certificate_filepath)
        except requests.exceptions.ConnectionError as e:
            unresponsive_nodes.add(current_teacher)
            teacher_rest_info = current_teacher.rest_information()[0]

            # TODO: This error isn't necessarily "no response" - let's maybe pass on the text of the exception here.
            self.log.info("No Response from teacher: {}:{}.".format(teacher_rest_info.host, teacher_rest_info.port))
            self.cycle_teacher_node()
            return

        if response.status_code != 200:
            raise RuntimeError("Bad response from teacher: {} - {}".format(response, response.content))

        signature, nodes = signature_splitter(response.content, return_remainder=True)

        # TODO: This doesn't make sense - a decentralized node can still learn about a federated-only node.
        from nucypher.characters.lawful import Ursula
        node_list = Ursula.batch_from_bytes(nodes, federated_only=self.federated_only)  # TODO: 466

        new_nodes = []
        for node in node_list:
            try:
                if eager:
                    certificate_filepath = current_teacher.get_certificate_filepath(
                        certificates_dir=self.known_certificates_dir)
                    node.verify_node(self.network_middleware,
                                     accept_federated_only=self.federated_only,  # TODO: 466
                                     certificate_filepath=certificate_filepath)
                    self.log.debug("Verified node: {}".format(node.checksum_public_address))

                else:
                    node.validate_metadata(accept_federated_only=self.federated_only)  # TODO: 466

            except node.SuspiciousActivity:
                # TODO: Account for possibility that stamp, rather than interface, was bad.
                message = "Suspicious Activity: Discovered node with bad signature: {}. " \
                          "Propagated by: {}".format(current_teacher.checksum_public_address, rest_url)
                self.log.warn(message)
            # NOTE(review): a node flagged as suspicious above is still passed
            # to remember_node() - confirm that this is intended.
            new = self.remember_node(node)
            if new:
                new_nodes.append(node)

        self._adjust_learning(new_nodes)

        learning_round_log_message = "Learning round {}. Teacher: {} knew about {} nodes, {} were new."
        current_teacher.last_seen = maya.now()
        self.cycle_teacher_node()
        self.log.info(learning_round_log_message.format(self._learning_round,
                                                        current_teacher,
                                                        len(node_list),
                                                        len(new_nodes)), )
        if new_nodes and self.known_certificates_dir:
            for node in new_nodes:
                node.save_certificate_to_disk(self.known_certificates_dir, force=True)

        return new_nodes
def repair(self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path) -> bool:
    """Write ``self.next_inode_number`` to the overlay as the next inode number.

    ``fsck_dir`` is not used here. Always returns True.
    """
    replacement = self.next_inode_number
    log.info(f"replacing max inode number data with {replacement}")
    overlay.write_next_inode_number(replacement)
    return True
def search_nb_of_single_target_trees_to_use(
    n_tree_rules_to_generate: int,
    prepared_data: PreparedDataForTargetSet,
    min_support: float,
    max_depth: int,
    logger: Logger,
    seed: Optional[int] = None,
) -> Tuple[Optional[RandomForestClassifier], TimeDiffSec]:
    """
    Grow a single random forest until it yields enough tree-based rules.

    The forest is refitted with an accelerating number of trees until the
    summed number of leaves over its trees reaches
    ``n_tree_rules_to_generate``.

    :return: the last fitted forest and the time in seconds spent fitting it
        (earlier rounds are not included)
    """
    leaves_per_full_tree: int = 2**max_depth
    nb_of_trees_to_use: int = math.ceil(n_tree_rules_to_generate / leaves_per_full_tree)
    step = 1

    while True:
        logger.info(f'Learning 1 RF using {nb_of_trees_to_use} trees...')

        fit_started_at_s = time.time()
        forest = RandomForestClassifier(
            n_estimators=nb_of_trees_to_use,
            random_state=seed,
            min_samples_leaf=min_support,
            max_depth=max_depth)
        # Learn a random forest given the current number of trees
        forest.fit(
            prepared_data.df_one_hot_encoded_descriptive_attributes,
            prepared_data.df_one_hot_encoded_target_attributes)
        fit_duration_s = time.time() - fit_started_at_s

        # Every leaf converts to one rule, so count the leaves of all trees
        nb_of_rules = 0
        for tree_clf in forest.estimators_:
            nb_of_rules += get_nb_of_leaf_nodes(tree_clf)

        if nb_of_rules >= n_tree_rules_to_generate:
            break

        logger.info(
            f'Learned 1 RF with {nb_of_trees_to_use} trees'
            f'--> {nb_of_rules} rules '
            f' < {n_tree_rules_to_generate} (goal)) '
            f'--> INcreasing current step size {step} with 1')
        step += 1
        nb_of_trees_to_use += step

    logger.info(
        f'FINISHED search for tree rules: RF has {nb_of_trees_to_use} trees'
        f'--> {nb_of_rules} rules '
        f' > {n_tree_rules_to_generate} (goal)) ')
    return forest, fit_duration_s
def log_record(record: TrainingRecord, logger: logging.Logger):
    """Log the average and the summed rewards of *record*.

    For each of the two summaries a heading is logged, then the central
    value, then one line per entry of the summary's ``local`` mapping.
    """
    sections = (
        ("avg reward : ", compute_avg_reward),
        ("summation reward : ", comput_summation_reward),
    )
    for heading, summarise in sections:
        summary = summarise(record.rewards)
        logger.info(heading)
        logger.info("\tcentral {:.3f}".format(summary.central))
        for agent_key, agent_value in summary.local.items():
            logger.info("\tagent {} reward : {:.3f} ".format(agent_key, agent_value))
import argparse
import logging
from logging import Logger

from pyspark.sql import SparkSession

# Command line parameters:
#   --db_name: db with required table
#   --table_name: table to reduce parquets
#   --part_columns: list of partitions

if __name__ == '__main__':
    # read line arguments first, so a bad invocation fails before Spark starts
    parser = argparse.ArgumentParser()
    parser.add_argument('--db_name', required=True)
    parser.add_argument('--table_name', required=True)
    parser.add_argument('--part_columns', default='')
    args = parser.parse_args()
    db_name = args.db_name
    table_name = args.table_name
    part_columns = args.part_columns

    # A directly created Logger has no handlers, and the fallback handler
    # only emits WARNING and above, so attach one to make INFO visible.
    log = Logger(name='recsys_reduce_parquets')
    log.setLevel('INFO')
    log.addHandler(logging.StreamHandler())

    # init spark
    spark = SparkSession.builder.appName(
        'reduce_parquets').enableHiveSupport().getOrCreate()
    log.info('spark session initialized')

    # tables
    to_reduce_tbl = f'{db_name}.{table_name}'
    bkp_tbl = f'{db_name}.bkp_{table_name}'
def run_check_with_model(
    model_with_type_info: onnx.ModelProto, mobile_pkg_build_config: pathlib.Path, logger: logging.Logger
):
    """
    Check if an ONNX model can be used with the ORT Mobile pre-built package.
    :param model_with_type_info: ONNX model that has had ONNX shape inferencing run on to add type/shape information.
    :param mobile_pkg_build_config: Configuration file used to build the ORT Mobile package.
                                    The default config path is used when this is None or otherwise falsy.
    :param logger: Logger for output
    :return: True if supported
    """
    if not mobile_pkg_build_config:
        mobile_pkg_build_config = get_default_config_path()

    enable_type_reduction = True
    # strict=True makes a missing config file fail here instead of inside the parser
    config_path = str(mobile_pkg_build_config.resolve(strict=True))
    required_ops, op_type_impl_filter = parse_config(config_path, enable_type_reduction)
    global_onnx_tensorproto_types, special_types = _get_global_tensorproto_types(op_type_impl_filter, logger)

    # get the opset imports
    opsets = get_opsets_imported(model_with_type_info)

    # If the ONNX opset of the model is not supported we can recommend using our tools to update that first.
    supported_onnx_opsets = set(required_ops["ai.onnx"].keys())
    # we have a contrib op that is erroneously in the ai.onnx domain with opset 1. manually remove that incorrect value.
    # discard() keeps this working for a config that does not contain that entry.
    supported_onnx_opsets.discard(1)
    onnx_opset_model_uses = opsets["ai.onnx"]
    if onnx_opset_model_uses not in supported_onnx_opsets:
        logger.info(f"Model uses ONNX opset {onnx_opset_model_uses}.")
        logger.info(f"The pre-built package only supports ONNX opsets {sorted(supported_onnx_opsets)}.")
        logger.info(
            "Please try updating the ONNX model opset to a supported version using "
            "python -m onnxruntime.tools.onnx_model_utils.update_onnx_opset ..."
        )

        return False

    # filled in by check_graph with the operators that are not supported
    unsupported_ops = set()
    logger.debug(
        "Checking if the data types and operators used in the model are supported " "in the pre-built ORT package..."
    )
    unsupported = check_graph(
        model_with_type_info.graph,
        opsets,
        required_ops,
        global_onnx_tensorproto_types,
        special_types,
        unsupported_ops,
        logger,
    )

    if unsupported_ops:
        logger.info("Unsupported operators:")
        for entry in sorted(unsupported_ops):
            logger.info(" " + entry)

    if unsupported:
        logger.info("\nModel is not supported by the pre-built package due to unsupported types and/or operators.")
        logger.info(
            "Please see https://onnxruntime.ai/docs/reference/mobile/prebuilt-package/ for information "
            "on what is supported in the pre-built package."
        )
        logger.info(
            "A custom build of ONNX Runtime will be required to run the model. Please see "
            "https://onnxruntime.ai/docs/build/custom.html for details on performing that."
        )
    else:
        logger.info("Model should work with the pre-built package.")

    logger.info("---------------\n")

    return not unsupported
def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: Logger = None,
         trg_vocab: object = None):
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; when None, the latest checkpoint
        in the configured model directory is used
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    :param trg_vocab: target vocabulary forwarded to `load_data`
    :return: the `word_sentence_acc` value produced by `validate_on_data`
        for the test set
    :raises ValueError: if the config has no "test" entry under "data"
    :raises FileNotFoundError: if no checkpoint is given and none is found
    """

    if logger is None:
        logger = make_logger()

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # `model_dir` and `step` are read further down when naming attention
    # plots, so bind them on every path, not only when the checkpoint is
    # looked up automatically.
    model_dir = cfg["training"].get("model_dir")
    step = "best"

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"]["batch_size"])
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, _, test_data, trg_vocab = load_data(data_cfg=cfg["data"],
                                           trg_vocab=trg_vocab)

    data_to_predict = {"test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 1)
        beam_alpha = cfg["testing"].get("alpha", -1)
        postprocess = cfg["testing"].get("postprocess", True)
    else:
        beam_size = 1
        beam_alpha = -1
        postprocess = True

    for data_set_name, data_set in data_to_predict.items():

        #pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
        hypotheses_raw, word_sentence_acc = validate_on_data(
            model, data=data_set, batch_size=batch_size,
            batch_type=batch_type, level=level,
            max_output_length=max_output_length, eval_metric=eval_metric,
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha, logger=logger, postprocess=postprocess)
        #pylint: enable=unused-variable

        if data_set[1]:
            decoding_description = "Greedy decoding" if beam_size < 2 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)
            logger.info("%4s %s: %6.2f [%s]",
                        data_set_name, eval_metric, score,
                        decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        # validate_on_data() does not hand back attention scores here, so
        # this stays None and the save_attention path below always ends in
        # the warning branch.
        attention_scores = None
        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=data_set.src,
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 1 for greedy decoding.")

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s", output_path_set)

    return word_sentence_acc
def _read_incremental(
    self,
    logger: logging.Logger,
    stream_instance: Stream,
    configured_stream: ConfiguredAirbyteStream,
    connector_state: MutableMapping[str, Any],
    internal_config: InternalConfig,
) -> Iterator[AirbyteMessage]:
    """Yield records and state checkpoints for one stream, slice by slice.

    The state stored under the stream's name in ``connector_state`` is the
    starting point. After every record the state is advanced through
    ``stream_instance.get_updated_state``; a checkpoint message is yielded
    every ``state_checkpoint_interval`` records (when that interval is set)
    and once more at the end of each slice. Reading stops as soon as
    ``self._limit_reached`` reports that the record limit was hit.

    :param logger: logger for progress messages
    :param stream_instance: stream to read from
    :param configured_stream: configured stream supplying name and cursor field
    :param connector_state: per-stream state mapping
    :param internal_config: passed to ``self._limit_reached``
    :return: iterator over record and state messages
    """
    name = configured_stream.stream.name
    state = connector_state.get(name, {})

    # Only push saved state onto streams that expose a `state` attribute.
    if state and "state" in dir(stream_instance):
        stream_instance.state = state
        logger.info(f"Setting state of {name} stream to {state}")

    all_slices = stream_instance.stream_slices(
        cursor_field=configured_stream.cursor_field,
        sync_mode=SyncMode.incremental,
        stream_state=state,
    )
    logger.debug(f"Processing stream slices for {name}", extra={"stream_slices": all_slices})

    emitted = 0
    for current_slice in all_slices:
        logger.debug("Processing stream slice", extra={"slice": current_slice})
        slice_records = stream_instance.read_records(
            sync_mode=SyncMode.incremental,
            stream_slice=current_slice,
            stream_state=state,
            cursor_field=configured_stream.cursor_field or None,
        )

        position = 0  # 1-based index of the record within the current slice
        for record in slice_records:
            position += 1
            yield self._as_airbyte_record(name, record)
            state = stream_instance.get_updated_state(state, record)

            interval = stream_instance.state_checkpoint_interval
            if interval and position % interval == 0:
                yield self._checkpoint_state(stream_instance, state, connector_state)

            emitted += 1
            # This functionality should ideally live outside of this method
            # but since state is managed inside this method, we keep track
            # of it here.
            if self._limit_reached(internal_config, emitted):
                # Leave the record loop so the state below is still saved
                # before the generator finishes.
                break

        yield self._checkpoint_state(stream_instance, state, connector_state)
        if self._limit_reached(internal_config, emitted):
            return
def _train(_run, max_epochs: int, _log: Logger, checkpoint_at_end: bool):
    """Train the model for ``max_epochs`` epochs and log metrics per epoch.

    Each epoch runs one pass over the training dataloader, then logs the mean
    total loss together with the model's three loss weights, validates on the
    test dataloader and records all scalars through ``_run.log_scalar``.

    :param _run: run object; used for ``log_scalar`` and passed to ``_save_model``
    :param max_epochs: number of epochs to train
    :param _log: logger for progress messages
    :param checkpoint_at_end: when true, save the model once training finished
    """
    train_dataloader, test_dataloader = _get_dataloaders()
    model = _get_model()
    # Resolve the device once instead of on every batch.
    device = _get_device()
    model = model.to(device)
    loss_func = _get_loss_func(model=model)
    optimizer = _get_optimizer(model=model)

    _log.info('Starting training...')
    for epoch in range(max_epochs):
        epoch_loss = 0
        epoch_loss1 = 0
        epoch_loss2 = 0
        epoch_loss3 = 0
        # Count batches from zero so the epoch means divide by the real
        # number of batches (the counter previously started at 1).
        num_batches = 0
        for images, labels in train_dataloader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss, (loss1, loss2, loss3) = loss_func(outputs, labels, images)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            epoch_loss1 += loss1.item()
            epoch_loss2 += loss2.item()
            epoch_loss3 += loss3.item()
            num_batches += 1

        # An empty dataloader must not cause a division by zero.
        divisor = max(num_batches, 1)

        weight1, weight2, weight3 = model.get_loss_weights()
        _log.info(
            f'Epoch {epoch}: {epoch_loss / divisor:.3f} '
            f'({weight1.item():.3f}, {weight2.item():.3f}, {weight3.item():.3f})'
        )

        (acc1, acc2, acc3), (val_loss1, val_loss2, val_loss3) = _validate(
            test_dataloader=test_dataloader, model=model, loss_func=loss_func)

        _run.log_scalar('train_loss', epoch_loss / divisor, epoch)
        _run.log_scalar('train_loss1', epoch_loss1 / divisor, epoch)
        _run.log_scalar('train_loss2', epoch_loss2 / divisor, epoch)
        _run.log_scalar('train_loss3', epoch_loss3 / divisor, epoch)
        _run.log_scalar('val_loss1', val_loss1, epoch)
        _run.log_scalar('val_loss2', val_loss2, epoch)
        _run.log_scalar('val_loss3', val_loss3, epoch)
        _run.log_scalar('val_acc1', acc1, epoch)
        _run.log_scalar('val_acc2', acc2, epoch)
        _run.log_scalar('val_acc3', acc3, epoch)
        _run.log_scalar('weight1', weight1.item(), epoch)
        _run.log_scalar('weight2', weight2.item(), epoch)
        _run.log_scalar('weight3', weight3.item(), epoch)

    if checkpoint_at_end:
        _save_model(_run, model)
def sigterm_handler(logger: Logger, event_loop: AbstractEventLoop) -> None:
    """Stop ``event_loop`` when a SIGTERM arrives.

    Does nothing if the loop is not running; otherwise logs the signal and
    asks the loop to stop.
    """
    if not event_loop.is_running():
        return

    logger.info('Received SIGTERM')
    event_loop.stop()
def start_qgis_application(
        enable_gui: bool = False,
        enable_processing: bool = False,
        verbose: bool = False,
        cleanup: bool = True,
        logger: logging.Logger = None,
        logprefix: str = 'Qgis:') -> 'QgsApplication':  # noqa: F821
    """ Create the QgsApplication and store it in the module-level global

        :param boolean enable_gui: Enable graphical interface, default to False
        :param boolean enable_processing: Call init_processing() once the
            application exists, default to False
        :param boolean verbose: Print the qgis settings, default to False
        :param boolean cleanup: Register an atexit hook that calls exitQgis()
            on the application. Note that prevents qgis to segfault when
            exiting. Default to True.
        :param logger: Logger to use; the root logger when not given
        :param str logprefix: Prefix put in front of the log messages
        :return: the created QgsApplication
        :raises RuntimeError: if the qgis bindings do not report "QGIS3"
    """
    os.environ['QGIS_NO_OVERRIDE_IMPORT'] = '1'
    os.environ['QGIS_DISABLE_MESSAGE_HOOKS'] = '1'

    if not logger:
        logger = logging.getLogger()
    setup_qgis_paths()

    # Imported only after setup_qgis_paths() has run.
    from qgis.core import Qgis, QgsApplication

    logger.info("Starting Qgis application: %s", Qgis.QGIS_VERSION)

    app_name = QgsApplication.QGIS_APPLICATION_NAME
    if app_name != "QGIS3":
        raise RuntimeError("You need QGIS3 (found %s)" % QgsApplication.QGIS_APPLICATION_NAME)

    # We MUST set the QT_QPA_PLATFORM to prevent
    # Qt trying to connect to display in containers
    if not enable_gui and os.environ.get('DISPLAY') is None:
        logger.info("Setting offscreen mode")
        os.environ['QT_QPA_PLATFORM'] = 'offscreen'

    qgis_prefix = os.environ.get('QGIS3_HOME', '/usr')

    # XXX Set QGIS_PREFIX_PATH, it seems that setPrefixPath
    # does not do the job correctly
    os.environ['QGIS_PREFIX_PATH'] = qgis_prefix

    global qgis_application

    qgis_application = QgsApplication([], enable_gui)
    qgis_application.setPrefixPath(qgis_prefix, True)
    #qgis_application.initQgis()

    if cleanup:
        # Closing QgsApplication on exit will
        # prevent our app to segfault on exit()
        import atexit

        logger.info("%s Installing cleanup hook" % logprefix)

        def exitQgis():
            global qgis_application
            if qgis_application:
                qgis_application.exitQgis()
                del qgis_application

        atexit.register(exitQgis)

    if verbose:
        print(qgis_application.showSettings())

    # Install logger hook
    install_logger_hook(logger, logprefix, verbose=verbose)

    logger.info("%s Qgis application initialized......" % logprefix)

    if enable_processing:
        init_processing()
        logger.info("%s QGis processing initialized" % logprefix)

    return qgis_application
def build_dataloader(self,
                     data,
                     batch_size,
                     shuffle=False,
                     device=None,
                     logger: logging.Logger = None,
                     gradient_accumulation=1,
                     tau: float = 0.8,
                     prune=None,
                     prefetch=None,
                     tasks_need_custom_eval=None,
                     cache=False,
                     debug=False,
                     **kwargs) -> DataLoader:
    """Build one dataloader per task and combine them in a MultiTaskDataLoader.

    ``data`` is either one of the split names ``'trn'``, ``'dev'``, ``'tst'``
    (each task then supplies its own split; with ``debug`` the ``'trn'`` split
    is replaced by the task's dev data) or anything else, which is handed to
    every task unchanged. For ``'trn'`` a table with the per-task sampling
    distribution is logged.

    Args:
        data: Split name or data forwarded to each task.
        batch_size: Not read in this method.
        shuffle: Passed as ``training`` to the MultiTaskDataLoader.
        device: Forwarded to each task's ``build_dataloader``.
        logger: Used for progress output and forwarded to each task.
        gradient_accumulation: Forwarded to each task's ``build_dataloader``.
        tau: Passed to the MultiTaskDataLoader.
        prune: Not read in this method (pruning is currently disabled).
        prefetch: When truthy, the result may be wrapped in a PrefetchDataLoader.
        tasks_need_custom_eval: When truthy, prefetching is only applied to ``'trn'``.
        cache: When truthy, ``'trn'``/``'dev'`` loaders are wrapped in a
            CachedDataLoader; a string value is used as the cache directory.
        debug: Use the dev split in place of the training split.
        **kwargs: Not read in this method.

    Returns:
        The combined (optionally prefetching) dataloader.
    """
    # This method is only called during training or evaluation but not prediction
    combined = MultiTaskDataLoader(training=shuffle, tau=tau)
    split_given_by_name = isinstance(data, str)

    for index, (task_name, task) in enumerate(self.tasks.items()):
        encoder_transform, transform = self.build_transform(task)

        if data == 'trn':
            task_data, training = (task.dev if debug else task.trn), True
        elif data == 'dev':
            task_data, training = task.dev, False
        elif data == 'tst':
            task_data, training = task.tst, False
        else:
            task_data, training = data, None

        if split_given_by_name:
            logger.info(
                f'[yellow]{index + 1} / {len(self.tasks)}[/yellow] Building [blue]{data}[/blue] dataset for '
                f'[cyan]{task_name}[/cyan] ...')

        # Adjust Tokenizer according to task config
        task_config = copy(task.config)
        task_config.pop('transform', None)
        task_dataloader: DataLoader = task.build_dataloader(
            task_data,
            transform,
            training,
            device,
            logger,
            tokenizer=encoder_transform.tokenizer,
            gradient_accumulation=gradient_accumulation,
            cache=split_given_by_name,
            **task_config)

        if cache and data in ('trn', 'dev'):
            cache_file = (
                f'{cache}/{os.getpid()}-{data}-{task_name.replace("/", "-")}-cache.pt'
                if isinstance(cache, str) else None)
            task_dataloader: CachedDataLoader = CachedDataLoader(task_dataloader, cache_file)

        combined.dataloaders[task_name] = task_dataloader

    if data == 'trn':
        sampling_weights, total_size = combined.sampling_weights
        headings = ['task', '#batches', '%batches', '#scaled', '%scaled', '#epoch']
        matrix = []
        epochs_per_task = []
        for (task_name, dataset), weight in zip(combined.dataloaders.items(), sampling_weights):
            num_batches = len(dataset)
            epochs = num_batches / weight / total_size
            epochs_per_task.append(epochs)
            matrix.append([
                f'{task_name}',
                num_batches,
                f'{num_batches / total_size:.2%}',
                int(total_size * weight),
                f'{weight:.2%}',
                f'{epochs:.2f}',
            ])

        # Highlight the epoch count of the task with the largest value.
        # The +2 offset skips the first two lines of the rendered table
        # (presumably header and separator — TODO confirm against markdown_table).
        longest = int(torch.argmax(torch.tensor(epochs_per_task)))
        rows = markdown_table(headings, matrix).splitlines()
        cells = rows[longest + 2].split('|')
        cells[-2] = cells[-2].replace(
            f'{epochs_per_task[longest]:.2f}',
            f'[bold][red]{epochs_per_task[longest]:.2f}[/red][/bold]')
        rows[longest + 2] = '|'.join(cells)
        logger.info(
            f'[bold][yellow]{"Samples Distribution": ^{len(rows[0])}}[/yellow][/bold]'
        )
        logger.info('\n'.join(rows))

    if prefetch and (data == 'trn' or not tasks_need_custom_eval):
        combined = PrefetchDataLoader(combined, prefetch=prefetch)

    return combined
def trinity_boot(args: Namespace,
                 trinity_config: TrinityConfig,
                 extra_kwargs: Dict[str, Any],
                 plugin_manager: PluginManager,
                 listener: logging.handlers.QueueListener,
                 event_bus: EventBus,
                 main_endpoint: Endpoint,
                 logger: logging.Logger) -> None:
    """Start the database and networking processes, then run the event loop.

    The database process is started first and the function waits for its IPC
    socket before launching the networking process. Afterwards it wires
    shutdown handling (ShutdownRequest events, SIGTERM, keyboard interrupt)
    to ``kill_trinity_gracefully``, prepares the plugins and blocks in
    ``loop.run_forever()``.
    """
    # start the listener thread to handle logs produced by other processes in
    # the local logger.
    listener.start()

    ensure_eth1_dirs(trinity_config.get_app_config(Eth1AppConfig))

    networking_endpoint = event_bus.create_endpoint(NETWORKING_EVENTBUS_ENDPOINT)
    event_bus.start()

    # First initialize the database process.
    db_process = ctx.Process(
        name="DB",
        target=run_database_process,
        args=(trinity_config, LevelDB),
        kwargs=extra_kwargs,
    )

    net_process = ctx.Process(
        name="networking",
        target=launch_node,
        args=(args, trinity_config, networking_endpoint),
        kwargs=extra_kwargs,
    )

    # start the processes
    db_process.start()
    logger.info("Started DB server process (pid=%d)", db_process.pid)

    # networking process needs the IPC socket file provided by the database process
    try:
        wait_for_ipc(trinity_config.database_ipc_path)
    except TimeoutError:
        logger.error("Timeout waiting for database to start.  Exiting...")
        kill_process_gracefully(db_process, logger)
        ArgumentParser().error(message="Timed out waiting for database start")

    net_process.start()
    logger.info("Started networking process (pid=%d)", net_process.pid)

    def kill_trinity_with_reason(reason: str) -> None:
        kill_trinity_gracefully(
            logger,
            (db_process, net_process),
            plugin_manager,
            main_endpoint,
            event_bus,
            reason=reason,
        )

    def on_shutdown_request(ev):
        return kill_trinity_with_reason(ev.reason)

    def on_sigterm():
        return kill_trinity_with_reason("SIGTERM")

    main_endpoint.subscribe(ShutdownRequest, on_shutdown_request)

    plugin_manager.prepare(args, trinity_config, extra_kwargs)

    try:
        loop = asyncio.get_event_loop()
        loop.add_signal_handler(signal.SIGTERM, on_sigterm)
        loop.run_forever()
        loop.close()
    except KeyboardInterrupt:
        kill_trinity_with_reason("CTRL+C / Keyboard Interrupt")
def log_basic_info(logger: logging.Logger, config: Any) -> None:
    """Log library versions, GPU details, the configuration and distributed settings.

    GPU details are logged only when CUDA is available and the distributed
    settings only when the world size is greater than one.

    Parameters
    ----------
    logger
        Logger instance for logging
    config
        config object to log; its ``vars()`` are pretty-printed
    """
    import ignite

    logger.info("PyTorch version: %s", torch.__version__)
    logger.info("Ignite version: %s", ignite.__version__)

    if torch.cuda.is_available():
        # explicitly import cudnn as
        # torch.backends.cudnn can not be pickled with hvd spawning procs
        from torch.backends import cudnn

        local_rank = idist.get_local_rank()
        logger.info("GPU device: %s", torch.cuda.get_device_name(local_rank))
        logger.info("CUDA version: %s", torch.version.cuda)
        logger.info("CUDNN version: %s", cudnn.version())

    logger.info("Configuration: %s", pformat(vars(config)))

    if idist.get_world_size() <= 1:
        return

    # Values are fetched lazily, one at a time, right before each message.
    distributed_entries = (
        ("distributed configuration: %s", lambda: idist.model_name()),
        ("backend: %s", lambda: idist.backend()),
        ("device: %s", lambda: idist.device().type),
        ("hostname: %s", lambda: idist.hostname()),
        ("world size: %s", lambda: idist.get_world_size()),
        ("rank: %s", lambda: idist.get_rank()),
        ("local rank: %s", lambda: idist.get_local_rank()),
        ("num processes per node: %s", lambda: idist.get_nproc_per_node()),
        ("num nodes: %s", lambda: idist.get_nnodes()),
        ("node rank: %s", lambda: idist.get_node_rank()),
    )
    for template, fetch in distributed_entries:
        logger.info(template, fetch())
def train_transformer(df: pd.DataFrame,
                      use_cols_config: Dict[str, dict],
                      window: int,
                      criterion: Union,
                      optimizer: Union,
                      optimizer_params: dict,
                      scheduler: Union,
                      scheduler_params: dict,
                      n_emb: int,
                      n_head: int,
                      n_hidden: int,
                      n_layers: int,
                      batch_size: int,
                      epochs: int,
                      dropout: float,
                      logger: Logger,
                      output_dir: str,
                      model_id: str):
    """
    Train a TransformerModel on ``df`` and write validation predictions to CSV.

    Rows with ``content_type_id == 0`` are split per ``user_id``: about 10% of
    the users go entirely to validation, for the rest the last 10% of their
    rows do. The validation AUC is logged after every epoch and the files
    ``val.csv``, ``all.csv`` and ``oof_{model_id}.csv`` are written to
    ``output_dir`` at the end.

    :param df: input frame; read columns include user_id, content_type_id, answered_correctly
    :param use_cols_config: {col_name: {"embedding_num": int}}
    :param window: passed to RiiidDataset
    :param criterion: loss callable applied to (prediction, target)
    :param optimizer: optimizer factory, called as optimizer(param_groups, **optimizer_params)
    :param optimizer_params: keyword arguments for the optimizer factory
    :param scheduler: scheduler factory, called as scheduler(optimizer, **params)
    :param scheduler_params: must contain "num_training_epochs"; it is converted
        to "num_training_steps" on a copy, the caller's dict is left untouched
    :param n_emb: passed to TransformerModel
    :param n_head: passed to TransformerModel
    :param n_hidden: passed to TransformerModel
    :param n_layers: passed to TransformerModel
    :param batch_size: batch size for both dataloaders
    :param epochs: number of training epochs
    :param dropout: passed to TransformerModel
    :param logger: logger for epoch and AUC messages
    :param output_dir: directory the CSV files are written to
    :param model_id: suffix of the out-of-fold prediction file
    :return: None
    """
    train_idx = []
    val_idx = []
    # Fixed seed so the train/validation split is reproducible.
    # Note that this reseeds NumPy's global generator.
    np.random.seed(0)
    for _, w_df in df[df["content_type_id"] == 0].groupby("user_id"):
        if np.random.random() < 0.1:
            # all val
            val_idx.extend(w_df.index.tolist())
        else:
            train_num = int(len(w_df) * 0.9)
            train_idx.extend(w_df[:train_num].index.tolist())
            val_idx.extend(w_df[train_num:].index.tolist())

    dataset_train = RiiidDataset(df=df, indice=train_idx, use_cols_config=use_cols_config, window=window)
    dataloader_train = torch.utils.data.DataLoader(dataset=dataset_train,
                                                   batch_size=batch_size,
                                                   collate_fn=collate_fn,
                                                   num_workers=4,
                                                   shuffle=True)
    print(f"make_train_data len={len(dataset_train)}")
    dataset_val = RiiidDataset(df=df, indice=val_idx, use_cols_config=use_cols_config, window=window)
    dataloader_val = torch.utils.data.DataLoader(dataset=dataset_val,
                                                 batch_size=batch_size,
                                                 collate_fn=collate_fn,
                                                 num_workers=1,
                                                 shuffle=False)
    print(f"make_val_data len={len(dataset_val)}")

    model = TransformerModel(n_emb=n_emb,
                             use_cols_config=use_cols_config,
                             n_head=n_head,
                             n_hidden=n_hidden,
                             n_layers=n_layers,
                             dropout=dropout)

    # Bias and LayerNorm parameters are excluded from weight decay.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    # TODO: fix later; this is messy.
    # Work on a copy so the caller's dict is not modified (it may be reused
    # for another call, which would otherwise fail on the missing key).
    scheduler_params = dict(scheduler_params)
    num_training_epochs = scheduler_params.pop("num_training_epochs")
    scheduler_params["num_training_steps"] = len(dataset_train) // batch_size * num_training_epochs

    optimizer = optimizer(optimizer_grouped_parameters, **optimizer_params)
    scheduler = scheduler(optimizer, **scheduler_params)

    predict = []
    label = []
    for epoch in range(epochs):
        logger.info(f"--- epoch {epoch+1} ---")

        # Validation below switches to eval mode, so re-enable train mode here.
        model.train()
        for batch in tqdm.tqdm(dataloader_train):
            with torch.set_grad_enabled(mode=True):
                output = model(batch)
                loss = criterion(output.flatten().float(),
                                 batch["answered_correctly"][:, -1].flatten().float())
                loss.backward()
                # The optimizer has to step before the scheduler.
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

        predict = []
        label = []
        # Evaluate with dropout disabled and without building autograd graphs.
        model.eval()
        with torch.no_grad():
            for batch in tqdm.tqdm(dataloader_val):
                output = torch.sigmoid(model(batch))
                predict.extend(output.flatten().detach().data.numpy().tolist())
                # NOTE(review): training targets use column -1 while validation
                # labels use column 0 — confirm this asymmetry is intended.
                label.extend(batch["answered_correctly"][:, 0].flatten().detach().data.numpy().tolist())
        logger.info(
            f"AUC: {round(roc_auc_score(np.array(label), np.array(predict)), 4)}"
        )

    df.loc[val_idx].to_csv(f"{output_dir}/val.csv")
    df.to_csv(f"{output_dir}/all.csv")
    # Out-of-fold predictions of the last epoch, indexed like the validation rows.
    df_ret = pd.DataFrame(index=val_idx)
    df_ret["predict"] = np.array(predict)
    df_ret["target"] = np.array(label)
    df_ret["target2"] = df.loc[val_idx]["answered_correctly"]
    df_ret.to_csv(f"{output_dir}/oof_{model_id}.csv")
def fix_unclean_shutdown(chain_config: ChainConfig, logger: logging.Logger) -> None:
    """Clean up what a previous, uncleanly terminated run left behind.

    Every ``*.pid`` file in ``chain_config.data_dir`` is read, the process id
    it contains is handed to ``kill_process_id_gracefully`` and the file is
    removed. Finally the database IPC socket file is removed if it still
    exists.
    """
    logger.info("Cleaning up unclean shutdown...")

    logger.info("Searching for process id files in %s..." % chain_config.data_dir)
    pidfiles = tuple(chain_config.data_dir.glob('*.pid'))
    found = len(pidfiles)
    if found == 0:
        logger.info('Found 0 processes from a previous run. No processes to kill.')
    elif found == 1:
        logger.info('Found 1 process from a previous run. Closing...')
    else:
        logger.info('Found %d processes from a previous run. Closing...' % found)

    for pidfile in pidfiles:
        process_id = int(pidfile.read_text())
        kill_process_id_gracefully(process_id, time.sleep, logger)
        # The file may already be gone once the process has been killed.
        try:
            pidfile.unlink()
        except FileNotFoundError:
            logger.debug('pidfile %s was gone after killing process id %d' % (pidfile, process_id))
        else:
            logger.info('Manually removed %s after killing process id %d' % (pidfile, process_id))

    db_ipc = chain_config.database_ipc_path
    try:
        db_ipc.unlink()
    except FileNotFoundError:
        logger.debug('The IPC socket file for database connections at %s was already gone', db_ipc)
    else:
        logger.info('Removed a dangling IPC socket file for database connections at %s', db_ipc)
def setup_localisations(logger: logging.Logger) -> None:
    """Setup gettext localisations.

    The language code comes from the system default locale and can be
    overridden with a ``lang=<code>`` command line argument. The first
    ``<code>.mo`` file found in the ``i18n`` install folder for any of the
    expanded language codes is loaded; otherwise either a dummy translation
    (for the code ``dummy``) or an untranslated fallback is used. The chosen
    translation is installed into builtins.

    The logger is called with ``{}``-style placeholders, so it is presumably
    expected to be the project's own logger type rather than a plain stdlib
    one — TODO confirm.
    """
    from srctools.property_parser import PROP_FLAGS_DEFAULT
    import gettext
    import locale

    # Get the 'en_US' style language code.
    # getdefaultlocale() reports None when no locale can be determined, and
    # gettext._expand_lang() cannot handle None, so fall back to English.
    # NOTE: locale.getdefaultlocale() is deprecated since Python 3.11.
    lang_code = locale.getdefaultlocale()[0] or 'en'

    # Allow overriding through command line.
    for arg in sys.argv[1:]:
        if arg.casefold().startswith('lang='):
            lang_code = arg[5:]
            break

    # Expands single code to parent categories.
    # NOTE: _expand_lang is a private helper of the gettext module.
    expanded_langs = gettext._expand_lang(lang_code)

    logger.info('Language: {!r}', lang_code)
    logger.debug('Language codes: {!r}', expanded_langs)

    # Add these to Property's default flags, so config files can also
    # be localised.
    for lang in expanded_langs:
        PROP_FLAGS_DEFAULT['lang_' + lang] = True

    lang_folder = install_path('i18n')

    trans: gettext.NullTranslations

    for lang in expanded_langs:
        try:
            file = open(lang_folder / (lang + '.mo'), 'rb')
        except FileNotFoundError:
            continue
        with file:
            trans = gettext.GNUTranslations(file)
            break
    else:
        # To help identify missing translations, replace everything with
        # something noticable.
        if lang_code == 'dummy':
            class DummyTranslations(gettext.NullTranslations):
                """Dummy form for identifying missing translation entries."""

                def gettext(self, message: str) -> str:
                    """Generate placeholder of the right size."""
                    # We don't want to leave {arr} intact.
                    return ''.join([
                        '#' if s.isalnum() or s in '{}' else s
                        for s in message
                    ])

                def ngettext(self, msgid1: str, msgid2: str, n: int) -> str:
                    """Generate placeholder of the right size for plurals."""
                    return self.gettext(msgid1 if n == 1 else msgid2)

                lgettext = gettext
                lngettext = ngettext

            trans = DummyTranslations()
        # No translations, fallback to English.
        # That's fine if the user's language is actually English.
        else:
            if 'en' not in expanded_langs:
                logger.warning(
                    "Can't find translation for codes: {!r}!",
                    expanded_langs,
                )
            trans = gettext.NullTranslations()

    # Add these functions to builtins, plus _=gettext
    trans.install(['gettext', 'ngettext'])

    # Some lang-specific overrides..

    if trans.gettext('__LANG_USE_SANS_SERIF__') == 'YES':
        # For Japanese/Chinese, we want a 'sans-serif' / gothic font
        # style.
        try:
            from tkinter import font
        except ImportError:
            return
        font_names = [
            'TkDefaultFont',
            'TkHeadingFont',
            'TkTooltipFont',
            'TkMenuFont',
            'TkTextFont',
            'TkCaptionFont',
            'TkSmallCaptionFont',
            'TkIconFont',
            # Note - not fixed-width...
        ]
        for font_name in font_names:
            font.nametofont(font_name).configure(family='sans-serif')
def repair(self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path) -> bool:
    """Report that this error cannot be repaired automatically.

    Logs an informational message and returns False. ``overlay`` and
    ``fsck_dir`` are not used.
    """
    message = "no automatic remediation available for this error"
    log.info(message)
    repaired = False
    return repaired
Logger.debug(logger, "Loading URL %s from %s" % (line, urlFile)) urls.append(line) if (len(urls) < 1): print "No urls were able to be loaded from %s, exiting!" % urlFile exit(1) def main(): global start global logger for i in range(maxThreadCount): Logger.debug(logger, "Starting thread #%d" % i) t = ThreadUrl(queue) t.setDaemon(True) t.start() #populate queue with data for j in range(maxQueryCount): Logger.debug(logger, "Populating URL #%d" % j) queue.put(urls[randint(0,len(urls)-1)]) start = time.time() queue.join() readUrlsFromFile() main() Logger.info(logger, "Cumulative Query Time: %s" % totalFetchTime) Logger.info(logger, "Total Elapsed Time: %s" % (time.time() - start))
def read(
    data_dir: str,
    feature_config: FeatureConfig,
    tfrecord_type: str,
    file_io: FileIO,
    max_sequence_size: int = 0,
    batch_size: int = 0,
    preprocessing_keys_to_fns: dict = None,
    parse_tfrecord: bool = True,
    use_part_files: bool = False,
    logger: Logger = None,
    **kwargs
) -> data.TFRecordDataset:
    """
    Extract features by reading and parsing TFRecord data
    and converting into a TFRecordDataset using the FeatureConfig

    Parameters
    ----------
    data_dir: str
        path to the directory containing train, validation and test data
    feature_config: `FeatureConfig` object
        FeatureConfig object that defines the features to be loaded in the dataset
        and the preprocessing functions to be applied to each of them
    tfrecord_type: {"example", "sequence_example"}
        Type of the TFRecord protobuf message to be used for TFRecordDataset
    file_io: `FileIO` object
        file I/O handler objects for reading and writing data
    max_sequence_size: int, optional
        maximum number of sequence to be used with a single SequenceExample proto message
        The data will be appropriately padded or clipped to fit the max value specified
    batch_size: int, optional
        size of each data batch; a falsy value leaves the dataset unbatched
    preprocessing_keys_to_fns: dict of(str, function), optional
        dictionary of function names mapped to function definitions
        that can now be used for preprocessing while loading the
        TFRecordDataset to create the RelevanceDataset object.
        Defaults to an empty dictionary
    use_part_files: bool, optional
        load dataset from part files checked using "part-" prefix
    parse_tfrecord: bool, optional
        parse the TFRecord string from the dataset;
        returns strings as is otherwise
    logger: `Logger`, optional
        logging handler for status messages

    Returns
    -------
    `TFRecordDataset`
        TFRecordDataset loaded from the `data_dir` specified using the FeatureConfig
    """
    # A fresh dict per call; a dict literal as the default value would be
    # one object shared between all calls.
    if preprocessing_keys_to_fns is None:
        preprocessing_keys_to_fns = {}

    parse_fn = get_parse_fn(
        feature_config=feature_config,
        tfrecord_type=tfrecord_type,
        preprocessing_keys_to_fns=preprocessing_keys_to_fns,
        max_sequence_size=max_sequence_size,
    )

    # Get all tfrecord files in directory
    tfrecord_files = file_io.get_files_in_directory(
        data_dir,
        extension="" if use_part_files else ".tfrecord",
        prefix="part-" if use_part_files else "",
    )

    # Parse the protobuf data to create a TFRecordDataset
    dataset = data.TFRecordDataset(tfrecord_files)

    if parse_tfrecord:
        # Parallel calls set to AUTOTUNE: improved training performance by 40% with a classification model
        dataset = (
            dataset.map(parse_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            # .apply(data.experimental.ignore_errors())
        )

    # Create BatchedDataSet
    if batch_size:
        dataset = dataset.batch(batch_size, drop_remainder=False)

    if logger:
        # Only the first 50 characters of the file list are logged.
        logger.info(
            "Created TFRecordDataset from SequenceExample protobufs from {} files : {}".format(
                len(tfrecord_files), str(tfrecord_files)[:50]
            )
        )

    # We apply prefetch as it improved train/test/validation throughput by 30% in some real model training.
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return dataset
self.obj.some_property+=1 def __exit__(self, *args): self.obj.some_property-=1 # a more complex example @contextmanager def error_logging(logger, level): oldlevel = logger.level try: logger.setLevel(level) yield finally: logger.setLevel(oldlevel) if __name__ == "__main__": logger = Logger('name',20) handler = FileHandler('flog.log') logger.addHandler(handler) logger.info('this will get logged') with error_logging(logger, 30): logger.info('this will not get logged') logger.info('this will get logged because the level is {}'.format(logger.level)) class Simple_obj(object): def __init__(self, arg): self.some_property = arg ''' s = Simple_obj(5) with simple_context_manager(s): print s.some_property '''
def live(bucket_name: str,
         order_name: str,
         run_date: str,
         start_time: str,
         end_time: str,
         camera_address: str,
         camera_username: str = 'xames3',
         camera_password: str = 'iamironman',
         camera_port: Union[int, str] = 554,
         camera_timeout: Union[float, int, str] = 30.0,
         timestamp_format: str = '%H:%M:%S',
         log: logging.Logger = None) -> Optional[str]:
    """Record live videos based on time duration using FFMPEG.

    Recording is retried in a loop: whenever the camera is reachable a new
    clip is recorded for the remaining duration, otherwise the function
    sleeps for ``camera_timeout`` seconds and tries again. Once the end time
    has passed or the requested duration has been recorded, the clips are
    concatenated.

    Args:
      bucket_name: S3 bucket name.
      order_name: Order name.
      run_date: Date when to record the video.
      start_time: Time when to start recording the video.
      end_time: Time when to stop recording the video.
      camera_address: Camera's IP address.
      camera_username: Camera username.
      camera_password: Camera password.
      camera_port: Camera port number.
      camera_timeout: Maximum time to wait until disconnection occurs.
      timestamp_format: Timestamp for checking the recording start time.
      log: Logger object.

    Returns:
      The value returned by ``concate_videos`` once it is truthy. ``None``
      when the calculated duration is zero or when an exception was caught
      (the error is logged, not raised).
    """
    # NOTE(review): the default username/password are hard-coded credentials;
    # they are kept for interface compatibility but should come from
    # configuration.
    log = _log(__file__) if log is None else log

    camera_port = int(camera_port)
    camera_timeout = float(camera_timeout)

    start_time, end_time = f'{run_date} {start_time}', f'{run_date} {end_time}'
    duration = calculate_duration(start_time, end_time, timestamp_format, True)
    # Moment (as a timestamp) after which recording is stopped regardless of
    # how much duration is still outstanding.
    force_close = datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S')
    force_close = force_close.replace(tzinfo=timezone.utc).timestamp()

    vid_type = video_type(True, True, True)
    temp = os.path.join(_lr, f'{bucket_name}{order_name}')

    # Creates missing parent directories too and does not fail if the
    # directory already exists.
    os.makedirs(temp, exist_ok=True)
    temp_file = os.path.join(temp, f'{bucket_name}{order_name}{vid_type}.mp4')

    url = configure_camera_url(camera_address, camera_username,
                               camera_password, camera_port)
    slept_duration, idx = 0, 1

    if duration != 0:
        try:
            while True:
                if camera_live(camera_address, camera_port, camera_timeout, log):
                    file = filename(temp_file, idx)
                    log.info('Recording started for selected camera.')
                    os.system(ffmpeg_str(url, file, duration, camera_timeout))

                    stop_utc = now().replace(tzinfo=timezone.utc).timestamp()
                    stop_secs = now().second

                    # A clip reported as '300.0 bytes' is handled specially:
                    # the current seconds value is used instead of its length.
                    _old_file = file_size(file)
                    old_duration = stop_secs if _old_file == '300.0 bytes' else drn(file)
                    # Subtract what was recorded and the time spent waiting
                    # for the camera from the remaining duration.
                    duration = duration - old_duration - slept_duration

                    slept_duration = 0
                    idx += 1
                    if (force_close <= stop_utc) or (duration <= 0):
                        output = concate_videos(temp, delete_old_files=True)
                        if output:
                            return output
                else:
                    log.warning('Unable to record because of poor network connectivity.')
                    slept_duration += camera_timeout
                    # Report the wait that is actually configured.
                    log.warning('Compensating lost time & attempting after '
                                f'{camera_timeout:g} secs.')
                    time.sleep(camera_timeout)
        except Exception as error:
            log.critical(f'Something went wrong because of {error}')
def __init__(self, logger: Logger, config: IdPConfig, userdb: Optional[Any] = None):
    """Set up the IdP application object.

    Creates the SSO session cache, the ticket (login state) cache, the
    optional authn-info and actions databases, the user database and the
    authentication backend, registers the cherrypy error handlers, and
    finally bundles everything into ``self.context``.

    :param logger: logger used by this object and handed to its components
    :param config: IdP configuration
    :param userdb: user database to use; when None, an
        ``eduid_idp.idp_user.IdPUserDb`` is created from logger and config
    """
    self.logger = logger
    self.config = config
    self.response_status = None
    self.start_response = None

    # Connecting to MongoDB can take some time if the replica set is not fully working.
    # Log both 'starting' and 'started' messages.
    self.logger.info("eduid-IdP server starting")

    self._init_pysaml2()

    # SSO session cache: MongoDB-backed when a URI is configured, in-memory otherwise.
    sso_ttl = self.config.sso_session_lifetime * 60
    sso_mongo_uri = self.config.sso_session_mongo_uri
    sso_sessions: SSOSessionCache
    if not sso_mongo_uri:
        sso_sessions = eduid_idp.cache.SSOSessionCacheMem(self.logger, sso_ttl, threading.Lock())
    else:
        sso_sessions = eduid_idp.cache.SSOSessionCacheMDB(sso_mongo_uri, self.logger, sso_ttl)

    # The ticket cache TTL is the configured login state TTL plus one, times sixty.
    ticket_ttl = (self.config.login_state_ttl + 1) * 60
    ticket_sessions = SSOLoginDataCache(
        'TicketCache', self.logger, ticket_ttl, self.config, threading.Lock()
    )

    self.authn_info_db = None
    actions_db = None

    if config.mongo_uri:
        self.authn_info_db = eduid_idp.authn.AuthnInfoStoreMDB(config.mongo_uri, logger)

    if config.mongo_uri and config.actions_app_uri:
        actions_db = ActionDB(config.mongo_uri)
        self.logger.info("configured to redirect users with pending actions")
    else:
        self.logger.debug("NOT configured to redirect users with pending actions")

    self.userdb = userdb if userdb is not None else eduid_idp.idp_user.IdPUserDb(logger, config)
    self.authn = eduid_idp.authn.IdPAuthn(logger, config, self.userdb)

    error_handlers = {
        'request.error_response': self.handle_error,
        'error_page.default': self.error_page_default,
    }
    cherrypy.config.update(error_handlers)

    # Build the address shown in the 'started' message; an address containing
    # ':' is treated as IPv6 and wrapped in brackets.
    scheme = 'https://' if self.config.server_key else 'http://'
    address = self.config.listen_addr
    host = '[' + address + ']' if ':' in address else address
    listen_str = scheme + host + ':' + str(self.config.listen_port)
    self.logger.info("eduid-IdP server started, listening on {!s}".format(listen_str))

    # Shared sessions need a redis endpoint plus a cookie name and a secret key.
    common_sessions: Optional[ExpiringCacheCommonSession] = None
    redis_configured = config.redis_sentinel_hosts or config.redis_host
    if redis_configured and config.shared_session_cookie_name and config.shared_session_secret_key:
        common_sessions = ExpiringCacheCommonSession(
            'CommonSessions',
            logger,
            config.shared_session_ttl,
            config,
            secret=config.shared_session_secret_key,
        )
    else:
        logger.info('eduID shared sessions not configured')

    self.context = IdPContext(
        config=self.config,
        idp=self.IDP,
        logger=self.logger,
        sso_sessions=sso_sessions,
        ticket_sessions=ticket_sessions,
        common_sessions=common_sessions,
        actions_db=actions_db,
        authn=self.authn,
    )
def build_package_methods(logger: logging.Logger) -> None:
    """Generate the typed ``client``/``resource`` helper modules.

    For every active entry of ``SUBMODULES`` a ``client`` function (module
    level) and a ``client`` method (for ``Session``) are rendered, plus the
    ``resource`` counterparts when the submodule has a resource. When nothing
    was rendered for a given kind, a generic ``str -> Any`` fallback is used.
    The results are written to ``boto3_init_gen.py`` and
    ``boto3_session_gen.py`` under ``ROOT_PATH``.

    Arguments:
        logger -- Logger used for progress messages and passed to
            ``write_text``.
    """
    active_submodules: List[Submodule] = [
        submodule for submodule in SUBMODULES if submodule.is_active
    ]
    for submodule in active_submodules:
        logger.info(
            "Discovered %s service stubs in %s",
            submodule.class_name,
            submodule.pypi_name,
        )

    # `@overload` is only valid when at least two variants are emitted.
    # Every active submodule yields a client, but only some yield a resource,
    # so the two kinds have to be counted separately; otherwise a single
    # resource-bearing submodule would produce a lone overload.
    client_overloaded = len(active_submodules) > 1
    resource_overloaded = (
        sum(1 for submodule in active_submodules if submodule.has_resource) > 1
    )

    # NOTE(review): the whitespace after "\n" in the method prefixes must
    # match the indentation METHOD_TEMPLATE itself starts with - confirm.
    client_function_prefix = "@overload\n" if client_overloaded else ""
    client_method_prefix = "@overload\n " if client_overloaded else ""
    resource_function_prefix = "@overload\n" if resource_overloaded else ""
    resource_method_prefix = "@overload\n " if resource_overloaded else ""

    init_client_functions: List[str] = []
    init_resource_functions: List[str] = []
    session_client_functions: List[str] = []
    session_resource_functions: List[str] = []
    imports: List[str] = []

    for submodule in active_submodules:
        service_name_type = 'Literal["{}"]'.format(submodule.boto3_name)

        client_type = "{}Client".format(submodule.class_name)
        init_client_functions.append(
            FUNCTION_TEMPLATE.format(
                overload=client_function_prefix,
                name="client",
                service_name_type=service_name_type,
                return_type=client_type,
            ))
        session_client_functions.append(
            METHOD_TEMPLATE.format(
                overload=client_method_prefix,
                name="client",
                service_name_type=service_name_type,
                return_type=client_type,
            ))
        imports.append("from mypy_boto3.{} import {}Client".format(
            submodule.import_name,
            submodule.class_name,
        ))

        if not submodule.has_resource:
            continue

        resource_type = "{}ServiceResource".format(submodule.class_name)
        init_resource_functions.append(
            FUNCTION_TEMPLATE.format(
                overload=resource_function_prefix,
                name="resource",
                service_name_type=service_name_type,
                return_type=resource_type,
            ))
        session_resource_functions.append(
            METHOD_TEMPLATE.format(
                overload=resource_method_prefix,
                name="resource",
                service_name_type=service_name_type,
                return_type=resource_type,
            ))
        imports.append(
            "from mypy_boto3.{} import {}ServiceResource".format(
                submodule.import_name,
                submodule.class_name,
            ))

    # Generic fallbacks so each generated module always defines both names.
    for functions, template, name in (
        (init_client_functions, FUNCTION_TEMPLATE, "client"),
        (init_resource_functions, FUNCTION_TEMPLATE, "resource"),
        (session_client_functions, METHOD_TEMPLATE, "client"),
        (session_resource_functions, METHOD_TEMPLATE, "resource"),
    ):
        if not functions:
            functions.append(
                template.format(
                    overload="",
                    name=name,
                    service_name_type="str",
                    return_type="Any",
                ))

    # Both generated modules start with the same import preamble.
    header = (
        "import sys",
        "from typing import overload, Any",
        "if sys.version_info >= (3, 8):",
        " from typing import Literal",
        "else:",
        " from typing_extensions import Literal",
    )

    init_contents: List[str] = list(header)
    init_contents.extend(imports)
    init_contents.append("")
    init_contents.extend(init_client_functions)
    init_contents.extend(init_resource_functions)

    session_contents: List[str] = list(header)
    session_contents.extend(imports)
    session_contents.append("")
    session_contents.append("class Session:")
    session_contents.extend(session_client_functions)
    session_contents.extend(session_resource_functions)

    write_text(ROOT_PATH / "boto3_init_gen.py", "\n".join(init_contents),
               logger)
    write_text(ROOT_PATH / "boto3_session_gen.py",
               "\n".join(session_contents), logger)