Example #1
def handle_event(event: dict, channel: str, channel_id: str, message: str,
                 sc: SlackClient, logger: logging.Logger) -> None:
    pretty_event = pformat(event)
    logger.debug(f"Event received:\n{pretty_event}")

    subtype = event.get('subtype')
    user = event.get('user')

    if subtype in ('group_join', 'channel_join') and user:

        # We will use the event's channel ID to send a response and refer to
        # users by their display_name in accordance with new guidelines.
        # https://api.slack.com/changelog/2017-09-the-one-about-usernames
        event_channel_id = event.get('channel')
        user_profile = event.get('user_profile') or {}
        username = user_profile.get('display_name')
        user_mention = f"<@{user}>"
        message = message.replace('{user}', user_mention)

        if event_channel_id == channel_id:
            try:
                sc.rtm_send_message(event_channel_id, message)
                logger.info(f"Welcomed {username} to #{channel}")
            except AttributeError:
                logger.error(f"Couldn't send message to #{channel}")
Example #2
class ExpDateCSVParser(object):
    """Parse expansion and date info from a CSV file and update the
       database with the correct dates"""

    # pylint: disable-msg=R0913
    # we may need all these arguments for some files
    def __init__(self, oLogHandler):
        self.oLogger = Logger('exp date parser')
        if oLogHandler is not None:
            self.oLogger.addHandler(oLogHandler)
        self.oLogHandler = oLogHandler

    def parse(self, fIn):
        """Process the CSV file line into the CardSetHolder"""
        oCsvFile = csv.reader(fIn)
        aRows = list(oCsvFile)
        if hasattr(self.oLogHandler, 'set_total'):
            self.oLogHandler.set_total(len(aRows))
        for sExp, sDate in aRows:
            try:
                oExp = IExpansion(sExp)
            except SQLObjectNotFound:
                # This error is non-fatal - the user may not have imported
                # the extra card lists, so we can legitimately encounter
                # expansions here which aren't in the database
                self.oLogger.info('Skipped Expansion: %s' % sExp)
                continue
            oDate = datetime.datetime.strptime(sDate, "%Y%m%d").date()
            oExp.releasedate = oDate
            oExp.syncUpdate()
            self.oLogger.info('Added Expansion: %s' % sExp)
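A short sketch of driving the parser, assuming the snippet's surrounding Sutekh imports (csv, Logger, IExpansion, SQLObjectNotFound) are available; the file name is made up, and each row is expected to be "<expansion name>,<YYYYMMDD date>" as implied by the unpacking in parse():

import logging

with open('expansion_dates.csv', 'r') as fIn:
    oParser = ExpDateCSVParser(logging.StreamHandler())
    oParser.parse(fIn)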
Example #3
    def repair(
        self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
    ) -> bool:
        # TODO: It would be nice to try and get the contents of the
        # file/directory at this location in the current commit, rather than
        # just writing out an empty file or directory

        backup_dir = fsck_dir / "broken_inodes"
        backup_dir.mkdir(exist_ok=True)
        inode_data_path = Path(overlay.get_path(self.inode.inode_number))
        inode_backup_path = backup_dir / str(self.inode.inode_number)

        if self.expected_type == InodeType.DIR:
            log.info(
                f"replacing corrupt directory inode {self.compute_path()!r} with an "
                "empty directory"
            )
            os.rename(inode_data_path, inode_backup_path)
            overlay.write_empty_dir(self.inode.inode_number)
        else:
            log.info(
                f"replacing corrupt file inode {self.compute_path()!r} with an "
                "empty file"
            )
            os.rename(inode_data_path, inode_backup_path)
            overlay.write_empty_file(self.inode.inode_number)

        return True
Example #4
def spinner(text: str, logger: Logger, quiet=False, debug=False):
    '''Decoration for long running processes.

    :param text: Message to output
    :param logger: Logger to capture the error if it occurs
    :param quiet: If ``True``, messages will be hidden
    :param debug: If ``True``, show full tracebacks
    '''

    # pylint: disable=broad-except

    try:
        logger.info(text)

        if not quiet:
            print(text)

        yield

        if not quiet:
            print('Done\n')

    except Exception as exception:
        exception_traceback = format_exc()

        logger.error(exception_traceback)

        if not quiet:
            if debug:
                print(exception_traceback)

            else:
                print(str(exception))
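The bare yield suggests this generator is meant to be used as a context manager; the decorator is not shown in the snippet, so the sketch below applies contextlib.contextmanager explicitly (an assumption) to illustrate the intended call pattern:

from contextlib import contextmanager
from logging import getLogger
import time

wrapped_spinner = contextmanager(spinner)  # assumes spinner is undecorated, as shown above

with wrapped_spinner('Building the project...', getLogger('builder')):
    time.sleep(2)  # stand-in for the long-running work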
Example #5
    def execute(self, compile_base_path: str, timeout: Optional[int], logger: Logger):
        detector_invocation = ["java"] + self.detector.java_options + ["-jar", _quote(self.detector.jar_path)]
        command = detector_invocation + self._get_detector_arguments(self.version.get_compile(compile_base_path))
        command = " ".join(command)

        start = time.time()
        try:
            Shell.exec(command, logger=logger, timeout=timeout)
            self.result = Result.success
        except CommandFailedError as e:
            logger.error("Detector failed: %s", e)
            self.result = Result.error
            message = str(e)
            message_lines = str.splitlines(message)
            if len(message_lines) > 5000:
                self.message = "\n".join(message_lines[0:500]) + "\n" + "\n".join(message_lines[-4500:])
            else:
                self.message = message
        except TimeoutError:
            logger.error("Detector took longer than the maximum of %s seconds", timeout)
            self.result = Result.timeout
        finally:
            end = time.time()
            runtime = end - start
            self.runtime = runtime
            logger.info("Run took {0:.2f} seconds.".format(runtime))

        self.save()
Example #6
def extract_files(src_path, extracted_files, logger: logging.Logger):
    logger.info('Extracting new files...')

    list_files = os.listdir(src_path)
    list_land = []
    list_port = []
    for filename in list_files:
        if already_extracted(filename, extracted_files, logger):
            continue

        src_file = src_path + filename
        # check if it is image or not
        try:
            im = Image.open(src_file)
        except OSError:
            continue

        x, y = im.size
        im.close()
        if x == 1920 and y == 1080:
            list_land += [filename]
        if x == 1080 and y == 1920:
            list_port += [filename]

    return list_land, list_port
Example #7
def get_extracted_files(dst_path, logger: logging.Logger):
    logger.info('Getting list of already extracted files...')

    published_files = [f for f in os.listdir(dst_path) if f.endswith('.jpg')]
    dup_files = [f for f in os.listdir(dst_path / 'dups/') if f.endswith('.jpg')]

    return published_files + dup_files
Example #8
class Spy(MailService):
    """
    Шпион, который логгирует о всей почтовой переписке, которая проходит через его руки.
    Он следит только за объектами класса MailMessage и пишет в логгер следующие сообщения
    Если в качестве отправителя или получателя указан "Austin Powers":
    то нужно написать в лог сообщение с уровнем WARN:
    Detected target mail correspondence: from {from} to {to} "{message}"
    Иначе, необходимо написать в лог сообщение с уровнем INFO:
    Usual correspondence: from {from} to {to}
    """

    def __init__(self):
        self.__logger = Logger("logger")

    def process_mail(self, mail: Mail):
        if isinstance(mail, MailMessage):
            source = mail.get_source()
            destination = mail.get_destination()
            message = mail.get_message()
            if source == banned_address or destination == banned_address:
                self.__logger.warning('Detected target mail correspondence: from {0} to {1} "{2}"'.
                                      format(source, destination, message))
            else:
                self.__logger.info('Usual correspondence: from {0} to {1}'.format(source, destination))
        return mail
Example #9
def copy_unique_filepairs(difference_matrix, src_path, list_land, list_port,
                          img_path,
                          logger: logging.Logger):
    logger.info('Copying new unique files...')

    cnt = 0
    while True:
        n = len(list_land)
        m = len(list_port)
        if not n or not m:
            break

        arg_land, arg_port = np.unravel_index(np.argmin(difference_matrix), (n, m))

        logger.debug('  {0}'.format(list_land[arg_land]))
        logger.debug('  {0}'.format(list_port[arg_port]))
        logger.debug('    {0}'.format(list_land[arg_land] + 'XXXX' +
                                      list_port[arg_port] + '.jpg'))

        shutil.copyfile(src_path + list_land[arg_land],
                        img_path +
                        list_land[arg_land] + '-land-' +
                        list_port[arg_port] + '.jpg')
        shutil.copyfile(src_path + list_port[arg_port],
                        img_path +
                        list_land[arg_land] + '-port-' +
                        list_port[arg_port] + '.jpg')
        cnt += 1

        del list_land[arg_land], list_port[arg_port]
        difference_matrix = np.delete(difference_matrix, arg_land, 0)
        difference_matrix = np.delete(difference_matrix, arg_port, 1)

    logger.debug('{0} files copied'.format(cnt))
Example #10
    def run(self):
        global logger
        global totalFetchTime
        global totalRequestsCompleted
        while True:
            # grab a host from the queue
            host = self.queue.get()
            threadId = threading.current_thread().name

            # fetch the URL and read up to 100000 bytes of the page
            beginTime = time.time()
            url = urllib2.urlopen(host)
            x = url.read(100000)
            if not x:
                logger.warning("[%s] No data for %s" % (threadId, host))
            endTime = time.time()

            elapsedTime = endTime - beginTime

            logger.info("Request for %s executed in %s" % (host, elapsedTime))

            # signal to the queue that the job is done
            totalRequestsCompleted += 1
            totalFetchTime += elapsedTime
            self.queue.task_done()
Example #11
def check_os(logger: logging.Logger):
    logger.info('Checking Windows 10...')

    err_msg = 'This system is not Windows 10. Exit.'
    if sys.platform != 'win32':
        raise OSError(err_msg)
    if platform.release() != '10':
        raise OSError(err_msg)
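A small sketch of calling the check from a script entry point; the logger name is arbitrary:

import logging
import sys

logging.basicConfig(level=logging.INFO)
log = logging.getLogger('win10-check')
try:
    check_os(log)
except OSError as err:
    log.error(err)
    sys.exit(1)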
Example #12
File: core.py  Project: neveralso/JStack
def start_daemon(judge_logger: logging.Logger):
    """
    Start a daemon process which is running the .
    :param judge_config:
    :param judge_logger:
    :return: None
    """

    pid_file_path = os.path.join(os.getcwd(), judge_config.RUN['pid_file'])
    # pid = os.fork()
    # if pid > 0:
    #     sys.exit(0)
    #
    # os.chdir('/')
    # os.setsid()
    # os.umask(0)
    #
    # pid = os.fork()
    # if pid > 0:
    #     sys.exit(0)

    if os.path.exists(pid_file_path):
        print('Judge daemon is already running.')
        judge_logger.error('Judge daemon is already running.')
        exit(0)

    try:
        (_path, _) = os.path.split(pid_file_path)
        if not os.path.exists(_path):
            os.mkdir(_path)
        pid_file = open(pid_file_path, mode='w+')
        print('Judge daemon(pid=%d) start successfully.' % os.getpid())
        judge_logger.info('Judge daemon(pid=%d) start successfully.' % os.getpid())
        pid_file.write('%d' % os.getpid())
        pid_file.close()
    except Exception as e:
        print(e)

    #redirect stdio
    sys.stdout.flush()
    sys.stderr.flush()
    si = open(os.devnull, 'r')
    # so = open(os.devnull, 'a+')
    # se = open(os.devnull, 'a+')
    os.dup2(si.fileno(), sys.stdin.fileno())
    # os.dup2(so.fileno(), sys.stdout.fileno())
    # os.dup2(se.fileno(), sys.stderr.fileno())

    # signal.signal(signal.SIGKILL, exit_clean)

    main_loop(judge_logger)

    try:
        os.remove(pid_file_path)
    except Exception as e:
        judge_logger.error(e)
    exit(0)
Example #13
def copy_database(oOrigConn, oDestConnn, oLogHandler=None):
    """Copy the database, with no attempts to upgrade.

       This is a straight copy, with no provision for funky stuff.
       Compatibility of database structures is assumed, but not checked.
       """
    # Not checking versions probably should be fixed
    # Copy tables needed before we can copy AbstractCard
    flush_cache()
    oVer = DatabaseVersion()
    oVer.expire_cache()
    oLogger = Logger('copy DB')
    if oLogHandler:
        oLogger.addHandler(oLogHandler)
        if hasattr(oLogHandler, 'set_total'):
            iTotal = 14 + AbstractCard.select(connection=oOrigConn).count() + \
                    PhysicalCard.select(connection=oOrigConn).count() + \
                    PhysicalCardSet.select(connection=oOrigConn).count()
            oLogHandler.set_total(iTotal)
    bRes = True
    aMessages = []
    oTrans = oDestConnn.transaction()
    aToCopy = [
            (copy_rarity, 'Rarity table', False),
            (copy_expansion, 'Expansion table', False),
            (copy_discipline, 'Discipline table', False),
            (copy_clan, 'Clan table', False),
            (copy_creed, 'Creed table', False),
            (copy_virtue, 'Virtue table', False),
            (copy_card_type, 'CardType table', False),
            (copy_ruling, 'Ruling table', False),
            (copy_discipline_pair, 'DisciplinePair table', False),
            (copy_rarity_pair, 'RarityPair table', False),
            (copy_sect, 'Sect table', False),
            (copy_title, 'Title table', False),
            (copy_artist, 'Artist table', False),
            (copy_keyword, 'Keyword table', False),
            (copy_abstract_card, 'AbstractCard table', True),
            (copy_physical_card, 'PhysicalCard table', True),
            (copy_physical_card_set, 'PhysicalCardSet table', True),
            ]
    for fCopy, sName, bPassLogger in aToCopy:
        try:
            if bRes:
                if bPassLogger:
                    fCopy(oOrigConn, oTrans, oLogger)
                else:
                    fCopy(oOrigConn, oTrans)
        except SQLObjectNotFound as oExp:
            bRes = False
            aMessages.append('Unable to copy %s: Aborting with error: %s'
                    % (sName, oExp))
        else:
            oTrans.commit()
            oTrans.cache.clear()
            if not bPassLogger:
                oLogger.info('%s copied' % sName)
Example #14
File: func_tests.py  Project: 5nizza/aisy
def run_tests(
    test_files,
    run_tool: "func(file, result_file)->(rc, out, err)",
    check_answer: "func(test_file, result_file, rc, out, err)->(rc, out, err)",
    stop_on_error,
    logger: Logger,
    output_folder=None,
):
    """
    :param output_folder: if not None, intermediate results are saved there.
                          Files in that folder will be overwritten.
    """

    if output_folder:
        output_dir = output_folder
        makedirs(output_dir, exist_ok=True)
    else:
        output_dir = get_tmp_dir_name()

    logger.info("using " + output_dir + " as the temporal folder")

    failed_tests = list()
    for test in test_files:
        logger.info("testing {test}..".format(test=test))

        log_stream = open(_generate_name(output_dir, test) + ".log", "w")

        result_file = _generate_name(output_dir, test) + ".model"
        r_rc, r_out, r_err = run_tool(test, result_file)

        logger.debug(rc_out_err_to_str(r_rc, r_out, r_err))
        print(rc_out_err_to_str(r_rc, r_out, r_err), file=log_stream)

        c_rc, c_out, c_err = check_answer(test, result_file, r_rc, r_out, r_err)
        logger.debug(rc_out_err_to_str(c_rc, c_out, c_err))
        print(rc_out_err_to_str(c_rc, c_out, c_err), file=log_stream)

        if c_rc != 0:
            logger.info("    FAILED")
            failed_tests.append(test)
            if stop_on_error:
                break

    if failed_tests:
        logger.info(
            "The following tests failed: %s \n%s",
            "".join("\n    " + t for t in failed_tests),
            "See logs in " + output_dir,
        )
    else:
        logger.info("ALL TESTS PASSED")

    if not output_folder and not failed_tests:
        shutil.rmtree(output_dir)

    return not failed_tests
Example #15
def write_combos():
    logger = Logger('name',20)
    handler = FileHandler('flog.log')
    logger.addHandler(handler)
    with open('namelist.txt','a') as fileobject:
        llist = ("{} {}".format(x,y) for x in names(0, 'names.txt') for y in names(1, 'names.txt'))
        for name in llist:
            if len(name) > 17:
                logger.info('{} is {} characters long'.format(name, len(name)))
            fileobject.write('{}\n'.format(name))
Example #16
async def send_dumplings_from_queue_to_hub(
        kitchen_name: str,
        hub: str,
        dumpling_queue: multiprocessing.Queue,
        kitchen_info: dict,
        log: logging.Logger,
):
    """
    Grabs dumplings from the dumpling queue and sends them to ``nd-hub``.

    :param kitchen_name: The name of the kitchen.
    :param hub: The address where ``nd-hub`` is receiving dumplings.
    :param dumpling_queue: Queue to grab dumplings from.
    :param kitchen_info: Dict describing the kitchen.
    :param log: Logger.
    """
    hub_ws = 'ws://{0}'.format(hub)

    log.info("{0}: Connecting to the dumpling hub at {1}".format(
        kitchen_name, hub_ws)
    )

    try:
        websocket = await websockets.connect(hub_ws)
    except OSError as e:
        log.error(
            "{0}: There was a problem with the dumpling hub connection. "
            "Is nd-hub available?".format(kitchen_name))
        log.error("{0}: {1}".format(kitchen_name, e))
        return

    try:
        # Register our kitchen information with the dumpling hub.
        await websocket.send(json.dumps(kitchen_info))

        # Send dumplings to the hub when they come in from the chefs.
        while True:
            dumpling = dumpling_queue.get()
            await websocket.send(dumpling)
    except asyncio.CancelledError:
        log.warning(
            "{0}: Connection to dumpling hub cancelled; closing...".format(
                kitchen_name))
        try:
            await websocket.close(*ND_CLOSE_MSGS['conn_cancelled'])
        except websockets.exceptions.InvalidState:
            pass
    except websockets.exceptions.ConnectionClosed as e:
        log.warning("{0}: Lost connection to dumpling hub: {1}".format(
            kitchen_name, e))
    except OSError as e:
        log.exception(
            "{0}: Error talking to dumpling hub: {1}".format(kitchen_name, e)
        )
Example #17
def attempt_database_upgrade(oLogHandler=None):
    """Attempt to upgrade the database, going via a temporary memory copy."""
    oTempConn = connectionForURI("sqlite:///:memory:")
    oLogger = Logger('attempt upgrade')
    if oLogHandler:
        oLogger.addHandler(oLogHandler)
    (bOK, aMessages) = create_memory_copy(oTempConn, oLogHandler)
    if bOK:
        oLogger.info("Copied database to memory, performing upgrade.")
        if len(aMessages) > 0:
            oLogger.info("Messages reported: %s", aMessages)
        (bOK, aMessages) = create_final_copy(oTempConn, oLogHandler)
        if bOK:
            oLogger.info("Everything seems to have gone OK")
            if len(aMessages) > 0:
                oLogger.info("Messages reported %s", aMessages)
            return True
        else:
            oLogger.critical("Unable to perform upgrade.")
            if len(aMessages) > 0:
                oLogger.error("Errors reported: %s", aMessages)
            oLogger.critical("!!YOUR DATABASE MAY BE CORRUPTED!!")
    else:
        oLogger.error("Unable to create memory copy. Database not upgraded.")
        if len(aMessages) > 0:
            oLogger.error("Errors reported %s", aMessages)
    return False
Example #18
def fetch_data(oFile, oOutFile=None, sHash=None, oLogHandler=None,
        fErrorHandler=None):
    """Fetch data from a file'ish object (WwFile, urlopen or file)"""
    try:
        if hasattr(oFile, 'info') and callable(oFile.info):
            sLength = oFile.info().getheader('Content-Length')
        else:
            sLength = None

        if sLength:
            oLogger = Logger('Sutekh data fetcher')
            if oLogHandler is not None:
                oLogger.addHandler(oLogHandler)
            aData = []
            iLength = int(sLength)
            if hasattr(oLogHandler, 'set_total'):
                # We promote to next integer, as we emit a signal
                # for any left over bits
                oLogHandler.set_total((iLength + 9999) // 10000)
            iTotal = 0
            bCont = True
            while bCont:
                sInf = oFile.read(10000)
                iTotal += len(sInf)
                if sInf:
                    oLogger.info('%d downloaded', iTotal)
                    if oOutFile:
                        oOutFile.write(sInf)
                    else:
                        aData.append(sInf)
                else:
                    bCont = False
            if oOutFile:
                sData = None
            else:
                sData = ''.join(aData)
        else:
            # Just try and download
            if oOutFile:
                oOutFile.write(oFile.read())
                sData = None
            else:
                sData = oFile.read()
    except urllib2.URLError as oExp:
        if fErrorHandler:
            fErrorHandler(oExp)
            sData = None
        else:
            raise
Example #19
def copy_to_new_abstract_card_db(oOrigConn, oNewConn, oCardLookup,
        oLogHandler=None):
    """Copy the card sets to a new Physical Card and Abstract Card List.

      Given an existing database, and a new database created from
      a new cardlist, copy the CardSets, going via CardSetHolders, so we
      can adapt to changed names, etc.
      """
    # pylint: disable-msg=R0914
    # we need a lot of variables here
    aPhysCardSets = []
    oOldConn = sqlhub.processConnection
    sqlhub.processConnection = oOrigConn
    # Copy Physical card sets
    oLogger = Logger('copy to new abstract card DB')
    if oLogHandler:
        oLogger.addHandler(oLogHandler)
        if hasattr(oLogHandler, 'set_total'):
            iTotal = 1 + PhysicalCardSet.select(connection=oOrigConn).count()
            oLogHandler.set_total(iTotal)
    aSets = list(PhysicalCardSet.select(connection=oOrigConn))
    bDone = False
    aDone = []
    # Ensure we only process a set after its parent
    while not bDone:
        aToDo = []
        for oSet in aSets:
            if oSet.parent is None or oSet.parent in aDone:
                oCS = make_card_set_holder(oSet, oOrigConn)
                aPhysCardSets.append(oCS)
                aDone.append(oSet)
            else:
                aToDo.append(oSet)
        if not aToDo:
            bDone = True
        else:
            aSets = aToDo
    # Save the current mapping
    oLogger.info('Memory copies made')
    # Create the cardsets from the holders
    dLookupCache = {}
    sqlhub.processConnection = oNewConn
    for oSet in aPhysCardSets:
        # create_pcs will manage transactions for us
        oSet.create_pcs(oCardLookup, dLookupCache)
        oLogger.info('Physical Card Set: %s', oSet.name)
        sqlhub.processConnection.cache.clear()
    sqlhub.processConnection = oOldConn
    return (True, [])
Example #20
def parse_ltl(par_text:str, logger:Logger) -> dict:
    #TODO: current version of parser is very restrictive: it allows only the specs of the form:
    # Forall (i,j..) ass_i_j -> (Forall(k) gua_k * Forall(l,m) gua_l_m)
    # it is impossible to have:
    # (Forall(i) a_i  ->  Forall(k) g_k) * (Forall(i,j) a_i_j  ->  Forall(i) g_i)
    # what we can have is:
    # (Forall(i,j,k) ((a_i -> g_i)) * (Forall(i,j) a_i_j -> g_i)

    """ Return {section:data}, see sections in syntax_desc """

    logger.info('parsing input spec..')
    section_name_to_data = dict(par_parser.parse(par_text, lexer=par_lexer))

    # TODO: check unknown signals
    return section_name_to_data
Example #21
def build_differencematrix(src_path, list_land, list_port,
                           logger: logging.Logger):
    logger.info('Building difference matrix...')

    difference_matrix = []
    for land in list_land:
        im_land = np.array(Image.open(src_path + land))
        im_land = im_land[:, (960-304):(960+304), :]

        difference_row = []
        for port in list_port:
            im_port = np.array(Image.open(src_path + port))
            im_port = misc.imresize(im_port, (1080, 608))
            difference_row += [np.sum((im_land - im_port) ** 2) / (608 * 1080)]
        difference_matrix += [difference_row]

    return np.array(difference_matrix)
Example #22
    def repair(
        self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
    ) -> bool:
        # TODO: It would be nice to try and get the contents of the
        # file/directory at this location in the current commit, rather than
        # just writing out an empty file or directory
        if stat.S_ISDIR(self.child.mode):
            log.info(
                f"replacing missing directory {self.compute_path()!r} with an "
                "empty directory"
            )
            overlay.write_empty_dir(self.child.inode_number)
        else:
            log.info(
                f"replacing missing file {self.compute_path()!r} with an empty file"
            )
            overlay.write_empty_file(self.child.inode_number)
        return True
Example #23
def run(sc: SlackClient, channel: str, message: str, retries: int,
        logger: logging.Logger) -> None:
    if sc.rtm_connect():
        logger.info("Connected to Slack")

        channel_id = find_channel_id(channel, sc)
        logger.debug(f"Found channel ID {channel_id} for #{channel}")

        logger.info(f"Listening for joins in #{channel}")

        retry_count = 0
        backoff = 0.5

        while True:
            try:
                # Handle dem events!
                for event in sc.rtm_read():
                    handle_event(event, channel, channel_id, message, sc, logger)

                # Reset exponential backoff retry strategy every time we
                # successfully loop. Failure would have happened in rtm_read()
                retry_count = 0

                time.sleep(0.5)

            # This is necessary to handle an error caused by a bug in Slack's
            # Python client. For more information see
            # https://github.com/slackhq/python-slackclient/issues/127
            #
            # The TimeoutError could be more elegantly resolved by making a PR
            # to the websocket-client library and letting them coerce that
            # exception to a WebSocketTimeoutException.
            except (websocket.WebSocketConnectionClosedException, TimeoutError):
                logger.error("Lost connection to Slack, reconnecting...")
                if not sc.rtm_connect():
                    logger.info("Failed to reconnect to Slack")
                    if retry_count >= retries:
                        sys.exit(bail(
                            'fatal',
                            'red',
                            "Too many failed reconnect attempts, shutting down")
                        )
                    time.sleep((backoff ** 2) / 4)
                else:
                    logger.info("Reconnected to Slack")

                retry_count += 1

    else:
        sys.exit(bail('fatal', 'red', "Couldn't connect to Slack"))
Example #24
def setup_localisations(logger: logging.Logger):
    """Setup gettext localisations."""
    import gettext
    import locale
    # Get the 'en_US' style language code
    lang_code = locale.getdefaultlocale()[0]

    # Allow overriding through command line.
    if len(sys.argv) > 1:
        for arg in sys.argv[1:]:
            if arg.casefold().startswith('lang='):
                lang_code = arg[5:]
                break

    # Expands single code to parent categories.
    expanded_langs = gettext._expand_lang(lang_code)

    logger.info('Language: {!r}', lang_code)
    logger.debug('Language codes: {!r}', expanded_langs)

    for lang in expanded_langs:
        try:
            file = open('../i18n/{}.mo'.format(lang), 'rb')
        except FileNotFoundError:
            pass
        else:
            trans = gettext.GNUTranslations(file)
            break
    else:
        # No translations, fallback to English.
        # That's fine if the user's language is actually English.
        if 'en' not in expanded_langs:
            logger.warning(
                "Can't find translation for codes: {!r}!",
                expanded_langs,
            )
        trans = gettext.NullTranslations()
    # Add these functions to builtins, plus _=gettext
    trans.install(['gettext', 'ngettext'])
Example #25
def begin(filename=None, failopen=False):
    if not filename:
        unique = os.environ['LOGNAME']
        cmd = os.path.basename(sys.argv[0])
        filename = "/tmp/%s-%s.lock" % (unique, cmd)

    if os.path.exists(filename):
        log.warn("Lockfile found!")
        f = open(filename, "r")
        pid = None
        try:
            pid = int(f.read())
        except ValueError:
            pass
        f.close()
        if not pid:
            log.error("Invalid lockfile contents.")
        else:
            try:
                os.getpgid(pid)
                log.error("Aborting! Previous process ({pid}) is still alive. Remove lockfile manually if in error: {path}".format(pid=pid, path=filename))
                sys.exit(1)
            except OSError:
                if failopen:
                    log.fatal("Aborting until stale lockfile is investigated: {path}".format(path=filename))
                    sys.exit(1)
                log.error("Lockfile is stale.")
        log.info("Removing old lockfile.")
        os.unlink(filename)

    f = open(filename, "w")
    f.write(str(os.getpid()))
    f.close()

    global lockfile
    lockfile = filename
Example #26
def initialize(img_path, logger: logging.Logger):
    logger.info('Initializing directories...')

    if not os.path.isdir(img_path):
        logger.info('{0} does not exist. Creating it...'.format(img_path))
        os.mkdir(img_path)

    if not os.path.isdir(img_path / 'dups/'):
        logger.info('{0} does not exist. Creating it...'
                    .format(img_path / 'dups/'))
        os.mkdir(img_path / 'dups/')
Example #27
    def repair(
        self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
    ) -> bool:
        lost_n_found = fsck_dir / "lost+found"
        lost_n_found.mkdir(exist_ok=True)
        log.info(f"moving orphan inodes to {lost_n_found}")

        for inode in self.orphan_directories:
            log.info(
                f"moving contents of orphan directory {inode.inode_number} "
                f"to lost+found"
            )
            inode_lnf_path = lost_n_found / str(inode.inode_number)
            overlay.extract_dir(inode.inode_number, inode_lnf_path, remove=True)

        file_mode = stat.S_IFREG | 0o644
        for inode in self.orphan_files:
            log.info(f"moving orphan file {inode.inode_number} to lost+found")
            inode_lnf_path = lost_n_found / str(inode.inode_number)
            overlay.extract_file(
                inode.inode_number, inode_lnf_path, file_mode, remove=True
            )

        return True
Example #28
class BaseFeature(abc.ABC):
    save_memory: bool = True

    def __init__(self, debugging: bool = False, **kwargs) -> None:
        super().__init__()
        self.name = self.__class__.__name__
        self.debugging = debugging
        self._logger = Logger(self.__class__.__name__)
        handler = StreamHandler()
        fmt = Formatter("%(asctime)s - %(levelname)s - %(message)s")
        handler.setFormatter(fmt)
        handler.setLevel(INFO)
        self._logger.addHandler(handler)

        self.GCS_BUCKET_NAME = GCS_BUCKET_NAME
        self.PROJECT_ID = PROJECT_ID

        self.train_table = f"`{PROJECT_ID}.riiid.train`"

    @abc.abstractmethod
    def import_columns(self) -> List[str]:
        """この特徴量を作るのに必要なカラムを指定する
        """
        ...

    @abc.abstractmethod
    def make_features(
        self, df_train_input: pd.DataFrame,
    ) -> pd.DataFrame:
        """BigQuery から取得した生データの DataFrame を特徴量に変換する
        """
        ...

    @classmethod
    def add_feature_specific_arguments(cls, parser: argparse.ArgumentParser):
        return

    @classmethod
    def main(cls):
        import logging

        logging.basicConfig(level=logging.INFO)
        parser = argparse.ArgumentParser()
        parser.add_argument("--debug", action="store_true")
        cls.add_feature_specific_arguments(parser)
        args = parser.parse_args()
        instance = cls(debugging=args.debug, **vars(args))
        instance.run()

    def run(self):
        """何も考えずにとりあえずこれを実行すれば BigQuery からデータを読み込んで変換し GCS にアップロードしてくれる
        """
        self._logger.info(f"Running with debugging={self.debugging}")
        with tempfile.TemporaryDirectory() as tempdir:
            files: List[str] = []
            train_path = os.path.join(tempdir, f"{self.name}_training.ftr")

            self.read_and_save_features(
                self.train_table, train_path,
            )
            self._upload_to_gs([train_path])

    def read_and_save_features(
        self,
        train_table_name: str,
        train_output_path: str,
    ) -> None:
        df_train_input = self._read_from_bigquery(train_table_name)
        df_train_features = self.make_features(
            df_train_input
        )
        assert (
            df_train_input.shape[0] == df_train_features.shape[0]
        ), "generated train features is not compatible with the table"
        df_train_features.columns = f"{self.name}_" + df_train_features.columns

        if self.save_memory:
            self._logger.info("Reduce memory size - train data")
            df_train_features = reduce_mem_usage(df_train_features)

        self._logger.info(f"Saving features to {train_output_path}")
        df_train_features.to_feather(train_output_path)

    def _read_from_bigquery(self, table_name: str) -> pd.DataFrame:
        self._logger.info(f"Reading from {table_name}")
        query = """
            select {}
            from {}
            where content_type_id = 0
        """.format(
            ", ".join(self.import_columns()), table_name
        )
        if self.debugging:
            query += " limit 10000"

        bqclient = bigquery.Client(project=PROJECT_ID)
        bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient()
        df = (
            bqclient.query(query)
            .result()
            .to_dataframe(bqstorage_client=bqstorageclient)
        )
        return df

    def _upload_to_gs(self, files: List[str]):
        client = storage.Client(project=PROJECT_ID)
        bucket = client.get_bucket(GCS_BUCKET_NAME)

        if self.debugging:
            bucket_dir_name = "features_debug"
        else:
            bucket_dir_name = "features"

        for filename in files:
            basename = os.path.basename(filename)
            blob = storage.Blob(os.path.join(bucket_dir_name, basename), bucket)
            self._logger.info(f"Uploading {basename} to {blob.path}")
            blob.upload_from_filename(filename)

    def _download_from_gs(self, feather_file_name: str) -> pd.DataFrame:
        """GCSにある特徴量ファイル(feather形式)を読み込む
        """
        client = storage.Client(project=PROJECT_ID)
        bucket = client.get_bucket(GCS_BUCKET_NAME)

        if self.debugging:
            bucket_dir_name = "features_debug"
        else:
            bucket_dir_name = "features"

        blob = storage.Blob(
            os.path.join(bucket_dir_name, feather_file_name),
            bucket
        )
        content = blob.download_as_string()
        print(f"Downloading {feather_file_name} from {blob.path}")
        df = pd.read_feather(BytesIO(content))

        return df
Example #29
    async def sync(self, args: Namespace, logger: logging.Logger,
                   chain: AsyncChainAPI, base_db: AtomicDatabaseAPI,
                   peer_pool: BasePeerPool, event_bus: EndpointAPI) -> None:

        logger.info("Node running without sync (--sync-mode=%s)",
                    self.get_sync_mode())
Example #30
class Logger:
    """Logger class

    Show and collect log entries.
    """

    LEVEL_DEBUG = 'debug'
    LEVEL_INFO = 'info'
    LEVEL_WARNING = 'warning'
    LEVEL_ERROR = 'error'
    LEVEL_CRITICAL = 'critical'

    def __init__(self, logger=None):
        """Constructor

        :param Logger logger: Logger
        """
        if logger:
            self.logger = logger
        else:
            self.logger = Log("Config Generator")

        self.logs = []

    def clear(self):
        """Clear log entries."""
        self.logs = []

    def log_entries(self):
        """Return log entries."""
        return self.logs

    def debug(self, msg):
        """Show debug log entry.

        :param str msg: Log message
        """
        self.logger.debug(msg)
        # do not collect debug entries

    def info(self, msg):
        """Add info log entry.

        :param str msg: Log message
        """
        self.logger.info(msg)
        self.add_log_entry(msg, self.LEVEL_INFO)

    def warning(self, msg):
        """Add warning log entry.

        :param str msg: Log message
        """
        self.logger.warning(msg)
        self.add_log_entry(msg, self.LEVEL_WARNING)

    def warn(self, msg):
        self.warning(msg)

    def error(self, msg):
        """Add error log entry.

        :param str msg: Log message
        """
        self.logger.error(msg)
        self.add_log_entry(msg, self.LEVEL_ERROR)

    def critical(self, msg):
        """Add critical log entry.

        :param str msg: Log message
        """
        self.logger.critical(msg)
        self.add_log_entry(msg, self.LEVEL_CRITICAL)

    def add_log_entry(self, msg, level):
        """Append log entry with level.

        :param str msg: Log message
        :param str level: Log level
        """
        self.logs.append({'msg': msg, 'level': level})
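A brief sketch of the wrapper in use; it passes a standard-library logger explicitly because the Log("Config Generator") fallback used above is not part of this snippet:

import logging

logging.basicConfig(level=logging.INFO)
log = Logger(logging.getLogger('config-generator'))
log.info('Loading configuration')
log.warning('Missing optional section, using defaults')
for entry in log.log_entries():
    print(entry['level'], entry['msg'])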
Example #31
def search_nb_of_multi_target_trees_to_use(
    n_tree_rules_to_generate: int,
    prepared_data_list: List[PreparedDataForTargetSet],
    min_support: float,
    max_depth: int,
    logger: Logger,
    seed: Optional[int] = None,
) -> Tuple[Optional[List[Tuple[PreparedDataForTargetSet,
                               RandomForestClassifier]]], TimeDiffSec]:
    # nb_of_trees_to_use: int = 1
    nb_of_tree_based_rules_after_conversion: int = 0
    current_rf_list: Optional[List[Tuple[PreparedDataForTargetSet,
                                         RandomForestClassifier]]] = None
    total_time_random_forest_learning_s: TimeDiffSec = 0.0

    # min_nb_of_rfs = len(prepared_data_list)

    # --- estimate the nb of trees to use -------------------------------------------
    max_n_rules_in_tree = 2**max_depth
    min_n_trees_to_use = n_tree_rules_to_generate / max_n_rules_in_tree
    nb_of_rfs_to_use = len(prepared_data_list)
    estimate_nb_of_trees_per_rf: int = math.ceil(min_n_trees_to_use /
                                                 nb_of_rfs_to_use)

    logger.info(
        f"INITIAL ESTIMATE: use {nb_of_rfs_to_use} RFs of each {estimate_nb_of_trees_per_rf} trees "
        f"for about {min_n_trees_to_use} trees in total")

    nb_of_trees_to_use = estimate_nb_of_trees_per_rf

    current_step_size = 1

    should_break = False
    while not should_break:
        nb_of_tree_based_rules_after_conversion = 0
        current_rf_list = []
        total_time_random_forest_learning_s = 0.0
        prepared_data: PreparedDataForTargetSet
        for prepared_data in prepared_data_list:

            start_time_decision_tree_learning_s = time.time()
            classifier: RandomForestClassifier = RandomForestClassifier(
                n_estimators=nb_of_trees_to_use,
                random_state=seed,
                min_samples_leaf=min_support,
                max_depth=max_depth)
            current_rf_clf = classifier

            # --- Learn a random forest given the current number of trees -----------------------------------
            classifier.fit(
                prepared_data.df_one_hot_encoded_descriptive_attributes,
                prepared_data.df_one_hot_encoded_target_attributes)
            end_time_decision_tree_learning_s = time.time()
            total_time_decision_tree_learning_s: float = end_time_decision_tree_learning_s - start_time_decision_tree_learning_s
            total_time_random_forest_learning_s += total_time_decision_tree_learning_s

            # --- b. Calculate its total number of leaves ----------------------------------
            tree_classifiers: List[
                DecisionTreeClassifier] = classifier.estimators_
            total_nb_of_leafs_in_random_forest: int = 0
            for tree_clf in tree_classifiers:
                total_nb_of_leafs_in_random_forest += get_nb_of_leaf_nodes(
                    tree_clf)
            nb_of_tree_based_rules_after_conversion += total_nb_of_leafs_in_random_forest
            current_rf_list.append((prepared_data, current_rf_clf))

        if nb_of_tree_based_rules_after_conversion < n_tree_rules_to_generate:
            logger.info(
                f'Learned {len(current_rf_list)} RFs with each {nb_of_trees_to_use} trees'
                f'--> {nb_of_tree_based_rules_after_conversion} rules '
                f' < {n_tree_rules_to_generate} (goal)) '
                f'--> INcreasing current step size {current_step_size} with 1')

            current_step_size += 1
            nb_of_trees_to_use += current_step_size
        if nb_of_tree_based_rules_after_conversion >= n_tree_rules_to_generate:
            should_break = True
        # else:
        #     logger.info(f'Learned {len(current_rf_list)} RFs with each {nb_of_trees_to_use} trees'
        #                 f'--> {nb_of_tree_based_rules_after_conversion} rules '
        #                 f' > {n_tree_rules_to_generate} (goal)) '
        #                 f'--> DEcreasing current step size {current_step_size} with 1')
        #     nb_of_trees_to_use -= current_step_size
        #     if current_step_size == 1:
        #         should_break = True
        #     current_step_size = 1
        #     nb_of_trees_to_use += 1

    logger.info(
        f'FINISHED search for tree rules: {len(current_rf_list)} RFs with each {nb_of_trees_to_use} trees'
        f'--> {nb_of_tree_based_rules_after_conversion} rules '
        f' > {n_tree_rules_to_generate} (goal)) ')

    return current_rf_list, total_time_random_forest_learning_s
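The initial estimate follows from the bound of 2 ** max_depth leaf rules per tree; a quick worked example with made-up inputs shows the arithmetic:

import math

max_depth = 6                      # at most 2 ** 6 = 64 leaf rules per tree
n_tree_rules_to_generate = 1000    # target number of tree-based rules (made up)
nb_of_rfs_to_use = 4               # e.g. one random forest per prepared target set

max_n_rules_in_tree = 2 ** max_depth                                 # 64
min_n_trees_to_use = n_tree_rules_to_generate / max_n_rules_in_tree  # 15.625 trees overall
trees_per_rf = math.ceil(min_n_trees_to_use / nb_of_rfs_to_use)      # 4 trees per forest
# 4 forests x 4 trees x <= 64 leaves gives at most 1024 rules, just above the 1000-rule goal
print(max_n_rules_in_tree, min_n_trees_to_use, trees_per_rf)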
Example #32
def plotClock(dfClk: pd.DataFrame,
              dRtk: dict,
              logger: logging.Logger,
              showplot: bool = False):
    """
    plotClock plots the clock offsets for all GNSS systems
    """
    cFuncName = colored(os.path.basename(__file__),
                        'yellow') + ' - ' + colored(
                            sys._getframe().f_code.co_name, 'green')

    # set up the plot
    plt.style.use('ggplot')
    colors = ['blue', 'red', 'green', 'black']

    amc.logDataframeInfo(df=dfClk,
                         dfName='dfClk',
                         callerName=cFuncName,
                         logger=logger)

    # find out for which system we have clk offset values
    GNSSSysts = []
    for gnss in ['GAL', 'GPS', 'OTH', 'GLO']:
        if dfClk[gnss].any():
            GNSSSysts.append(gnss)
    logger.info('{func:s}: Clock available for GNSS systems {syst:s}'.format(
        func=cFuncName, syst=' '.join(GNSSSysts)))

    # create the plot area
    fig, axis = plt.subplots(nrows=len(GNSSSysts),
                             ncols=1,
                             figsize=(24.0, 20.0))

    for i, GNSSsyst in enumerate(GNSSSysts):
        logger.info('{func:s}: plotting clock offset for {syst:s}'.format(
            func=cFuncName, syst=GNSSsyst))

        # get the axis to draw to
        if len(GNSSSysts) == 1:
            ax = axis
        else:
            ax = axis[i]

        # create the plot for this GNSS system
        dfClk.plot(ax=ax,
                   x='DT',
                   y=GNSSsyst,
                   marker='.',
                   linestyle='',
                   color=colors[i])

        # create the ticks for the time axis
        dtFormat = plot_utils.determine_datetime_ticks(
            startDT=dfClk['DT'].iloc[0], endDT=dfClk['DT'].iloc[-1])

        if dtFormat['minutes']:
            ax.xaxis.set_major_locator(
                dates.MinuteLocator(byminute=[0, 15, 30, 45], interval=1))
        else:
            ax.xaxis.set_major_locator(
                dates.HourLocator(
                    interval=dtFormat['hourInterval']))  # every 4 hours
        ax.xaxis.set_major_formatter(
            dates.DateFormatter('%H:%M'))  # hours and minutes

        ax.xaxis.set_minor_locator(dates.DayLocator(interval=1))  # every day
        ax.xaxis.set_minor_formatter(dates.DateFormatter('\n%d-%m-%Y'))

        ax.xaxis.set_tick_params(rotation=0)
        for tick in ax.xaxis.get_major_ticks():
            # tick.tick1line.set_markersize(0)
            # tick.tick2line.set_markersize(0)
            tick.label1.set_horizontalalignment('center')

        # name the axis
        ax.set_ylabel('{syst:s} Clock Offset [ns]'.format(syst=GNSSsyst),
                      fontsize='large',
                      color=colors[i])
        ax.set_xlabel('Time', fontsize='large')

        # title of sub-plot
        ax.set_title('Clock offset relative to {syst:s} @ {date:s}'.format(
            syst=GNSSsyst,
            date=dfClk['DT'].iloc[0].strftime('%d %b %Y')),
            fontsize='large')

    # save the plot in subdir png of GNSSSystem
    amutils.mkdir_p(os.path.join(dRtk['info']['dir'], 'png'))
    pngName = os.path.join(
        dRtk['info']['dir'], 'png',
        os.path.splitext(dRtk['info']['rtkPosFile'])[0] + '-CLK.png')
    # print('pngName = {:s}'.format(pngName))
    fig.savefig(pngName, dpi=fig.dpi)

    logger.info('{func:s}: created plot {plot:s}'.format(func=cFuncName,
                                                         plot=colored(
                                                             pngName,
                                                             'green')))

    if showplot:
        plt.show(block=True)
    else:
        plt.close(fig)
Example #33
def setup_localisations(logger: logging.Logger):
    """Setup gettext localisations."""
    from srctools.property_parser import PROP_FLAGS_DEFAULT
    import gettext
    import locale

    # Get the 'en_US' style language code
    lang_code = locale.getdefaultlocale()[0]

    # Allow overriding through command line.
    if len(sys.argv) > 1:
        for arg in sys.argv[1:]:
            if arg.casefold().startswith('lang='):
                lang_code = arg[5:]
                break

    # Expands single code to parent categories.
    expanded_langs = gettext._expand_lang(lang_code)

    logger.info('Language: {!r}', lang_code)
    logger.debug('Language codes: {!r}', expanded_langs)

    # Add these to Property's default flags, so config files can also
    # be localised.
    for lang in expanded_langs:
        PROP_FLAGS_DEFAULT['lang_' + lang] = True

    for lang in expanded_langs:
        try:
            file = open('../i18n/{}.mo'.format(lang), 'rb')
        except FileNotFoundError:
            pass
        else:
            trans = gettext.GNUTranslations(file)
            break
    else:
        # No translations, fallback to English.
        # That's fine if the user's language is actually English.
        if 'en' not in expanded_langs:
            logger.warning(
                "Can't find translation for codes: {!r}!",
                expanded_langs,
            )
        trans = gettext.NullTranslations()
    # Add these functions to builtins, plus _=gettext
    trans.install(['gettext', 'ngettext'])

    # Some lang-specific overrides..

    if trans.gettext('__LANG_USE_SANS_SERIF__') == 'YES':
        # For Japanese/Chinese, we want a 'sans-serif' / gothic font
        # style.
        try:
            from tkinter import font
        except ImportError:
            return
        font_names = [
            'TkDefaultFont',
            'TkHeadingFont',
            'TkTooltipFont',
            'TkMenuFont',
            'TkTextFont',
            'TkCaptionFont',
            'TkSmallCaptionFont',
            'TkIconFont',
            # Note - not fixed-width...
        ]
        for font_name in font_names:
            font.nametofont(font_name).configure(family='sans-serif')
Example #34
    def __call__(self, logger: logging.Logger, *args: Any, **kwds: Any) -> Any:

        logger.info("##### Start search for change in backend status")

        for changes in watch(self.location):

            logger.info("##### Change detected")

            changed_path = list(changes)[0][1]
            if changed_path.split('/')[-1] == 'debug.log':
                with open(changed_path, 'r') as f:
                    text = f.readlines()[-1]
                dct = dict(
                    map(
                        lambda pair: tuple(pair),
                        map(
                            lambda _split: _split.split("="),
                            filter(lambda split: len(split.split("=")) == 2,
                                   text.split(" ")))))

                logger.info(dct)

                try:

                    if round(float(dct["progress"]), 2) == 0.2:
                        logger.info("##### Sync in 20%")

                    if round(float(dct["progress"]), 2) == 0.4:
                        logger.info("##### Sync in 40%")

                    if round(float(dct["progress"]), 2) == 0.6:
                        logger.info("##### Sync in 60%")

                    if round(float(dct["progress"]), 2) == 0.8:
                        logger.info("##### Sync in 80%")

                    if float(dct["progress"]) == 1.0:
                        logger.info("##### Sync done. Exiting")
                        sys.exit()

                except Exception as e:

                    logger.info("##### Watcher exception: {}".format(str(e)))
Example #35
def _log_event(config: Dict[str, Any], logger: Logger, event: Dict[str, Any],
               what: str, who: str, addinfo: str) -> None:
    if config['debug_rules']:
        logger.info("Event %d: %s/%s/%s - %s" %
                    (event["id"], what, who, addinfo, event["text"]))
Example #36
def notify_suppliers_of_framework_application_event(
    data_api_client: DataAPIClient,
    notify_client: DMNotifyClient,
    notify_template_id: str,
    framework_slug: str,
    stage: str,
    dry_run: bool,
    logger: Logger,
    run_id: Optional[UUID] = None,
) -> int:
    run_is_new = not run_id
    run_id = run_id or uuid4()
    logger.info(
        f"{'Starting' if run_is_new else 'Resuming'} run id {{run_id}}",
        extra={"run_id": str(run_id)})

    framework = data_api_client.get_framework(framework_slug)["frameworks"]
    framework_context = {
        "framework_name":
        framework["name"],
        "updates_url":
        f"{get_web_url_from_stage(stage)}/suppliers/frameworks/{framework['slug']}/updates",
        "framework_dashboard_url":
        f"{get_web_url_from_stage(stage)}/suppliers/frameworks/{framework['slug']}/",
        "clarification_questions_closed":
        "no" if framework["clarificationQuestionsOpen"] else "yes",
        **_formatted_dates_from_framework(framework),
    }

    failure_count = 0

    for supplier_framework in data_api_client.find_framework_suppliers_iter(
            framework_slug):
        for user in data_api_client.find_users_iter(
                supplier_id=supplier_framework["supplierId"]):
            if user["active"]:
                # generating ref separately so we can exclude certain parameters from the context dict
                notify_ref = notify_client.get_reference(
                    user["emailAddress"],
                    notify_template_id,
                    {
                        "framework_slug": framework["slug"],
                        "run_id": str(run_id),
                    },
                )
                if dry_run:
                    # Use the sent references cache unless we're re-running the script following a failure
                    if notify_client.has_been_sent(
                            notify_ref, use_recent_cache=run_is_new):
                        logger.debug(
                            "[DRY RUN] Would NOT send notification to {email_hash} (already sent)",
                            extra={
                                "email_hash": hash_string(user["emailAddress"])
                            },
                        )
                    else:
                        logger.info(
                            "[DRY RUN] Would send notification to {email_hash}",
                            extra={
                                "email_hash": hash_string(user["emailAddress"])
                            },
                        )
                else:
                    try:
                        # Use the sent references cache unless we're re-running the script following a failure
                        notify_client.send_email(
                            user["emailAddress"],
                            notify_template_id,
                            framework_context,
                            allow_resend=False,
                            reference=notify_ref,
                            use_recent_cache=run_is_new,
                        )
                    except EmailError as e:
                        failure_count += 1
                        logger.error(
                            "Failed sending to {email_hash}: {e}",
                            extra={
                                "email_hash":
                                hash_string(user["emailAddress"]),
                                "e": str(e),
                            },
                        )

                        if isinstance(e, EmailTemplateError):
                            raise  # do not try to continue

    return failure_count
Example #37
class Learner:
    """
    Any participant in the "learning loop" - a class inheriting from
    this one has the ability, synchronously or asynchronously,
    to learn about nodes in the network, verify some essential
    details about them, and store information about them for later use.
    """

    _SHORT_LEARNING_DELAY = 5
    _LONG_LEARNING_DELAY = 90
    LEARNING_TIMEOUT = 10
    _ROUNDS_WITHOUT_NODES_AFTER_WHICH_TO_SLOW_DOWN = 10

    # For Keeps
    __DEFAULT_NODE_STORAGE = InMemoryNodeStorage
    __DEFAULT_MIDDLEWARE_CLASS = RestMiddleware

    class NotEnoughTeachers(RuntimeError):
        pass

    class UnresponsiveTeacher(ConnectionError):
        pass

    def __init__(self,
                 network_middleware: RestMiddleware = __DEFAULT_MIDDLEWARE_CLASS(),
                 start_learning_now: bool = False,
                 learn_on_same_thread: bool = False,
                 known_nodes: tuple = None,
                 seed_nodes: Tuple[tuple] = None,
                 known_certificates_dir: str = None,
                 node_storage=None,
                 save_metadata: bool = False,
                 abort_on_learning_error: bool = False
                 ) -> None:

        self.log = Logger("characters")  # type: Logger
        self.network_middleware = network_middleware
        self.save_metadata = save_metadata
        self.start_learning_now = start_learning_now
        self.learn_on_same_thread = learn_on_same_thread

        self._abort_on_learning_error = abort_on_learning_error
        self._learning_listeners = defaultdict(list)
        self._node_ids_to_learn_about_immediately = set()

        self.known_certificates_dir = known_certificates_dir or TemporaryDirectory("nucypher-tmp-certs-").name
        self.__known_nodes = FleetState()

        self.done_seeding = False

        # Read
        if node_storage is None:
            node_storage = self.__DEFAULT_NODE_STORAGE(federated_only=self.federated_only,
                                                       # TODO: remove federated_only
                                                       character_class=self.__class__)

        self.node_storage = node_storage
        if save_metadata and node_storage is constants.NO_STORAGE_AVAILIBLE:
            raise ValueError("Cannot save nodes without a configured node storage")

        known_nodes = known_nodes or tuple()
        self.unresponsive_startup_nodes = list()  # TODO: Attempt to use these again later
        for node in known_nodes:
            try:
                self.remember_node(node, update_fleet_state=False)  # TODO: Need to test this better - do we ever init an Ursula-Learner with Node Storage?
            except self.UnresponsiveTeacher:
                self.unresponsive_startup_nodes.append(node)

        self.teacher_nodes = deque()
        self._current_teacher_node = None  # type: Teacher
        self._learning_task = task.LoopingCall(self.keep_learning_about_nodes)
        self._learning_round = 0  # type: int
        self._rounds_without_new_nodes = 0  # type: int
        self._seed_nodes = seed_nodes or []
        self.unresponsive_seed_nodes = set()

        if self.start_learning_now:
            self.start_learning_loop(now=self.learn_on_same_thread)

    @property
    def known_nodes(self):
        return self.__known_nodes

    def load_seednodes(self,
                       read_storages: bool = True,
                       retry_attempts: int = 3,
                       retry_rate: int = 2,
                       timeout=3):
        """
        Engage known nodes from storages and pre-fetch hardcoded seednode certificates for node learning.
        """
        if self.done_seeding:
            self.log.debug("Already done seeding; won't try again.")
            return

        def __attempt_seednode_learning(seednode_metadata, current_attempt=1):
            from nucypher.characters.lawful import Ursula
            self.log.debug(
                "Seeding from: {}|{}:{}".format(seednode_metadata.checksum_address,
                                                seednode_metadata.rest_host,
                                                seednode_metadata.rest_port))

            seed_node = Ursula.from_seednode_metadata(seednode_metadata=seednode_metadata,
                                                      network_middleware=self.network_middleware,
                                                      certificates_directory=self.known_certificates_dir,
                                                      timeout=timeout,
                                                      federated_only=self.federated_only)  # TODO: 466
            if seed_node is False:
                self.unresponsive_seed_nodes.add(seednode_metadata)
            else:
                self.unresponsive_seed_nodes.discard(seednode_metadata)
                self.remember_node(seed_node)

        for seednode_metadata in self._seed_nodes:
            __attempt_seednode_learning(seednode_metadata=seednode_metadata)

        if not self.unresponsive_seed_nodes:
            self.log.info("Finished learning about all seednodes.")
        self.done_seeding = True

        if read_storages is True:
            self.read_nodes_from_storage()

        if not self.known_nodes:
            self.log.warn("No seednodes were available after {} attempts".format(retry_attempts))
            # TODO: Need some actual logic here for situation with no seed nodes (ie, maybe try again much later)

    def read_nodes_from_storage(self) -> set:
        stored_nodes = self.node_storage.all(federated_only=self.federated_only)  # TODO: 466
        for node in stored_nodes:
            self.remember_node(node)

    def sorted_nodes(self):
        nodes_to_consider = list(self.known_nodes.values())
        return sorted(nodes_to_consider, key=lambda n: n.checksum_public_address)

    def remember_node(self, node, force_verification_check=False, update_fleet_state=True):

        if node == self:  # No need to remember self.
            return False

        # First, determine if this is an outdated representation of an already known node.
        with suppress(KeyError):
            already_known_node = self.known_nodes[node.checksum_public_address]
            if not node.timestamp > already_known_node.timestamp:
                self.log.debug("Skipping already known node {}".format(already_known_node))
                # This node is already known.  We can safely return.
                return False

        node.save_certificate_to_disk(directory=self.known_certificates_dir, force=True)  # TODO: Verify before force?
        certificate_filepath = node.get_certificate_filepath(certificates_dir=self.known_certificates_dir)
        try:
            node.verify_node(force=force_verification_check,
                             network_middleware=self.network_middleware,
                             accept_federated_only=self.federated_only,  # TODO: 466
                             certificate_filepath=certificate_filepath)
        except SSLError:
            return False  # TODO: Bucket this node as having bad TLS info - maybe it's an update that hasn't fully propagated?
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout):
            self.log.info("No Response while trying to verify node {}|{}".format(node.rest_interface, node))
            return False  # TODO: Bucket this node as "ghost" or something: somebody else knows about it, but we can't get to it.

        listeners = self._learning_listeners.pop(node.checksum_public_address, tuple())
        address = node.checksum_public_address

        self.__known_nodes[address] = node

        if self.save_metadata:
            self.write_node_metadata(node=node)

        self.log.info("Remembering {}, popping {} listeners.".format(node.checksum_public_address, len(listeners)))
        for listener in listeners:
            listener.add(address)
        self._node_ids_to_learn_about_immediately.discard(address)

        if update_fleet_state:
            self.update_fleet_state()

        return True

    def update_fleet_state(self):
        # TODO: Probably not mutate these foreign attrs - ideally maybe move quite a bit of this method up to FleetState (maybe in __setitem__).
        self.known_nodes.checksum = keccak_digest(b"".join(bytes(n) for n in self.sorted_nodes())).hex()
        self.known_nodes.updated = maya.now()

    def start_learning_loop(self, now=False):
        if self._learning_task.running:
            return False
        elif now:
            self.load_seednodes()
            self._learning_task()  # Unhandled error might happen here.  TODO: Call this in a safer place.
            self.learning_deferred = self._learning_task.start(interval=self._SHORT_LEARNING_DELAY)
            self.learning_deferred.addErrback(self.handle_learning_errors)
            return self.learning_deferred
        else:
            seeder_deferred = deferToThread(self.load_seednodes)
            learner_deferred = self._learning_task.start(interval=self._SHORT_LEARNING_DELAY, now=now)
            seeder_deferred.addErrback(self.handle_learning_errors)
            learner_deferred.addErrback(self.handle_learning_errors)
            self.learning_deferred = defer.DeferredList([seeder_deferred, learner_deferred])
            return self.learning_deferred

    def stop_learning_loop(self):
        """
        Only for tests at this point.  Maybe some day for graceful shutdowns.
        """

    def handle_learning_errors(self, *args, **kwargs):
        failure = args[0]
        if self._abort_on_learning_error:
            self.log.critical("Unhandled error during node learning.  Attempting graceful crash.")
            reactor.callFromThread(self._crash_gracefully, failure=failure)
        else:
            self.log.warn("Unhandled error during node learning: {}".format(failure.getTraceback()))
            if not self._learning_task.running:
                self.start_learning_loop()  # TODO: Consider a single entry point for this with more elegant pause and unpause.

    def _crash_gracefully(self, failure=None):
        """
        A facility for crashing more gracefully in the event that an exception
        is unhandled in a different thread, especially inside a loop like the learning loop.
        """
        self._crashed = failure
        failure.raiseException()
        # TODO: We don't actually have checksum_public_address at this level - maybe only Characters can crash gracefully :-)
        self.log.critical("{} crashed with {}".format(self.checksum_public_address, failure))

    def shuffled_known_nodes(self):
        nodes_we_know_about = list(self.__known_nodes.values())
        random.shuffle(nodes_we_know_about)
        self.log.info("Shuffled {} known nodes".format(len(nodes_we_know_about)))
        return nodes_we_know_about

    def select_teacher_nodes(self):
        nodes_we_know_about = self.shuffled_known_nodes()

        if not nodes_we_know_about:
            raise self.NotEnoughTeachers("Need some nodes to start learning from.")

        self.teacher_nodes.extend(nodes_we_know_about)

    def cycle_teacher_node(self):
        # To ensure that all the best teachers are available, first let's make sure
        # that we have connected to all the seed nodes.
        if self.unresponsive_seed_nodes:
            self.log.info("Still have unresponsive seed nodes; trying again to connect.")
            self.load_seednodes()  # Ideally, this is async and singular.

        if not self.teacher_nodes:
            self.select_teacher_nodes()
        try:
            self._current_teacher_node = self.teacher_nodes.pop()
        except IndexError:
            error = "Not enough nodes to select a good teacher, Check your network connection then node configuration"
            raise self.NotEnoughTeachers(error)
        self.log.info("Cycled teachers; New teacher is {}".format(self._current_teacher_node.checksum_public_address))

    def current_teacher_node(self, cycle=False):
        if cycle:
            self.cycle_teacher_node()

        if not self._current_teacher_node:
            self.cycle_teacher_node()

        teacher = self._current_teacher_node

        return teacher

    def learn_about_nodes_now(self, force=False):
        if self._learning_task.running:
            self._learning_task.reset()
            self._learning_task()
        elif not force:
            self.log.warn(
                "Learning loop isn't started; can't learn about nodes now.  You can override this with force=True.")
        elif force:
            self.log.info("Learning loop wasn't started; forcing start now.")
            self._learning_task.start(self._SHORT_LEARNING_DELAY, now=True)

    def keep_learning_about_nodes(self):
        """
        Continually learn about new nodes.
        """
        self.learn_from_teacher_node(eager=False)  # TODO: Allow the user to set eagerness?

    def learn_about_specific_nodes(self, canonical_addresses: Set):
        self._node_ids_to_learn_about_immediately.update(canonical_addresses)  # hmmmm
        self.learn_about_nodes_now()

    # TODO: Dehydrate these next two methods.

    def block_until_number_of_known_nodes_is(self,
                                             number_of_nodes_to_know: int,
                                             timeout: int = 10,
                                             learn_on_this_thread: bool = False):
        start = maya.now()
        starting_round = self._learning_round

        while True:
            rounds_undertaken = self._learning_round - starting_round
            if len(self.__known_nodes) >= number_of_nodes_to_know:
                if rounds_undertaken:
                    self.log.info("Learned about enough nodes after {} rounds.".format(rounds_undertaken))
                return True

            if not self._learning_task.running:
                self.log.warn("Blocking to learn about nodes, but learning loop isn't running.")
            if learn_on_this_thread:
                try:
                    self.learn_from_teacher_node(eager=True)
                except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
                    # TODO: Even this "same thread" logic can be done off the main thread.
                    self.log.warn("Teacher was unreachable.  No good way to handle this on the main thread.")

            # The rest of the f*****g owl
            if (maya.now() - start).seconds > timeout:
                if not self._learning_task.running:
                    raise self.NotEnoughTeachers("Learning loop is not running.  Start it with start_learning().")
                else:
                    raise self.NotEnoughTeachers("After {} seconds and {} rounds, didn't find {} nodes".format(
                        timeout, rounds_undertaken, number_of_nodes_to_know))
            else:
                time.sleep(.1)

    def block_until_specific_nodes_are_known(self,
                                             canonical_addresses: Set,
                                             timeout=LEARNING_TIMEOUT,
                                             allow_missing=0,
                                             learn_on_this_thread=False):
        start = maya.now()
        starting_round = self._learning_round

        while True:
            if self._crashed:
                return self._crashed
            rounds_undertaken = self._learning_round - starting_round
            if canonical_addresses.issubset(self.__known_nodes):
                if rounds_undertaken:
                    self.log.info("Learned about all nodes after {} rounds.".format(rounds_undertaken))
                return True

            if not self._learning_task.running:
                self.log.warn("Blocking to learn about nodes, but learning loop isn't running.")
            if learn_on_this_thread:
                self.learn_from_teacher_node(eager=True)

            if (maya.now() - start).seconds > timeout:

                still_unknown = canonical_addresses.difference(self.__known_nodes)

                if len(still_unknown) <= allow_missing:
                    return False
                elif not self._learning_task.running:
                    raise self.NotEnoughTeachers("The learning loop is not running.  Start it with start_learning().")
                else:
                    raise self.NotEnoughTeachers(
                        "After {} seconds and {} rounds, didn't find these {} nodes: {}".format(
                            timeout, rounds_undertaken, len(still_unknown), still_unknown))

            else:
                time.sleep(.1)

    def _adjust_learning(self, node_list):
        """
        Takes a list of new nodes, adjusts learning accordingly.

        Currently, simply slows down learning loop when no new nodes have been discovered in a while.
        TODO: Do other important things - scrub, bucket, etc.
        """
        if node_list:
            self._rounds_without_new_nodes = 0
            self._learning_task.interval = self._SHORT_LEARNING_DELAY
        else:
            self._rounds_without_new_nodes += 1
            if self._rounds_without_new_nodes > self._ROUNDS_WITHOUT_NODES_AFTER_WHICH_TO_SLOW_DOWN:
                self.log.info("After {} rounds with no new nodes, it's time to slow down to {} seconds.".format(
                    self._ROUNDS_WITHOUT_NODES_AFTER_WHICH_TO_SLOW_DOWN,
                    self._LONG_LEARNING_DELAY))
                self._learning_task.interval = self._LONG_LEARNING_DELAY

    def _push_certain_newly_discovered_nodes_here(self, queue_to_push, node_addresses):
        """
        If any node_addresses are discovered, push them to queue_to_push.
        """
        for node_address in node_addresses:
            self.log.info("Adding listener for {}".format(node_address))
            self._learning_listeners[node_address].append(queue_to_push)

    def network_bootstrap(self, node_list: list) -> None:
        for node_addr, port in node_list:
            new_nodes = self.learn_about_nodes_now(node_addr, port)
            self.__known_nodes.update(new_nodes)

    def get_nodes_by_ids(self, node_ids):
        for node_id in node_ids:
            try:
                # Scenario 1: We already know about this node.
                return self.__known_nodes[node_id]
            except KeyError:
                raise NotImplementedError
        # Scenario 2: We don't know about this node, but a nearby node does.
        # TODO: Build a concurrent pool of lookups here.

        # Scenario 3: We don't know about this node, and neither does our friend.

    def write_node_metadata(self, node, serializer=bytes) -> str:
        return self.node_storage.save(node=node)

    def learn_from_teacher_node(self, eager=True):
        """
        Sends a request to node_url to find out about known nodes.
        """
        self._learning_round += 1

        try:
            current_teacher = self.current_teacher_node()
        except self.NotEnoughTeachers as e:
            self.log.warn("Can't learn right now: {}".format(e.args[0]))
            return

        rest_url = current_teacher.rest_interface  # TODO: Name this..?

        # TODO: Do we really want to try to learn about all these nodes instantly?
        # Hearing this traffic might give insight to an attacker.
        if VerifiableNode in self.__class__.__bases__:
            announce_nodes = [self]
        else:
            announce_nodes = None

        unresponsive_nodes = set()
        try:

            # TODO: Streamline path generation
            certificate_filepath = current_teacher.get_certificate_filepath(
                certificates_dir=self.known_certificates_dir)
            response = self.network_middleware.get_nodes_via_rest(url=rest_url,
                                                                  nodes_i_need=self._node_ids_to_learn_about_immediately,
                                                                  announce_nodes=announce_nodes,
                                                                  certificate_filepath=certificate_filepath)
        except requests.exceptions.ConnectionError as e:
            unresponsive_nodes.add(current_teacher)
            teacher_rest_info = current_teacher.rest_information()[0]

            # TODO: This error isn't necessarily "no response" - let's maybe pass on the text of the exception here.
            self.log.info("No Response from teacher: {}:{}.".format(teacher_rest_info.host, teacher_rest_info.port))
            self.cycle_teacher_node()
            return

        if response.status_code != 200:
            raise RuntimeError("Bad response from teacher: {} - {}".format(response, response.content))

        signature, nodes = signature_splitter(response.content, return_remainder=True)

        # TODO: This doesn't make sense - a decentralized node can still learn about a federated-only node.
        from nucypher.characters.lawful import Ursula
        node_list = Ursula.batch_from_bytes(nodes, federated_only=self.federated_only)  # TODO: 466

        new_nodes = []
        for node in node_list:
            try:
                if eager:
                    certificate_filepath = current_teacher.get_certificate_filepath(
                        certificates_dir=self.known_certificates_dir)
                    node.verify_node(self.network_middleware,
                                     accept_federated_only=self.federated_only,  # TODO: 466
                                     certificate_filepath=certificate_filepath)
                    self.log.debug("Verified node: {}".format(node.checksum_public_address))

                else:
                    node.validate_metadata(accept_federated_only=self.federated_only)  # TODO: 466

            except node.SuspiciousActivity:
                # TODO: Account for possibility that stamp, rather than interface, was bad.
                message = "Suspicious Activity: Discovered node with bad signature: {}.  " \
                          "Propagated by: {}".format(current_teacher.checksum_public_address, rest_url)
                self.log.warn(message)
            new = self.remember_node(node)
            if new:
                new_nodes.append(node)

        self._adjust_learning(new_nodes)

        learning_round_log_message = "Learning round {}.  Teacher: {} knew about {} nodes, {} were new."
        current_teacher.last_seen = maya.now()
        self.cycle_teacher_node()
        self.log.info(learning_round_log_message.format(self._learning_round,
                                                        current_teacher,
                                                        len(node_list),
                                                        len(new_nodes)), )
        if new_nodes and self.known_certificates_dir:
            for node in new_nodes:
                node.save_certificate_to_disk(self.known_certificates_dir, force=True)

        return new_nodes
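A minimal driving sketch for the learner above, assuming the class and its dependencies are importable; the subclass name, the federated_only flag and the node count are illustrative assumptions, not part of the original source.

# Hypothetical subclass: Learner.__init__ reads self.federated_only,
# which concrete characters normally provide.
class SimpleLearner(Learner):
    federated_only = True

learner = SimpleLearner(start_learning_now=False, save_metadata=False)
learner.start_learning_loop(now=False)  # seeds in a background thread, then learns every _SHORT_LEARNING_DELAY seconds
learner.block_until_number_of_known_nodes_is(3, timeout=30, learn_on_this_thread=True)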
Example #38
0
    def repair(self, log: logging.Logger, overlay: overlay_mod.Overlay,
               fsck_dir: Path) -> bool:
        log.info(
            f"replacing max inode number data with {self.next_inode_number}")
        overlay.write_next_inode_number(self.next_inode_number)
        return True
Example #39
0
def search_nb_of_single_target_trees_to_use(
    n_tree_rules_to_generate: int,
    prepared_data: PreparedDataForTargetSet,
    min_support: float,
    max_depth: int,
    logger: Logger,
    seed: Optional[int] = None,
) -> Tuple[Optional[RandomForestClassifier], TimeDiffSec]:

    nb_of_tree_based_rules_after_conversion: int = 0
    current_rf_clf: Optional[RandomForestClassifier] = None
    total_time_decision_tree_learning_s: TimeDiffSec = 0

    max_n_rules_in_tree: int = 2**max_depth
    min_n_trees_to_use = math.ceil(n_tree_rules_to_generate /
                                   max_n_rules_in_tree)
    nb_of_trees_to_use: int = min_n_trees_to_use

    current_step_size = 1

    should_break = False
    while not should_break:
        logger.info(f'Learning 1 RF using {nb_of_trees_to_use} trees...')
        nb_of_tree_based_rules_after_conversion = 0

        start_time_decision_tree_learning_s = time.time()
        current_rf_clf: RandomForestClassifier = RandomForestClassifier(
            n_estimators=nb_of_trees_to_use,
            random_state=seed,
            min_samples_leaf=min_support,
            max_depth=max_depth)

        # --- Learn a random forest given the current number of trees -----------------------------------
        current_rf_clf.fit(
            prepared_data.df_one_hot_encoded_descriptive_attributes,
            prepared_data.df_one_hot_encoded_target_attributes)
        end_time_decision_tree_learning_s = time.time()
        total_time_decision_tree_learning_s: TimeDiffSec = end_time_decision_tree_learning_s - start_time_decision_tree_learning_s

        # --- b. Calculate its total number of leaves ----------------------------------
        tree_classifiers: List[
            DecisionTreeClassifier] = current_rf_clf.estimators_
        total_nb_of_leafs_in_random_forest: int = 0
        for tree_clf in tree_classifiers:
            total_nb_of_leafs_in_random_forest += get_nb_of_leaf_nodes(
                tree_clf)
        nb_of_tree_based_rules_after_conversion += total_nb_of_leafs_in_random_forest

        if nb_of_tree_based_rules_after_conversion < n_tree_rules_to_generate:
            logger.info(
                f'Learned 1 RF with {nb_of_trees_to_use} trees '
                f'--> {nb_of_tree_based_rules_after_conversion} rules '
                f'< {n_tree_rules_to_generate} (goal) '
                f'--> increasing current step size {current_step_size} by 1')

            current_step_size += 1
            nb_of_trees_to_use += current_step_size
        if nb_of_tree_based_rules_after_conversion >= n_tree_rules_to_generate:
            should_break = True
        # else:
        #     logger.info(f'Learned {len(current_rf_list)} RFs with each {nb_of_trees_to_use} trees'
        #                 f'--> {nb_of_tree_based_rules_after_conversion} rules '
        #                 f' > {n_tree_rules_to_generate} (goal)) '
        #                 f'--> DEcreasing current step size {current_step_size} with 1')
        #     nb_of_trees_to_use -= current_step_size
        #     if current_step_size == 1:
        #         should_break = True
        #     current_step_size = 1
        #     nb_of_trees_to_use += 1

    logger.info(
        f'FINISHED search for tree rules: RF has {nb_of_trees_to_use} trees '
        f'--> {nb_of_tree_based_rules_after_conversion} rules '
        f'>= {n_tree_rules_to_generate} (goal)')

    return current_rf_clf, total_time_decision_tree_learning_s
Example #40
0
def log_record(record: TrainingRecord, logger: logging.Logger):
    avg_reward = compute_avg_reward(record.rewards)
    logger.info("avg reward : ")
    logger.info("\tcentral {:.3f}".format(avg_reward.central))
    for k, v in avg_reward.local.items():
        logger.info("\tagent {} reward : {:.3f} ".format(k, v))

    cumulative_reward = comput_summation_reward(record.rewards)
    logger.info("summation reward : ")
    logger.info("\tcentral {:.3f}".format(cumulative_reward.central))
    for k, v in cumulative_reward.local.items():
        logger.info("\tagent {} reward : {:.3f} ".format(k, v))
Example #41
0
import argparse
import logging

from pyspark.sql import SparkSession
"""
    :param: db_name: db with required table
    :param: table_name: table to reduce parquets
    :param: part_columns: list of partitions
"""

if __name__ == '__main__':

    # init logging and spark
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger('recsys_reduce_parquets')
    spark = SparkSession.builder.appName(
        'reduce_parquets').enableHiveSupport().getOrCreate()
    log.info('spark session initialized')

    # read line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--db_name', required=True)
    parser.add_argument('--table_name', required=True)
    parser.add_argument('--part_columns', default='')
    args = parser.parse_args()

    db_name = args.db_name
    table_name = args.table_name
    part_columns = args.part_columns

    # tables
    to_reduce_tbl = f'{db_name}.{table_name}'
    bkp_tbl = f'{db_name}.bkp_{table_name}'
Example #42
0
def run_check_with_model(
    model_with_type_info: onnx.ModelProto, mobile_pkg_build_config: pathlib.Path, logger: logging.Logger
):
    """
    Check if an ONNX model can be used with the ORT Mobile pre-built package.
    :param model_with_type_info: ONNX model that has had ONNX shape inferencing run on to add type/shape information.
    :param mobile_pkg_build_config: Configuration file used to build the ORT Mobile package.
    :param logger: Logger for output
    :return: True if supported
    """
    if not mobile_pkg_build_config:
        mobile_pkg_build_config = get_default_config_path()

    enable_type_reduction = True
    config_path = str(mobile_pkg_build_config.resolve(strict=True))
    required_ops, op_type_impl_filter = parse_config(config_path, enable_type_reduction)
    global_onnx_tensorproto_types, special_types = _get_global_tensorproto_types(op_type_impl_filter, logger)

    # get the opset imports
    opsets = get_opsets_imported(model_with_type_info)

    # If the ONNX opset of the model is not supported we can recommend using our tools to update that first.
    supported_onnx_opsets = set(required_ops["ai.onnx"].keys())
    # we have a contrib op that is erroneously in the ai.onnx domain with opset 1. manually remove that incorrect value
    supported_onnx_opsets.remove(1)
    onnx_opset_model_uses = opsets["ai.onnx"]
    if onnx_opset_model_uses not in supported_onnx_opsets:
        logger.info(f"Model uses ONNX opset {onnx_opset_model_uses}.")
        logger.info(f"The pre-built package only supports ONNX opsets {sorted(supported_onnx_opsets)}.")
        logger.info(
            "Please try updating the ONNX model opset to a supported version using "
            "python -m onnxruntime.tools.onnx_model_utils.update_onnx_opset ..."
        )

        return False

    unsupported_ops = set()
    logger.debug(
        "Checking if the data types and operators used in the model are supported " "in the pre-built ORT package..."
    )
    unsupported = check_graph(
        model_with_type_info.graph,
        opsets,
        required_ops,
        global_onnx_tensorproto_types,
        special_types,
        unsupported_ops,
        logger,
    )

    if unsupported_ops:
        logger.info("Unsupported operators:")
        for entry in sorted(unsupported_ops):
            logger.info("  " + entry)

    if unsupported:
        logger.info("\nModel is not supported by the pre-built package due to unsupported types and/or operators.")
        logger.info(
            "Please see https://onnxruntime.ai/docs/reference/mobile/prebuilt-package/ for information "
            "on what is supported in the pre-built package."
        )
        logger.info(
            "A custom build of ONNX Runtime will be required to run the model. Please see "
            "https://onnxruntime.ai/docs/build/custom.html for details on performing that."
        )
    else:
        logger.info("Model should work with the pre-built package.")

    logger.info("---------------\n")

    return not unsupported
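A hypothetical driver for the checker above; the model filename is an assumption, and shape inference is applied first because the function expects a model with type/shape information.

import logging

import onnx
from onnx import shape_inference

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ort_mobile_check")

model = onnx.load("model.onnx")  # assumed model path
model_with_type_info = shape_inference.infer_shapes(model)

# Passing None falls back to get_default_config_path() inside the checker.
is_supported = run_check_with_model(model_with_type_info, None, logger)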
Example #43
0
def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: Logger = None,
         trg_vocab: object = None):
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    :param trg_vocab: target vocabulary to reuse (loaded from the data config if not set)
    :return: the word_sentence_acc value from the last validate_on_data call
    """

    if logger is None:
        logger = make_logger()

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"]["batch_size"])
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, _, test_data, trg_vocab = load_data(data_cfg=cfg["data"],
                                           trg_vocab=trg_vocab)

    data_to_predict = {"test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 1)
        beam_alpha = cfg["testing"].get("alpha", -1)
        postprocess = cfg["testing"].get("postprocess", True)
    else:
        beam_size = 1
        beam_alpha = -1
        postprocess = True

    for data_set_name, data_set in data_to_predict.items():

        #pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
        hypotheses_raw, word_sentence_acc = validate_on_data(
            model, data=data_set, batch_size=batch_size,
            batch_type=batch_type, level=level,
            max_output_length=max_output_length, eval_metric=eval_metric,
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha, logger=logger, postprocess=postprocess)
        #pylint: enable=unused-variable

        if data_set[1]:
            decoding_description = "Greedy decoding" if beam_size < 2 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)
            logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric,
                        score, decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        attention_scores = None
        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=data_set.src,
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 1 for greedy decoding.")

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s", output_path_set)

    return word_sentence_acc
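A sketch of how the test entry point above might be invoked; the configuration and output paths are assumptions made for illustration.

word_sentence_acc = test(
    cfg_file="configs/example.yaml",   # assumed config path
    ckpt=None,                         # fall back to the latest checkpoint in model_dir
    output_path="predictions/test",    # hypotheses are written to predictions/test.test
    save_attention=False,
)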
Example #44
0
    def _read_incremental(
        self,
        logger: logging.Logger,
        stream_instance: Stream,
        configured_stream: ConfiguredAirbyteStream,
        connector_state: MutableMapping[str, Any],
        internal_config: InternalConfig,
    ) -> Iterator[AirbyteMessage]:
        """Read stream using incremental algorithm

        :param logger:
        :param stream_instance:
        :param configured_stream:
        :param connector_state:
        :param internal_config:
        :return:
        """
        stream_name = configured_stream.stream.name
        stream_state = connector_state.get(stream_name, {})
        if stream_state and "state" in dir(stream_instance):
            stream_instance.state = stream_state
            logger.info(
                f"Setting state of {stream_name} stream to {stream_state}")

        slices = stream_instance.stream_slices(
            cursor_field=configured_stream.cursor_field,
            sync_mode=SyncMode.incremental,
            stream_state=stream_state,
        )
        logger.debug(f"Processing stream slices for {stream_name}",
                     extra={"stream_slices": slices})
        total_records_counter = 0
        for _slice in slices:
            logger.debug("Processing stream slice", extra={"slice": _slice})
            records = stream_instance.read_records(
                sync_mode=SyncMode.incremental,
                stream_slice=_slice,
                stream_state=stream_state,
                cursor_field=configured_stream.cursor_field or None,
            )
            for record_counter, record_data in enumerate(records, start=1):
                yield self._as_airbyte_record(stream_name, record_data)
                stream_state = stream_instance.get_updated_state(
                    stream_state, record_data)
                checkpoint_interval = stream_instance.state_checkpoint_interval
                if checkpoint_interval and record_counter % checkpoint_interval == 0:
                    yield self._checkpoint_state(stream_instance, stream_state,
                                                 connector_state)

                total_records_counter += 1
                # This functionality should ideally live outside of this method
                # but since state is managed inside this method, we keep track
                # of it here.
                if self._limit_reached(internal_config, total_records_counter):
                    # Break from slice loop to save state and exit from _read_incremental function.
                    break

            yield self._checkpoint_state(stream_instance, stream_state,
                                         connector_state)
            if self._limit_reached(internal_config, total_records_counter):
                return
Example #45
0
def _train(_run, max_epochs: int, _log: Logger, checkpoint_at_end: bool):
    train_dataloader, test_dataloader = _get_dataloaders()

    model = _get_model()
    model = model.to(_get_device())

    loss_func = _get_loss_func(model=model)
    optimizer = _get_optimizer(model=model)

    _log.info('Starting training...')

    for epoch in range(max_epochs):
        epoch_loss = 0
        epoch_loss1 = 0
        epoch_loss2 = 0
        epoch_loss3 = 0

        iteration_count = 0
        for i, data in enumerate(train_dataloader):
            images, labels = data

            images = images.to(_get_device())
            labels = labels.to(_get_device())

            optimizer.zero_grad()

            outputs = model(images)

            loss, (loss1, loss2, loss3) = loss_func(outputs, labels, images)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            epoch_loss1 += loss1.item()
            epoch_loss2 += loss2.item()
            epoch_loss3 += loss3.item()

            iteration_count += 1

        weight1, weight2, weight3 = model.get_loss_weights()
        _log.info(
            f'Epoch {epoch}: {epoch_loss / iteration_count:.3f} '
            f'({weight1.item():.3f}, {weight2.item():.3f}, {weight3.item():.3f})'
        )

        (acc1, acc2,
         acc3), (val_loss1, val_loss2,
                 val_loss3) = _validate(test_dataloader=test_dataloader,
                                        model=model,
                                        loss_func=loss_func)

        _run.log_scalar('train_loss', epoch_loss / iteration_count, epoch)
        _run.log_scalar('train_loss1', epoch_loss1 / iteration_count, epoch)
        _run.log_scalar('train_loss2', epoch_loss2 / iteration_count, epoch)
        _run.log_scalar('train_loss3', epoch_loss3 / iteration_count, epoch)
        _run.log_scalar('val_loss1', val_loss1, epoch)
        _run.log_scalar('val_loss2', val_loss2, epoch)
        _run.log_scalar('val_loss3', val_loss3, epoch)
        _run.log_scalar('val_acc1', acc1, epoch)
        _run.log_scalar('val_acc2', acc2, epoch)
        _run.log_scalar('val_acc3', acc3, epoch)
        _run.log_scalar('weight1', weight1.item(), epoch)
        _run.log_scalar('weight2', weight2.item(), epoch)
        _run.log_scalar('weight3', weight3.item(), epoch)

    if checkpoint_at_end:
        _save_model(_run, model)
Example #46
0
File: runner.py  Project: dfee/asphalt
def sigterm_handler(logger: Logger, event_loop: AbstractEventLoop) -> None:
    if event_loop.is_running():
        logger.info('Received SIGTERM')
        event_loop.stop()
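A minimal wiring sketch, assuming the handler above is importable; the logger name is an assumption. asyncio's add_signal_handler forwards the extra positional arguments to the callback.

import asyncio
import logging
import signal

logger = logging.getLogger("asphalt.runner")
event_loop = asyncio.get_event_loop()

# Invoke sigterm_handler(logger, event_loop) whenever SIGTERM arrives.
event_loop.add_signal_handler(signal.SIGTERM, sigterm_handler, logger, event_loop)
event_loop.run_forever()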
Example #47
0
def start_qgis_application(
        enable_gui: bool = False,
        enable_processing: bool = False,
        verbose: bool = False,
        cleanup: bool = True,
        logger: logging.Logger = None,
        logprefix: str = 'Qgis:') -> 'QgsApplication':  # noqa: F821
    """ Start qgis application

        :param boolean enable_gui: Enable graphical interface, default to False
        :param boolean enable_processing: Enable processing, default to False
        :param boolean verbose: Output qgis settings, default to False
        :param boolean cleanup: Register an atexit hook to close the QgsApplication on exit().
            Note that this prevents qgis from segfaulting on exit. Default to True.
    """
    os.environ['QGIS_NO_OVERRIDE_IMPORT'] = '1'
    os.environ['QGIS_DISABLE_MESSAGE_HOOKS'] = '1'

    logger = logger or logging.getLogger()
    setup_qgis_paths()

    from qgis.core import Qgis, QgsApplication

    logger.info("Starting Qgis application: %s", Qgis.QGIS_VERSION)

    if QgsApplication.QGIS_APPLICATION_NAME != "QGIS3":
        raise RuntimeError("You need QGIS3 (found %s)" %
                           QgsApplication.QGIS_APPLICATION_NAME)

    if not enable_gui:
        #  We MUST set the QT_QPA_PLATFORM to prevent
        #  Qt trying to connect to display in containers
        if os.environ.get('DISPLAY') is None:
            logger.info("Setting offscreen mode")
            os.environ['QT_QPA_PLATFORM'] = 'offscreen'

    qgis_prefix = os.environ.get('QGIS3_HOME', '/usr')

    # XXX Set QGIS_PREFIX_PATH, it seems that setPrefixPath
    # does not do the job correctly
    os.environ['QGIS_PREFIX_PATH'] = qgis_prefix

    global qgis_application

    qgis_application = QgsApplication([], enable_gui)
    qgis_application.setPrefixPath(qgis_prefix, True)
    #qgis_application.initQgis()

    if cleanup:
        # Closing QgsApplication on exit will
        # prevent our app to segfault on exit()
        import atexit

        logger.info("%s Installing cleanup hook" % logprefix)

        @atexit.register
        def exitQgis():
            global qgis_application
            if qgis_application:
                qgis_application.exitQgis()
                del qgis_application

    if verbose:
        print(qgis_application.showSettings())

    # Install logger hook
    install_logger_hook(logger, logprefix, verbose=verbose)

    logger.info("%s Qgis application initialized......" % logprefix)

    if enable_processing:
        init_processing()
        logger.info("%s QGis processing initialized" % logprefix)

    return qgis_application
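A short usage sketch, assuming QGIS 3 is installed and the helper above is importable; the logger name is illustrative.

import logging

logging.basicConfig(level=logging.INFO)
qgis_app = start_qgis_application(enable_processing=True,
                                  logger=logging.getLogger("qgis-worker"))
# ... load projects / render maps here; with cleanup=True (the default),
# the atexit hook registered above calls exitQgis() on interpreter exit.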
Example #48
0
    def build_dataloader(self,
                         data,
                         batch_size,
                         shuffle=False,
                         device=None,
                         logger: logging.Logger = None,
                         gradient_accumulation=1,
                         tau: float = 0.8,
                         prune=None,
                         prefetch=None,
                         tasks_need_custom_eval=None,
                         cache=False,
                         debug=False,
                         **kwargs) -> DataLoader:
        # This method is only called during training or evaluation but not prediction
        dataloader = MultiTaskDataLoader(training=shuffle, tau=tau)
        for i, (task_name, task) in enumerate(self.tasks.items()):
            encoder_transform, transform = self.build_transform(task)
            training = None
            if data == 'trn':
                if debug:
                    _data = task.dev
                else:
                    _data = task.trn
                training = True
            elif data == 'dev':
                _data = task.dev
                training = False
            elif data == 'tst':
                _data = task.tst
                training = False
            else:
                _data = data
            if isinstance(data, str):
                logger.info(
                    f'[yellow]{i + 1} / {len(self.tasks)}[/yellow] Building [blue]{data}[/blue] dataset for '
                    f'[cyan]{task_name}[/cyan] ...')
            # Adjust Tokenizer according to task config
            config = copy(task.config)
            config.pop('transform', None)
            task_dataloader: DataLoader = task.build_dataloader(
                _data,
                transform,
                training,
                device,
                logger,
                tokenizer=encoder_transform.tokenizer,
                gradient_accumulation=gradient_accumulation,
                cache=isinstance(data, str),
                **config)
            # if prune:
            #     # noinspection PyTypeChecker
            #     task_dataset: TransformDataset = task_dataloader.dataset
            #     size_before = len(task_dataset)
            #     task_dataset.prune(prune)
            #     size_after = len(task_dataset)
            #     num_pruned = size_before - size_after
            #     logger.info(f'Pruned [yellow]{num_pruned} ({num_pruned / size_before:.1%})[/yellow] '
            #                 f'samples out of {size_before}.')
            if cache and data in ('trn', 'dev'):
                task_dataloader: CachedDataLoader = CachedDataLoader(
                    task_dataloader,
                    f'{cache}/{os.getpid()}-{data}-{task_name.replace("/", "-")}-cache.pt'
                    if isinstance(cache, str) else None)
            dataloader.dataloaders[task_name] = task_dataloader
        if data == 'trn':
            sampling_weights, total_size = dataloader.sampling_weights
            headings = [
                'task', '#batches', '%batches', '#scaled', '%scaled', '#epoch'
            ]
            matrix = []
            min_epochs = []
            for (task_name,
                 dataset), weight in zip(dataloader.dataloaders.items(),
                                         sampling_weights):
                epochs = len(dataset) / weight / total_size
                matrix.append([
                    f'{task_name}',
                    len(dataset), f'{len(dataset) / total_size:.2%}',
                    int(total_size * weight), f'{weight:.2%}', f'{epochs:.2f}'
                ])
                min_epochs.append(epochs)
            longest = int(torch.argmax(torch.tensor(min_epochs)))
            table = markdown_table(headings, matrix)
            rows = table.splitlines()
            cells = rows[longest + 2].split('|')
            cells[-2] = cells[-2].replace(
                f'{min_epochs[longest]:.2f}',
                f'[bold][red]{min_epochs[longest]:.2f}[/red][/bold]')
            rows[longest + 2] = '|'.join(cells)
            logger.info(
                f'[bold][yellow]{"Samples Distribution": ^{len(rows[0])}}[/yellow][/bold]'
            )
            logger.info('\n'.join(rows))
        if prefetch and (data == 'trn' or not tasks_need_custom_eval):
            dataloader = PrefetchDataLoader(dataloader, prefetch=prefetch)

        return dataloader
Example #49
0
def trinity_boot(args: Namespace, trinity_config: TrinityConfig,
                 extra_kwargs: Dict[str, Any], plugin_manager: PluginManager,
                 listener: logging.handlers.QueueListener, event_bus: EventBus,
                 main_endpoint: Endpoint, logger: logging.Logger) -> None:
    # start the listener thread to handle logs produced by other processes in
    # the local logger.
    listener.start()

    ensure_eth1_dirs(trinity_config.get_app_config(Eth1AppConfig))

    networking_endpoint = event_bus.create_endpoint(
        NETWORKING_EVENTBUS_ENDPOINT)
    event_bus.start()

    # First initialize the database process.
    database_server_process = ctx.Process(
        name="DB",
        target=run_database_process,
        args=(
            trinity_config,
            LevelDB,
        ),
        kwargs=extra_kwargs,
    )

    networking_process = ctx.Process(
        name="networking",
        target=launch_node,
        args=(
            args,
            trinity_config,
            networking_endpoint,
        ),
        kwargs=extra_kwargs,
    )

    # start the processes
    database_server_process.start()
    logger.info("Started DB server process (pid=%d)",
                database_server_process.pid)

    # networking process needs the IPC socket file provided by the database process
    try:
        wait_for_ipc(trinity_config.database_ipc_path)
    except TimeoutError as e:
        logger.error("Timeout waiting for database to start.  Exiting...")
        kill_process_gracefully(database_server_process, logger)
        ArgumentParser().error(message="Timed out waiting for database start")

    networking_process.start()
    logger.info("Started networking process (pid=%d)", networking_process.pid)

    def kill_trinity_with_reason(reason: str) -> None:
        kill_trinity_gracefully(logger,
                                (database_server_process, networking_process),
                                plugin_manager,
                                main_endpoint,
                                event_bus,
                                reason=reason)

    main_endpoint.subscribe(ShutdownRequest,
                            lambda ev: kill_trinity_with_reason(ev.reason))

    plugin_manager.prepare(args, trinity_config, extra_kwargs)

    try:
        loop = asyncio.get_event_loop()
        loop.add_signal_handler(signal.SIGTERM,
                                lambda: kill_trinity_with_reason("SIGTERM"))
        loop.run_forever()
        loop.close()
    except KeyboardInterrupt:
        kill_trinity_with_reason("CTRL+C / Keyboard Interrupt")
Example #50
0
def log_basic_info(logger: logging.Logger, config: Any) -> None:
    """Logging about pytorch, ignite, configurations, gpu system
    distributed settings.

    Parameters
    ----------
    logger
        Logger instance for logging
    config
        config object to log
    """
    import ignite

    logger.info("PyTorch version: %s", torch.__version__)
    logger.info("Ignite version: %s", ignite.__version__)
    if torch.cuda.is_available():
        # explicitly import cudnn as
        # torch.backends.cudnn can not be pickled with hvd spawning procs
        from torch.backends import cudnn

        logger.info("GPU device: %s", torch.cuda.get_device_name(idist.get_local_rank()))
        logger.info("CUDA version: %s", torch.version.cuda)
        logger.info("CUDNN version: %s", cudnn.version())

    logger.info("Configuration: %s", pformat(vars(config)))

    if idist.get_world_size() > 1:
        logger.info("distributed configuration: %s", idist.model_name())
        logger.info("backend: %s", idist.backend())
        logger.info("device: %s", idist.device().type)
        logger.info("hostname: %s", idist.hostname())
        logger.info("world size: %s", idist.get_world_size())
        logger.info("rank: %s", idist.get_rank())
        logger.info("local rank: %s", idist.get_local_rank())
        logger.info("num processes per node: %s", idist.get_nproc_per_node())
        logger.info("num nodes: %s", idist.get_nnodes())
        logger.info("node rank: %s", idist.get_node_rank())
Example #51
0
def train_transformer(df: pd.DataFrame, use_cols_config: Dict[str, dict],
                      window: int, criterion: Union, optimizer: Union,
                      optimizer_params: dict, scheduler: Union,
                      scheduler_params: dict, n_emb: int, n_head: int,
                      n_hidden: int, n_layers: int, batch_size: int,
                      epochs: int, dropout: float, logger: Logger,
                      output_dir: str, model_id: str):
    """

    :param df:
    :param use_cols_config:
        {col_name: {"embedding_num": int}}
    :param n_emb:
    :param n_head:
    :param n_hidden:
    :param n_layers:
    :param batch_size:
    :param dropout:
    :return:
    """

    train_idx = []
    val_idx = []
    np.random.seed(0)
    for _, w_df in df[df["content_type_id"] == 0].groupby("user_id"):
        if np.random.random() < 0.1:
            # all val
            val_idx.extend(w_df.index.tolist())
        else:
            train_num = int(len(w_df) * 0.9)
            train_idx.extend(w_df[:train_num].index.tolist())
            val_idx.extend(w_df[train_num:].index.tolist())

    dataset_train = RiiidDataset(df=df,
                                 indice=train_idx,
                                 use_cols_config=use_cols_config,
                                 window=window)
    dataloader_train = torch.utils.data.DataLoader(dataset=dataset_train,
                                                   batch_size=batch_size,
                                                   collate_fn=collate_fn,
                                                   num_workers=4,
                                                   shuffle=True)
    print(f"make_train_data len={len(dataset_train)}")
    dataset_val = RiiidDataset(df=df,
                               indice=val_idx,
                               use_cols_config=use_cols_config,
                               window=window)
    dataloader_val = torch.utils.data.DataLoader(dataset=dataset_val,
                                                 batch_size=batch_size,
                                                 collate_fn=collate_fn,
                                                 num_workers=1,
                                                 shuffle=False)
    print(f"make_val_data len={len(dataset_val)}")

    model = TransformerModel(n_emb=n_emb,
                             use_cols_config=use_cols_config,
                             n_head=n_head,
                             n_hidden=n_hidden,
                             n_layers=n_layers,
                             dropout=dropout)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    # TODO: Clean this up later; it's messy.
    scheduler_params["num_training_steps"] = len(
        dataset_train) // batch_size * scheduler_params["num_training_epochs"]
    del scheduler_params["num_training_epochs"]

    optimizer = optimizer(optimizer_grouped_parameters, **optimizer_params)
    scheduler = scheduler(optimizer, **scheduler_params)
    model.train()
    losses = []
    predict = []
    label = []
    for epoch in range(epochs):
        logger.info(f"--- epoch {epoch+1} ---")

        for batch in tqdm.tqdm(dataloader_train):
            with torch.set_grad_enabled(mode=True):
                output = model(batch)
                loss = criterion(
                    output.flatten().float(),
                    batch["answered_correctly"][:, -1].flatten().float())
                loss.backward()
                losses.append(loss.detach().data.numpy())
                scheduler.step()
                optimizer.step()
                optimizer.zero_grad()

        predict = []
        label = []
        for batch in tqdm.tqdm(dataloader_val):
            output = nn.functional.sigmoid(model(batch))
            predict.extend(output.flatten().detach().data.numpy().tolist())
            label.extend(batch["answered_correctly"]
                         [:, 0].flatten().detach().data.numpy().tolist())
        logger.info(
            f"AUC: {round(roc_auc_score(np.array(label), np.array(predict)), 4)}"
        )

    df.loc[val_idx].to_csv(f"{output_dir}/val.csv")
    df.to_csv(f"{output_dir}/all.csv")
    df_ret = pd.DataFrame(index=val_idx)
    df_ret["predict"] = np.array(predict)
    df_ret["target"] = np.array(label)
    df_ret["target2"] = df.loc[val_idx]["answered_correctly"]
    df_ret.to_csv(f"{output_dir}/oof_{model_id}.csv")
Example #52
0
File: main.py  Project: mykolaska/py-evm
def fix_unclean_shutdown(chain_config: ChainConfig,
                         logger: logging.Logger) -> None:
    logger.info("Cleaning up unclean shutdown...")

    logger.info("Searching for process id files in %s..." %
                chain_config.data_dir)
    pidfiles = tuple(chain_config.data_dir.glob('*.pid'))
    if len(pidfiles) > 1:
        logger.info('Found %d processes from a previous run. Closing...' %
                    len(pidfiles))
    elif len(pidfiles) == 1:
        logger.info('Found 1 process from a previous run. Closing...')
    else:
        logger.info(
            'Found 0 processes from a previous run. No processes to kill.')

    for pidfile in pidfiles:
        process_id = int(pidfile.read_text())
        kill_process_id_gracefully(process_id, time.sleep, logger)
        try:
            pidfile.unlink()
            logger.info('Manually removed %s after killing process id %d' %
                        (pidfile, process_id))
        except FileNotFoundError:
            logger.debug('pidfile %s was gone after killing process id %d' %
                         (pidfile, process_id))

    db_ipc = chain_config.database_ipc_path
    try:
        db_ipc.unlink()
        logger.info(
            'Removed a dangling IPC socket file for database connections at %s',
            db_ipc)
    except FileNotFoundError:
        logger.debug(
            'The IPC socket file for database connections at %s was already gone',
            db_ipc)
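
`kill_process_id_gracefully` is imported from elsewhere in the project and is not part of this excerpt. Purely as a sketch of what a helper with that call signature might do, the version below escalates from SIGINT to SIGKILL with a short grace period; this is an assumption for illustration, not the project's actual implementation:

import logging
import os
import signal
from typing import Callable


def kill_process_id_gracefully(process_id: int,
                               sleep: Callable[[float], None],
                               logger: logging.Logger) -> None:
    # Hypothetical escalation policy: ask politely with SIGINT first, then
    # force-kill if the process is still around after the grace period.
    for sig, grace_seconds in ((signal.SIGINT, 5), (signal.SIGKILL, 0)):
        try:
            os.kill(process_id, sig)
        except ProcessLookupError:
            logger.debug('Process %d has already exited', process_id)
            return
        if grace_seconds:
            sleep(grace_seconds)
    logger.info('Sent SIGKILL to process %d', process_id)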
Example #53
0
def setup_localisations(logger: logging.Logger) -> None:
    """Setup gettext localisations."""
    from srctools.property_parser import PROP_FLAGS_DEFAULT
    import gettext
    import locale

    # Get the 'en_US' style language code
    lang_code = locale.getdefaultlocale()[0]

    # Allow overriding through command line.
    if len(sys.argv) > 1:
        for arg in sys.argv[1:]:
            if arg.casefold().startswith('lang='):
                lang_code = arg[5:]
                break

    # Expands single code to parent categories.
    expanded_langs = gettext._expand_lang(lang_code)

    logger.info('Language: {!r}', lang_code)
    logger.debug('Language codes: {!r}', expanded_langs)

    # Add these to Property's default flags, so config files can also
    # be localised.
    for lang in expanded_langs:
        PROP_FLAGS_DEFAULT['lang_' + lang] = True

    lang_folder = install_path('i18n')

    trans: gettext.NullTranslations

    for lang in expanded_langs:
        try:
            file = open(lang_folder / (lang + '.mo'), 'rb')
        except FileNotFoundError:
            continue
        with file:
            trans = gettext.GNUTranslations(file)
            break
    else:
        # To help identify missing translations, replace everything with
            # something noticeable.
        if lang_code == 'dummy':

            class DummyTranslations(gettext.NullTranslations):
                """Dummy form for identifying missing translation entries."""
                def gettext(self, message: str) -> str:
                    """Generate placeholder of the right size."""
                    # We don't want to leave {arr} intact.
                    return ''.join([
                        '#' if s.isalnum() or s in '{}' else s for s in message
                    ])

                def ngettext(self, msgid1: str, msgid2: str, n: int) -> str:
                    """Generate placeholder of the right size for plurals."""
                    return self.gettext(msgid1 if n == 1 else msgid2)

                lgettext = gettext
                lngettext = ngettext

            trans = DummyTranslations()
        # No translations, fallback to English.
        # That's fine if the user's language is actually English.
        else:
            if 'en' not in expanded_langs:
                logger.warning(
                    "Can't find translation for codes: {!r}!",
                    expanded_langs,
                )
            trans = gettext.NullTranslations()

    # Add these functions to builtins, plus _=gettext
    trans.install(['gettext', 'ngettext'])

    # Some language-specific overrides.

    if trans.gettext('__LANG_USE_SANS_SERIF__') == 'YES':
        # For Japanese/Chinese, we want a 'sans-serif' / gothic font
        # style.
        try:
            from tkinter import font
        except ImportError:
            return
        font_names = [
            'TkDefaultFont',
            'TkHeadingFont',
            'TkTooltipFont',
            'TkMenuFont',
            'TkTextFont',
            'TkCaptionFont',
            'TkSmallCaptionFont',
            'TkIconFont',
            # Note - not fixed-width...
        ]
        for font_name in font_names:
            font.nametofont(font_name).configure(family='sans-serif')
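
Once `trans.install(['gettext', 'ngettext'])` has run, `_`, `gettext` and `ngettext` are available as builtins, so the rest of the application can translate strings without importing anything. A small self-contained illustration of the calls that become available; the message strings are made up and NullTranslations simply passes them through:

import gettext

trans = gettext.NullTranslations()
trans.install(['gettext', 'ngettext'])

window_title = _("Export Options")  # singular lookup via the installed builtins._
count = 3
status = ngettext("{n} item selected", "{n} items selected", count).format(n=count)
print(window_title, status)  # -> Export Options 3 items selected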
Example #54
0
    def repair(self, log: logging.Logger, overlay: overlay_mod.Overlay,
               fsck_dir: Path) -> bool:
        log.info("no automatic remediation available for this error")
        return False
Example #55
0
        Logger.debug(logger, "Loading URL %s from %s" % (line, urlFile))
        urls.append(line)
    if len(urls) < 1:
        print("No URLs could be loaded from %s, exiting!" % urlFile)
        exit(1)

def main():
    global start
    global logger
    
    for i in range(maxThreadCount):
        Logger.debug(logger, "Starting thread #%d" % i)
        t = ThreadUrl(queue)
        t.setDaemon(True)
        t.start()
              
    #populate queue with data
    for j in range(maxQueryCount):
        Logger.debug(logger, "Populating URL #%d" % j)
        queue.put(urls[randint(0,len(urls)-1)])
           
    start = time.time()
    queue.join()

readUrlsFromFile()          
main()

Logger.info(logger, "Cumulative Query Time: %s" % totalFetchTime)
Logger.info(logger, "Total Elapsed Time: %s" % (time.time() - start))
      
Example #56
0
def read(
    data_dir: str,
    feature_config: FeatureConfig,
    tfrecord_type: str,
    file_io: FileIO,
    max_sequence_size: int = 0,
    batch_size: int = 0,
    preprocessing_keys_to_fns: dict = {},
    parse_tfrecord: bool = True,
    use_part_files: bool = False,
    logger: Logger = None,
    **kwargs
) -> data.TFRecordDataset:
    """
    Extract features by reading and parsing TFRecord data
    and converting into a TFRecordDataset using the FeatureConfig

    Parameters
    ----------
    data_dir: str
        path to the directory containing train, validation and test data
    feature_config: `FeatureConfig` object
        FeatureConfig object that defines the features to be loaded in the dataset
        and the preprocessing functions to be applied to each of them
    tfrecord_type: {"example", "sequence_example"}
        Type of the TFRecord protobuf message to be used for TFRecordDataset
    file_io: `FileIO` object
        file I/O handler objects for reading and writing data
    max_sequence_size: int, optional
        maximum number of sequence to be used with a single SequenceExample proto message
        The data will be appropriately padded or clipped to fit the max value specified
    batch_size: int, optional
        size of each data batch
    preprocessing_keys_to_fns: dict of(str, function), optional
        dictionary of function names mapped to function definitions
        that can now be used for preprocessing while loading the
        TFRecordDataset to create the RelevanceDataset object
    parse_tfrecord: bool, optional
        parse the TFRecord string from the dataset;
        returns strings as is otherwise
    use_part_files: bool, optional
        load dataset from part files checked using "part-" prefix
    logger: `Logger`, optional
        logging handler for status messages

    Returns
    -------
    `TFRecordDataset`
        TFRecordDataset loaded from the `data_dir` specified using the FeatureConfig
    """
    parse_fn = get_parse_fn(
        feature_config=feature_config,
        tfrecord_type=tfrecord_type,
        preprocessing_keys_to_fns=preprocessing_keys_to_fns,
        max_sequence_size=max_sequence_size,
    )

    # Get all tfrecord files in directory
    tfrecord_files = file_io.get_files_in_directory(
        data_dir,
        extension="" if use_part_files else ".tfrecord",
        prefix="part-" if use_part_files else "",
    )

    # Parse the protobuf data to create a TFRecordDataset
    dataset = data.TFRecordDataset(tfrecord_files)

    if parse_tfrecord:
        # Parallel calls set to AUTOTUNE: improved training performance by 40% with a classification model
        dataset = (
            dataset.map(parse_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            # .apply(data.experimental.ignore_errors())
        )

    # Create BatchedDataSet
    if batch_size:
        dataset = dataset.batch(batch_size, drop_remainder=False)

    if logger:
        logger.info(
            "Created TFRecordDataset from SequenceExample protobufs from {} files : {}".format(
                len(tfrecord_files), str(tfrecord_files)[:50]
            )
        )

    # We apply prefetch as it improved train/test/validation throughput by 30% in some real model training.
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return dataset
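
As a rough illustration of how `read()` might be invoked, a hypothetical call is shown below; the directory path and keyword values are placeholders, and `feature_config`, `file_io` and `logger` are assumed to be built by the surrounding ml4ir pipeline rather than here:

# Hypothetical call; paths and sizes are placeholders.
dataset = read(
    data_dir="data/train",
    feature_config=feature_config,
    tfrecord_type="sequence_example",
    file_io=file_io,
    max_sequence_size=25,
    batch_size=128,
    logger=logger,
)
for batch in dataset.take(1):
    print(batch)  # one parsed, batched set of features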
Example #57
0
from contextlib import contextmanager
from logging import Logger, FileHandler

class simple_context_manager(object):
    def __init__(self, obj):
        self.obj = obj
    def __enter__(self):
        self.obj.some_property += 1
    def __exit__(self, *args):
        self.obj.some_property-=1

# a more complex example
@contextmanager
def error_logging(logger, level):
    oldlevel = logger.level
    try:
        logger.setLevel(level)
        yield
    finally:
        logger.setLevel(oldlevel)

if __name__ == "__main__":
    logger = Logger('name', 20)
    handler = FileHandler('flog.log')
    logger.addHandler(handler)
    logger.info('this will get logged')
    with error_logging(logger, 30):
        logger.info('this will not get logged')
    logger.info('this will get logged because the level is {}'.format(logger.level))
    
class Simple_obj(object):
    def __init__(self, arg):
        self.some_property = arg
'''
s = Simple_obj(5)
with simple_context_manager(s):
    print(s.some_property)
    '''
Example #58
0
def live(bucket_name: str,
         order_name: str,
         run_date: str,
         start_time: str,
         end_time: str,
         camera_address: str,
         camera_username: str = 'xames3',
         camera_password: str = 'iamironman',
         camera_port: Union[int, str] = 554,
         camera_timeout: Union[float, int, str] = 30.0,
         timestamp_format: str = '%H:%M:%S',
         log: logging.Logger = None) -> Optional[str]:
    """Record live videos based on time duration using FFMPEG.

  Args:
    bucket_name: S3 bucket name.
    order_name: Order name.
    run_date: Date when to record the video.
    start_time: Time when to start recording the video.
    end_time: Time when to stop recording the video.
    camera_address: Camera's IP address.
    camera_username: Camera username.
    camera_password: Camera password.
    camera_port: Camera port number.
    camera_timeout: Maximum time to wait until disconnection occurs.
    timestamp_format: Timestamp for checking the recording start time.
    log: Logger object.
  """
    log = _log(__file__) if log is None else log

    camera_port = int(camera_port)
    camera_timeout = float(camera_timeout)

    start_time, end_time = f'{run_date} {start_time}', f'{run_date} {end_time}'
    duration = calculate_duration(start_time, end_time, timestamp_format, True)
    force_close = datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S')
    force_close = force_close.replace(tzinfo=timezone.utc).timestamp()

    vid_type = video_type(True, True, True)
    temp = os.path.join(_lr, f'{bucket_name}{order_name}')

    if not os.path.isdir(temp):
        os.mkdir(temp)
    temp_file = os.path.join(temp, f'{bucket_name}{order_name}{vid_type}.mp4')

    url = configure_camera_url(camera_address, camera_username,
                               camera_password, camera_port)
    slept_duration, idx = 0, 1

    if duration != 0:
        try:
            while True:
                if camera_live(camera_address, camera_port, camera_timeout,
                               log):
                    file = filename(temp_file, idx)
                    log.info('Recording started for selected camera.')
                    os.system(ffmpeg_str(url, file, duration, camera_timeout))

                    stop_utc = now().replace(tzinfo=timezone.utc).timestamp()
                    stop_secs = now().second

                    _old_file = file_size(file)
                    old_duration = stop_secs if _old_file == '300.0 bytes' else drn(
                        file)
                    duration = duration - old_duration - slept_duration

                    slept_duration = 0
                    idx += 1
                    if (force_close <= stop_utc) or (duration <= 0):
                        output = concate_videos(temp, delete_old_files=True)
                        if output:
                            return output
                else:
                    log.warning(
                        'Unable to record because of poor network connectivity.'
                    )
                    slept_duration += camera_timeout
                    log.warning(
                        'Compensating lost time & attempting after 30 secs.')
                    time.sleep(camera_timeout)
        except Exception as error:
            log.critical(f'Something went wrong because of {error}')
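
`configure_camera_url()` and `ffmpeg_str()` are helpers from the same package and are not shown here. As a hedged sketch, the command they assemble for an RTSP camera might resemble the following; the flag choices and URL layout are assumptions, not the project's exact output:

def ffmpeg_str_sketch(url: str, out_file: str, duration: float) -> str:
    """Illustrative only: copy an RTSP stream into an MP4 for `duration` seconds."""
    return (f'ffmpeg -loglevel error -rtsp_transport tcp -i "{url}" '
            f'-t {int(duration)} -c copy "{out_file}"')


# The URL would have roughly the shape "rtsp://user:password@192.168.0.10:554".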
Example #59
0
File: idp.py Project: SUNET/eduid-IdP
    def __init__(self, logger: Logger, config: IdPConfig, userdb: Optional[Any] = None):
        self.logger = logger
        self.config = config
        self.response_status = None
        self.start_response = None

        # Connecting to MongoDB can take some time if the replica set is not fully working.
        # Log both 'starting' and 'started' messages.
        self.logger.info("eduid-IdP server starting")

        self._init_pysaml2()

        _session_ttl = self.config.sso_session_lifetime * 60
        _SSOSessions: SSOSessionCache
        if self.config.sso_session_mongo_uri:
            _SSOSessions = eduid_idp.cache.SSOSessionCacheMDB(self.config.sso_session_mongo_uri,
                                                              self.logger, _session_ttl)
        else:
            _SSOSessions = eduid_idp.cache.SSOSessionCacheMem(self.logger, _session_ttl, threading.Lock())

        _login_state_ttl = (self.config.login_state_ttl + 1) * 60
        _ticket_sessions = SSOLoginDataCache(
            'TicketCache', self.logger, _login_state_ttl, self.config,
            threading.Lock())
        self.authn_info_db = None
        _actions_db = None

        if config.mongo_uri:
            self.authn_info_db = eduid_idp.authn.AuthnInfoStoreMDB(config.mongo_uri, logger)

        if config.mongo_uri and config.actions_app_uri:
            _actions_db = ActionDB(config.mongo_uri)
            self.logger.info("configured to redirect users with pending actions")
        else:
            self.logger.debug("NOT configured to redirect users with pending actions")

        if userdb is None:
            userdb = eduid_idp.idp_user.IdPUserDb(logger, config)
        self.userdb = userdb
        self.authn = eduid_idp.authn.IdPAuthn(logger, config, self.userdb)

        cherrypy.config.update({'request.error_response': self.handle_error,
                                'error_page.default': self.error_page_default,
                                })
        listen_str = 'http://'
        if self.config.server_key:
            listen_str = 'https://'
        if ':' in self.config.listen_addr:  # IPv6
            listen_str += '[' + self.config.listen_addr + ']:' + str(self.config.listen_port)
        else:  # IPv4
            listen_str += self.config.listen_addr + ':' + str(self.config.listen_port)
        self.logger.info("eduid-IdP server started, listening on {!s}".format(listen_str))

        _common_sessions: Optional[ExpiringCacheCommonSession] = None

        if (config.redis_sentinel_hosts or config.redis_host) and config.shared_session_cookie_name \
                and config.shared_session_secret_key:
            _common_sessions = ExpiringCacheCommonSession('CommonSessions', logger,
                                                          config.shared_session_ttl, config,
                                                          secret=config.shared_session_secret_key)
        else:
            logger.info('eduID shared sessions not configured')

        self.context = IdPContext(config=self.config,
                                  idp=self.IDP,
                                  logger=self.logger,
                                  sso_sessions=_SSOSessions,
                                  ticket_sessions=_ticket_sessions,
                                  common_sessions=_common_sessions,
                                  actions_db=_actions_db,
                                  authn=self.authn,
                                  )
Example #60
0
def build_package_methods(logger: logging.Logger) -> None:
    init_client_functions: List[str] = []
    init_resource_functions: List[str] = []
    session_client_functions: List[str] = []
    session_resource_functions: List[str] = []
    imports: List[str] = []
    active_submodules: List[Submodule] = []
    for submodule in SUBMODULES:
        if not submodule.is_active:
            continue

        active_submodules.append(submodule)
        logger.info(
            "Discovered %s service stubs in %s",
            submodule.class_name,
            submodule.pypi_name,
        )

    for submodule in active_submodules:
        init_client_functions.append(
            FUNCTION_TEMPLATE.format(
                overload="@overload\n" if len(active_submodules) > 1 else "",
                name="client",
                service_name_type='Literal["{}"]'.format(submodule.boto3_name),
                return_type="{}Client".format(submodule.class_name),
            ))
        session_client_functions.append(
            METHOD_TEMPLATE.format(
                overload="@overload\n    "
                if len(active_submodules) > 1 else "",
                name="client",
                service_name_type='Literal["{}"]'.format(submodule.boto3_name),
                return_type="{}Client".format(submodule.class_name),
            ))
        imports.append("from mypy_boto3.{} import {}Client".format(
            submodule.import_name,
            submodule.class_name,
        ))
        if submodule.has_resource:
            init_resource_functions.append(
                FUNCTION_TEMPLATE.format(
                    overload="@overload\n"
                    if len(active_submodules) > 1 else "",
                    name="resource",
                    service_name_type='Literal["{}"]'.format(
                        submodule.boto3_name),
                    return_type="{}ServiceResource".format(
                        submodule.class_name),
                ))
            session_resource_functions.append(
                METHOD_TEMPLATE.format(
                    overload="@overload\n    "
                    if len(active_submodules) > 1 else "",
                    name="resource",
                    service_name_type='Literal["{}"]'.format(
                        submodule.boto3_name),
                    return_type="{}ServiceResource".format(
                        submodule.class_name),
                ))
            imports.append(
                "from mypy_boto3.{} import {}ServiceResource".format(
                    submodule.import_name,
                    submodule.class_name,
                ))

    if not init_client_functions:
        init_client_functions.append(
            FUNCTION_TEMPLATE.format(
                overload="",
                name="client",
                service_name_type="str",
                return_type="Any",
            ))
    if not init_resource_functions:
        init_resource_functions.append(
            FUNCTION_TEMPLATE.format(
                overload="",
                name="resource",
                service_name_type="str",
                return_type="Any",
            ))
    if not session_client_functions:
        session_client_functions.append(
            METHOD_TEMPLATE.format(
                overload="",
                name="client",
                service_name_type="str",
                return_type="Any",
            ))
    if not session_resource_functions:
        session_resource_functions.append(
            METHOD_TEMPLATE.format(
                overload="",
                name="resource",
                service_name_type="str",
                return_type="Any",
            ))

    init_contents: List[str] = [
        "import sys",
        "from typing import overload, Any",
        "if sys.version_info >= (3, 8):",
        "    from typing import Literal",
        "else:",
        "    from typing_extensions import Literal",
    ]
    init_contents.extend(imports)
    init_contents.append("")
    init_contents.extend(init_client_functions)
    init_contents.extend(init_resource_functions)

    session_contents: List[str] = [
        "import sys",
        "from typing import overload, Any",
        "if sys.version_info >= (3, 8):",
        "    from typing import Literal",
        "else:",
        "    from typing_extensions import Literal",
    ]
    session_contents.extend(imports)
    session_contents.append("")
    session_contents.append("class Session:")
    session_contents.extend(session_client_functions)
    session_contents.extend(session_resource_functions)

    write_text(ROOT_PATH / "boto3_init_gen.py", "\n".join(init_contents),
               logger)
    write_text(ROOT_PATH / "boto3_session_gen.py", "\n".join(session_contents),
               logger)
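
The FUNCTION_TEMPLATE and METHOD_TEMPLATE constants are defined elsewhere in the module and are not part of this excerpt. Judging from the format keys used above (overload, name, service_name_type, return_type) and the indentation baked into the overload strings, they might be shaped roughly like the sketch below; these are guesses, not the package's real templates:

# Guessed template shapes; the real constants live elsewhere in the builder module.
FUNCTION_TEMPLATE = (
    "{overload}"
    "def {name}(service_name: {service_name_type}, *args: Any, **kwargs: Any)"
    " -> {return_type}: ...\n"
)

METHOD_TEMPLATE = (
    "    {overload}"
    "def {name}(self, service_name: {service_name_type}, *args: Any, **kwargs: Any)"
    " -> {return_type}: ...\n"
)

# With boto3_name == "s3" and class_name == "S3", FUNCTION_TEMPLATE would then
# render to:
#
#   @overload
#   def client(service_name: Literal["s3"], *args: Any, **kwargs: Any) -> S3Client: ...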