Пример #1
0
 def tmpdir(self, base=''):
     """
     Build a path inside this test's pytest temporary directory

     When *base* is non-empty the result is
     U.pathjoin(self.pytest_tmpdir, base); otherwise it is
     U.pathjoin(self.pytest_tmpdir) on its own.
     """
     components = [self.pytest_tmpdir]
     if base:
         components.append(base)
     return U.pathjoin(*components)
Пример #2
0
 def tmpdir(self, base=''):
     """
     Return a path under the pytest tmpdir recorded on this test

     An empty *base* yields U.pathjoin(self.pytest_tmpdir); any other value
     is passed to U.pathjoin as a second component.
     """
     # Guard clause: nothing to append, so join the tmpdir alone
     if not base:
         return U.pathjoin(self.pytest_tmpdir)
     return U.pathjoin(self.pytest_tmpdir, base)
Пример #3
0
def get_log_handler(logpath=None, cfg=None):
    """
    Create a handler object to go in the logger

    The log file path is taken from the first of these that is set:

     - logpath (an empty string is treated the same as None)
     - the environment variable $CRAWL_LOG
     - option 'logpath' of section 'crawler' in cfg
     - the same option in the configuration returned by get_config()
     - U.default_logpath()

    The handler's maxBytes ('logsize', default 10*1024*1024), backupCount
    ('logmax', default 5) and archive directory ('archive_dir', default
    'hpss_log_archive' beside the log file) come from cfg when it is given,
    otherwise from the get_config() configuration, otherwise from the
    defaults.

    Returns a util.ArchiveLogfileHandler with its formatter installed and
    its handleError replaced by raiseError.
    """
    default_size = 10*1024*1024
    default_count = 5

    if logpath == '':
        logpath = None

    # Best effort: if the default configuration cannot be loaded for any
    # reason we simply carry on without one. get_config is defined elsewhere,
    # so the catch is deliberately left as broad as it was.
    try:
        dcfg = get_config()
    except:
        dcfg = None

    # An explicitly passed cfg takes precedence over the default one.
    active = cfg or dcfg

    envname = os.getenv('CRAWL_LOG')

    if logpath is not None:
        final_logpath = logpath
    elif envname:
        final_logpath = envname
    elif active:
        final_logpath = active.get_d('crawler', 'logpath',
                                     U.default_logpath())
    else:
        final_logpath = U.default_logpath()

    default_archdir = U.pathjoin(U.dirname(final_logpath),
                                 'hpss_log_archive')
    if active:
        maxBytes = active.get_size('crawler', 'logsize', default_size)
        backupCount = active.get_size('crawler', 'logmax', default_count)
        archdir = active.get_d('crawler', 'archive_dir', default_archdir)
    else:
        maxBytes = default_size
        backupCount = default_count
        archdir = default_archdir

    fh = util.ArchiveLogfileHandler(final_logpath,
                                    maxBytes=maxBytes,
                                    backupCount=backupCount,
                                    archdir=archdir)

    # U.hostname must be *called*: interpolating the bare function would put
    # its repr ("<function hostname at 0x...>") into every log line. The
    # doubled %% leaves %(asctime)s / %(message)s for logging to fill in.
    strfmt = "%%(asctime)s [%s] %%(message)s" % U.hostname()
    fmt = logging.Formatter(strfmt, datefmt="%Y.%m%d %H:%M:%S")
    fh.setFormatter(fmt)
    # raiseError is not defined in this function; it is expected to be
    # available in the enclosing module scope.
    fh.handleError = raiseError
    return fh
Пример #4
0
    def createBoard(self, board_file, title):
        '''
        Build the thread list from a thread-listing (subject.txt) file.

        Thread information is extracted from each line of subject.txt:
        http://info.2ch.net/wiki/index.php?monazilla%2Fdevelop%2Fsubject.txt

        subject.txt format
        ==================
        0000000000.dat<>thread title (number of responses)

        Every line matched by _titleptn (which must provide the named groups
        'title' and 'dat') appends one dict to self.threads:

        - threads[n]['title'] : thread title
        - threads[n]['url']   : http://server/board/dat/0000000000.dat

        Lines that do not match are reported on stdout and skipped.

        board_file is handed to util.boardload; title, when truthy, replaces
        self.title.
        '''
        data, path_tokens, _info = util.boardload(board_file)
        if title:
            self.title = title

        # The base URL is the same for every line, so build it only once.
        subject_url = util.pathjoin(path_tokens)

        for l in data:
            matched = _titleptn.search(l)
            if not matched:
                # Single parenthesised argument: prints identically whether
                # print is a statement (Python 2) or a function.
                print('no match --> ' + l)
                # Skip the line. Falling through to the append would either
                # raise NameError (no thread built yet) or add the previous
                # thread a second time.
                continue

            r = matched.groupdict()
            datfile = urljoin(subject_url, 'dat/' + r['dat'])
            self.threads.append(dict(title=r['title'], url=datfile))
Пример #5
0
    def load_priority_list(cls):
        """
        Turn the configured priority list files into Checkable objects

        Option 'priority' of section 'cv' is a glob expression; when it is
        the empty string an empty list is returned. Otherwise each matching
        file, in U.foldsort order, is read and every line (stripped of
        surrounding whitespace) becomes Checkable(path=<line>, type='f').
        Once read, the file is renamed into the directory named by option
        'completed' (default: 'completed' beside the glob expression).

        Returns the list of Checkables in the order they were read.
        """
        checkables = []
        cfg = CrawlConfig.get_config()
        priglob = cfg.get_d('cv', 'priority', '')
        if priglob == '':
            return checkables

        default_done = U.pathjoin(U.dirname(priglob), 'completed')
        pricomp = cfg.get_d('cv', 'completed', default_done)

        for listfile in U.foldsort(glob.glob(priglob)):
            with open(listfile, 'r') as src:
                # NOTE(review): a blank line yields a Checkable whose path
                # is '' -- confirm that is what callers expect.
                checkables.extend(Checkable(path=entry.strip(), type='f')
                                  for entry in src)
            # Move the processed list out of the way of the next glob
            os.rename(listfile, U.pathjoin(pricomp, U.basename(listfile)))

        return checkables
Пример #6
0
    def load_priority_list(cls):
        """
        Read any configured priority list files and return their entries

        The glob expression in option 'priority' of section 'cv' selects the
        list files; an empty setting yields an empty list. Each line of each
        file (visited in U.foldsort order) is stripped and wrapped as
        Checkable(path=<line>, type='f'). After a file has been read it is
        renamed into the 'completed' directory (option 'completed' of
        section 'cv', defaulting to 'completed' beside the glob expression).
        """
        result = []
        config = CrawlConfig.get_config()
        pattern = config.get_d('cv', 'priority', '')
        if pattern == '':
            return result

        done_dir = config.get_d('cv',
                                'completed',
                                U.pathjoin(U.dirname(pattern), 'completed'))

        for source_path in U.foldsort(glob.glob(pattern)):
            with open(source_path, 'r') as handle:
                # NOTE(review): blank lines are not filtered, so they
                # produce a Checkable with an empty path.
                for raw in handle:
                    result.append(Checkable(path=raw.strip(), type='f'))
            # Rename the list file so it is not picked up again
            target = U.pathjoin(done_dir, U.basename(source_path))
            os.rename(source_path, target)

        return result
Пример #7
0
def pidcmd():
    """
    Report the command line of every process listed under /proc

    Each /proc entry whose name is all digits contributes one line of the
    form "<pid> <cmdline>\n", with the NUL separators in cmdline replaced
    by spaces. Entries whose cmdline is empty, or cannot be read (IOError),
    are left out. The lines are returned as a single string.
    """
    lines = []
    for procdir in glob.glob("/proc/*"):
        pid = util.basename(procdir)
        if not pid.isdigit():
            continue
        try:
            cmdline = util.contents(util.pathjoin(procdir, "cmdline"))
        except IOError:
            # The process may have gone away, or the file is unreadable
            continue
        if len(cmdline) == 0:
            continue
        lines.append("%s %s\n" % (pid, cmdline.replace("\x00", " ")))
    return "".join(lines)
Пример #8
0
def pidcmd():
    """
    Build a text listing of running processes and their command lines

    Walks the numeric entries of /proc, reads each one's cmdline file and
    emits "<pid> <cmdline>\n" with NUL bytes turned into spaces. Processes
    with an empty cmdline, or whose cmdline raises IOError when read, are
    skipped. Returns the concatenation of all emitted lines.
    """
    def _entries():
        # Yield one formatted line per process that has a command line
        for entry in glob.glob("/proc/*"):
            name = util.basename(entry)
            if name.isdigit():
                try:
                    text = util.contents(util.pathjoin(entry, "cmdline"))
                except IOError:
                    continue
                if 0 < len(text):
                    yield "%s %s\n" % (name, text.replace("\x00", " "))

    return "".join(_entries())
Пример #9
0
def new_logger(logpath='', cfg=None):
    """
    Return a new logging object for this process. The log file path is derived
    from (in order):

     - logpath if set (both '' and None count as not set)
     - environment ($CRAWL_LOG)
     - option 'logpath' of section 'crawler' in cfg, or failing that in the
       configuration returned by get_config()
     - U.default_logpath()

    The handler's maxBytes ('logsize', default 10 * 1024 * 1024), backupCount
    ('logmax', default 5) and archive directory ('archive_dir', default
    'hpss_log_archive' beside the log file) are read from cfg only; without
    an explicit cfg the defaults are used.

    Handlers already attached to the 'hpssic' logger are closed and removed,
    so the returned logger has exactly one handler. A line of dashes is
    logged before the logger is returned.
    """

    # -------------------------------------------------------------------------
    def raiseError(record=None):
        # Installed as the handler's handleError: a bare raise re-raises the
        # exception currently being handled instead of swallowing it.
        raise

    # -------------------------------------------------------------------------
    def configured_logpath(source):
        # Log path from a config object, or the default when the section or
        # option is missing.
        try:
            return source.get('crawler', 'logpath')
        except (NoOptionError, NoSectionError):
            return U.default_logpath()

    envname = os.getenv('CRAWL_LOG')
    # Best effort: if the default configuration cannot be loaded for any
    # reason we carry on without one. get_config is defined elsewhere, so the
    # catch is deliberately left as broad as it was.
    try:
        dcfg = get_config()
    except:
        dcfg = None

    # Treat None like '' so that new_logger(logpath=None) falls through to
    # the environment/config/default instead of using None as the path.
    if logpath not in ('', None):
        final_logpath = logpath
    elif envname:
        final_logpath = envname
    elif cfg:
        final_logpath = configured_logpath(cfg)
    elif dcfg:
        final_logpath = configured_logpath(dcfg)
    else:
        final_logpath = U.default_logpath()

    rval = logging.getLogger('hpssic')
    rval.setLevel(logging.INFO)
    host = U.hostname()

    # Close and detach every existing handler. Iterate over a copy because
    # removeHandler mutates rval.handlers.
    for old in list(rval.handlers):
        old.close()
        rval.removeHandler(old)

    default_archdir = U.pathjoin(U.dirname(final_logpath),
                                 'hpss_log_archive')
    # Only an explicitly passed cfg is consulted for these settings; the
    # get_config() configuration is used for the log path alone.
    if cfg:
        maxBytes = cfg.get_size('crawler', 'logsize', 10 * 1024 * 1024)
        backupCount = cfg.get_size('crawler', 'logmax', 5)
        archdir = cfg.get_d('crawler', 'archive_dir', default_archdir)
    else:
        maxBytes = 10 * 1024 * 1024
        backupCount = 5
        archdir = default_archdir

    fh = U.ArchiveLogfileHandler(final_logpath,
                                 maxBytes=maxBytes,
                                 backupCount=backupCount,
                                 archdir=archdir)

    # The doubled %% leaves %(asctime)s / %(message)s for logging to fill in
    strfmt = "%%(asctime)s [%s] %%(message)s" % host
    fmt = logging.Formatter(strfmt, datefmt="%Y.%m%d %H:%M:%S")
    fh.setFormatter(fmt)
    fh.handleError = raiseError

    rval.addHandler(fh)

    rval.info('-' * (55 - len(host)))

    return rval