def tmpdir(self, base=''):
    """
    Return the pytest tmpdir for this test, with *base* appended when given.
    """
    if not base:
        return U.pathjoin(self.pytest_tmpdir)
    return U.pathjoin(self.pytest_tmpdir, base)
def get_log_handler(logpath=None, cfg=None):
    """
    Create a handler object to go in the logger.

    The log file path is taken from (in order): *logpath* if set, the
    $CRAWL_LOG environment variable, *cfg*, the default configuration,
    or U.default_logpath().  Rotation size, backup count, and the archive
    directory come from *cfg*, the default configuration, or hard-coded
    defaults, in that order.
    """
    if logpath == '':
        logpath = None
    try:
        dcfg = get_config()
    except Exception:
        # Was a bare 'except:', which also caught SystemExit and
        # KeyboardInterrupt; absence of a default config just means we
        # fall back to the other sources.
        dcfg = None
    envname = os.getenv('CRAWL_LOG')

    # Resolve the log file path.
    if logpath is not None:
        final_logpath = logpath
    elif envname:
        final_logpath = envname
    elif cfg:
        final_logpath = cfg.get_d('crawler', 'logpath', U.default_logpath())
    elif dcfg:
        final_logpath = dcfg.get_d('crawler', 'logpath', U.default_logpath())
    else:
        final_logpath = U.default_logpath()

    # Resolve rotation size, backup count, and archive directory.
    if cfg:
        maxBytes = cfg.get_size('crawler', 'logsize', 10*1024*1024)
        backupCount = cfg.get_size('crawler', 'logmax', 5)
        archdir = cfg.get_d('crawler',
                            'archive_dir',
                            U.pathjoin(U.dirname(final_logpath),
                                       'hpss_log_archive'))
    elif dcfg:
        maxBytes = dcfg.get_size('crawler', 'logsize', 10*1024*1024)
        backupCount = dcfg.get_size('crawler', 'logmax', 5)
        archdir = dcfg.get_d('crawler',
                             'archive_dir',
                             U.pathjoin(U.dirname(final_logpath),
                                        'hpss_log_archive'))
    else:
        maxBytes = 10*1024*1024
        backupCount = 5
        archdir = U.pathjoin(U.dirname(final_logpath), 'hpss_log_archive')

    fh = util.ArchiveLogfileHandler(final_logpath,
                                    maxBytes=maxBytes,
                                    backupCount=backupCount,
                                    archdir=archdir)

    # U.hostname must be CALLED (new_logger does U.hostname()); the
    # original interpolated the function object itself into the format.
    strfmt = "%" + "(asctime)s [%s] " % U.hostname() + '%' + "(message)s"
    fmt = logging.Formatter(strfmt, datefmt="%Y.%m%d %H:%M:%S")
    fh.setFormatter(fmt)
    # Make logging failures propagate instead of being swallowed.
    fh.handleError = raiseError
    return fh
def createBoard(self, board_file, title): ''' スレッド一覧ファイルからスレッドリストを作成するクラス subject.txtの1行からスレッド情報を取得 http://info.2ch.net/wiki/index.php?monazilla%2Fdevelop%2Fsubject.txt subject.txtの仕様 ================= 0000000000.dat<>スレッドタイトル (レス数) - threads[n]['title'] : スレッドタイトル - threads[n]['url'] : http://server/board/dat/0000000000.dat ''' data, path_tokens, info = util.boardload(board_file) if title: self.title = title for l in data: matched = _titleptn.search(l) if matched: r = matched.groupdict() subject_url = util.pathjoin(path_tokens) datfile = urljoin(subject_url, 'dat/' + r['dat']) thread = dict(title = r['title'], url = datfile) else: print 'no match --> ' + l self.threads.append(thread)
def load_priority_list(cls):
    """
    If one or more priority list files are configured, read them and put
    their contents first in the list of Checkables to be processed
    """
    checkables = []
    cfg = CrawlConfig.get_config()
    priglob = cfg.get_d('cv', 'priority', '')
    if priglob == '':
        return checkables

    done_default = U.pathjoin(U.dirname(priglob), 'completed')
    pricomp = cfg.get_d('cv', 'completed', done_default)
    for pripath in U.foldsort(glob.glob(priglob)):
        with open(pripath, 'r') as f:
            checkables.extend(Checkable(path=line.strip(), type='f')
                              for line in f.readlines())
        # Move the processed file into the 'completed' directory.
        os.rename(pripath, U.pathjoin(pricomp, U.basename(pripath)))
    return checkables
def pidcmd():
    """
    Collect a list of running processes and their command lines
    """
    lines = []
    for entry in glob.glob("/proc/*"):
        pid = util.basename(entry)
        if not pid.isdigit():
            # Not a process directory (e.g. /proc/meminfo)
            continue
        try:
            raw = util.contents(util.pathjoin(entry, "cmdline"))
        except IOError:
            # Process vanished between the glob and the read
            continue
        if 0 == len(raw):
            continue
        # cmdline args are NUL-separated; render them space-separated
        lines.append("%s %s\n" % (pid, raw.replace("\x00", " ")))
    return "".join(lines)
def new_logger(logpath='', cfg=None):
    """
    Return a new logging object for this process.

    The log file path is derived from (in order):

     - logpath if set
     - environment ($CRAWL_LOG)
     - cfg
     - default (/var/log/hpssic.log if writable, else /tmp/hpssic.log)
    """
    # -------------------------------------------------------------------------
    def raiseError(record=None):
        # Installed as the handler's handleError so logging failures
        # propagate instead of being silently swallowed.
        raise

    # Note: the original defined an inner helper cfg_get() here that was
    # never called; it has been removed as dead code.
    envname = os.getenv('CRAWL_LOG')
    try:
        dcfg = get_config()
    except Exception:
        # Was a bare 'except:'; no default config just means we fall
        # back to the other path sources below.
        dcfg = None

    # Resolve the log file path per the precedence in the docstring.
    if logpath != '':
        final_logpath = logpath
    elif envname:
        final_logpath = envname
    elif cfg:
        try:
            final_logpath = cfg.get('crawler', 'logpath')
        except NoOptionError:
            final_logpath = U.default_logpath()
        except NoSectionError:
            final_logpath = U.default_logpath()
    elif dcfg:
        try:
            final_logpath = dcfg.get('crawler', 'logpath')
        except NoOptionError:
            final_logpath = U.default_logpath()
        except NoSectionError:
            final_logpath = U.default_logpath()
    else:
        final_logpath = U.default_logpath()

    rval = logging.getLogger('hpssic')
    rval.setLevel(logging.INFO)
    host = U.hostname()
    # Close handlers left over from a previous logger instance; they are
    # detached from the logger further down.
    for h in rval.handlers:
        h.close()

    if cfg:
        maxBytes = cfg.get_size('crawler', 'logsize', 10 * 1024 * 1024)
        backupCount = cfg.get_size('crawler', 'logmax', 5)
        archdir = cfg.get_d(
            'crawler',
            'archive_dir',
            U.pathjoin(U.dirname(final_logpath), 'hpss_log_archive'))
    else:
        # NOTE(review): unlike the logpath lookup above, dcfg is not
        # consulted for these settings -- confirm that is intentional.
        maxBytes = 10 * 1024 * 1024
        backupCount = 5
        archdir = U.pathjoin(U.dirname(final_logpath), 'hpss_log_archive')

    fh = U.ArchiveLogfileHandler(final_logpath,
                                 maxBytes=maxBytes,
                                 backupCount=backupCount,
                                 archdir=archdir)

    # '%' is split out so the literal %(...)s directives survive the
    # intervening % interpolation of the host name.
    strfmt = "%" + "(asctime)s [%s] " % host + '%' + "(message)s"
    fmt = logging.Formatter(strfmt, datefmt="%Y.%m%d %H:%M:%S")
    fh.setFormatter(fmt)
    fh.handleError = raiseError

    # Detach the (now closed) old handlers before adding the new one.
    while 0 < len(rval.handlers):
        z = U.pop0(rval.handlers)
        del z
    rval.addHandler(fh)
    rval.info('-' * (55 - len(host)))
    return rval