def check_path(path, verbose=False, plugin=True, xof=True):
    """
    Compare a file's SC_COUNT against the count recorded for its COS and
    report on the result.

    plugin True:  tcc_report is called with its default flags (which,
                  per the original note, log and store the result), and
                  only when the two counts differ.
    plugin False: interactive use -- the report text is printed to stdout
                  with log=False and store=False, and only if verbose is
                  True or the counts differ.

    If get_bitfile_set raises U.HpssicError, the message is logged and
    the function returns (plugin True), or SystemExit is raised (plugin
    False, xof True), or a fresh U.HpssicError is raised (both False).
    """
    cosinfo = get_cos_info()
    nsobj = path_nsobject(path)

    try:
        bfl = get_bitfile_set(int(nsobj), 1)
    except U.HpssicError as e:
        if plugin:
            CrawlConfig.log(e.value)
            return
        if xof:
            raise SystemExit(e.value)
        raise U.HpssicError(e.value)

    bf = U.pop0(bfl)
    sc_count = int(bf['SC_COUNT'])
    cos_count = int(cosinfo[bf['BFATTR_COS_ID']])
    mismatch = sc_count != cos_count

    if plugin:
        # non-interactive: only mismatches are worth reporting
        if mismatch:
            tcc_report(bf, path=path)
    elif verbose or mismatch:
        print(tcc_report(bf, path=path, log=False, store=False))
def get_bitfile_path(bitfile):
    """
    Given a bitfile id, walk back up the tree in HPSS to generate the
    bitfile's path.

    The nsobject row whose bitfile_id matches *bitfile* supplies the leaf
    name; each following query fetches the row whose object_id is the
    previous row's PARENT_ID, and its NAME is prepended to the path. The
    walk ends when a query returns no rows.

    Returns the assembled path, or "<unnamed bitfile>" if no nsobject row
    refers to *bitfile*. Raises U.HpssicError if more than one nsobject
    row refers to it.

    The database handle is closed on every exit path, including errors.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    try:
        rows = db.select(table='nsobject',
                         fields=['parent_id', 'name'],
                         where='bitfile_id = ?',
                         data=(bitfile, ))

        if 1 < len(rows):
            raise U.HpssicError(MSG.multiple_objects_S % hexstr(bitfile))
        elif len(rows) < 1:
            return "<unnamed bitfile>"

        rval = ''
        while rows:
            x = rows[0]
            if rval == '':
                rval = x['NAME']
            else:
                rval = os.path.join(x['NAME'], rval)

            # step up to the parent of the row just consumed
            rows = db.select(table='nsobject',
                             fields=['parent_id', 'name'],
                             where='object_id = ?',
                             data=(x['PARENT_ID'], ))
    finally:
        db.close()

    return rval
def history_show(rptfmt):
    """
    Dispatch to the module-level function named 'history_show_<rptfmt>'.

    The selected function is called with no arguments; presumably it
    reports the records in the history table in chronological order in
    the requested format. If no such name exists in this module's
    globals, U.HpssicError is raised.
    """
    handler_name = 'history_show_' + rptfmt

    if handler_name not in globals():
        # NOTE(review): history_invalid_format_S is referenced as a bare
        # module-level name here, while other messages in this code base
        # are reached through MSG -- confirm it is defined/imported.
        raise U.HpssicError(history_invalid_format_S % rptfmt)

    globals()[handler_name]()
def handle_exception(exc, defval):
    """
    Decide what to do about exception *exc* given default value *defval*.

    - defval is exactly an int or a float: return defval.
    - defval is any other non-None value: raise U.HpssicError.
    - defval is None: append the file name to exc.message and re-raise
      the exception currently being handled.

    NOTE(review): this function refers to `self` and uses a bare `raise`,
    so it presumably lives inside a method and is called from an except
    clause -- confirm against the enclosing code.
    """
    # Exact type comparison (not isinstance) is deliberate here: it keeps
    # bool and other subclasses out of the "numeric default" case.
    if type(defval) in (int, float):
        return defval

    if defval is not None:
        raise U.HpssicError(MSG.default_int_float)

    exc.message += " in %s" % self.filename
    raise
def get_bitfile_set(first_nsobj_id, limit):
    """
    Get a collection of bitfiles from DB2.

    The rows returned cover nsobject object ids in the half-open range
    [first_nsobj_id, first_nsobj_id + limit), restricted to bitfiles with
    bfattr_data_len > 0 and tape segments at bf_offset 0. Each row
    carries object_id, bfid, bfattr_cos_id, bfattr_create_time and
    sc_count (the count of matching bftapeseg storage_class entries).

    Returns whatever db.select() produces for that query (indexed by row
    elsewhere in this code). Raises U.HpssicError if nsobject_lookup
    finds nothing, if bitfile_lookup finds nothing, or if the final query
    returns no rows.

    The database handle is closed on every exit path, including when one
    of the lookups or the select itself raises.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    try:
        bfid_list = nsobject_lookup(first_nsobj_id, limit, dbh=db)
        if 0 == len(bfid_list):
            raise U.HpssicError(MSG.not_in_nsobject_D % first_nsobj_id)

        n_found = bitfile_lookup(bfid_list, dbh=db)
        if 0 == n_found:
            raise U.HpssicError(MSG.not_in_bitfile_S % bfid_list[0])

        rval = db.select(table=['nsobject A', 'bitfile B', 'bftapeseg C'],
                         fields=['A.object_id',
                                 'B.bfid',
                                 'B.bfattr_cos_id',
                                 'B.bfattr_create_time',
                                 'count(C.storage_class) as sc_count'],
                         where="A.bitfile_id = B.bfid and B.bfid = C.bfid and " +
                         "B.bfattr_data_len > 0 and C.bf_offset = 0 and " +
                         "? <= A.object_id and A.object_id < ? ",
                         groupby=", ".join(["A.object_id",
                                            "B.bfid",
                                            "B.bfattr_cos_id",
                                            "B.bfattr_create_time"]),
                         data=(first_nsobj_id, first_nsobj_id + limit),
                         limit=limit)
    finally:
        db.close()

    if 0 == len(rval):
        raise U.HpssicError(MSG.not_in_bftapeseg_S % bfid_list[0])

    return rval
def by_bitfile_id(bfid):
    """
    Get info about a single bitfile from DB2.

    *bfid* is converted with CrawlDBI.DBIdb2.hexval() and used to select
    the joined nsobject/bitfile/bftapeseg row for that bitfile (only
    bitfiles with bfattr_data_len > 0 and segments at bf_offset 0).

    Returns the single matching row, carrying object_id, bfid,
    bfattr_cos_id, bfattr_create_time and sc_count. Raises U.HpssicError
    if the query matches more than one row or none at all.

    The database handle is closed on every exit path, including errors.
    """
    bfid_val = CrawlDBI.DBIdb2.hexval(bfid)
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    try:
        rval = db.select(table=['nsobject A', 'bitfile B', 'bftapeseg C'],
                         fields=['A.object_id',
                                 'B.bfid',
                                 'B.bfattr_cos_id',
                                 'B.bfattr_create_time',
                                 'count(C.storage_class) as sc_count'],
                         where="A.bitfile_id = B.bfid and B.bfid = C.bfid and " +
                         "B.bfattr_data_len > 0 and C.bf_offset = 0 and " +
                         "A.bitfile_id = ?",
                         groupby=", ".join(["A.object_id",
                                            "B.bfid",
                                            "B.bfattr_cos_id",
                                            "B.bfattr_create_time"]),
                         data=(bfid_val, ))
    finally:
        db.close()

    if 1 < len(rval):
        raise U.HpssicError(MSG.multiple_objects_S % bfid)
    elif len(rval) < 1:
        raise U.HpssicError(MSG.no_bitfile_found_S % bfid)

    return rval[0]
def path_nsobject(path=''):
    """
    Look up an nsobject id based on a path.

    The path is resolved one component at a time, starting at '/', with
    each component looked up by name under the object id found for the
    previous one.

    Returns the object id of the last component. Raises U.HpssicError if
    *path* is not absolute; nsobj_id() raises the same if a component
    cannot be found.
    """
    if not path.startswith('/'):
        raise U.HpssicError("An absolute path is required")

    # Break the path into its components with '/' at the beginning.
    # Empty components (from a bare '/', a trailing slash or a doubled
    # slash) are dropped: nsobj_id() returns a plain -1 for an empty
    # name, which cannot be unpacked as an (object_id, parent_id) pair.
    nl = ['/'] + [z for z in path.lstrip('/').split(os.path.sep) if z]

    # walk down the tree structure to the leaf
    parent_id = None
    for name in nl:
        (obj_id, _) = nsobj_id(name=name, parent=parent_id)
        parent_id = obj_id

    # return the bottom object id
    return obj_id
def stop_wait(cfg=None):
    """
    Wait for the crawler identified by the configured context to stop.

    is_running(context) is polled every 'sleep_time' seconds (default
    0.25) until it reports False or 'stopwait_timeout' seconds (default
    5.0) have been slept. If the crawler is still running once the
    timeout has been used up, util.HpssicError is raised.

    cfg defaults to CrawlConfig.get_config() when not supplied.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()

    section = 'crawler'
    context = cfg.get(section, 'context')
    # Read from the configuration as before; the value itself is not
    # consulted by the polling loop below.
    exitpath = cfg.get(section, 'exitpath')
    timeout = cfg.get_time(section, 'stopwait_timeout', 5.0)
    interval = cfg.get_time(section, 'sleep_time', 0.25)

    waited = 0.0
    while is_running(context) and waited < timeout:
        time.sleep(interval)
        waited += interval

    # re-check: the crawler may have exited during the final sleep
    if is_running(context) and timeout <= waited:
        raise util.HpssicError("Stop wait timeout exceeded")
def nsobj_id(name='', parent=None):
    """
    Look up an nsobject id based on name and, optionally, parent id.

    Returns the tuple (OBJECT_ID, PARENT_ID) taken from the first
    matching row of hpss.nsobject. For an empty *name*, returns -1
    without touching the database (kept for backward compatibility;
    note that this is an int, not a tuple). Raises U.HpssicError if no
    row matches.

    The name and parent id are passed to the query as bound parameters
    rather than being formatted into the SQL text, so names containing
    quote characters are matched literally and cannot alter the query.
    """
    if name == '':
        # nothing to look up, so no connection is opened
        return -1

    if parent is None:
        where = "name = ?"
        data = (name, )
    else:
        where = "name = ? and parent_id = ?"
        data = (name, parent)

    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    try:
        rows = db.select(table='hpss.nsobject',
                         fields=['object_id', 'parent_id'],
                         where=where,
                         data=data)
    finally:
        db.close()

    try:
        rval = (rows[0]['OBJECT_ID'], rows[0]['PARENT_ID'])
    except IndexError:
        raise U.HpssicError(MSG.no_such_path_component_SD % (name, parent))

    return rval
def tpop_select_by_paths(path_l, db=None):
    """
    Return a list of checkable rows that match the path list where ttypes
    and/or cart is null.

    Each entry of *path_l* is used as a SQL LIKE pattern against the
    'path' column of the checkables table; only rows of type 'f' with a
    NULL ttypes or a NULL cart are selected. The rows for all patterns
    are concatenated in the order the patterns were given.

    path_l must be exactly a list, otherwise U.HpssicError is raised.

    If *db* is None, a crawler database handle is opened for the duration
    of the call and closed afterwards, even if a query raises. A handle
    passed in by the caller is used as is and left open.
    """
    if type(path_l) != list:
        raise U.HpssicError(MSG.list_expected_S % type(path_l))

    # only close what we opened ourselves
    close = db is None
    if close:
        db = CrawlDBI.DBI(dbtype='crawler')

    rval = []
    try:
        for path in path_l:
            rows = db.select(table="checkables",
                             fields=["path", "type", "ttypes", "cart",
                                     "last_check"],
                             where="path like ? and type = 'f' and " +
                             "(ttypes is NULL or cart is NULL)",
                             data=(path, ))
            rval.extend(rows)
    finally:
        if close:
            db.close()

    return rval
def send(to='', subj='', msg='', sender='', cfg=None):
    """
    Send e-mail as indicated.

    to:     '' means use option 'notify-e-mail' of section 'crawler' in
            cfg (cfg is then required); a string containing ',' or '@'
            is used as the address list itself; a string of the form
            'section.option' names a cfg option holding the address
            list (a built-in default recipient is used if cfg is None
            or the option is missing). Any other value is rejected.
    subj:   subject line; MSG.default_mail_subject if empty or None.
    msg:    message body; MSG.empty_message if empty.
    sender: precedence is argument, then cfg option 'from_address' of
            section 'crawler', then 'hpssic@<long hostname>'. A
            non-empty sender must contain '@'.
    cfg:    configuration object, optional.

    Raises util.HpssicError if to, msg, subj or sender has the wrong
    type, if no recipient can be determined, or if sender is invalid.

    The message is handed to the SMTP server on localhost and the
    recipient list is logged afterwards.
    """
    if type(to) != str:
        raise util.HpssicError(MSG.invalid_recip_list)
    if sender is not None and type(sender) != str:
        raise util.HpssicError(MSG.invalid_sender_S % str(sender))
    if type(msg) != str:
        raise util.HpssicError(MSG.invalid_msg_body)
    if subj is not None and type(subj) != str:
        raise util.HpssicError(MSG.invalid_subject_S % str(subj))

    # Prepare a message object based on *msg*
    if msg:
        payload = email.mime.text.MIMEText(msg)
    else:
        payload = email.mime.text.MIMEText(MSG.empty_message)

    # Set the recipient address(es) based on *to*
    default_recip = '*****@*****.**'
    if to == '':
        if cfg is None:
            raise util.HpssicError(MSG.no_recip_list)
        else:
            (section, option) = ('crawler', 'notify-e-mail')
            addrs = cfg.get(section, option)
    elif ',' in to or '@' in to:
        addrs = to
    elif '.' in to:
        if cfg is None:
            addrs = default_recip
        else:
            (section, option) = to.split('.')
            addrs = cfg.get_d(section, option, default_recip)
    else:
        # Neither an address list nor a 'section.option' reference.
        # Previously this fell through with addrs unset and crashed with
        # an UnboundLocalError a few lines further down.
        raise util.HpssicError(MSG.invalid_recip_list)

    addrlist = [x.strip() for x in addrs.split(',')]
    payload['To'] = addrs

    # Set the subject based on *subj*
    if subj:
        payload['Subject'] = subj
    else:
        payload['Subject'] = MSG.default_mail_subject

    # Set the from address
    default_sender = 'hpssic@%s' % util.hostname(long=True)
    if sender is None or sender == '':
        if cfg is not None:
            sender = cfg.get_d('crawler', 'from_address', default_sender)
        else:
            sender = default_sender
    elif '@' not in sender:
        raise util.HpssicError(MSG.invalid_sender_S % str(sender))
    payload['From'] = sender

    # Send the message. quit() ends the session politely on success;
    # close() in the finally clause releases the socket if sendmail()
    # or quit() raised, and is harmless after a successful quit().
    s = smtplib.SMTP('localhost')
    try:
        s.sendmail(sender, addrlist, payload.as_string())
        s.quit()
    finally:
        s.close()

    # Log it
    CrawlConfig.log("sent mail to %s", addrs)