def do_process_recovery(self, start_time=None, end_time=None):
    """Recovery procedure: the recovery command outputs the records that are
    processed and sent to Gratia by process_data_fd
    """
    rec_command = None
    if start_time is not None and end_time is not None:
        rec_command = self.RECOVERY_COMMAND % {'data': "", 'start': start_time, 'end': end_time}
    else:
        rec_command = self.RECOVERY_COMMAND % {'data': ""}
    DebugPrint(-1, "RUNNING: %s" % rec_command)
    fd = os.popen(rec_command)
    submit_count, found_count = self.process_data_fd(fd)
    if fd.close():
        DebugPrint(-1, "Recovery mode ERROR: Call to recovery command failed: %s" % rec_command)

    DebugPrint(-1, "Recovery mode: Records submitted: %d" % submit_count)
    DebugPrint(-1, "Recovery mode: Records found: %d" % found_count)
def logfiles_to_process(self, args):
    """List all the log files.

    args is a list of file names or directory names
    """
    for arg in args:
        if os.path.isfile(arg) and os.stat(arg).st_size:
            DebugPrint(5, "Processing logfile %s" % arg)
            yield arg
        elif os.path.isdir(arg):
            DebugPrint(5, "Processing directory %s." % arg)
            for logfile in os.listdir(arg):
                m = self.LOGFILE_RE.match(logfile)
                if m:
                    DebugPrint(5, "Processing logfile %s" % logfile)
                    yield os.path.join(arg, logfile)
def __init__(self, target=None):
    """Create a checkpoint file

    target - checkpoint filename (optionally null)
    """
    if target:
        try:
            fd = os.open(target, os.O_RDWR | os.O_CREAT)
            self._fp = os.fdopen(fd, 'r+')
            self._val = long(self._fp.readline())
            DebugPrint(1, "Resuming from checkpoint in %s" % target)
        except IOError:
            raise IOError("Could not open checkpoint file %s" % target)
        except ValueError:
            DebugPrint(1, "Failed to read checkpoint file %s" % target)
def __init__(self):
    try:
        self.opts, self.args = self.parse_opts()
    except Exception as e:
        print(e, file=sys.stderr)
        sys.exit(1)

    # Initialize Gratia
    if not self.opts.gratia_config or not os.path.exists(self.opts.gratia_config):
        raise Exception("Gratia config, %s, does not exist." % self.opts.gratia_config)
    Gratia.Initialize(self.opts.gratia_config)

    if self.opts.verbose:
        Gratia.Config.set_DebugLevel(5)

    # Sanity checks for the probe's runtime environment.
    GratiaWrapper.CheckPreconditions()

    if self.opts.sleep:
        rnd = random.randint(1, int(self.opts.sleep))
        DebugPrint(2, "Sleeping for %d seconds before proceeding." % rnd)
        time.sleep(rnd)

    # Make sure we have an exclusive lock for this probe.
    GratiaWrapper.ExclusiveLock()

    self.register_gratia("slurm_meter")

    # Find the checkpoint filename (if enabled)
    if self.opts.checkpoint:
        checkpoint_file = os.path.join(Gratia.Config.get_WorkingFolder(), "checkpoint")
    else:
        checkpoint_file = None

    # Open the checkpoint file
    self.checkpoint = SlurmCheckpoint(checkpoint_file)

    # Only process DataFileExpiration days of history
    # (unless we're resuming from a checkpoint file)
    if self.checkpoint.val is None:
        self.checkpoint.val = int(time.time() - (Gratia.Config.get_DataFileExpiration() * 86400))

    # Connect to database
    self.conn = self.get_db_conn()

    self.cluster = Gratia.Config.getConfigAttribute('SlurmCluster')

    # SLURM made changes to the accounting database schema
    slurm_version = self.get_slurm_version()
    if LooseVersion(slurm_version) < LooseVersion("15.08.0"):
        # Original schema
        self.sacct = SlurmAcct_v1(self.conn, self.cluster, slurm_version)
    else:
        # Added TRES (Trackable resources) in 15.08.0pre5
        self.sacct = SlurmAcct_v2(self.conn, self.cluster, slurm_version)
def get_version(self):
    """Return the input version (LRM version, server version), normally from an
    external program. This is not the probe version."""
    # For errors: raise Exception("Unable to invoke %s" % cmd)
    DebugPrint(2, "Called ProbeInput get_version instead of the Probe specific one.")
    return ProbeInput.UNKNOWN
def do_test(self, static_info=None):
    """Prepare the input for testing, e.g. replacing some methods with stubs,
    increasing verbosity, limiting actions, ...
    Invoked after __init__ (the object has been created and initialized) and
    before start (static_info from the config file has not been passed and the
    final initialization has not been done) and get_records.
    """
    DebugPrint(4, "ProbeInput test invoked but not defined")
def register_gratia(self, name):
    Gratia.RegisterReporter(name)

    try:
        slurm_version = self.get_slurm_version()
    except Exception as e:
        DebugPrint(0, "Unable to get SLURM version: %s" % str(e))
        raise
def _jobs(self, where, having='1=1'):
    cursor = self._conn.cursor()

    # Note: When jobs are preempted, multiple cluster_job_table records
    # are inserted, each with distinct start and end times.
    # We consider the walltime to be the total time running,
    # adding up all the records.

    sql = '''SELECT j.id_job
                  , j.exit_code
                  , j.id_group
                  , j.id_user
                  , j.job_name
                  , j.cpus_alloc
                  , j.partition
                  , j.state
                  , MIN(j.time_start) AS time_start
                  , MAX(j.time_end) AS time_end
                  , SUM(j.time_suspended) AS time_suspended
                  , SUM(CASE WHEN j.time_end < j.time_start + j.time_suspended
                             THEN 0
                             ELSE j.time_end - j.time_start - j.time_suspended
                        END) AS wall_time
                  , a.acct
                  , a.user
                  , ( SELECT MAX(s.max_rss)
                      FROM %(cluster)s_step_table s
                      WHERE s.job_db_inx = j.job_db_inx
                      /* Note: Will underreport mem for jobs with simultaneous steps */
                    ) AS max_rss
                  , ( SELECT SUM(s.user_sec) + SUM(s.user_usec/1000000)
                      FROM %(cluster)s_step_table s
                      WHERE s.job_db_inx = j.job_db_inx
                    ) AS cpu_user
                  , ( SELECT SUM(s.sys_sec) + SUM(s.sys_usec/1000000)
                      FROM %(cluster)s_step_table s
                      WHERE s.job_db_inx = j.job_db_inx
                    ) AS cpu_sys
             FROM %(cluster)s_job_table as j
             LEFT JOIN %(cluster)s_assoc_table AS a ON j.id_assoc = a.id_assoc
             WHERE %(where)s
             GROUP BY id_job
             HAVING %(having)s
             ORDER BY j.time_end
    ''' % {'cluster': self._cluster, 'where': where, 'having': having}
    DebugPrint(5, "Executing SQL: %s" % sql)
    cursor.execute(sql)

    for r in cursor:
        # Add handy data to job record
        r['cluster'] = self._cluster
        self._addUserInfoIfMissing(r)
        yield r
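# Illustrative sketch, not part of the original probe: one way the _jobs()
# generator above could be driven. The WHERE fragment is plain SQL against the
# columns selected above; the method name and time filter value are made-up
# examples, not the probe's actual calling convention.
def _example_jobs_since(self, timestamp):
    where = "j.time_end >= %d" % int(timestamp)
    for job in self._jobs(where):
        # Each job is a row keyed by the aliases defined in the SQL above,
        # plus the 'cluster' key added in the loop.
        yield job['id_job'], job['wall_time'], job['acct']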
def query(self, sql):
    """Generator returning one row at a time as a pseudo-dictionary (DictCursor).

    psycopg2.extras.DictCursor rows are tuples, accessible by index and returned
    as values, not keys, in a loop (for i in row), but row.keys() lists the columns
    and row['column_name'] accesses the column. It is compatible w/ standard cursors.
    For a proper dictionary see psycopg2.extras.RealDictCursor.
    NOTE that the values are not mutable (cannot be changed)

    :param sql: string w/ the SQL query
    :return: row as psycopg2.extras.DictCursor (tuple and dictionary)
    """
    if not sql:
        DebugPrint(2, "WARNING: No SQL provided: no query.")
        return
    if not self._connection:
        DebugPrint(4, "WARNING: No connection provided: trying to (re)open connection.")
        if not self.open_db_conn():
            DebugPrint(2, "WARNING: Unable to open connection: no query.")
            return
    if not self._cursor:
        self._cursor = self._get_cursor(self._connection)
        if not self._cursor:
            DebugPrint(2, "WARNING: Unable to get cursor: no query.")
            return
    cursor = self._cursor
    DebugPrint(4, "Executing SQL: %s" % sql)
    try:
        cursor.execute(sql)
    except psycopg2.ProgrammingError as er:
        DebugPrint(2, "ERROR, error running the query: %s" % er)
    if cursor.rowcount is None:
        DebugPrint(2, "WARNING, problems running the query: %s" % sql)
    elif cursor.rowcount <= 0:
        DebugPrint(3, "WARNING, no rows returned by the query (rowcount: %s). "
                      "OK for iterators." % cursor.rowcount)
    # resultset = self._cur.fetchall()
    if self.support_itersize:
        for r in cursor:
            yield r
    else:
        # implement itersize manually (for psycopg < 2.4)
        # normal iteration would be inefficient, fetching one record at a time
        while True:
            resultset = cursor.fetchmany()
            if not resultset:
                break
            for r in resultset:
                yield r
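# Illustrative sketch, not part of the original probe: how the rows yielded by
# query() behave, per the DictCursor notes in the docstring above. The SQL
# statement and the method name are placeholders added for the example.
def _example_row_access(self):
    for row in self.query("SELECT 1 AS one, 'x' AS label"):
        DebugPrint(5, "by index: %s %s" % (row[0], row[1]))            # tuple-style access
        DebugPrint(5, "by name: %s %s" % (row['one'], row['label']))   # dict-style access
        DebugPrint(5, "columns: %s" % row.keys())                      # column names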
def process_data_file(self, logfile):
    # Open the file and send it to process
    try:
        fd = open(logfile, 'r')
    except IOError as ie:
        DebugPrint(2, "Cannot process %s: (errno=%d) %s" % (logfile, ie.errno, ie.strerror))
        return 0, 0
def add_static_info(self, static_info):
    if not static_info:
        return
    for k in static_info:
        if k in self._static_info:
            DebugPrint(4, "Updating probe %s from %s to %s" %
                       (k, self._static_info[k], static_info[k]))
        self._static_info[k] = static_info[k]
def lines_to_record(lines):
    """Parse one or more lines of data into a record (data structure)

    Here regular expressions are used to match values for a dictionary.
    The input stream is a series of "name = value" lines, with empty lines or
    the end of the stream separating records.
    '#' at the beginning of a line is used to add comments (skipped).

    :param lines:
    :return:
    """
    # dictionary, caseless_dictionary, sorted dictionary, array
    # are all possible structures; be consistent with what you use in process_record
    record = {}
    if not type(lines) == type([]):
        lines = [lines]
    for line in lines:
        line = line.strip()
        m = val_bool_re.match(line)
        if m:
            attr, val = m.groups()
            if val.lower().find("true") >= 0:
                record[attr] = True
            else:
                record[attr] = False
            continue
        m = val_int_re.match(line)
        if m:
            attr, val = m.groups()
            record[attr] = int(val)
            continue
        m = val_double_re.match(line)
        if m:
            attr, val = m.groups()
            record[attr] = float(val)
            continue
        m = val_string_re.match(line)
        if m:
            attr, val = m.groups()
            record[attr] = str(val)
            continue
        m = val_catchall_re.match(line)
        if m:
            attr, val = m.groups()
            record[attr] = str(val)
            continue
        if not line:
            yield record
            record = {}
            continue
        if line[0] == '#':
            continue
        DebugPrint(2, "Invalid line in record stream: %s" % line)
    yield record
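# Illustrative sketch, not part of the original probe: the kind of "name = value"
# stream lines_to_record() consumes and the records it yields. The attribute
# names are made up; the exact typing of each value depends on the val_*_re
# expressions defined alongside the function.
def _example_lines_to_record_usage():
    sample = [
        "# comment lines are skipped",
        "njobs = 42",
        "wall_hours = 1.5",
        "completed = true",
        "user = alice",
        "",                      # empty line: the first record is yielded here
        "njobs = 7",
        "user = bob",            # end of stream: the last record is yielded after the loop
    ]
    for record in lines_to_record(sample):
        DebugPrint(5, "Parsed record: %s" % record)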
def register_gratia(self, name):
    Gratia.RegisterReporter(name)

    try:
        slurm_version = self.get_slurm_version()
    except Exception as e:
        DebugPrint(0, "Unable to get SLURM version: %s" % str(e))
        raise

    Gratia.RegisterService("SLURM", slurm_version)
    Gratia.setProbeBatchManager("slurm")
def process_data_fd(self, fd, filename=None):
    """Process records from a file descriptor.

    If filename is None there are no transient files (e.g. recovery mode).
    Otherwise filename is a transient file Gratia will attempt to clean up afterward.
    Transient files are associated with the first record in the file.
    This works well only if transient files have only one record; otherwise they
    will be deleted if the first record is processed successfully (or deemed
    uninteresting), quarantined if the first record fails to process.
    """
    count_submit = 0
    count_found = 0
    if filename:
        added_transient = False
    else:
        added_transient = True
    for record in lines_to_record(fd):
        count_found += 1
        if not record:
            DebugPrint(5, "Ignoring empty record from file: %s" % fd.name)
            continue
        if not added_transient:
            record['gratia_logfile'] = filename
            added_transient = True
        try:
            yield record
        except KeyboardInterrupt:
            raise
        except SystemExit:
            raise
        except IgnoreRecordException as e:
            DebugPrint(3, "Ignoring Record: %s" % str(e))
            count_submit += 1
            continue
        except Exception as e:
            DebugPrint(2, "Exception while processing the record: %s" % str(e))
            continue
def process_record(self, record):
    # TODO: yield the value for processing to gratia ()
    # gratia_logfile attribute (if present) is used to keep track of and delete files
    DebugPrint(5, "Creating JUR for %s" % record)

    # Filter out uninteresting records (and remove their files)
    if False:
        if 'gratia_logfile' in record:
            DebugPrint(1, 'Deleting transient record file: ' + record["gratia_logfile"])
            file_utils.RemoveFile(record['gratia_logfile'])
        raise IgnoreRecordException("Ignoring record.")

    # Define the record
    # UsageRecord is defined in https://twiki.opensciencegrid.org/bin/view/Accounting/ProbeDevelopement
    # setters have the name of the attribute
    # Set resource type ( Batch, BatchPilot, GridMonitor, Storage, ActiveTape )
    resource_type = "Batch"
    r = Gratia.UsageRecord(resource_type)

    # fill r using the values in record

    # remember to specify the transient file (that will be removed if the record
    # is acquired successfully)
    if 'gratia_logfile' in record:
        r.AddTransientInputFile(record['gratia_logfile'])

    return r
    # TODO: end of part to remove

#############################################################
# Some references
# http://seann.herdejurgen.com/resume/samag.com/html/v11/i04/a6.htm
# http://stackoverflow.com/questions/14863224/efficient-reading-of-800-gb-xml-file-in-python-2-7
# http://radimrehurek.com/2014/03/data-streaming-in-python-generators-iterators-iterables/
def _users(self, where):
    cursor = self._conn.cursor()

    # Default GROUP_CONCAT() maximum length is 1024 chars
    # Increase it to 64MB
    cursor.execute('SET SESSION group_concat_max_len=64*1024*1024;')

    # See enum job_states in slurm/slurm.h for state values
    sql = '''SELECT j.id_user
                  , j.id_group
                  , (SELECT SUM(cpus_req)
                     FROM %(cluster)s_job_table
                     WHERE id_user = j.id_user AND state IN (0,2)) AS cpus_pending
                  , (SELECT GROUP_CONCAT('|', tres_alloc)
                     FROM %(cluster)s_job_table
                     WHERE id_user = j.id_user AND state IN (1)) AS tres_alloc_list
                  , MAX(j.time_end) AS time_end
                  , a.acct
                  , a.user
             FROM %(cluster)s_job_table as j
             LEFT JOIN %(cluster)s_assoc_table AS a ON j.id_assoc = a.id_assoc
             WHERE %(where)s
             GROUP BY id_user
             ORDER BY time_end
    ''' % {'cluster': self._cluster, 'where': where}
    DebugPrint(5, "Executing SQL: %s" % sql)
    cursor.execute(sql)

    for r in cursor:
        # Add handy data to job record
        r['cluster'] = self._cluster

        # Extract cpus_alloc from tres_alloc and sum to get cpus_running.
        # We were formerly relying on SQL to sum the cpus_alloc.
        # Now we get a list of tres_alloc parameters, parse them, and sum
        # the CPU count ourselves.
        r['cpus_running'] = 0
        if r['tres_alloc_list']:
            for tres_txt in r['tres_alloc_list'].split('|'):
                tres = self._parse_tres(tres_txt)
                # tres_types_t.TRES_CPU = 1
                r['cpus_running'] += tres.get(1, 0)

        # Return 0 instead of None where we don't have values
        if r['cpus_pending'] is None:
            r['cpus_pending'] = 0

        self._addUserInfoIfMissing(r)
        yield r
def process_data_dirs(self, dirs=None):
    submit_count = 0
    found_count = 0
    logs_found = 0
    logfile_errors = 0
    # Note we are not ordering logfiles by type, as we don't want to
    # pull them all into memory at once.
    DebugPrint(4, "We will process the following directories: %s." % ", ".join(dirs))
    for log in self.logfiles_to_process(dirs):
        logs_found += 1
        _, logfile_name = os.path.split(log)
        # This should actually not be needed (done in the iterator)
        # Make sure the filename is in a reasonable format
        m = self.LOGFILE_RE.match(logfile_name)
        if not m:
            DebugPrint(2, "Ignoring log file with invalid name: %s" % log)
            continue
        cnt_submit, cnt_found = self.process_data_file(log)
        if cnt_submit == cnt_found and cnt_submit > 0:
            DebugPrint(5, "Processed %i records from file %s" % (cnt_submit, log))
        else:
            DebugPrint(2, "Unable to process records from file (will add to quarantine): %s. "
                          "Submit count %d; found count %d" % (log, cnt_submit, cnt_found))
            GratiaCore.QuarantineFile(log, False)
            logfile_errors += 1
        submit_count += cnt_submit
        found_count += cnt_found

    DebugPrint(2, "Number of logfiles processed: %d" % logs_found)
    DebugPrint(2, "Number of logfiles with errors: %d" % logfile_errors)
    DebugPrint(2, "Number of usage records submitted: %d" % submit_count)
    DebugPrint(2, "Number of usage records found: %d" % found_count)
def _parse_tres(self, tres):
    """Parse SLURM database tres_alloc job data into a dict"""
    # SLURM 15 changed its job_table.cpus_alloc database column to tres_alloc
    # and converted the data to a comma-separated list of "key=value" pairs.
    # Keys are defined in tres_types_t in src/common/slurmdb_defs.h
    # 1 => CPU, 2 => MEM, 3 => ENERGY, 4 => NODE
    ret = dict()
    for item in tres.split(','):
        # Skip blank entries
        if not item:
            continue
        try:
            k, v = item.split('=', 1)
            ret[int(k)] = int(v)
        except ValueError:
            # TRES string is damaged? Continuing.
            DebugPrint(1, "Error parsing TRES string '%s'" % tres)
    return ret
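# Illustrative sketch, not part of the original probe: what _parse_tres() returns
# for a made-up tres_alloc string, using the numeric keys noted in the comments
# above. The method name and input value are hypothetical.
def _example_parse_tres_usage(self):
    tres = self._parse_tres("1=16,2=64000,4=2")   # hypothetical tres_alloc value
    # tres == {1: 16, 2: 64000, 4: 2}
    return tres.get(1, 0)   # allocated CPUs (tres_types_t.TRES_CPU = 1) -> 16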
def _users(self, where):
    cursor = self._conn.cursor()

    # See enum job_states in slurm/slurm.h for state values
    sql = '''SELECT j.id_user
                  , j.id_group
                  , (SELECT SUM(cpus_req)
                     FROM %(cluster)s_job_table
                     WHERE id_user = j.id_user AND state IN (0,2)) AS cpus_pending
                  , (SELECT SUM(cpus_alloc)
                     FROM %(cluster)s_job_table
                     WHERE id_user = j.id_user AND state IN (1)) AS cpus_running
                  , MAX(j.time_end) AS time_end
                  , a.acct
                  , a.user
             FROM %(cluster)s_job_table as j
             LEFT JOIN %(cluster)s_assoc_table AS a ON j.id_assoc = a.id_assoc
             WHERE %(where)s
             GROUP BY id_user
             ORDER BY time_end
    ''' % {'cluster': self._cluster, 'where': where}
    DebugPrint(5, "Executing SQL: %s" % sql)
    cursor.execute(sql)

    for r in cursor:
        # Add handy data to job record
        r['cluster'] = self._cluster

        # Return 0 instead of None where we don't have values
        if r['cpus_pending'] is None:
            r['cpus_pending'] = 0
        if r['cpus_running'] is None:
            r['cpus_running'] = 0

        self._addUserInfoIfMissing(r)
        yield r
class SlurmProbe:

    opts = None
    args = None
    checkpoint = None
    conn = None
    cluster = None
    sacct = None

    def __init__(self):
        try:
            self.opts, self.args = self.parse_opts()
        except Exception, e:
            print >> sys.stderr, str(e)
            sys.exit(1)

        # Initialize Gratia
        if not self.opts.gratia_config or not os.path.exists(self.opts.gratia_config):
            raise Exception("Gratia config, %s, does not exist." % self.opts.gratia_config)
        Gratia.Initialize(self.opts.gratia_config)

        if self.opts.verbose:
            Gratia.Config.set_DebugLevel(5)

        # Sanity checks for the probe's runtime environment.
        GratiaWrapper.CheckPreconditions()

        if self.opts.sleep:
            rnd = random.randint(1, int(self.opts.sleep))
            DebugPrint(2, "Sleeping for %d seconds before proceeding." % rnd)
            time.sleep(rnd)

        # Make sure we have an exclusive lock for this probe.
        GratiaWrapper.ExclusiveLock()

        self.register_gratia("slurm_meter")

        # Find the checkpoint filename (if enabled)
        if self.opts.checkpoint:
            checkpoint_file = os.path.join(Gratia.Config.get_WorkingFolder(), "checkpoint")
        else:
            checkpoint_file = None

        # Open the checkpoint file
        self.checkpoint = SlurmCheckpoint(checkpoint_file)

        # Only process DataFileExpiration days of history
        # (unless we're resuming from a checkpoint file)
        if self.checkpoint.val is None:
            self.checkpoint.val = int(time.time() - (Gratia.Config.get_DataFileExpiration() * 86400))

        # Connect to database
        self.conn = self.get_db_conn()

        self.cluster = Gratia.Config.getConfigAttribute('SlurmCluster')
        self.sacct = SlurmAcct(self.conn, self.cluster)
def _jobs(self, where, having='1=1'):
    cursor = self._conn.cursor()

    # Note: When jobs are preempted, multiple cluster_job_table records
    # are inserted, each with distinct start and end times.
    # We consider the walltime to be the total time running,
    # adding up all the records.

    if LooseVersion(self._slurm_version) < LooseVersion("18"):
        max_rss = '''( SELECT MAX(s.max_rss)
                       FROM `%(cluster)s_step_table` s
                       WHERE s.job_db_inx = j.job_db_inx
                       /* Note: Will underreport mem for jobs with simultaneous steps */
                     )''' % {'cluster': self._cluster}
    else:
        max_rss = '''MAX(j.mem_req)'''

    sql = '''SELECT j.id_job
                  , j.exit_code
                  , j.id_group
                  , j.id_user
                  , j.job_name
                  , j.tres_alloc
                  , j.partition
                  , j.state
                  , MIN(j.time_start) AS time_start
                  , MAX(j.time_end) AS time_end
                  , SUM(j.time_suspended) AS time_suspended
                  , SUM(CASE WHEN j.time_end < j.time_start + j.time_suspended
                             THEN 0
                             ELSE j.time_end - j.time_start - j.time_suspended
                        END) AS wall_time
                  , a.acct
                  , a.user
                  , %(max_rss)s AS max_rss
                  , ( SELECT SUM(s.user_sec) + SUM(s.user_usec/1000000)
                      FROM `%(cluster)s_step_table` s
                      WHERE s.job_db_inx = j.job_db_inx
                    ) AS cpu_user
                  , ( SELECT SUM(s.sys_sec) + SUM(s.sys_usec/1000000)
                      FROM `%(cluster)s_step_table` s
                      WHERE s.job_db_inx = j.job_db_inx
                    ) AS cpu_sys
             FROM `%(cluster)s_job_table` as j
             LEFT JOIN `%(cluster)s_assoc_table` AS a ON j.id_assoc = a.id_assoc
             WHERE %(where)s
             GROUP BY j.id_job
                  , j.exit_code
                  , j.id_group
                  , j.id_user
                  , j.job_name
                  , j.tres_alloc
                  , j.partition
                  , j.state
                  , a.acct
                  , a.user
                  , j.job_db_inx
             HAVING %(having)s
             ORDER BY j.time_end
    ''' % {'cluster': self._cluster, 'where': where, 'having': having, 'max_rss': max_rss}
    DebugPrint(5, "Executing SQL: %s" % sql)
    cursor.execute(sql)

    for r in cursor:
        # Add handy data to job record
        r['cluster'] = self._cluster
        # Extract cpus_alloc from tres_alloc
        tres = self._parse_tres(r['tres_alloc'])
        r['cpus_alloc'] = tres.get(1, 0)  # tres_types_t.TRES_CPU = 1
        self._addUserInfoIfMissing(r)
        yield r
def _get_version(self, rpm_package_name=None, version_command=None,
                 version_command_filter=None):
    """Get the program version, looking in order at:
    0. self._version (caching the value from previous executions)
    1. rpm -q
    2. the output (stdout only, not stderr) of version_command filtered by
       version_command_filter
    3. the value in the config file (stored in self._static_info['version'])
    This is a protected method
    """
    DebugPrint(5, "Called get_version (%s, %s; %s, %s, %s)" %
               (self._version, self._static_info['version'],
                rpm_package_name, version_command, version_command_filter))
    if self._version:
        return self._version
    if rpm_package_name:
        # Use RPM version, as specified in
        # http://fedoraproject.org/wiki/Packaging%3aNamingGuidelines#Package_Versioning
        # rpm --queryformat "%{NAME} %{VERSION} %{RELEASE} %{ARCH}" -q
        # %% to escape %
        fd = os.popen('rpm --queryformat "%%{NAME} %%{VERSION} %%{RELEASE} %%{ARCH}" -q %s' %
                      rpm_package_name)
        version = fd.read()
        if fd.close():
            DebugPrint(4, "Unable to invoke rpm to retrieve the %s version" % rpm_package_name)
            # raise Exception("Unable to invoke rpm to retrieve version")
        else:
            rpm_version_re = re.compile(r"^(.*)\s+(.*)\s+(.*)\s+(.*)$")
            m = rpm_version_re.match(version.strip())
            if m:
                self._version = "%s-%s" % (m.groups()[1], m.groups()[2])
                return self._version
            DebugPrint(4, "Unable to parse the %s version from 'rpm -q'" % rpm_package_name)
    if version_command:
        # Use version command
        fd = os.popen(version_command)
        version = fd.read()
        if fd.close():
            DebugPrint(4, "Unable to invoke '%s' to retrieve the version" % version_command)
            # raise Exception("Unable to invoke command")
        else:
            if version_command_filter:
                version = version_command_filter(version.strip())
            if version:
                self._version = version
                return self._version
            DebugPrint(4, "Unable to parse the version from '%s'" % version_command)
    # If the others fail, try the version attribute from the config file
    retv = self._static_info['version']
    if not retv:
        DebugPrint(2, "Unable to retrieve the ProbeInput (%s) version" % type(self).__name__)
        # raise Exception("Unable to parse condor_version output: %s" % version)
        return ProbeInput.UNKNOWN
    self._version = retv
    return retv
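# Illustrative sketch, not part of the original probe: how a concrete input class
# might call the protected _get_version() above. The package name, command and
# filter below are hypothetical examples, not values taken from the probe.
def _example_get_version_call(self):
    return self._get_version(
        rpm_package_name="slurm",                    # try "rpm -q slurm" first
        version_command="srun --version",            # then fall back to this command
        version_command_filter=lambda out: out.split()[-1])  # keep only the version token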
class PgInput(DbInput):
    """PostgreSQL input.

    Database name, host and user are mandatory parameters.
    Port (5432) and password are optional.

    Type conversion is done by psycopg2 (http://initd.org/psycopg/docs/usage.html)::

        Python                   PostgreSQL
        None                     NULL
        bool                     bool
        float                    real, double
        int                      smallint
        long                     integer, bigint
        Decimal                  numeric
        str                      varchar
        unicode                  text
        buffer, memoryview,
        bytearray, bytes,
        Buffer protocol          bytea
        date                     date
        time                     time
        datetime                 timestamp, timestamptz
        timedelta                interval
        list                     ARRAY
        tuple, namedtuple        Composite types
        dict                     hstore
        Psycopg's Range          range
        Anything(TM)             json
        uuid                     uuid
    """

    def __init__(self, conn=None):
        DbInput.__init__(self)
        # PsycoPG 2.4 or greater supports itersize, so that an iterable named cursor
        # is not fetching only 1 row at a time
        self.support_itersize = True
        self._cursor = None
        if conn:
            self._connection = conn
        else:
            self._connection = None

    def open_db_conn(self):
        """Return a database connection"""
        # PG Defaults in libpq connection string / dsn parameters:
        # DbUser,user: same as UNIX user
        # DbName,dbname: DbUser
        # DbHost,host: UNIX socket
        # DbPort,port: 5432
        # Other optional PG parameters:
        # http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
        dburl = 'dbname=%s user=%s host=%s' % (self._static_info['DbName'],
                                               self._static_info['DbUser'],
                                               self._static_info['DbHost'])
        if self._static_info['DbPort']:
            dburl += ' port=%s' % self._static_info['DbPort']
        if self._static_info['DbPassword']:
            dburl += ' password=%s' % self._static_info['DbPassword']
        DebugPrint(4, "Connecting to PgSQL database: %s" % dburl)
        try:
            self._connection = psycopg2.connect(dburl)
            self._cursor = self._get_cursor(self._connection)
        except:
            tblist = traceback.format_exception(*sys.exc_info())
            errmsg = 'Failed to connect to %s:\n%s' % (dburl, "\n".join(tblist))
            DebugPrint(1, errmsg)
            raise
            # Masking connection failure
            # self._connection = None
        return self._connection

    def _get_cursor(self, connection, buffer_size=None):
        """Return a cursor for the given connection

        :param connection: PG connection
        :param buffer_size: size used when fetching resultsets (None for the default one)
        :return: cursor
        """
        # give the cursor a unique name which will invoke server side cursors
        # TODO: should this be unique each time or for input?
        cursor = connection.cursor(name='cur%s' % str(uuid.uuid4()).replace('-', ''),
                                   cursor_factory=psycopg2.extras.DictCursor)
        # cursor.tzinfo_factory = None
        if not buffer_size:
            cursor.arraysize = self._max_select_mem()
        else:
            cursor.arraysize = buffer_size
        try:
            cursor.itersize = cursor.arraysize
        except AttributeError:
            self.support_itersize = False
        return cursor

    def close_db_conn(self):
        """Explicitly close the connection. The connection is closed automatically at del"""
        # NOTE: uncommitted operations are rolled back but inputs are read only
        if self._connection is not None:
            if self._cursor is not None:
                try:
                    self._cursor.close()
                except psycopg2.InterfaceError:
                    # was already closed
                    pass
                self._cursor = None
            try:
                self._connection.close()
            except psycopg2.InterfaceError:
                # was already closed
                pass
            self._connection = None

    def status_ok(self):
        """Return True if OK, False if the connection is closed"""
        if self._connection is None or self._cursor is None:
            return False
        # TODO: do a select 1 test?
        # The only way to really test:
        # try:
        #     self._cursor.execute("SELECT 1")
        #     return True
        # except:
        #     return False
        return True

    def status_string(self):
        """Return a string describing the current status"""
        if self._connection is None:
            return "NOT CONNECTED"
        if self._cursor is None:
            return "NO CURSOR"
        retv = "CONNECTED"
        trans_status = self._connection.get_transaction_status()
        trans_string = ""
        if trans_status == psycopg2.extensions.STATUS_READY:
            trans_string = "STATUS_READY"
        elif trans_status == psycopg2.extensions.STATUS_BEGIN:
            trans_string = "STATUS_BEGIN"
        elif trans_status == psycopg2.extensions.STATUS_IN_TRANSACTION:
            trans_string = "STATUS_IN_TRANSACTION"
        elif trans_status == psycopg2.extensions.STATUS_PREPARED:
            trans_string = "STATUS_PREPARED"
        if trans_status is not None:
            retv = "%s (%s/%s)" % (retv, trans_status, trans_string)
        return retv

    def query(self, sql):
        """Generator returning one row at a time as a pseudo-dictionary (DictCursor).

        psycopg2.extras.DictCursor rows are tuples, accessible by index and returned
        as values, not keys, in a loop (for i in row), but row.keys() lists the columns
        and row['column_name'] accesses the column. It is compatible w/ standard cursors.
        For a proper dictionary see psycopg2.extras.RealDictCursor.
        NOTE that the values are not mutable (cannot be changed)

        :param sql: string w/ the SQL query
        :return: row as psycopg2.extras.DictCursor (tuple and dictionary)
        """
        if not sql:
            DebugPrint(2, "WARNING: No SQL provided: no query.")
            return
        if not self._connection:
            DebugPrint(4, "WARNING: No connection provided: trying to (re)open connection.")
            if not self.open_db_conn():
                DebugPrint(2, "WARNING: Unable to open connection: no query.")
                return
        if not self._cursor:
            self._cursor = self._get_cursor(self._connection)
            if not self._cursor:
                DebugPrint(2, "WARNING: Unable to get cursor: no query.")
                return
        cursor = self._cursor
        DebugPrint(4, "Executing SQL: %s" % sql)
        try:
            cursor.execute(sql)
        except psycopg2.ProgrammingError as er:
            DebugPrint(2, "ERROR, error running the query: %s" % er)
        if cursor.rowcount is None:
            DebugPrint(2, "WARNING, problems running the query: %s" % sql)
        elif cursor.rowcount <= 0:
            DebugPrint(3, "WARNING, no rows returned by the query (rowcount: %s). "
                          "OK for iterators." % cursor.rowcount)
        # resultset = self._cur.fetchall()
        if self.support_itersize:
            for r in cursor:
                yield r
        else:
            # implement itersize manually (for psycopg < 2.4)
            # normal iteration would be inefficient, fetching one record at a time
            while True:
                resultset = cursor.fetchmany()
                if not resultset:
                    break
                for r in resultset:
                    yield r
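# Illustrative sketch, not part of the original probe: configuring a PgInput and
# iterating over a query, based on the static-info keys used by open_db_conn()
# above. The connection values and the SQL statement are made-up placeholders,
# and add_static_info()/default keys are assumed from the ProbeInput/DbInput base
# classes shown elsewhere in this code.
def _example_pg_input_usage():
    pg = PgInput()
    pg.add_static_info({'DbName': 'accounting', 'DbUser': 'reader',
                        'DbHost': 'db.example.org', 'DbPort': '5432',
                        'DbPassword': 'secret'})
    pg.open_db_conn()
    try:
        for row in pg.query("SELECT 1 AS one"):
            DebugPrint(5, "Got row: %s" % row['one'])
    finally:
        pg.close_db_conn()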