def _launch_new_tasks(self, num_concurrent_tasks): num_to_launch = num_concurrent_tasks - len(self._processes) if num_to_launch <= 0: return with scinetutil.transaction(self._conn, self._must_exit, True) as trx: # By default, NULLs get sorted *after* non-null values, which is what we # want when sorting on priority -- *any* priority value is treated as more # important than no priority value. query = ''' SELECT id, command, run_dir FROM tasks WHERE is_active = TRUE AND started_at IS NULL AND times_failed = 0 AND times_interrupted = 0 ORDER BY priority, batch_name LIMIT %s ''' trx.execute(query, (num_to_launch, )) results = trx.fetchall() if len(results) == 0: return for task_id, command, run_dir in results: process = self._launch_task(task_id, command, run_dir) self._processes[task_id] = process self._mark_started(task_id, trx)
def _insert_node_status(self):
    """Register this node in the `nodes` table and return its new row id.

    Records hostname, PID, the PBS job id (None when not running under
    PBS), and physical/logical CPU counts.
    """
    # os.environ.get yields None when PBS_JOBID is absent, matching the
    # explicit membership test it replaces.
    job_id = os.environ.get('PBS_JOBID')
    with scinetutil.transaction(self._conn, self._must_exit, False) as trx:
        query = '''INSERT INTO nodes (
            hostname,
            process_id,
            job_id,
            physical_cpus,
            logical_cpus,
            created_at,
            last_updated
        ) VALUES (%s, %s, %s, %s, %s, NOW(), NOW())
        RETURNING id
        '''
        params = (
            socket.gethostname(),
            os.getpid(),
            job_id,
            psutil.cpu_count(logical=False),
            psutil.cpu_count(logical=True),
        )
        trx.execute(query, params)
        return trx.fetchone()[0]
def _launch_new_tasks(self, num_concurrent_tasks): num_to_launch = num_concurrent_tasks - len(self._processes) if num_to_launch <= 0: return with scinetutil.transaction(self._conn, self._must_exit, True) as trx: # By default, NULLs get sorted *after* non-null values, which is what we # want when sorting on priority -- *any* priority value is treated as more # important than no priority value. query = ''' SELECT id, command, run_dir FROM tasks WHERE is_active = TRUE AND started_at IS NULL AND times_failed = 0 AND times_interrupted = 0 ORDER BY priority, batch_name LIMIT %s ''' trx.execute(query, (num_to_launch,)) results = trx.fetchall() if len(results) == 0: return for task_id, command, run_dir in results: process = self._launch_task(task_id, command, run_dir) self._processes[task_id] = process self._mark_started(task_id, trx)
def _update_node_status(self):
    """Refresh this node's row in `nodes` with current load, memory, and
    task-count figures.

    NOTE: `psutil.cpu_percent(interval=0.5)` blocks for half a second; it
    is deliberately evaluated inside the transaction, as in the original
    call ordering.
    """
    query = '''UPDATE nodes SET
        load_avg_1min = %s,
        load_avg_5min = %s,
        load_avg_15min = %s,
        cpu_usage = %s,
        mem_free = %s,
        mem_used = %s,
        num_tasks = %s,
        last_updated = NOW()
    WHERE id = %s
    '''
    avg_1, avg_5, avg_15 = os.getloadavg()
    mem = psutil.virtual_memory()
    with scinetutil.transaction(self._conn, self._must_exit, False) as trx:
        trx.execute(query, (
            avg_1,
            avg_5,
            avg_15,
            psutil.cpu_percent(interval=0.5),
            mem.available,
            mem.total - mem.available,  # "used" = total minus available
            len(self._processes),
            self._node_id,
        ))
def terminate_run(self):
    """Shut the run down: stop all children, record state, and disconnect.

    Sends SIGTERM to every tracked subprocess, marks each task as
    interrupted, deletes this node's status row, closes the DB
    connection, and pauses briefly so children can exit before we do.
    """
    stopped = []
    for task_id, proc in self._processes.items():
        logmsg('Terminating task=%s' % task_id)
        proc.terminate()
        stopped.append(task_id)
    # Record the interruptions in one transaction ...
    with scinetutil.transaction(self._conn, self._must_exit, True) as trx:
        for task_id in stopped:
            self._mark_interrupted(task_id, trx)
    # ... then drop our node-status row in another.
    with scinetutil.transaction(self._conn, self._must_exit, False) as trx:
        self._delete_node_status(trx)
    self._conn.close()
    logmsg('Sleeping before exit ...')
    time.sleep(3)
    logmsg('Waking before exit ...')
def _update_finished_tasks(self): finished = [] for task_id, process in self._processes.items(): retval = process.poll() if retval is None: # Process still running continue else: del self._processes[task_id] finished.append((task_id, retval)) if len(finished) > 0: with scinetutil.transaction(self._conn, self._must_exit, True) as trx: for task_id, retval in finished: self._mark_finished(task_id, retval, trx)
def _update_node_status(self):
    """Push current load averages, CPU usage, memory figures, and the
    running-task count into this node's `nodes` row.

    `psutil.cpu_percent(interval=0.5)` samples for half a second; as in
    the original, it runs while the transaction is open.
    """
    query = '''UPDATE nodes SET
        load_avg_1min = %s,
        load_avg_5min = %s,
        load_avg_15min = %s,
        cpu_usage = %s,
        mem_free = %s,
        mem_used = %s,
        num_tasks = %s,
        last_updated = NOW()
    WHERE id = %s
    '''
    load_avg = os.getloadavg()
    mem_usage = psutil.virtual_memory()
    with scinetutil.transaction(self._conn, self._must_exit, False) as trx:
        params = (
            load_avg[0],
            load_avg[1],
            load_avg[2],
            psutil.cpu_percent(interval=0.5),
            mem_usage.available,
            mem_usage.total - mem_usage.available,
            len(self._processes),
            self._node_id,
        )
        trx.execute(query, params)