def terminate_process(pid, jobchainid, log_id, writeconnectioninfo):
    """Send SIGINT to a running job-chain process and wait for it to stop.

    The run-log row selected by ``jobchainid`` and ``log_id`` is read first.
    The signal is only sent when that row has status ``'RUNNING...'`` and its
    recorded pid equals ``pid``.  After a successful ``kill`` the function
    polls ``kill_check`` up to five times, one second apart.

    Args:
        pid: Process id the caller wants to interrupt.
        jobchainid: Value matched against ``jobchain_id`` in the run log.
        log_id: Value matched against ``id`` in the run log.
        writeconnectioninfo: Connection info handed to ``mycon.db_read``;
            element 4 is used as the schema name in the query.

    Raises:
        RuntimeError: if the lookup does not return exactly one row, the
            status is not ``'RUNNING...'``, the pid does not match, ``kill``
            exits non-zero (message is its stderr), the job is still
            reported as running after five checks, or ``kill`` times out.
    """
    lookup_sql = '''select pid, status from {}.jobchain_lst_run where jobchain_id={} and id = {}'''.format(
        writeconnectioninfo[4], jobchainid, log_id)
    rows = mycon.db_read(writeconnectioninfo, lookup_sql)
    if len(rows) != 1:
        raise RuntimeError('No result returned')

    recorded_pid = rows[0][0]
    run_status = rows[0][1]
    if run_status != 'RUNNING...':
        raise RuntimeError('The job is not running')
    if recorded_pid != pid:
        raise RuntimeError('Mismatched pid')

    kill_cmd = '''sudo kill -sigint {}'''.format(pid)
    try:
        completed = sp.run(kill_cmd.split(' '), stdout=sp.PIPE, stderr=sp.PIPE, timeout=5)
        time.sleep(1)
        if completed.returncode != 0:
            raise RuntimeError(completed.stderr.decode('ascii', 'ignore'))
        # Five checks, each followed by a one-second pause while the job is
        # still reported as running; give up after the fifth pause.
        for _ in range(5):
            if kill_check(log_id, writeconnectioninfo) is not False:
                return
            time.sleep(1)
        raise RuntimeError('Cannot kill this job')
    except sp.TimeoutExpired:
        raise RuntimeError('Time out, Please try again')
def fetchjob(self, conninfo):
    """Append this chain's jobs to ``self.joblst`` and order them by step.

    Rows are read from ``schedulerDB.jobtable`` where ``job_chain_id`` equals
    ``self.id``.  Each row becomes a ``Myjob`` built from its five columns
    (id, step_lvl, env, command, ottime), and the list is then sorted in
    place on each job's ``step`` attribute.

    Args:
        conninfo: Connection info handed to ``mycon.db_read``.
    """
    # dbread
    job_sql = (
        "SELECT id, step_lvl, env, command, ottime "
        "FROM schedulerDB.jobtable WHERE job_chain_id={};"
    ).format(self.id)
    for record in mycon.db_read(conninfo, job_sql):
        job = Myjob(record[0], record[1], record[2], record[3], record[4])
        self.joblst.append(job)
    self.joblst.sort(key=lambda job: job.step)
def register(EXTERNAL_IP):
    """Record this scheduler node in the ``registered_scheduler`` table.

    Connection details come from ``DATABASES['scheduler_control']``.  When
    exactly one row already exists for ``EXTERNAL_IP`` its port and role are
    updated from ``SOCKET_SERVER_PORT`` and ``NODE_TYPE``; for any other row
    count (including more than one) a new row is inserted.

    Args:
        EXTERNAL_IP: Value stored in / matched against ``machine_ip``.
    """
    settings = DATABASES['scheduler_control']
    dbconnectioninfo = [
        settings['HOST'],
        int(settings['PORT']),
        settings['USER'],
        settings['PASSWORD'],
        settings['NAME'],
    ]
    schema = dbconnectioninfo[4]

    count_sql = "select count(*) from {}.registered_scheduler where machine_ip = '{}'".format(
        schema, EXTERNAL_IP)
    row_count = mycon.db_read(dbconnectioninfo, count_sql)[0][0]

    if row_count == 1:
        # Exists: refresh port and role for this IP.
        write_sql = (
            "UPDATE {}.registered_scheduler SET machine_port={}, machine_role='{}' "
            "where machine_ip = '{}' "
        ).format(schema, SOCKET_SERVER_PORT, NODE_TYPE, EXTERNAL_IP)
    else:
        # Not exists: add a new row.
        write_sql = (
            "INSERT INTO {}.registered_scheduler (machine_ip, machine_port, machine_role) "
            "VALUES ('{}',{},'{}')"
        ).format(schema, EXTERNAL_IP, SOCKET_SERVER_PORT, NODE_TYPE)
    mycon.db_write(dbconnectioninfo, write_sql)
def kill_check(log_id, writeconnectioninfo):
    """Report whether run-log entry ``log_id`` is no longer in a running state.

    Args:
        log_id: Value matched against ``id`` in ``jobchain_lst_run``.
        writeconnectioninfo: Connection info handed to ``mycon.db_read``;
            element 4 is used as the schema name in the query.

    Returns:
        bool: ``True`` when the stored status does not contain "running"
        (compared case-insensitively), ``False`` while it still does.

    Raises:
        IndexError: if no row exists for ``log_id``.
    """
    querystr = '''select status from {}.jobchain_lst_run where id = {}'''.format(
        writeconnectioninfo[4], log_id)
    res = mycon.db_read(writeconnectioninfo, querystr)
    status = res[0][0]
    # terminate_process treats the upper-case literal 'RUNNING...' as the
    # running state, so a case-sensitive search for 'running' would report a
    # still-running job as stopped.  Compare case-insensitively instead.
    return 'running' not in status.lower()
def scheduler_status_check(self):
    """Try to claim the ``pid_index`` slot for this process.

    The row of ``schedulerDB.realtime_status`` named ``'bkp'`` (when
    ``self.envinfo[0] == 1``) or ``'main'`` (otherwise) is read.  If no such
    row exists, ``self.scheduler_status_init`` is called with the current
    pid.  If the row exists with ``pid_index`` equal to 0, the current pid
    is written into it.

    Returns:
        bool: ``True`` when the slot was initialised or claimed, ``False``
        when the row already holds a non-zero ``pid_index``.
    """
    role = 'bkp' if self.envinfo[0] == 1 else 'main'
    current_pid = os.getpid()

    select_sql = (
        "SELECT pid_index FROM schedulerDB.realtime_status "
        "WHERE name = '{}';"
    ).format(role)
    rows = myconn.db_read(self.writeconnectioninfo, select_sql)

    if len(rows) == 0:
        self.scheduler_status_init(current_pid)
        return True

    if rows[0][0] != 0:
        # Slot already holds a pid; leave it untouched.
        return False

    claim_sql = "UPDATE schedulerDB.realtime_status SET pid_index={} WHERE name = '{}';".format(
        current_pid, role)
    myconn.db_write(self.writeconnectioninfo, claim_sql)
    return True
def insert_job(jobchainid, replyqueue, readconnectioninfo, writeconnectioninfo, envinfo,
               tracklck, runningjobdict):
    """Start a worker process for a single job chain on request.

    The chain is looked up in ``jobchain_table``, restricted to rows whose
    ``BKP_flag`` (when ``envinfo[0] == 1``) or ``main_flag`` (otherwise)
    column is 1.  Under ``tracklck``, the chain is marked as running in
    ``runningjobdict`` and ``worker.worker`` is started in a new process,
    unless the chain is already marked as running.

    Args:
        jobchainid: Id of the job chain to run.
        replyqueue: Passed through to ``worker.worker``.
        readconnectioninfo: Connection info for the chain lookup; element 4
            is used as the schema name.  Also passed to the worker.
        writeconnectioninfo: Passed through to ``worker.worker``.
        envinfo: Environment info; element 0 equal to 1 selects backup mode.
            Also passed to the worker.
        tracklck: Lock guarding ``runningjobdict``.
        runningjobdict: Mapping of ``str(chain id)`` to 1 for running chains.

    Raises:
        RuntimeError: if the lookup does not return exactly one row, or if
            the chain is already marked as running.
    """
    if envinfo[0] == 1:
        typestr = 'bkp'
        flagstr = 'BKP_flag'
    else:
        typestr = 'main'
        flagstr = 'main_flag'

    query = (
        "SELECT id,timestr, dep_ot ,job_desc "
        "FROM {}.jobchain_table "
        "WHERE id={} and {}=1; "
    ).format(readconnectioninfo[4], jobchainid, flagstr)
    # The schema name above is taken from readconnectioninfo, so the query
    # has to run on that same connection rather than on the write connection.
    res = mycon.db_read(readconnectioninfo, query)
    if len(res) != 1:
        raise RuntimeError('{} does not have jobchain {}'.format(typestr, jobchainid))

    row = res[0]
    eachjc = mj.Myjobchain(row[0], row[1], None, row[2], row[3])

    tracklck.acquire()
    try:
        already_running = (str(jobchainid) in runningjobdict
                           and runningjobdict[str(jobchainid)] == 1)
        if not already_running:
            runningjobdict[str(eachjc.id)] = 1
            p = mp.Process(target=worker.worker,
                           args=(eachjc, replyqueue, readconnectioninfo,
                                 writeconnectioninfo, envinfo))
            p.start()
    finally:
        tracklck.release()

    # Raised only after the lock has been released.
    if already_running:
        raise RuntimeError('Job Id: {} not finished'.format(eachjc.id))
def sqlfetcher(timestampqueue: mp.Queue, replyqueue, readconnectioninfo, writeconnectioninfo,
               envinfo, tracklck, runningjobdict):
    """Loop forever, starting a worker process for each job chain that is due.

    For every timestamp taken from ``timestampqueue`` the function:

    1. reads the switched-on chains whose ``BKP_flag`` (when
       ``envinfo[0] == 1``) or ``main_flag`` (otherwise) is 1, via
       ``readconnectioninfo``;
    2. reads the latest ``last_run_start`` per chain via
       ``writeconnectioninfo`` and attaches it to each chain;
    3. switches OFF any chain for which ``jobchain_timestrcheck`` returns
       False, skips chains for which ``jobcheck`` returns False, and for the
       rest starts ``worker.worker`` in a new process unless the chain is
       already marked as running in ``runningjobdict``;
    4. logs one message per rejected chain through ``util.sendlog``.

    The loop has no exit of its own.  When an exception ends it, the
    ``pid_sqlfetcher`` column of ``schedulerDB.realtime_status`` is set to 0
    (the statement has no WHERE clause).

    Args:
        timestampqueue: Queue supplying the execution timestamps.
        replyqueue: Passed through to ``worker.worker``.
        readconnectioninfo: Connection info for reading chain definitions.
        writeconnectioninfo: Connection info for run-log reads, writes and
            logging.
        envinfo: Environment info; element 0 equal to 1 selects backup mode.
        tracklck: Lock guarding ``runningjobdict``.
        runningjobdict: Mapping of ``str(chain id)`` to 1 for running chains.
    """
    pidregister(os.getpid(), writeconnectioninfo)
    flag_column = 'BKP_flag' if envinfo[0] == 1 else 'main_flag'
    spawned = {}

    # Both statements are identical on every pass, so build them once.
    active_chains_sql = (
        "SELECT id, timestr, dep_ot ,job_desc "
        "FROM schedulerDB.jobchain_table "
        "WHERE switch='ON' and {}=1; "
    ).format(flag_column)
    last_run_sql = (
        "SELECT jobchain_id, MAX(last_run_start) FROM schedulerDB.jobchain_lst_run "
        "GROUP BY jobchain_id ORDER BY jobchain_id;"
    )
    # Rejection code -> log message template.
    report_templates = {
        0: 'Job Id: {} not finished',
        1: 'Job Id: {} invalid timestr',
    }

    try:
        while True:
            tick = timestampqueue.get()  # get timestamp
            chain_rows = mycon.db_read(readconnectioninfo, active_chains_sql)
            run_rows = mycon.db_read(writeconnectioninfo, last_run_sql)

            # Placeholder row keeps the frame two-dimensional when there is
            # no run history yet.
            if len(run_rows) == 0:
                run_matrix = np.array([[-1, -1]])
            else:
                run_matrix = np.array(run_rows)
            run_frame = pd.DataFrame(run_matrix, index=run_matrix[:, 0])

            chains = []
            for rec in chain_rows:
                if rec[0] in run_frame.index:
                    last_run = run_frame.loc[[rec[0]]].values[0, 1]
                else:
                    last_run = None
                chains.append(mj.Myjobchain(rec[0], rec[1], last_run, rec[2], rec[3]))

            rejected = []
            for chain in chains:
                if jobchain_timestrcheck(chain) is False:
                    rejected.append((1, chain))
                    # Turn off that jobchain
                    off_sql = '''update jobchain_table set switch = 'OFF' where id = {};'''.format(chain.id)
                    mycon.db_write(writeconnectioninfo, off_sql)
                    continue
                if jobcheck(chain, tick) is False:
                    continue

                tracklck.acquire()
                try:
                    chain_key = str(chain.id)
                    if chain_key in runningjobdict and runningjobdict[chain_key] == 1:
                        rejected.append((0, chain))
                    else:
                        runningjobdict[chain_key] = 1
                        proc = mp.Process(target=worker.worker,
                                          args=(chain, replyqueue, readconnectioninfo,
                                                writeconnectioninfo, envinfo))
                        spawned[chain_key] = proc
                        proc.start()
                finally:
                    tracklck.release()

            for code, chain in rejected:
                util.sendlog(writeconnectioninfo, report_templates[code].format(chain.id))
    finally:
        reset_sql = "UPDATE schedulerDB.realtime_status SET pid_sqlfetcher={};".format(0)
        mycon.db_write(writeconnectioninfo, reset_sql)
def fetchdep(self, conninfo):
    """Append this chain's dependencies to ``self.deplst``.

    Rows are read from ``schedulerDB.deptable`` where ``jobchainid`` equals
    ``self.id``; each row becomes a ``Mydep`` built from its four columns
    (id, env, command, ottime), kept in the order returned by the query.

    Args:
        conninfo: Connection info handed to ``mycon.db_read``.
    """
    # dbread
    dep_sql = (
        "SELECT id,env, command, ottime "
        "FROM schedulerDB.deptable WHERE jobchainid={};"
    ).format(self.id)
    for record in mycon.db_read(conninfo, dep_sql):
        dependency = Mydep(record[0], record[1], record[2], record[3])
        self.deplst.append(dependency)