def write_sh(self, group_id=0):
    """Regenerate the per-group shell files from job_group_set and upload them.

    Reads every non-deleted job row for this instance's frequency
    (``self.frency``), rewrites each group's local shell file from the
    ``cmds`` column, uploads the file to both the UAT and production hosts,
    then re-applies permissions on the remote bin/sql/cfg directories.

    Args:
        group_id: reserved filter — original note said "指定groupid则只更新
            group_id的分组" (only refresh that group); the body currently
            rebuilds every group regardless. Kept for interface compatibility.

    Returns:
        1 on completion.
    """
    engine = conn.meta('etl_data')
    sshcon = ssh_con()
    ssh_uat = ssh_cmd(sshcon.ssh_uat)
    ssh_sc = ssh_cmd(sshcon.ssh_sc)
    sql_txt = """ SELECT group_id,sql_file,cmds FROM job_group_set where del_flag=0 and freq_type='{0}' order by group_id,rank_id """
    job_group = pd.read_sql(sql_txt.format(self.frency), engine)
    gp_map, gp_sql = self.group_sh()
    for i in gp_map.keys():
        filepath = confs.main_path_bin + gp_map[i]
        # BUG FIX: the original opened the file with mode 'a' (append), so
        # stale commands accumulated on every regeneration even though the
        # original "将文件清空" (clear the file) comment shows the intent was
        # a full rewrite. Mode 'w' truncates first; the with-block also
        # guarantees the handle is closed even if a write fails.
        with open(filepath, 'w', encoding='utf-8') as f:
            for sqls in job_group[job_group['group_id'] == i]['cmds']:
                f.write(sqls)
                f.write("\n")
        ssh_uat.upload(filepath, confs.remote_path_bin + gp_map[i])
        ssh_sc.upload(filepath, confs.remote_path_bin + gp_map[i])
    ssh_uat.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh_sc.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh_uat.close()
    ssh_sc.close()
    return 1
def _push(self, instance_list, host_list, pattern):
    """Pack the repo files, push the bundle to the first host, then wait
    until every host's finish flag is set before returning.

    NOTE(review): the return value of the `test -e` probe below is ignored,
    so the "check all machines have the agent" step has no effect on control
    flow — confirm whether ssh.ssh_cmd raises on failure.
    """
    # Probe every target machine for the deployed agent file.
    finish_host_status = {}
    for host in host_list:
        finish_host_status[host] = DeployFinishStatus()
        cmd = 'test -e ' + util.get_remote_deploy_file(
            self.gcm_data.deploy_info.dst_root_path)
        ssh.ssh_cmd(host, self.user, self.password, cmd)
    # Build one tarball containing the repo files for the given instances.
    pack_repo = util.pack_repo_files(
        self.gcm_data.artifact_map,
        self.gcm_data.deploy_info.tmp_root_path,
        self.src_path, instance_list, util.get_repo_name())
    dst_root_path = self.gcm_data.deploy_info.dst_root_path
    remote_repo = util.get_remote_repo(dst_root_path)
    # Upload + unpack on the first host only.
    self._real_push(host_list[0], pack_repo, remote_repo)
    # Wait until all hosts report finished, logging each one once.
    # NOTE(review): only host_list[0] is ever marked finished here; with more
    # than one host this loop can never terminate unless something else
    # (e.g. agent-driven propagation) updates the flags — confirm intent.
    finish_host_status[host_list[0]].finish = True
    while True:
        finish_cnt = 0
        for key, value in finish_host_status.items():
            if value.finish:
                finish_cnt += 1
                if not value.show:
                    # `show` guards against logging the same host twice.
                    LogInfo('host: %s is finish' % key)
                    value.show = True
        if finish_cnt == len(finish_host_status):
            break
        time.sleep(1)
    LogInfo('success to finish')
def sdd_table(self, db, tb_list):
    """Create the SDD tables on UAT first and, on success, on production too.

    (Original note: "uat和生产环境同步建SDD表" — apply the same DDL in both
    environments.)

    Args:
        db: target Hive database name.
        tb_list: list of table names whose DDL should be applied.
    """
    sshcon = ssh_con()
    ssh_uat = ssh_cmd(sshcon.ssh_uat)
    try:
        is_success = ssh_uat.hive_ddl(db, tb_list)
    finally:
        # BUG FIX: the original rebound `ssh` to the production connection
        # before calling close(), so the UAT connection leaked whenever the
        # UAT DDL succeeded. Close each connection explicitly instead.
        ssh_uat.close()
    if is_success > 0:
        ssh_sc = ssh_cmd(sshcon.ssh_sc)
        try:
            ssh_sc.hive_ddl(db, tb_list)
        finally:
            ssh_sc.close()
def _push_agent(self, instance_list, host_list, pattern):
    """Ship the packed agent bundle to every host and mark its scripts executable.

    Packs the agent files once, then for each host uploads/unpacks the bundle
    via _real_push and runs a remote chmod on the deploy bin directory.
    """
    deploy_info = self.gcm_data.deploy_info
    local_bundle = util.pack_agent_files(
        deploy_info.tmp_root_path,
        self.src_path,
        self.gcm_data.agent_artifact.files,
        util.get_agent_repo_name())
    remote_bundle = util.get_remote_agent_repo(deploy_info.dst_root_path)
    # The chmod command is identical for every host, so build it once.
    chmod_cmd = 'cd ' + deploy_info.dst_root_path + '; chmod +x ./repo/deploy/bin/*'
    for target in host_list:
        self._real_push(target, local_bundle, remote_bundle)
        ssh.ssh_cmd(target, self.user, self.password, chmod_cmd)
def __init__(self, group_num=10, frency='d', tar_ssh='ssh_uat'):
    """Initialise scheduler state and open the target SSH connection.

    Args:
        group_num: number of scheduling groups (default 10).
        frency: schedule frequency — 'd' (daily), 'w' (weekly) or 'm'
            (monthly); any other value aborts construction.
        tar_ssh: 'ssh_uat' (default) targets UAT; 'ssh_sc' targets production.

    Raises:
        Exception: if `frency` is not one of 'd', 'w', 'm'.
    """
    self.group_num = group_num
    if frency not in ('d', 'w', 'm'):
        # Keep the original console hint before aborting.
        print('frency 参数只能是 d(天),w(周),m(月) ')
        raise Exception("frency 参数只能是 d(天),w(周),m(月) ")
    self.frency = frency
    self.group_name = frency + '_run_group'
    sshcon = ssh_con()
    # BUG FIX: the original always opened the UAT connection first and, when
    # tar_ssh == 'ssh_sc', immediately replaced it with a production
    # connection without closing it — leaking the UAT connection. Open only
    # the connection that was actually requested.
    if tar_ssh == 'ssh_sc':
        self.ssh = ssh_cmd(sshcon.ssh_sc)
    else:
        self.ssh = ssh_cmd(sshcon.ssh_uat)
def auto_deploy(self, tar_ssh='ssh_uat'):
    """Deploy every entry listed by read_deploy(): sqoop-sync tables whose
    4-char prefix is a known source DB, otherwise treat the entry as a Hive
    SQL job (upload cfg/sql, run it, register it in the schedule).

    Any validation or execution failure prints an error and breaks out of
    the deploy loop entirely (remaining entries are NOT processed).

    Args:
        tar_ssh: 'ssh_uat' (default, uses self.ssh) or 'ssh_sc' to switch
            self.ssh to the production connection first.
    """
    tb_list = self.read_deploy()
    print(tb_list)
    sshcon = ssh_con()
    #ssh=ssh_cmd(sshcon.ssh_uat)
    if tar_ssh == 'ssh_sc':
        self.ssh = ssh_cmd(sshcon.ssh_sc)
    ssh = self.ssh
    for tb in tb_list:
        # First four characters select the source DB (sqoop path) when they
        # match a configured mapping; everything else is a Hive SQL job.
        heads = tb[0:4]
        if heads in confs.db_map.keys():
            print('\n sqoop同步配置:', tb)
            # Strip "<head>_" prefix to get the bare table name.
            tp_tb = tb[5:]
            tar_cmd = heads + ' ' + tp_tb + ' auto'
            tb_size = conn.sljr_tb_size(db=heads, tb=tp_tb)
            if conn.etl_set_exists(tb) > 0:
                # Already scheduled — abort the whole run, not just this entry.
                print(tb, '目标表已经加入了调度,如果需要重新调度请手动修改')
                break
            if tb_size < 0:
                # Negative size means the source table was not found.
                print(tp_tb, '表不存在不能同步,或者检查表名')
                break
            if tb_size > 10000000:
                # Over 10M rows: switch to incremental sync.
                print(tp_tb, '大于1千万需要增量同步:', tb_size)
                tar_cmd = tar_cmd + ' inc'
            if conn.hive_tb_exists(tb) == 0:
                # Create the Hive-side table schema first.
                self.sdd_table(db=heads, tb_list=[tp_tb])
            group_sh = confs.local_path + 'bin/sqoop_' + heads + '.sh'
            tar_cmd = confs.sqoop_sh + tar_cmd
            if self.append_sh(group_sh, tar_cmd) > 0:
                # Only upload the updated group script after a successful run.
                if ssh.cmd_run([tar_cmd]) > 0:
                    ssh.upload(group_sh, confs.remote_path + 'bin/sqoop_' + heads + '.sh')
            else:
                print(heads, 'shell文件配置错位')
                break
        else:
            # Hive SQL job path.
            print('\n hive sql同步配置检测:', tb)
            flag, tar_tb, depd_list = self.check_deploy(tb)
            if flag == 0:
                print('\033[1;37;45m ERROR:', tb, ' 配置文件检查错误 \033[0m')
                break
            else:
                print('检测通过:', tb)
                ssh.upload(confs.main_path + 'cfg/' + tb + '.properties', confs.remote_path + 'cfg/' + tb + '.properties')
                ssh.upload(confs.main_path + 'sql/' + tb + '.sql', confs.remote_path + 'sql/' + tb + '.sql')
                #ssh.upload(confs.main_path+'bin/'+tb+'.sh',confs.remote_path+'bin/'+tb+'.sh')
                tar_cmd = confs.hive_sh + tb + '.sql'
                #print('执行数据同步完成')
                if ssh.cmd_run([tar_cmd]) > 0:
                    # Register the job and regenerate the group shell files.
                    if self.add_job(tb + '.sql', tar_tb, depd_list) > 0:
                        self.write_sh()
                else:
                    #self.write_sh()
                    print('\033[1;37;45m ERROR:', tb, ' sql执行错误,请修改 \033[0m')
    ssh.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh.close()
def _real_push(self, host, pack_repo, remote_pack_repo):
    """Upload the packed repo tarball to *host* and untar it in place.

    Returns whatever ssh.ssh_cmd returns for the remote tar command.
    """
    # Step 1: copy the tarball up to the remote path.
    ssh.ssh_upload_repo(host, self.user, self.password,
                        pack_repo, remote_pack_repo)
    # Step 2: extract it inside its own remote directory.
    remote_dir = os.path.dirname(remote_pack_repo)
    tarball = os.path.basename(remote_pack_repo)
    unpack_cmd = 'cd ' + remote_dir + '; tar -zxvf ' + tarball
    return ssh.ssh_cmd(host, self.user, self.password, unpack_cmd)
def run_sql(self, tb, tar_ssh='ssh_uat'):
    """Validate a Hive SQL job's config, upload its cfg/sql files and run it
    once on the chosen host, printing success or failure.

    Args:
        tb: job/table name; resolves cfg/<tb>.properties and sql/<tb>.sql.
        tar_ssh: 'ssh_uat' (default) or 'ssh_sc' for production.
    """
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    # NOTE(review): when tar_ssh == 'ssh_sc' the UAT connection opened above
    # is replaced without being closed — likely a connection leak; confirm.
    if tar_ssh == 'ssh_sc':
        ssh = ssh_cmd(sshcon.ssh_sc)
    flag, tar_tb, depd_list = self.check_deploy(tb)
    if flag == 0:
        print('\033[1;37;45m ERROR:', tb, ' 配置文件检查错误 \033[0m')
    else:
        print('检测通过:', tb)
        ssh.upload(confs.main_path + 'cfg/' + tb + '.properties', confs.remote_path + 'cfg/' + tb + '.properties')
        ssh.upload(confs.main_path + 'sql/' + tb + '.sql', confs.remote_path + 'sql/' + tb + '.sql')
        tar_cmd = confs.hive_sh + tb + '.sql'
        #print('执行数据同步完成')
        # cmd_run returns >0 on success.
        if ssh.cmd_run([tar_cmd]) > 0:
            print('执行成功')
        else:
            print('\033[1;37;45m ERROR:', tb, ' sql执行错误,请修改 \033[0m')
    ssh.close()
def ssh_main(iplist, cmdlist):
    """Run *cmdlist* over SSH on every (ip, hostname) pair in *iplist*,
    appending any command output to ./log/tomcat_error.log.

    Python 2 code (print >> syntax, `except Exception, e`).

    Returns None on success, or an error string (also appended to
    ./log/error.log) if anything raises.
    """
    try:
        # Credentials come from the first row of the user-info store.
        infolist = get_userinfo.getinfo()
        username = infolist[0][0]
        passwd = infolist[0][1]
        port = 22
        threads = []  # NOTE(review): never appended to or joined — dead variable?
        for i in iplist:
            ip = i[0]
            hostname = i[1]
            # Synchronous run; output lines are logged per host.
            outlist = ssh.ssh_cmd(ip, port, username, passwd, cmdlist)
            if outlist:
                # NOTE(review): this inner loop reuses (shadows) the outer
                # loop variable `i` — works only because `i` is not read
                # again afterwards; rename if the loop body ever grows.
                for i in outlist:
                    l = i.strip('\n')
                    with open(r'./log/tomcat_error.log', 'a') as error_log:
                        print >> error_log, ('%s %s %s' % (ip, hostname, l))
            # NOTE(review): this starts a thread running the SAME command a
            # second time on the same host (and shadows the function name
            # `ssh_main`) — confirm whether the double execution is intended.
            ssh_main = threading.Thread(target=ssh.ssh_cmd, args=(ip, port, username, passwd, cmdlist))
            ssh_main.start()
    except Exception, e:
        # Best-effort error logging; broad catch is deliberate here so one
        # bad host does not kill the whole sweep.
        ssh_mainerr = '%s %s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'ssh_main-execute ssh_main() falied')
        with open(r'./log/error.log', 'a') as error_log:
            print >> error_log, ('%s %s' % (ssh_mainerr, e))
        return ssh_mainerr
import sys import config import ssh # read config content_json = config.readconfig('temp.json') # get server & program list try: svr = content_json['svr'] print svr program_list = content_json['program_list'] print program_list program_list_leng = len(program_list) pass except Exception, e: print "exception" sys.exit() print '\nSSH to server ' + svr + '.\n' # get input username = raw_input("SSH username:"******"SSH password:") # test ssh server ssh.ssh_cmd(svr, username, password, 'ls /home/' + username) # if __name__ == '__main__':
import os import sys import config import ssh # read config content_json = config.readconfig('temp.json') # get server & program list try: svr = content_json['svr'] print svr program_list = content_json['program_list'] print program_list program_list_leng = len(program_list) pass except Exception, e: print "exception" sys.exit() print '\nSSH to server ' + svr + '.\n' # get input username = raw_input("SSH username:"******"SSH password:") # test ssh server ssh.ssh_cmd(svr, username, password, 'ls /home/' + username) # if __name__ == '__main__':
def auto_deploy(etl_group, tar_ssh='ssh_uat'):
    """Deploy every entry listed by read_deploy(): sqoop-sync tables whose
    4-char prefix maps to a known source DB; otherwise treat the entry as a
    Hive SQL job and register it into its configured schedule group.

    Any validation failure prints an error and breaks out of the whole loop
    (remaining entries are NOT processed).

    Args:
        etl_group: dict mapping a Hive SQL job name to its group shell file.
        tar_ssh: 'ssh_uat' (default) or 'ssh_sc' to target production.
    """
    tb_list = read_deploy()
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    if tar_ssh == 'ssh_sc':
        ssh = ssh_cmd(sshcon.ssh_sc)
    for tb in tb_list:
        # First four characters select the source DB when they match the map.
        heads = tb[0:4]
        if heads in confs.db_map.keys():
            print('sqoop同步配置:', tb)
            # Strip "<head>_" prefix to get the bare table name.
            tp_tb = tb[5:]
            # BUG FIX: the original built `tar_cmd = db + ' ' + tb + ' auto'`
            # where `db` is an undefined name (NameError at runtime) and `tb`
            # still carries the DB prefix. Use the parsed prefix and stripped
            # table name, matching the sibling sqoop command builder.
            tar_cmd = heads + ' ' + tp_tb + ' auto'
            tb_size = conn.sljr_tb_size(db=heads, tb=tp_tb)
            if conn.etl_set_exists(tb) > 0:
                print(tb, '目标表已经加入了调度,如果需要重新调度请手动修改')
                break
            if tb_size < 0:
                # Negative size means the source table was not found.
                print(tp_tb, '表不存在不能同步,或者检查表名')
                break
            if tb_size > 10000000:
                # Over 10M rows: switch to incremental sync.
                print(tp_tb, '大于1千万需要增量同步:', tb_size)
                tar_cmd = tar_cmd + ' inc'
            if conn.hive_tb_exists(tb) == 0:
                # Create the Hive-side table schema first.
                sdd_table(db=heads, tb_list=[tp_tb])
            group_sh = confs.local_path + 'bin/sqoop_' + heads + '.sh'
            if append_sh(group_sh, tar_cmd) > 0:
                ssh.upload(group_sh, confs.remote_path + 'bin/sqoop_' + heads + '.sh')
            else:
                print(heads, 'shell文件配置错位')
                break
        else:
            # Hive SQL job path.
            print('hive sql同步配置检测:', tb)
            flag, tar_tb = check_deploy(tb)
            if flag == 0:
                print(tb, '配置文件检查错误')
                break
            else:
                print('检测通过:', tb)
                if tb in etl_group.keys():
                    if conn.etl_set_exists(tar_tb) > 0:
                        print(tar_tb, '目标表已经加入了调度,如果需要重新调度请手动修改')
                    else:
                        group_sh = confs.local_path + 'bin/' + etl_group[tb]
                        if append_sh(group_sh, tb + '.sql') > 0:
                            ssh.upload(group_sh, confs.remote_path + 'bin/' + etl_group[tb])
                        else:
                            print(etl_group[tb], 'shell文件配置错位')
                            break
                    ssh.upload(confs.main_path + 'cfg/' + tb + '.properties',
                               confs.remote_path + 'cfg/' + tb + '.properties')
                    ssh.upload(confs.main_path + 'sql/' + tb + '.sql',
                               confs.remote_path + 'sql/' + tb + '.sql')
                    ssh.upload(confs.main_path + 'bin/' + tb + '.sh',
                               confs.remote_path + 'bin/' + tb + '.sh')
                    ssh.cmd_run(['chmod 755 -R /home/bigdata/bin'])
                else:
                    print('脚本没有指定分组调度')
                    break
    ssh.close()
def get_sc_hive_dml():
    """Dump CREATE TABLE DDL for every table in the 'cdi' and 'app' Hive
    databases (read from the Hive metastore via the meta connection), write
    the script to hive/sc_hive_tbs.sql, upload it to UAT and execute it there
    with `hive -f`.

    Returns:
        1 on completion.
    """
    etl_data = conn.meta()
    # Column inventory per table, joined with table-level params for the
    # table comment.
    tbs_sql = """ select -- d.`NAME` db_name, concat( d.`NAME`,'.', t.TBL_NAME) tb_name, tp.tb_com tb_name_cn, v.COLUMN_NAME col_name, v.`COMMENT` col_comment, v.TYPE_NAME col_data_type,CURRENT_DATE() check_date from hive.columns_v2 v inner join hive.sds s on v.CD_ID=s.CD_ID inner join hive.tbls t on s.sd_id=t.sd_id inner join hive.dbs d on d.db_id=t.db_id LEFT JOIN(select s.tbl_id tb_id, max(if(PARAM_KEY='comment',PARAM_VALUE,null)) tb_com, FROM_UNIXTIME(max(if(PARAM_KEY='transient_lastDdlTime',PARAM_VALUE,null))) last_ddl_time, FROM_UNIXTIME(max(if(PARAM_KEY='last_modified_time',PARAM_VALUE,null))) last_modified_time, max(if(PARAM_KEY='last_modified_by',PARAM_VALUE,'')) last_modified_by from hive.TABLE_PARAMS s GROUP BY s.TBL_ID) tp on t.TBL_ID=tp.tb_id where d.`NAME` in( 'cdi','app') """
    # Partition-key columns per table (emitted in a PARTITIONED BY clause).
    part_sql = """ SELECT concat(d.name,'.',t.TBL_NAME) tb_name, p.PKEY_NAME col_name, p.PKEY_COMMENT col_comment, p.PKEY_TYPE col_data_type FROM hive.partition_keys p inner join hive.tbls t on p.tbl_id=t.tbl_id inner join hive.dbs d on d.db_id=t.db_id where d.`NAME` in( 'cdi','app') """
    sc = pd.read_sql(tbs_sql, etl_data)
    parts = pd.read_sql(part_sql, etl_data)
    ddl_file = open(confs.main_path_py + 'hive/sc_hive_tbs.sql', 'w+', encoding='utf-8')
    # Map tb_name -> Chinese table comment (tb_name_cn), one row per table.
    tb_list = sc[['tb_name', 'tb_name_cn']].drop_duplicates()
    tb_list = tb_list.set_index('tb_name').to_dict()['tb_name_cn']
    for tb in tb_list.keys():
        ddls = "\ndrop table if exists {0};\ncreate table if not exists {0} (".format(tb)
        tb_com = sc[sc['tb_name'] == tb]
        if tb_com.shape[0] > 0:
            for i in tb_com.index:
                # One column per line; strip quotes/semicolons from the
                # comment so the generated DDL stays parseable.
                # NOTE(review): assumes col_comment is never None here
                # (.replace would raise) — confirm the metastore always
                # stores a comment string for these DBs.
                tb_sql = tb_com.loc[i, 'col_name'].ljust(30) + tb_com.loc[i, 'col_data_type'] + ' COMMENT \'' + tb_com.loc[i, 'col_comment'].replace(';', '').replace('\'', '') + '\','
                ddls = ddls + '\n' + tb_sql
            # Drop the trailing comma before closing the column list.
            ddls = ddls[:-1] + ")\n comment '{0}'".format(tb_list[tb])
        tp_parts = parts[parts['tb_name'] == tb]
        if tp_parts.shape[0] > 0:
            #print('dsssss',tp_parts)
            p_str = "\npartitioned by ("
            for kp in tp_parts.index:
                # Partition comments go through str() (may legitimately be
                # None here), unlike regular column comments above.
                tb_sql = tp_parts.loc[kp, 'col_name'].ljust(10) + tp_parts.loc[kp, 'col_data_type'] + ' COMMENT \'' + str(tp_parts.loc[kp, 'col_comment']) + '\','
                p_str = p_str + '\n' + tb_sql
            # Drop the trailing comma before closing the partition list.
            p_str = (p_str[:-1]) + ')'
            ddls = ddls + p_str
        ddls = ddls + '\n STORED AS ORCfile;'
        ddl_file.write(ddls)
        ddl_file.write('\n\n')
        #print(ddls)
    ddl_file.close()
    # Ship the generated DDL to UAT and run it there.
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    ssh.upload(confs.main_path_py + 'hive/sc_hive_tbs.sql', confs.remote_path_py + 'hive/sc_hive_tbs.sql')
    ssh.cmd_run(["hive -f '{0}'".format(confs.remote_path_py + 'hive/sc_hive_tbs.sql')])
    ssh.close()
    return 1
for z in ['l', 'h']: var = x + '-' + y + '-' + z variants.append(var) parser = argparse.ArgumentParser('Simulation Runner') parser.add_argument('-a', choices=[ 'astar', 'ga', 'gsa', 'mct', 'minmin', 'olb', 'tabu' ], dest='algo', help='The algorithm to run', required=True) parser.add_argument('-v', choices=variants, dest='variant', help='The variant of the data file to run', default='i-l-l') parser.add_argument('--kill', dest='kill', action='store_true', default=False, help='If present, kills the processes found') args = parser.parse_args() # print 'Querying machines...' machs = ssh.get_machines() cmd = 'ps -ef | grep ' + args.algo + ' | grep \'' + args.variant + '\' | grep -v \'grep\' | grep -v \'kill.py\' | awk \'{print $2}\'' # print cmd threads = ssh.ssh_cmd(cmd, machs) # iterate over the machines for t in threads: if t.out == None: continue # get the PID's running on the current machine for pid in t.out.split(): try: pid = int(pid) print 'Found', pid, 'on', t.mach, '(killed)' if args.kill else '' if args.kill == True: cmd = 'kill ' + str(pid) ssh.single_ssh_cmd(cmd, t.mach)
print(tar_tb, '目标表已经加入了调度,如果需要重新调度请手动修改') else: group_sh = confs.local_path + 'bin/' + etl_group[tb] tar_cmd = hive_sh + tb + '.sql' print('执行命令:', tar_cmd) if append_sh(group_sh, tar_cmd) > 0: if if_run: if ssh.cmd_run([tar_cmd]) > 0: ssh.upload( group_sh, confs.remote_path + 'bin/' + etl_group[tb]) else: print(etl_group[tb], 'shell文件配置错位') break else: print('\033[1;37;45m ERROR:', tb, ' 脚本没有指定分组调度 \033[0m') break ssh.cmd_run( ['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg']) ssh.close() if __name__ == '__main__': cmd = sqoop_tp() sshcon = ssh_con() ssh = ssh_cmd(sshcon.ssh_sc) ssh.cmd_run(cmd, if_print=0) ssh.close()