def mv_unzip_analysis(file_path: str):
    """Move, unzip, and analyse a zip package uploaded via FTP."""
    is_zip_file = path.isfile(file_path) and file_path.endswith(".zip")
    if not is_zip_file:
        cli.error(
            "{} 不是文件 or 不是 zip 压缩文件,请传入需要处理的 zip 压缩包的路径".format(file_path))
        exit()
    helper = FtpServerHelperStep1(path.abspath(file_path))
    cli.warning("Task Start...")
    helper.ls_lah(helper.zipfile_path)
    # Step 0: derive package information from the zip path.
    helper.show_zipfile_info()
    # Step 1: move the file under Configs['ftp_data_tmp'] and rename it so an
    # ill-formed filename cannot cause data loss.
    helper.mv()
    helper.show_tree()
    # Step 2: unpack the archive.
    helper.unzip()
    # Step 3: analyse, repair, and export the package report to Excel.
    cli.info("正在针对解压后的目录进行数据解密,数据清洗工作")
    helper.package_analysis()
    cli.warning("Task Finished!")
def kill_job(args, job_id):
    """Cancel a still-running scheduler job via scancel (Slurm) or qdel (PBS)."""
    if not job_running(args, job_id):
        return
    cli.warning(f'Kill job {job_id}.')
    if args.slurm:
        run(f'scancel {job_id}')
    elif args.pbs:
        run(f'qdel {job_id}')
def mu(self, word, location):
    """ score scanning """
    # One slot per position of the closed range location = (a, b); each slot
    # receives the frequency of `word` in the background interval covering it.
    mu_tab = [0] * (location[1] - location[0] + 1)
    #find intervals to use
    a, b = location
    inLocation = False  # becomes True once an interval containing `a` is seen
    f = 0
    n = 0  # NOTE(review): never used in this method — kept as in original
    for interval in self.intervals:
        if a >= interval[0] and a <= interval[1]:
            inLocation = True
        if inLocation:
            F = self[interval]  # frequency table for this background interval
            # Overlap of [a, b] with the current interval, and its width.
            i, j = max(a, interval[0]), min(b, interval[1])
            x = j - i + 1
            #change this ?
            f = F.get(word, 0.0)
            # NOTE(review): this warns whenever the stored value is falsy
            # (e.g. a present 0.0), not only when the key is absent; the
            # sibling freq() method tests "== None" instead — confirm which
            # behaviour is intended.
            if not F.get(word, 0):
                cli.warning('can not find key="%s" in BG' % word)
            # Fill the overlapped slice with this interval's frequency.
            mu_tab[i - a:i - a + x] = [f] * x
            if b <= interval[1]:
                break
    # Smooth the per-position profile with a window of 200.
    return smooth(mu_tab, 200)
def get_info_file_info(self):
    """Read and parse the package's ``info.txt``.

    When the file starts with a UTF-8 BOM, the BOM is stripped and the file
    is rewritten in place without it.

    :return: parsed JSON content as a dict; ``{}`` when the info file is
        absent, empty, or cannot be parsed.
    """
    info_data = {}
    if self.exist_info_file:
        info_file_full_path = path.join(self.package_path, "info.txt")
        with open(info_file_full_path, "rb+") as f:
            try:
                info_con = f.read()
                # Strip a UTF-8 BOM, if present, and rewrite the file clean.
                if info_con[:3] == codecs.BOM_UTF8:
                    info_con = info_con[3:]
                    f.seek(0)
                    f.truncate()
                    f.write(info_con)
                if len(info_con.strip()) > 0:
                    info_data = json.loads(info_con.decode())
                else:
                    cli.warning("\ninfo 文件是空的")
            except (OSError, ValueError):
                # ValueError covers JSONDecodeError and UnicodeDecodeError;
                # the original bare `except:` hid unrelated failures too.
                cli.error("\n %s 包的 info文件信息解析出错了" % self.package_name)
    return info_data
def extract_windows_score(N, H, bg, location, w, params):
    """Score enriched windows for word ``w`` inside ``location``.

    Candidate windows come from ``score()``; each surviving window is turned
    into a result row [word, label, obsFreq, expFreq, obsOcc, expOcc, pv, ev,
    -log10(ev), start, end, width, 0, n, 0, 0].  E-values (columns 7/8) and
    the window count (column 12) are filled in a final pass.
    presumably H maps words to hit-position lists and N holds per-word-length
    position weights — TODO confirm against callers.
    """
    R = []
    ratio = params['ratio']
    # convert position to new ref
    l = [p - location[0] for p in H[w]]
    # Skip words whose total occurrence count is outside the allowed range.
    if len(l) < params['occ'][0] or len(l) > params['occ'][1]:
        return []
    #extract windows
    # Background expectation per position, truncated so a word fits entirely.
    mu = bg.mu(w, location)[:-len(w) + 1]
    #print w
    mu = [mu[i] * N[len(w)][i] for i in range(len(mu))]
    alpha = [x * ratio for x in mu]
    for a, b, obsOcc in score(l, alpha, mu):
        # Back to absolute coordinates.
        a, b = a + location[0], b + location[0]
        if b - a + 1 < params['width'][0] or b - a + 1 > params['width'][1]:
            continue
        #test occ
        if obsOcc < params['occ'][0] or obsOcc > params['occ'][1]:
            continue
        # Effective number of scorable positions in the window.
        n = sum(N[len(w)][a - location[0]:b - location[0] + 1])
        try:
            obsFreq = obsOcc / float(n)
        except:
            # NOTE(review): bare except; when n == 0, obsFreq stays unbound
            # and the R.append below raises NameError — consider `continue`.
            cli.warning('n error')
            pass
        expFreq = bg.freq(w, (a, b))
        expOcc = expFreq * n
        pv = Stats.dist.ppois(obsOcc, expOcc)
        #pv = Stats.dist.pbinom(obsOcc, n, expFreq)
        ev = 1.0  # placeholder; replaced by the multiple-testing pass below
        label = '%s|%s' % (w, reverse_complement(w))
        R.append([
            w, label, obsFreq, expFreq, obsOcc, expOcc, pv, ev, -log10(ev),
            a, b, b - a + 1, 0, n, 0, 0
        ])
        #cli.info(R[-1])
    # Bonferroni-style correction: E-value = p-value * (#words * #windows).
    for r in R:
        r[7] = r[6] * len(H) * len(R)
        r[8] = -log10(r[7])
        r[12] = len(R)
    return R
def gsi_version(gsi_root):
    """Detect the community GSI version from README.comgsi.

    The version is parsed from the second line of the README (e.g.
    "Community GSIv3.7_EnKFv1.3").  Falls back to 3.6 with a warning when
    the README is missing.

    :param gsi_root: path to the GSI source root directory
    :return: parsed version object
    """
    readme = f'{gsi_root}/README.comgsi'
    if not os.path.isfile(readme):
        cli.warning('Could not find GSI version! Assume 3.6.')
        return parse('3.6')
    # `with` closes the handle; the original leaked the open file object.
    with open(readme, 'r') as f:
        f.readline()          # skip the first line
        line = f.readline()   # e.g. "Community GSIv3.7_EnKFv1.3"
    # Raw string avoids the invalid-escape-sequence deprecation warning.
    match = re.search(r'GSIv(\d+\.\d+)', line)[1]
    return parse(match)
def get_package_metadata_info(packages_path, keys):
    """Read one metadata file per package and report the values of the given keys (at least one key is required)."""
    if not path.isabs(packages_path):
        packages_path = path.abspath(packages_path).rstrip("//")
    if not keys:
        cli.error("Warning:keys 必须得有一个")
        return
    cli.warning("Task Starting...")
    for package in os.listdir(packages_path):
        values = get_package_metadata_file_con_by_key(
            path.join(packages_path, package), keys)
        cli.info("%s 包 %s" % (package, str(values)))
    cli.warning("Task Finish!")
def update_fluidinfo(self, db):
    """Push this object's permission policies to Fluidinfo through *db*."""
    # Invariant: the owner must retain control — absent from the exception
    # list of an 'open' policy, present in it otherwise.
    for name in CONTROL_NAMES:
        if not hasattr(self, name):
            continue
        perm = self.__dict__[name]
        if perm.policy == 'open':
            assert self.owner not in perm.exceptions
        else:
            assert self.owner in perm.exceptions
    if self.isTag:
        entities = (u'abstract-tag', u'tag')
    else:
        entities = (u'namespace',)
    for entity in entities:
        for name in RAW_PERMS[entity].names:
            perm = self.__dict__[name]
            err = db.set_raw_perm(entity, self.path[1:],
                                  RAW_PERMS[entity].action(name),
                                  perm.policy, perm.exceptions)
            if err:
                cli.warning(cli.error_code(err))
def fix_wrong_packagename_files(packages_path):
    """Rename files carrying a wrong package name to their owning package's name.

    Expected file-name layout: ``2017001P00012a1234.wav|txt`` — the package
    name starts at the first ``P`` and ends right before ``i`` (or ``a`` when
    no ``i`` is present).

    :param packages_path: directory containing the packages
    :return: None
    """
    cli.info("Task starting...")
    controller = PackageAssistantController(packages_path)
    controller.do_analysis()
    report_lines = []
    for model in controller.abnormal_packages:
        for wrong_file in model.wrong_package_files:
            old_path = path.join(model.package_path, wrong_file)
            # Slice the (wrong) package name out of the file name.
            start = wrong_file.find("P")
            end = wrong_file.find("i") if "i" in wrong_file else wrong_file.find("a")
            bad_name = wrong_file[start:end]
            report_lines.append("%s 文件被修改包名,原:%s,新:%s" %
                                (old_path, bad_name, model.package_name))
            # Substitute the correct package name and rename on disk.
            new_path = path.join(
                model.package_path,
                wrong_file.replace(bad_name, model.package_name))
            os.rename(old_path, new_path)
    if report_lines:
        cli.warning("\n".join(report_lines))
    cli.info("Task finished!")
def tar_upload(packages_folder_path: str):
    """Compress a directory of packages awaiting verification and upload the archive to the Microsoft server."""
    if not path.isdir(packages_folder_path):
        cli.error("{} 必须是一个目录".format(packages_folder_path))
        exit()
    helper = FtpServerHelperStep2(path.abspath(packages_folder_path))
    cli.warning("Task Start...")
    # Step 1: build a tar.gz under the data_sync_backup directory.
    helper.compress_folder()
    # Step 2: push the archive to the Microsoft server.
    helper.upload_2_weiruan()
    cli.warning("Task Finished!")
def freq(self, word, location):
    """Length-weighted mean background frequency of ``word`` over ``location``.

    The closed range ``location = (a, b)`` is intersected with each
    background interval; each interval's stored frequency is weighted by the
    width of the overlap.

    :param word: key to look up in the per-interval frequency tables
    :param location: (start, end) inclusive coordinates
    :return: weighted mean frequency as a float
    """
    a, b = location
    in_location = False  # True once an interval containing `a` is reached
    f = 0
    n = 0
    for interval in self.intervals:
        if interval[0] <= a <= interval[1]:
            in_location = True
        if in_location:
            F = self[interval]
            # Width of the overlap between [a, b] and this interval.
            x = min(b, interval[1]) + 1 - max(a, interval[0])
            # Single lookup instead of the original's two; warn (as before)
            # only when the key is genuinely absent.
            word_freq = F.get(word)
            if word_freq is None:
                cli.warning('can not find key="%s" in BG' % word)
                word_freq = 0.0
            f += word_freq * x
            n += x
            if b <= interval[1]:
                break
    return f / float(n)
def execute_chmod_command(objs, args, options, credentials):
    """Handle the ``chmod`` CLI command (currently a stub)."""
    # NOTE(review): unconditional early return — everything below is dead
    # code, an unfinished Python-2-era implementation (Print/unicode) kept
    # for reference.
    cli.warning('Not implemented yet.')
    return
    db = ExtendedFluidDB(host=options.hostname, credentials=credentials,
                         debug=options.debug,
                         unixStylePaths=fdblib.path_style(options))
    if len(args) < 2:
        Print(u'Form: chmod [perms-spec] list-of-tags-and-namespaces')
        return
    spec = args[0]
    # NOTE(review): message text looks garbled ("must have for ddd"), and
    # there is no return after reporting a bad spec, so execution would fall
    # through to UnixPerms with the invalid spec.
    if not all(u'0' <= p <= u'7' for p in spec) or len(spec) != 3:
        Print((u'Permissions specifier must have for ddd with each d between '
               u'0 and 7'))
    new_perms = UnixPerms(spec, db.credentials.username)
    fullpaths = (db.abs_tag_path(t, inPref=True) for t in args[1:])
    Print(unicode(new_perms))
    Print(u'READ: %s' % unicode(new_perms.read))
    Print(u'WRITE: %s' % unicode(new_perms.write))
    Print(u'CONTROL: %s' % unicode(new_perms.control))
    new_perms.check_owner_control_ok()
    for path in fullpaths:
        done = False
        if db.tag_exists(path):
            inPerms = FluidinfoPerms(db, path, isTag=True)
            Print(unicode(inPerms))
            new_perms.isTag = True
            # outPerms = new_perms.new_fi_tag_perms(inTagPerms)
            done = True
        if db.ns_exists(path):
            inPerms = FluidinfoPerms(db, path, isTag=False)
            Print(unicode(inPerms))
            new_perms.isTag = False
            # outPerms = new_perms.new_fi_ns_perms(inTagPerms)
            done = True
        if not done:
            Print('No tag or namespace %s found'
                  % db.abs_tag_path(path, outPref=True))
def __next(self, a, b, obsOcc):
    """Validate window [a, b] with obsOcc occurrences and append its score row to self.R."""
    width = b - a + 1
    # Reject windows outside the configured width range.
    if width < self.MIN_WIDTH or width > self.MAX_WIDTH:
        return
    # NOTE(review): suspected copy-paste bug — the occurrence count is
    # compared against MAX_WIDTH; a MAX_OCC upper bound seems intended
    # (cf. extract_windows_score's params['occ'][1]) — confirm.
    if obsOcc < self.MIN_OCC or obsOcc > self.MAX_WIDTH:
        return
    # Effective number of scorable positions inside the window.
    n = sum(self.N[len(self.w)][a - self.location[0]:b - self.location[0] + 1])
    try:
        obsFreq = obsOcc / float(n)
    except:
        # NOTE(review): bare except; when n == 0, obsFreq stays unbound and
        # the R.append below raises NameError — consider returning here.
        cli.warning('n error')
    expFreq = self.bg.freq(self.w, (a, b))
    expOcc = expFreq * n
    #pv = ppois(obsOcc, expOcc)
    #pv = ppois_cached(obsOcc, expOcc)
    #pv = pbinom_right_left_cached(obsOcc, n, expFreq)
    # One-sided binomial p-value: left tail for depletion ('under'),
    # otherwise the default tail.
    if self.params['under']:
        pv = pbinom_left(obsOcc, n, expFreq)
    else:
        pv = pbinom(obsOcc, n, expFreq)
    ev = 1.0  # placeholder E-value; presumably corrected downstream — confirm
    label = '%s|%s' % (self.w, reverse_complement(self.w))
    w = self.w
    # Collapse a run of N spacer characters into the compact "n{k}" notation.
    spaces = self.w.count('N')
    if spaces >= 1:
        label = label.replace('N' * spaces, 'n{%d}' % spaces)
        w = self.w.replace('N' * spaces, 'n{%d}' % spaces)
    self.R.append([
        w, label, obsFreq, expFreq, obsOcc, expOcc, pv, ev, -log10(ev),
        a, b, b - a + 1, 0, n, 0, 0
    ])
def convert_utf8(packages_path, write_utf8_con):
    """Convert every txt file inside each package (corpus & info) to UTF-8 encoding."""
    if not path.isabs(packages_path):
        packages_path = (path.abspath(packages_path)).rstrip("//")
    cli.warning("Task Starting...")
    indent = 5  # width of the per-file report prefix
    prefix = " " * indent + "|" + "-" * indent
    with click.progressbar(
            os.listdir(packages_path),
            label="正在检测包中的无utf8文件",
            fill_char='*',
            show_eta=True,
            show_percent=True,
            show_pos=True,
    ) as bar:
        for package in bar:
            bar.label = "正在检测包 {}".format(package)
            if bar.finished:
                bar.label = "检测已完成"
            current = path.join(packages_path, package)
            if not path.isdir(current):
                cli.warning("\n 跳过非目录文件:" + current)
                continue
            for txt_file in glob.glob(path.join(current, "*.txt")):
                if not check_and_convert2utf8(txt_file, write_utf8_con):
                    cli.warning("\n" + prefix +
                                " %s 文件不是utf-8" % path.basename(txt_file))
    cli.warning("Task Finish!")
def remove_bom(packages_path, write_clean_con):
    """Strip the UTF-8 BOM from every txt file inside each package (corpus & info)."""
    if not path.isabs(packages_path):
        packages_path = (path.abspath(packages_path)).rstrip("//")
    cli.warning("Task Starting...")
    indent = 5  # width of the per-file report prefix
    prefix = " " * indent + "|" + "-" * indent
    with click.progressbar(
            os.listdir(packages_path),
            label="正在检测包中的带bom的文件",
            fill_char='*',
            show_eta=True,
            show_percent=True,
            show_pos=True,
    ) as bar:
        for package in bar:
            bar.label = "正在检测包 {}".format(package)
            if bar.finished:
                bar.label = "检测已完成"
            current = path.join(packages_path, package)
            if not path.isdir(current):
                cli.warning("\n跳过非目录文件:" + current)
                continue
            for txt_file in glob.glob(path.join(current, "*.txt")):
                if remove_utf8_bom(txt_file, write_clean_con):
                    cli.warning("\n" + prefix +
                                " %s 文件存在Bom" % path.basename(txt_file))
    cli.warning("Task Finish!")
def package_assistant(packages_path, result_send_mail, result_write_file,
                      result_echo_via_pager, package_info_write_excel_file,
                      delete_unknown_folder, replace_folder_wav_files,
                      rename_endswith_u_file,
                      delete_duplicated_underline_number_file,
                      delete_unknown_suffix_file, decrypt_endswith_enc_file,
                      delete_single_txt, quiet, auto_clean):
    """Package assistant command: analyse packages, optionally clean them,
    then report the results (pager / file / excel / mail)."""
    if not path.isabs(packages_path):
        cli.warning("程序按照给定的相对路径: %s 进行处理" % packages_path)
        packages_path = path.abspath(packages_path)
    packages_path = packages_path.rstrip("//")  # drop any trailing slash
    cli.info("Task Starting...")
    controller = PackageAssistantController(packages_path)
    controller.do_analysis()
    if auto_clean:
        # auto_clean turns on the whole cleanup suite below.
        replace_folder_wav_files = True
        delete_unknown_folder = True
        delete_unknown_suffix_file = True
        rename_endswith_u_file = True
        delete_duplicated_underline_number_file = True
        decrypt_endswith_enc_file = True
        delete_single_txt = True
    # Move files out of per-package "wav" folders up one level.
    if replace_folder_wav_files:
        controller.replace_folder_wav_files()
    # Drop stray m4a / mp3 / temp folders found inside packages.
    if delete_unknown_folder:
        controller.delete_unknown_folders()
    # Remove files with suffixes such as skip / sk / pk.
    if delete_unknown_suffix_file:
        controller.delete_unknown_suffix_file()
    # Rename files ending with ".u".
    if rename_endswith_u_file:
        controller.rename_endswith_u_file()
    # Remove duplicate recordings like xxx_1.wav / xxx_2.wav.
    if delete_duplicated_underline_number_file:
        controller.delete_duplicated_underline_number_file()
    # Decrypt files that were stored encrypted.
    if decrypt_endswith_enc_file:
        controller.decrypt_endswith_enc_file()
    # Remove txt files that have no matching audio file.
    if delete_single_txt:
        controller.delete_single_txt()
    if not quiet:
        # Show results, paginated or not.
        if result_echo_via_pager:
            controller.result_echo_via_pager()
        else:
            controller.result_echo_no_via_pager()
    if result_write_file:
        controller.result_write_2_file()
    if package_info_write_excel_file:
        controller.package_info_write_to_excel()
    if result_send_mail:
        # Attachments are only included when the corresponding file / excel
        # reports were actually produced.
        controller.send_mail(result_write_file, package_info_write_excel_file)
    cli.info("Task Finish!")
import cli
import os

# Scheduler queue configuration, read from the environment at import time.
queue = None
ntasks_per_node = None
if 'WRF_SCRIPTS_QUEUE' not in os.environ:
    cli.warning('Environment WRF_SCRIPTS_QUEUE is not set. Will run executable in current node!')
elif 'WRF_SCRIPTS_NTASKS_PER_NODE' not in os.environ:
    cli.error('Environment WRF_SCRIPTS_NTASKS_PER_NODE should be set by you!')
else:
    queue = os.environ['WRF_SCRIPTS_QUEUE']
    ntasks_per_node = os.environ['WRF_SCRIPTS_NTASKS_PER_NODE']
if queue is not None:
    # Allow user to set multiple queues with descending priority.
    queue = queue.split(',')
    ntasks_per_node = [int(x) for x in ntasks_per_node.split(',')]
def submit_job(cmd, ntasks, config, args, logfile='rsl.out.0000', wait=False, queue_idx=0):
    """Submit ``cmd`` as a batch job (Slurm/PBS) or run it directly with mpiexec.

    :param cmd: executable command line to run under MPI
    :param ntasks: total number of MPI tasks
    :param config: run configuration; config["tag"] names the job
    :param args: parsed CLI args; args.slurm / args.pbs select the scheduler
    :param logfile: log file whose last line is echoed while waiting
    :param wait: when True, block until the job finishes
    :param queue_idx: index into the machine's queue list (used for retries)
    :return: the scheduler job id (Slurm/PBS), or None for a direct run
    """
    if logfile:
        run(f'rm -f {logfile}')
    # Resolve tasks-per-node: explicit CLI value wins over machine default.
    ntasks_per_node = None
    if args.ntasks_per_node != None:
        ntasks_per_node = args.ntasks_per_node
    elif mach.ntasks_per_node:
        ntasks_per_node = mach.ntasks_per_node[queue_idx]
    if ntasks_per_node != None and ntasks < ntasks_per_node:
        cli.warning(f'Change ntasks_per_node from {ntasks_per_node} to {ntasks}.')
        ntasks_per_node = ntasks
    if args.slurm:
        f = open('submit.sh', 'w')
        f.write(f'''\
#!/bin/bash
#SBATCH --job-name {config["tag"]}
#SBATCH --comment WRF
#SBATCH --partition {mach.queue[queue_idx]}
#SBATCH --time 24:00:00
#SBATCH --ntasks {ntasks}
#SBATCH --ntasks-per-node {ntasks_per_node}
#SBATCH --nodes {int(ntasks / ntasks_per_node)}
mpiexec -np {ntasks} {cmd}
''')
        f.close()
        stdout = run('sbatch < submit.sh', stdout=True)
        match = re.search('Submitted batch job (\w+)', stdout)
        if not match:
            # Retry on the next (lower-priority) queue when one remains.
            # NOTE(review): the recursive retry's job id is discarded — the
            # caller receives None instead of the resubmitted job's id.
            if queue_idx < len(mach.queue) - 1:
                cli.warning(f'Failed to submit to queue {mach.queue[queue_idx]}, try queue {mach.queue[queue_idx+1]}.')
                submit_job(cmd, ntasks, config, args, logfile, wait, queue_idx+1)
                return
            else:
                cli.error(f'Failed to submit job!')
        job_id = match[1]
        cli.notice(f'Job {job_id} submitted running {ntasks} tasks.')
        if wait:
            cli.notice(f'Wait for job {job_id}.')
            try:
                last_line = None
                # Poll the scheduler and echo the log tail until completion.
                while job_running(args, job_id):
                    sleep(10)
                    if not os.path.isfile(logfile):
                        if job_pending(args, job_id):
                            cli.notice(f'Job {job_id} is still pending.')
                        continue
                    line = subprocess.run(['tail', '-n', '1', logfile], stdout=subprocess.PIPE).stdout.decode('utf-8').strip()
                    if last_line != line and line != '':
                        last_line = line
                        print(f'{cli.cyan("==>")} {last_line if len(last_line) <= 80 else last_line[:80]}')
            except KeyboardInterrupt:
                kill_job(args, job_id)
                exit(1)
        return job_id
    elif args.pbs:
        f = open('submit.sh', 'w')
        # NOTE(review): unlike the Slurm branch, the queue is not indexed
        # here ({mach.queue} vs mach.queue[queue_idx]) — confirm intent.
        f.write(f'''\
#!/bin/bash
#PBS -N {config["tag"]}
#PBS -q {mach.queue}
#PBS -l nodes={int(ntasks / ntasks_per_node)}:ppn={ntasks_per_node}
cd $PBS_O_WORKDIR
mpiexec -np {ntasks} -machinefile $PBS_NODEFILE {cmd}
''')
        f.close()
        stdout = run('qsub < submit.sh', stdout=True)
        # NOTE(review): '(\w+)' simply grabs the first word of qsub's output.
        match = re.search('(\w+)', stdout)
        if not match:
            cli.error(f'Failed to parse job id from {stdout}')
        job_id = match[1]
        cli.notice(f'Job {job_id} submitted running {ntasks} tasks.')
        if wait:
            cli.notice(f'Wait for job {job_id}.')
            try:
                last_line = None
                while job_running(args, job_id):
                    sleep(10)
                    if not os.path.isfile(logfile):
                        if job_pending(args, job_id):
                            cli.notice(f'Job {job_id} is still pending.')
                        continue
                    line = subprocess.run(['tail', '-n', '1', logfile], stdout=subprocess.PIPE).stdout.decode('utf-8').strip()
                    if last_line != line and line != '':
                        last_line = line
                        print(f'{cli.cyan("==>")} {last_line if len(last_line) <= 80 else last_line[:80]}')
            except KeyboardInterrupt:
                kill_job(args, job_id)
                exit(1)
        return job_id
    else:
        # No scheduler configured: run mpiexec directly in the background
        # and echo the log tail until the process exits.
        proc = run(f'mpiexec -np {ntasks} {cmd}', bg=True)
        try:
            while proc.poll() == None:
                sleep(10)
                if not os.path.isfile(logfile):
                    continue
                res = subprocess.run(['tail', '-n', '1', logfile], stdout=subprocess.PIPE)
                last_line = res.stdout.decode("utf-8").strip()
                print(f'{cli.cyan("==>")} {last_line if len(last_line) <= 80 else last_line[:80]}')
        except KeyboardInterrupt:
            cli.warning('Ended by user!')
            proc.kill()
            exit(1)
def send_mail(self, sub, content, sender=None, receivers=None, cc=None,
              attachment=None):
    """Send an email, optionally with file attachments.

    :param str sub: subject line
    :param str content: plain-text body
    :param str sender: sender address (overrides ``self.sender`` when given)
    :param list receivers: extra recipient addresses
    :param list cc: extra carbon-copy addresses
    :param list attachment: paths of files to attach
    :return bool: True when the mail was sent successfully, False otherwise
    """
    # `None` sentinel instead of a mutable default list shared across calls.
    if attachment is None:
        attachment = []
    if sender is not None:
        self.sender = sender
    if receivers is not None:
        self.receivers.extend(receivers)
    if cc is not None:
        self.cc.extend(cc)
    assert len(self.receivers) != 0, "收件人不能 None"
    cli.info("开始发送邮件了")
    msg = MIMEMultipart()
    msg['Subject'] = sub
    msg['From'] = self.sender
    msg['To'] = ";".join(self.receivers)
    msg["cc"] = ";".join(self.cc)
    msg.attach(MIMEText(content))
    # Attach each listed file; the loop variable is renamed so it no longer
    # shadows the `path` module, and the file handle is closed after reading.
    for attach_path in attachment:
        assert os.path.exists(attach_path) and os.path.isfile(
            attach_path), "附件文件不存在或者不是文件"
        file_name = attach_path.split("/")[-1]
        with open(attach_path, 'rb') as fp:
            part = MIMEApplication(fp.read())
        part.add_header('Content-Disposition', 'attachment',
                        filename=file_name)
        msg.attach(part)
    s = smtplib.SMTP()
    try:
        s.connect(self.smtpserver)                    # connect to SMTP server
        s.login(self.user, self.password)             # authenticate
        s.sendmail(self.sender, self.receivers + self.cc,
                   msg.as_string())                   # send the message
        cli.info("邮件发送成功")
        return True
    except Exception as e:
        cli.warning("邮件发送失败")
        print(e)
        print(self)
        return False
    finally:
        s.close()