def Mains(DirName, OutFile='Main.tex', mtype='pad', num=True, pyin=False):
    r"""Build one LaTeX master file from the ``.txt`` files under DirName and compile it.

    NOTE(review): a second ``def Mains`` further down this file rebinds the
    name, so this earlier version is shadowed once the module is imported.

    Args:
        DirName: Directory walked recursively for files ending in ``.txt``.
        OutFile: Name of the master ``.tex`` file to write (extension included).
        mtype: Key into the module-level ``latexs`` table; the selected value
            is written at the top of the master file.
        num: When truthy, a file whose name (after ``ut.ChNumToArab``) matches
            ``第<digits>批`` is ordered by that number zero-padded to three
            characters; every other file is ordered by its file name.
        pyin: Passed through to ``Singal_input`` unchanged.

    Returns:
        None. The master file is written, ``xelatex`` is run on it twice and
        ``_removef(OutFile)`` is called afterwards.
    """
    batch_no = re.compile(r'第(\d*)批')
    txt_files = {}
    for root, _dirs, files in os.walk(DirName):
        for f in files:
            if os.path.splitext(f)[1] != '.txt':
                continue
            pf = root + '/' + f
            key = f
            if num:
                fnum = batch_no.findall(ut.ChNumToArab(f))
                if fnum:
                    key = fnum[0].zfill(3)
            # Singal_input's return value is what gets \input into the master file.
            txt_files[key] = Singal_input(pf, pyin)

    txt_files1 = sorted(txt_files.items(), key=lambda item: item[0])
    if txt_files1:
        print(txt_files1)
    # The context manager closes the handle even if a write fails.
    with open(OutFile, 'w', encoding='utf8') as fl:
        fl.write(latexs[mtype] + '\n\n')
        for name, tex_path in txt_files1:
            print(name)
            fl.write(r'\input{%s}' % tex_path)
            fl.write(r'\newpage')
        fl.write(end)

    # Two passes: the first without PDF output, the second produces the PDF.
    os.system('xelatex -no-pdf -interaction=nonstopmode %s' % OutFile)
    os.system('xelatex -interaction=nonstopmode %s' % OutFile)
    _removef(OutFile)
    return
def MainsGF(DirName, OutFile='Main', mtype='pad', num=None,
            pyin=False, Total='max', Research=None,
            startw=None, item1_bool=False, item2_bool=False,
            item0_bool=False):
    r"""Convert selected documents under DirName and compile them with xelatex.

    Files ending in ``.txt``/``.doc``/``.docx`` (case-insensitive) are passed
    to ``Singal_input``; its return value is ``\input`` into a master file.
    Exactly one selection mode applies, in this priority order:

    1. ``num`` given: every file is taken; the sort key is the first match of
       ``num.findall(ut.ChNumToArab(name))`` zero-padded to three characters,
       or the file name when there is no match.
    2. ``Research`` given: only files whose name contains one of the keywords.
    3. ``startw`` given: only files for which ``startw.match(name)`` succeeds.
    4. Otherwise every file is taken.

    Args:
        DirName: Directory walked recursively.
        OutFile: Base name (no extension) of the generated ``.tex`` file(s).
        mtype: Key into the module-level ``latexs`` table.
        num: Object with a ``findall`` method (e.g. a compiled regex) or None.
        pyin: Passed through to ``Singal_input``.
        Total: ``'max'`` for a single output file, or a positive int giving the
            number of inputs per output file (``<OutFile>_01.tex``, ``_02`` ...).
        Research: Keyword string or list of keyword strings, or None.
        startw: Object with a ``match`` method (e.g. a compiled regex) or None.
        item0_bool, item1_bool, item2_bool: Passed through to ``Singal_input``.

    Returns:
        None. Calls ``sys.exit()`` when no file is selected. After compiling,
        every ``.tex`` file found under DirName is deleted.
    """
    if isinstance(Research, str):
        rsch = [Research]
    elif isinstance(Research, list):
        rsch = list(Research)
    else:
        rsch = []

    on_windows = sys.platform.startswith('win')
    txt_files = {}
    for root, _dirs, files in os.walk(DirName):
        if on_windows:
            # Forward slashes keep the path usable inside \input{...}.
            root = '/'.join(root.split('\\'))
        for f in files:
            if os.path.splitext(f)[1].lower() not in ('.txt', '.doc', '.docx'):
                continue
            pf = root + '/' + f
            if num is not None:
                fnum = num.findall(ut.ChNumToArab(f))
                key = fnum[0].zfill(3) if fnum else f
            elif Research is not None:
                # Convert once even when several keywords match the same name.
                if not any(word in f for word in rsch):
                    continue
                key = f
            elif startw is not None:
                if startw.match(f) is None:
                    continue
                key = f
            else:
                key = f
            # item0_bool is forwarded on every path, including the
            # "no number found" one that previously dropped it.
            txt_files[key] = Singal_input(pf, pyin, item1_bool=item1_bool,
                                          item2_bool=item2_bool,
                                          item0_bool=item0_bool)

    if not txt_files:
        print('No files 适合条件')
        sys.exit()
    txt_files1 = sorted(txt_files.items(), key=lambda item: item[0])

    # Each job is (master file name, entries to include).
    if Total == 'max':
        jobs = [(OutFile + '.tex', txt_files1)]
    elif isinstance(Total, int) and Total > 0:
        jobs = [
            (OutFile + '_%s.tex' % str(fn).zfill(2), txt_files1[start:start + Total])
            for fn, start in enumerate(range(0, len(txt_files1), Total), 1)
        ]
    else:
        print('Total is max out int, please input the right parameter.')
        jobs = []

    for OutFile1, entries in jobs:
        with open(OutFile1, 'w', encoding='utf8') as fl:
            fl.write(latexs[mtype] + '\n\n')
            for _key, tex_path in entries:
                fl.write(r'\input{%s}' % tex_path)
                fl.write(r'\newpage')
            fl.write(end)
        os.system('xelatex -no-pdf -interaction=nonstopmode %s' % OutFile1)
        os.system('xelatex -interaction=nonstopmode %s' % OutFile1)
        _removef(OutFile1)

    # Remove every .tex file under DirName (extension match is case-sensitive).
    for root, _dirs, files in os.walk(DirName):
        for f in files:
            if os.path.splitext(f)[1] == '.tex':
                os.remove(os.path.abspath(root + '/' + f))
    return
def Mains(DirName, OutFile='Main', mtype='pad', num=None,
          pyin=False, Total='max',
          item1_bool=False, item2_bool=False,
          item0_bool=False):
    r"""Convert every document under DirName and compile the result with xelatex.

    Files ending in ``.txt``/``.doc``/``.docx`` (case-insensitive) are passed
    to ``Singal_input``; its return value is ``\input`` into a master file.

    Args:
        DirName: Directory walked recursively.
        OutFile: Base name (no extension) of the generated ``.tex`` file(s).
        mtype: Key into the module-level ``latexs`` table.
        num: Object with a ``findall`` method (e.g. a compiled regex) or None.
            When given, the first match in ``ut.ChNumToArab(name)``,
            zero-padded to three characters, is the sort key; otherwise the
            file name is.
        pyin: Passed through to ``Singal_input``.
        Total: ``'max'`` for one output file, or a positive int giving the
            number of inputs per output file. When the number of inputs does
            not exceed ``Total`` a single ``<OutFile>.tex`` is produced;
            otherwise ``<OutFile>_00.tex``, ``<OutFile>_01.tex`` ... are.
        item0_bool, item1_bool, item2_bool: Passed through to ``Singal_input``.

    Returns:
        None. When no input file is found a message is printed and nothing is
        written or deleted. Otherwise every ``.tex`` file found under DirName
        is deleted after compiling.
    """
    on_windows = sys.platform.startswith('win')
    txt_files = {}
    for root, _dirs, files in os.walk(DirName):
        if on_windows:
            # Forward slashes keep the path usable inside \input{...}.
            root = '/'.join(root.split('\\'))
        for f in files:
            if os.path.splitext(f)[1].lower() not in ('.txt', '.doc', '.docx'):
                continue
            pf = root + '/' + f
            key = f
            if num is not None:
                fnum = num.findall(ut.ChNumToArab(f))
                if fnum:
                    key = fnum[0].zfill(3)
            txt_files[key] = Singal_input(pf, pyin, item1_bool=item1_bool,
                                          item2_bool=item2_bool,
                                          item0_bool=item0_bool)

    if not txt_files:
        # Nothing to compile; previously this fell through to an unbound name.
        print('No files 适合条件')
        return

    txt_files1 = sorted(txt_files.items(), key=lambda item: item[0])
    print(txt_files1)

    # Each job is (master file name, entries, text written after each \newpage).
    valid_int = isinstance(Total, int) and Total > 0
    if Total == 'max' or (valid_int and len(txt_files1) <= Total):
        # "<=" so that exactly Total inputs still yield one file.
        jobs = [(OutFile + '.tex', txt_files1, '')]
    elif valid_int:
        jobs = [
            (OutFile + '_%s.tex' % str(ii).zfill(2),
             txt_files1[start:start + Total],
             '\n')
            for ii, start in enumerate(range(0, len(txt_files1), Total))
        ]
    else:
        print('Total is max out int, please input the right parameter.')
        jobs = []

    for OutFile1, entries, separator in jobs:
        with open(OutFile1, 'w', encoding='utf8') as fl:
            fl.write(latexs[mtype] + '\n\n')
            for _key, tex_path in entries:
                fl.write(r'\input{%s}' % tex_path)
                fl.write(r'\newpage')
                if separator:
                    fl.write(separator)
            fl.write(end)
        os.system('xelatex -no-pdf -interaction=nonstopmode %s' % OutFile1)
        os.system('xelatex -interaction=nonstopmode %s' % OutFile1)
        _removef(OutFile1)

    # Remove every .tex file under DirName (extension match is case-sensitive).
    for root, _dirs, files in os.walk(DirName):
        for f in files:
            if os.path.splitext(f)[1] == '.tex':
                os.remove(os.path.abspath(root + '/' + f))
    return
def PdfFile(path, OutFile='Main', mtype='pad',
            num=None, pyin=False, Total='max', res=True,
            item1_bool=False, item2_bool=False,
            item0_bool=False):
    r"""Convert the given files/directories and compile them into PDF(s).

    Files ending in ``.txt``/``.doc``/``.docx`` (case-insensitive) are passed
    to ``Singal_input``; its return value is ``\input`` into a master file and
    removed with ``os.remove`` once compilation is done.

    Args:
        path: A file, a directory, a list mixing both, or None for the current
            working directory. Directories are walked recursively.
        OutFile: Base name (no extension) of the generated ``.tex`` file(s).
        mtype: Key into the module-level ``latexs`` table.
        num: Object with a ``findall`` method (e.g. a compiled regex) or None.
            When given, the first match in ``ut.ChNumToArab(basename)``,
            zero-padded to three characters, is the sort key; otherwise the
            base name is.
        pyin: Passed through to ``Singal_input``.
        Total: ``'max'`` for one output file, or a positive int giving the
            number of inputs per output file (``<OutFile>_01.tex``, ``_02`` ...).
        res: Passed as ``reverse`` to the sort of the inputs.
        item0_bool, item1_bool, item2_bool: Passed through to ``Singal_input``.

    Returns:
        None. Calls ``sys.exit()`` when ``path`` is neither a list, an
        existing file/directory nor None. Prints a message and returns when no
        convertible file is found.
    """
    file_list = []
    path_list = []
    if path is None:
        # Must be tested first: os.path.isfile(None) raises TypeError.
        path_list.append(os.getcwd())
    elif isinstance(path, list):
        for entry in path:
            if os.path.isfile(entry):
                file_list.append(entry)
            elif os.path.isdir(entry):
                path_list.append(entry)
    elif os.path.isfile(path):
        file_list.append(path)
    elif os.path.isdir(path):
        path_list.append(path)
    else:
        print('Please in list of dir/file,or dir,file')
        sys.exit()

    # Files found in directories come first, explicitly listed files after.
    candidates = []
    for directory in path_list:
        for root, _ds, fs in os.walk(directory):
            candidates.extend(os.path.abspath(os.path.join(root, f)) for f in fs)
    candidates.extend(os.path.abspath(f) for f in file_list)

    txt_files = {}
    for f in candidates:
        if os.path.splitext(f)[1].lower() not in ('.txt', '.doc', '.docx'):
            continue
        f_name = os.path.basename(f)
        key = f_name
        if num is not None:
            fnum = num.findall(ut.ChNumToArab(f_name))
            if fnum:
                key = fnum[0].zfill(3)
        txt_files[key] = Singal_input(f, pyin, item1_bool=item1_bool,
                                      item2_bool=item2_bool,
                                      item0_bool=item0_bool)

    if not txt_files:
        # Nothing to compile; previously this fell through to an unbound name.
        print('No files 适合条件')
        return

    txt_files1 = sorted(txt_files.items(), key=lambda item: item[0], reverse=res)

    # Each job is (master file name, entries to include).
    if Total == 'max':
        jobs = [(OutFile + '.tex', txt_files1)]
    elif isinstance(Total, int) and Total > 0:
        jobs = [
            (OutFile + '_%s.tex' % str(fn).zfill(2), txt_files1[start:start + Total])
            for fn, start in enumerate(range(0, len(txt_files1), Total), 1)
        ]
    else:
        print('Total is max out int, please input the right parameter.')
        jobs = []

    for OutFile1, entries in jobs:
        with open(OutFile1, 'w', encoding='utf8') as fl:
            fl.write(latexs[mtype] + '\n\n')
            for _key, tex_path in entries:
                fl.write(r'\input{%s}' % tex_path)
                fl.write(r'\newpage')
            fl.write(end)
        os.system('xelatex -no-pdf -interaction=nonstopmode %s' % OutFile1)
        os.system('xelatex -interaction=nonstopmode %s' % OutFile1)
        _removef(OutFile1)

    # Delete the per-document files returned by Singal_input.
    for _key, tex_path in txt_files1:
        os.remove(tex_path)
    return
def GenerateBookGF(path, regrex1=None,
                   search=None, startw=None,
                   exclude=None,
                   func=C2html,
                   item1_bool=False,
                   item2_bool=False,
                   item0_bool=False,
                   htmlfile='htmlfile/htmlbook_output',
                   pdffile='htmlbook_Main', mtype='article',
                   num=None, pyin=False, File_num='max',
                   m1=re.compile(r'^第\w{1,3}[编|篇]'),
                   m2=re.compile(r'^第\w{1,3}章'),
                   m3=re.compile(r'^第\w{1,3}节'),
                   m4=re.compile(r'^\w{1,3}、'),
                   index=True, res=True,
                   Spp=False,
                   Spplit=False,
                   rc=re.compile(r'(.*?案\s*(检例第\d*号))'),
                   p1=re.compile(r'【要\s*旨】'), p2=re.compile(r'【\w*】'), yz=True):
    r"""Select, de-duplicate and order source files, then hand them to ``func``.

    Dispatch is by ``func.__name__``:

    * ``MainSpp``  -> ``func(path, yz=yz, mtype=mtype)`` and return.
    * ``MainsAbs`` -> ``func(path, pyin=..., Startw=startw, mtype=..., regrex1=...)``
      and return.
    * ``C2html`` / ``txt2htmlv1`` -> called with the ordered file list and
      ``output=htmlfile, m1, m2, m3, index``.
    * ``PdfFile`` -> called with the ordered file list and the LaTeX options.
    * anything else -> a usage message is printed.

    Args:
        path: A file, a directory, a list mixing both, or None for the current
            working directory. Directories are expanded through ``GFlist``.
        regrex1: Compiled regex, e.g. ``re.compile(r'\d*')``. Its first
            non-empty match in the file name (or in ``ut.ChNumToArab(name)``)
            is converted with ``int`` and used as the sort key; files that are
            not ``.txt``/``.doc``/``.docx`` or have no match are dropped. When
            None, the sort key is the file name with ``cc`` matches and spaces
            removed.
        search: Keyword string or list; keep only files whose name contains one.
        startw: Compiled regex, e.g. ``re.compile('^ok')``; keep only files
            whose name it matches from the start.
        exclude: Keyword string or list; drop files whose name contains one.
        func: Callable selecting the output backend (see above).
        item0_bool, item1_bool, item2_bool: Forwarded to ``PdfFile``.
        htmlfile: ``output`` argument for the HTML backends.
        pdffile: ``OutFile`` argument for ``PdfFile``.
        mtype: Forwarded to ``MainSpp``, ``MainsAbs`` and ``PdfFile``.
        num: Same role as ``regrex1`` but forwarded to ``PdfFile``.
        pyin: Forwarded to ``MainsAbs`` and ``PdfFile``.
        File_num: Forwarded to ``PdfFile`` as ``Total``.
        m1, m2, m3: Heading patterns (levels 1-3) forwarded to the HTML backends.
        m4: Level-4 heading pattern; accepted but not forwarded by this function.
        index: Forwarded to the HTML backends.
        res: Passed as ``reverse`` to the final sort.
        Spp, rc, p1, p2: Accepted but not used by this function.
        Spplit: When truthy, ``shutil.rmtree(path)`` is called at the end, so
            ``path`` must then be a single directory path.
        yz: Forwarded to ``MainSpp``.

    Returns:
        None. Calls ``sys.exit()`` when ``path`` is invalid or when ``search``
        / ``startw`` leave no file. Prints a message and returns when no file
        is selected at all.
    """
    if func.__name__ in ['MainSpp']:
        func(path, yz=yz, mtype=mtype)
        return
    if func.__name__ in ['MainsAbs']:
        func(path, pyin=pyin, Startw=startw, mtype=mtype, regrex1=regrex1)
        return

    # NOTE(review): inside the character class the '-' sits between two other
    # characters, so `re` parses it as a range rather than a literal dash;
    # confirm that is intended (escape it or move it to the end otherwise).
    cc = re.compile(r'([,、:-》.《—_;;〈〉<>【】()()])*\s*-')

    if isinstance(search, list):
        rs = list(search)
    elif isinstance(search, str):
        rs = [search]
    else:
        rs = []
    if isinstance(exclude, list):
        excl = list(exclude)
    elif isinstance(exclude, str):
        excl = [exclude]
    else:
        excl = []

    file_list = []
    path_list = []
    if path is None:
        # Tested first: os.path.isfile(None) raises TypeError.
        path_list.append(os.getcwd())
    elif isinstance(path, list):
        for entry in path:
            if os.path.isfile(entry):
                file_list.append(entry)
            elif os.path.isdir(entry):
                path_list.append(entry)
    elif os.path.isfile(path):
        file_list.append(path)
    elif os.path.isdir(path):
        path_list.append(path)
    else:
        print('Please in list of dir/file,or dir,file')
        sys.exit()

    # Only expand directories when there are some; GFlist yields (key, path).
    if path_list:
        for _key, found in GFlist(path_list):
            file_list.append(found)

    # Keep the first file for each "Chinese characters + trailing digits"
    # signature of the base name.
    word = re.compile(r'[\u4e00-\u9fa5]+\d*')
    only_one = set()
    unique_files = []
    for candidate in file_list:
        signature = ''.join(word.findall(os.path.basename(candidate)))
        if signature not in only_one:
            only_one.add(signature)
            unique_files.append(candidate)

    if exclude is not None:
        unique_files = [
            candidate for candidate in unique_files
            if not any(ex in os.path.basename(candidate) for ex in excl)
        ]

    Final_list = {}
    for candidate in unique_files:
        name = os.path.basename(candidate)
        if regrex1 is None:
            Final_list[cc.sub('', name).replace(' ', '')] = candidate
            continue
        if os.path.splitext(name)[1].lower() not in ('.txt', '.doc', '.docx'):
            continue
        hits = [i for i in regrex1.findall(name) if len(i) > 0]
        if not hits:
            # Fall back to the name with Chinese numerals converted.
            hits = [i for i in regrex1.findall(ut.ChNumToArab(name)) if len(i) > 0]
        if hits:
            Final_list[int(hits[0])] = candidate

    if search is not None:
        matched = {
            k: v for k, v in Final_list.items()
            if any(keyword in os.path.basename(v) for keyword in rs)
        }
        if not matched:
            print('没有关于 "%s" 的文件' % search)
            sys.exit()
        Final_list = matched

    if startw is not None:
        matched = {
            k: v for k, v in Final_list.items()
            if startw.match(os.path.basename(v)) is not None
        }
        if not matched:
            print('没有符合的文件')
            sys.exit()
        Final_list = matched

    if not Final_list:
        # Previously this fell through to an unbound Final_files.
        print('没有符合的文件')
        return

    Final = sorted(Final_list.items(), key=lambda item: item[0], reverse=res)
    Final_files = [item[1] for item in Final]

    if func.__name__ in ['C2html', 'txt2htmlv1']:
        func(Final_files, output=htmlfile, m1=m1, m2=m2, m3=m3, index=index)
    elif func.__name__ in ['PdfFile']:
        func(Final_files, OutFile=pdffile, mtype=mtype,
             num=num, pyin=pyin, Total=File_num,
             item0_bool=item0_bool,
             item1_bool=item1_bool, item2_bool=item2_bool)
    else:
        print('Please input right function:', 'C2html', 'C2htmlBase',
              'txt2htmlv1', 'txt2html_inonefile', 'PdfFile')

    if Spplit:
        shutil.rmtree(path)
    return
def GFlistv2(path, regrex1=None, research=None, startw=None, exclude=None, res=False):
    """Collect files below the given directories as sorted ``(key, path)`` pairs.

    Args:
        path: A directory path or a list of paths. Only directories are
            walked; plain files in the list are ignored.
        regrex1: Compiled regex or None. When given, only ``.txt``/``.doc``/
            ``.docx`` files are considered; the lower-cased file stem with a
            leading ``YYYY-MM-DD``-style date removed is searched, the first
            non-empty match (falling back to ``ut.ChNumToArab`` of the stem)
            is converted with ``int`` and used as key. When None, every file
            is taken and keyed by its stem with module-level ``cc`` matches
            and spaces removed.
        research: Keyword string or list; keep only files whose base name
            contains one of them.
        startw: Compiled regex matched against the base name. If no file
            matches, the selection is left unchanged.
        exclude: Keyword string or list; drop files whose base name contains
            any of them.
        res: Passed as ``reverse`` to the final sort.

    Returns:
        A list of ``(key, absolute_path)`` tuples sorted by key, or None
        (after printing a message) when nothing is left.
    """
    if isinstance(research, list):
        rs = list(research)
    elif isinstance(research, str):
        rs = [research]
    else:
        rs = []
    if isinstance(exclude, list):
        excl = list(exclude)
    elif isinstance(exclude, str):
        excl = [exclude]
    else:
        excl = []

    pathlist = []
    if isinstance(path, list):
        pathlist.extend(entry for entry in path if os.path.isdir(entry))
    elif isinstance(path, str):
        if os.path.isdir(path):
            pathlist.append(path)

    sre = re.compile(r'^(\d{4}-*\d{2}-*\d{2})')
    ss = {}
    for directory in pathlist:
        for root, _ds, fs in os.walk(directory):
            for f in fs:
                stem, ext = os.path.splitext(f)
                full = os.path.abspath(os.path.join(root, f))
                if regrex1 is None:
                    ss[cc.sub('', stem).replace(' ', '')] = full
                    continue
                if ext.lower() not in ('.txt', '.doc', '.docx'):
                    continue
                ff = sre.sub('', stem.lower())
                hits = [i for i in regrex1.findall(ff) if len(i) > 0]
                if not hits:
                    # Only convert Chinese numerals when the plain stem
                    # produced no match.
                    hits = [i for i in regrex1.findall(ut.ChNumToArab(ff)) if len(i) > 0]
                if hits:
                    ss[int(hits[0])] = full

    if research is not None:
        matched = {
            k: v for k, v in ss.items()
            if any(keyword in os.path.basename(v) for keyword in rs)
        }
        if not matched:
            print('没有关于 "%s" 的文件' % research)
            return
        ss = matched

    if startw is not None:
        # Match the base name: the stored value is an absolute path, which a
        # "starts with" pattern would never match.
        matched = {
            k: v for k, v in ss.items()
            if startw.match(os.path.basename(v)) is not None
        }
        if matched:
            ss = matched

    if exclude is not None:
        # A file is dropped as soon as ANY exclude keyword occurs in its name.
        ss = {
            k: v for k, v in ss.items()
            if not any(ex in os.path.basename(v) for ex in excl)
        }

    if len(ss) > 0:
        dd = sorted(ss.items(), key=lambda item: item[0], reverse=res)
        return dd
    else:
        print('没有关于 "%s" 的文件' % research)
        return
def GFlistv1(path,
             regrex1=None,
             search=None,
             startw=None,
             exclude=None,
             res=False):
    """Return the selected files as a list of paths in sorted order.

    Args:
        path: A file, a directory, a list mixing both, or None for the current
            working directory. Directories are walked recursively.
        regrex1: Compiled regex or None. When given, only ``.txt``/``.doc``/
            ``.docx`` files are kept; the first non-empty match in the base
            name (falling back to ``ut.ChNumToArab`` of the name) is converted
            with ``int`` and used as sort key. When None, every file is kept
            and keyed by its base name with module-level ``cc`` matches and
            spaces removed.
        search: Keyword string or list; keep only files whose base name
            contains one of them.
        startw: Compiled regex; keep only files whose base name it matches.
        exclude: Keyword string or list; drop files whose base name contains
            any of them.
        res: True for descending order.

    Returns:
        List of file paths ordered by key (empty when nothing is found).
        Calls ``sys.exit()`` when ``search`` or ``startw`` leaves no file.

    NOTE(review): before filtering, only the first file is kept for each
    distinct sequence of CJK characters in the base name, so names that differ
    only in digits or Latin letters collapse to one entry -- confirm intended.
    """
    if isinstance(search, list):
        rs = list(search)
    elif isinstance(search, str):
        rs = [search]
    else:
        rs = []
    if isinstance(exclude, list):
        excl = list(exclude)
    elif isinstance(exclude, str):
        excl = [exclude]
    else:
        excl = []

    file_list = []
    path_list = []
    if path is None:
        # Tested first: os.path.isfile(None) raises TypeError.
        path_list.append(os.getcwd())
    elif isinstance(path, list):
        for entry in path:
            if os.path.isfile(entry):
                file_list.append(entry)
            elif os.path.isdir(entry):
                path_list.append(entry)
    elif os.path.isfile(path):
        file_list.append(path)
    elif os.path.isdir(path):
        path_list.append(path)

    for directory in path_list:
        for root, _ds, fs in os.walk(directory):
            file_list.extend(os.path.abspath(os.path.join(root, f)) for f in fs)

    # Keep the first file per CJK-character signature of the base name.
    word = re.compile(r'[\u4e00-\u9fa5]+')
    only_one = set()
    unique_files = []
    for candidate in file_list:
        signature = ''.join(word.findall(os.path.basename(candidate)))
        if signature not in only_one:
            only_one.add(signature)
            unique_files.append(candidate)

    if exclude is not None:
        unique_files = [
            candidate for candidate in unique_files
            if not any(ex in os.path.basename(candidate) for ex in excl)
        ]

    Final_list = {}
    for candidate in unique_files:
        name = os.path.basename(candidate)
        if regrex1 is None:
            Final_list[cc.sub('', name).replace(' ', '')] = candidate
            continue
        if os.path.splitext(name)[1].lower() not in ('.txt', '.doc', '.docx'):
            continue
        hits = [i for i in regrex1.findall(name) if len(i) > 0]
        if not hits:
            # Only convert Chinese numerals when the plain name had no match.
            hits = [i for i in regrex1.findall(ut.ChNumToArab(name)) if len(i) > 0]
        if hits:
            Final_list[int(hits[0])] = candidate

    if search is not None:
        matched = {
            k: v for k, v in Final_list.items()
            if any(keyword in os.path.basename(v) for keyword in rs)
        }
        if not matched:
            print('没有关于 "%s" 的文件' % search)
            sys.exit()
        Final_list = matched

    if startw is not None:
        matched = {
            k: v for k, v in Final_list.items()
            if startw.match(os.path.basename(v)) is not None
        }
        if not matched:
            print('没有符合的文件')
            sys.exit()
        Final_list = matched

    Final = sorted(Final_list.items(), key=lambda item: item[0], reverse=res)
    Final_files = [item[1] for item in Final]
    return Final_files
def GFlist(path, regrex1=None, research=None, startw=None):
    """Walk the given directories and return sorted ``(key, path)`` pairs.

    Args:
        path: A directory path or a list of directory paths.
        regrex1: Compiled regex or None. When given, only ``.txt``/``.doc``/
            ``.docx`` files are considered; the first non-empty match in the
            file name (falling back to ``ut.ChNumToArab`` of the name) is
            converted with ``int`` and used as key. When None, every file is
            taken and keyed by its name with module-level ``cc`` matches and
            spaces removed.
        research: Keyword string or list. Applied only when ``regrex1`` is
            None: keep entries whose key contains one of the keywords. If none
            does, a message is printed and the list is left unfiltered.
        startw: Compiled regex. Applied only when ``regrex1`` is None: keep
            entries whose key it matches. If none does, a message is printed
            and the list is left unfiltered.

    Returns:
        List of ``(key, absolute_path)`` tuples sorted by key; empty when no
        file is found.
    """
    if isinstance(research, list):
        rs = list(research)
    elif isinstance(research, str):
        rs = [research]
    else:
        rs = []

    if isinstance(path, list):
        pathlist = list(path)
    elif isinstance(path, str):
        pathlist = [path]
    else:
        pathlist = []

    ss = {}
    for directory in pathlist:
        for root, _ds, fs in os.walk(directory):
            for f in fs:
                full = os.path.abspath(os.path.join(root, f))
                if regrex1 is None:
                    ss[cc.sub('', f).replace(' ', '')] = full
                    continue
                if os.path.splitext(f)[1].lower() not in ('.txt', '.doc', '.docx'):
                    continue
                hits = [i for i in regrex1.findall(f) if len(i) > 0]
                if not hits:
                    # Only convert Chinese numerals when the plain name had
                    # no match.
                    hits = [i for i in regrex1.findall(ut.ChNumToArab(f)) if len(i) > 0]
                if hits:
                    ss[int(hits[0])] = full

    # Sort once after the walk; this also defines the result when nothing
    # was found.
    dd = sorted(ss.items(), key=lambda item: item[0])

    if (regrex1 is None) and (research is not None):
        ddf = {k: v for k, v in dd if any(keyword in k for keyword in rs)}
        if len(ddf) > 0:
            dd = sorted(ddf.items(), key=lambda item: item[0])
        else:
            print('没有关于 "%s" 的文件' % research)

    if (regrex1 is None) and (startw is not None):
        dff = {k: v for k, v in dd if startw.match(k) is not None}
        if len(dff) > 0:
            dd = sorted(dff.items(), key=lambda item: item[0])
        else:
            # This branch is about startw, so do not report `research` here.
            print('没有符合的文件')
    return dd