Exemplo n.º 1
0
def handle1sutra(xml, folder_out):
	"""Convert one sutra XML file into a text file inside folder_out.

	The sutra number is taken from the input file name (everything after
	the first 'n', with '.xml' removed) and stored in
	globals['sutra_no']; globals['sutra_no_0'] holds the same number
	without leading zeros. The XML is parsed, an optional file header is
	written, the text returned by handleNode() for the <body> element is
	post-processed and written, and the result is optionally split by
	juan.

	Args:
		xml: path of the input XML file.
		folder_out: directory that receives the '.txt' output file.

	NOTE(review): `globals` is subscripted here, so it is presumably a
	module-level mapping that shadows the builtin -- confirm.
	"""
	print(xml)
	fn_in = os.path.basename(xml)
	stem = fn_in.replace('.xml', '')
	globals['sutra_no'] = stem[stem.find('n') + 1:]
	if globals['collection'] == 'T':
		# Drop the a-z suffix that follows sutra number 0220.
		globals['sutra_no'] = re.sub("0220[a-z]", "0220", globals['sutra_no'])
	# sutra_no_0 is the sutra number without leading zeros.
	globals['sutra_no_0'] = re.sub("^0*", "", globals['sutra_no'])
	print('sutra_no:', globals['sutra_no'])
	fn_out = fn_in.replace('.xml', '.txt')
	path_out = os.path.join(folder_out, fn_out)

	# The context manager guarantees the output file is closed even when
	# parsing or conversion raises.
	with open(path_out, 'w', encoding='utf8') as fo:
		tree = etree.parse(xml)
		tree = zbx_xml.stripNamespaces(tree)  # strip namespaces
		# zbx_xml.stripComments(tree)  # strip XML comments

		readCharInfo(tree)
		if options.fileHeader:  # whether to emit the header information
			if re.search(r'T07n0220[d-z]', xml) is not None:
				# From T07n0220d onward the detailed header is not printed.
				# fileHeader() must still run first to collect the data
				# that is needed; its return value is deliberately ignored.
				fileHeader(tree)
				header = shortFileHeader()
				# Remove the final newline; only needed here for T07.
				fo.write(header[:-1])
			else:
				fo.write(fileHeader(tree))

		# Handle the <text rend='no_nor'> case.
		globals['no_nor'] = 0
		text_tag = tree.find('.//text')
		if text_tag.get('rend') == 'no_nor':
			globals['no_nor'] += 1

		body = tree.find('.//body')
		outtxt = handleNode(body)
		# Collapse runs of consecutive Siddham placeholders (◇) into 【◇】.
		# NOTE(review): the pattern is kept byte-for-byte; alternatives such
		# as `([)` look like they may have lost full-width punctuation --
		# verify against the upstream source before relying on it.
		outtxt = re.sub(r"((()|([)|(?)|(…)|(.)|(‧))*◇((◇)|( )|( )|(.)|(‧)|(()|())|([)|(])|(?)|(…))*◇(())|(])|(?)|(…)|(.)|(‧))*","【◇】",outtxt)
		fo.write(outtxt)

	# The file is closed at this point, so it can safely be re-read.
	if options.splitByJuan:
		splitByJuan(path_out, folder_out)
Exemplo n.º 2
0
def handle1sutra(xml, folder_out):
	"""Convert one sutra XML file into a text file inside folder_out.

	The sutra number is taken from the input file name (everything after
	the first 'n', with '.xml' removed) and stored in
	globals['sutra_no']; globals['sutra_no_0'] holds the same number
	without leading zeros. The XML is parsed, an optional file header is
	written, the text returned by handleNode() for the <body> element is
	written, and the result is optionally split by juan.

	Args:
		xml: path of the input XML file.
		folder_out: directory that receives the '.txt' output file.

	NOTE(review): `globals` is subscripted here, so it is presumably a
	module-level mapping that shadows the builtin -- confirm.
	"""
	print(xml)
	fn_in = os.path.basename(xml)
	stem = fn_in.replace('.xml', '')
	globals['sutra_no'] = stem[stem.find('n') + 1:]
	# sutra_no_0 is the sutra number without leading zeros.
	globals['sutra_no_0'] = re.sub("^0*", "", globals['sutra_no'])
	print('sutra_no:', globals['sutra_no'])
	fn_out = fn_in.replace('.xml', '.txt')
	path_out = os.path.join(folder_out, fn_out)

	# The context manager guarantees the output file is closed even when
	# parsing or conversion raises.
	with open(path_out, 'w', encoding='utf8') as fo:
		tree = etree.parse(xml)
		tree = zbx_xml.stripNamespaces(tree)  # strip namespaces
		# zbx_xml.stripComments(tree)  # strip XML comments

		if options.fileHeader:
			fo.write(fileHeader(tree))
		readCharInfo(tree)

		body = tree.find('.//body')
		fo.write(handleNode(body))

	# The file is closed at this point, so it can safely be re-read.
	if options.splitByJuan:
		splitByJuan(path_out, folder_out)
Exemplo n.º 3
0
def handle1sutra(xml, folder_out):
	"""Convert one sutra XML file into a text file inside folder_out.

	The sutra number is taken from the input file name (everything after
	the first 'n', with '.xml' removed) and stored in
	globals['sutra_no']; globals['sutra_no_0'] holds the same number
	without leading zeros. The XML is parsed, an optional file header is
	written, the text returned by handleNode() for the <body> element is
	written, and the result is optionally split by juan.

	Args:
		xml: path of the input XML file.
		folder_out: directory that receives the '.txt' output file.

	NOTE(review): `globals` is subscripted here, so it is presumably a
	module-level mapping that shadows the builtin -- confirm.
	"""
	print(xml)
	fn_in = os.path.basename(xml)
	stem = fn_in.replace('.xml', '')
	globals['sutra_no'] = stem[stem.find('n') + 1:]
	# sutra_no_0 is the sutra number without leading zeros.
	globals['sutra_no_0'] = re.sub("^0*", "", globals['sutra_no'])
	print('sutra_no:', globals['sutra_no'])
	fn_out = fn_in.replace('.xml', '.txt')
	path_out = os.path.join(folder_out, fn_out)

	# The context manager guarantees the output file is closed even when
	# parsing or conversion raises.
	with open(path_out, 'w', encoding='utf8') as fo:
		tree = etree.parse(xml)
		tree = zbx_xml.stripNamespaces(tree)  # strip namespaces
		# zbx_xml.stripComments(tree)  # strip XML comments

		if options.fileHeader:
			fo.write(fileHeader(tree))
		readCharInfo(tree)

		body = tree.find('.//body')
		fo.write(handleNode(body))

	# The file is closed at this point, so it can safely be re-read.
	if options.splitByJuan:
		splitByJuan(path_out, folder_out)