Python XML_PDFParser.buscar_fichero示例

编程语言: Python

类/类型: XML_PDFParser

方法/功能: buscar_fichero

hotexamples.com的示例: 2

Python XML_PDFParser.buscar_fichero - 已找到2个示例。以下是从开源项目中提取的、评价最高的XML_PDFParser.buscar_fichero真实Python示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

buscar_fichero(2)

eliminar_simbolos_html(1)

formato_web(1)

fusionar_ficheros(1)

obten_grupo_xml(1)

obten_grupo_xml_multiple(1)

procesa_titulos_pdfs(1)

示例#1

显示文件

    def procesar_dblp_local(self):
        """Filter the local ``dblp.xml`` file and export the matching records.

        Reads the selected source/destination, the search term and the output
        file name from the widgets of ``self.ventana``.  Only the combination
        source index 0 (DBLP Computer Science Bibliography) and destination
        index 0 is handled; for any other selection the method just resets
        the progress bar and returns.

        For every checked field box, the indices returned by
        ``XML_PDFParser.buscar_fichero`` are accumulated; the sorted list is
        then passed to ``XML_PDFParser.obten_grupo_xml`` together with the
        opening/closing tags of the record type chosen via the radio buttons.

        Returns:
            Always ``1``.
        """
        ventana = self.ventana
        barra = ventana.progressBar_2
        barra.setValue(0)

        # Selected source and destination of the data.
        origen = ventana.ComboBoxOrigen.currentIndex()
        destino = ventana.ComboBoxDestino.currentIndex()

        # Term used to filter the data.
        termino_busqueda = str(ventana.TerminoBusqueda.toPlainText())

        fichero_origen = "dblp.xml"

        # Only "DBLP as source" (0) combined with destination 0 is implemented.
        if origen != 0 or destino != 0:
            return 1

        fichero_destino = ventana.NombreFichero.toPlainText() + ".xml"
        print(fichero_destino)

        # (check box, log message, tag to search in, progress after the search)
        campos = (
            (ventana.CheckBoxAutor, "Calculo Indices de Autor", "<author", 5),
            (ventana.CheckBoxEditor, "Calculo Indices de Editor", "<editor", 10),
            (ventana.CheckBoxPublicador, "Calculo Indices de Publicador", "<publisher", 15),
            (ventana.CheckBoxTitulo, "Calculo Indices de Titulo", "<title", 20),
        )

        indices = []
        for casilla, mensaje, etiqueta, progreso in campos:
            if not casilla.isChecked():
                continue
            print(mensaje)
            indices.extend(
                XML_PDFParser.buscar_fichero(fichero_origen, termino_busqueda, etiqueta)
            )
            barra.setValue(progreso)
            print(indices)

        indices.sort()

        # (radio button, XML element names to export, progress after the export)
        tipos = (
            (ventana.RadioButtonArticulo, ("article",), 40),
            (ventana.RadioButtonColeccion, ("incollection",), 60),
            (ventana.RadioButtonDebate, ("proceedings",), 70),
            (ventana.RadioButtonTesis, ("phdthesis", "mastersthesis"), 90),
            (ventana.RadioButtonLibro, ("book",), 95),
            (ventana.RadioButtonOtros, ("www", "inproceedings"), 100),
        )

        # Only the first checked radio button is honoured.
        for boton, elementos, progreso in tipos:
            if not boton.isChecked():
                continue
            # NOTE(review): when two element types are exported, both calls
            # target the same destination file; whether the second call
            # appends or overwrites depends on obten_grupo_xml -- confirm.
            for elemento in elementos:
                # Opening and closing tag always refer to the same element.
                # The previous code paired "<inproceedings" with "</www".
                XML_PDFParser.obten_grupo_xml(
                    fichero_origen,
                    fichero_destino,
                    indices,
                    "<" + elemento,
                    "</" + elemento,
                )
            barra.setValue(progreso)
            break

        print("PROCESADO FINALIZADO")
        barra.setValue(100)

        return 1

示例#2

显示文件

# Entry-point dispatch: either start the user interface or run the
# download / processing pipeline, depending on the parsed arguments.
if args.interfaz_activa == 1:
    Interfaz_Usuario()

elif args.descarga_activa == 1:

    # Fetch the DBLP XML file through URLInteract.descargar_fichero.
    URL = "http://dblp.uni-trier.de/xml/dblp.xml"
    URLInteract.descargar_fichero(URL)

    # Process the data only when requested and when the source is DBLP.
    if args.procesar_activa == 1 and args.origen_datos == "dblp":

        # An empty tag argument is passed to buscar_fichero here.
        indices = XML_PDFParser.buscar_fichero(
            "dblp.xml", args.termino_busqueda, ""
        )

        # Opening and closing tags, listed in the same order.
        etiquetas_apertura = [
            "<article",
            "<incollection",
            "<proceedings",
            "<phdthesis",
            "<mastersthesis",
            "<book",
            "<www",
            "<inproceedings",
        ]
        etiquetas_cierre = [
            "</article>",
            "</incollection>",
            "</proceedings>",
            "</phdthesis>",
            "</mastersthesis>",
            "</book>",
            "</www>",
            "</inproceedings>",
        ]

        print(indices)

        # Intermediate and final output paths derived from the destination.
        fichero_temp = args.fichero_destino + "_temp.txt"
        fichero_temp2 = args.fichero_destino + "_temp2.txt"
        fichero_final = args.fichero_destino + ".txt"

        # Step 1: extract the matching XML groups.
        XML_PDFParser.obten_grupo_xml_multiple(
            "dblp.xml",
            fichero_temp,
            indices,
            etiquetas_apertura,
            etiquetas_cierre,
        )

        # Step 2: run eliminar_simbolos_html on the extracted text.
        XML_PDFParser.eliminar_simbolos_html(fichero_temp, fichero_temp2)

        # Step 3: run formato_web to produce the final .txt file.
        XML_PDFParser.formato_web(fichero_temp2, fichero_final)

        # Optional step 4: merge the result with another file.
        if args.fusionar_activa == 1:
            XML_PDFParser.fusionar_ficheros(
                args.fichero_fusion,
                fichero_final,
                args.fichero_destino + "_fusionado.txt",
            )