Python BeautifulStoneSoup.get_text示例

编程语言: Python

命名空间/包名称: bs4

方法/功能: get_text

hotexamples.com的示例: 2

Python BeautifulStoneSoup.get_text - 已找到2个示例。以下是从开源项目中提取的、评价最高的 bs4.BeautifulStoneSoup.get_text 真实 Python 示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

BeautifulStoneSoup(30)

findAll(14)

find(12)

find_all(7)

get_text(2)

get(1)

prettify(1)

walk(1)

示例#1

显示文件

文件： test.py 项目： zinaida911/lab2

def get_text(fileDir):
    """Return the text content of the ``content.xml`` member of a zip archive.

    Args:
        fileDir: The archive to open; passed unchanged to
            ``zipfile.ZipFile`` (presumably a path to an OpenDocument
            file, given the ``content.xml`` member name -- confirm with
            callers).

    Returns:
        Whatever ``BeautifulStoneSoup(...).get_text()`` yields for the raw
        bytes of ``content.xml``.

    Raises:
        zipfile.BadZipFile: If ``fileDir`` is not a valid zip archive.
        KeyError: If the archive has no ``content.xml`` member.
    """
    # The context manager guarantees the archive is closed even when the
    # member is missing or unreadable.
    with zipfile.ZipFile(fileDir) as document:
        content = document.read('content.xml')

    # Parsing works on the in-memory bytes, so the archive no longer needs
    # to be open at this point.
    textSoup = BeautifulStoneSoup(content)
    return textSoup.get_text()

示例#2

显示文件

文件： lab2.py 项目： zinaida911/lab2

def get_text(fileDir):
    """Extract the text of ``content.xml`` from the zip archive ``fileDir``.

    Args:
        fileDir: The archive to open; handed directly to
            ``zipfile.ZipFile``.

    Returns:
        The value of ``BeautifulStoneSoup(...).get_text()`` for the bytes
        of the archive's ``content.xml`` member.

    Raises:
        zipfile.BadZipFile: If ``fileDir`` is not a valid zip archive.
        KeyError: If the archive has no ``content.xml`` member.
    """
    # ``with`` closes the archive on every exit path, including when
    # ``read`` raises because the member is absent.
    with zipfile.ZipFile(fileDir) as document:
        textSoup = BeautifulStoneSoup(document.read('content.xml'))
    return textSoup.get_text()