Exemplo n.º 1
0
def expand(xsl_filepath: Path,
           xml_filepath: Path,
           doctype: Optional[str] = None):
    """
    Run an XSLT template over the module-level ``book`` tree and save the output.

    :param xsl_filepath: location of the XSLT template; it is loaded with
        ``read`` and compiled with ``lxml.etree.XSLT``
    :param xml_filepath: destination handed to ``xml.save``
    :param doctype: optional doctype, forwarded unchanged to ``xml.save``
    """
    stylesheet = lxml.etree.XSLT(read(xsl_filepath))
    # The transform input is always the shared ``book`` metadata tree.
    xml.save(xml_filepath, stylesheet(book), doctype)
Exemplo n.º 2
0
def getAll(file_name):
    """Append a check for one reference's full text to ``ans.robot``.

    Reads ``file_name``, takes the digits that follow the first ``<label>``
    as the reference number, removes newlines, tabs and every ``<...>`` tag
    from the content, and appends an entry to ``ans.robot`` (in the current
    working directory) that compares ``.//ref-list/ref[<number>]`` with the
    remaining text.

    Args:
        file_name: Path of the XML file to read. Both files are opened with
            the platform default encoding.

    Raises:
        AttributeError: If the file has no ``<label>`` followed by digits.
        ValueError: If a ``<`` has no closing ``>`` after it.
    """
    # Context managers make sure both handles are closed (and the output is
    # flushed) even when parsing fails part-way through.
    with open(file_name, "r") as source:
        data = source.read()

    # Reference number: the digits right after the first <label> tag.
    label = re.search(r'<label>([0-9]+)', data).group(1)

    # Remove layout whitespace before stripping the markup.
    data = data.replace("\n", "").replace("\t", "")

    # One regex pass pairs every "<" with the next ">" *after* it. Pairing it
    # with the first ">" anywhere in the string never terminates when a
    # literal ">" appears in the text ahead of a tag.
    data = re.sub(r'<[^>]*>', '', data)
    if "<" in data:
        # Malformed input: keep signalling it with ValueError.
        raise ValueError("unterminated tag: '<' without a closing '>'")

    with open('ans.robot', 'a') as out:
        out.write("验证第" + label + "条参考文献all")
        out.write("\n" + "    Jats Text Should Be         ")
        out.write(".//ref-list/ref[" + label + "]")
        out.write("         " + data + "\n\n")
Exemplo n.º 3
0
import xml
from pathlib import Path

from xml import read

# Book directory (holds book.xml) and EPUB directory, taken from the first
# two command-line arguments.
BOOK = Path(sys.argv[1])
EPUB = Path(sys.argv[2])
# Directory containing this script; templates and book.dtd are resolved
# relative to it.
CODE = Path(__file__).parent

OEBPS = EPUB / 'OEBPS'
# NOTE(review): presumably a lookup order (epub-specific before common) --
# confirm against the code that consumes DEFAULTS.
DEFAULTS = [CODE / 'templates' / 'epub', CODE / 'templates' / 'common']
TEMPLATES = CODE / 'templates' / 'epub' / 'XML'

# DTD shipped next to this script, handed to xml.read below.
book_dtd = lxml.etree.DTD(str(CODE / 'book.dtd'))

# Load the book metadata at import time and stamp it with today's date.
# NOTE(review): the dtd= argument is presumably used for validation -- confirm
# in xml.read.
book = xml.read(BOOK / 'book.xml', dtd=book_dtd)
book.attrib['date'] = time.strftime("%Y-%m-%d")


def expand(xsl_filepath: Path,
           xml_filepath: Path,
           doctype: Optional[str] = None):
    """
    Run an XSLT template over the module-level ``book`` tree and save the output.

    :param xsl_filepath: location of the XSLT template; it is loaded with
        ``read`` and compiled with ``lxml.etree.XSLT``
    :param xml_filepath: destination handed to ``xml.save``
    :param doctype: optional doctype, forwarded unchanged to ``xml.save``
    """
    stylesheet = lxml.etree.XSLT(read(xsl_filepath))
    # The transform input is always the shared ``book`` metadata tree.
    xml.save(xml_filepath, stylesheet(book), doctype)


def copy_files(subdirectory: str, filenames: List[str]) -> None:
    """
    copy files from BOOK to EPUB, using a default or template
Exemplo n.º 4
0
def _add_missing_images(source_file, illustrations, images):
    """Register each image found in source_file that is not yet in images.

    A new 'image' file element is appended to ``illustrations`` and the
    filename is added to the ``images`` set.
    """
    for img_filename in find_images(source_file):
        if img_filename not in images:
            illustrations.append(file_element('image', img_filename))
            images.add(img_filename)


def run(ebook: Path, bigbook: Path, ubercoordinator: Path,
        files: List[Path]) -> None:
    """
    Add articles from the Big Book of Key to an ebook and update its book.xml.

    Steps, in order:

    1. For every non-template section already listed in book.xml, copy the
       file from the Big Book if the ebook lacks it, and register any images
       it references. A section missing from both places is reported with a
       warning on stdout.
    2. For every requested file, look the article up by the file's stem; if
       its file is not yet a section, copy it and append a section (with
       title) to the contents. Register the article's images.
    3. Copy every registered image that is missing from the ebook.
    4. If the set of sections or images changed, back up book.xml to
       book.xml.bak and save the updated document.

    :param ebook: the ebook source directory
    :param bigbook: the Big Book of Key
    :param ubercoordinator: the ubercoordinator source directory, for the DTD
    :param files: the XHTML file from the Big Book of Key that need adding
    :return: None
    """

    index = Index(bigbook)

    # Close the DTD file as soon as it has been handed to DTD(); opening it
    # inline leaves the handle open for the rest of the run.
    with (ubercoordinator / 'src' / 'book.dtd').open() as dtd_file:
        book_dtd = DTD(dtd_file)
    book = xml.read(ebook / 'book.xml', dtd=book_dtd)

    illustrations = xml.get_one(book, 'illustrations')
    contents = xml.get_one(book, 'contents')

    sections = set(
        xml.get_all_str(contents, '//section[not(@template="yes")]/@file'))
    images = set(xml.get_all_str(illustrations, '//image/@file'))

    # Snapshots used at the end to decide whether book.xml must be rewritten.
    initial_sections = sections.copy()
    initial_images = images.copy()

    for filename in sections:
        ebook_file = ebook / 'Text' / filename
        bigbook_file = bigbook / 'Text' / filename
        if not ebook_file.exists() and bigbook_file.exists():
            copyfile(bigbook_file, ebook_file)
        if ebook_file.exists():
            _add_missing_images(ebook_file, illustrations, images)
        else:
            print(
                f"{ebook / 'book.xml'}:0:0:WARNING: is this missing?: {filename}"
            )

    for file in files:
        article_id = file.stem
        article = index.articles_by_id[article_id]
        section_name = article.file.name

        if section_name not in sections:
            copyfile(article.file, ebook / 'Text' / section_name)
            title = xml.rewrap('title', XML(article.link))
            section = file_element('section', section_name)
            section.append(title)
            contents.append(section)
            # Record the name that was actually checked, copied and written
            # to the section element. Recording file.name instead lets a
            # second article from the same file add a duplicate section
            # whenever the two names differ.
            sections.add(section_name)

        _add_missing_images(article.file, illustrations, images)

    for img_filename in images:
        target = ebook / 'Images' / img_filename
        if not target.exists():
            copyfile(bigbook / 'Images' / img_filename, target)

    book.attrib['date'] = strftime("%Y-%m-%d")

    # Only touch book.xml (and its backup) when something was actually added.
    if sections != initial_sections or images != initial_images:
        copyfile(ebook / 'book.xml', ebook / 'book.xml.bak')
        xml.save(ebook / 'book.xml', book, doctype='book')
Exemplo n.º 5
0
    def search(startRecord, maxRecords, *args):
        """Run a Gallica SRU 'searchRetrieve' request and return the parsed XML.

        The response text is saved to ``gallica_<startRecord>.xml`` in the
        current directory, read back and parsed with ``ET.fromstring``.
        The request is retried while it fails or returns a non-200 status,
        up to 10 attempts in total.

        NOTE(review): the query is hard-coded (Latin-language records whose
        source is the "département des manuscrits"). The positional ``args``
        are accepted for compatibility but do not influence the request --
        confirm whether an args-based query should be used instead.

        Args:
            startRecord: Index of the first record to request; also used in
                the output filename. May be a string or an integer.
            maxRecords: Maximum number of records to request (string or
                integer).
            *args: Query term groups; currently unused (see the note).

        Returns:
            The root element produced by ``ET.fromstring`` on success, or
            the empty string ``""`` when the request could not be built,
            every attempt failed, or the response could not be saved/parsed.
        """

        oErr = ErrHandle()
        RECHERCHE_BASEURL = "https://gallica.bnf.fr/SRU"
        max_attempts = 10

        try:
            oData = {}
            oData['operation'] = 'searchRetrieve'
            oData['version'] = "1.2"
            oData['query'] = "( (dc.language all \"lat\") and (dc.source adj \"département des manuscrits\"))"
            oData['maximumRecords'] = maxRecords
            oData['startRecord'] = startRecord

            # Combine the data into an URL. Values go through format() so
            # that integer record numbers work as well as strings.
            sData = "?" + "&".join(
                "{}={}".format(k, v) for k, v in oData.items())
            url = RECHERCHE_BASEURL + sData

            print(url)
        except Exception:
            oErr.DoError("Search/search error 1")
            return ""

        # set up a filename
        filename = "gallica_{}.xml".format(startRecord)

        attempts = max_attempts
        bSuccess = False
        root = None

        while not bSuccess and attempts > 0:

            # Show progress once at least one attempt has been used up
            if attempts < max_attempts:
                print("Attempts left: {}".format(attempts))

            try:
                r = requests.get(url)
            except Exception:
                # No response object exists here, so do not touch `r`;
                # count the failed attempt and try again.
                oErr.DoError("Search/search error 2")
                attempts -= 1
                continue

            if r.status_code != 200:
                oErr.Status("Site returns status: {}".format(r.status_code))
                # Keep track of attempts
                attempts -= 1
                continue

            try:
                # Write the contents to the local file
                with open(filename, 'w', encoding="utf-8-sig") as fOut:
                    fOut.write(r.text)

                # Open this file again and read the text back
                with open(filename, encoding="utf-8-sig") as fIn:
                    sText = fIn.read()

                # Convert text to XML object
                root = ET.fromstring(sText)
                # Indicate we have it
                bSuccess = True
            except Exception:
                oErr.DoError("Could not read the XML")
                return ""

        if bSuccess:
            # return this document
            return root

        # Were not able to process the request
        oErr.DoError("Could not process the request")
        return ""