def expand(xsl_filepath: Path, xml_filepath: Path, doctype: Optional[str] = None):
    """Run the module-level ``book`` tree through an XSLT stylesheet and save it.

    :param xsl_filepath: stylesheet file; loaded with ``read`` and compiled
        with ``lxml.etree.XSLT``
    :param xml_filepath: destination handed to ``xml.save``
    :param doctype: forwarded unchanged to ``xml.save``
    """
    stylesheet = read(xsl_filepath)
    transform = lxml.etree.XSLT(stylesheet)
    transformed = transform(book)
    xml.save(xml_filepath, transformed, doctype)
def getAll(file_name):
    """Append a text check for one reference to ``ans.robot``.

    Reads ``file_name``, takes the digits that follow the first ``<label>``
    tag as the reference number, removes newlines, tabs and every ``<...>``
    tag from the content, and appends an entry to ``ans.robot`` (in the
    current working directory) that checks the remaining text against
    ``.//ref-list/ref[<number>]``.

    Args:
        file_name: Path of the file to read.

    Raises:
        ValueError: If the file contains no ``<label>`` followed by digits.
    """
    # Read the whole file; the context manager guarantees the handle is closed.
    with open(file_name, "r") as source:
        data = source.read()

    # The digits right after the first <label> tag give the reference number.
    match = re.search(r'<label>([0-9]+)', data)
    if match is None:
        raise ValueError("no <label> with a number found in " + str(file_name))
    label = match.group(1)

    # Drop formatting whitespace.
    data = data.replace("\n", "").replace("\t", "")

    # Strip tags in a single pass. Text containing a bare '>' is left intact
    # (a manual index("<") / index(">") scan never terminates on such input),
    # and an unterminated '<' is kept as text instead of raising.
    data = re.sub(r"<[^>]*>", "", data)

    # NOTE(review): if ans.robot is a Robot Framework file, cells are normally
    # separated by two or more spaces -- confirm the separator widths below.
    with open('ans.robot', 'a') as out:
        out.write("验证第" + label + "条参考文献all")
        out.write("\n" + " Jats Text Should Be ")
        out.write(".//ref-list/ref[" + label + "]")
        out.write(" " + data + "\n\n")
# NOTE(review): the standard-library `xml` package exposes no `read`, so `xml`
# here is presumably a project-local module -- confirm. `sys`, `time`, `lxml`,
# `Optional` and `List` are used below but not imported in the visible part.
import xml
from pathlib import Path
from xml import read

# Directories taken from the first and second command-line arguments.
BOOK = Path(sys.argv[1])
EPUB = Path(sys.argv[2])
# Directory containing this file; templates and book.dtd are resolved from it.
CODE = Path(__file__).parent
OEBPS = EPUB / 'OEBPS'
DEFAULTS = [CODE / 'templates' / 'epub', CODE / 'templates' / 'common']
TEMPLATES = CODE / 'templates' / 'epub' / 'XML'

# Load book.xml with the DTD passed to xml.read, then stamp it with today's
# date in YYYY-MM-DD form.
book_dtd = lxml.etree.DTD(str(CODE / 'book.dtd'))
book = xml.read(BOOK / 'book.xml', dtd=book_dtd)
book.attrib['date'] = time.strftime("%Y-%m-%d")


def expand(xsl_filepath: Path, xml_filepath: Path, doctype: Optional[str] = None):
    """Expand a template file using the book's book.xml metadata.

    The stylesheet at ``xsl_filepath`` is loaded with ``read``, compiled with
    ``lxml.etree.XSLT`` and applied to the module-level ``book``; the result
    is passed to ``xml.save`` together with ``xml_filepath`` and ``doctype``.
    """
    template = lxml.etree.XSLT(read(xsl_filepath))
    result = template(book)
    xml.save(xml_filepath, result, doctype)


def copy_files(subdirectory: str, filenames: List[str]) -> None:
    """ copy files from BOOK to EPUB, using a default or template
def _register_images(source: Path, images: set, illustrations) -> None:
    """Add every image found in ``source`` that is not yet in ``images``."""
    for img_filename in find_images(source):
        if img_filename not in images:
            illustrations.append(file_element('image', img_filename))
            images.add(img_filename)


def run(ebook: Path, bigbook: Path, ubercoordinator: Path,
        files: List[Path]) -> None:
    """Bring an ebook's files and its book.xml in line with the Big Book of Key.

    Sections listed in book.xml but missing from the ebook are copied from
    the Big Book; the requested ``files`` are added as new sections; every
    image found in those files is listed under ``illustrations`` and copied
    if absent. book.xml is backed up to ``book.xml.bak`` and rewritten only
    when the set of sections or images changed.

    :param ebook: the ebook source directory
    :param bigbook: the Big Book of Key
    :param ubercoordinator: the ubercoordinator source directory, for the DTD
    :param files: the XHTML files from the Big Book of Key that need adding
    :return: None
    """
    index = Index(bigbook)
    # Close the DTD file once it has been handed to DTD().
    # NOTE(review): assumes DTD parses the file object during construction
    # (as lxml.etree.DTD does) -- confirm which DTD is imported.
    with (ubercoordinator / 'src' / 'book.dtd').open() as dtd_file:
        book_dtd = DTD(dtd_file)
    book_xml = ebook / 'book.xml'
    book = xml.read(book_xml, dtd=book_dtd)
    illustrations = xml.get_one(book, 'illustrations')
    contents = xml.get_one(book, 'contents')
    sections = set(
        xml.get_all_str(contents, '//section[not(@template="yes")]/@file'))
    images = set(xml.get_all_str(illustrations, '//image/@file'))
    # Snapshots used at the end to decide whether book.xml must be rewritten.
    initial_sections = sections.copy()
    initial_images = images.copy()

    # Sections already listed in book.xml: fetch missing files, collect images.
    for filename in sections:
        ebook_file = ebook / 'Text' / filename
        bigbook_file = bigbook / 'Text' / filename
        if not ebook_file.exists() and bigbook_file.exists():
            copyfile(bigbook_file, ebook_file)
        if ebook_file.exists():
            _register_images(ebook_file, images, illustrations)
        else:
            print(
                f"{book_xml}:0:0:WARNING: is this missing?: {filename}"
            )

    # Newly requested articles: copy them and append a <section> for each.
    for file in files:
        article = index.articles_by_id[file.stem]
        section_name = article.file.name
        if section_name not in sections:
            copyfile(article.file, ebook / 'Text' / section_name)
            title = xml.rewrap('title', XML(article.link))
            section = file_element('section', section_name)
            section.append(title)
            contents.append(section)
            # Record the same name that was tested above and written to the
            # element, so that `sections` keeps mirroring the @file values.
            sections.add(section_name)
            _register_images(article.file, images, illustrations)

    # Make sure every referenced image exists in the ebook.
    for img_filename in images:
        target = ebook / 'Images' / img_filename
        if not target.exists():
            copyfile(bigbook / 'Images' / img_filename, target)

    book.attrib['date'] = strftime("%Y-%m-%d")
    if sections != initial_sections or images != initial_images:
        copyfile(book_xml, ebook / 'book.xml.bak')
        xml.save(book_xml, book, doctype='book')
def search(startRecord, maxRecords, *args):
    """Query the Gallica SRU service and return the parsed XML response.

    A request URL is built for each positional argument (the last one built
    is the one requested), the request is tried up to ten times, and the
    response body is saved to ``gallica_<startRecord>.xml`` in the current
    working directory before being parsed.

    Args:
        startRecord: Value of the SRU ``startRecord`` parameter.
        maxRecords: Value of the SRU ``maximumRecords`` parameter.
        *args: Each argument is an iterable of strings; its items are quoted
            and comma-joined into ``search_string``.

    Returns:
        The object returned by ``ET.fromstring`` on success, otherwise the
        empty string ``""`` (errors are reported through ``ErrHandle``).
    """
    oErr = ErrHandle()
    RECHERCHE_BASEURL = "https://gallica.bnf.fr/SRU"
    MAX_ATTEMPTS = 10

    url = None
    try:
        for arg in args:
            search_string = ', '.join('"' + item + '"' for item in arg)
            oData = {}
            oData['operation'] = 'searchRetrieve'
            oData['version'] = "1.2"
            # NOTE(review): only the LAST assignment to oData['query'] takes
            # effect, and it does not use search_string -- confirm which
            # query is intended and remove the others.
            # oData['query'] = "((dc.language any lat) and (gallica all {}))".format(search_string)
            oData['query'] = urllib.parse.quote("((dc.language any \"lat\" \"latin\") and (dc.type any \"manuscript\" \"manuscrit\") and (dc.title any {}))".format(search_string))
            oData['query'] = "((dc.language all \"lat\") and (dc.type all \"manuscrit\") and (notice any {}))".format(search_string)
            oData['query'] = "( (dc.language all \"lat\") and (dc.title adj \"latin\"))"
            oData['query'] = "( (dc.language all \"lat\") and (dc.source adj \"département des manuscrits\"))"
            oData['maximumRecords'] = maxRecords
            oData['startRecord'] = startRecord

            # Combine the data into an URL; str.format also accepts integer
            # values for startRecord / maxRecords.
            sData = "?" + "&".join(
                "{}={}".format(k, v) for k, v in oData.items())
            url = RECHERCHE_BASEURL + sData
            print(url)
    except Exception:
        oErr.DoError("Search/search error 1")
        return ""

    if url is None:
        # No positional arguments were given, so no URL was ever built.
        oErr.DoError("Search/search: no query arguments given")
        return ""

    # Set up a filename for the local copy of the response.
    filename = "gallica_{}.xml".format(startRecord)

    attempts = MAX_ATTEMPTS
    bSuccess = False
    root = None
    while not bSuccess and attempts > 0:
        # Show progress once at least one attempt has failed.
        if attempts < MAX_ATTEMPTS:
            print("Attempts left: {}".format(attempts))

        response = None
        try:
            response = requests.get(url)
        except Exception:
            oErr.DoError("Search/search error 2")

        # Only look at the response when the request actually produced one.
        if response is not None:
            if response.status_code == 200:
                contents = response.text
                try:
                    # Write the contents to a local file ...
                    with open(filename, 'w', encoding="utf-8-sig") as fOut:
                        fOut.write(contents)
                    # ... then read it back and parse it as XML.
                    with open(filename, encoding="utf-8-sig") as fIn:
                        sText = fIn.read()
                    root = ET.fromstring(sText)
                    bSuccess = True
                except Exception:
                    oErr.DoError("Could not read the XML")
                    return ""
            else:
                oErr.Status(
                    "Site returns status: {}".format(response.status_code))

        # Keep track of attempts.
        attempts -= 1

    if bSuccess:
        return root

    # Were not able to process the request.
    oErr.DoError("Could not process the request")
    return ""