Пример #1
0
def extract(filename, text):
    """Collect bibliographic metadata from the TEI header of a document.

    ``text`` is handed to ``st.parse`` and the ``teiHeader`` element of the
    resulting tree is probed with the candidate paths listed in the
    module-level ``bib_metadata`` mapping.  For each field, the first path
    that yields non-empty text wins and the remaining paths are skipped.

    Args:
        filename: Stored unchanged under the ``"filename"`` key.
        text: Document source in whatever form ``st.parse`` accepts.

    Returns:
        A dict that always contains ``"filename"`` and, for every field
        that was found, one of ``"author"``, ``"auth_date"``, ``"gender"``,
        ``"title"``, ``"date"`` (converted with ``int``), ``"genre"``,
        ``"publisher"`` and ``"pub_place"``.  If the document has no
        ``teiHeader`` element only ``"filename"`` is present.

    Raises:
        ValueError: If the first non-empty creation-date text cannot be
            converted by ``int``.
    """
    # (key in the result, key into bib_metadata, optional converter),
    # listed in the order the fields are looked up.
    fields = (
        ("author", "auth_name", None),
        ("auth_date", "auth_dates", None),
        ("gender", "auth_gender", None),
        ("title", "titles", None),
        ("date", "createdate", int),
        ("genre", "text_genre", None),
        ("publisher", "publishers", None),
        ("pub_place", "pub_places", None),
    )

    biblio = {"filename": filename}

    header = st.parse(text).find("teiHeader")
    if header is None:
        # Without a header there is nothing to search; calling findtext on
        # None would only fail with an opaque AttributeError.
        return biblio

    for out_key, paths_key, convert in fields:
        for path in bib_metadata[paths_key]:
            # Evaluate each path once and reuse the result.
            value = header.findtext(path)
            if value:
                biblio[out_key] = convert(value) if convert else value
                break

    return biblio
Пример #2
0
def extract(filename, text):
    """Collect bibliographic metadata from the TEI header of a document.

    ``text`` is handed to ``st.parse`` and the ``teiHeader`` element of the
    resulting tree is probed with the candidate paths listed in the
    module-level ``bib_metadata`` mapping.  For each field, the first path
    that yields non-empty text wins and the remaining paths are skipped.

    Args:
        filename: Stored unchanged under the ``"filename"`` key.
        text: Document source in whatever form ``st.parse`` accepts.

    Returns:
        A dict that always contains ``"filename"`` and, for every field
        that was found, one of ``"author"``, ``"auth_date"``, ``"gender"``,
        ``"title"``, ``"date"`` (converted with ``int``), ``"genre"``,
        ``"publisher"`` and ``"pub_place"``.  If the document has no
        ``teiHeader`` element only ``"filename"`` is present.

    Raises:
        ValueError: If the first non-empty creation-date text cannot be
            converted by ``int``.
    """
    # (key in the result, key into bib_metadata, optional converter),
    # listed in the order the fields are looked up.
    fields = (
        ("author", "auth_name", None),
        ("auth_date", "auth_dates", None),
        ("gender", "auth_gender", None),
        ("title", "titles", None),
        ("date", "createdate", int),
        ("genre", "text_genre", None),
        ("publisher", "publishers", None),
        ("pub_place", "pub_places", None),
    )

    biblio = {"filename": filename}

    header = st.parse(text).find("teiHeader")
    if header is None:
        # Without a header there is nothing to search; calling findtext on
        # None would only fail with an opaque AttributeError.
        return biblio

    for out_key, paths_key, convert in fields:
        for path in bib_metadata[paths_key]:
            # Evaluate each path once and reuse the result.
            value = header.findtext(path)
            if value:
                biblio[out_key] = convert(value) if convert else value
                break

    return biblio
Пример #3
0
#!/usr/bin/env python
import philologic.shlaxtree as st
import sys
import codecs

# For every file named on the command line, print its serialized teiHeader
# followed by the title and author text found under titleStmt.
for filename in sys.argv[1:]:
    # The context manager closes the file even if parsing or printing fails;
    # the handle is not called `file`, which would shadow the Python 2 builtin.
    with codecs.open(filename, "r", "utf-8") as tei_file:
        root = st.parse(tei_file)
        header = root.find("teiHeader")
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(st.et.tostring(header))
        print(header.findtext(".//titleStmt/title"))
        print(header.findtext(".//titleStmt/author"))
Пример #4
0
#!/usr/bin/env python
import philologic.shlaxtree as st
import sys
import codecs

# For every file named on the command line, print its serialized teiHeader
# followed by the title and author text found under titleStmt.
for filename in sys.argv[1:]:
    # The context manager closes the file even if parsing or printing fails;
    # the handle is not called `file`, which would shadow the Python 2 builtin.
    with codecs.open(filename, "r", "utf-8") as tei_file:
        root = st.parse(tei_file)
        header = root.find("teiHeader")
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(st.et.tostring(header))
        print(header.findtext(".//titleStmt/title"))
        print(header.findtext(".//titleStmt/author"))