Exemplo n.º 1
0
def parse_xml_to_array(xml_file, strict=False, normalize_form=None):
    """
    Parse an XML file and hand back the records the handler collected.

    Pass strict=True if the parser should explicitly check element
    namespaces for the MARCSlim namespace.

    normalize_form may be one of 'NFC', 'NFKC', 'NFD' or 'NFKD' (see the
    unicodedata.normalize documentation). Both options are forwarded
    positionally to XmlHandler.
    """
    collector = XmlHandler(strict, normalize_form)
    parse_xml(xml_file, collector)
    parsed_records = collector.records
    return parsed_records
Exemplo n.º 2
0
def map_xml(function, *files):
    """
    Apply a function to the records parsed from one or more XML files.

    One XmlHandler is created, its process_record attribute is replaced
    with the supplied function, and every file is then parsed with that
    same handler in the order given. Nothing is returned.

    Example:

    def do_it(r):
      print(r)

    map_xml(do_it, 'marc.xml')
    """
    record_handler = XmlHandler()
    record_handler.process_record = function
    for source in files:
        parse_xml(source, record_handler)
Exemplo n.º 3
0
 def __call__(self, element):
     """Parse the first child of ``element`` and return the first record.

     ``element[0]`` is serialized with ``tostring``, wrapped in a
     ``StringIO`` and fed to ``marcxml.parse_xml`` with a fresh
     ``marcxml.XmlHandler``. The first entry of the handler's ``records``
     is returned; an empty ``records`` raises ``IndexError``.
     """
     # Debug trace of the second grandchild's text. The call form of print
     # produces the same output on Python 2 and is also valid Python 3,
     # unlike the bare print statement.
     print(element[0][1].text)
     handler = marcxml.XmlHandler()
     marcxml.parse_xml(StringIO(tostring(element[0])), handler)
     return handler.records[0]
Exemplo n.º 4
0
 def __call__(self, element):
     """Return the first record parsed from the first child of ``element``.

     The handler is created with strict=True and normalize_form='NFC'; the
     child is serialized with encoding='UTF-8' before being parsed.
     """
     collector = marcxml.XmlHandler(strict=True, normalize_form='NFC')
     serialized = tostring(element[0], encoding='UTF-8')
     marcxml.parse_xml(StringIO(serialized), collector)
     return collector.records[0]
Exemplo n.º 5
0
 def __call__(self, element):
     """Parse ``element[0]`` with a strict, NFC-normalizing handler.

     Returns the first item of the handler's ``records`` after parsing.
     """
     record_handler = marcxml.XmlHandler(strict=True, normalize_form='NFC')
     xml_stream = StringIO(tostring(element[0], encoding='UTF-8'))
     marcxml.parse_xml(xml_stream, record_handler)
     first_record = record_handler.records[0]
     return first_record
Exemplo n.º 6
0
 def __call__(self, element):
     """Return the first record parsed from the first child of ``element``.

     A default ``marcxml.XmlHandler`` is used; the child is serialized with
     encoding='UTF-8' and parsed from a ``StringIO`` wrapper.
     """
     collector = marcxml.XmlHandler()
     serialized = tostring(element[0], encoding='UTF-8')
     marcxml.parse_xml(StringIO(serialized), collector)
     return collector.records[0]
def MarcXML(xml):
    """Return the first record parsed from ``xml``.

    ``xml`` is serialized with ``tostring(..., encoding='UTF-8')`` and the
    result is parsed from an ``io.BytesIO`` buffer using a default
    ``marcxml.XmlHandler``.
    """
    collector = marcxml.XmlHandler()
    marcxml.parse_xml(io.BytesIO(tostring(xml, encoding='UTF-8')), collector)
    first_record = collector.records[0]
    return first_record
Exemplo n.º 8
0
 def __call__(self, element):
     """Parse ``element[0]`` strictly with NFC normalization.

     Returns the first entry of the handler's ``records`` list.
     """
     record_handler = marcxml.XmlHandler(strict=True, normalize_form="NFC")
     payload = tostring(element[0], encoding="UTF-8")
     marcxml.parse_xml(StringIO(payload), record_handler)
     return record_handler.records[0]
Exemplo n.º 9
0
	def __call__(self, element):
		"""Return the first record parsed from the first child of ``element``.

		The child is serialized with ``tostring`` (no explicit encoding) and
		parsed from a ``StringIO`` wrapper with a default handler.
		"""
		collector = marcxml.XmlHandler()
		serialized = tostring(element[0])
		marcxml.parse_xml(StringIO(serialized), collector)
		return collector.records[0]
Exemplo n.º 10
0
from OAIClient import *
from oaipmh import metadata
from oaipmh.datestamp import datestamp_to_datetime
from cStringIO import StringIO
from lxml.etree import tostring, XPathEvaluator
from pymarc import marcxml
from pymarc.record import *
import datetime
import sys
import properties

class MARCXMLReader(object):
    """Callable that extracts the PyMARC record from an OAI MARC XML structure."""

    def __call__(self, element):
        """Parse the first child of ``element`` and return the first record."""
        collector = marcxml.XmlHandler()
        serialized = tostring(element[0])
        marcxml.parse_xml(StringIO(serialized), collector)
        first_record = collector.records[0]
        return first_record
                
# The script needs exactly three command-line arguments: two dates and a
# comma-separated list of sigels.
if len(sys.argv) != 4:
    # sys.exit() with a string writes it to stderr and exits with status 1.
    # Unlike the site-provided exit() it is always available, and unlike the
    # bare print statement it is valid on both Python 2 and Python 3.
    sys.exit("Usage: deletebib.py <YYYYMMDD> <YYYYMMDD> <sigel1>[,sigel2,sigel3,...,sigeln]")

# Reader instance that converts OAI MARC XML elements into records.
marcxml_reader = MARCXMLReader()
oaipmh_host = 'http://data.libris.kb.se'

# Positional arguments: start of range, end of range, sigel list.
from_time = sys.argv[1]
until = sys.argv[2]
sigels = sys.argv[3].split(',')

# Credentials are read from the local ``properties`` module.
user = properties.user
password = properties.password
Exemplo n.º 11
0
 def __call__(self, element):
     """Parse the first child of ``element`` and re-serialize its first record.

     The first record collected by the handler is passed to
     ``marcxml.record_to_xml`` with namespace=True and that result is returned.
     """
     collector = marcxml.XmlHandler()
     serialized = tostring(element[0])
     marcxml.parse_xml(StringIO(serialized), collector)
     first_record = collector.records[0]
     return marcxml.record_to_xml(first_record, namespace=True)