示例#1
0
 def __init__(self, file_stream, url, label, mime):
     """Set up the parser for an image document.

     file_stream -- file-like object with the image data; handed to the
                    base parser and then to Image.open
     url         -- kept on the instance as self._url
     label       -- kept on the instance as self._label
     mime        -- kept on the instance as self._mime

     The width and height are taken from the size of the opened image.
     """
     DocumentParser.__init__(self, file_stream)
     self._url, self._label, self._mime = url, label, mime
     # Only the dimensions are needed; the opened image is not kept.
     dimensions = Image.open(file_stream).size
     self._width, self._height = dimensions
示例#2
0
 def __init__(self, file_name, url, label):
     """Initialise the PDF parser from a file on disk.

     file_name -- path of the PDF; opened to obtain the stream given to
                  the base parser and also passed to poppler.PDFDoc
     url       -- kept on the instance as self._url
     label     -- kept on the instance as self._label
     """
     # open() is the documented way to get a file object; the file()
     # constructor used previously only exists on Python 2. The default
     # mode is kept so the stream behaves exactly as before.
     # NOTE(review): the stream is not closed here -- presumably the
     # base parser owns it; confirm.
     file_stream = open(file_name)
     DocumentParser.__init__(self, file_stream)
     self._url = url
     self._label = label
     # Starts out unset; nothing is computed in the constructor.
     self._page_id_to_page_numbers = None
     self._doc = poppler.PDFDoc(file_name)
示例#3
0
 def __init__(self, file_name, url, label):
     """Initialise the PDF parser from a file on disk.

     file_name -- path of the PDF; opened to obtain the stream given to
                  the base parser and also passed to poppler.PDFDoc
     url       -- kept on the instance as self._url
     label     -- kept on the instance as self._label
     """
     # open() is the documented way to get a file object; the file()
     # constructor used previously only exists on Python 2. The default
     # mode is kept so the stream behaves exactly as before.
     # NOTE(review): the stream is not closed here -- presumably the
     # base parser owns it; confirm.
     file_stream = open(file_name)
     DocumentParser.__init__(self, file_stream)
     self._url = url
     self._label = label
     # Starts out unset; nothing is computed in the constructor.
     self._page_id_to_page_numbers = None
     self._doc = poppler.PDFDoc(file_name)
    def __init__(self, file_stream, url):
        """Read a METS document from file_stream and parse it.

        file_stream -- file-like object with the METS XML; forwarded to
                       the base parser
        url         -- kept on the instance as self._url

        Raises ParserError.InvalidDocument when the content cannot be
        parsed, or when self._check_xml() does not return True.
        """
        DocumentParser.__init__(self, file_stream)
        self._url = url
        self._namespace_URI = 'http://www.loc.gov/METS/'
        self._mods_namespace_URI = 'http://www.loc.gov/mods/v3'

        # Read the whole content of the file up front.
        # (self._file_stream is presumably set by the base constructor.)
        self._content_str = self._file_stream.read()

        # These all start out unset; nothing is computed for them here.
        self._logical_structure = None
        self._physical_structure = None
        self._meta_data = None
        self._relation = None
        self._file_list = None

        # NOTE: some METS files contain upper-case METS=/MODS=
        # directives. A step that rewrote them to lower case
        # ('METS=' -> 'mets=', 'MODS=' -> 'mods=') was disabled here and
        # the content is parsed as read.

        # Build the message once: the previous two-part literal was
        # joined without a space and read "(is itcorrupted?)".
        invalid_message = "The file is invalid. (is it corrupted?)"
        try:
            self._doc = parseString(self._content_str)
        except Exception:
            # Any parse failure is reported as an invalid document.
            raise ParserError.InvalidDocument(invalid_message)
        if self._check_xml() is not True:
            raise ParserError.InvalidDocument(invalid_message)
 def __init__(self, file_stream, url, label, mime):
     """Set up the parser for an image document.

     file_stream -- file-like object with the image data; handed to the
                    base parser and then to Image.open
     url         -- kept on the instance as self._url
     label       -- kept on the instance as self._label
     mime        -- kept on the instance as self._mime

     The width and height are taken from the size of the opened image.
     """
     DocumentParser.__init__(self, file_stream)
     self._url, self._label, self._mime = url, label, mime
     # Only the dimensions are needed; the opened image is not kept.
     dimensions = Image.open(file_stream).size
     self._width, self._height = dimensions
示例#6
0
 def __init__(self, file_stream, url):
     """Initialise the parser.

     file_stream -- forwarded unchanged to DocumentParser.__init__
     url         -- kept on the instance as self._url
     """
     DocumentParser.__init__(self, file_stream)
     self._url = url
示例#7
0
 def __init__(self, file_stream, url):
     """Initialise the parser and record the Dublin Core namespace.

     file_stream -- forwarded unchanged to DocumentParser.__init__
     url         -- kept on the instance as self._url
     """
     # Assigned before the base constructor runs.
     # NOTE(review): presumably the base constructor (or something it
     # calls) reads this attribute -- confirm before reordering.
     self._namespace_URI = 'http://purl.org/dc/elements/1.1/'
     DocumentParser.__init__(self, file_stream)
     self._url = url
示例#8
0
 def __init__(self, file_stream, url):
     """Initialise the parser.

     file_stream -- forwarded unchanged to DocumentParser.__init__
     url         -- kept on the instance as self._url
     """
     DocumentParser.__init__(self, file_stream)
     self._url = url
示例#9
0
 def __init__(self, file_stream, url):
     """Initialise the parser and record the Dublin Core namespace.

     file_stream -- forwarded unchanged to DocumentParser.__init__
     url         -- kept on the instance as self._url
     """
     # Assigned before the base constructor runs.
     # NOTE(review): presumably the base constructor (or something it
     # calls) reads this attribute -- confirm before reordering.
     self._namespace_URI = 'http://purl.org/dc/elements/1.1/'
     DocumentParser.__init__(self, file_stream)
     self._url = url