class Publisher(xmlmap.XmlObject):
    '''A journal publisher record.

    Maps identification, self-archiving policy, funder mandates, paid
    access details and copyright links from a publisher element.
    '''

    # -- identification --------------------------------------------------
    # numeric id for romeo records; 'DOAJ' for doaj records with no romeo data
    id = xmlmap.StringField('@id')
    name = xmlmap.StringField('name')
    alias = xmlmap.StringField('alias')
    url = xmlmap.StringField('homeurl')

    # -- archiving policy ------------------------------------------------
    # archiving values: can, cannot, restricted, unclear, unknown.
    # restrictions present only if restricted.
    preprint_archiving = xmlmap.StringField('preprints/prearchiving')
    preprint_restrictions = xmlmap.StringListField(
        'preprints/prerestrictions/prerestriction')
    postprint_archiving = xmlmap.StringField('postprints/postarchiving')
    postprint_restrictions = xmlmap.StringListField(
        'postprints/postrestrictions/postrestriction')
    pdf_archiving = xmlmap.StringField('pdfversion/pdfarchiving')
    pdf_restrictions = xmlmap.StringListField(
        'pdfversion/pdfrestrictions/pdfrestriction')

    # -- conditions and mandates -----------------------------------------
    conditions = xmlmap.StringListField('conditions/condition')
    mandates = xmlmap.NodeListField('mandates/mandate', Mandate)

    # -- paid access -----------------------------------------------------
    paid_access_url = xmlmap.StringField('paidaccess/paidaccessurl')
    paid_access_name = xmlmap.StringField('paidaccess/paidaccessname')
    paid_access_notes = xmlmap.StringField('paidaccess/paidaccessnotes')

    copyright_links = xmlmap.NodeListField('copyrightlinks/copyrightlink',
                                           Copyright)

    romeo_colour = xmlmap.StringField('romeocolour')
    '''`Colour values <http://www.sherpa.ac.uk/romeoinfo.html#colours>`_
        used by RoMEO to describe archiving rights'''

    # -- record timestamps (parsed with the explicit format below) -------
    date_added = xmlmap.DateTimeField('dateadded',
                                      format='%Y-%m-%d %H:%M:%S')
    date_updated = xmlmap.DateTimeField('dateupdated',
                                        format='%Y-%m-%d %H:%M:%S')

    def __repr__(self):
        # rendered as <ClassName:id name>
        cls_name = self.__class__.__name__
        return u'<%s:%s %s>' % (cls_name, self.id, self.name)
class ProfileDescription(_EadBase):
    """Profile Descriptor for an EAD document.

    Expected node element passed to constructor: 'ead/eadheader/profiledesc'.
    """

    ROOT_NAME = 'profiledesc'

    # creation date, mapped as a DateField node
    date = xmlmap.NodeField("e:creation/e:date", DateField)
    ":class:`DateField` - `creation/date`"

    # text content of each language element
    languages = xmlmap.StringListField("e:langusage/e:language")
    "language information - `langusage/language`"

    # langcode attribute of each language element
    language_codes = xmlmap.StringListField(
        "e:langusage/e:language/@langcode")
    "language codes - `langusage/language/@langcode`"
class VideoDigitalTech(_BaseDigitalTech):
    ":class:`~eulxml.xmlmap.XmlObject` for Digital Technical Metadata."

    ROOT_NAME = 'digitaltech'

    # Only dateCaptured elements with encoding="w3cdtf" are matched.
    date_captured = xmlmap.StringField(
        'dt:dateCaptured[@encoding="w3cdtf"]',
        help_text='Date digital capture was made',
        required=True)
    'date digital capture was made (string)'

    codec_quality = xmlmap.StringField(
        'dt:codecQuality',
        required=False,
        help_text='Whether the data compression method was lossless or lossy',
        choices=('lossless', 'compressed'))
    'codec quality - lossless or lossy'

    # Integer value taken from the measure with type="time",
    # unit="seconds" and aspect="duration of playing time".
    duration = xmlmap.IntegerField(
        'dt:duration/dt:measure[@type="time"][@unit="seconds"]'
        '[@aspect="duration of playing time"]',
        help_text='Duration of video playing time',
        required=True)
    'duration of the video file'

    # FIXME/TODO: note and digitization purpose could be plural
    note = xmlmap.StringField(
        'dt:note[@type="general"]',
        required=False,
        help_text='Additional information that may be helpful in '
                  'describing the surrogate')
    'general note'
    # list mapping over the same xpath as ``note``
    note_list = xmlmap.StringListField('dt:note[@type="general"]')

    digitization_purpose = xmlmap.StringField(
        'dt:note[@type="purpose of digitization"]',
        required=False,
        help_text='The reason why the digital surrogate was created '
                  '(e.g., exhibit, patron request, preservation)')
    'reason the item was digitized'
    # list mapping over the same xpath as ``digitization_purpose``
    digitization_purpose_list = xmlmap.StringListField(
        'dt:note[@type="purpose of digitization"]')

    transfer_engineer = xmlmap.NodeField(
        'dt:transferEngineer',
        TransferEngineer,
        required=False,
        help_text='The person who performed the digitization or '
                  'conversion that produced the file')
    ':class:`TransferEngineer` - person who digitized the item'

    # Only codecCreator elements with type="moving image" are matched.
    codec_creator = xmlmap.NodeField(
        'dt:codecCreator[@type="moving image"]',
        VideoCodecCreator,
        help_text='Hardware, software, and software version used to '
                  'create the digital file')
    ':class:`VideoCodecCreator` - hardware & software used to digitize the item'
class QueryResult(xmlmap.XmlObject):
    """The results of an eXist XQuery query"""

    # start/count/hits are matched either as plain attributes or as
    # attributes in the ``exist`` namespace.
    start = xmlmap.IntegerField("@start|@exist:start")
    """The index of the first result returned"""

    values = xmlmap.StringListField("exist:value")
    "Generic value (*exist:value*) returned from an exist xquery"

    session = xmlmap.IntegerField("@exist:session")
    "Session id, when a query is requested to be cached"

    _raw_count = xmlmap.IntegerField("@count|@exist:count")

    @property
    def count(self):
        """The number of results returned in this chunk"""
        # fall back to 0 when the attribute is missing (or zero)
        raw = self._raw_count
        return raw if raw else 0

    _raw_hits = xmlmap.IntegerField("@hits|@exist:hits")

    @property
    def hits(self):
        """The total number of hits found by the search"""
        # fall back to 0 when the attribute is missing (or zero)
        raw = self._raw_hits
        return raw if raw else 0

    @property
    def results(self):
        """The result documents themselves as nodes, starting at
        :attr:`start` and containing :attr:`count` members"""
        # all element children of the result node
        child_nodes = self.node.xpath('*')
        return child_nodes
class NewPids(_FedoraBase):
    """:class:`~eulxml.xmlmap.XmlObject` for a list of pids as returned by
    :meth:`REST_API.getNextPID`."""

    # NOTE: the default namespace should be the manage namespace, but
    # that namespace was missing until Fedora 3.5.  The xpath matches
    # ``pid`` both with and without a namespace so that Fedora 3.5 and
    # older versions are both supported.
    pids = xmlmap.StringListField('pid|m:pid')
class ObjectMethodService(_FedoraBase):
    """:class:`~eulxml.xmlmap.XmlObject` for object method services; included
    in :class:`ObjectMethods` for data returned by
    :meth:`REST_API.listMethods`."""

    # default namespace is fedora access
    ROOT_NAME = 'sDef'

    # pid attribute of the sDef element
    pid = xmlmap.StringField('@pid')
    # name attribute of each method element under this sDef
    methods = xmlmap.StringListField('a:method/@name')
class VideoCodecCreator(CodecCreator):
    ''':class:`~eulxml.xmlmap.XmlObject` for :class:`DigitalTech` codec creator

    Overrides the base options and replaces with options for video'''

    configurations = {
        # current format is id : hardware, software, software version
        '1': (('iMac', ), 'Canopus ADVC-700, Final Cut Pro', '7'),
        '2': (('iMac', ), 'Canopus ADVC-300, Final Cut Pro', '7'),
        '3': (('iMac', ), 'Final Cut Pro', '7'),
        '4': (('iMac', ), 'MPEG Streamclip', '1.9'),
        '5': (('Unknown', ), 'Unknown', None),
        '6': (('Vendor', ), 'Vendor', None),
    }
    'controlled vocabulary for codec creator configurations'

    # (id, label) pairs built from ``configurations``.  Each label is
    # "<hardware, ...>, <software> <version>", with an empty version
    # when none is recorded.  ``items()`` is used rather than the
    # Python 2-only ``iteritems()`` so the class can be created on both
    # Python 2 and Python 3.
    options = [(id, '%s, %s %s' % (', '.join(c[0]), c[1],
                                   c[2] if c[2] is not None else ''))
               for id, c in configurations.items()]
    options.insert(0, ('', ''))  # empty value at beginning of list (initial default)

    id = xmlmap.StringField('dt:codecCreatorID')
    'codec creator id - `dt:codecCreatorId`'
    hardware = xmlmap.StringField('dt:hardware')
    'hardware - `dt:hardware` (first hardware only, even if there are multiple)'
    hardware_list = xmlmap.StringListField('dt:hardware')
    'list of all hardware'
    software = xmlmap.StringField('dt:software')
    'software - `dt:software` (first software only, even if there are multiple)'
    software_version = xmlmap.StringField('dt:softwareVersion')
    'software version - `dt:softwareVersion`'
class CodecCreator(_BaseDigitalTech):
    ''':class:`~eulxml.xmlmap.XmlObject` for :class:`DigitalTech` codec creator'''

    ROOT_NAME = 'codecCreator'

    configurations = {
        # current format is id : hardware, software, software version
        '1': (('Mac G4', ), 'DigiDesign ProTools LE', '5.2'),
        '2': (('Mac G5', ), 'DigiDesign ProTools LE', '6.7'),
        '3': (('Dell Optiplex 755', 'Apogee Rosetta 200'), 'Sound Forge', '9.0'),
        '4': (('Dell Optiplex 755', ), 'iTunes', None),
        '5': (('Unknown', ), 'Unknown', None),
        '6': (('iMac', 'Benchmark ADC1'), 'Adobe Audition CS6', '5.0'),
        '7': (('iMac', 'Benchmark ADC1'), 'Sound Forge Pro', '1.0'),
        '8': (('iMac', ), 'iTunes', None),
    }
    'controlled vocabulary for codec creator configurations'

    # (id, label) pairs built from ``configurations``.  Each label is
    # "<hardware, ...>, <software> <version>", with an empty version
    # when none is recorded.  ``items()`` is used rather than the
    # Python 2-only ``iteritems()`` so the class can be created on both
    # Python 2 and Python 3.
    options = [(id, '%s, %s %s' % (', '.join(c[0]), c[1],
                                   c[2] if c[2] is not None else ''))
               for id, c in configurations.items()]
    options.insert(0, ('', ''))  # empty value at beginning of list (initial default)

    id = xmlmap.StringField('dt:codecCreatorID')
    'codec creator id - `dt:codecCreatorId`'
    hardware = xmlmap.StringField('dt:hardware')
    'hardware - `dt:hardware` (first hardware only, even if there are multiple)'
    hardware_list = xmlmap.StringListField('dt:hardware')
    'list of all hardware'
    software = xmlmap.StringField('dt:software')
    'software - `dt:software` (first software only, even if there are multiple)'
    software_version = xmlmap.StringField('dt:softwareVersion')
    'software version - `dt:softwareVersion`'
class Address(_EadBase):
    """Address information.

    Expected node element passed to constructor: `address`.
    """

    ROOT_NAME = 'address'

    # one string per addressline child element
    lines = xmlmap.StringListField("e:addressline")
    "list of lines in an address - `addressline`"
class ArchivalDescription(_EadBase):
    """Archival description, contains the bulk of the information in an EAD document.

    Expected node element passed to constructor: `ead/archdesc`.  All
    field xpaths are relative to that `archdesc` element.
    """

    ROOT_NAME = 'archdesc'

    # -- descriptive identification (did) and selected children ----------
    did = xmlmap.NodeField("e:did", DescriptiveIdentification)
    'descriptive identification :class:`DescriptiveIdentification` - `did`'
    origination = xmlmap.StringField("e:did/e:origination", normalize=True)
    "origination - `did/origination`"
    unitid = xmlmap.NodeField("e:did/e:unitid", Unitid)
    ":class:`Unitid` - `did/unitid`"
    extent = xmlmap.StringListField("e:did/e:physdesc/e:extent")
    "extent from the physical description - `did/physdesc/extent`"
    langmaterial = xmlmap.StringField("e:did/e:langmaterial")
    "language of the materials - `did/langmaterial`"
    location = xmlmap.StringField("e:did/e:physloc")
    "physical location - `did/physloc`"

    # -- descriptive sections, each mapped as a Section -------------------
    access_restriction = xmlmap.NodeField("e:accessrestrict", Section)
    "access restrictions :class:`Section` - `accessrestrict`"
    use_restriction = xmlmap.NodeField("e:userestrict", Section)
    "use restrictions :class:`Section` - `userestrict`"
    alternate_form = xmlmap.NodeField("e:altformavail", Section)
    "alternative form available :class:`Section` - `altformavail`"
    originals_location = xmlmap.NodeField("e:originalsloc", Section)
    "location of originals :class:`Section` - `originalsloc`"
    related_material = xmlmap.NodeField("e:relatedmaterial", Section)
    "related material :class:`Section` - `relatedmaterial`"
    separated_material = xmlmap.NodeField("e:separatedmaterial", Section)
    "separated material :class:`Section` - `separatedmaterial`"
    acquisition_info = xmlmap.NodeField("e:acqinfo", Section)
    "acquisition info :class:`Section` - `acqinfo`"
    custodial_history = xmlmap.NodeField("e:custodhist", Section)
    "custodial history :class:`Section` - `custodhist`"
    preferred_citation = xmlmap.NodeField("e:prefercite", Section)
    "preferred citation :class:`Section` - `prefercite`"
    biography_history = xmlmap.NodeField("e:bioghist", Section)
    "biography or history :class:`Section` - `bioghist`"
    bibliography = xmlmap.NodeField("e:bibliography", Section)
    "bibliography :class:`Section` - `bibliography`"
    scope_content = xmlmap.NodeField("e:scopecontent", Section)
    "scope and content :class:`Section` - `scopecontent`"
    # This object's node is already the archdesc element, so the xpath
    # is relative to it like every sibling field.  The previous
    # "e:archdesc/e:processinfo" looked for an archdesc nested inside
    # archdesc and so could not match the documented `processinfo`.
    process_info = xmlmap.NodeField("e:processinfo", Section)
    "processing information :class:`Section` - `processinfo`"
    arrangement = xmlmap.NodeField("e:arrangement", Section)
    "arrangement :class:`Section` - `arrangement`"
    other = xmlmap.NodeField("e:otherfindaid", Section)
    "other finding aid :class:`Section` - `otherfindaid`"

    # -- access headings, indexes, digital objects ------------------------
    controlaccess = xmlmap.NodeField("e:controlaccess", ControlledAccessHeadings)
    ":class:`ControlledAccessHeadings` - `controlaccess`; subject terms, names, etc."
    index = xmlmap.NodeListField("e:index", Index)
    "list of :class:`Index` - `index`; e.g., index of selected correspondents"
    dao_list = xmlmap.NodeListField("e:dao", DigitalArchivalObject)
    "list of digital archival object references as :class:`DigitalArchivalObject`"
class Header(_BaseCerp):
    '''A single header mapped from a ``Header`` element: name, value
    and any number of comments.'''

    ROOT_NAME = 'Header'

    name = xmlmap.StringField('xm:Name')
    value = xmlmap.StringField('xm:Value')
    comments = xmlmap.StringListField('xm:Comments')

    def __str__(self):
        # rendered as "name: value"
        pair = (self.name, self.value)
        return '%s: %s' % pair

    def __repr__(self):
        # rendered as <ClassName name>
        cls_name = self.__class__.__name__
        return '<%s %s>' % (cls_name, self.name)
class QueryTestModel(xmlmap.XmlObject):
    '''XmlObject with a mix of field types and xpath forms: attribute,
    element, node, union, function call and namespaced element.'''

    ROOT_NAMESPACES = {'ex': 'http://example.com/'}

    # simple attribute / element mappings
    id = xmlmap.StringField('@id')
    name = xmlmap.StringField('name')
    description = xmlmap.StringField('description')
    wnn = xmlmap.IntegerField('wacky_node_name')

    # nested object
    sub = xmlmap.NodeField("sub", QuerySubModel)

    # less common xpath forms: union, function call, namespace prefix
    or_field = xmlmap.StringField('name|description|@id')
    substring = xmlmap.StringField('substring(name, 1, 1)')
    nsfield = xmlmap.StringField('ex:field')

    # repeating element
    years = xmlmap.StringListField('year')
class TeiDiv(_TeiBase):
    '''Mapping for a TEI ``div``: identifying attributes, headings, text
    content and nested structures.'''

    id = xmlmap.StringField('@xml:id')
    type = xmlmap.StringField('@type')

    author = xmlmap.StringField('tei:docAuthor/tei:name/tei:choice/tei:sic')
    docauthor = xmlmap.StringField('tei:docAuthor')

    # easy access to FIRST head
    title = xmlmap.StringField('tei:head[1]')
    # access to all heads when there are multiple
    title_list = xmlmap.StringListField('tei:head')

    # short-hand mapping for full text of a div (e.g., for short divs)
    text = xmlmap.StringField('.')

    linegroup = xmlmap.NodeListField('tei:lg', TeiLineGroup)
    # 'self' maps nested divs to this same class
    div = xmlmap.NodeListField('tei:div', 'self')
    byline = xmlmap.StringField('tei:byline')
    epigraph = xmlmap.NodeListField('tei:epigraph', TeiEpigraph)

    p = xmlmap.StringListField('tei:p')
    q = xmlmap.StringListField('tei:q')
    quote = xmlmap.StringListField('tei:quote')

    # divs inside floatingText/body, also mapped to this same class
    floatingText = xmlmap.NodeListField('tei:floatingText/tei:body/tei:div',
                                        'self')
class RocheTEI(teimap.Tei):
    '''TEI document mapping with an English title, chapter information
    and document-wide lists of place names, persons and terms.'''

    # title element with xml:lang="en" in the TEI header
    title_en = xmlmap.StringField(
        'tei:teiHeader/tei:fileDesc/tei:titleStmt/'
        'tei:title[@xml:lang="en"]')

    # n attribute of the body div, as an integer
    chapter = xmlmap.IntegerField('tei:text/tei:body/tei:div/@n')
    chapter_title = xmlmap.StringField(
        'tei:text/tei:body/tei:div/tei:div/descendant::tei:p[1]')

    # absolute ("//") xpaths: matched anywhere in the document
    place_names = xmlmap.StringListField('//tei:placeName')
    persons = xmlmap.StringListField('//tei:persName')
    terms = xmlmap.StringListField('//tei:term')

    @property
    def first_letter_author(self):
        """Return the first letter of the authors surname

        NOTE(review): the code returns ``self.author`` unchanged, not a
        single letter -- confirm whether callers rely on that.
        """
        return self.author

    @property
    def first_letter_title(self):
        """Return the first letter of the title

        NOTE(review): the code returns ``self.title`` unchanged, not a
        single letter -- confirm whether callers rely on that.
        """
        return self.title
class TestObject(xmlmap.XmlObject):
    '''XmlObject rooted at ``foo`` with one field of each of several
    xmlmap field types.'''

    ROOT_NAME = 'foo'

    # single-value fields
    id = xmlmap.StringField('@id', verbose_name='My Id',
                            help_text='enter an id')
    int = xmlmap.IntegerField('bar[2]/baz')
    bool = xmlmap.SimpleBooleanField('boolean', 'yes', 'no')
    longtext = xmlmap.StringField('longtext', normalize=True,
                                  required=False)
    date = xmlmap.DateField('date')

    # node fields
    child = xmlmap.NodeField('bar[1]', TestSubobject,
                             verbose_name='Child bar1')
    children = xmlmap.NodeListField('bar', TestSubobject)
    other_child = xmlmap.NodeField('plugh', OtherTestSubobject)

    # field restricted to a set of choices
    my_opt = xmlmap.StringField('opt', choices=['a', 'b', 'c'])

    # list fields
    text = xmlmap.StringListField('text')
    numbers = xmlmap.IntegerListField('number')
class Bibl(TeiBase):
    'TEI Bibl, with mappings for digital edition and pdf urls'

    #: type attribute
    type = xmlmap.StringField('@type')
    #: title
    title = xmlmap.StringField('tei:title')
    #: list of authors
    authors = xmlmap.StringListField('tei:author')
    #: date
    date = xmlmap.StringField('tei:date')
    #: url to digital edition (target of the ref with type="digital-edition")
    url = xmlmap.StringField('tei:ref[@type="digital-edition"]/@target')
    #: url to pdf of digital edition (target of the ref with type="pdf")
    pdf_url = xmlmap.StringField('tei:ref[@type="pdf"]/@target')
class AnnotatedFacsimile(Facsimile):
    '''Annotated Tei facsimile, with mappings needed to generate
    TEI with annotations.
    '''

    #: main tei title
    main_title = xmlmap.StringField(
        'tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title[@type="full"]/tei:title[@type="main"]')
    #: tei subtitle (e.g., annotated edition)
    subtitle = xmlmap.StringField(
        'tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title[@type="full"]/tei:title[@type="sub"]')

    #: responsibility statement text
    responsibility = xmlmap.StringField(
        'tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:respStmt/tei:resp')
    #: responsibility statement names
    responsible_names = xmlmap.NodeListField(
        'tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:respStmt/tei:name',
        Name)

    # additional mappings for annotation data

    #: list of annotations at body/div[@type="annotations"]/note[@type="annotation"], as :class:`Note`
    annotations = xmlmap.NodeListField(
        'tei:text/tei:body/tei:div[@type="annotations"]/tei:note[@type="annotation"]',
        Note)

    #: list of bibliographic citations/works cited
    citations = xmlmap.NodeListField(
        'tei:text/tei:body/tei:div[@type="works-cited"]/tei:listBibl/tei:biblStruct',
        BiblStruct)
    #: list of bibliographic citation ids
    citation_ids = xmlmap.StringListField(
        'tei:text/tei:body/tei:div[@type="works-cited"]/tei:listBibl/tei:biblStruct/@xml:id')

    #: annotation tags, as :class:`InterpGroup`
    tags = xmlmap.NodeField('tei:text/tei:back/tei:interpGrp[@type="tags"]',
                            InterpGroup)

    def page_id_by_xlink(self, link):
        '''Find the ``xml:id`` of the page surface whose ``xlink:href``
        equals `link`.

        :param link: xlink href value to look for
        :returns: the first matching ``xml:id``, or None if no
            ``surface`` with type="page" has that href
        '''
        # Pass the link as an XPath variable instead of interpolating
        # it into the expression: a value containing a double quote
        # would otherwise produce an invalid (or altered) query.
        results = self.node.xpath(
            '//tei:surface[@type="page"][@xlink:href=$link]/@xml:id',
            namespaces=self.ROOT_NAMESPACES, link=link)
        if results:
            return results[0]
        return None
class RepositoryDescription(_FedoraBase):
    """:class:`~eulxml.xmlmap.XmlObject` for a repository description as returned
    by :meth:`API_A_LITE.describeRepository` """

    # default namespace is fedora access
    name = xmlmap.StringField('a:repositoryName')
    "repository name"
    base_url = xmlmap.StringField('a:repositoryBaseURL')
    "base url"
    version = xmlmap.StringField('a:repositoryVersion')
    "version of Fedora being run"
    pid_info = xmlmap.NodeField('a:repositoryPID', RepositoryDescriptionPid)
    ":class:`RepositoryDescriptionPid` - configuration info for pids"
    # OAI settings live in their own element.  This field previously
    # reused the repositoryPID xpath from ``pid_info``, so the OAI
    # object was built on the pid configuration node instead.
    oai_info = xmlmap.NodeField('a:repositoryOAI-identifier',
                                RepositoryDescriptionOAI)
    ":class:`RepositoryDescriptionOAI` - configuration info for OAI"
    search_url = xmlmap.StringField('a:sampleSearch-URL')
    "sample search url"
    access_url = xmlmap.StringField('a:sampleAccess-URL')
    "sample access url"
    oai_url = xmlmap.StringField('a:sampleOAI-URL')
    "sample OAI url"
    admin_email = xmlmap.StringListField("a:adminEmail")
    "administrator emails"
class Note(TeiBase):
    'Tei Note, used here to contain an annotation'

    ROOT_NAME = 'note'

    # -- attributes -------------------------------------------------------
    #: xml id
    id = xmlmap.StringField('@xml:id')
    #: responsibility
    resp = xmlmap.StringField('@resp')
    #: target
    target = xmlmap.StringField('@target')
    #: type
    type = xmlmap.StringField('@type')
    #: ana attribute, e.g. for tag identifiers
    ana = xmlmap.StringField('@ana')
    #: xlink href
    href = xmlmap.StringField('@xlink:href')

    # -- content ----------------------------------------------------------
    #: list of paragraphs as strings
    paragraphs = xmlmap.StringListField('tei:p')
    #: code for the markdown used in the original annotation
    markdown = xmlmap.StringField('tei:code[@lang="markdown"]')
    #: links to related pages
    related_pages = xmlmap.NodeListField('tei:ref[@type="related page"]',
                                         Ref)
    #: list of bibliographic citations/works cited
    citations = xmlmap.NodeListField('tei:listBibl/tei:biblStruct',
                                     BiblStruct)

    # in-text citation generated from markdown; these fields
    # are mapped so they can be removed from the annotated tei document
    works_cited = xmlmap.NodeField(
        'tei:head[text() = "Works Cited"]',
        xmlmap.XmlObject)
    zotero_items = xmlmap.NodeField(
        'tei:list[contains(tei:item/tei:anchor/@xml:id, "zotero")]',
        xmlmap.XmlObject)
    works_cited_milestone = xmlmap.NodeField(
        'tei:milestone[following-sibling::tei:head/text() = "Works Cited"]',
        xmlmap.XmlObject)

    # mapped to remove empty list bibl element
    list_bibl = xmlmap.NodeField('tei:listBibl', xmlmap.XmlObject)
class _BaseContent(_BaseCerp):
    '''Common content encoding elements'''

    # character set values (CharSet elements)
    charset_list = xmlmap.StringListField('xm:CharSet')
    # transfer encoding values (TransferEncoding elements)
    transfer_encoding_list = xmlmap.StringListField('xm:TransferEncoding')
class _BaseMessage(_BaseCerp):
    '''Common message elements'''

    # -- identification ---------------------------------------------------
    local_id = xmlmap.IntegerField('xm:LocalId')
    message_id = xmlmap.StringField('xm:MessageId')
    # Supplied attribute on MessageId: '1' when true, absent when false
    message_id_supplied = xmlmap.SimpleBooleanField(
        'xm:MessageId/@Supplied', true='1', false=None)
    mime_version = xmlmap.StringField('xm:MimeVersion')

    # -- standard headers, one list per header ----------------------------
    orig_date_list = xmlmap.StringListField('xm:OrigDate')  # FIXME: really datetime
    # NOTE: eulxml.xmlmap.DateTimeField supports specifying format,
    # but we might need additional work since %z only works with
    # strftime, not strptime
    from_list = xmlmap.StringListField('xm:From')
    sender_list = xmlmap.StringListField('xm:Sender')
    to_list = xmlmap.StringListField('xm:To')
    cc_list = xmlmap.StringListField('xm:Cc')
    bcc_list = xmlmap.StringListField('xm:Bcc')
    in_reply_to_list = xmlmap.StringListField('xm:InReplyTo')
    references_list = xmlmap.StringListField('xm:References')
    subject_list = xmlmap.StringListField('xm:Subject')
    comments_list = xmlmap.StringListField('xm:Comments')
    keywords_list = xmlmap.StringListField('xm:Keywords')
    headers = xmlmap.NodeListField('xm:Header', Header)

    # -- body: either a single body or a multi body -----------------------
    single_body = xmlmap.NodeField('xm:SingleBody', SingleBody)
    multi_body = xmlmap.NodeField('xm:MultiBody', MultiBody)

    @property
    def body(self):
        '''The single body when it is set and truthy, otherwise the
        multi body.'''
        single = self.single_body
        return single if single else self.multi_body

    # NOTE(review): named ``_list`` but mapped as a single-node
    # NodeField -- confirm whether a NodeListField was intended.
    incomplete_list = xmlmap.NodeField('xm:Incomplete', Incomplete)

    def __repr__(self):
        # identify by message id, then local id, then a placeholder
        ident = self.message_id or self.local_id or '(no id)'
        return '<%s %s>' % (self.__class__.__name__, ident)
class SourceTech(_BaseSourceTech):
    ':class:`~eulxml.xmlmap.XmlObject` for Source Technical Metadata.'

    ROOT_NAME = 'sourcetech'

    # ---------------------------------------------------------------------
    # option lists for controlled vocabulary source tech fields
    # ---------------------------------------------------------------------
    form_options = (
        '', 'audio cassette', 'open reel tape', 'LP', 'CD',
        'sound file (WAV)', 'sound file (MP3)', 'sound file (M4A)',
        'sound file (AIFF)', 'microcassette', 'DAT', '78', '45 RPM',
        'acetate disc', 'aluminum disc', 'glass disc', 'flexi disc',
        'cardboard disc', 'phonograph cylinder', 'wire recording',
        'dictabelt', 'other',
    )
    'controlled vocabulary for :class:`SourceTech.form`'

    housing_options = (
        '', 'jewel case', 'plastic container', 'paper sleeve',
        'cardboard sleeve', 'cardboard box', 'other', 'none',
    )
    'controlled vocabulary for :class:`SourceTech.housing`'

    reel_sizes = ('3', '4', '5', '7', '10', '12', '14')  # also Other -> empty field
    'controlled vocabulary used to generate form options for :class:`SourceTech.reel_size`'

    # (value, label) pairs: an empty value first to force active
    # selection, then each reel size labelled with a trailing inch
    # mark, then the Other / Not Applicable choices.
    reel_size_options = (
        [('', '')]
        + [(size, '%s"' % size) for size in reel_sizes]
        + [('Other', 'Other'), ('Not Applicable', 'Not Applicable')]
    )

    sound_characteristic_options = ('', 'mono', 'stereo')
    'controlled vocabulary for :class:`SourceTech.sound_characteristics`'

    speed_options = (
        # delimited format is aspect, value, unit
        ('', ''),
        ('tape', (
            ('tape|15/16|inches/sec', '15/16 ips'),
            ('tape|1.2|cm/sec', '1.2 cm/s'),
            ('tape|2.4|cm/sec', '2.4 cm/s'),
            ('tape|1 7/8|inches/sec', '1 7/8 ips'),
            ('tape|3 3/4|inches/sec', '3 3/4 ips'),
            ('tape|7 1/2|inches/sec', '7 1/2 ips'),
            ('tape|15|inches/sec', '15 ips'),
            ('tape|30|inches/sec', '30 ips'),
            ('tape|other|other', 'Other'),
        )),
        ('phono disc', (
            ('phono disc|16|rpm', '16 rpm'),
            ('phono disc|33 1/3|rpm', '33 1/3 rpm'),
            ('phono disc|45|rpm', '45 rpm'),
            ('phono disc|78|rpm', '78 rpm'),
            ('phono disc|other|other', 'Other'),
        )),
        ('phono cylinder', (
            ('phono cylinder|90|rpm', '90 rpm'),
            ('phono cylinder|120|rpm', '120 rpm'),
            ('phono cylinder|160|rpm', '160 rpm'),
            ('phono cylinder|other|other', 'Other'),
        )),
        ('other', (
            ('other|other|other', 'Other'),
        )),
        ('na|Not applicable|na', 'Not Applicable'),
    )
    'controlled vocabulary for :class:`SourceTech.speed`, grouped by format'
    # NOTE: speed should be displayed as ips but saved to xml as inches/sec
    # speed options is formatted for grouped options in django select widget

    # planned schema location (schema not yet available)
    #XSD_SCHEMA = 'http://pid.emory.edu/ns/2010/sourcetech/v1/sourcetech-1.xsd'
    #xmlschema = xmlmap.loadSchema(XSD_SCHEMA)

    # ---------------------------------------------------------------------
    # field mappings
    # ---------------------------------------------------------------------
    note = xmlmap.StringField(
        'st:note[@type="general"]',
        required=False,
        verbose_name='General Note',
        help_text='General note about the physical item')
    'general note'
    note_list = xmlmap.StringListField('st:note[@type="general"]')

    related_files = xmlmap.StringField(
        'st:note[@type="relatedFiles"]',
        required=False,
        help_text='IDs of other digitized files for the same item, '
                  'separated by semicolons. Required for multi-part items.')
    'related files (string of IDs delimited by semicolons)'
    related_files_list = xmlmap.StringListField(
        'st:note[@type="relatedFiles"]')
    # NOTE: according to spec, related_files is required if multi-part--
    # - not tracking multi-part for Min Items (that I know of)

    conservation_history = xmlmap.StringField(
        'st:note[@type="conservationHistory"]',
        required=False)
    'note about conservation history'
    conservation_history_list = xmlmap.StringListField(
        'st:note[@type="conservationHistory"]')

    speed = xmlmap.NodeField('st:speed/st:measure[@type="speed"]',
                             SourceTechMeasure)
    ':class:`SourceTechMeasure`'

    sublocation = xmlmap.StringField(
        'st:sublocation',
        required=False,
        help_text='Storage location within the collection '
                  '(e.g., box and folder)')
    'storage location within the collection'

    form = xmlmap.StringField(
        'st:form[@type="sound"]',
        choices=form_options,
        required=False,
        help_text='The physical form or medium of the resource')
    'physical format - options controlled by :class:`SourceTech.form_options`'
    form_list = xmlmap.StringListField('st:form[@type="sound"]')

    sound_characteristics = xmlmap.StringField(
        'st:soundChar',
        choices=sound_characteristic_options,
        required=False)
    'sound characteristics - options controlled by :class:`SourceTech.sound_characteristic_options`'

    stock = xmlmap.StringField(
        'st:stock',
        verbose_name='Tape Brand/Stock',
        help_text='The brand or stock of the magnetic tape',
        required=False)
    'Stock or brand of source media'
    stock_list = xmlmap.StringListField('st:stock')

    housing = xmlmap.StringField(
        'st:housing[@type="sound"]',
        choices=housing_options,
        required=False,
        help_text='Type of housing for the source item')
    'Type of housing - options controlled by :class:`SourceTech.housing_options`'

    reel_size = xmlmap.NodeField(
        'st:reelSize/st:measure[@type="diameter"][@aspect="reel size"]',
        SourceTechMeasure,
        required=False)
    ':class:`SourceTechMeasure`'

    # tech_note is migrate/view only
    technical_note = xmlmap.StringListField('st:note[@type="technical"]',
                                            required=False)
    'note with type="technical"'
class DDBCPlaceName(teimap._TeiBase):
    '''Place entry mapping: names, district, notes and geo location.'''

    # every placeName child, as strings
    place_names = xmlmap.StringListField('tei:placeName')
    district = xmlmap.StringField('tei:district')
    # every note child, as strings
    notes = xmlmap.StringListField('tei:note')
    # text of the geo element nested under location
    geo = xmlmap.StringField('tei:location/tei:geo')
class DublinCore(_BaseDublinCore):
    """
    XmlObject for Simple (unqualified) Dublin Core metadata.

    If no node is specified when initialized, a new, empty Dublin Core
    XmlObject will be created.

    Each DC term is mapped twice: as a single optional string field and
    as a ``*_list`` field over the same element.
    """

    ROOT_NAME = 'dc'
    XSD_SCHEMA = "http://www.openarchives.org/OAI/2.0/oai_dc.xsd"

    contributor = xmlmap.StringField("dc:contributor", required=False)
    contributor_list = xmlmap.StringListField("dc:contributor",
                                              verbose_name='Contributors')

    coverage = xmlmap.StringField("dc:coverage", required=False)
    coverage_list = xmlmap.StringListField("dc:coverage",
                                           verbose_name='Coverage')  #?

    creator = xmlmap.StringField("dc:creator", required=False)
    creator_list = xmlmap.StringListField("dc:creator",
                                          verbose_name='Creators')

    date = xmlmap.StringField("dc:date", required=False)
    date_list = xmlmap.StringListField("dc:date",
                                       verbose_name='Dates')

    description = xmlmap.StringField("dc:description", required=False)
    description_list = xmlmap.StringListField("dc:description",
                                              verbose_name='Descriptions')

    format = xmlmap.StringField("dc:format", required=False)
    format_list = xmlmap.StringListField("dc:format",
                                         verbose_name='Formats')

    identifier = xmlmap.StringField("dc:identifier", required=False)
    identifier_list = xmlmap.StringListField("dc:identifier",
                                             verbose_name='Identifiers')

    language = xmlmap.StringField("dc:language", required=False)
    language_list = xmlmap.StringListField("dc:language",
                                           verbose_name='Languages')

    publisher = xmlmap.StringField("dc:publisher", required=False)
    publisher_list = xmlmap.StringListField("dc:publisher",
                                            verbose_name='Publishers')

    relation = xmlmap.StringField("dc:relation", required=False)
    relation_list = xmlmap.StringListField("dc:relation",
                                           verbose_name='Relations')

    rights = xmlmap.StringField("dc:rights", required=False)
    rights_list = xmlmap.StringListField("dc:rights",
                                         verbose_name='Rights')

    source = xmlmap.StringField("dc:source", required=False)
    source_list = xmlmap.StringListField("dc:source",
                                         verbose_name='Sources')

    subject = xmlmap.StringField("dc:subject", required=False)
    subject_list = xmlmap.StringListField("dc:subject",
                                          verbose_name='Subjects')

    title = xmlmap.StringField("dc:title", required=False)
    title_list = xmlmap.StringListField("dc:title",
                                        verbose_name='Titles')

    type = xmlmap.StringField("dc:type", required=False)
    type_list = xmlmap.StringListField("dc:type",
                                       verbose_name='Types')

    elements = xmlmap.NodeListField('dc:*', DublinCoreElement)
    'list of all DC elements as instances of :class:`DublinCoreElement`'

    # RDF declaration of the Recommended DCMI types
    DCMI_TYPES_RDF = 'http://dublincore.org/2010/10/11/dctype.rdf'
    DCMI_TYPE_URI = 'http://purl.org/dc/dcmitype/'

    # The DCMI type helpers are only defined when rdflib is available;
    # otherwise both names are plain None class attributes.
    if rdflib:
        DCMI_TYPE_URI = rdflib.URIRef(DCMI_TYPE_URI)

        _dcmi_types_graph = None

        @property
        def dcmi_types_graph(self):
            'DCMI Types Vocabulary as an :class:`rdflib.Graph`'
            # only initialize if requested; then save the result
            if self._dcmi_types_graph is None:
                # Parse into a local graph and cache it only after the
                # parse succeeds.  Caching first would leave an empty
                # graph behind if loading the RDF document failed.
                graph = rdflib.Graph()
                graph.parse(self.DCMI_TYPES_RDF)
                self._dcmi_types_graph = graph
            return self._dcmi_types_graph

        _dcmi_types = None

        @property
        def dcmi_types(self):
            '''DCMI Type Vocabulary (recommended), as documented at
            http://dublincore.org/documents/dcmi-type-vocabulary/'''
            if self._dcmi_types is None:
                # generate a list of DCMI types based on the RDF dctype
                # document; build the list fully before caching so that a
                # failure while loading the graph does not leave an
                # empty list cached.
                graph = self.dcmi_types_graph
                # get all items with rdf:type of rdfs:Class
                items = graph.subjects(rdflib.RDF.type, rdflib.RDFS.Class)
                # keep the label of each item that is defined by dcmitype
                # NOTE(review): Graph.label may not exist in newer rdflib
                # releases -- confirm against the installed version.
                self._dcmi_types = [
                    str(graph.label(subject=item))
                    for item in items
                    if (item, rdflib.RDFS.isDefinedBy,
                        self.DCMI_TYPE_URI) in graph
                ]
            return self._dcmi_types
    else:
        # no rdflib
        dcmi_types_graph = None
        dcmi_types = None
class _BaseBody(_BaseCerp):
    '''Common email header elements'''

    # -- content type -----------------------------------------------------
    content_type_list = xmlmap.StringListField('xm:ContentType')
    charset_list = xmlmap.StringListField('xm:Charset')
    content_name_list = xmlmap.StringListField('xm:ContentName')
    content_type_comments_list = xmlmap.StringListField(
        'xm:ContentTypeComments')
    content_type_param_list = xmlmap.NodeListField('xm:ContentTypeParam',
                                                   Parameter)

    # -- transfer encoding ------------------------------------------------
    transfer_encoding_list = xmlmap.StringListField('xm:TransferEncoding')
    transfer_encoding_comments_list = xmlmap.StringListField(
        'xm:TransferEncodingComments')

    # -- content id -------------------------------------------------------
    content_id_list = xmlmap.StringListField('xm:ContentId')
    content_id_comments_list = xmlmap.StringListField(
        'xm:ContentIdComments')

    # -- description ------------------------------------------------------
    description_list = xmlmap.StringListField('xm:Description')
    description_comments_list = xmlmap.StringListField(
        'xm:DescriptionComments')

    # -- disposition ------------------------------------------------------
    disposition_list = xmlmap.StringListField('xm:Disposition')
    disposition_file_name_list = xmlmap.StringListField(
        'xm:DispositionFileName')
    disposition_comments_list = xmlmap.StringListField(
        'xm:DispositionComments')
    disposition_params = xmlmap.NodeListField('xm:DispositionParams',
                                              Parameter)

    # -- any other MIME headers, as Header objects ------------------------
    other_mime_headers = xmlmap.NodeListField('xm:OtherMimeHeader', Header)
class MyQueryTest(QueryTestModel):
    '''Variant of :class:`QueryTestModel` in which ``name`` is mapped as
    a list of strings instead of a single string.'''

    name = xmlmap.StringListField('name')
class Message(_BaseMessage, _BaseExternal):
    """A single email message in a :class:`Folder`."""

    ROOT_NAME = 'Message'

    STATUS_FLAG_CHOICES = [
        'Seen', 'Answered', 'Flagged', 'Deleted', 'Draft', 'Recent']
    status_flags = xmlmap.StringListField('xm:StatusFlag',
                                          choices=STATUS_FLAG_CHOICES)

    @classmethod
    def from_email_message(cls, message, local_id=None):
        '''
        Convert an :class:`email.message.Message` or compatible message
        object into a CERP XML :class:`eulxml.xmlmap.cerp.Message`. If an
        id is specified, it will be stored in the Message <LocalId>.

        Only non-multipart messages get a body; for multipart messages a
        warning is logged and no body is created.

        :param message: `email.message.Message` object
        :param local_id: optional message id to be set as `local_id`
        :returns: :class:`eulxml.xmlmap.cerp.Message` instance populated
            with message information
        :raises UnicodeError: re-raised (after logging the offending
            header) if the ``To`` header values cannot be added
        '''
        result = cls()
        if local_id is not None:
            # store the id that was passed in (not the ``id`` builtin)
            result.local_id = local_id

        message_id = message.get('Message-Id')
        if message_id:
            result.message_id_supplied = True
            result.message_id = message_id

        result.mime_version = message.get('MIME-Version')

        dates = message.get_all('Date', [])
        result.orig_date_list.extend([parse_mail_date(d) for d in dates])

        result.from_list.extend(message.get_all('From', []))
        # Sender is its own header; it must not be copied from From
        result.sender_list.extend(message.get_all('Sender', []))
        try:
            result.to_list.extend(message.get_all('To', []))
        except UnicodeError:
            # record the problem header for debugging, then propagate
            logger.error('Unable to process To header: %r', message['To'])
            raise
        result.cc_list.extend(message.get_all('Cc', []))
        result.bcc_list.extend(message.get_all('Bcc', []))
        result.in_reply_to_list.extend(message.get_all('In-Reply-To', []))
        result.references_list.extend(message.get_all('References', []))
        result.subject_list.extend(message.get_all('Subject', []))
        result.comments_list.extend(message.get_all('Comments', []))
        result.keywords_list.extend(message.get_all('Keywords', []))

        # every header is also recorded generically as a name/value pair
        headers = [Header(name=key, value=val) for key, val in message.items()]
        result.headers.extend(headers)

        # FIXME: skip multipart messages for now
        if not message.is_multipart():
            result.create_single_body()
            body = result.single_body

            # FIXME: this is a small subset of the actual elements CERP allows.
            # we should add the rest of them, too.

            # message.get_content_type() always returns something. only
            # put it in the CERP if a Content-Type was explicitly specified.
            if message['Content-Type']:
                body.content_type_list.append(message.get_content_type())
            content_charset = message.get_content_charset()
            if content_charset:
                body.charset_list.append(content_charset)
            filename = message.get_filename()
            if filename:
                body.content_name_list.append(filename)

            # FIXME: attaching the body_content only makes sense for text
            # content types. we'll eventually need a better solution for
            # non-text messages
            body.create_body_content()
            payload = message.get_payload(decode=False)

            # if not unicode, attempt to convert
            if isinstance(payload, six.binary_type):
                # NOTE(review): get_charset() is used here while the charset
                # element above uses get_content_charset() -- confirm which
                # one is intended for decoding
                charset = message.get_charset()
                # decode according to the specified character set, if any
                if charset is not None:
                    charset_decoder = codecs.getdecoder(str(charset))
                    payload, length = charset_decoder(payload)
                # otherwise, just try to convert
                else:
                    payload = u(payload)

            # remove any control characters not allowed in XML; preserve
            # horizontal tab (9), line feed (10) and carriage return (13)
            control_char_map = {
                code: None for code in range(32) if code not in (9, 10, 13)
            }
            payload = u(payload).translate(control_char_map)

            body.body_content.content = payload

        else:
            # TODO: handle multipart
            logger.warning('CERP conversion does not yet handle multipart')

        # assume we've normalized newlines:
        result.eol = EOLMAP[os.linesep]

        return result
class GenizaTei(teimap.Tei):
    """Extension of eulxml TEI that adds mappings for the transcription
    fields used here, plus conversion of the main text to simple HTML or
    padded plain text."""

    # extend eulxml TEI to add mappings for the fields we care about
    # NOTE: at least one pgpid is in format ### + ###
    pgpid = xmlmap.IntegerField('tei:teiHeader//tei:idno[@type="PGP"]')
    # normally main text content is under text/body/div; but at least one document has no div
    text = xmlmap.NodeField(
        "tei:text/tei:body/tei:div|tei:text/tei:body[not(tei:div)]", MainText
    )
    lines = xmlmap.NodeListField("tei:text/tei:body/tei:div/tei:l", GenizaTeiLine)
    labels = xmlmap.NodeListField(
        "tei:text/tei:body/tei:div/tei:label", GenizaTeiLine
    )  # not really a line...
    # source description sometimes contains reference to scholarship record
    source = xmlmap.NodeListField(
        "tei:teiHeader//tei:sourceDesc/tei:msDesc/tei:msContents/tei:p", GenizaTeiLine
    )
    # for documents with more than one transcription, authors have been
    # tagged with last name in n attribute to allow identifying/differentiating
    source_authors = xmlmap.StringListField(
        "tei:teiHeader//tei:sourceDesc//tei:author/@n"
    )

    def no_content(self):
        """Return True when the document has no main text node, or when
        the text of that node is empty or whitespace only."""
        # a missing node must count as "no content": str(None) is "None",
        # which would otherwise look like real text
        return self.text is None or str(self.text).strip() == ""

    def text_to_html(self):
        """Convert the TEI text content to basic HTML.

        Lines are grouped into ``<section>`` blocks; a new block starts at
        each label that follows at least one line. Returns an empty
        string when there is no main text node or no lines.

        NOTE(review): label and line text are inserted without HTML
        escaping -- confirm whether callers sanitize the result.
        """
        if self.text is None:
            return ""

        blocks = []
        lines = []
        label = []
        # because blocks are indicated by labels without containing elements,
        # iterate over all lines and create blocks based on the labels
        for line in self.text.lines:
            if line.name == "label":
                # append current text block if set, and initialize a new one
                if lines:
                    blocks.append(
                        {
                            "label": "\n".join(label),
                            "lines": lines,
                            # "languages": list(languages),
                        }
                    )
                    label = []
                    lines = []

                # store the label; sometimes there are two in a row
                label.append(str(line))

            elif line.name == "l":
                # use language codes? unreliable in the xml
                # append tuple of line number, text
                # return empty string for line number if no line attribute
                lines.append((line.number or "", str(line)))

        # append the last block
        if lines:
            blocks.append(
                {
                    "label": "\n".join(label),
                    "lines": lines,
                }
            )

        # combine blocks of text into html
        html = []
        for block in blocks:
            output = ["<section>"]
            # add label if we have one
            if block["label"]:
                output.append(f" <h1>{block['label']}</h1>")

            # one list item per non-blank line; the line number, when
            # present, is carried in the value attribute
            text_lines = " <ul>%s</ul>" % "".join(
                "\n <li%s>%s</li>"
                % (f" value='{line_number}'" if line_number else "", line)
                for line_number, line in block["lines"]
                if line.strip()
            )
            output.append(text_lines)
            output.append("</section>")
            html.append("\n".join(output))

        return "\n".join(html)

    # unicode directional marks used when laying out plain text
    rtl_mark = "\u200F"
    ltr_mark = "\u200E"

    def text_to_plaintext(self):
        """Convert the TEI text content to plain text.

        Each label is preceded by a blank line; each line is right
        justified to the longest line (capped at 100 characters) and
        followed by its line number. Returns None when there is no main
        text node or there are no lines.
        """
        # errors if there are no lines; sync transcription now checks
        # and won't call in that case
        text_lines = self.text.lines if self.text is not None else None
        if not text_lines:
            return

        # determine longest line so we can pad the text;
        # some files have descriptions that are making lines much too long,
        # so set a limit on line length
        longest_line = min(max(len(str(line)) for line in text_lines), 100)

        lines = []
        # because blocks are indicated by labels without containing elements,
        # iterate over all lines and create blocks based on the labels
        for line in text_lines:
            if line.name == "label":
                # blank line to indicate breaks between blocks
                lines.append("")
                lines.append("%s%s" % (self.ltr_mark, line))
            elif line.name == "l":
                line_num = line.number or ""
                # combine line text with line number and right justify;
                # right justify line number
                lines.append(
                    " ".join(
                        [
                            self.rtl_mark,
                            str(line).rjust(longest_line),
                            self.ltr_mark,
                            line_num.rjust(3),
                        ]
                    )
                )

        return "\n".join(lines)
class VideoSourceTech(_BaseSourceTech):
    ':class:`~eulxml.xmlmap.XmlObject` for Source Technical Metadata.'
    ROOT_NAME = 'sourcetech'

    # option lists for controlled vocabulary source tech fields
    form_options = ('', 'VHS', 'S-VHS', 'BetacamSP', 'Digital Betacam',
                    'Umatic', 'MiniDV', 'DVCAM', 'DVCPro', 'HDCAM', 'Video8',
                    'Digital8', 'Betamax', 'DVD', 'DVD-R/+R/-RW/+RW',
                    'Open Reel', 'Laserdisc', 'Film', 'Born digital', 'other')
    'controlled vocabulary for :attr:`VideoSourceTech.form`'
    # nested as (group label, (value, label) pairs) -- presumably for
    # grouped options in a django select widget
    signal_format_options = (
        ('', ''),
        ('DVD', (
            (0, 0),
            (1, 1),
            (2, 2),
            (3, 3),
            (4, 4),
            (5, 5),
            (6, 6),
            (7, 7),
            (8, 8),
            ('All', 'All'),
        )),
        ('VHS', (
            ('NTSC', 'NTSC'),
            ('PAL/SECAM', 'PAL/SECAM'),
        )))
    'controlled vocabulary for :attr:`VideoSourceTech.signal_format`'
    sound_characteristic_options = ('', 'mono', 'stereo')
    'controlled vocabulary for :attr:`VideoSourceTech.sound_characteristics`'
    speed_options = ('', 'SP', 'LP', 'EP', 'Other', 'Not applicable')
    'controlled vocabulary for :attr:`VideoSourceTech.speed`'
    gauge_options = ('', '16mm', '35mm', '65mm', 'Super 8', 'other')
    'controlled vocabulary for :attr:`VideoSourceTech.gauge`'
    # NOTE: speed should be displayed as ips but saved to xml as inches/sec
    # NOTE(review): the speed options in this class are SP/LP/EP rather than
    # ips values, so the note above may not apply here -- confirm

    # planned schema location (schema not yet available)
    #XSD_SCHEMA = 'http://pid.emory.edu/ns/2010/sourcetech/v1/sourcetech-1.xsd'
    #xmlschema = xmlmap.loadSchema(XSD_SCHEMA)

    note = xmlmap.StringField('st:note[@type="general"]', required=False,
                              verbose_name='General Note',
                              help_text='General note about the physical item')
    'general note'
    note_list = xmlmap.StringListField('st:note[@type="general"]')
    conservation_history = xmlmap.StringField(
        'st:note[@type="conservationHistory"]', required=False)
    'note about conservation history'
    conservation_history_list = xmlmap.StringListField(
        'st:note[@type="conservationHistory"]')
    speed = xmlmap.StringField(
        'st:speed/st:measure[@type="speed"]', choices=speed_options,
        verbose_name='Recording Speed', required=False,
        help_text='Speed at which the video was recorded')
    'recording speed - options controlled by :attr:`VideoSourceTech.speed_options`'
    gauge = xmlmap.StringField(
        'st:gauge', choices=gauge_options, verbose_name='Gauge',
        required=False, help_text='Width of the film')
    'film gauge - options controlled by :attr:`VideoSourceTech.gauge_options`'
    sublocation = xmlmap.StringField(
        'st:sublocation', required=False,
        help_text='Storage location within the collection (e.g., box and folder)')
    'storage location within the collection'
    form = xmlmap.StringField(
        'st:form[@type="moving image"]', choices=form_options, required=False,
        help_text='The physical form or medium of the resource')
    'physical format - options controlled by :attr:`VideoSourceTech.form_options`'
    # NOTE(review): help_text below repeats the text used for `form`;
    # left unchanged because it is user-facing -- confirm intended wording
    signal_format = xmlmap.StringField(
        'st:signalFormat[@type="moving image"]', choices=signal_format_options,
        required=False,
        help_text='The physical form or medium of the resource')
    'signal format - options controlled by :attr:`VideoSourceTech.signal_format_options`'
    # list version of `form`; must use the same xpath as the single-value
    # field (type "moving image"), like the other field/field_list pairs
    form_list = xmlmap.StringListField('st:form[@type="moving image"]')
    sound_characteristics = xmlmap.StringField(
        'st:soundChar', choices=sound_characteristic_options, required=False)
    'sound characteristics - options controlled by :attr:`VideoSourceTech.sound_characteristic_options`'
    # NOTE(review): help_text below repeats the text used for `form`;
    # left unchanged because it is user-facing -- confirm intended wording
    chroma = xmlmap.StringField(
        'st:chroma', choices=('color', 'monochrome'), required=False,
        help_text='The physical form or medium of the resource')
    'chroma - one of color or monochrome'
    stock = xmlmap.StringField(
        'st:stock', verbose_name='Tape Brand/Stock',
        help_text='The brand or stock of the magnetic tape', required=False)
    'Stock or brand of source media'
    stock_list = xmlmap.StringListField('st:stock')

    # tech_note is migrate/view only
    technical_note = xmlmap.StringListField('st:note[@type="technical"]',
                                            required=False)
    'note with type="technical"'