def __processImage(self, el, firstImage):
    '''
    Ingest one image element and relate it to the current concert object.

    The element's 'id' attribute and its 'ruta' (path) and 'pie' (caption)
    children are read.  When the id and path are present and the resolved
    file exists, the Fedora object carrying the matching fjm-db:imageID is
    fetched (or a new object is created), the file is stored in its 'JPG'
    datastream, the RELS-EXT relationships are added and the DC is saved.

    @param el The image element to process.
    @param firstImage When true, the image is also related to the concert
        with atm-rel:isIconOf (the "primary" image, used for thumbnails).
    @return boolean True if the image was ingested, False if it was skipped.
    '''
    i_dict = {
        'id': el.get('id', default=None),
        'path': el.findtext('ruta'),
        'description': el.findtext('pie'),
        'line': el.sourceline
    }

    # Bail out early when there is nothing usable to ingest.
    if not (i_dict['id'] and i_dict['path']
            and path.exists(self.getPath(i_dict['path']))):
        logger.warning('No ID or invalid path for image at line: %(line)s' % i_dict)
        return False

    # Get the existing image object, or create a new one to hold the image.
    try:
        image_pid = FedoraWrapper.getPid(
            uri=ao.NS['fjm-db'].uri,
            predicate='imageID',
            obj="'%(id)s'" % i_dict)
        image = FedoraWrapper.client.getObject(image_pid)
    except KeyError:
        image = FedoraWrapper.getNextObject(self.prefix, label='Image: %(id)s' % i_dict)

    #FIXME: Detect Mimetype, and create image accordingly?
    update_datastream(
        obj=image,
        dsid="JPG",
        filename=self.getPath(i_dict['path']),
        mimeType="image/jpeg")

    i_rels_ext = FR.rels_ext(obj=image, namespaces=ao.NS.values())
    rels = [
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:imageCModel', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fjm-db', predicate='imageID'),
            FR.rels_object(el.get('id'), FR.rels_object.LITERAL)
        ),
        # Relate the image to the concert as a general image...
        (
            FR.rels_predicate(alias='atm-rel', predicate='isImageOf'),
            FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
        )
    ]

    # The first image doubles as the "primary" one (used for thumbnails).
    if firstImage:
        rels.append((
            FR.rels_predicate(alias='atm-rel', predicate='isIconOf'),
            FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
        ))

    # Update and commit the rels_ext
    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=i_rels_ext).update()

    dc = {'type': [unicode('StillImage')]}
    # Add a description based on the 'pie' when there is one and the DC
    # built here does not already carry a description.
    if i_dict['description'] and 'description' not in dc:
        dc['description'] = [unicode('%(description)s' % i_dict)]
    Concert.save_dc(image, dc)

    image.state = unicode('A')
    return True
def process(self):
    '''
    Find or create the Fedora object for this composer, add its RELS-EXT
    relationships and obtain its EAC-CPF record.

    The object is looked up first by normalised name (self[self.norm_name])
    and then by its fjm-db:composerID; a KeyError from both lookups causes a
    new object to be created.  The resulting object is stored in
    self.composer.

    NOTE(review): the visible code stops after the EAC-CPF record is
    obtained; 'eaccpf' and 'event_type' are not used within it -- confirm
    against the complete file.

    @raise Exception if a lookup succeeds but yields an empty pid.
    @raise FedoraConnectionException if fetching the EAC-CPF datastream
        fails with anything other than an HTTP 404.
    '''
    logger = self.logger
    logger.info('Starting to ingest: %(class)s %(id)s' % {'class': type(self), 'id': self.dbid})

    def fetch_existing(pid):
        # Shared by both lookups: report the pid that was found and load it.
        logger.info('Found %(pid)s' % {'pid': pid})
        if not pid:
            msg = 'Something went horribly wrong!  Found a pid (%(pid)s), but couldn\'t access it...' % {'pid': pid}
            logger.error(msg)
            raise Exception(msg)
        logger.warning('%(name)s already exists as pid %(pid)s! Overwriting DC DS!' % {'name': self.norm_name, 'pid': pid})
        return FedoraWrapper.client.getObject(pid)

    try:
        logger.info('Checking to see if %s already exists in Fedora' % self.norm_name)
        self.composer = fetch_existing(self[self.norm_name])
    except KeyError:
        try:
            logger.debug('Not known by name, checking by composerID')
            self.composer = fetch_existing(FedoraWrapper.getPid(
                uri=Composer.NS['fjm-db'].uri,
                predicate='composerID',
                obj="'%s'" % self.dbid))
        except KeyError:
            logger.info('Doesn\'t exist: creating a new Fedora Object')
            self.composer = FedoraWrapper.getNextObject(self.prefix, label='Composer %s' % self.dbid)

    rels_ext = FR.rels_ext(self.composer, namespaces=Composer.NS.values())
    rels = [
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:personCModel', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fjm-db', predicate='composerID'),
            FR.rels_object(self.dbid, FR.rels_object.LITERAL)
        )
    ]
    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
    FedoraWrapper.correlateDBEntry('composedBy', 'composerID')

    #Yay Pythonic-ness?
    # Try to get an existing EAC-CPF, or create one if none is found
    try:
        eaccpf = CPF.EACCPF(self.composer.pid, xml=self.composer['EAC-CPF'].getContent().read())
        event_type = "modified"
    except fcrepo.connection.FedoraConnectionException as e:
        if e.httpcode != 404:
            # Bare raise keeps the original traceback ('raise e' resets it
            # in Python 2).
            raise
        eaccpf = CPF.EACCPF(self.composer.pid)
        event_type = "created"
def __processConferences(self):
    '''
    Ingest every 'Eventos_Asociados/Evento_Asociado' element of this concert
    as a lecture object.

    For each element with an id, the Fedora object carrying the matching
    fjm-db:lectureID is fetched (or a new one is created), it is made a
    member of the concert object, the MP3 named by 'ruta' is stored in its
    'MP3' datastream when the file exists, and its DC is saved.  Elements
    without an id are logged and skipped.
    '''
    logger = logging.getLogger('ingest.atm_concert.__processConferences')
    for el in self.element.findall('Eventos_Asociados/Evento_Asociado'):
        e_dict = {
            'id': el.get('id'),
            'type': el.findtext('Tipo'),
            'description': el.findtext('descripcion'),
            'mp3_path': el.findtext('ruta'),
            'concert': self.dbid,
            'line': el.sourceline
        }

        if not e_dict['id']:
            # Report the skip instead of dropping the element silently.
            logger.warning('No ID for associated event at line: %(line)s' % e_dict)
            continue

        try:
            pid = FedoraWrapper.getPid(
                uri=Concert.NS['fjm-db'].uri,
                predicate="lectureID",
                obj="'%(id)s'" % e_dict)
            conference = FedoraWrapper.client.getObject(pid)
        except KeyError:
            conference = FedoraWrapper.getNextObject(
                self.prefix,
                label="Conference %(id)s in %(concert)s" % e_dict)

        c_rels_ext = FR.rels_ext(obj=conference, namespaces=ao.NS.values())
        rels = [
            (
                FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
                FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:lectureCModel', FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fjm-db', predicate='lectureID'),
                FR.rels_object(e_dict['id'], FR.rels_object.LITERAL)
            )
        ]
        #Add and commit relationships
        FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=c_rels_ext).update()

        if e_dict['mp3_path']:
            mp3_path = self.getPath(e_dict['mp3_path'])
            if path.exists(mp3_path):
                update_datastream(obj=conference, dsid='MP3', filename=mp3_path, mimeType="audio/mpeg")
            else:
                logger.error('MP3 specified (%(mp3_path)s), but doesn\'t exist for id %(id)s on line %(line)s' % e_dict)
        else:
            logger.warning('No MP3 indicated for id %(id)s on line %(line)s' % e_dict)

        dc = dict()
        dc['type'] = [unicode('Sound')]
        # findtext() returns None for a missing child; unicode(None) would
        # store the literal text "None", so only record values that exist.
        if e_dict['description']:
            dc['description'] = [unicode(e_dict['description'])]
        if e_dict['type']:
            dc['subject'] = [unicode(e_dict['type'])]
        Concert.save_dc(conference, dc)

        conference.state = unicode('A')
def createRelsExt(childObject, parentPid, contentModel, extraNamespaces=None, extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid

    We set the default namespace for our interconnections, then apply the
    content model, and make childObject a member of the object:parentPid
    collection.  If object:parentPid doesn't have the collection content
    model then strange things might happen.

    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of alias -> uri to put in the
        RELS-EXT data.  Ignored unless it is a non-empty dict.
    @param extraRelationships Optional dict of predicate -> value; each
        value is added as a literal.  Ignored unless it is a non-empty dict.
        By default the object gets hasModel:contentModel and
        isMemberOfCollection:parentPid.
    """
    nsmap = [
        fedora_relationships.rels_namespace(
            'fedora', 'info:fedora/fedora-system:def/relations-external#'),
        fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#')
    ]
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for k, v in extraNamespaces.iteritems():
            nsmap.append(fedora_relationships.rels_namespace(k, v))

    #add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        [contentModel, "pid"])
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'),
        [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for k, v in extraRelationships.iteritems():
            rels_ext.addRelationship(k, [v, "literal"])

    # Keep retrying while Fedora reports the object as locked by another
    # thread; any other connection error is reported and the update given up.
    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(fedoraEXL.body):
                print("Trouble (thread lock) updating obj(%s) RELS-EXT - retrying."
                      % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
def process(self):
    '''
    Find or create the Fedora objects for this instrument and its instrument
    class, and relate the instrument to the class.

    Both objects are looked up in the pid maps returned by
    Instrument.__getClasses() and Instrument.__getInstruments(); a KeyError
    causes a new object to be created and registered.  Each object receives
    its RELS-EXT relationships and a DC title, and is set to state 'A'.
    '''
    # --- instrument class ---
    try:
        class_pid = Instrument.__getClasses()[self.classID]
        instrumentClass = FedoraWrapper.client.getObject(class_pid)
    except KeyError:
        instrumentClass = FedoraWrapper.getNextObject(
            self.prefix,
            label='Instrument class %s' % self.classID)
        Instrument.__addInstrumentClass(self.classID, instrumentClass.pid)

    c_rels = [
        (
            FR.rels_predicate(alias='fjm-db', predicate='instrumentClassID'),
            FR.rels_object(self.classID, FR.rels_object.LITERAL)
        ),
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:instrumentClassCModel', FR.rels_object.PID)
        )
    ]
    FedoraWrapper.addRelationshipsWithoutDup(c_rels, fedora=instrumentClass).update()

    class_dc = {'title': [self.instrumentClass]}
    Instrument.save_dc(instrumentClass, class_dc)
    instrumentClass.state = unicode('A')

    # --- instrument ---
    try:
        instrument_pid = Instrument.__getInstruments()[self.instrumentName]
        instrument = FedoraWrapper.client.getObject(instrument_pid)
    except KeyError:
        instrument = FedoraWrapper.getNextObject(
            self.prefix,
            label='Instrument %s' % self.dbid)
        Instrument.__addInstrument(self.instrumentName, instrument.pid)

    instrument_dc = {'title': [self.instrumentName]}
    Instrument.save_dc(instrument, instrument_dc)

    i_rels = [
        (
            FR.rels_predicate(alias='fjm-db', predicate='instrumentID'),
            FR.rels_object(self.dbid, FR.rels_object.LITERAL)
        ),
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:instrumentCModel', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
            FR.rels_object(instrumentClass.pid, FR.rels_object.PID)
        )
    ]
    FedoraWrapper.addRelationshipsWithoutDup(i_rels, fedora=instrument).update()
    FedoraWrapper.correlateDBEntry('instrument', 'instrumentID')

    instrument.state = unicode('A')
def correlateDBEntry(predicate, idpred):
    '''
    Add relations involving PIDs to objects, based on relations to literals
    which were added during the original ingest.

    For example, in the original ingest, 'performances' are added with a
    relation to the score db id 'fjm-db:basedOn', while the scores have
    relations to their DB id 'fjm-db:scoreID'.  This function uses a query
    which matches the two literals, and adds the relation 'atm-rel:basedOn'
    (note: same predicate, different namespace) to the performance, which
    relates directly to the score whose ID matched.

    Query description:
      1. add prefixes,
      2. select the object and subject of the relationship to resolve,
         based on matching the ID,
      3. optionally select any already existing relationships,
      4. keep results where step 3 returned nothing, or those where the
         selected $sub is not equal to anything found in step 3.

    TODO (minor): this may be slow, as it is called fairly often; some way
    to streamline it, or to call it less frequently, might be nice.

    @param predicate Name of the predicate to resolve; read in the fjm-db
        namespace and written in the atm-rel namespace.
    @param idpred Name of the fjm-db predicate holding the target's DB id.
    '''
    FedoraWrapper.init()

    sparql = '\
        PREFIX atm-rel: <%(atm-rel)s> \
        PREFIX fjm-db: <%(fjm-db)s> \
        SELECT $obj $sub \
        FROM <#ri> \
        WHERE { \
            $obj fjm-db:%(predicate)s $id . \
            $sub fjm-db:%(idpred)s $id . \
            OPTIONAL {$obj atm-rel:%(predicate)s $pid} . \
            FILTER(!bound($pid) || $sub != $pid) \
        }' % {
        'fjm-db': ao.NS['fjm-db'].uri,
        'atm-rel': ao.NS['atm-rel'].uri,
        'predicate': predicate,
        'idpred': idpred
    }

    matches = FedoraWrapper.client.searchTriples(query=sparql, lang='sparql', limit='1000000')
    for result in matches:
        # The pid is whatever follows the last '/' of each result value.
        relation = (
            FR.rels_predicate(alias='atm-rel', predicate=predicate),
            FR.rels_object(result['sub']['value'].rpartition('/')[2], FR.rels_object.PID)
        )
        target = FedoraWrapper.client.getObject(result['obj']['value'].rpartition('/')[2])
        FedoraWrapper.addRelationshipWithoutDup(relation, fedora=target).update()
def handle_still_mods(still_mods_parser, mods_file_name):
    '''
    This function will handle the creation of still image objects

    A new object is created with a MODS datastream, a PNG datastream (when
    the MODS names a 'Still Image' file) and an isStillOf relationship to
    the clip named in the MODS, or to the movie when no clip is named.

    @param still_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      File name of the MODS record; the part between the first '-' and the
      last '.' is used in the object label
    @return boolean
      Always True; failures surface as exceptions (for example a KeyError
      when the named clip is not in clips_to_pids)
    '''
    still_path = get_file_path_from_xpath(still_mods_parser, "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Still Image']")
    still_pid = fedora.getNextPID(name_space)
    still_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-')+1:mods_file_name.rfind('.')])
    still_object = fedora.createObject(still_pid, label = still_label)
    still_object_RELS_EXT = fedora_relationships.rels_ext(still_object,[hamilton_rdf_name_space, fedora_model_namespace])

    #datastreams
    add_MODS_datastream(still_object, mods_file_path)

    if still_path:
        # 'with' closes the file even when an unexpected exception escapes.
        with open(still_path, 'rb') as png_file_handle:
            try:
                # Create the datastream with placeholder content, then
                # replace it with the real file content.
                still_object.addDataStream(u'PNG', u'aTmpStr', label=u'PNG',
                    mimeType = u'image/png', controlGroup = u'M',
                    logMessage = u'Added PNG datastream.')
                datastream = still_object['PNG']
                datastream.setContent(png_file_handle)
                logging.info('Added PNG datastream to:' + still_pid)
            except FedoraConnectionException:
                logging.error('Error in adding PNG datastream to:' + still_pid + '\n')

    #relationships
    still_clip_element_list = still_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']")
    if still_clip_element_list:
        still_clip_file_name = still_clip_element_list[0].text
        still_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isStillOf'), clips_to_pids[still_clip_file_name])
    else:
        still_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isStillOf'), movie_pid)

    still_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':benshiStill')
    still_object_RELS_EXT.update()
    return True
def createRelsExt(childObject, parentPid, contentModel, extraNamespaces=None, extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid

    We set the default namespace for our interconnections, then apply the
    content model, and make childObject a member of the object:parentPid
    collection.  If object:parentPid doesn't have the collection content
    model then strange things might happen.

    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of alias -> uri to put in the
        RELS-EXT data.  Ignored unless it is a non-empty dict.
    @param extraRelationships Optional dict of predicate -> value; each
        value is added as a pid.  Ignored unless it is a non-empty dict.
        By default the object gets hasModel:contentModel and
        isMemberOfCollection:parentPid.
    """
    nsmap = [
        fedora_relationships.rels_namespace('fedora', 'info:fedora/fedora-system:def/relations-external#'),
        fedora_relationships.rels_namespace('fedora-model', 'info:fedora/fedora-system:def/model#')
    ]
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for k, v in extraNamespaces.iteritems():
            nsmap.append(fedora_relationships.rels_namespace(k, v))

    #add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')
    rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora-model', 'hasModel'), [contentModel, "pid"])
    rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'), [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for k, v in extraRelationships.iteritems():
            rels_ext.addRelationship(k, [v, "pid"])

    # Keep retrying while Fedora reports the object as locked by another
    # thread; any other connection error is reported and the update given up.
    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(fedoraEXL.body):
                print("Trouble (thread lock) updating obj(%s) RELS-EXT - retrying."
                      % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
def process(self):
    '''
    Find or create the Fedora object for this group, add its RELS-EXT
    relationships and save its DC.

    The object is looked up by its fjm-db:groupID; a KeyError from the
    lookup causes a new object to be created.  The DC title is the stripped
    text of the element's 'grupo' child.

    @raise Exception if the lookup succeeds but yields an empty pid.
    '''
    logger = self.logger
    logger.info('Starting to ingest: Groupo %s' % self.dbid)

    try:
        pid = FedoraWrapper.getPid(
            uri=ao.NS['fjm-db'].uri,
            predicate='groupID',
            obj="'%s'" % self.dbid)
        if not pid:
            raise Exception('Something went horribly wrong!  Found a pid, but couldn\'t access it...')
        logger.warning('Group %(id)s already exists as pid %(pid)s! Overwriting DC DS!' % {'id': self.dbid, 'pid': pid})
        group = FedoraWrapper.client.getObject(pid)
    except KeyError:
        group = FedoraWrapper.getNextObject(self.prefix, label='Group %s' % self.dbid)

    rels_ext = FR.rels_ext(group, namespaces=ao.NS.values())
    model_rel = (
        FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
        FR.rels_object('atm:groupCModel', FR.rels_object.PID)
    )
    id_rel = (
        FR.rels_predicate(alias='fjm-db', predicate='groupID'),
        FR.rels_object(self.dbid, FR.rels_object.LITERAL)
    )
    FedoraWrapper.addRelationshipsWithoutDup([model_rel, id_rel], rels_ext=rels_ext).update()

    dc = {
        'type': [unicode('Collection')],
        'title': [self.element.findtext('grupo').strip()]
    }
    Group.save_dc(group, dc)

    FedoraWrapper.correlateDBEntry('group', 'groupID')
    group.state = unicode('A')
def handle_page_object(fedora_client, fedora_object, page, ocr_path, label):
    """
    Create the Fedora object for one page of a book object.

    The page object gets some extra relationships as a member of a book
    object.  It should also get:
      - MODS (this should be based on parent book mods, but with page label
        from METS structmap)
      - JP2 (derived from TIFF)
      - MIX
      - OCR, if available

    The code below adds the TIFF datastream, calls handle_derived_jp2 and
    adds the OCR datastream when ocr_path is given; the MIX step is
    commented out.

    @param fedora_client Passed through to addObjectToFedora.
    @param fedora_object The parent (book) object; its pid and label are read.
    @param page The page file; its name and path attributes are read.
    @param ocr_path Path of the OCR text file, or a false value for none.
    @param label Page label, prefixed to the shortened book label.
    """
    page_cm = ITEM_TYPE_CM_MAP['page']
    page_basename = os.path.splitext(page.name)[0]
    page_pid = '%s-%s' % (fedora_object.pid, page_basename)
    shortened_book_label = drl.utils.shorten_string(fedora_object.label, 205)
    page_label = u'%s, %s' % (label, shortened_book_label)

    extraNamespaces = {'pageNS': 'info:islandora/islandora-system:def/pageinfo#'}

    # should the page number be a counter here instead of int(page_basename)?
    extraRelationships = {
        fedora_relationships.rels_predicate('pageNS', 'isPageNumber'): str(int(page_basename)),
        fedora_relationships.rels_predicate('pageNS', 'isPageOf'): str(fedora_object.pid)
    }

    page_object = addObjectToFedora(
        fedora_client,
        page_label,
        page_pid,
        fedora_object.pid,
        page_cm,
        extraNamespaces=extraNamespaces,
        extraRelationships=extraRelationships)

    fedoraLib.update_datastream(
        page_object,
        'TIFF',
        page.path,
        label=page.name,
        mimeType='image/tiff',
        controlGroup='M')

    handle_derived_jp2(page_object, page)
    #handle_derived_mix(page_object, page)

    if not ocr_path:
        return
    ocr_filename = os.path.basename(ocr_path)
    fedoraLib.update_datastream(
        page_object,
        u'OCR',
        ocr_path,
        label=unicode(ocr_filename),
        mimeType=u'text/plain',
        controlGroup='M')
def test_two_namespace_literal(self):
    """
    A literal relationship in a second custom namespace ('jon') serialises
    to the expected RDF when two extra namespaces are registered.
    """
    xmlStr = """
<rdf:RDF xmlns:coal="http://www.coalliance.org/ontologies/relsint" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#" xmlns:jon="http://jebus/trainstation">
    <rdf:Description rdf:about="info:fedora/coccc:2040">
        <jon:feezle>JON</jon:feezle>
    </rdf:Description>
</rdf:RDF>
"""
    namespaces = [
        rels_namespace('coal', 'http://www.coalliance.org/ontologies/relsint'),
        rels_namespace('jon', 'http://jebus/trainstation')
    ]
    relationship = fedora_relationship(namespaces)
    relationship.addRelationship(
        'coccc:2040',
        rels_predicate('jon', 'feezle'),
        rels_object('JON', rels_object.LITERAL))

    # Both sides go through XmlHelper.mangle before being compared.
    result_string = XmlHelper.mangle(relationship.toString())
    expected_string = XmlHelper.mangle(xmlStr)
    self.assertEqual(result_string, expected_string, 'Generated XML Incorrect')
if object_fetch_exception.httpcode in [404]: logging.info(name_space + ':itm missing, creating object.\n') collection_object = fedora.createObject(collection_pid, label = collection_label) #collection_policy try: collection_object.addDataStream(u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error('Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT = fedora_relationships.rels_ext(collection_object, fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOfCollection','islandora:root') collection_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'islandora:collectionCModel') collection_object_RELS_EXT.update() #loop through the mods folder for mods_file in mods_files: if mods_file.endswith('-MODS.xml'): #get mods file contents mods_file_path = os.path.join(source_directory, 'mods', mods_file) mods_file_handle = open(mods_file_path) mods_contents = mods_file_handle.read() #get map_label from mods title mods_tree = etree.parse(mods_file_path) map_label = mods_tree.xpath("*[local-name() = 'titleInfo']/*[local-name() = 'title']/text()") map_label = map_label[0] if len(map_label) > 255:
controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error( 'Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT = fedora_relationships.rels_ext( collection_object, fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOf', 'islandora:root') collection_object_RELS_EXT.addRelationship( fedora_relationships.rels_predicate('fedora-model', 'hasModel'), 'islandora:collectionCModel') collection_object_RELS_EXT.update() #loop through the mods folder for mods_file in mods_files: if mods_file.endswith('MODS.xml'): #get mods file contents mods_file_path = os.path.join(source_directory, 'mods-xml', mods_file) mods_file_handle = open(mods_file_path) mods_contents = mods_file_handle.read() #get book_label from mods title mods_tree = etree.parse(mods_file_path) book_label = mods_tree.xpath(
def handle_clip_mods(clip_mods_parser, mods_file_name):
    '''
    This function will handle the creation of clip objects

    A new object is created with a MODS datastream plus HIGHRES and LOWRES
    datastreams (when the MODS names those files), related to the movie, and
    recorded in clips_to_pids under mods_file_name.  A file name containing
    '-sub' gets the benshiClipSubbed model and an isSubOf relationship to
    its master clip; every other clip gets the benshiClip model.

    @param clip_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      File name of the MODS record; the clip number and label are cut out
      of it
    @return boolean
      Always True; failures surface as exceptions (for example a KeyError
      when a subbed clip's master is not in clips_to_pids)
    '''
    clip_pid = fedora.getNextPID(name_space)
    high_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='High Quality Video']"
    )
    low_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Web Quality Video']"
    )
    clip_number = mods_file_name[mods_file_name.find('-cp') + 3:mods_file_name.rfind('.')]
    clip_number = clip_number.replace('-sub', '')
    clip_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-') + 1:mods_file_name.rfind('.')])
    clip_object = fedora.createObject(clip_pid, label=clip_label)

    #datastreams
    add_MODS_datastream(clip_object, mods_file_path)

    if high_resolution_mov_path:
        # 'with' closes the file even when an unexpected exception escapes.
        with open(high_resolution_mov_path, 'rb') as hires_file_handle:
            try:
                # Create the datastream with placeholder content, then
                # replace it with the real file content.
                clip_object.addDataStream(u'HIGHRES', u'aTmpStr',
                                          label=u'HIGHRES',
                                          mimeType=u'video/quicktime',
                                          controlGroup=u'M',
                                          logMessage=u'Added HIGHRES datastream.')
                datastream = clip_object['HIGHRES']
                datastream.setContent(hires_file_handle)
                logging.info('Added HIGHRES datastream to:' + clip_pid)
            except FedoraConnectionException:
                logging.error('Error in adding HIGHRES datastream to:' + clip_pid + '\n')

    if low_resolution_mov_path:
        with open(low_resolution_mov_path, 'rb') as lowres_file_handle:
            try:
                clip_object.addDataStream(u'LOWRES', u'aTmpStr',
                                          label=u'LOWRES',
                                          mimeType=u'video/quicktime',
                                          controlGroup=u'M',
                                          logMessage=u'Added LOWRES datastream.')
                datastream = clip_object['LOWRES']
                datastream.setContent(lowres_file_handle)
                logging.info('Added LOWRES datastream to:' + clip_pid)
            except FedoraConnectionException:
                logging.error('Error in adding LOWRES datastream to:' + clip_pid + '\n')

    #relationships
    clip_object_RELS_EXT = fedora_relationships.rels_ext(
        clip_object, [hamilton_rdf_name_space, fedora_model_namespace])
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipOf'),
        movie_pid)
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipNumber'),
        fedora_relationships.rels_object(
            str(clip_number), fedora_relationships.rels_object.LITERAL))

    # Item assignment mutates the module-level dict in place, so no
    # 'global' declaration is needed.
    clips_to_pids[mods_file_name] = clip_pid

    #this section handles the different types of clips (subs or not)
    if '-sub' in mods_file_name:
        master_clip_file_name = mods_file_name.replace('-sub', '')
        clip_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isSubOf'),
            clips_to_pids[master_clip_file_name])
        content_model = name_space + ':benshiClipSubbed'
    else:
        content_model = name_space + ':benshiClip'

    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        content_model)
    clip_object_RELS_EXT.update()
    return True
def handle_transcript_mods(transcript_mods_parser, mods_file_name):
    '''
    This function will handle the creation of transcript objects

    A new object is created with a MODS datastream plus TimeSyncedTranscript
    and PDF datastreams (when the MODS names those files).  It is related
    with isTranscriptOf to the single clip named in the MODS, or to the
    movie otherwise, and its content model is chosen from the '-jpneng',
    '-jpn' or '-eng' marker in the file name.

    @param transcript_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      File name of the MODS record; the part between '-tr-' and the last
      '.' is used in the object label
    @return boolean
      True on success, False when the file name carries none of the three
      language markers (the RELS-EXT is then not written)
    '''
    transcript_pid = fedora.getNextPID(name_space)
    transcript_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-tr-') + 4:mods_file_name.rfind('.')])
    transcript_object = fedora.createObject(transcript_pid, label=transcript_label)
    transcript_object_RELS_EXT = fedora_relationships.rels_ext(
        transcript_object, [hamilton_rdf_name_space, fedora_model_namespace])
    transcript_path = get_file_path_from_xpath(
        transcript_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Document']"
    )
    time_synced_transcript_path = get_file_path_from_xpath(
        transcript_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Document with time-sync encoding']"
    )

    #datastreams
    add_MODS_datastream(transcript_object, mods_file_path)

    if time_synced_transcript_path:
        # 'with' closes the file even when an unexpected exception escapes.
        with open(time_synced_transcript_path, 'rb') as time_synced_transcript_handle:
            try:
                # Create the datastream with placeholder content, then
                # replace it with the real file content.
                transcript_object.addDataStream(
                    u'TimeSyncedTranscript', u'aTmpStr',
                    label=u'POPCORN',
                    mimeType=u'application/xml',
                    controlGroup=u'M',
                    logMessage=u'Added TimeSyncedTranscript datastream.')
                datastream = transcript_object['TimeSyncedTranscript']
                datastream.setContent(time_synced_transcript_handle)
                logging.info('Added TimeSyncedTranscript datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error(
                    'Error in adding TimeSyncedTranscript datastream to:' + transcript_pid + '\n')

    if transcript_path:
        with open(transcript_path, 'rb') as pdf_file_handle:
            try:
                transcript_object.addDataStream(
                    u'PDF', u'aTmpStr',
                    label=u'PDF',
                    mimeType=u'application/pdf',
                    controlGroup=u'M',
                    logMessage=u'Added PDF datastream.')
                datastream = transcript_object['PDF']
                datastream.setContent(pdf_file_handle)
                logging.info('Added PDF datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error('Error in adding PDF datastream to:' + transcript_pid + '\n')

    #relationships
    #handle is transcript of
    transcript_clip_element_list = transcript_mods_parser.xpath(
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']"
    )
    #there are multiple related items if the transcript is for the whole movie
    if len(transcript_clip_element_list) == 1:
        transcript_clip_file_name = transcript_clip_element_list[0].text
        transcript_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isTranscriptOf'),
            clips_to_pids[transcript_clip_file_name])
    else:
        transcript_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isTranscriptOf'),
            movie_pid)

    #handle the 3 different transcript types
    # '-jpneng' is tested first because '-jpn' is a prefix of it.
    if '-jpneng' in mods_file_name:
        content_model = name_space + ':EnglishJapaneseTranscript'
    elif '-jpn' in mods_file_name:
        content_model = name_space + ':JapaneseTranscript'
    elif '-eng' in mods_file_name:
        content_model = name_space + ':EnglishTranscript'
    else:
        return False

    transcript_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        content_model)
    transcript_object_RELS_EXT.update()
    return True
def handle_still_mods(still_mods_parser, mods_file_name):
    '''
    This function will handle the creation of still image objects

    A new object is created with a MODS datastream, a PNG datastream (when
    the MODS names a 'Still Image' file) and an isStillOf relationship to
    the clip named in the MODS, or to the movie when no clip is named.

    @param still_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      File name of the MODS record; the part between the first '-' and the
      last '.' is used in the object label
    @return boolean
      Always True; failures surface as exceptions (for example a KeyError
      when the named clip is not in clips_to_pids)
    '''
    still_path = get_file_path_from_xpath(
        still_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Still Image']"
    )
    still_pid = fedora.getNextPID(name_space)
    still_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-') + 1:mods_file_name.rfind('.')])
    still_object = fedora.createObject(still_pid, label=still_label)
    still_object_RELS_EXT = fedora_relationships.rels_ext(
        still_object, [hamilton_rdf_name_space, fedora_model_namespace])

    #datastreams
    add_MODS_datastream(still_object, mods_file_path)

    if still_path:
        # 'with' closes the file even when an unexpected exception escapes.
        with open(still_path, 'rb') as png_file_handle:
            try:
                # Create the datastream with placeholder content, then
                # replace it with the real file content.
                still_object.addDataStream(u'PNG', u'aTmpStr',
                                           label=u'PNG',
                                           mimeType=u'image/png',
                                           controlGroup=u'M',
                                           logMessage=u'Added PNG datastream.')
                datastream = still_object['PNG']
                datastream.setContent(png_file_handle)
                logging.info('Added PNG datastream to:' + still_pid)
            except FedoraConnectionException:
                logging.error('Error in adding PNG datastream to:' + still_pid + '\n')

    #relationships
    still_clip_element_list = still_mods_parser.xpath(
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']"
    )
    if still_clip_element_list:
        still_clip_file_name = still_clip_element_list[0].text
        still_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isStillOf'),
            clips_to_pids[still_clip_file_name])
    else:
        still_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isStillOf'),
            movie_pid)

    still_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        name_space + ':benshiStill')
    still_object_RELS_EXT.update()
    return True
def handle_misc_mods(misc_mods_parser, mods_file_name):
    '''
    This function will handle the MODS records that are not clips, stills or
    transcripts. What happens depends on the record's local genre:

      'sound recording' - MODS (and MP3, if present) go onto the benshi object
      'Motion Picture'  - MODS and an OPAC redirect datastream go onto the movie object
      'essay'           - new object with a DOCX datastream, isEssayOf the movie
      'presentation'    - new object with a PPTX datastream, isPresentationOf the movie
      'biography'       - new object with DOCX and/or PDF, isNarratorOf the benshi object

    @param misc_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file (not used in here; the path handed to
      add_MODS_datastream is the module level mods_file_path)
    @return boolean
      True when the genre was recognised and handled, False when the record
      has no local genre or the genre is not one of the above
    '''
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    #genre: (url displayLabel, datastream id, mime type, relationship to the movie, content model)
    document_types = {
        'essay': ('Article', u'DOCX', u'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'isEssayOf', ':benshiEssay'),
        'presentation': ('Presentation', u'PPTX', u'application/vnd.openxmlformats-officedocument.presentationml.presentation', 'isPresentationOf', ':benshiPresentation'),
    }

    def attach_file(target_object, target_pid, dsid, mime_type, file_path):
        #Upload file_path as datastream dsid (controlGroup 'M'). 'with' makes
        #sure the handle is closed whatever the upload raises.
        with open(file_path, 'rb') as file_handle:
            try:
                target_object.addDataStream(dsid, u'aTmpStr', label=dsid, mimeType=mime_type, controlGroup=u'M', logMessage=u'Added %s datastream.' % dsid)
                datastream = target_object[dsid]
                datastream.setContent(file_handle)
                logging.info(('Added %s datastream to:' % dsid) + target_pid)
            except FedoraConnectionException:
                logging.error(('Error in adding %s datastream to:' % dsid) + target_pid + '\n')

    def create_misc_object(label_suffix):
        #New object labelled '<movie_name>_<label_suffix>' plus its RELS-EXT wrapper
        new_pid = fedora.getNextPID(name_space)
        new_object = fedora.createObject(new_pid, label=unicode(movie_name + '_' + label_suffix))
        new_rels_ext = fedora_relationships.rels_ext(new_object, [hamilton_rdf_name_space, fedora_model_namespace])
        return new_pid, new_object, new_rels_ext

    def write_relationships(rels_ext, predicate, target_pid, model):
        #hamilton:<predicate> to target_pid, the content model, then commit
        rels_ext.addRelationship(fedora_relationships.rels_predicate('hamilton', predicate), target_pid)
        rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora-model', 'hasModel'), name_space + model)
        rels_ext.update()

    misc_type_list = misc_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'genre'][@type='local']")
    if not misc_type_list:
        return False
    misc_type = misc_type_list[0].text
    print(misc_type)

    if misc_type == 'sound recording':#fix up benshi object
        #datastreams
        add_MODS_datastream(benshi_object, mods_file_path)
        audio_file_path = get_file_path_from_xpath(misc_mods_parser, url_xpath + "[@displayLabel='Audio']")
        if audio_file_path:
            attach_file(benshi_object, benshi_pid, u'MP3', u'audio/mpeg', audio_file_path)
        print(audio_file_path)

    elif misc_type in document_types:
        display_label, dsid, mime_type, predicate, model = document_types[misc_type]
        misc_pid, misc_object, misc_object_RELS_EXT = create_misc_object(misc_type)
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        document_file_path = get_file_path_from_xpath(misc_mods_parser, url_xpath + "[@displayLabel='%s']" % display_label)
        if document_file_path:
            attach_file(misc_object, misc_pid, dsid, mime_type, document_file_path)
        #relationships
        write_relationships(misc_object_RELS_EXT, predicate, movie_pid, model)

    #movie gets the opac redirect it's special
    elif misc_type == 'Motion Picture':#fix up movie object
        #datastreams
        add_MODS_datastream(movie_object, mods_file_path)
        opac_path_list = misc_mods_parser.xpath(url_xpath)
        #a record without any url used to blow up with an IndexError here
        opac_path = opac_path_list[0].text if opac_path_list else None
        if opac_path:
            try:
                movie_object.addDataStream(u'OPAC', u'aTmpStr', label=u'OPAC', mimeType=u'text/html', controlGroup=u'R', location=unicode(opac_path), logMessage=u'Added OPAC datastream.')
                logging.info('Added OPAC datastream to:' + movie_pid)
            except FedoraConnectionException:
                logging.error('Error in adding OPAC datastream to:' + movie_pid + '\n')

    #biography is special it has a docx and a pdf
    #can't use 'get_file_path_from_xpath' these are different than the rest, so the paths are worked out here
    elif misc_type == 'biography':
        misc_pid, misc_object, misc_object_RELS_EXT = create_misc_object('Narrator')
        #get the paths for the pdf/docx
        docx_file_path = False
        pdf_file_path = False
        for path_element in misc_mods_parser.xpath(url_xpath + "[@displayLabel='Article']"):
            #Look at the url text. Testing the element itself never matches a
            #string, and an empty url element has text None.
            url_text = path_element.text or ''
            if 'currently unavailable' in url_text:
                continue
            if '.docx' in url_text:
                docx_file_path = os.path.normpath(os.path.join(mods_directory, url_text))
            elif 'pdf' in url_text:
                pdf_file_path = os.path.normpath(os.path.join(mods_directory, url_text))
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        if docx_file_path:
            attach_file(misc_object, misc_pid, u'DOCX', u'application/vnd.openxmlformats-officedocument.wordprocessingml.document', docx_file_path)
        if pdf_file_path:
            attach_file(misc_object, misc_pid, u'PDF', u'application/pdf', pdf_file_path)
        #relationships
        write_relationships(misc_object_RELS_EXT, 'isNarratorOf', benshi_pid, ':benshiNarrator')

    else:
        return False
    return True
def __processConcert(self):
    '''
    Get (by concertID) or create the Fedora object for this concert and fill it in.

    In order: stores self.element as the CustomXML datastream, adds the WAV
    named in 'Grabacion/wav' when that file exists, adds '<dbid>.xml' from the
    WAV's directory as MARCXML when that file exists, gets or creates the cycle
    object named after the 'tipo' element, writes the concert's RELS-EXT and DC,
    then keeps the object in self.concert_obj and sets its state to 'A'.
    '''
    logger = logging.getLogger('ingest.atm_concert.Concert.__processConcert')

    #Get the/an object
    try:
        pid = FedoraWrapper.getPid(uri=Concert.NS['fjm-db'].uri, predicate='concertID', obj="'%s'" % self.dbid)
        #NOTE(review): a falsy pid would leave 'concert' unbound; presumably
        #getPid raises KeyError rather than returning nothing - confirm
        if pid:
            logger.warning('Concert %s found as %s. Overwriting DSs!' % (self.dbid, pid))
            concert = FedoraWrapper.client.getObject(pid)
    except KeyError:
        concert = FedoraWrapper.getNextObject(prefix=self.prefix, label="concert %s" % self.dbid)

    logger.info('Adding CustomXML datastream')
    if Concert.save_etree(concert, self.element, 'CustomXML', 'Original XML', controlGroup='M'):
        logger.info('CustomXML added successfully')
    else:
        logger.error('Error while adding CustomXML!')

    #Ingest the WAV (if it exists...)
    WAV = self.element.findtext('Grabacion/wav')
    if WAV:
        WAV = self.getPath(WAV)
        if path.exists(WAV):
            update_datastream(obj=concert, dsid='WAV', filename=WAV, label='WAV', mimeType="audio/x-wav")
        else:
            logger.warning('WAV file specified (%s), but does not exist!', WAV)

        #Ingest the MARCXML... It is looked for next to the WAV, so this can
        #only be attempted when there is a WAV path: path.dirname() of the
        #missing value used to raise and abort the whole concert.
        MARC = path.join(path.dirname(WAV), '%s.xml' % self.dbid)
        if path.exists(MARC):
            update_datastream(obj=concert, dsid='MARCXML', mimeType="application/xml", filename=MARC)
            logger.debug('Added %s', MARC)
        else:
            logger.debug('Couldn\'t find MARCXML at %s', MARC)
    else:
        logger.warning('No WAV found at %s! Skipping...', WAV)
        logger.debug('No WAV path to derive the MARCXML location from; skipping MARCXML')

    #Create cycle stuff
    cycle_name = Concert.normalize_name([self.element.findtext('tipo')])
    try:
        pid = Concert.__cycles()[cycle_name]
        cycle = FedoraWrapper.client.getObject(pid)
    except KeyError:
        cycle = FedoraWrapper.getNextObject(prefix=self.prefix, label='Cycle %s' % (len(Concert.__cycles()) + 1))
        Concert.__cycles()[cycle_name] = cycle.pid

    #NOTE(review): title and content model are (re)written for reused cycles
    #too; presumably both calls leave an up to date cycle unchanged - confirm
    c_dc = dict()
    c_dc['title'] = [cycle_name]
    Concert.save_dc(cycle, c_dc)
    FedoraWrapper.addRelationshipsWithoutDup(rels=[
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:concertCycleCModel', FR.rels_object.PID)
        )
    ], fedora=cycle).update()

    #Add relations to concert object
    rels_ext = FR.rels_ext(obj=concert, namespaces=ao.NS.values())
    rels = [
        #Don't know that this one is necessary... Oh well...
        (
            FR.rels_predicate(alias='fjm-db', predicate='concertID'),
            FR.rels_object(self.dbid, FR.rels_object.LITERAL)
        ),
        (
            FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOfCollection'),
            FR.rels_object('atm:concertCollection', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
            FR.rels_object(cycle.pid, FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:concertCModel', FR.rels_object.PID)
        )
    ]

    #Write 'out' rels_ext
    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()

    desc = self.element.findtext('Descripcion')
    dc = dict()
    dc['type'] = [unicode('Event')]
    if desc:
        dc['description'] = [unicode(desc)]
    dc['title'] = [unicode(Concert.normalize_name([self.element.findtext('titulo')]))]
    Concert.save_dc(concert, dc)

    self.concert_obj = concert
    concert.state = unicode('A')
def process(self):
    '''
    Ingest this performer into Fedora.

    The object is looked up by normalized name through self[...], then by
    performerID through FedoraWrapper.getPid, and created when both lookups
    raise KeyError. A lookup that yields a falsy pid is logged and raised as
    an Exception. Afterwards the DC title, the RELS-EXT relationships and the
    EAC-CPF record are written, the pid is stored under the normalized name,
    and the object state is set to 'A'.
    '''
    log = self.logger
    log.info('Starting to ingest: Performer %s' % self.dbid)

    no_access = 'Something went horribly wrong! Found a pid (%(pid)s), but couldn\'t access it...'

    try:
        log.info('Checking to see if %s already exists in Fedora' % self.norm_name)
        found = self[self.norm_name]
        log.info('Found %(pid)s' % {'pid': found})
        if not found:
            problem = no_access % {'pid': found}
            log.error(problem)
            raise Exception(problem)
        log.warning('%(name)s already exists as pid %(pid)s!' % {'name': self.norm_name, 'pid': found})
        self.performer = FedoraWrapper.client.getObject(found)
    except KeyError:
        try:
            log.debug('Not known by name, checking by performerID')
            found = FedoraWrapper.getPid(uri=Performer.NS['fjm-db'].uri, predicate='performerID', obj="'%s'" % self.dbid)
            log.info('Found %(pid)s' % {'pid': found})
            if not found:
                problem = no_access % {'pid': found}
                log.error(problem)
                raise Exception(problem)
            log.warning('%(name)s already exists as pid %(pid)s! Overwriting DC DS!' % {'name': self.norm_name, 'pid': found})
            self.performer = FedoraWrapper.client.getObject(found)
        except KeyError:
            log.info("Doesn't exist: creating a new Fedora Object")
            self.performer = FedoraWrapper.getNextObject(self.prefix, label='Performer: %s' % self.dbid)

    #DC carries nothing but the normalized name as title
    Performer.save_dc(self.performer, {'title': [self.norm_name]})

    #Content model plus the database id this object was built from
    performer_rels_ext = FR.rels_ext(self.performer, namespaces=Performer.NS.values())
    has_model = (
        FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
        FR.rels_object('atm:personCModel', FR.rels_object.PID)
    )
    performer_id = (
        FR.rels_predicate(alias='fjm-db', predicate='performerID'),
        FR.rels_object(self.dbid, FR.rels_object.LITERAL)
    )
    FedoraWrapper.addRelationshipsWithoutDup([has_model, performer_id], rels_ext=performer_rels_ext).update()

    #Reuse the EAC-CPF datastream when there is one; a 404 from Fedora means
    #there is none yet, so start a fresh record. Anything else is re-raised.
    try:
        record = CPF.EACCPF(self.performer.pid, xml=self.performer['EAC-CPF'].getContent().read())
        event_type = "modified"
    except fcrepo.connection.FedoraConnectionException as e:
        if e.httpcode != 404:
            raise e
        record = CPF.EACCPF(self.performer.pid)
        event_type = "created"

    record.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_performer.py")
    record.add_XML_source(caption='XML from database dump', xml=self.element)
    record.add_name_entry(name=self.name)

    Performer.save_etree(self.performer, record.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')

    self[self.norm_name] = self.performer.pid
    self.performer.state = unicode('A')
    FedoraWrapper.correlateDBEntry('player', 'performerID')
def handle_clip_mods(clip_mods_parser, mods_file_name):
    '''
    This function will handle the creation of clip objects

    The new object is handed to add_MODS_datastream, gets HIGHRES and LOWRES
    datastreams for whichever video files the MODS record points at, is
    related to the movie (isClipOf, isClipNumber) and is registered in
    clips_to_pids under mods_file_name. A file name containing '-sub' marks a
    subtitled clip: it is additionally related (isSubOf) to the clip
    registered under the same name without '-sub' and gets the
    benshiClipSubbed model instead of benshiClip.

    @param clip_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file; the clip number is the text between '-cp' and
      the last '.', with '-sub' removed
    @return boolean
      True once RELS-EXT has been updated (no path in here returns False)
    '''
    global clips_to_pids

    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    clip_pid = fedora.getNextPID(name_space)

    high_resolution_mov_path = get_file_path_from_xpath(clip_mods_parser, url_xpath + "[@displayLabel='High Quality Video']")
    low_resolution_mov_path = get_file_path_from_xpath(clip_mods_parser, url_xpath + "[@displayLabel='Web Quality Video']")

    clip_number = mods_file_name[mods_file_name.find('-cp') + 3:mods_file_name.rfind('.')]
    clip_number = clip_number.replace('-sub','')

    clip_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-') + 1:mods_file_name.rfind('.')])
    clip_object = fedora.createObject(clip_pid, label=clip_label)

    #datastreams
    #mods_file_path is read from module scope, it is not the mods_file_name parameter
    add_MODS_datastream(clip_object, mods_file_path)

    for dsid, mov_path in ((u'HIGHRES', high_resolution_mov_path), (u'LOWRES', low_resolution_mov_path)):
        if not mov_path:
            continue
        #'with' closes the handle even if something other than a
        #FedoraConnectionException escapes from the upload
        with open(mov_path, 'rb') as mov_file_handle:
            try:
                clip_object.addDataStream(dsid, u'aTmpStr', label=dsid, mimeType=u'video/quicktime', controlGroup=u'M', logMessage=u'Added %s datastream.' % dsid)
                datastream = clip_object[dsid]
                datastream.setContent(mov_file_handle)
                logging.info(('Added %s datastream to:' % dsid) + clip_pid)
            except FedoraConnectionException:
                logging.error(('Error in adding %s datastream to:' % dsid) + clip_pid + '\n')

    #relationships
    clip_object_RELS_EXT = fedora_relationships.rels_ext(clip_object, [hamilton_rdf_name_space, fedora_model_namespace])
    clip_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isClipOf'), movie_pid)
    clip_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isClipNumber'), fedora_relationships.rels_object(str(clip_number), fedora_relationships.rels_object.LITERAL))

    #remember the pid so stills, transcripts and subtitled clips can point at this clip
    clips_to_pids[mods_file_name] = clip_pid

    #this section handles the different types of clips (subs or not)
    if '-sub' in mods_file_name:
        #raises KeyError if the un-subtitled clip has not been registered yet
        master_clip_file_name = mods_file_name.replace('-sub','')
        clip_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isSubOf'), clips_to_pids[master_clip_file_name])
        clip_model = ':benshiClipSubbed'
    else:
        clip_model = ':benshiClip'
    clip_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + clip_model)
    clip_object_RELS_EXT.update()
    return True
def handle_misc_mods(misc_mods_parser, mods_file_name):
    '''
    This function will handle the MODS records that are not clips, stills or
    transcripts. What happens depends on the record's local genre:

      'sound recording' - MODS (and MP3, if present) go onto the benshi object
      'Motion Picture'  - MODS and an OPAC redirect datastream go onto the movie object
      'essay'           - new object with a DOCX datastream, isEssayOf the movie
      'presentation'    - new object with a PPTX datastream, isPresentationOf the movie
      'biography'       - new object with DOCX and/or PDF, isNarratorOf the benshi object

    @param misc_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file (not used in here; the path handed to
      add_MODS_datastream is the module level mods_file_path)
    @return boolean
      True when the genre was recognised and handled, False when the record
      has no local genre or the genre is not one of the above
    '''
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    #genre: (url displayLabel, datastream id, mime type, relationship to the movie, content model)
    document_types = {
        'essay': ('Article', u'DOCX', u'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'isEssayOf', ':benshiEssay'),
        'presentation': ('Presentation', u'PPTX', u'application/vnd.openxmlformats-officedocument.presentationml.presentation', 'isPresentationOf', ':benshiPresentation'),
    }

    def attach_file(target_object, target_pid, dsid, mime_type, file_path):
        #Upload file_path as datastream dsid (controlGroup 'M'). 'with' makes
        #sure the handle is closed whatever the upload raises.
        with open(file_path, 'rb') as file_handle:
            try:
                target_object.addDataStream(dsid, u'aTmpStr', label=dsid, mimeType=mime_type, controlGroup=u'M', logMessage=u'Added %s datastream.' % dsid)
                datastream = target_object[dsid]
                datastream.setContent(file_handle)
                logging.info(('Added %s datastream to:' % dsid) + target_pid)
            except FedoraConnectionException:
                logging.error(('Error in adding %s datastream to:' % dsid) + target_pid + '\n')

    def create_misc_object(label_suffix):
        #New object labelled '<movie_name>_<label_suffix>' plus its RELS-EXT wrapper
        new_pid = fedora.getNextPID(name_space)
        new_object = fedora.createObject(new_pid, label=unicode(movie_name + '_' + label_suffix))
        new_rels_ext = fedora_relationships.rels_ext(new_object, [hamilton_rdf_name_space, fedora_model_namespace])
        return new_pid, new_object, new_rels_ext

    def write_relationships(rels_ext, predicate, target_pid, model):
        #hamilton:<predicate> to target_pid, the content model, then commit
        rels_ext.addRelationship(fedora_relationships.rels_predicate('hamilton', predicate), target_pid)
        rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora-model', 'hasModel'), name_space + model)
        rels_ext.update()

    misc_type_list = misc_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'genre'][@type='local']")
    if not misc_type_list:
        return False
    misc_type = misc_type_list[0].text
    print(misc_type)

    if misc_type == 'sound recording':#fix up benshi object
        #datastreams
        add_MODS_datastream(benshi_object, mods_file_path)
        audio_file_path = get_file_path_from_xpath(misc_mods_parser, url_xpath + "[@displayLabel='Audio']")
        if audio_file_path:
            attach_file(benshi_object, benshi_pid, u'MP3', u'audio/mpeg', audio_file_path)
        print(audio_file_path)

    elif misc_type in document_types:
        display_label, dsid, mime_type, predicate, model = document_types[misc_type]
        misc_pid, misc_object, misc_object_RELS_EXT = create_misc_object(misc_type)
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        document_file_path = get_file_path_from_xpath(misc_mods_parser, url_xpath + "[@displayLabel='%s']" % display_label)
        if document_file_path:
            attach_file(misc_object, misc_pid, dsid, mime_type, document_file_path)
        #relationships
        write_relationships(misc_object_RELS_EXT, predicate, movie_pid, model)

    #movie gets the opac redirect it's special
    elif misc_type == 'Motion Picture':#fix up movie object
        #datastreams
        add_MODS_datastream(movie_object, mods_file_path)
        opac_path_list = misc_mods_parser.xpath(url_xpath)
        #a record without any url used to blow up with an IndexError here
        opac_path = opac_path_list[0].text if opac_path_list else None
        if opac_path:
            try:
                movie_object.addDataStream(u'OPAC', u'aTmpStr', label=u'OPAC', mimeType=u'text/html', controlGroup=u'R', location=unicode(opac_path), logMessage=u'Added OPAC datastream.')
                logging.info('Added OPAC datastream to:' + movie_pid)
            except FedoraConnectionException:
                logging.error('Error in adding OPAC datastream to:' + movie_pid + '\n')

    #biography is special it has a docx and a pdf
    #can't use 'get_file_path_from_xpath' these are different than the rest, so the paths are worked out here
    elif misc_type == 'biography':
        misc_pid, misc_object, misc_object_RELS_EXT = create_misc_object('Narrator')
        #get the paths for the pdf/docx
        docx_file_path = False
        pdf_file_path = False
        for path_element in misc_mods_parser.xpath(url_xpath + "[@displayLabel='Article']"):
            #Look at the url text. Testing the element itself never matches a
            #string, and an empty url element has text None.
            url_text = path_element.text or ''
            if 'currently unavailable' in url_text:
                continue
            if '.docx' in url_text:
                docx_file_path = os.path.normpath(os.path.join(mods_directory, url_text))
            elif 'pdf' in url_text:
                pdf_file_path = os.path.normpath(os.path.join(mods_directory, url_text))
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        if docx_file_path:
            attach_file(misc_object, misc_pid, u'DOCX', u'application/vnd.openxmlformats-officedocument.wordprocessingml.document', docx_file_path)
        if pdf_file_path:
            attach_file(misc_object, misc_pid, u'PDF', u'application/pdf', pdf_file_path)
        #relationships
        write_relationships(misc_object_RELS_EXT, 'isNarratorOf', benshi_pid, ':benshiNarrator')

    else:
        return False
    return True
def __processProgram(self):
    '''
    Get (by programConcertID) or create the Fedora object for this concert's
    program, attach the PDF named by its 'ruta' element, and relate it to the
    concert and to the authors of its notes.

    One author object is looked up in Person._people() (or created) for every
    'AutorNotas[@id]' element. When the program carries 'Notas_Obras/Obra[@id]'
    entries, the shared '<prefix>:composerText' placeholder author is used
    (and created if it cannot be fetched). Every author gets an EAC-CPF record
    and state 'A'.

    Does nothing when the concert has no 'programa' element or the element
    has no children. Expects self.concert_obj to be set already.
    '''
    p_el = self.element.find('programa')
    #find() gives None for a concert without a program; the old code
    #dereferenced p_el before looking at it and crashed on such concerts
    if p_el is None or len(p_el) == 0:
        return

    def load_or_create_eaccpf(obj):
        #Parse obj's EAC-CPF datastream if there is one; a 404 from Fedora
        #means there is none yet, so start an empty record. Other connection
        #errors are re-raised untouched.
        try:
            return CPF.EACCPF(obj.pid, xml=obj['EAC-CPF'].getContent().read()), "modified"
        except fcrepo.connection.FedoraConnectionException as e:
            if e.httpcode != 404:
                raise
            return CPF.EACCPF(obj.pid), "created"

    filename = self.getPath(p_el.findtext('ruta'))

    try:
        pid = FedoraWrapper.getPid(uri=Concert.NS['fjm-db'].uri, predicate='programConcertID', obj="'%s'" % self.dbid)
        program = FedoraWrapper.client.getObject(pid)
    except KeyError:
        #Get a Fedora Object for the program
        program = FedoraWrapper.getNextObject(self.prefix, label='Program for concert %(dbid)s' % {'dbid': self.dbid})

    #Add the PDF to the program object... Should probably do an "existence" check, but anyway...
    update_datastream(obj=program, dsid='PDF', filename=filename, mimeType='application/pdf')

    #Create the RELS-EXT datastream
    rels_ext = FR.rels_ext(obj=program, namespaces=ao.NS.values())
    rels = [
        (
            FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
            FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:programCModel', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fjm-db', predicate='programConcertID'),
            FR.rels_object(self.dbid, FR.rels_object.LITERAL)
        )
    ]

    for a_el in p_el.findall('AutorNotas[@id]'):
        fore, sur = a_el.findtext('Nombre'), a_el.findtext('Apellidos')
        normed = self.normalize_name([fore, sur])
        try:
            pid = Person._people()[normed]
            author = FedoraWrapper.client.getObject(pid)
        except KeyError:
            author = FedoraWrapper.getNextObject(self.prefix, label="an author")

        dc = dict()
        dc['title'] = [normed]
        Concert.save_dc(author, dc)

        rels.append(
            (
                FR.rels_predicate(alias='atm-rel', predicate='authoredBy'),
                FR.rels_object(author.pid, FR.rels_object.PID)
            )
        )
        FedoraWrapper.addRelationshipsWithoutDup([
            (
                FR.rels_predicate(alias='fedora-model', predicate="hasModel"),
                FR.rels_object('atm:personCModel', FR.rels_object.PID)
            )
        ], fedora=author).update()

        eaccpf, event_type = load_or_create_eaccpf(author)
        eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_concert.py")
        eaccpf.add_XML_source(caption='(Slightly modified (Put into an element)) XML from database dump', xml=a_el)
        eaccpf.add_name_entry(name={'forename': fore, 'surname': sur})

        Concert.save_etree(author, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')
        author.state = unicode('A')

    #XXX: This is seeming particularly less-than-elegant at the moment, creating a 'placeholder' object for composer notes... Anyway.
    if p_el.findall('Notas_Obras/Obra[@id]'):
        pid = '%s:composerText' % self.prefix
        try:
            author = FedoraWrapper.client.getObject(pid)
        except Exception:
            #Any failure to fetch is taken to mean the placeholder does not
            #exist yet; KeyboardInterrupt/SystemExit are no longer swallowed
            author = FedoraWrapper.client.createObject(pid, label=unicode('Composer Text'))

        eaccpf, event_type = load_or_create_eaccpf(author)
        eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_concert.py")
        name = {'forename': 'Texto', 'surname': 'Compositores'}
        eaccpf.add_name_entry(name=name)

        dc = dict()
        #NOTE(review): normalize_name is handed a dict here and a list
        #everywhere else, and the title is not wrapped in a list - confirm
        dc['title'] = Concert.normalize_name(name)
        Concert.save_dc(author, dc)
        Concert.save_etree(author, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')

        rels.append(
            (
                FR.rels_predicate(alias='atm-rel', predicate='authoredBy'),
                FR.rels_object(author.pid, FR.rels_object.PID)
            )
        )
        author.state = unicode('A')

    #Write 'out' rels_ext: the relationships gathered above were never
    #committed, so the program ended up related to nothing
    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
def handle_transcript_mods(transcript_mods_parser, mods_file_name):
    '''
    This function will handle the creation of transcript objects

    The new object is handed to add_MODS_datastream and gets a
    TimeSyncedTranscript and/or PDF datastream for whichever documents the
    MODS record points at. It is related (isTranscriptOf) to the clip named in
    the record when exactly one clip is named, otherwise to the movie. The
    content model is picked from the file name: '-jpneng', '-jpn' or '-eng'.

    @param transcript_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file; the text between '-tr-' and the last '.' is
      appended to the movie name to build the object label
    @return boolean
      True on success, False if the file name matches none of the three
      transcript types (the object and its datastreams exist by then, but
      RELS-EXT is not updated)
    '''
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    transcript_pid = fedora.getNextPID(name_space)
    transcript_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-tr-') + 4:mods_file_name.rfind('.')])
    transcript_object = fedora.createObject(transcript_pid, label=transcript_label)
    transcript_object_RELS_EXT = fedora_relationships.rels_ext(transcript_object, [hamilton_rdf_name_space, fedora_model_namespace])

    transcript_path = get_file_path_from_xpath(transcript_mods_parser, url_xpath + "[@displayLabel='Document']")
    time_synced_transcript_path = get_file_path_from_xpath(transcript_mods_parser, url_xpath + "[@displayLabel='Document with time-sync encoding']")

    #datastreams
    #mods_file_path is read from module scope, it is not the mods_file_name parameter
    add_MODS_datastream(transcript_object, mods_file_path)

    #(datastream id, label, mime type, file path)
    documents = (
        (u'TimeSyncedTranscript', u'POPCORN', u'application/xml', time_synced_transcript_path),
        (u'PDF', u'PDF', u'application/pdf', transcript_path),
    )
    for dsid, label, mime_type, document_path in documents:
        if not document_path:
            continue
        #'with' closes the handle even if something other than a
        #FedoraConnectionException escapes from the upload
        with open(document_path, 'rb') as document_handle:
            try:
                transcript_object.addDataStream(dsid, u'aTmpStr', label=label, mimeType=mime_type, controlGroup=u'M', logMessage=u'Added %s datastream.' % dsid)
                datastream = transcript_object[dsid]
                datastream.setContent(document_handle)
                logging.info(('Added %s datastream to:' % dsid) + transcript_pid)
            except FedoraConnectionException:
                logging.error(('Error in adding %s datastream to:' % dsid) + transcript_pid + '\n')

    #relationships
    #handle is transcript of
    transcript_clip_element_list = transcript_mods_parser.xpath(url_xpath + "[@displayLabel='Video clip']")
    if len(transcript_clip_element_list) == 1:#there are multiple related items if the transcript is for the whole movie
        #raises KeyError if the clip has not been registered in clips_to_pids
        transcript_clip_file_name = transcript_clip_element_list[0].text
        transcript_target = clips_to_pids[transcript_clip_file_name]
    else:
        transcript_target = movie_pid
    transcript_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isTranscriptOf'), transcript_target)

    #handle the 3 different transcript types ('-jpneng' has to be tested before '-jpn')
    if '-jpneng' in mods_file_name:
        transcript_model = ':EnglishJapaneseTranscript'
    elif '-jpn' in mods_file_name:
        transcript_model = ':JapaneseTranscript'
    elif '-eng' in mods_file_name:
        transcript_model = ':EnglishTranscript'
    else:
        return False
    transcript_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + transcript_model)
    transcript_object_RELS_EXT.update()
    return True
controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error( 'Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT = fedora_relationships.rels_ext( collection_object, fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOf', 'islandora:root') collection_object_RELS_EXT.addRelationship( fedora_relationships.rels_predicate('fedora-model', 'hasModel'), 'islandora:collectionCModel') collection_object_RELS_EXT.update() #put in the benshi Islandora:BenshiMovie content model try: model_pid = u'islandora:benshiMovie' fedora.getObject(model_pid) except FedoraConnectionException, object_fetch_exception: if object_fetch_exception.httpcode in [404]: logging.info('islandora:BenshiMovie missing, creating object.\n') model_object = fedora.createObject(model_pid, label=u'BenshiMovieCModel') #add relationships model_object_RELS_EXT = fedora_relationships.rels_ext( model_object, fedora_model_namespace) model_object_RELS_EXT.addRelationship(
def __processPerformance(self, p_el):
    """Create or update the Fedora objects for one performance in this concert.

    Three kinds of objects are handled, in this order:

    1. The performance itself (looked up by piece, concert membership and
       concert order; created if the lookup raises KeyError or yields
       nothing), including its optional MP3 datastream and RELS-EXT.
    2. One object per 'Movimientos/Movimiento' child that has a 'posicion'
       attribute, with DC, RELS-EXT and an optional MP3 datastream.
    3. One object per 'Interpretes/Interprete' child that has an 'id'
       attribute, related to the performance, its group and its instruments.

    p_el -- the element describing the performance; it is read through
            get(), findtext() and findall(), and its children through
            sourceline (presumably an lxml element -- confirm with caller).

    Returns nothing; problems with individual movements/performers are
    logged and the offending entry is skipped.
    """
    logger = logging.getLogger('ingest.atm_concert.Concert.__processPerformance')
    p_dict = {
        'piece': p_el.get('id_obra'),
        'concert': self.dbid,
        'order': p_el.findtext('Posicion')
    }

    #TODO: Bloody well deduplicate (ensure that this object does not already exist in Fedora)
    try:
        pid = FedoraWrapper.getPid(tuples=[
            #Not sure if this is really necessary with the other two conditions...
            (Concert.NS['fjm-db'].uri, 'basedOn', "'%s'" % p_dict['piece']),
            #To ensure that the performance actually belongs to this concert...
            ('fedora-rels-ext:', 'isMemberOf', "<fedora:%s>" % self.concert_obj.pid),
            #To eliminate the confusion if the same piece is played twice in the same concert.
            (Concert.NS['atm-rel'].uri, 'concertOrder', "'%s'" % p_dict['order'])
        ])
        performance = FedoraWrapper.client.getObject(pid) if pid else None
    except KeyError:
        performance = None
    if performance is None:
        #Either the lookup raised KeyError or it produced nothing usable;
        #in both cases a fresh object is needed (previously an empty pid
        #left 'performance' unbound and crashed further down).
        performance = FedoraWrapper.getNextObject(
            self.prefix,
            label='Performance of %(piece)s in %(concert)s' % p_dict)

    #Add MP3 to performance (if there is one to add)
    p_mp3 = p_el.findtext('mp3_Obra')
    if p_mp3:
        mp3_path = self.getPath(p_mp3)
        if path.exists(mp3_path):
            update_datastream(obj=performance, dsid='MP3', filename=mp3_path, mimeType='audio/mpeg')
        else:
            logger.warning('MP3 entry for performance of %(piece)s in concert %(concert)s, but the file does not exist!' % p_dict)
    else:
        logger.debug('No performance MP3 for %(concert)s/%(piece)s' % p_dict)

    #Add relationships
    #1 - To concert
    #2 - To score
    #3 - To CM
    #4 - Position in concert
    rels_ext = FR.rels_ext(obj=performance, namespaces=ao.NS.values())
    rels = [
        (
            FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
            FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:performanceCModel', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='atm-rel', predicate='concertOrder'),
            FR.rels_object(p_dict['order'], FR.rels_object.LITERAL)
        ),
        (
            FR.rels_predicate(alias='fjm-db', predicate='basedOn'),
            FR.rels_object(p_dict['piece'], FR.rels_object.LITERAL)
        )
    ]

    #Add relations and commit
    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
    FedoraWrapper.correlateDBEntry('basedOn', 'scoreID')

    #Create objects for any movements within the piece
    for m_el in p_el.findall('Movimientos/Movimiento'):
        m_dict = {
            'concert': p_dict['concert'],
            'piece': p_dict['piece'],
            'id': m_el.get('id'),
            'corder': p_dict['order'],
            'porder': m_el.get('posicion'),
            'name': m_el.findtext('NOMBRE'),
            'MP3': m_el.findtext('mp3_Movimiento'),
            'line': m_el.sourceline,
            'file': self.file_name
        }

        #Sanity test: a movement without a position cannot be looked up or ordered
        if not m_dict['porder']:
            logger.error('Movement %(concert)s/%(piece)s/%(id)s does not have a position near line %(line)s of %(file)s!' % m_dict)
            continue

        #Get a Fedora Object for this movement
        try:
            pid = FedoraWrapper.getPid(tuples=[
                ('fedora-rels-ext:', 'isMemberOf', '<fedora:%s>' % performance.pid),
                ('fedora-model:', 'hasModel', '<fedora:atm:movementCModel>'),
                (Concert.NS['atm-rel'].uri, 'pieceOrder', "'%s'" % m_dict['porder'])
            ])
            mov = FedoraWrapper.client.getObject(pid)
        except KeyError:
            mov = FedoraWrapper.getNextObject(
                self.prefix,
                label='Movement: %(concert)s/%(piece)s/%(id)s' % m_dict)

        #Get DC and set the title if we have a name.
        mov_dc = dict()
        mov_dc['type'] = [unicode('Event')]
        if m_dict['name']:
            mov_dc['title'] = [unicode(m_dict['name'])]
        Concert.save_dc(mov, mov_dc)

        #Set the three required relations:
        #1 - To the performance
        #2 - To the content model
        #3 - The order this movement occurs within the piece
        m_rels_ext = FR.rels_ext(obj=mov, namespaces=Concert.NS.values())
        m_rels = [
            (
                FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
                FR.rels_object(performance.pid, FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:movementCModel', FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='atm-rel', predicate='pieceOrder'),
                FR.rels_object(m_dict['porder'], FR.rels_object.LITERAL)
            )
        ]
        FedoraWrapper.addRelationshipsWithoutDup(m_rels, rels_ext=m_rels_ext).update()

        #Add the MP3 (if it exists)
        if m_dict['MP3']:
            mp3_path = self.getPath(m_dict['MP3'])
            if path.exists(mp3_path):
                update_datastream(obj=mov, dsid='MP3', filename=mp3_path, mimeType='audio/mpeg')
            else:
                #The message used to stop before stating the actual problem.
                logger.warning("MP3 entry for movement %(id)s in performance of %(piece)s in %(concert)s on line %(line)s of %(file)s, but the file does not exist!" % m_dict)
        else:
            logger.debug('No movement MP3 for %(concert)s/%(piece)s/%(id)s on line %(line)s of %(file)s' % m_dict)
    #Done with movements

    #Create objects for the performers.
    for per_el in p_el.findall('Interpretes/Interprete'):
        perf = {
            'id': per_el.get('id'),
            'group': per_el.get('id_grupo', default=None),
            'line': per_el.sourceline,
            'file': self.file_name
        }
        perf.update(p_dict)

        if not perf['id']:
            logger.error("Performer on line %(line)s of %(file)s does not have an ID!" % perf)
            continue

        rels = [
            #Relate performer to CModel
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:performerCModel', FR.rels_object.PID)
            ),
            #Relate performer to performance
            (
                FR.rels_predicate(alias='atm-rel', predicate='performance'),
                FR.rels_object(performance.pid, FR.rels_object.PID)
            ),
            #Relate perfomer to their 'person' entry
            (
                FR.rels_predicate(alias='fjm-db', predicate='player'),
                FR.rels_object(perf['id'], FR.rels_object.LITERAL)
            )
        ]

        #Look the performer up using only the three identifying relations
        #above; group and instrument relations are appended afterwards.
        try:
            t_list = list()
            for pred, obj in rels:
                if obj.type == FR.rels_object.LITERAL:
                    t_obj = "'%s'" % obj
                else:
                    t_obj = "<fedora:%s>" % obj
                t_list.append(("%s" % Concert.NS[pred.alias].uri, "%s" % pred.predicate, "%s" % t_obj))
            pid = FedoraWrapper.getPid(tuples=t_list)
            performer = FedoraWrapper.client.getObject(pid) if pid else None
        except KeyError:
            performer = None
        if performer is None:
            #Same fallback as for the performance: never leave the name unbound.
            performer = FedoraWrapper.getNextObject(
                prefix=self.prefix,
                label='Performer: %(concert)s/%(piece)s/%(id)s in group %(group)s' % perf)

        #Relate the performer to the listed group (or 'unaffiliated, if none)
        if perf['group'] is not None:
            rels.append(
                (
                    FR.rels_predicate(alias='fjm-db', predicate='group'),
                    FR.rels_object(perf['group'], FR.rels_object.LITERAL)
                )
            )
        else:
            rels.append(
                (
                    FR.rels_predicate(alias='atm-rel', predicate='group'),
                    FR.rels_object('atm:unaffiliatedPerfomer', FR.rels_object.PID)
                )
            )

        for i_el in per_el.findall('Instrumentos/Instrumento'):
            inst_id = i_el.get('id')
            rels.append(
                (
                    FR.rels_predicate(alias='fjm-db', predicate='instrument'),
                    FR.rels_object(inst_id, FR.rels_object.LITERAL)
                )
            )

        FedoraWrapper.addRelationshipsWithoutDup(rels, fedora=performer).update()
        FedoraWrapper.correlateDBEntry('player', 'performerID')
        FedoraWrapper.correlateDBEntry('group', 'groupID')
        FedoraWrapper.correlateDBEntry('instrument', 'instrumentID')
def process(self):
    """Ingest this score into Fedora, creating or updating its object.

    The score is looked up by its 'scoreID' relationship; a KeyError from
    the lookup means it does not exist yet and a new object is created.
    Afterwards the RELS-EXT relationships, the DC datastream and (when
    'Ruta_Partitura' is present in the element) the PDF and MARCXML
    datastreams are written.

    Raises Exception if the lookup succeeds but yields an empty pid.
    """
    logger = self.logger
    logger.info('Starting to ingest: Score %s' % self.dbid)

    try:
        pid = FedoraWrapper.getPid(uri=ao.NS['fjm-db'].uri, predicate='scoreID', obj="'%s'" % self.dbid)
        if pid:
            logger.warning('Score %(id)s already exists as pid %(pid)s! Overwriting PDF and DC DSs!' % {'id': self.dbid, 'pid': pid})
            score = FedoraWrapper.client.getObject(pid)
        else:
            raise Exception('Something went horribly wrong! Found a pid, but couldn\'t access it...')
    except KeyError:
        score = FedoraWrapper.getNextObject(self.prefix, label='Score %s' % self.dbid)

    rels_ext = FR.rels_ext(score, namespaces=ao.NS.values())
    rels = [
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:scoreCModel', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fjm-db', predicate='scoreID'),
            FR.rels_object(self.dbid, FR.rels_object.LITERAL)
        )
    ]

    titn = self.element.findtext('titn_partitura')
    if titn:
        rels.append(
            (
                FR.rels_predicate(alias='fjm-titn', predicate='score'),
                FR.rels_object(titn, FR.rels_object.LITERAL)
            )
        )

    #FIXME: 'Direction' of composer relation... Should I go from the score to the composer, or (as I think I do in my hand-made objects) from the composer to the score... Or should I make the relationships go in both directions?
    composer = self.element.findtext('ID_COMPOSITOR')
    if composer:
        rels.append(
            (
                FR.rels_predicate(alias='fjm-db', predicate='composedBy'),
                FR.rels_object(composer, FR.rels_object.LITERAL)
            )
        )

    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
    FedoraWrapper.correlateDBEntry('composedBy', 'composerID')
    FedoraWrapper.correlateDBEntry('basedOn', 'scoreID')

    dc = dict()
    dc['type'] = [unicode('StillImage')]
    #Only record a title when the element actually has one; findtext()
    #yields None for a missing 'TITULO', which would otherwise be stored
    #as the title value.
    title = self.element.findtext('TITULO')
    if title:
        dc['title'] = [title]
    Score.save_dc(score, dc)

    filename = self.element.findtext('Ruta_Partitura')
    if filename:
        fn = self.getPath(filename)
        if path.exists(fn):
            update_datastream(obj=score, dsid='PDF', label="Score PDF", filename=fn, mimeType='application/pdf')
        else:
            logger.error('PDF specified for score %(id)s, but file does not seem to exist!' % {'id': self.dbid})

        #The MARC record is expected next to the PDF, named after the score's id.
        marc = self.getPath(path.join(path.dirname(filename), '%s.xml' % self.dbid))
        if path.exists(marc):
            update_datastream(obj=score, dsid='MARCXML', label="MARC XML", filename=marc, mimeType='application/xml')
    else:
        logger.info('No PDF for %s', self.dbid)
if object_fetch_exception.httpcode in [404]: logging.info(name_space + ':JapaneseSilentFilmCollection missing, creating object.\n') collection_object = fedora.createObject(collection_pid, label = collection_label) #collection_policy try: collection_object.addDataStream(u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error('Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT=fedora_relationships.rels_ext(collection_object,fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOf','islandora:root') collection_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'islandora:collectionCModel') collection_object_RELS_EXT.update() #put in the benshi Islandora:BenshiMovie content model try: model_pid = u'islandora:benshiMovie' fedora.getObject(model_pid) except FedoraConnectionException, object_fetch_exception: if object_fetch_exception.httpcode in [404]: logging.info('islandora:BenshiMovie missing, creating object.\n') model_object = fedora.createObject(model_pid, label = u'BenshiMovieCModel') #add relationships model_object_RELS_EXT=fedora_relationships.rels_ext(model_object,fedora_model_namespace) model_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'fedora-system:ContentModel-3.0') model_object_RELS_EXT.update()