def handle_base_object(fedora_client, item, ns, cm):
    """
    Create the base object record in Fedora.

    The MODS, DC and THUMB files of the item are only checked for presence
    here; this function does not attach them as datastreams.

    @param fedora_client: The client used to look up and create the object
    @param item: The django item object from legacy workflow
    @param ns: The namespace to be used for the object's pid
    @param cm: The pid of the content model to be associated with the object
    Other required data fields come from the item record itself: pid, label

    @return: the existing object when the lookup by pid succeeds; the newly
        created object when creation succeeds; None when one of the required
        MODS/DC/THUMB files cannot be fetched for the item; False when
        creating the object raised an error.
    """
    print('%s - handle base object' % (item.do_id,))
    parent_pid = '%s:root' % (ns,)
    pid = '%s:%s' % (ns, item.do_id)
    label = drl.utils.shorten_string(item.name, 245)

    # if this object already exists, return (for now)
    # Any failure of the lookup is treated as "object not there yet".
    # `except Exception` (rather than a bare except) lets KeyboardInterrupt
    # and SystemExit propagate.
    try:
        return fedora_client.getObject(pid)
    except Exception:
        pass

    # validate required objects, (for now) skip if not found
    # The lookups are done purely for their side effect of raising when the
    # file record cannot be fetched; the records themselves are not used.
    try:
        for use in ('MODS', 'DC', 'THUMB'):
            workflow.core.models.Item_File.objects.get(item=item, use=use)
    except Exception:
        return None

    try:
        # Hand the created object back so callers get the same kind of
        # result as on the "already exists" path above.
        return addObjectToFedora(fedora_client, label, pid, parent_pid, cm)
    except Exception as ex:
        print('connection error while trying to add fedora object %s: %s' % (pid, ex.message))
        return False
def handle_page_object(fedora_client, fedora_object, page, ocr_path, label):
    """
    Create a page object in Fedora as a member of the given book object.

    The page pid is the book pid plus the page file's basename (extension
    removed); the page label is the given label followed by the book label
    passed through drl.utils.shorten_string.  Besides the usual
    relationships, the page is given two extra ones in the islandora
    pageinfo namespace: isPageNumber and isPageOf.

    Datastreams handled here:
        - TIFF (the page file itself)
        - JP2 (derived from the TIFF via handle_derived_jp2)
        - OCR, if ocr_path is given
    Still wanted but not done here:
        - MODS (this should be based on parent book mods, but with page
          label from METS structmap)
        - MIX (the handle_derived_mix call is currently disabled)

    @param fedora_client: The client passed through to addObjectToFedora
    @param fedora_object: The parent book object (its pid and label are used)
    @param page: The page file record (its name and path are used)
    @param ocr_path: Path of the OCR text file, or a false value to skip OCR
    @param label: The page label to prepend to the book label

    Raises ValueError when the page basename is not an integer string.
    """
    content_model = ITEM_TYPE_CM_MAP['page']
    stem, _extension = os.path.splitext(page.name)
    book_pid = fedora_object.pid
    new_pid = '%s-%s' % (book_pid, stem)
    short_book_label = drl.utils.shorten_string(fedora_object.label, 205)
    new_label = u'%s, %s' % (label, short_book_label)

    namespaces = {}
    namespaces['pageNS'] = 'info:islandora/islandora-system:def/pageinfo#'

    # should the page number be a counter here instead of int(stem)?
    number_predicate = fedora_relationships.rels_predicate('pageNS', 'isPageNumber')
    page_number = str(int(stem))
    parent_predicate = fedora_relationships.rels_predicate('pageNS', 'isPageOf')
    parent_pid = str(fedora_object.pid)
    relationships = {
        number_predicate: page_number,
        parent_predicate: parent_pid,
    }

    page_object = addObjectToFedora(
        fedora_client,
        new_label,
        new_pid,
        fedora_object.pid,
        content_model,
        extraNamespaces=namespaces,
        extraRelationships=relationships,
    )

    # The master image goes in first; the JP2 derivative is built from it.
    fedoraLib.update_datastream(
        page_object,
        'TIFF',
        page.path,
        label=page.name,
        mimeType='image/tiff',
        controlGroup='M',
    )
    handle_derived_jp2(page_object, page)
    # MIX derivation is switched off for now:
    #handle_derived_mix(page_object, page)

    # OCR is optional; nothing more to do without it.
    if not ocr_path:
        return

    ocr_filename = os.path.basename(ocr_path)
    fedoraLib.update_datastream(
        page_object,
        u'OCR',
        ocr_path,
        label=unicode(ocr_filename),
        mimeType=u'text/plain',
        controlGroup='M',
    )