예제 #1
0
def handle_base_object(fedora_client, item, ns, cm):
    """
    Create the base object record in Fedora for a legacy workflow item.

    The object's pid is "<ns>:<item.do_id>" and its parent is "<ns>:root".
    The MODS, DC and THUMB Item_File records are only checked for existence
    here; nothing in this function attaches them as datastreams.

    @param fedora_client: Fedora client; queried with getObject(pid) and
        passed through to addObjectToFedora
    @param item: The django item object from legacy workflow
    @param ns: The namespace to be used for the object's pid
    @param cm: The pid of the content model to be associated with the object

    Other required data fields come from the item record itself: pid, label

    @return: the existing Fedora object when one is already stored under the
        pid; None when a required MODS/DC/THUMB lookup fails; False when
        creating the object fails.
        NOTE(review): after a successful create the function also returns
        None (the new object is not handed back) -- confirm whether callers
        expect the object in that case.
    """
    print('%s - handle base object' % (item.do_id,))
    parent_pid = '%s:root' % (ns,)
    pid = '%s:%s' % (ns, item.do_id)

    # if this object already exists, return (for now)
    try:
        return fedora_client.getObject(pid)
    except Exception:
        # A failed lookup is taken to mean "not ingested yet", so carry on
        # and create it.  Exception (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        pass

    # validate required objects, (for now) skip if not found
    try:
        item_files = workflow.core.models.Item_File.objects
        for use in ('MODS', 'DC', 'THUMB'):
            item_files.get(item=item, use=use)
    except Exception as ex:
        # Still best-effort, but leave a trace of why the item was skipped.
        print('%s - required MODS/DC/THUMB file lookup failed, skipping: %s'
              % (item.do_id, ex))
        return None

    # The label is only needed when a new object is actually created.
    label = drl.utils.shorten_string(item.name, 245)
    try:
        addObjectToFedora(fedora_client, label, pid, parent_pid, cm)
    except Exception as ex:
        # Format the exception itself: `.message` is deprecated since
        # Python 2.6 and is empty for multi-argument exceptions.
        print('connection error while trying to add fedora object %s: %s'
              % (pid, ex))
        return False
    return None
예제 #2
0
def handle_page_object(fedora_client, fedora_object, page, ocr_path, label):
    """
    Ingest one page as a member of the book object `fedora_object`.

    The page pid is "<book pid>-<page file basename>" and its label is
    "<label>, <book label shortened to 205>".  Besides the relationships set
    up by addObjectToFedora, the page gets two extra ones in the pageNS
    namespace: isPageNumber (the integer value of the file basename) and
    isPageOf (the book pid).

    According to the original notes a page should carry:
        - MODS (this should be based on parent book mods, but with page label from METS structmap)
        - JP2 (derived from TIFF)
        - MIX
        - OCR, if available
    Only the TIFF upload, the handle_derived_jp2 call and the optional OCR
    upload happen in this function.

    @param fedora_client: client passed through to addObjectToFedora
    @param fedora_object: the parent book object (its pid and label are read)
    @param page: page file record (its name and path are read)
    @param ocr_path: path of the page's OCR text file, or a falsy value if none
    @param label: page label used as the first part of the object label
    """
    content_model = ITEM_TYPE_CM_MAP['page']
    basename = os.path.splitext(page.name)[0]
    book_pid = fedora_object.pid
    child_pid = '%s-%s' % (book_pid, basename)
    short_book_label = drl.utils.shorten_string(fedora_object.label, 205)
    child_label = u'%s, %s' % (label, short_book_label)

    page_namespaces = {'pageNS': 'info:islandora/islandora-system:def/pageinfo#'}
    predicate = fedora_relationships.rels_predicate
    # should the page number be a counter here instead of int(basename)?
    page_relationships = {}
    page_relationships[predicate('pageNS', 'isPageNumber')] = str(int(basename))
    page_relationships[predicate('pageNS', 'isPageOf')] = str(book_pid)

    page_object = addObjectToFedora(
        fedora_client,
        child_label,
        child_pid,
        book_pid,
        content_model,
        extraNamespaces=page_namespaces,
        extraRelationships=page_relationships
    )

    # Upload the page image itself, then hand off to the JP2 helper.
    fedoraLib.update_datastream(
        page_object,
        'TIFF',
        page.path,
        label=page.name,
        mimeType='image/tiff',
        controlGroup='M'
    )
    handle_derived_jp2(page_object, page)
    # MIX handling (handle_derived_mix) is currently disabled.
    if ocr_path:
        ocr_label = unicode(os.path.basename(ocr_path))
        fedoraLib.update_datastream(
            page_object,
            u'OCR',
            ocr_path,
            label=ocr_label,
            mimeType=u'text/plain',
            controlGroup='M'
        )