Пример #1
0
    # collection policy
    fedoraLib.update_datastream(collection_object,
                                u"COLLECTION_POLICY",
                                "data/collection_policy.xml",
                                label=u'COLLECTION_POLICY',
                                mimeType=u'text/xml',
                                controlGroup=u'X')

    # thumbnail, if one is supplied
    if tnUrl:
        tnExt = os.path.splitext(tnUrl)[1]
        fedoraLib.update_datastream(collection_object,
                                    u'TN',
                                    tnUrl,
                                    label=u"%s_TN%s" % (myLabel, tnExt),
                                    mimeType=misc.getMimeType(tnExt))

    # rels-ext relations
    collection_relsext = createRelsExt(collection_object, parentPid,
                                       contentModel)

    return collection_object


def addObjectToFedora(fedora,
                      myLabel,
                      myPid,
                      parentPid,
                      contentModel,
                      tnUrl=None,
                      extraNamespaces={},
        return collection_object
    except FedoraConnectionException, fcx:
        if not fcx.httpcode in [404]:
            raise fcx
        # if it is a 404, then we're ok - just make the object and continue

    collection_object = fedora.createObject(myPid, label=myLabel)

    # this is the biggest difference between objects and collections - a collection policy
    # collection policy
    fedoraLib.update_datastream(collection_object, u"COLLECTION_POLICY", "data/collection_policy.xml", label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X')

    # thumbnail, if one is supplied
    if tnUrl:
        tnExt = os.path.splitext(tnUrl)[1]
        fedoraLib.update_datastream(collection_object, u'TN', tnUrl, label=u"%s_TN%s" % (myLabel, tnExt), mimeType=misc.getMimeType(tnExt))

    # rels-ext relations
    collection_relsext = createRelsExt(collection_object, parentPid, contentModel)

    return collection_object

def addObjectToFedora(fedora, myLabel, myPid, parentPid, contentModel, tnUrl=None, state=u'A', extraNamespaces={}, extraRelationships={}):
    """
    Add an object (not a collection) to fedora
    @param fedora The fedora instance to add the object to
    @param myLabel The label to apply to the object object
    @param myPid The pid of the object to try and create, if the pid is already a valid object/collection, then return that object instead
    @parentPid The parent object to nest this one under
    @contentModel The content model to attach to this object
    @tnUrl [optional] The url of an image to use as the thumbnail
Пример #3
0
def addBookPageToFedora(inputTiff, tmpDir):
    '''
    Helper function that creates and configures a fedora object for one book
    page, based on the input image.

    The tiff is run through the converter (JP2 plus PDF/Text OCR), its EXIF
    data is dumped to xml, and the tiff together with the <name>.jp2, .pdf,
    .txt and .xml files read back from tmpDir are attached as managed
    datastreams of a new object in the u'uofm' namespace.  The object is
    related to the module-level bookPid and the path of the page pdf is stored
    in the module-level pagesDict under the page number.  Any other file in
    the tiff's directory that shares the tiff's base name is attached as an
    extra datastream named after its extension.

    @param inputTiff: the archival data source; the file name has to start
        with "<number>_" and end in .tif or .tiff
    @param tmpDir: file directory where non-archival stuff gets put

    @return bool: true on function success, false when the tiff file name
        cannot be interpreted
    '''
    global pagesDict

    #run conversions
    converter.tif_to_jp2(inputTiff, tmpDir, 'default', 'default')
    converter.tif_OCR(inputTiff, tmpDir, {'PDF': 'default', 'Text': 'default'})

    fullTiffDir = os.path.dirname(inputTiff)
    tifDir = os.path.basename(fullTiffDir)
    tiffName = os.path.basename(inputTiff)

    #split off the tiff extension; reject anything else before a fedora
    #object gets created, so a bad name cannot leave a half-built object behind
    tifExt = None
    for candidateExt in ('.tiff', '.tif'):
        if tiffName.endswith(candidateExt):
            tifExt = candidateExt
            break
    if tifExt is None:
        logging.error('Bad tiff file name: ' + inputTiff +
                      ' does not end in .tif or .tiff\n')
        return False
    tiffNameNoExt = tiffName[:-len(tifExt)]

    #determine page number: used for naming
    try:
        #both a missing underscore and a non-numeric prefix raise ValueError
        pageNumber = int(tiffName[0:tiffName.index('_')])
    except ValueError:
        logging.error('Bad tiff file name: ' + inputTiff +
                      ' has no numeric prefix before an underscore\n')
        return False

    #if front cover
    if tiffName.count('front_cover') == 1:
        pageNumber = 1
    elif tiffName.count('inner_cover') == 1:
        pageNumber = 2
    #if it's the inner leaf
    elif tiffName.count('inner_leaf') == 1:
        pageNumber = 3
    #if back cover: it is the last page, so its number is the tiff count
    elif tiffName.count('back_cover') == 1:
        pageNumber = len([entry for entry in os.listdir(fullTiffDir)
                          if entry.endswith(('.tif', '.tiff'))])
    #standard a [left side]
    elif tiffName.count('a') == 1:
        if pageNumber == 1:
            pageNumber = 4
        else:
            pageNumber = pageNumber * 2 + 2
    #standard b [right side]
    elif tiffName.count('b') == 1:
        if pageNumber == 1:
            pageNumber = 5
        else:
            pageNumber = pageNumber * 2 + 3
    else:
        logging.error('Bad tiff file name: ' + inputTiff +
                      ' giving fileNumber: ' + str(pageNumber) + '\n')
        return False

    logging.info('Working on ingest of page: ' + str(pageNumber) +
                 ' with source file: ' + inputTiff)

    #create the fedora book page object
    pagePid = fedora.getNextPID(u'uofm')
    myLabel = unicode(tifDir + '_Page' + str(pageNumber))
    obj = fedora.createObject(pagePid, label=myLabel)

    #create ingest urls
    baseInUrl = os.path.join(fullTiffDir, tiffNameNoExt)
    baseOutUrl = os.path.join(tmpDir, tiffNameNoExt)

    #content handed to addDataStream when a datastream is created; the real
    #content is pushed right afterwards with setContent
    placeholder = u'smelly'

    #the image and pdf files are binary, so they are opened in 'rb'; the with
    #blocks close every handle even when an upload fails
    #NOTE(review): assumes setContent has consumed the handle by the time it
    #returns - confirm against the fedora client library
    with open(baseInUrl + tifExt, 'rb') as tiffUrl, \
            open(baseOutUrl + '.jp2', 'rb') as jp2Url, \
            open(baseOutUrl + '.pdf', 'rb') as pdfUrl, \
            open(baseOutUrl + '.txt') as ocrUrl:
        #this gets the metadata for the page from the tif
        exifPath = baseOutUrl + '.xml'
        converter.exif_to_xml(inputTiff, exifPath)

        with open(exifPath) as exifUrl:
            #this is used for creating the book pdf later
            pagesDict[pageNumber] = baseOutUrl + '.pdf'

            #(datastream id, content, mime type, fedora log message,
            # prefix of the error logged when the upload fails)
            staticStreams = (
                (u'TIFF', tiffUrl, u'image/tiff',
                 u'Added the archival tiff file.',
                 'Error in adding TIFF datastream to:'),
                (u'JP2', jp2Url, u'image/jp2',
                 u'Added jp2 image file.',
                 'Error in adding JP2 datastream to:'),
                (u'PDF', pdfUrl, u'application/pdf',
                 u'Added pdf with OCR.',
                 'Error in adding PDF datastream to:'),
                (u'OCR', ocrUrl, u'text/plain',
                 u'Added basic text of OCR.',
                 'Error in adding OCR Datastream to:'),
                (u'EXIF', exifUrl, u'text/xml',
                 u'Added the archival EXIF file.',
                 'Error in adding EXIF datastream to:'),
            )
            for dsId, content, mimeType, logMessage, errorPrefix in staticStreams:
                dsKey = str(dsId)
                try:
                    obj.addDataStream(dsId,
                                      placeholder,
                                      label=dsId,
                                      mimeType=mimeType,
                                      controlGroup=u'M',
                                      logMessage=logMessage)
                    logging.info('Added ' + dsKey + ' datastream to:' + pagePid)
                    obj[dsKey].setContent(content)
                except FedoraConnectionException:
                    #a failed datastream is logged and the ingest carries on
                    logging.exception(errorPrefix + pagePid + '\n')

    objRelsExt = fedora_relationships.rels_ext(obj, [
        fedora_relationships.rels_namespace(
            'pageNS', 'info:islandora/islandora-system:def/pageinfo#'),
        fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#')
    ])
    objRelsExt.addRelationship('isMemberOf', bookPid)
    objRelsExt.addRelationship(
        fedora_relationships.rels_predicate('pageNS', 'isPageNumber'),
        fedora_relationships.rels_object(
            str(pageNumber), fedora_relationships.rels_object.LITERAL))
    objRelsExt.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        'archiveorg:pageCModel')

    objRelsExt.update()

    #Dynamic Datastreams
    #grab all files that share a name with the tiff and do not use the already used extensions
    usedExtensions = ('tif', 'tiff', 'pdf', 'jp2', 'txt', 'xml')
    for dynamicDSFile in os.listdir(fullTiffDir):
        #split at the FIRST dot: "name.tar.gz" gives the extension "tar.gz"
        baseName, _, dynamicDSFileEXT = dynamicDSFile.partition('.')
        if baseName != tiffNameNoExt:
            continue
        #an entry without an extension has nothing to name its datastream after
        if not dynamicDSFileEXT or dynamicDSFileEXT in usedExtensions:
            continue

        dynamicDSFileMimeType = misc.getMimeType(dynamicDSFileEXT)
        #content type is unknown, so read the bytes untranslated
        with open(os.path.join(fullTiffDir, dynamicDSFile),
                  'rb') as dynamicDSFileHandle:
            try:
                obj.addDataStream(unicode(dynamicDSFileEXT),
                                  placeholder,
                                  label=unicode(dynamicDSFileEXT),
                                  mimeType=unicode(dynamicDSFileMimeType),
                                  controlGroup=u'M',
                                  logMessage=unicode('Added the datastream:' +
                                                     dynamicDSFileEXT))
                logging.info('Added the datastream: ' + dynamicDSFileEXT +
                             ' to: ' + pagePid)
                obj[dynamicDSFileEXT].setContent(dynamicDSFileHandle)
            except FedoraConnectionException:
                logging.exception('Error in adding' + dynamicDSFileEXT +
                                  'datastream to:' + pagePid + '\n')

    return True
def addBookPageToFedora(inputTiff, tmpDir):
    '''
    Helper function that creates and configures a fedora object for one book
    page, based on the input image.

    The tiff is run through the converter (JP2 plus PDF/Text OCR), its EXIF
    data is dumped to xml, and the tiff together with the <name>.jp2, .pdf,
    .txt and .xml files read back from tmpDir are attached as managed
    datastreams of a new object in the u'uofm' namespace.  The object is
    related to the module-level bookPid and the path of the page pdf is stored
    in the module-level pagesDict under the page number.  Any other file in
    the tiff's directory that shares the tiff's base name is attached as an
    extra datastream named after its extension.

    @param inputTiff: the archival data source; the file name has to start
        with "<number>_" and end in .tif or .tiff
    @param tmpDir: file directory where non-archival stuff gets put

    @return bool: true on function success, false when the tiff file name
        cannot be interpreted
    '''
    global pagesDict

    #run conversions
    converter.tif_to_jp2(inputTiff, tmpDir, 'default', 'default')
    converter.tif_OCR(inputTiff, tmpDir, {'PDF': 'default', 'Text': 'default'})

    fullTiffDir = os.path.dirname(inputTiff)
    tifDir = os.path.basename(fullTiffDir)
    tiffName = os.path.basename(inputTiff)

    #split off the tiff extension; reject anything else before a fedora
    #object gets created, so a bad name cannot leave a half-built object behind
    tifExt = None
    for candidateExt in ('.tiff', '.tif'):
        if tiffName.endswith(candidateExt):
            tifExt = candidateExt
            break
    if tifExt is None:
        logging.error('Bad tiff file name: ' + inputTiff +
                      ' does not end in .tif or .tiff\n')
        return False
    tiffNameNoExt = tiffName[:-len(tifExt)]

    #determine page number: used for naming
    try:
        #both a missing underscore and a non-numeric prefix raise ValueError
        pageNumber = int(tiffName[0:tiffName.index('_')])
    except ValueError:
        logging.error('Bad tiff file name: ' + inputTiff +
                      ' has no numeric prefix before an underscore\n')
        return False

    #if front cover
    if tiffName.count('front_cover') == 1:
        pageNumber = 1
    elif tiffName.count('inner_cover') == 1:
        pageNumber = 2
    #if it's the inner leaf
    elif tiffName.count('inner_leaf') == 1:
        pageNumber = 3
    #if back cover: it is the last page, so its number is the tiff count
    elif tiffName.count('back_cover') == 1:
        pageNumber = len([entry for entry in os.listdir(fullTiffDir)
                          if entry.endswith(('.tif', '.tiff'))])
    #standard a [left side]
    elif tiffName.count('a') == 1:
        if pageNumber == 1:
            pageNumber = 4
        else:
            pageNumber = pageNumber * 2 + 2
    #standard b [right side]
    elif tiffName.count('b') == 1:
        if pageNumber == 1:
            pageNumber = 5
        else:
            pageNumber = pageNumber * 2 + 3
    else:
        logging.error('Bad tiff file name: ' + inputTiff +
                      ' giving fileNumber: ' + str(pageNumber) + '\n')
        return False

    logging.info('Working on ingest of page: ' + str(pageNumber) +
                 ' with source file: ' + inputTiff)

    #create the fedora book page object
    pagePid = fedora.getNextPID(u'uofm')
    myLabel = unicode(tifDir + '_Page' + str(pageNumber))
    obj = fedora.createObject(pagePid, label=myLabel)

    #create ingest urls
    baseInUrl = os.path.join(fullTiffDir, tiffNameNoExt)
    baseOutUrl = os.path.join(tmpDir, tiffNameNoExt)

    #content handed to addDataStream when a datastream is created; the real
    #content is pushed right afterwards with setContent
    placeholder = u'smelly'

    #the image and pdf files are binary, so they are opened in 'rb'; the with
    #blocks close every handle even when an upload fails
    #NOTE(review): assumes setContent has consumed the handle by the time it
    #returns - confirm against the fedora client library
    with open(baseInUrl + tifExt, 'rb') as tiffUrl, \
            open(baseOutUrl + '.jp2', 'rb') as jp2Url, \
            open(baseOutUrl + '.pdf', 'rb') as pdfUrl, \
            open(baseOutUrl + '.txt') as ocrUrl:
        #this gets the metadata for the page from the tif
        exifPath = baseOutUrl + '.xml'
        converter.exif_to_xml(inputTiff, exifPath)

        with open(exifPath) as exifUrl:
            #this is used for creating the book pdf later
            pagesDict[pageNumber] = baseOutUrl + '.pdf'

            #(datastream id, content, mime type, fedora log message,
            # prefix of the error logged when the upload fails)
            staticStreams = (
                (u'TIFF', tiffUrl, u'image/tiff',
                 u'Added the archival tiff file.',
                 'Error in adding TIFF datastream to:'),
                (u'JP2', jp2Url, u'image/jp2',
                 u'Added jp2 image file.',
                 'Error in adding JP2 datastream to:'),
                (u'PDF', pdfUrl, u'application/pdf',
                 u'Added pdf with OCR.',
                 'Error in adding PDF datastream to:'),
                (u'OCR', ocrUrl, u'text/plain',
                 u'Added basic text of OCR.',
                 'Error in adding OCR Datastream to:'),
                (u'EXIF', exifUrl, u'text/xml',
                 u'Added the archival EXIF file.',
                 'Error in adding EXIF datastream to:'),
            )
            for dsId, content, mimeType, logMessage, errorPrefix in staticStreams:
                dsKey = str(dsId)
                try:
                    obj.addDataStream(dsId,
                                      placeholder,
                                      label=dsId,
                                      mimeType=mimeType,
                                      controlGroup=u'M',
                                      logMessage=logMessage)
                    logging.info('Added ' + dsKey + ' datastream to:' + pagePid)
                    obj[dsKey].setContent(content)
                except FedoraConnectionException:
                    #a failed datastream is logged and the ingest carries on
                    logging.exception(errorPrefix + pagePid + '\n')

    objRelsExt = fedora_relationships.rels_ext(obj, [
        fedora_relationships.rels_namespace(
            'pageNS', 'info:islandora/islandora-system:def/pageinfo#'),
        fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#')
    ])
    objRelsExt.addRelationship('isMemberOf', bookPid)
    objRelsExt.addRelationship(
        fedora_relationships.rels_predicate('pageNS', 'isPageNumber'),
        fedora_relationships.rels_object(
            str(pageNumber), fedora_relationships.rels_object.LITERAL))
    objRelsExt.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        'archiveorg:pageCModel')

    objRelsExt.update()

    #Dynamic Datastreams
    #grab all files that share a name with the tiff and do not use the already used extensions
    usedExtensions = ('tif', 'tiff', 'pdf', 'jp2', 'txt', 'xml')
    for dynamicDSFile in os.listdir(fullTiffDir):
        #split at the FIRST dot: "name.tar.gz" gives the extension "tar.gz"
        baseName, _, dynamicDSFileEXT = dynamicDSFile.partition('.')
        if baseName != tiffNameNoExt:
            continue
        #an entry without an extension has nothing to name its datastream after
        if not dynamicDSFileEXT or dynamicDSFileEXT in usedExtensions:
            continue

        dynamicDSFileMimeType = misc.getMimeType(dynamicDSFileEXT)
        #content type is unknown, so read the bytes untranslated
        with open(os.path.join(fullTiffDir, dynamicDSFile),
                  'rb') as dynamicDSFileHandle:
            try:
                obj.addDataStream(unicode(dynamicDSFileEXT),
                                  placeholder,
                                  label=unicode(dynamicDSFileEXT),
                                  mimeType=unicode(dynamicDSFileMimeType),
                                  controlGroup=u'M',
                                  logMessage=unicode('Added the datastream:' +
                                                     dynamicDSFileEXT))
                logging.info('Added the datastream: ' + dynamicDSFileEXT +
                             ' to: ' + pagePid)
                obj[dynamicDSFileEXT].setContent(dynamicDSFileHandle)
            except FedoraConnectionException:
                logging.exception('Error in adding' + dynamicDSFileEXT +
                                  'datastream to:' + pagePid + '\n')

    return True