Exemplo n.º 1
0
def rendermedia(filecontent):
    """Turn tab-separated media metadata into a list of per-row dicts.

    The text is first passed through ``deURN``.  Its first line is the
    header naming each column; every following line is one record.  Blank
    lines and lines whose first non-blank character is ``#`` are skipped.

    Each returned dict always has:
      * ``'otherfields'`` - list of ``{'label': ..., 'value': ...}`` dicts
        built from the ``creator``, ``description`` and ``date`` columns;
      * ``'counter'``     - zero-based position of the row among all lines
        after the header (skipped lines still consume a number).
    and, when the matching column is present in the row, ``'accession'``,
    ``'mainentry'``, ``'csid'``, ``'media'`` and ``'blobs'`` (a one-item
    list).  Columns with any other header name are ignored.

    :param filecontent: raw file text (header line + data lines).
    :return: list of dicts, one per non-skipped data row.
    """
    # header name -> key under which the cell is stored verbatim
    simple_keys = {
        'objectnumber': 'accession',
        'name': 'mainentry',
        'objectCSID': 'csid',
        'mediaCSID': 'media',
    }
    # header name -> display label for an entry in 'otherfields'
    labelled = {
        'creator': 'Creator',
        'description': 'Description',
        'date': 'Image Date',
    }

    lines = deURN(filecontent).split('\n')
    FIELDS = lines[0].strip().split('\t')
    result = []
    for counter, line in enumerate(lines[1:]):
        # strip() also removes a stray '\r' left at the end of the line.
        # NOTE(review): it removes leading tabs as well, so a row whose first
        # cell is empty is shifted one column to the left - confirm whether
        # such rows can occur.
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        media = {'otherfields': [], 'counter': counter}
        # zip() pairs each cell with its header name and ignores cells beyond
        # the header width; indexing FIELDS by cell position raised IndexError
        # on such over-long rows.
        for field, value in zip(FIELDS, line.split('\t')):
            if field in simple_keys:
                media[simple_keys[field]] = value
            elif field == 'blobCSID':
                media['blobs'] = [value]
            elif field in labelled:
                media['otherfields'].append({
                    'label': labelled[field],
                    'value': value
                })
        result.append(media)
    return result
Exemplo n.º 2
0
def reformat(filecontent):
    """Render delimited text as a crude HTML table.

    The text is passed through ``deURN``; then every newline becomes a new
    table row (``<tr><td>``), every tab or ``|`` becomes a new cell
    (``<td>``), and each occurrence of ``False`` is wrapped in an
    ``error`` span.  No closing ``</td>``/``</tr>`` tags are emitted.

    :param filecontent: raw text to convert.
    :return: HTML string starting with the ``<table>`` opener and ending
        with ``</table>``.
    """
    # Applied strictly in this order.
    substitutions = (
        ('\n', '<tr><td>'),
        ('\t', '<td>'),
        ('|', '<td>'),
        ('False', '<span class="error">False</span>'),
    )
    body = deURN(filecontent)
    for old, new in substitutions:
        body = body.replace(old, new)
    return '<table width="100%"><tr><td>\n' + body + '</table>'
Exemplo n.º 3
0
def reformat(filecontent):
    """Convert delimited text into a minimal HTML table.

    After ``deURN`` has been applied to the input, newlines start a new
    row, tabs and ``|`` characters start a new cell, and the literal text
    ``False`` is highlighted with ``<span class="error">``.

    :param filecontent: raw text to convert.
    :return: the table markup as a single string.
    """
    markup = (
        deURN(filecontent)
        .replace('\n', '<tr><td>')
        .replace('\t', '<td>')
        .replace('|', '<td>')
        .replace('False', '<span class="error">False</span>')
    )
    opener = '<table width="100%"><tr><td>\n'
    closer = '</table>'
    return opener + markup + closer
Exemplo n.º 4
0
def extractTag(xml, tag):
    """Return the text of the first descendant ``tag`` element of ``xml``.

    If the text contains ``"urn:"`` it is passed through ``deURN`` first.

    :param xml: element-like object exposing ``find()``.
    :param tag: tag name to look for anywhere below ``xml``.
    :return: the (possibly de-URNed) text, or ``''`` when the element is
        missing, has no text, or ``deURN`` fails.
    """
    element = xml.find('.//%s' % tag)
    # Missing element or empty element: nothing to extract.
    if element is None or element.text is None:
        return ''
    text = element.text
    if "urn:" not in text:
        return text
    try:
        return deURN(str(text))
    except Exception:
        # Best effort, as before: an unparseable URN yields an empty string,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return ''
Exemplo n.º 5
0
def rendermedia(filecontent):
    """Parse tab-separated media metadata into one dict per data row.

    ``filecontent`` is run through ``deURN``; the first line supplies the
    column names and each later line is a record.  Empty lines and lines
    starting (after stripping) with ``#`` are ignored.

    Every dict carries ``'otherfields'`` (a list of label/value dicts for
    the ``creator``, ``description`` and ``date`` columns) and
    ``'counter'`` (zero-based index of the line after the header, counting
    skipped lines too).  The columns ``objectnumber``, ``name``,
    ``objectCSID``, ``mediaCSID`` and ``blobCSID`` are stored under
    ``'accession'``, ``'mainentry'``, ``'csid'``, ``'media'`` and
    ``'blobs'`` (wrapped in a list) respectively.  Other columns are
    ignored.

    :param filecontent: raw file text.
    :return: list of media dicts in input order.
    """
    rows = deURN(filecontent).split('\n')
    FIELDS = rows[0].strip().split('\t')
    result = []
    for counter, row in enumerate(rows[1:]):
        # seems there may be a stray \r still at the end of the string.
        row = row.strip()
        if row == '' or row[0] == '#':
            continue
        media = {'otherfields': []}
        media['counter'] = counter
        # Pair cells with header names; zip() stops at the shorter sequence,
        # so a row with more cells than the header no longer raises
        # IndexError - the surplus cells are simply ignored.
        for field, value in zip(FIELDS, row.split('\t')):
            if field == 'objectnumber':
                media['accession'] = value
            elif field == 'name':
                media['mainentry'] = value
            elif field == 'objectCSID':
                media['csid'] = value
            elif field == 'mediaCSID':
                media['media'] = value
            elif field == 'blobCSID':
                media['blobs'] = [value]
            elif field == 'creator':
                media['otherfields'].append({
                    'label': 'Creator',
                    'value': value
                })
            elif field == 'description':
                media['otherfields'].append({
                    'label': 'Description',
                    'value': value
                })
            elif field == 'date':
                media['otherfields'].append({
                    'label': 'Image Date',
                    'value': value
                })
        result.append(media)
    return result
Exemplo n.º 6
0
def extract_refname(xml, term, pgSz, record_type):
    """Search a list response for an item whose display name matches ``term``.

    ``xml`` is parsed with ``fromstring``; its ``totalItems`` element and
    ``list-item`` elements are examined.  Names are compared after both
    sides have been passed through ``normalize``.

    :param xml: XML text of the list response.
    :param term: the term to look for.
    :param pgSz: page size used for the request; when ``totalItems``
        exceeds it and nothing matched, the match may be on a later page.
    :param record_type: ``'collectionobjects'`` derives the display name
        from the item's refName via ``deURN``; any other value reads
        ``termDisplayName`` and falls back to ``displayName``.
    :return: ``(status_row, totalItems)`` where ``status_row`` is a
        five-element list whose first element is one of ``'ZeroResults'``,
        ``'Failed'``, ``'OK'``, ``'MaybeMissed'`` or ``'NoMatch'``.  For
        ``'OK'`` the row is ``[status, csid, display name, refName,
        updatedAt]``.
    :raises: any error raised while parsing ``xml``, reading ``totalItems``
        or reading an item's ``csid`` propagates to the caller (the
        original code re-raised these as well; its ``'xmlParseFailed'``
        return was unreachable and has been removed).
    """
    cspaceXML = fromstring(xml)
    totalItems = int(cspaceXML.find('.//totalItems').text)
    if totalItems == 0:
        return ['ZeroResults', 'X', 'X', 'X', 'X'], totalItems

    for item in cspaceXML.findall('.//list-item'):
        csid = item.find('.//csid').text
        try:
            refName = extract_tag(item, 'refName')
            updated_at = extract_tag(item, 'updatedAt')
            if record_type == 'collectionobjects':
                termDisplayName = deURN(refName)
            else:
                try:
                    termDisplayName = extract_tag(item, 'termDisplayName')
                except Exception:
                    # fall back to the alternative element name
                    termDisplayName = extract_tag(item, 'displayName')
        except Exception:
            # Any failure while reading this item aborts the whole search.
            loginfo(
                'csvimport',
                'could not get termDisplayName or refName or updatedAt from %s'
                % csid, {}, {})
            return ['Failed', 'X', 'X', 'X', 'X'], totalItems
        if normalize(term) == normalize(termDisplayName):
            return ['OK', csid,
                    str(termDisplayName), refName, updated_at], totalItems

    if totalItems > pgSz:
        # More results exist than were returned in this page.
        return ['MaybeMissed', 'X', 'X', 'X', 'X'], totalItems
    return ['NoMatch', '', term, term, 'X'], totalItems
Exemplo n.º 7
0
def updateXML(fieldset, updateItems, xml):
    """Merge the user's edits in ``updateItems`` into a record's XML.

    The ``fieldset`` name selects which field names are considered.  For each
    such field with a non-empty value in ``updateItems``, the matching
    container element is located in the parsed ``xml`` and the new value is
    inserted at position 0 of that container (the code's comments call this
    making it the "preferred" value).  ``numberOfObjects``,
    ``inventoryCount``, ``pahmaTmsLegacyDepartment``,
    ``pahmaFieldLocVerbatim`` and ``collection`` are then set directly,
    creating the element when it does not exist.

    :param fieldset: name of the fieldset being edited (e.g. ``'keyinfo'``,
        ``'namedesc'``, ``'fullmonty'``).
    :param updateItems: dict mapping field names to new values.  Mutated in
        place: an empty ``'objectNumber'`` entry is filled from the XML.
    :param xml: the record's XML, parsed with ``etree.fromstring``.
    :return: ``(message, payload)`` - ``message`` is a string of HTML
        fragments describing some of the changes; ``payload`` is an XML
        declaration followed by the serialized, updated document.
    """
    message = ''

    # Fields vary with fieldsets
    if fieldset == 'keyinfo':
        fieldList = ('pahmaFieldCollectionPlace', 'assocPeople', 'objectName',
                     'pahmaEthnographicFileCode')
    elif fieldset == 'namedesc':
        fieldList = ('briefDescription', 'objectName')
    elif fieldset == 'registration':
        # nb:  'pahmaAltNumType' is handled with  'pahmaAltNum'
        fieldList = ('objectName', 'pahmaAltNum', 'fieldCollector')
    elif fieldset == 'hsrinfo':
        fieldList = ('objectName', 'pahmaFieldCollectionPlace',
                     'briefDescription')
    elif fieldset == 'objtypecm':
        fieldList = ('objectName', 'collection', 'responsibleDepartment',
                     'pahmaFieldCollectionPlace', 'pahmaTmsLegacyDepartment')
    elif fieldset == 'collection':
        fieldList = ('objectName', 'collection')
    elif fieldset == 'placeanddate':
        fieldList = ('objectName', 'pahmaFieldLocVerbatim',
                     'pahmaFieldCollectionDate')
    elif fieldset == 'places':
        fieldList = ('pahmaFieldLocVerbatim', 'pahmaFieldCollectionPlace',
                     'objectProductionPlace', 'contentPlace')
    elif fieldset == 'dates':
        fieldList = ('objectProductionDate', 'pahmaFieldCollectionDate',
                     'contentDate', 'briefDescription')
    elif fieldset == 'mattax':
        fieldList = ('material', 'taxon', 'briefDescription')
    elif fieldset == 'student':
        fieldList = ('taxon', 'fieldLocCountry', 'fieldLocState',
                     'fieldLocCounty')
    elif fieldset == 'fullmonty':
        # NOTE(review): 'objectName' and 'material' each appear twice in this
        # tuple, so they are processed twice by the loop below.
        fieldList = ('assocPeople', 'briefDescription', 'collection',
                     'contentDate', 'contentPlace', 'fieldCollector',
                     'material', 'objectName', 'objectName',
                     'objectProductionDate', 'objectProductionPlace',
                     'objectProductionPerson', 'pahmaAltNum',
                     'pahmaEthnographicFileCode', 'pahmaFieldCollectionDate',
                     'pahmaFieldCollectionPlace', 'pahmaFieldLocVerbatim',
                     'pahmaObjectStatus', 'responsibleDepartment', 'taxon',
                     'material')

    # NOTE(review): there is no else branch above, so an unrecognised fieldset
    # leaves fieldList unbound and the loop below fails with UnboundLocalError.
    root = etree.fromstring(xml)
    # add the user's changes to the XML
    for relationType in fieldList:
        # sys.stderr.write('tag1: %s\n' % relationType)
        # this app does not insert empty values into anything!
        if not relationType in updateItems.keys(
        ) or updateItems[relationType] == '':
            continue
        # Build the container element's tag name: <field> + extra + listSuffix.
        # The default is '<field>List'; the branches below override the pieces.
        listSuffix = 'List'
        extra = ''
        if relationType in [
                'assocPeople', 'pahmaAltNum', 'pahmaFieldCollectionDate',
                'objectProductionDate', 'objectProductionPlace',
                'objectProductionPerson', 'contentDate', 'material', 'taxon',
                'fieldLocCountry', 'fieldLocState', 'fieldLocCounty'
        ]:
            extra = 'Group'
        elif relationType in [
                'briefDescription', 'fieldCollector', 'responsibleDepartment',
                'contentPlace'
        ]:
            listSuffix = 's'
        # These fields have no list suffix at all ('contentDate' keeps its
        # 'Group' extra from above, giving 'contentDateGroup').
        if relationType in [
                'collection', 'pahmaFieldLocVerbatim', 'contentDate',
                'pahmaTmsLegacyDepartment'
        ]:
            listSuffix = ''
        else:
            pass
            # html += ">>> ",'.//'+relationType+extra+'List'
        # sys.stderr.write('tag2: %s\n' % (relationType + extra + listSuffix))
        # Some fields live under a container whose name is not derived from
        # the field name itself.
        if relationType == 'taxon':
            tmprelationType = 'taxonomicIdent'
        elif relationType in [
                'fieldLocCountry', 'fieldLocState', 'fieldLocCounty'
        ]:
            tmprelationType = 'locality'
        else:
            tmprelationType = relationType
        metadata = root.findall('.//' + tmprelationType + extra + listSuffix)
        # Side effect on the caller's dict: an empty objectNumber is replaced
        # by the value found in the record.
        if 'objectNumber' in updateItems and updateItems['objectNumber'] == '':
            updateItems['objectNumber'] = root.find('.//objectNumber').text
        try:
            metadata = metadata[0]  # there had better be only one!
        except:
            # hmmm ... we didn't find this element in the record. Make a note and carry on!
            # message += 'No "' + relationType + extra + listSuffix + '" element found to update.'
            continue
        # html += ">>> ",relationType,':',updateItems[relationType]
        # --- Case 1: values stored inside a repeating <...Group> element ---
        if relationType in [
                'assocPeople', 'objectName', 'pahmaAltNum', 'material',
                'taxon', 'objectProductionPerson', 'objectProductionPlace',
                'fieldLocCountry', 'fieldLocState', 'fieldLocCounty'
        ]:
            # group = metadata.findall('.//'+relationType+'Group')
            # sys.stderr.write('  updateItem: ' + relationType + ':: ' + updateItems[relationType] + '\n' )
            Entries = metadata.findall('.//' + relationType)
            if not alreadyExists(updateItems[relationType], Entries):
                # NOTE(review): when a Group already exists, newElement IS that
                # existing element (not a copy); the new leaf is appended to it
                # and it is then inserted again at position 0 below - confirm
                # this is intended.
                try:
                    newElement = metadata.findall('.//' + tmprelationType +
                                                  'Group')[0]
                except:
                    newElement = etree.Element(tmprelationType + 'Group')
                leafElement = etree.Element(relationType)
                leafElement.text = updateItems[relationType]
                newElement.append(leafElement)
                if relationType in ['assocPeople', 'pahmaAltNum']:
                    apgType = etree.Element(relationType + 'Type')
                    # this needs to be a refname for PAHMA's assocpeopletype...
                    apgType.text = updateItems[
                        relationType +
                        'Type'] if relationType == 'pahmaAltNum' else "urn:cspace:pahma.cspace.berkeley.edu:vocabularies:name(assocpeople):item:name(assocpeopletype06)'made by'"
                    # sys.stderr.write(relationType + 'Type:' + updateItems[relationType + 'Type'])
                    newElement.append(apgType)
                if (len(Entries) == 1 and Entries[0].text is None
                    ) or tmprelationType == 'locality':
                    # sys.stderr.write('reusing empty element: %s\n' % Entries[0].tag)
                    # sys.stderr.write('ents : %s\n' % Entries[0].text)
                    # NOTE(review): children are removed from metadata while it
                    # is being iterated; this may skip adjacent siblings -
                    # verify against the etree implementation in use.
                    for child in metadata:
                        # html += '<br>tag: ', child.tag
                        if child.tag == tmprelationType + 'Group':
                            # html += '<br> found it! ',child.tag
                            metadata.remove(child)
                    metadata.insert(0, newElement)
                else:
                    metadata.insert(0, newElement)
            else:
                if IsAlreadyPreferred(updateItems[relationType], Entries):
                    continue
                else:
                    # exists, but not preferred. make it the preferred: remove it from where it is, insert it as 1st
                    # NOTE(review): checkval is None for a Group lacking this
                    # leaf (AttributeError on .text), and savechild is unbound
                    # if no Group matches - TODO confirm neither can happen.
                    for child in metadata:
                        if child.tag == tmprelationType + 'Group':
                            checkval = child.find('.//' + relationType)
                            if checkval.text == updateItems[relationType]:
                                savechild = child
                                metadata.remove(child)
                    metadata.insert(0, savechild)
                pass
            # for AltNums, we need to update the AltNumType even if the AltNum hasn't changed
            if relationType == 'pahmaAltNum':
                # NOTE(review): find() returns None when no pahmaAltNumType
                # element exists, which would make the assignment below fail.
                apgType = metadata.find('.//' + relationType + 'Type')
                apgType.text = updateItems[relationType + 'Type']
                # sys.stderr.write('  updated: pahmaAltNumType to' + updateItems[relationType + 'Type'] + '\n' )
        # --- Case 2: plain repeating elements directly under '<field>s' ---
        elif relationType in [
                'briefDescription', 'fieldCollector', 'responsibleDepartment',
                'contentPlace'
        ]:
            Entries = metadata.findall('.//' + relationType)
            # for e in Entries:
            # html += '%s, %s<br>' % (e.tag, e.text)
            # sys.stderr.write(' e: %s\n' % e.text)
            if alreadyExists(updateItems[relationType], Entries):
                if IsAlreadyPreferred(updateItems[relationType], Entries):
                    # message += "%s exists as %s, already preferred;" % (updateItems[relationType],relationType)
                    pass
                else:
                    # exists, but not preferred. make it the preferred: remove it from where it is, insert it as 1st
                    for child in Entries:
                        # debug trace written to stderr for every entry examined
                        sys.stderr.write(' c: %s\n' % child.tag)
                        if child.text == updateItems[relationType]:
                            new_element = child
                            metadata.remove(child)
                            # message += '%s removed. len = %s<br/>' % (child.text, len(Entries))
                    metadata.insert(0, new_element)
                    message += "'%s' exists in %s, now preferred.<br/>" % (
                        deURN(updateItems[relationType]), relationType)
                    # html += 'already exists: %s<br>' % updateItems[relationType]
            # check if the existing element is empty; if so, use it, don't add a new element
            else:
                if len(Entries) == 1 and Entries[0].text is None:
                    # message += "removed %s ;<br/>" % (Entries[0].tag)
                    metadata.remove(Entries[0])
                new_element = etree.Element(relationType)
                new_element.text = updateItems[relationType]
                metadata.insert(0, new_element)
                message += "added '%s' as the preferred term in %s.<br/>" % (
                    deURN(updateItems[relationType]), relationType)

        # --- Case 3: structured dates ---
        elif relationType in [
                'objectProductionDate', 'pahmaFieldCollectionDate',
                'contentDate'
        ]:
            # we'll be replacing the entire structured date group
            newDateGroup = etree.Element('%sGroup' % relationType)
            new_element = etree.Element('dateDisplayDate')
            new_element.text = updateItems[relationType]
            newDateGroup.insert(0, new_element)

            DateGroup = metadata.find('.//%sGroup' % relationType)
            if DateGroup is not None:
                metadata.remove(DateGroup)
            # one of many special cases...
            if relationType == 'contentDate':
                # For contentDate the container itself is the group: its
                # contents are cleared and the bare dateDisplayDate element is
                # inserted (newDateGroup is not used in this branch).
                # NOTE(review): findall('.//*') returns all descendants, but
                # remove() only accepts direct children - nested descendants
                # would presumably raise; verify.
                DateGroup = metadata.findall('.//*')
                [metadata.remove(d) for d in DateGroup]
                metadata.insert(0, new_element)
            else:
                metadata.insert(0, newDateGroup)
        # --- Case 4: every other field ---
        else:
            # check if value is already present. if so, skip
            if alreadyExists(updateItems[relationType],
                             metadata.findall('.//' + relationType)):
                if IsAlreadyPreferred(updateItems[relationType],
                                      metadata.findall('.//' + relationType)):
                    continue
                else:
                    message += "'%s' already exists as an NPT in %s: This value has been inserted as the PT and in doing so is now duplicated.<br/>" % (
                        deURN(updateItems[relationType]), relationType)
                    pass
            newElement = etree.Element(relationType)
            newElement.text = updateItems[relationType]
            metadata.insert(0, newElement)
    # The remaining fields are set directly, independent of the fieldset; a
    # missing element is created at position 0 of its schema part.
    objectCount = root.find('.//numberOfObjects')
    if 'objectCount' in updateItems:
        if objectCount is None:
            objectCount = etree.Element('numberOfObjects')
            collectionobjects_common = root.find(
                './/{http://collectionspace.org/services/collectionobject}collectionobjects_common'
            )
            collectionobjects_common.insert(0, objectCount)
        objectCount.text = updateItems['objectCount']

    inventoryCount = root.find('.//inventoryCount')
    if 'inventoryCount' in updateItems:
        if inventoryCount is None:
            inventoryCount = etree.Element('inventoryCount')
            collectionobjects_pahma = root.find(
                './/{http://collectionspace.org/services/collectionobject/local/pahma}collectionobjects_pahma'
            )
            collectionobjects_pahma.insert(0, inventoryCount)
        inventoryCount.text = updateItems['inventoryCount']
    # Unlike the two counts above, these are only written when non-empty.
    for fld in 'pahmaTmsLegacyDepartment pahmaFieldLocVerbatim'.split(' '):
        if fld in updateItems and updateItems[fld] != '':
            fldtoupdate = root.find('.//' + fld)
            if fldtoupdate is None:
                fldtoupdate = etree.Element(fld)
                collectionobjects_pahma = root.find(
                    './/{http://collectionspace.org/services/collectionobject/local/pahma}collectionobjects_pahma'
                )
                collectionobjects_pahma.insert(0, fldtoupdate)
            fldtoupdate.text = updateItems[fld]

    collection = root.find('.//collection')
    if 'collection' in updateItems:
        if collection is None:
            collection = etree.Element('collection')
            collectionobjects_common = root.find(
                './/{http://collectionspace.org/services/collectionobject}collectionobjects_common'
            )
            collectionobjects_common.insert(0, collection)
            message += " %s added as &lt;%s&gt;.<br/>" % (deURN(
                updateItems['collection']), 'collection')
        collection.text = updateItems['collection']

    # Serialize to text and prepend the XML declaration by hand.
    payload = '<?xml version="1.0" encoding="UTF-8"?>\n' + etree.tostring(
        root, encoding='unicode')
    # update collectionobject..
    # html += "<br>pretending to post update to %s to REST API..." % updateItems['objectCsid']
    # elapsedtimetotal = time.time()
    # messages = []
    # messages.append("posting to %s REST API..." % uri)
    # print(payload)
    # messages.append(payload)

    return message, payload