def internalize_rels(pid, dsid, source, cursor=None):
    """
    Internalize rels given a ds_db_id.

    Only 'DC', 'RELS-INT' and 'RELS-EXT' carry relations; any other dsid
    is a no-op.
    """
    cursor = check_cursor(cursor)
    if dsid not in ['DC', 'RELS-EXT', 'RELS-INT']:
        return cursor

    object_reader.object_id_from_raw(pid, cursor=cursor)
    object_id = cursor.fetchone()['id']

    def _internalize(rels_file):
        # Dispatch on dsid; the RELS-INT handler takes a parsed tree,
        # the others take the file object as-is.
        if dsid == 'DC':
            internalize_rels_dc(rels_file, object_id, cursor=cursor)
        elif dsid == 'RELS-INT':
            internalize_rels_int(etree.parse(rels_file), object_id, source,
                                 cursor=cursor)
        elif dsid == 'RELS-EXT':
            internalize_rels_ext(rels_file, object_id, source, cursor=cursor)

    datastream_reader.datastream({
        'object': object_id,
        'dsid': dsid,
    }, cursor=cursor)
    ds_info = cursor.fetchone()

    if ds_info is None or ds_info['resource'] is None:
        # No backing resource: internalize with no file.
        # NOTE(review): for RELS-INT this becomes etree.parse(None), which
        # raises — confirm this branch is actually reachable for RELS-INT.
        _internalize(None)
        return cursor

    datastream_reader.resource(ds_info['resource'], cursor=cursor)
    resource_info = cursor.fetchone()
    resource_path = filestore.resolve_uri(resource_info['uri'])
    with open(resource_path, 'rb') as relations_file:
        _internalize(relations_file)
    return cursor
def update_checksums(resource, checksums, cursor=None):
    """
    Bring a resource's checksums up to date.

    Mutates the passed checksum dicts in place (resolving type and value).

    Raises:
        ValueError: On checksum mismatch.
    """
    # Fedora hash types mapped to the names that hashlib uses.
    fedora_to_hashlib = {
        'MD5': 'md5',
        'SHA-1': 'sha1',
        'SHA-256': 'sha256',
        'SHA-384': 'sha384',
        'SHA-512': 'sha512',
    }
    if checksums is None:
        return

    existing = datastream_reader.checksums(resource, cursor=cursor).fetchall()
    for entry in checksums:
        # Resolve default checksum.
        if entry['type'] == 'DEFAULT':
            entry['type'] = _config['default_hash_algorithm']

        # Checksums can be disabled: drop everything we have on record.
        if entry['type'] == 'DISABLED':
            for old in existing:
                datastream_purger.delete_checksum(old['id'], cursor=cursor)
            continue

        # Only set or validate checksums if they have changed.
        changed = True
        for old in existing:
            # If we get checksums with no type it is the old.
            if not entry['type']:
                entry['type'] = old['type']
            if (old['type'] == entry['type'] and
                    old['checksum'] == entry['checksum']):
                changed = False
        if not changed:
            continue

        entry['resource'] = resource
        file_path = resolve_uri(
            datastream_reader.resource(resource,
                                       cursor=cursor).fetchone()['uri'])
        actual = checksum_file(file_path, fedora_to_hashlib[entry['type']])
        if not entry['checksum']:
            # Set checksum.
            entry['checksum'] = actual
        elif actual != entry['checksum']:
            raise ValueError('Checksum mismatch.')
        datastream_writer.upsert_checksum(entry, cursor=cursor)
        cursor.fetchone()
def datastream_to_profile(ds_info, cursor, version=0):
    """
    Get a datastream profile dict from a DB DS dict.
    """
    versionable = 'true' if ds_info['versioned'] else 'false'
    # Defaults for a datastream without a backing resource.
    location = None
    location_type = 'INTERNAL_ID'
    mime = None
    checksum = 'none'
    checksum_type = 'DISABLED'
    size = None

    if ds_info['resource'] is not None:
        ds_reader.resource(ds_info['resource'], cursor=cursor)
        resource_info = cursor.fetchone()
        if resource_info is not None:
            location = resource_info['uri']
            if ds_info['control_group'] != 'R':
                size = filestore.uri_size(resource_info['uri'])
            else:
                # Redirect datastreams point at an external URL.
                location_type = 'URL'
            ds_reader.mime(resource_info['mime'], cursor=cursor)
            mime = cursor.fetchone()['mime']
        ds_reader.checksums(ds_info['resource'], cursor=cursor)
        checksum_info = cursor.fetchone()
        if checksum_info is not None:
            checksum = checksum_info['checksum']
            checksum_type = checksum_info['type']
        # Drain any remaining checksum rows.
        cursor.fetchall()

    return {
        'dsLabel': ds_info['label'],
        'dsCreateDate': format_date(ds_info['modified']),
        'dsState': ds_info['state'],
        'dsMIME': mime,
        'dsControlGroup': ds_info['control_group'],
        'dsVersionable': versionable,
        'dsVersionID': '{}.{}'.format(ds_info['dsid'], version),
        'dsChecksumType': checksum_type,
        'dsChecksum': checksum,
        'dsSize': size,
        'dsLocation': location,
        'dsLocationType': location_type,
    }
def _get_ds_dissemination(self, req, pid, dsid):
    """
    Provide datastream content.

    Returns:
        A dict carrying 'mime' (when resolvable) and either 'location'
        (for redirect datastreams) or 'stream' (an open binary file the
        caller is responsible for closing); None when the datastream has
        no backing resource.

    Raises:
        ObjectDoesNotExistError: The object doesn't exist.
        DatastreamDoesNotExistError: The datastream doesn't exist (at the
            requested asOfDateTime, when one is given).
    """
    with get_connection() as conn, conn.cursor() as cursor:
        object_info = object_reader.object_id_from_raw(
            pid, cursor=cursor).fetchone()
        if object_info is None:
            raise ObjectDoesNotExistError(pid)
        time = utils.iso8601_to_datetime(req.get_param('asOfDateTime'))
        ds_info = ds_reader.datastream(
            {
                'object': object_info['id'],
                'dsid': dsid
            },
            cursor=cursor).fetchone()
        if ds_info is None:
            raise DatastreamDoesNotExistError(pid, dsid)
        if time is not None:
            ds_info = ds_reader.datastream_as_of_time(
                ds_info['id'], time, cursor)
            if ds_info is None:
                raise DatastreamDoesNotExistError(pid, dsid, time)
        # Fixed: this call was missing cursor=cursor, unlike every other
        # reader call in this transaction, so it did not run on the open
        # transaction's cursor.
        resource_info = ds_reader.resource(ds_info['resource'],
                                           cursor=cursor).fetchone()
        if resource_info is None:
            return None
        info = {}
        mime_info = ds_reader.mime_from_resource(resource_info['id'],
                                                 cursor=cursor).fetchone()
        if mime_info:
            info['mime'] = mime_info['mime']
        # Redirect if we are a redirect DS.
        if ds_info['control_group'] == 'R':
            info['location'] = resource_info['uri']
        else:
            # Send data if we are not a redirect DS.
            file_path = filestore.resolve_uri(resource_info['uri'])
            info['stream'] = open(file_path, 'rb')
        return info
def _get_info(self, pid, dsid):
    """
    Get the MIME-type and URI of the given datastream.

    Returns:
        A three-tuple comprising:
            - the datastream control group
            - the URI of the resource the datastream represents
            - the MIME type of the datastream's resource

    Raises:
        DatastreamDoesNotExistError: The datastream doesn't exist.
    """
    with get_connection() as conn, conn.cursor() as cursor:
        ds_info = ds_reader.datastream_from_raw(
            pid, dsid, cursor=cursor).fetchone()
        if ds_info is None:
            raise DatastreamDoesNotExistError(pid, dsid)
        resource_info = ds_reader.resource(
            ds_info['resource'], cursor=cursor).fetchone()
        mime_info = ds_reader.mime(
            resource_info['mime'], cursor=cursor).fetchone()
        return (ds_info['control_group'],
                resource_info['uri'],
                mime_info['mime'])
def write_ds(ds, old=False, cursor=None):
    """
    Create a datastream on the current object.
    """
    cursor = check_cursor(cursor, ISOLATION_LEVEL_READ_COMMITTED)

    if ds['data'] is not None:
        # We already have data in hand.
        filestore.create_datastream_from_data(ds,
                                              ds['data'],
                                              mime=ds['mimetype'],
                                              checksums=ds['checksums'],
                                              old=old,
                                              cursor=cursor)
    elif ds['data_ref'] is not None:
        # There is data, but it is not carried in the request body.
        ref = ds['data_ref']['REF']
        if ds['control_group'] == 'R':
            # Data will remain external; record the URI and MIME only.
            ds_writer.upsert_mime(ds['mimetype'], cursor=cursor)
            ds_writer.upsert_resource({
                'uri': ref,
                'mime': cursor.fetchone()['id'],
            }, cursor=cursor)
            ds['resource'] = cursor.fetchone()['id']
            ds_writer.upsert_datastream(ds, cursor=cursor)
        elif ref.startswith(filestore.UPLOAD_SCHEME):
            # Data has been uploaded ahead of time.
            filestore.create_datastream_from_upload(ds,
                                                    ref,
                                                    mime=ds['mimetype'],
                                                    checksums=ds['checksums'],
                                                    old=old,
                                                    cursor=cursor)
        else:
            # We need to fetch data over HTTP.
            response = requests.get(ref, stream=True)
            # @XXX: we should be able to avoid creating this file by
            # wrapping the raw attribute on the response to decode on read.
            spooled = utils.SpooledTemporaryFile()
            for chunk in response.iter_content(
                    _config['download_chunk_size']):
                spooled.write(chunk)
            spooled.seek(0)
            filestore.create_datastream_from_data(ds,
                                                  spooled,
                                                  mime=ds['mimetype'],
                                                  checksums=ds['checksums'],
                                                  old=old,
                                                  cursor=cursor)
    else:
        # There is no data change; refresh MIME, URI and checksums only.
        mime_id = ds_writer.upsert_mime(ds['mimetype'],
                                        cursor=cursor).fetchone()['id']
        uri = ds_reader.resource(ds['resource'],
                                 cursor=cursor).fetchone()['uri']
        ds_writer.upsert_resource({'uri': uri, 'mime': mime_id},
                                  cursor=cursor)
        filestore.update_checksums(ds['resource'], ds['checksums'],
                                   cursor=cursor)
        ds_writer.upsert_datastream(ds, cursor=cursor)

    return cursor
def populate_foxml_datastream(foxml, pid, datastream,
                              base_url='http://localhost:8080/fedora',
                              archival=False, inline_to_managed=False,
                              cursor=None):
    """
    Add a FOXML datastream into an lxml etree.
    """
    def _tag(local_name):
        # Namespace-qualify a FOXML element name.
        return '{{{0}}}{1}'.format(FOXML_NAMESPACE, local_name)

    ds_attributes = {
        'ID': datastream['dsid'],
        'STATE': datastream['state'],
        'CONTROL_GROUP': datastream['control_group'],
        'VERSIONABLE': str(datastream['versioned']).lower(),
    }

    with foxml.element(_tag('datastream'), ds_attributes):
        # Every old version followed by the current datastream.
        versions = list(datastream_reader.old_datastreams(datastream['id']))
        versions.append(datastream)

        for index, version in enumerate(versions):
            datastream_reader.resource(version['resource'], cursor=cursor)
            resource_info = cursor.fetchone()
            datastream_reader.mime(resource_info['mime'], cursor=cursor)
            mime_info = cursor.fetchone()

            try:
                created = format_date(version['committed'])
            except KeyError:
                # The current version has no 'committed' key; fall back to
                # the datastream's creation date.
                created = format_date(datastream['created'])

            version_attributes = {
                'ID': '{}.{}'.format(datastream['dsid'], index),
                'LABEL': version['label'] if version['label'] else '',
                'CREATED': created,
                'MIMETYPE': mime_info['mime'],
            }
            if datastream['control_group'] != 'R':
                # Redirect datastreams have no local size to report.
                version_attributes['SIZE'] = str(
                    filestore.uri_size(resource_info['uri']))

            with foxml.element(_tag('datastreamVersion'),
                               version_attributes):
                datastream_reader.checksums(version['resource'],
                                            cursor=cursor)
                for checksum in cursor.fetchall():
                    foxml.write(etree.Element(_tag('datastreamDigest'), {
                        'TYPE': checksum['type'],
                        'DIGEST': checksum['checksum'],
                    }))

                if (datastream['control_group'] == 'X' and
                        not inline_to_managed):
                    # Inline XML: embed the parsed document directly.
                    content_element = etree.Element(_tag('xmlContent'))
                    uri = filestore.resolve_uri(resource_info['uri'])
                    content_element.append(etree.parse(uri).getroot())
                    foxml.write(content_element)
                elif datastream['control_group'] in ['M', 'X'] and archival:
                    # Archival export: embed the raw bytes as base64.
                    uri = filestore.resolve_uri(resource_info['uri'])
                    with open(uri, 'rb') as ds_file:
                        with foxml.element(_tag('binaryContent')):
                            base64.encode(ds_file, foxml)
                else:
                    if datastream['control_group'] == 'R':
                        content_attributes = {
                            'TYPE': 'URL',
                            'REF': resource_info['uri'],
                        }
                    else:
                        content_attributes = {
                            'TYPE': 'INTERNAL_ID',
                            'REF': ('{}/objects/{}/datastreams/{}/'
                                    'content?asOfDateTime={}').format(
                                        base_url, pid, datastream['dsid'],
                                        created),
                        }
                    foxml.write(etree.Element(_tag('contentLocation'),
                                              content_attributes))