Пример #1
def import_from_string(xml_string, targetstatus, copy_status, owner_id=None):
    # NOTE(review): the next three lines read like the function's docstring
    # text, but the enclosing triple quotes are absent in this copy -- confirm
    # against the original source.
    Import a single resource from a string representation of its XML tree, 
    and save it with the given target status.
    Returns the imported resource object on success, raises and Exception on failure.
    from metashare.repository.models import resourceInfoType_model
    # NOTE(review): the call below is left unterminated (open parenthesis, no
    # closing one); the remaining arguments appear to be missing.
    result = resourceInfoType_model.import_from_string(xml_string,

    # A falsy first element is treated as failure; when the result has more
    # than two elements, the third one becomes the exception message.
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)

    resource = result[0]

    # Set publication_status for the new object. Also make sure that the
    # deletion flag is not set (may happen in case of re-importing a previously
    # deleted resource).
    resource.storage_object.publication_status = targetstatus
    resource.storage_object.deleted = False
    if owner_id:
        # NOTE(review): this `for` statement stops at the line continuation;
        # the rest of the iterable expression and the loop body are not
        # present in this copy.
        for edt_grp in User.objects.get(id=owner_id).userprofile \
        # this also takes care of saving the storage_object

    # explicitly write metadata XML and storage object to the storage folder
    # NOTE(review): no statement follows the comment above in this copy.

    # Create log ADDITION message for the new object, but only if we have a user:
    # NOTE(review): the `if` below has no body in this copy.
    if owner_id:

    # Update statistics
    saveLRStats(resource, UPDATE_STAT)

    return resource
Пример #2
def import_from_string(xml_string, targetstatus, copy_status, owner_id=None):
    # NOTE(review): the next three lines read like the function's docstring
    # text, but the enclosing triple quotes are absent in this copy -- confirm
    # against the original source.
    Import a single resource from a string representation of its XML tree, 
    and save it with the given target status.
    Returns the imported resource object on success, raises and Exception on failure.
    from metashare.repository.models import resourceInfoType_model
    result = resourceInfoType_model.import_from_string(xml_string, copy_status=copy_status)
    # A falsy first element is treated as failure; when the result has more
    # than two elements, the third one becomes the exception message.
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # Set publication_status for the new object. Also make sure that the
    # deletion flag is not set (may happen in case of re-importing a previously
    # deleted resource).
    resource.storage_object.publication_status = targetstatus
    resource.storage_object.deleted = False
    if owner_id:
        # NOTE(review): this `for` statement stops at the line continuation;
        # the rest of the iterable expression and the loop body are not
        # present in this copy.
        for edt_grp in User.objects.get(id=owner_id).get_profile() \
        # this also takes care of saving the storage_object

    # explicitly write metadata XML and storage object to the storage folder
    # NOTE(review): no statement follows the comment above in this copy.

    # Create log ADDITION message for the new object, but only if we have a user:
    # NOTE(review): the keyword arguments below have no enclosing call in this
    # copy; the opening line of the call and its closing parenthesis appear
    # to be missing.
    if owner_id:
            user_id         = owner_id,
            content_type_id = ContentType.objects.get_for_model(resource).pk,
            object_id       = resource.pk,
            object_repr     = force_unicode(resource),
            action_flag     = ADDITION

    # Update statistics
    saveLRStats(resource, UPDATE_STAT)

    return resource
Пример #3
def import_from_string(xml_string, targetstatus, owner_id=None):
    # NOTE(review): the next three lines read like the function's docstring
    # text, but the enclosing triple quotes are absent in this copy -- confirm
    # against the original source.
    Import a single resource from a string representation of its XML tree, 
    and save it with the given target status.
    Returns the imported resource object on success, raises and Exception on failure.
    from metashare.repository.models import resourceInfoType_model
    result = resourceInfoType_model.import_from_string(xml_string)
    # A falsy first element is treated as failure; when the result has more
    # than two elements, the third one becomes the exception message.
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # Set publication_status for new object.
    resource.storage_object.publication_status = targetstatus
    # NOTE(review): the `if` below has no body in this copy.
    if owner_id:
    # explicitly write metadata XML and storage object to the storage folder

    # Create log ADDITION message for the new object, but only if we have a user:
    # NOTE(review): the keyword arguments below have no enclosing call in this
    # copy; the opening line of the call and its closing parenthesis appear
    # to be missing.
    if owner_id:
            user_id         = owner_id,
            content_type_id = ContentType.objects.get_for_model(resource).pk,
            object_id       = resource.pk,
            object_repr     = force_unicode(resource),
            action_flag     = ADDITION

    # Update statistics
    saveLRStats(resource, "", "", UPDATE_STAT)

    return resource
Пример #4
def import_resources(import_folder):
    # NOTE(review): the next line reads like the function's docstring text,
    # but the enclosing triple quotes are absent in this copy.
    Imports resources from the given folder.
    # Check that SOLR is running, or else all resources will stay at status INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup()  # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'

    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    # NOTE(review): no statement follows the comment above in this copy.

    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    # NOTE(review): nothing visible in this copy ever appends to
    # imported_resources; only its length is reported in the summary below.
    imported_resources = []
    # (folder name, exception) pairs for the folders that failed to import
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        # NOTE(review): the body below is indented two levels deeper than this
        # `if`, and the `except` further down has no visible `try:` -- a
        # `try:` line appears to be missing here.
        if os.path.isdir(folder_path):
                print "importing from folder: '{0}'".format(folder_name)
                # import storage object
                so_filename = os.path.join(folder_path, STORAGE)
                so_in = open(so_filename, "rb")
                for obj in serializers.deserialize("xml", so_in):
                    print "importing storage object"
                    # storage.xml only contains a single storage object
                    storage_obj = obj.object
                    # this storage object is NOT saved!
                    # we only copy the relevant attributes from this storage
                    # object to the one at the resource!
                # import resource object
                ro_filename = os.path.join(folder_path, RESOURCE)
                ro_in = open(ro_filename, "rb")
                for obj in serializers.deserialize("xml", ro_in):
                    print "importing resource object"
                    # resource.xml only contains a single resource object
                    res_obj = obj
                    # the deserialized object contains the ManyToMany attributes
                    # in m2m_data
                # import resource from metadata.xml
                res_filename = os.path.join(folder_path, METADATA)
                temp_file = open(res_filename, 'rb')
                xml_string = temp_file.read()
                result = resourceInfoType_model.import_from_string(
                    xml_string, copy_status=MASTER)
                # A falsy first element is treated as failure; the third
                # element, when present, becomes the exception message.
                if not result[0]:
                    msg = u''
                    if len(result) > 2:
                        msg = u'{}'.format(result[2])
                    raise Exception(msg)
                res = result[0]
                # update imported resource with imported resource object
                # and storage object
                _update_resource(res, res_obj, storage_obj)
                # copy possible binaries archives
                # NOTE(review): the list comprehension below lacks its element
                # expression and closing bracket in this copy.
                for archive_name in [
                        for _ext in ALLOWED_ARCHIVE_EXTENSIONS
                    archive_filename = os.path.join(folder_path, archive_name)
                    if os.path.isfile(archive_filename):
                        print "copying archive"
                        # NOTE(review): the format() call below looks
                        # incomplete -- its visible argument refers to
                        # res_storage_path, the very name being assigned.
                        res_storage_path = '{0}/{1}/'.format(
                            os.path.join(res_storage_path, archive_name))
                        # there can be at most one binary
            except Exception as problem:
                from django import db
                # NOTE(review): the `if` below has no body in this copy.
                if isinstance(problem, db.utils.DatabaseError):
                    # reset database connection (required for PostgreSQL)
                erroneous_descriptors.append((folder_name, problem))

    print "Done.  Successfully imported {0} resources into the database, " \
      "errors occurred in {1} cases.".format(
      len(imported_resources), len(erroneous_descriptors))
    if len(erroneous_descriptors) > 0:
        print "The following resources could not be imported:"
        for descriptor, exception in erroneous_descriptors:
            print "\t{}: {}".format(descriptor, exception)

    # Be nice and cleanup cache...
    # NOTE(review): only the cache size is computed and printed here; no call
    # that actually clears OBJECT_XML_CACHE is visible in this copy.
    _cache_size = sum([len(x) for x in OBJECT_XML_CACHE.values()])
    print "Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size)

    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
Пример #5
def restore_from_folder(storage_id, copy_status=MASTER, \
                        storage_digest=None, source_node=None, force_digest=False):
    # NOTE(review): the lines up to "Returns the restored resource ..." read
    # like the function's docstring text, but the enclosing triple quotes are
    # absent in this copy.
    Restores the storage object and the associated resource for the given
    storage object identifier and makes it persistent in the database. 
    storage_id: the storage object identifier; it is assumed that this is the
        folder name in the storage folder folder where serialized storage object
        and metadata XML are located
    copy_status (optional): one of MASTER, REMOTE, PROXY; if present, used as
        copy status for the restored resource
    storage_digest (optional): the digest_checksum to set in the restored
        storage object

    source_node (optional): the source node if to set in the restored
        storage object
    force_digest (optional): if True, always recreate the digest zip-archive
    Returns the restored resource with its storage object set.
    from metashare.repository.models import resourceInfoType_model

    # if a storage object with this id already exists, delete it
    # NOTE(review): the `try:` matching the `except` below is not present in
    # this copy, and no delete call is visible either.
        _so = StorageObject.objects.get(identifier=storage_id)
    except ObjectDoesNotExist:
        _so = None

    storage_folder = os.path.join(settings.STORAGE_PATH, storage_id)

    # get most current metadata.xml
    _files = os.listdir(storage_folder)
    # NOTE(review): as written, the trailing comma makes this a one-element
    # tuple wrapping the list; an enclosing call (opening and closing lines)
    # appears to be missing.
    _metadata_files = \
            [f for f in _files if f.startswith('metadata')],
    if not _metadata_files:
        raise Exception('no metadata.xml found')
    # restore resource from metadata.xml
    # NOTE(review): the two calls below (open(...) and import_from_string(...))
    # are left unterminated; their remaining arguments are missing.
    _metadata_file = open('{0}/{1}'.format(storage_folder, _metadata_files[0]),
    _xml_string = _metadata_file.read()
    result = resourceInfoType_model.import_from_string(_xml_string,
    # A falsy first element is treated as failure; the third element, when
    # present, becomes the exception message.
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # at this point, a storage object is already created at the resource, so update it
    _storage_object = resource.storage_object
    _storage_object.metadata = _xml_string

    # add global storage object attributes if available
    if os.path.isfile('{0}/storage-global.json'.format(storage_folder)):
        _global_json = \
            _fill_storage_object(_storage_object, '{0}/storage-global.json'.format(storage_folder))
        _storage_object.global_storage = _global_json
        # NOTE(review): the warning text below describes the "file missing"
        # case, yet it sits inside the "file exists" branch -- an `else:`
        # line appears to be missing before it.
        LOGGER.warn('missing storage-global.json, importing resource as new')
        _storage_object.identifier = storage_id

    # add local storage object attributes if available
    if os.path.isfile('{0}/storage-local.json'.format(storage_folder)):
        _local_json = \
            _fill_storage_object(_storage_object, '{0}/storage-local.json'.format(storage_folder))
        _storage_object.local_storage = _local_json
        # always use the provided copy status, even if its different from the
        # one in the local storage object
        if copy_status:
            if _storage_object.copy_status != copy_status:
                    # NOTE(review): orphaned string literal -- the logging
                    # call that presumably wraps it is missing.
                    'overwriting copy status from storage-local.json with "{}"'
            _storage_object.copy_status = copy_status
        # NOTE(review): presumably the block below was the `else:` branch of
        # the storage-local.json check, with a nested `else:` for the default;
        # those lines are not present in this copy.
        if copy_status:
            _storage_object.copy_status = copy_status
            # no copy status and no local storage object is provided, so use
            # a default
                'no copy status provided, using default copy status MASTER')
            _storage_object.copy_status = MASTER

    # set storage digest if provided (usually for non-local resources)
    if storage_digest:
        _storage_object.digest_checksum = storage_digest
    # set source node id if provided (usually for non-local resources)
    if source_node:
        _storage_object.source_node = source_node

    # update_storage includes saving
    # _storage_object.save()
    # NOTE(review): no update_storage call is visible in this copy, and
    # force_digest is not used by any visible line.

    return resource
Пример #6
def restore_from_folder(storage_id, copy_status=None):
    # NOTE(review): the lines up to "Returns the restored resource ..." read
    # like the function's docstring text, but the enclosing triple quotes are
    # absent in this copy.
    Restores the storage object and the associated resource for the given
    storage object identifier and makes it persistent in the database. 
    storage_id: the storage object identifier; it is assumed that this is the
        folder name in the storage folder folder where serialized storage object
        and metadata XML are located
    copy_status (optional): one of MASTER, REMOTE, PROXY; if present, used as
        copy status for the restored resource
    Returns the restored resource with its storage object set.
    from metashare.repository.models import resourceInfoType_model
    # if a storage object with this id already exists, delete it
    # NOTE(review): the `try:` matching the `except` below is not present in
    # this copy, and no delete call is visible either.
        _so = StorageObject.objects.get(identifier=storage_id)
    except ObjectDoesNotExist:
        _so = None
    storage_folder = os.path.join(settings.STORAGE_PATH, storage_id)

    # get most current metadata.xml
    _files = os.listdir(storage_folder)
    # NOTE(review): as written, the trailing comma makes this a one-element
    # tuple wrapping the list; an enclosing call (opening and closing lines)
    # appears to be missing.
    _metadata_files = \
        [f for f in _files if f.startswith('metadata')],
    if not _metadata_files:
        raise Exception('no metadata.xml found')
    # restore resource from metadata.xml
    _metadata_file = open('{0}/{1}'.format(storage_folder, _metadata_files[0]), 'rb')
    _xml_string = _metadata_file.read()
    result = resourceInfoType_model.import_from_string(_xml_string)
    # A falsy first element is treated as failure; the third element, when
    # present, becomes the exception message.
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # at this point, a storage object is already created at the resource, so update it 
    _storage_object = resource.storage_object
    _storage_object.metadata = _xml_string
    # add global storage object attributes if available
    if os.path.isfile('{0}/storage-global.json'.format(storage_folder)):
        _global_json = \
          _fill_storage_object(_storage_object, '{0}/storage-global.json'.format(storage_folder))
        _storage_object.global_storage = _global_json
        # NOTE(review): the warning text below describes the "file missing"
        # case, yet it sits inside the "file exists" branch -- an `else:`
        # line appears to be missing before it.
        LOGGER.warn('missing storage-global.json, importing resource as new')
        _storage_object.identifier = storage_id
    # add local storage object attributes if available 
    if os.path.isfile('{0}/storage-local.json'.format(storage_folder)):
        _local_json = \
          _fill_storage_object(_storage_object, '{0}/storage-local.json'.format(storage_folder))
        _storage_object.local_storage = _local_json
        # always use the provided copy status, even if its different from the
        # one in the local storage object
        if copy_status:
            if _storage_object.copy_status != copy_status:
                LOGGER.warn('overwriting copy status from storage-local.json with "{}"'.format(copy_status))
            _storage_object.copy_status = copy_status
        # NOTE(review): presumably the block below was the `else:` branch of
        # the storage-local.json check, with a nested `else:` for the default;
        # as written, the final assignment overrides copy_status with MASTER
        # whenever copy_status is truthy.
        if copy_status:
            _storage_object.copy_status = copy_status
            # no copy status and no local storage object is provided, so use
            # a default
            LOGGER.warn('no copy status provided, using default copy status MASTER')
            _storage_object.copy_status = MASTER

    return resource
def import_resources(import_folder):
    # NOTE(review): the next line reads like the function's docstring text,
    # but the enclosing triple quotes are absent in this copy.
    Imports resources from the given folder.
    # Check that SOLR is running, or else all resources will stay at status INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup() # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'
    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    # NOTE(review): no statement follows the comment above in this copy.
    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    # NOTE(review): nothing visible in this copy ever appends to
    # imported_resources; only its length is reported in the summary below.
    imported_resources = []
    # (folder name, exception) pairs for the folders that failed to import
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        # NOTE(review): the body below is indented two levels deeper than this
        # `if`, and the `except` further down has no visible `try:` -- a
        # `try:` line appears to be missing here.
        if os.path.isdir(folder_path):
                print "importing from folder: '{0}'".format(folder_name)
                # import storage object
                so_filename = os.path.join(folder_path, STORAGE)
                so_in = open(so_filename, "rb")
                for obj in serializers.deserialize("xml", so_in):
                    print "importing storage object"
                    # storage.xml only contains a single storage object
                    storage_obj = obj.object
                    # this storage object is NOT saved!
                    # we only copy the relevant attributes from this storage
                    # object to the one at the resource!
                # import resource object
                ro_filename = os.path.join(folder_path, RESOURCE)
                ro_in = open(ro_filename, "rb")
                for obj in serializers.deserialize("xml", ro_in):
                    print "importing resource object"
                    # resource.xml only contains a single resource object
                    res_obj = obj
                    # the deserialized object contains the ManyToMany attributes
                    # in m2m_data
                # import resource from metadata.xml
                res_filename = os.path.join(folder_path, METADATA)
                temp_file = open(res_filename, 'rb')
                xml_string = temp_file.read()
                result = resourceInfoType_model.import_from_string(
                  xml_string, copy_status=MASTER)
                # A falsy first element is treated as failure; the third
                # element, when present, becomes the exception message.
                if not result[0]:
                    msg = u''
                    if len(result) > 2:
                        msg = u'{}'.format(result[2])
                    raise Exception(msg)
                res = result[0]
                # update imported resource with imported resource object 
                # and storage object
                _update_resource(res, res_obj, storage_obj)
                # copy possible binaries archives
                for archive_name in [ARCHIVE_TPL.format(_ext)
                                     for _ext in ALLOWED_ARCHIVE_EXTENSIONS]:
                    archive_filename = os.path.join(folder_path, archive_name)
                    if os.path.isfile(archive_filename):
                        print "copying archive"
                        res_storage_path = '{0}/{1}/'.format(
                          settings.STORAGE_PATH, res.storage_object.identifier)
                        # NOTE(review): the line below is the tail of a call
                        # whose opening line is missing in this copy
                        # (presumably the actual archive copy -- confirm).
                          os.path.join(res_storage_path, archive_name))
                        # there can be at most one binary
            except Exception as problem:
                from django import db
                # NOTE(review): the `if` below has no body in this copy.
                if isinstance(problem, db.utils.DatabaseError):
                    # reset database connection (required for PostgreSQL)
                erroneous_descriptors.append((folder_name, problem))

    print "Done.  Successfully imported {0} resources into the database, " \
      "errors occurred in {1} cases.".format(
      len(imported_resources), len(erroneous_descriptors))
    if len(erroneous_descriptors) > 0:
        print "The following resources could not be imported:"
        for descriptor, exception in erroneous_descriptors:
            print "\t{}: {}".format(descriptor, exception)

    # Be nice and cleanup cache...
    # NOTE(review): only the cache size is computed and printed here; no call
    # that actually clears OBJECT_XML_CACHE is visible in this copy.
    _cache_size = sum([len(x) for x in OBJECT_XML_CACHE.values()])
    print "Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size)
    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
Пример #8
def import_xml(filename, copy_status=MASTER):
    """
    Import a resource from the XML file at the given path.

    filename: path of the XML file to read
    copy_status (optional): passed through to the import as `copy_status`;
        defaults to MASTER

    Returns the unmodified result of
    resourceInfoType_model.import_from_string().
    """
    # Read inside a context manager so the file handle is always closed,
    # even if read() raises; previously the handle was never closed.
    with open(filename) as _xml:
        _xml_string = _xml.read()
    return resourceInfoType_model.import_from_string(
        _xml_string, copy_status=copy_status)
Пример #9
def import_xml(filename):
    """
    Import a resource from the XML file at the given path.

    filename: path of the XML file to read

    Returns the unmodified result of
    resourceInfoType_model.import_from_string().
    """
    # Read inside a context manager so the file handle is always closed,
    # even if read() raises; previously the handle was never closed.
    with open(filename) as _xml:
        _xml_string = _xml.read()
    return resourceInfoType_model.import_from_string(_xml_string)