def get_data(url, request=None):
    """Perform an OAI-PMH request against a data provider.

    The part of ``url`` before the first ``?`` is first checked with
    ``identify``; only when that check answers 200 is the full ``url``
    requested, with the caller's session id forwarded as a cookie.

    Args:
        url: URL with the OAI-PMH request. It must contain a ``?``.
        request: Optional request object whose ``session.session_key`` is
            forwarded as the ``sessionid`` cookie. When the key cannot be
            read, ``None`` is sent instead.

    Returns:
        Response holding the text of the answer, with status 200.

    Raises:
        OAIAPIException: ``url`` contains no ``?`` (400), or the request was
            answered with a status other than 200 (that status is reused).
        OAIAPILabelledException: The identify check did not answer 200
            (500), a ``requests.HTTPError`` was raised, or any other
            unexpected error occurred (500).

    """
    try:
        url_text = str(url)
        if "?" not in url_text:
            raise oai_pmh_exceptions.OAIAPIException(
                message="An error occurred, url malformed.",
                status_code=status.HTTP_400_BAD_REQUEST,
            )

        registry_url = url_text.split("?")[0]
        data, status_code = identify(registry_url)
        if status_code != status.HTTP_200_OK:
            content = (
                "An error occurred when attempting to identify resource: %s"
                % data)
            raise oai_pmh_exceptions.OAIAPILabelledException(
                message=content,
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)

        # TODO: refactor send request with cookies (same code in other apps)
        # Best effort: a missing request or session must not abort the call.
        # `Exception` (not a bare except) so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        try:
            session_id = request.session.session_key
        except Exception:
            session_id = None

        http_response = send_get_request(
            url, cookies={"sessionid": session_id})
        if http_response.status_code != status.HTTP_200_OK:
            raise oai_pmh_exceptions.OAIAPIException(
                message="An error occurred.",
                status_code=http_response.status_code,
            )

        return Response(http_response.text, status=status.HTTP_200_OK)
    except requests.HTTPError as err:
        # An HTTPError may carry no response; compare against None
        # explicitly and fall back to 500 instead of failing with an
        # AttributeError inside this handler.
        if err.response is not None:
            error_status = err.response.status_code
        else:
            error_status = status.HTTP_500_INTERNAL_SERVER_ERROR
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=str(err), status_code=error_status)
    except oai_pmh_exceptions.OAIAPIException:
        # Already an API exception: let it through unchanged.
        raise
    except Exception as e:
        content = "An error occurred when attempting to retrieve data: %s" % str(
            e)
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=content, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
示例#2
0
def add_template_metadata_format(metadata_prefix, template_id, request):
    """Register a metadata format backed by an existing template.

    The template is loaded by id, its content is used as the XML schema, and
    the schema URL is derived from the title of the template's version
    manager and the template's version number.

    Args:
        metadata_prefix: Metadata Prefix.
        template_id: Id of the template.
        request: Request forwarded to the template, version manager and
            upsert calls.

    Returns:
        Response with a labelled success message and status 201.

    Raises:
        OAIAPILabelledException: 404 when a DoesNotExist is raised, 400 on an
            XMLError, 500 on any other error; labelled exceptions raised
            along the way are propagated unchanged.

    """
    try:
        template = template_api.get(template_id, request=request)
        manager = version_manager_api.get_from_version(template,
                                                       request=request)
        schema_content = template.content
        namespace = _get_target_namespace(schema_content)
        version = version_manager_api.get_version_number(
            manager, template_id, request=request)
        format_fields = {
            "metadata_prefix": metadata_prefix,
            "schema": _get_simple_template_metadata_format_schema_url(
                manager.title, version),
            "xml_schema": schema_content,
            "is_default": False,
            "is_template": True,
            "metadata_namespace": namespace,
            "template": template,
        }
        upsert(OaiProviderMetadataFormat(**format_fields), request=request)

        success_message = OaiPmhMessage.get_message_labelled(
            "Metadata format added with success.")
        return Response(success_message, status=status.HTTP_201_CREATED)
    except oai_pmh_exceptions.OAIAPILabelledException:
        # Already in the expected form: propagate as is.
        raise
    except DoesNotExist:
        not_found_message = ("Unable to add the new metadata format. "
                             "Impossible to retrieve the template with the "
                             "given template")
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=not_found_message,
            status_code=status.HTTP_404_NOT_FOUND,
        )
    except exceptions.XMLError as xml_error:
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message="Unable to add the new metadata format.%s" % str(xml_error),
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    except Exception as error:
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message="Unable to add the new metadata format.%s" % str(error),
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
示例#3
0
def add_metadata_format(metadata_prefix, schema_url, request):
    """Add a new metadata format from a schema available at a URL.

    The schema is downloaded from ``schema_url`` (forwarding the caller's
    session id as a cookie), its target namespace is extracted, and the
    resulting metadata format is saved with ``upsert``.

    Args:
        metadata_prefix: Metadata Prefix.
        schema_url: URL of the schema.
        request: Request object; its ``session.session_key`` is sent as the
            ``sessionid`` cookie when it can be read, ``None`` otherwise.
            Also forwarded to ``upsert``.

    Returns:
        Response with a labelled success message and status 201.

    Raises:
        OAIAPILabelledException: 400 when the schema download does not
            answer 200 or an XMLError/XSDError is raised, 500 on any other
            error; labelled exceptions raised along the way are propagated
            unchanged.

    """
    try:
        # TODO: refactor send request with cookies (same code in other apps)
        # Best effort: a request without a session must not abort the call.
        # `Exception` (not a bare except) so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        try:
            session_id = request.session.session_key
        except Exception:
            session_id = None

        http_response = send_get_request(schema_url,
                                         cookies={"sessionid": session_id})
        if http_response.status_code != status.HTTP_200_OK:
            raise oai_pmh_exceptions.OAIAPILabelledException(
                message="Unable to add the new metadata format. Impossible"
                " to retrieve the schema at the given URL",
                status_code=status.HTTP_400_BAD_REQUEST,
            )

        xml_schema = http_response.text
        target_namespace = _get_target_namespace(xml_schema)
        obj = OaiProviderMetadataFormat(
            metadata_prefix=metadata_prefix,
            schema=schema_url,
            xml_schema=xml_schema,
            is_default=False,
            metadata_namespace=target_namespace,
            is_template=False,
        )
        upsert(obj, request=request)
        content = OaiPmhMessage.get_message_labelled(
            "Metadata format added with success.")

        return Response(content, status=status.HTTP_201_CREATED)
    except oai_pmh_exceptions.OAIAPILabelledException:
        # Already in the expected form: propagate as is.
        raise
    except (exceptions.XMLError, XSDError) as e:
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message="Unable to add the new metadata format. %s" % str(e),
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    except Exception as e:
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message="Unable to add the new metadata format.%s" % str(e),
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
示例#4
0
def _get_target_namespace(xml_schema):
    """Extract the target namespace declared by a schema.

    Args:
        xml_schema: XML representation of the schema.

    Returns:
        The value of the root element's ``targetNamespace`` attribute, or
        "http://www.w3.org/2001/XMLSchema" when the attribute is absent.

    Raises:
        XMLError: The schema could not be transformed into an XML tree.
        OAIAPILabelledException: The target namespace is not among the
            values of the root element's namespace map (400).

    """
    try:
        schema_tree = XSDTree.transform_to_xml(xml_schema)
    except Exception as parse_error:
        raise exceptions.XMLError(str(parse_error))

    schema_root = schema_tree.find(".")
    # No explicit target namespace: fall back to the XML Schema namespace.
    if "targetNamespace" not in schema_root.attrib:
        return "http://www.w3.org/2001/XMLSchema"

    namespace = schema_root.attrib["targetNamespace"]
    if namespace in schema_root.nsmap.values():
        return namespace

    message = "The use of a targetNamespace without an associated prefix is not supported."
    raise oai_pmh_exceptions.OAIAPILabelledException(
        message=message, status_code=status.HTTP_400_BAD_REQUEST)
示例#5
0
def get_data(url):
    """Perform an OAI-PMH request against a data provider.

    The part of ``url`` before the first ``?`` is first checked with
    ``identify``; only when that check answers 200 is the full ``url``
    requested.

    Args:
        url: URL with the OAI-PMH request. It must contain a ``?``.

    Returns:
        Response holding the text of the answer, with status 200.

    Raises:
        OAIAPIException: ``url`` contains no ``?`` (400), or the request was
            answered with a status other than 200 (that status is reused).
        OAIAPILabelledException: The identify check did not answer 200
            (500), a ``requests.HTTPError`` was raised, or any other
            unexpected error occurred (500).

    """
    try:
        url_text = str(url)
        if '?' not in url_text:
            raise oai_pmh_exceptions.OAIAPIException(
                message='An error occurred, url malformed.',
                status_code=status.HTTP_400_BAD_REQUEST)

        registry_url = url_text.split('?')[0]
        data, status_code = identify(registry_url)
        if status_code != status.HTTP_200_OK:
            content = 'An error occurred when attempting to identify resource: %s' % data
            raise oai_pmh_exceptions.OAIAPILabelledException(
                message=content,
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)

        http_response = send_get_request(url)
        if http_response.status_code != status.HTTP_200_OK:
            raise oai_pmh_exceptions.OAIAPIException(
                message='An error occurred.',
                status_code=http_response.status_code)

        return Response(http_response.text, status=status.HTTP_200_OK)
    except requests.HTTPError as err:
        # An HTTPError may carry no response; compare against None
        # explicitly and fall back to 500 instead of failing with an
        # AttributeError inside this handler.
        if err.response is not None:
            error_status = err.response.status_code
        else:
            error_status = status.HTTP_500_INTERNAL_SERVER_ERROR
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=str(err), status_code=error_status)
    except oai_pmh_exceptions.OAIAPIException:
        # Already an API exception: let it through unchanged.
        raise
    except Exception as e:
        content = 'An error occurred when attempting to retrieve data: %s' % str(
            e)
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=content, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
def harvest_registry(registry):
    """Harvest the given registry.

    The registry is flagged as harvesting for the duration of the run. On
    success its ``last_update`` is set to the time the run started; the flag
    is cleared and the registry saved again in both the success and the
    failure case.

    Args:
        registry: The registry to harvest.

    Returns:
        List of errors reported by the harvest, or an empty list when the
        registry was already being harvested.

    Raises:
        OAIAPILabelledException: Any error raised during the run (500).

    """
    # A harvest is already in progress for this registry: skip this run.
    if registry.is_harvesting:
        return []

    try:
        registry.is_harvesting = True
        upsert(registry)
        # Taken before harvesting so it becomes the last update date.
        started_at = datetime.datetime.now()
        formats = oai_harvester_metadata_format_api.get_all_to_harvest_by_registry_id(
            registry.id)
        all_sets = oai_harvester_set_api.get_all_by_registry_id(
            registry.id, "set_name")
        selected_sets = oai_harvester_set_api.get_all_to_harvest_by_registry_id(
            registry.id, "set_name")
        # Harvest set by set only when a strict subset of the sets is
        # selected. When every set is selected the set parameter is not
        # needed, which avoids retrieving the same record once per set it
        # belongs to. A registry without any set is harvested without sets.
        restrict_to_sets = (len(all_sets) != len(selected_sets)
                            and len(all_sets) != 0)
        if restrict_to_sets:
            errors = _harvest_by_metadata_formats_and_sets(
                registry, formats, selected_sets, all_sets)
        else:
            errors = _harvest_by_metadata_formats(registry, formats, all_sets)

        registry.is_harvesting = False
        registry.last_update = started_at
        upsert(registry)
        return errors
    except Exception as error:
        # Release the harvesting flag before reporting the failure.
        registry.is_harvesting = False
        upsert(registry)
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=str(error),
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
示例#7
0
def _get_identify_as_object(url):
    """Fetch the identify information for the given URL.

    Args:
        url: URL.

    Returns:
        The identify response returned by ``oai_verbs_api`` when the status
        is 200.

    Raises:
        OAIAPILabelledException: The status is not 200; the labelled message
            of the response and the received status code are reused.

    """
    identify_data, http_status = oai_verbs_api.identify_as_object(url)
    if http_status == status.HTTP_200_OK:
        return identify_data

    raise oai_pmh_exceptions.OAIAPILabelledException(
        message=identify_data[OaiPmhMessage.label],
        status_code=http_status)
def add_registry_by_url(url, harvest_rate, harvest, request=None):
    """Create a registry and its related objects from a data provider URL.

    The identify, sets and metadata formats information is fetched for the
    URL first, then the registry and each related object are saved. If
    saving fails, the registry object created so far is deleted.

    Args:
        url: Url of the registry to add.
        harvest_rate: Harvest rate. Use to harvest data every harvest_rate seconds.
        harvest: True or False.
        request: Request forwarded when saving the metadata formats.

    Returns:
        The OaiRegistry instance.

    Raises:
        OAIAPINotUniqueError: A registry already exists for this URL.
        OAIAPILabelledException: Any error raised while saving (500).

    """
    if check_registry_url_already_exists(url):
        duplicate_message = ("Unable to create the data provider."
                             " The data provider already exists.")
        raise oai_pmh_exceptions.OAIAPINotUniqueError(
            message=duplicate_message)

    identify_data = _get_identify_as_object(url)
    provider_sets = _get_sets_as_object(url)
    provider_formats = _get_metadata_formats_as_object(url)

    # Stays None until a registry object exists, so that the rollback below
    # knows whether there is anything to delete.
    registry = None
    try:
        registry = _init_registry(
            url,
            harvest,
            harvest_rate,
            identify_data.repository_name,
            identify_data.description,
        )
        registry = upsert(registry)
        _upsert_identify_for_registry(identify_data, registry)
        for provider_set in provider_sets:
            _upsert_set_for_registry(provider_set, registry)
        for provider_format in provider_formats:
            _upsert_metadata_format_for_registry(provider_format,
                                                 registry,
                                                 request=request)
    except Exception as error:
        # Manual rollback of the registry created above.
        if registry is not None:
            registry.delete()

        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=str(error), status_code=HTTP_500_INTERNAL_SERVER_ERROR)

    return registry
def update_registry_info(registry, request=None):
    """Refresh the information stored for a registry.

    The registry is flagged as updating while its identify, sets and
    metadata formats information is fetched again and saved. Sets and
    metadata formats that are no longer present in the fetched information
    are passed to the deletion handlers. The flag is always cleared before
    this function returns or raises.

    Args:
        registry: OaiRegistry to update.
        request: Request forwarded when saving the metadata formats.

    Returns:
        The OaiRegistry instance, or an empty list when the registry was
        already being updated.

    Raises:
        OAIAPILabelledException: Any error raised while saving the fetched
            information (500). Errors raised while fetching it are
            propagated unchanged.

    """
    # If registry is already updating, skip for now
    if registry.is_updating:
        return []

    registry.is_updating = True
    upsert(registry)

    try:
        identify_response = _get_identify_as_object(registry.url)
        sets_response = _get_sets_as_object(registry.url)
        metadata_formats_response = _get_metadata_formats_as_object(
            registry.url)
    except Exception:
        # A failed fetch must not leave the registry flagged as updating,
        # otherwise every later call would be skipped by the guard above.
        # The original error is re-raised untouched.
        registry.is_updating = False
        upsert(registry)
        raise

    try:
        _upsert_identify_for_registry(identify_response, registry)
        registry.name = identify_response.repository_name
        registry.description = identify_response.description
        upsert(registry)
        for set_ in sets_response:
            _upsert_set_for_registry(set_, registry)
        for metadata_format in metadata_formats_response:
            _upsert_metadata_format_for_registry(metadata_format,
                                                 registry,
                                                 request=request)
        # Check if we have some deleted set
        _handle_deleted_set(registry.id, sets_response)
        # Check if we have some deleted metadata format
        _handle_deleted_metadata_format(registry.id, metadata_formats_response)
        registry.is_updating = False
        upsert(registry)

        return registry
    except Exception as e:
        registry.is_updating = False
        upsert(registry)
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=str(e), status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
示例#10
0
def _get_sets_as_object(url):
    """Fetch the sets information for the given URL.

    Args:
        url: URL.

    Returns:
        The ListSets response returned by ``oai_verbs_api`` when the status
        is 200, or an empty list when the status is 204.

    Raises:
        OAIAPILabelledException: The status is neither 200 nor 204; the
            labelled message of the response and the received status code
            are reused.

    """
    sets_data, http_status = oai_verbs_api.list_sets_as_object(url)

    # No content: there is no set to report.
    if http_status == status.HTTP_204_NO_CONTENT:
        return []
    if http_status == status.HTTP_200_OK:
        return sets_data

    raise oai_pmh_exceptions.OAIAPILabelledException(
        message=sets_data[OaiPmhMessage.label],
        status_code=http_status)
示例#11
0
def _get_target_namespace(xml_schema):
    """Extract the target namespace declared by a schema.

    Args:
        xml_schema: XML representation of the schema, as text; it is encoded
            to UTF-8 before being parsed.

    Returns:
        The value of the root element's ``targetNamespace`` attribute, or
        "http://www.w3.org/2001/XMLSchema" when the attribute is absent.

    Raises:
        OAIAPILabelledException: The target namespace is not among the
            values of the root element's namespace map (400).

    """
    schema_root = XSDTree.fromstring(xml_schema.encode('utf-8')).find(".")

    # No explicit target namespace: fall back to the XML Schema namespace.
    if 'targetNamespace' not in schema_root.attrib:
        return "http://www.w3.org/2001/XMLSchema"

    namespace = schema_root.attrib['targetNamespace']
    if namespace in schema_root.nsmap.values():
        return namespace

    message = "The use of a targetNamespace without an associated prefix is not supported."
    raise oai_pmh_exceptions.OAIAPILabelledException(
        message=message,
        status_code=status.HTTP_400_BAD_REQUEST)
示例#12
0
def list_records(url,
                 metadata_prefix=None,
                 resumption_token=None,
                 set_h=None,
                 from_date=None,
                 until_date=None):
    """ Performs an Oai-Pmh ListRecords request.

    When a resumption token is given it is the only argument sent besides
    the verb; otherwise the metadata prefix, set and date bounds are sent.
    Errors are not raised to the caller: they are turned into a Response.

    Args:
        url: URL of the Data Provider.
        metadata_prefix: Metadata Prefix to use for the request.
        resumption_token: Resumption Token to use for the request.
        set_h: Set to use for the request.
        from_date: From Date to use for the request.
        until_date: Until Date to use for the request.

    Returns:
        A ``(response, resumption_token)`` tuple. On success the response
        holds the list of records with status 200 and the token is the text
        of the first resumptionToken element of the answer, or None when
        there is none. On failure the response describes the error.

    """
    try:
        params = {'verb': 'ListRecords'}
        if resumption_token is not None:
            params['resumptionToken'] = resumption_token
        else:
            params['metadataPrefix'] = metadata_prefix
            params['set'] = set_h
            params['from'] = from_date
            params['until'] = until_date
        http_response = send_get_request(url, params=params)
        # From here on the name holds the token of the answer, not the one
        # that was sent.
        resumption_token = None
        if http_response.status_code == status.HTTP_404_NOT_FOUND:
            raise oai_pmh_exceptions.OAIAPILabelledException(
                message='Impossible to get data from the server. '
                'Server not found',
                status_code=status.HTTP_404_NOT_FOUND)
        if http_response.status_code != status.HTTP_200_OK:
            raise oai_pmh_exceptions.OAIAPILabelledException(
                message='An error occurred while trying to get '
                'data from the server.',
                status_code=http_response.status_code)

        xml = http_response.text
        elements = XSDTree.iterfind(
            xml, './/{http://www.openarchives.org/OAI/2.0/}record')
        rtn = [
            sickle_operations.get_record_elt(elt, metadata_prefix)
            for elt in elements
        ]
        resumption_token_elt = XSDTree.iterfind(
            xml,
            './/{http://www.openarchives.org/OAI/2.0/}resumptionToken')
        first_token_elt = next(iter(resumption_token_elt), None)
        if first_token_elt is not None:
            resumption_token = first_token_elt.text

        return Response(rtn, status=status.HTTP_200_OK), resumption_token
    except oai_pmh_exceptions.OAIAPIException as e:
        return e.response(), resumption_token
    except Exception as e:
        # str(e) rather than e.message: exceptions have no `message`
        # attribute on Python 3, so reading it would raise from inside this
        # handler instead of returning the error response.
        content = OaiPmhMessage.get_message_labelled(
            'An error occurred during the list_records process: %s' %
            str(e))
        return Response(
            content,
            status=status.HTTP_500_INTERNAL_SERVER_ERROR), resumption_token
示例#13
0
        if str(url).__contains__('?'):
            registry_url = str(url).split('?')[0]
            data, status_code = identify(registry_url)
            if status_code == status.HTTP_200_OK:
                http_response = send_get_request(url)
                if http_response.status_code == status.HTTP_200_OK:
                    return Response(http_response.text,
                                    status=status.HTTP_200_OK)
                else:
                    raise oai_pmh_exceptions.OAIAPIException(
                        message='An error occurred.',
                        status_code=http_response.status_code)
            else:
                content = 'An error occurred when attempting to identify resource: %s' % data
                raise oai_pmh_exceptions.OAIAPILabelledException(
                    message=content,
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
        else:
            raise oai_pmh_exceptions.OAIAPIException(
                message='An error occurred, url malformed.',
                status_code=status.HTTP_400_BAD_REQUEST)
    except requests.HTTPError, err:
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=err.message, status_code=err.response.status_code)
    except oai_pmh_exceptions.OAIAPIException as e:
        raise e
    except Exception as e:
        content = 'An error occurred when attempting to retrieve data: %s' % e.message
        raise oai_pmh_exceptions.OAIAPILabelledException(
            message=content, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)