def test_raise_missing_info(app):
    """Ensure the API raises NameOrUrlMissing when no name or url is given."""
    from invenio_oaiharvester.errors import NameOrUrlMissing

    # Each entry pairs an API function with positional arguments that
    # carry neither a source name nor a url.
    incomplete_calls = (
        (list_records, ()),
        (get_records, ([],)),
    )

    with app.app_context():
        for api_call, args in incomplete_calls:
            with pytest.raises(NameOrUrlMissing):
                api_call(*args)
def test_raise_missing_info(app):
    """Verify that calls without a name or url raise NameOrUrlMissing."""
    from invenio_oaiharvester.errors import NameOrUrlMissing

    def assert_rejected(api_call, *args):
        # The call must fail with NameOrUrlMissing, nothing else.
        with pytest.raises(NameOrUrlMissing):
            api_call(*args)

    with app.app_context():
        assert_rejected(list_records)
        assert_rejected(get_records, [])
def test_raise_wrong_date(app):
    """Check that a from_date after the until_date is rejected.

    ``list_records`` is expected to raise ``WrongDateCombination``.
    """
    # The harvesting window is deliberately inverted: it starts on the
    # 18th but ends on the 17th.
    harvest_args = dict(
        metadata_prefix='arXiv',
        from_date='2015-01-18',
        until_date='2015-01-17',
        url='http://export.arxiv.org/oai2',
        name=None,
        setspecs='physics:hep-lat',
    )
    with app.app_context():
        with pytest.raises(WrongDateCombination):
            list_records(**harvest_args)
def test_raise_wrong_date(app):
    """Ensure WrongDateCombination is raised for an inverted date range."""
    # from_date (the 18th) lies one day after until_date (the 17th).
    with app.app_context(), pytest.raises(WrongDateCombination):
        list_records(
            url='http://export.arxiv.org/oai2',
            name=None,
            metadata_prefix='arXiv',
            setspecs='physics:hep-lat',
            from_date='2015-01-18',
            until_date='2015-01-17'
        )
# Example #5
0
def test_list_records(app, sample_list_xml, sample_list_xml_cs):
    """Check harvesting of records from multiple setspecs.

    Two mocked responses are registered, one for requests whose URL
    contains ``set=cs`` and one for ``set=physics``. ``list_records`` is
    then called with both sets and must return 190 records in total.
    """
    # Registering a response does not intercept anything unless a mock is
    # active, and nothing visible in this test activates one. An explicit
    # mock keeps the test off the real network. Unfired registrations are
    # tolerated so no additional failure mode is introduced.
    mock_http = responses.RequestsMock(assert_all_requests_are_fired=False)
    with mock_http:
        # ``https?`` accepts both schemes (the former ``http?`` made the
        # "p" optional instead); the host's dots are matched literally.
        mock_http.add(
            responses.GET,
            re.compile(r'https?://export\.arxiv\.org/oai2.*set=cs.*'),
            body=sample_list_xml_cs,
            content_type='text/xml'
        )
        mock_http.add(
            responses.GET,
            re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
            body=sample_list_xml,
            content_type='text/xml'
        )
        with app.app_context():
            _, records = list_records(
                metadata_prefix='arXiv',
                from_date='2015-01-15',
                until_date='2015-01-20',
                url='http://export.arxiv.org/oai2',
                name=None,
                setspecs='cs physics'
            )
            # 46 cs + 150 physics - 6 dupes == 190
            assert len(records) == 190
def test_model_based_harvesting_list(app, sample_config, sample_list_xml):
    """Test harvesting using model.

    Harvests by configuration name only (``sample_config``) and checks
    that 150 records come back and that the stored ``lastrun`` value of
    the harvesting object has moved forward.
    """
    from invenio_oaiharvester.utils import get_oaiharvest_object

    # Registering a response does not intercept anything unless a mock is
    # active, and nothing visible in this test activates one. An explicit
    # mock keeps the test off the real network. Unfired registrations are
    # tolerated so no additional failure mode is introduced.
    mock_http = responses.RequestsMock(assert_all_requests_are_fired=False)
    with mock_http:
        # ``https?`` accepts both schemes (the former ``http?`` made the
        # "p" optional instead); the host's dots are matched literally.
        mock_http.add(
            responses.GET,
            re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
            body=sample_list_xml,
            content_type='text/xml'
        )
        with app.app_context():
            source = get_oaiharvest_object(sample_config)
            last_updated = source.lastrun
            time.sleep(0.1)  # to allow for date checking to work better
            _, records = list_records(name=sample_config)

            assert len(records) == 150
            # The object is fetched again to read the value after the run.
            refreshed = get_oaiharvest_object(sample_config)
            assert last_updated < refreshed.lastrun
def test_list_no_records(app, sample_empty_set):
    """Check that harvesting yields no records for an empty response.

    The mocked response body is the ``sample_empty_set`` fixture; the
    record collection returned by ``list_records`` must then be empty.
    """
    # Registering a response does not intercept anything unless a mock is
    # active, and nothing visible in this test activates one. An explicit
    # mock keeps the test off the real network. Unfired registrations are
    # tolerated so no additional failure mode is introduced.
    mock_http = responses.RequestsMock(assert_all_requests_are_fired=False)
    with mock_http:
        # ``https?`` accepts both schemes (the former ``http?`` made the
        # "p" optional instead); the host's dots are matched literally.
        mock_http.add(
            responses.GET,
            re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
            body=sample_empty_set,
            content_type='text/xml'
        )

        with app.app_context():
            _, records = list_records(metadata_prefix='arXiv',
                                      from_date='2015-01-17',
                                      until_date='2015-01-17',
                                      url='http://export.arxiv.org/oai2',
                                      name=None,
                                      setspecs='physics:hep-lat')
            assert not records
def test_model_based_harvesting_list(app, sample_config, sample_list_xml):
    """Test harvesting using model.

    ``list_records`` is given only the configuration name; the test
    expects 150 records and a ``lastrun`` value on the harvesting object
    that is later than the one read before the run.
    """
    from invenio_oaiharvester.utils import get_oaiharvest_object

    # A registered response is only served while a mock is active, and no
    # activation is visible around this test. Activating one here keeps
    # the request from reaching the real server. Registrations that are
    # never requested are not treated as errors.
    arxiv_mock = responses.RequestsMock(assert_all_requests_are_fired=False)
    arxiv_url = re.compile(
        # ``https?`` matches http and https; ``http?`` wrongly made the
        # "p" optional. Dots in the host name are escaped.
        r'https?://export\.arxiv\.org/oai2.*set=physics.*'
    )
    with arxiv_mock:
        arxiv_mock.add(
            responses.GET,
            arxiv_url,
            body=sample_list_xml,
            content_type='text/xml'
        )
        with app.app_context():
            source = get_oaiharvest_object(sample_config)
            last_updated = source.lastrun
            time.sleep(0.1)  # to allow for date checking to work better
            _, records = list_records(name=sample_config)

            assert len(records) == 150
            assert last_updated < get_oaiharvest_object(sample_config).lastrun
def test_list_no_records(app, sample_empty_set):
    """Check that an empty harvesting response produces no records.

    The ``sample_empty_set`` fixture is served as the response body and
    the records returned by ``list_records`` are asserted to be falsy.
    """
    # A registered response is only served while a mock is active, and no
    # activation is visible around this test. Activating one here keeps
    # the request from reaching the real server. Registrations that are
    # never requested are not treated as errors.
    arxiv_mock = responses.RequestsMock(assert_all_requests_are_fired=False)
    arxiv_url = re.compile(
        # ``https?`` matches http and https; ``http?`` wrongly made the
        # "p" optional. Dots in the host name are escaped.
        r'https?://export\.arxiv\.org/oai2.*set=physics.*'
    )
    with arxiv_mock:
        arxiv_mock.add(
            responses.GET,
            arxiv_url,
            body=sample_empty_set,
            content_type='text/xml'
        )

        with app.app_context():
            _, records = list_records(
                metadata_prefix='arXiv',
                from_date='2015-01-17',
                until_date='2015-01-17',
                url='http://export.arxiv.org/oai2',
                name=None,
                setspecs='physics:hep-lat'
            )
            assert not records